Merge tag 'v0.7.1' into v0.7.1-dev

afd0da21 · zhuwenwen · 1a11f127 · 4f4d427a · afd0da21 · afd0da21
Commit afd0da21 authored Feb 03, 2025 by zhuwenwen
20 changed files
--- a/examples/offline_inference/rlhf.py
+++ b/examples/offline_inference/rlhf.py
+"""
+a simple demonstration of RLHF with vLLM, inspired by
+the OpenRLHF framework https://github.com/OpenRLHF/OpenRLHF .
+It follows a design in which training processes and inference processes
+are separate and live on different GPUs.
+Training processes send prompts to inference processes to generate data,
+and also synchronize the weights of the model by broadcasting the weights
+from the training process to the inference process.
+Note that this is a simple demonstration of one training instance and one
+inference instance. In practice, there could be multiple training instances
+and multiple inference instances. For the full implementation, please refer
+to the OpenRLHF framework.
+"""
+import os
+
+import ray
+import torch
+from ray.util.placement_group import placement_group
+from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
+from transformers import AutoModelForCausalLM
+
+from vllm import LLM, SamplingParams
+from vllm.utils import get_ip, get_open_port
+from vllm.worker.worker import Worker
+
+
+def stateless_init_process_group(master_address, master_port, rank, world_size,
+                                 device):
+    """
+    Build the NCCL communicator used for weight synchronization.
+
+    A vLLM `StatelessProcessGroup` is created first; it is set up without
+    considering the global process group in torch.distributed. A
+    `PyNcclCommunicator` bound to `device` is then created on top of it
+    and returned. It provides the data-plane communication (NCCL) between
+    the external (train) processes and the vLLM workers.
+    """
+    from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator
+    from vllm.distributed.utils import StatelessProcessGroup
+
+    group_kwargs = dict(host=master_address,
+                        port=master_port,
+                        rank=rank,
+                        world_size=world_size)
+    stateless_group = StatelessProcessGroup.create(**group_kwargs)
+    return PyNcclCommunicator(stateless_group, device=device)
+
+
+class MyWorker(Worker):
+    """
+    A `Worker` subclass that adds the weight-synchronization hooks used by
+    this example. It is defined inline to keep the script self-contained;
+    normally the class would live in a separate file and its qualified
+    name would be passed to the `worker_cls` parameter.
+    """
+
+    def init_weight_update_group(self, master_address, master_port,
+                                 rank_offset, world_size):
+        """
+        Join the weight-update communicator.
+
+        The rank used inside the new group is this worker's rank in the
+        vLLM world group shifted by `rank_offset`. The communicator is
+        stored on `self.model_update_group`.
+        """
+        from vllm.distributed.parallel_state import get_world_group
+
+        group_rank = rank_offset + get_world_group().rank
+        communicator = stateless_init_process_group(
+            master_address,
+            master_port,
+            group_rank,
+            world_size,
+            self.device,
+        )
+        self.model_update_group = communicator
+
+    def update_weight(self, name, dtype, shape):
+        """
+        Receive one tensor broadcast from rank 0 and load it as `name`.
+        """
+        # Scratch buffer that the broadcast fills in.
+        received = torch.empty(shape, dtype=dtype, device="cuda")
+        current_stream = torch.cuda.current_stream()
+        self.model_update_group.broadcast(received,
+                                          src=0,
+                                          stream=current_stream)
+
+        self.model_runner.model.load_weights(weights=[(name, received)])
+
+        del received
+
+    def check_weights_changed(self):
+        """
+        Return True when every model parameter is (close to) all zeros.
+        """
+        return all(
+            torch.allclose(param, torch.zeros_like(param))
+            for _, param in self.model_runner.model.named_parameters())
+
+
+class MyLLM(LLM):
+    """
+    An `LLM` subclass that removes `CUDA_VISIBLE_DEVICES` from the
+    environment before the engine is constructed.
+    """
+
+    def __init__(self, *args, **kwargs):
+        # a hack to make the script work.
+        # stop ray from manipulating CUDA_VISIBLE_DEVICES
+        # at the top-level
+        # `pop` with a default is used instead of `del` so that
+        # construction does not fail with KeyError when the variable
+        # is not set in this process.
+        os.environ.pop("CUDA_VISIBLE_DEVICES", None)
+        super().__init__(*args, **kwargs)
+
+
+"""
+Start the training process, here we use huggingface transformers 
+as an example to hold a model on GPU 0.
+"""
+
+train_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
+train_model.to("cuda:0")
+"""
+Start the inference process, here we use vLLM to hold a model on GPU 1 and 
+GPU 2. For the details on how to use ray, please refer to the ray 
+documentation https://docs.ray.io/en/latest/ .
+"""
+# Restrict the visible GPUs to 1 and 2 before ray.init().
+# MyLLM.__init__ removes this variable again before building the engine.
+os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"
+ray.init()
+
+# Two bundles, each reserving one GPU and no CPU.
+pg_inference = placement_group([{"GPU": 1, "CPU": 0}] * 2)
+# Wait for the placement group before scheduling anything into it.
+ray.get(pg_inference.ready())
+scheduling_inference = PlacementGroupSchedulingStrategy(
+    placement_group=pg_inference,
+    placement_group_capture_child_tasks=True,
+    placement_group_bundle_index=0,
+)
+"""
+launch the vLLM inference engine.
+here we use `enforce_eager` to reduce the start time.
+"""
+# The actor wrapping MyLLM requests no CPU/GPU of its own
+# (num_cpus=0, num_gpus=0); it is placed through the placement-group
+# strategy defined above.
+llm = ray.remote(
+    num_cpus=0,
+    num_gpus=0,
+    scheduling_strategy=scheduling_inference,
+)(MyLLM).remote(
+    model="facebook/opt-125m",
+    enforce_eager=True,
+    worker_cls=MyWorker,
+    tensor_parallel_size=2,
+    distributed_executor_backend="ray",
+)
+
+# Generate texts from the prompts.
+prompts = [
+    "Hello, my name is",
+    "The president of the United States is",
+    "The capital of France is",
+    "The future of AI is",
+]
+
+sampling_params = SamplingParams(temperature=0)
+
+# Baseline generation with the original weights.
+outputs = ray.get(llm.generate.remote(prompts, sampling_params))
+
+for output in outputs:
+    prompt = output.prompt
+    generated_text = output.outputs[0].text
+    print(f"Prompt: {prompt!r}, "
+          f"Generated text: {generated_text!r}")
+
+# set up the communication between the training process
+# and the inference engine.
+master_address = get_ip()
+master_port = get_open_port()
+
+# Weight-update group layout (world size 3): this training process is
+# rank 0, and each worker joins at its world-group rank + 1
+# (rank_offset=1) -- presumably one worker per tensor-parallel rank.
+# The RPC is dispatched without waiting so that this process can join
+# the same group right after; the handle is only awaited once the local
+# side has been created.
+handle = llm.collective_rpc.remote("init_weight_update_group",
+                                   args=(master_address, master_port, 1, 3))
+model_update_group = stateless_init_process_group(master_address, master_port,
+                                                  0, 3, torch.device("cuda:0"))
+ray.get(handle)
+
+# simulate training, modify the weights of the model.
+for name, p in train_model.named_parameters():
+    p.data.zero_()
+
+# sync weight from the training process to the inference engine.
+# For every parameter: dispatch `update_weight` on the workers (they
+# allocate a buffer and take part in the broadcast), send the tensor
+# from rank 0, then wait for the workers to finish loading it.
+for name, p in train_model.named_parameters():
+    handle = llm.collective_rpc.remote("update_weight",
+                                       args=(name, p.dtype, p.shape))
+    model_update_group.broadcast(p, src=0, stream=torch.cuda.current_stream())
+    ray.get(handle)
+
+# check if the weights are updated.
+# Every value returned by the collective RPC must be truthy.
+assert all(ray.get(llm.collective_rpc.remote("check_weights_changed")))
+
+# use the updated model to generate texts, they will be nonsense
+# because the weights are all zeros.
+outputs_updated = ray.get(llm.generate.remote(prompts, sampling_params))
+for output in outputs_updated:
+    prompt = output.prompt
+    generated_text = output.outputs[0].text
+    print(f"Prompt: {prompt!r}, "
+          f"Generated text: {generated_text!r}")
--- a/examples/save_sharded_state.py
+++ b/examples/save_sharded_state.py
--- a/examples/offline_inference_scoring.py
+++ b/examples/offline_inference_scoring.py
--- a/examples/offline_inference_with_profiler.py
+++ b/examples/offline_inference_with_profiler.py
--- a/examples/offline_inference_structured_outputs.py
+++ b/examples/offline_inference_structured_outputs.py
--- a/examples/offline_inference/torchrun_example.py
+++ b/examples/offline_inference/torchrun_example.py
+"""
+experimental support for tensor-parallel inference with torchrun,
+see https://github.com/vllm-project/vllm/issues/11400 for
+the motivation and use case for this example.
+run the script with `torchrun --nproc-per-node=2 torchrun_example.py`,
+the argument 2 should match the `tensor_parallel_size` below.
+see `tests/distributed/test_torchrun_example.py` for the unit test.
+"""
+
+from vllm import LLM, SamplingParams
+
+# Sampling parameters; identical on every rank.
+sampling_params = SamplingParams(top_p=0.95, temperature=0.8)
+
+# Prompts; identical on every rank.
+prompts = [
+    "Hello, my name is",
+    "The president of the United States is",
+    "The capital of France is",
+    "The future of AI is",
+]
+
+# With `distributed_executor_backend="external_launcher"`,
+# this llm engine/instance only creates one worker.
+llm = LLM(
+    distributed_executor_backend="external_launcher",
+    tensor_parallel_size=2,
+    model="facebook/opt-125m",
+)
+
+outputs = llm.generate(prompts, sampling_params)
+
+# Every rank ends up with the same outputs.
+for output in outputs:
+    print(f"Prompt: {output.prompt!r}, "
+          f"Generated text: {output.outputs[0].text!r}")
+"""
+Further tips:
+
+1. to communicate control messages across all ranks, use the cpu group,
+a PyTorch ProcessGroup with GLOO backend.
+
+```python
+import torch.distributed as dist
+
+from vllm.distributed.parallel_state import get_world_group
+cpu_group = get_world_group().cpu_group
+torch_rank = dist.get_rank(group=cpu_group)
+if torch_rank == 0:
+    # do something for rank 0, e.g. saving the results to disk.
+```
+
+2. to communicate data across all ranks, use the model's device group,
+a PyTorch ProcessGroup with NCCL backend.
+```python
+from vllm.distributed.parallel_state import get_world_group
+device_group = get_world_group().device_group
+```
+
+3. to access the model directly in every rank, use the following code:
+```python
+llm.llm_engine.model_executor.driver_worker.worker.model_runner.model
+```
+"""
--- a/examples/offline_inference_tpu.py
+++ b/examples/offline_inference_tpu.py
--- a/examples/offline_inference_vision_language.py
+++ b/examples/offline_inference_vision_language.py
@@ -24,13 +24,14 @@ def run_aria(question: str, modality: str):
    assert modality == "image"
    model_name = "rhymes-ai/Aria"

+    # NOTE: Need L40 (or equivalent) to avoid OOM
    llm = LLM(model=model_name,
-              tokenizer_mode="slow",
-              trust_remote_code=True,
+              max_model_len=4096,
+              max_num_seqs=2,
              dtype="bfloat16",
              disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache)

-    prompt = (f"<|im_start|>user\n<fim_prefix><|img|><fim_suffix>\n{question}"
+    prompt = (f"<|im_start|>user\n<fim_prefix><|img|><fim_suffix>{question}"
              "<|im_end|>\n<|im_start|>assistant\n")

    stop_token_ids = [93532, 93653, 944, 93421, 1019, 93653, 93519]
@@ -57,11 +58,29 @@ def run_chameleon(question: str, modality: str):
    prompt = f"{question}<image>"
    llm = LLM(model="facebook/chameleon-7b",
              max_model_len=4096,
+              max_num_seqs=2,
              disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache)
    stop_token_ids = None
    return llm, prompt, stop_token_ids


+# Deepseek-VL2
+def run_deepseek_vl2(question: str, modality: str):
+    assert modality == "image"
+
+    model_name = "deepseek-ai/deepseek-vl2-tiny"
+
+    llm = LLM(model=model_name,
+              max_model_len=4096,
+              max_num_seqs=2,
+              disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
+              hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]})
+
+    prompt = f"<|User|>: <image>\n{question}\n\n<|Assistant|>:"
+    stop_token_ids = None
+    return llm, prompt, stop_token_ids
+
+
 # Fuyu
 def run_fuyu(question: str, modality: str):
    assert modality == "image"
@@ -246,8 +265,9 @@ def run_mantis(question: str, modality: str):


 # MiniCPM-V
-def run_minicpmv(question: str, modality: str):
-    assert modality == "image"
+def run_minicpmv_base(question: str, modality: str, model_name):
+    assert modality in ["image", "video"]
+    # If you want to use `MiniCPM-o-2_6` with audio inputs, check `audio_language.py` # noqa

    # 2.0
    # The official repo doesn't work yet, so we need to use a fork for now
@@ -257,8 +277,16 @@ def run_minicpmv(question: str, modality: str):
    # 2.5
    # model_name = "openbmb/MiniCPM-Llama3-V-2_5"

-    #2.6
-    model_name = "openbmb/MiniCPM-V-2_6"
+    # 2.6
+    # model_name = "openbmb/MiniCPM-V-2_6"
+    # o2.6
+
+    # modality supports
+    # 2.0: image
+    # 2.5: image
+    # 2.6: image, video
+    # o2.6: image, video, audio
+    # model_name = "openbmb/MiniCPM-o-2_6"
    tokenizer = AutoTokenizer.from_pretrained(model_name,
                                              trust_remote_code=True)
    llm = LLM(
@@ -275,13 +303,18 @@ def run_minicpmv(question: str, modality: str):
    # 2.5
    # stop_token_ids = [tokenizer.eos_id, tokenizer.eot_id]

-    # 2.6
+    # 2.6 / o2.6
    stop_tokens = ['<|im_end|>', '<|endoftext|>']
    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]

+    modality_placeholder = {
+        "image": "(<image>./</image>)",
+        "video": "(<video>./</video>)",
+    }
+
    messages = [{
        'role': 'user',
-        'content': f'(<image>./</image>)\n{question}'
+        'content': f'{modality_placeholder[modality]}\n{question}'
    }]
    prompt = tokenizer.apply_chat_template(messages,
                                           tokenize=False,
@@ -289,6 +322,14 @@ def run_minicpmv(question: str, modality: str):
    return llm, prompt, stop_token_ids


+def run_minicpmo(question: str, modality: str):
+    return run_minicpmv_base(question, modality, "openbmb/MiniCPM-o-2_6")
+
+
+def run_minicpmv(question: str, modality: str):
+    return run_minicpmv_base(question, modality, "openbmb/MiniCPM-V-2_6")
+
+
 # LLama 3.2
 def run_mllama(question: str, modality: str):
    assert modality == "image"
@@ -304,11 +345,23 @@ def run_mllama(question: str, modality: str):
        model=model_name,
        max_model_len=4096,
        max_num_seqs=16,
-        enforce_eager=True,
        disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
    )

-    prompt = f"<|image|><|begin_of_text|>{question}"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    messages = [{
+        "role":
+        "user",
+        "content": [{
+            "type": "image"
+        }, {
+            "type": "text",
+            "text": f"{question}"
+        }]
+    }]
+    prompt = tokenizer.apply_chat_template(messages,
+                                           add_generation_prompt=True,
+                                           tokenize=False)
    stop_token_ids = None
    return llm, prompt, stop_token_ids

@@ -417,9 +470,11 @@ def run_pixtral_hf(question: str, modality: str):

    model_name = "mistral-community/pixtral-12b"

+    # NOTE: Need L40 (or equivalent) to avoid OOM
    llm = LLM(
        model=model_name,
        max_model_len=8192,
+        max_num_seqs=2,
        disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
    )

@@ -494,6 +549,7 @@ model_example_map = {
    "aria": run_aria,
    "blip-2": run_blip2,
    "chameleon": run_chameleon,
+    "deepseek_vl_v2": run_deepseek_vl2,
    "fuyu": run_fuyu,
    "glm4v": run_glm4v,
    "h2ovl_chat": run_h2ovl,
@@ -504,6 +560,7 @@ model_example_map = {
    "llava-next-video": run_llava_next_video,
    "llava-onevision": run_llava_onevision,
    "mantis": run_mantis,
+    "minicpmo": run_minicpmo,
    "minicpmv": run_minicpmv,
    "mllama": run_mllama,
    "molmo": run_molmo,

--- a/examples/offline_inference_vision_language_embedding.py
+++ b/examples/offline_inference_vision_language_embedding.py
--- a/examples/offline_inference_vision_language_multi_image.py
+++ b/examples/offline_inference_vision_language_multi_image.py
@@ -23,7 +23,7 @@ IMAGE_URLS = [
 class ModelRequestData(NamedTuple):
    llm: LLM
    prompt: str
-    stop_token_ids: Optional[List[str]]
+    stop_token_ids: Optional[List[int]]
    image_data: List[Image]
    chat_template: Optional[str]

@@ -44,12 +44,36 @@ def load_aria(question, image_urls: List[str]) -> ModelRequestData:
    prompt = (f"<|im_start|>user\n{placeholders}{question}<|im_end|>\n"
              "<|im_start|>assistant\n")
    stop_token_ids = [93532, 93653, 944, 93421, 1019, 93653, 93519]
+
    return ModelRequestData(
        llm=llm,
        prompt=prompt,
        stop_token_ids=stop_token_ids,
        image_data=[fetch_image(url) for url in image_urls],
-        chat_template=None)
+        chat_template=None,
+    )
+
+
+def load_deepseek_vl2(question: str, image_urls: List[str]):
+    model_name = "deepseek-ai/deepseek-vl2-tiny"
+
+    llm = LLM(model=model_name,
+              max_model_len=4096,
+              max_num_seqs=2,
+              hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
+              limit_mm_per_prompt={"image": len(image_urls)})
+
+    placeholder = "".join(f"image_{i}:<image>\n"
+                          for i, _ in enumerate(image_urls, start=1))
+    prompt = f"<|User|>: {placeholder}{question}\n\n<|Assistant|>:"
+
+    return ModelRequestData(
+        llm=llm,
+        prompt=prompt,
+        stop_token_ids=None,
+        image_data=[fetch_image(url) for url in image_urls],
+        chat_template=None,
+    )


 def load_h2onvl(question: str, image_urls: List[str]) -> ModelRequestData:
@@ -162,11 +186,11 @@ def load_mllama(question, image_urls: List[str]) -> ModelRequestData:
        model=model_name,
        max_model_len=4096,
        max_num_seqs=16,
-        enforce_eager=True,
        limit_mm_per_prompt={"image": len(image_urls)},
    )

-    prompt = f"<|image|><|image|><|begin_of_text|>{question}"
+    placeholders = "<|image|>" * len(image_urls)
+    prompt = f"{placeholders}<|begin_of_text|>{question}"
    return ModelRequestData(
        llm=llm,
        prompt=prompt,
@@ -209,6 +233,31 @@ def load_nvlm_d(question: str, image_urls: List[str]):
    )


+def load_pixtral_hf(question: str, image_urls: List[str]) -> ModelRequestData:
+    model_name = "mistral-community/pixtral-12b"
+
+    # Adjust this as necessary to fit in GPU
+    llm = LLM(
+        model=model_name,
+        max_model_len=8192,
+        max_num_seqs=2,
+        tensor_parallel_size=2,
+        limit_mm_per_prompt={"image": len(image_urls)},
+    )
+
+    placeholders = "[IMG]" * len(image_urls)
+    prompt = f"<s>[INST]{question}\n{placeholders}[/INST]"
+    stop_token_ids = None
+
+    return ModelRequestData(
+        llm=llm,
+        prompt=prompt,
+        stop_token_ids=stop_token_ids,
+        image_data=[fetch_image(url) for url in image_urls],
+        chat_template=None,
+    )
+
+
 def load_phi3v(question: str, image_urls: List[str]) -> ModelRequestData:
    # num_crops is an override kwarg to the multimodal image processor;
    # For some models, e.g., Phi-3.5-vision-instruct, it is recommended
@@ -244,7 +293,8 @@ def load_phi3v(question: str, image_urls: List[str]) -> ModelRequestData:
    )


-def load_qwenvl_chat(question: str, image_urls: List[str]) -> ModelRequestData:
+def load_qwen_vl_chat(question: str,
+                      image_urls: List[str]) -> ModelRequestData:
    model_name = "Qwen/Qwen-VL-Chat"
    llm = LLM(
        model=model_name,
@@ -274,6 +324,7 @@ def load_qwenvl_chat(question: str, image_urls: List[str]) -> ModelRequestData:

    stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>"]
    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
+
    return ModelRequestData(
        llm=llm,
        prompt=prompt,
@@ -342,13 +393,15 @@ def load_qwen2_vl(question, image_urls: List[str]) -> ModelRequestData:

 model_example_map = {
    "aria": load_aria,
+    "deepseek_vl_v2": load_deepseek_vl2,
    "h2ovl_chat": load_h2onvl,
    "idefics3": load_idefics3,
    "internvl_chat": load_internvl,
    "mllama": load_mllama,
    "NVLM_D": load_nvlm_d,
    "phi3_v": load_phi3v,
-    "qwen_vl_chat": load_qwenvl_chat,
+    "pixtral_hf": load_pixtral_hf,
+    "qwen_vl_chat": load_qwen_vl_chat,
    "qwen2_vl": load_qwen2_vl,
 }


--- a/examples/offline_inference/whisper.py
+++ b/examples/offline_inference/whisper.py
+import time
+
+from vllm import LLM, SamplingParams
+from vllm.assets.audio import AudioAsset
+
+# Create a Whisper encoder/decoder model instance
+llm = LLM(
+    model="openai/whisper-large-v3",
+    max_model_len=448,
+    max_num_seqs=400,
+    limit_mm_per_prompt={"audio": 1},
+    kv_cache_dtype="fp8",
+)
+
+# Two prompt styles, repeated 1024 times (2048 requests in total).
+prompts = [
+    {
+        "prompt": "<|startoftranscript|>",
+        "multi_modal_data": {
+            "audio": AudioAsset("mary_had_lamb").audio_and_sample_rate,
+        },
+    },
+    {  # Test explicit encoder/decoder prompt
+        "encoder_prompt": {
+            "prompt": "",
+            "multi_modal_data": {
+                "audio": AudioAsset("winning_call").audio_and_sample_rate,
+            },
+        },
+        "decoder_prompt": "<|startoftranscript|>",
+    }
+] * 1024
+
+# Create a sampling params object.
+sampling_params = SamplingParams(
+    temperature=0,
+    top_p=1.0,
+    max_tokens=200,
+)
+
+# perf_counter() is a monotonic, high-resolution clock, which makes it
+# the right tool for measuring an elapsed interval.
+start = time.perf_counter()
+
+# Generate output tokens from the prompts. The output is a list of
+# RequestOutput objects that contain the prompt, generated
+# text, and other information.
+outputs = llm.generate(prompts, sampling_params)
+
+# Stop the clock before printing, so that Duration/RPS measure the
+# generation only and not the time spent writing results to stdout.
+duration = time.perf_counter() - start
+
+# Print the outputs.
+for output in outputs:
+    prompt = output.prompt
+    encoder_prompt = output.encoder_prompt
+    generated_text = output.outputs[0].text
+    print(f"Encoder prompt: {encoder_prompt!r}, "
+          f"Decoder prompt: {prompt!r}, "
+          f"Generated text: {generated_text!r}")
+
+print("Duration:", duration)
+print("RPS:", len(prompts) / duration)
--- a/examples/api_client.py
+++ b/examples/api_client.py
--- a/examples/chart-helm/.helmignore
+++ b/examples/chart-helm/.helmignore
--- a/examples/chart-helm/Chart.yaml
+++ b/examples/chart-helm/Chart.yaml
--- a/examples/online_serving/chart-helm/README.md
+++ b/examples/online_serving/chart-helm/README.md
+# Helm Charts
+
+This directory contains a Helm chart for deploying the vLLM application. The chart includes configurations for deployment, autoscaling, resource management, and more.
+
+## Files
+
+- Chart.yaml: Defines the chart metadata including name, version, and maintainers.
+- ct.yaml: Configuration for chart testing.
+- lintconf.yaml: Linting rules for YAML files.
+- values.schema.json: JSON schema for validating values.yaml.
+- values.yaml: Default values for the Helm chart.
+- templates/_helpers.tpl: Helper templates for defining common configurations.
+- templates/configmap.yaml: Template for creating ConfigMaps.
+- templates/custom-objects.yaml: Template for custom Kubernetes objects.
+- templates/deployment.yaml: Template for creating Deployments.
+- templates/hpa.yaml: Template for Horizontal Pod Autoscaler.
+- templates/job.yaml: Template for Kubernetes Jobs.
+- templates/poddisruptionbudget.yaml: Template for Pod Disruption Budget.
+- templates/pvc.yaml: Template for Persistent Volume Claims.
+- templates/secrets.yaml: Template for Kubernetes Secrets.
+- templates/service.yaml: Template for creating Services.
\ No newline at end of file
--- a/examples/chart-helm/ct.yaml
+++ b/examples/chart-helm/ct.yaml
--- a/examples/chart-helm/lintconf.yaml
+++ b/examples/chart-helm/lintconf.yaml
--- a/examples/chart-helm/templates/_helpers.tpl
+++ b/examples/chart-helm/templates/_helpers.tpl
--- a/examples/chart-helm/templates/configmap.yaml
+++ b/examples/chart-helm/templates/configmap.yaml
--- a/examples/chart-helm/templates/custom-objects.yaml
+++ b/examples/chart-helm/templates/custom-objects.yaml