Commit 4d4d8f59 authored by chenzk

v1.0
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, Dict, List, Optional, Union
import pandas as pd
from datasets import Dataset
from distilabel.errors import DistilabelUserError
from distilabel.steps.base import StepResources
if TYPE_CHECKING:
from distilabel.pipeline.base import BasePipeline
from distilabel.steps import GeneratorStep
def make_generator_step(
dataset: Union[Dataset, pd.DataFrame, List[Dict[str, str]]],
pipeline: Union["BasePipeline", None] = None,
batch_size: int = 50,
input_mappings: Optional[Dict[str, str]] = None,
output_mappings: Optional[Dict[str, str]] = None,
resources: StepResources = StepResources(),
repo_id: Optional[str] = "default_name",
) -> "GeneratorStep":
"""Helper method to create a `GeneratorStep` from a dataset, to simplify
Args:
dataset: The dataset to use in the `Pipeline`.
pipeline: The `Pipeline` the generator step will be added to, if any. Defaults to `None`.
batch_size: The batch size for the generator step; it defaults to the same value used by
the `GeneratorStep`s, i.e. `50`.
input_mappings: Applies the same as any other step. Defaults to `None`.
output_mappings: Applies the same as any other step. Defaults to `None`.
resources: Applies the same as any other step. Defaults to `StepResources()`.
repo_id: The repository ID to use in the `LoadDataFromHub` step.
This shouldn't be necessary in normal use, but if an error occurs the step will fall back
to loading the dataset internally using `load_dataset`, in which case this `repo_id` is used.
Raises:
ValueError: If the format is different from the ones supported.
Returns:
A `LoadDataFromDicts` instance if the input is a list of dicts, or a `LoadDataFromHub` instance
if the input is a `pd.DataFrame` or a `Dataset`.
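Examples:
A minimal sketch of wrapping an in-memory list of dicts (the `instruction` column and its
value below are illustrative placeholders):

```python
loader = make_generator_step(
    [{"instruction": "Tell me a joke."}],
    batch_size=10,
)
# `loader` is a `LoadDataFromDicts` instance that can be used as the first
# step of a `Pipeline`.
```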
"""
from distilabel.steps import LoadDataFromDicts, LoadDataFromHub
if isinstance(dataset, list):
return LoadDataFromDicts(
pipeline=pipeline,
data=dataset,
batch_size=batch_size,
input_mappings=input_mappings or {},
output_mappings=output_mappings or {},
resources=resources,
)
if isinstance(dataset, pd.DataFrame):
dataset = Dataset.from_pandas(dataset, preserve_index=False)
if not isinstance(dataset, Dataset):
raise DistilabelUserError(
f"Dataset type not allowed: {type(dataset)}, must be one of: "
"`datasets.Dataset`, `pd.DataFrame`, `List[Dict[str, str]]`",
page="sections/how_to_guides/basic/pipeline/?h=make_#__tabbed_1_2",
)
loader = LoadDataFromHub(
pipeline=pipeline,
repo_id=repo_id,
batch_size=batch_size,
input_mappings=input_mappings or {},
output_mappings=output_mappings or {},
resources=resources,
)
super(loader.__class__, loader).load() # Ensure the logger is loaded
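# Inject the already in-memory dataset into the `LoadDataFromHub` step so nothing is
# downloaded from the Hub; `repo_id` is only kept as a fallback identifier.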
loader._dataset = dataset
loader.num_examples = len(dataset)
loader._dataset_info = {"default": dataset.info}
return loader
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from collections import defaultdict
from typing import TYPE_CHECKING, Optional
from datasets import Dataset
from pydantic import Field
from distilabel.mixins.runtime_parameters import RuntimeParameter
from distilabel.steps.base import GlobalStep, StepInput
if TYPE_CHECKING:
from distilabel.typing import StepOutput
class PushToHub(GlobalStep):
"""Push data to a Hugging Face Hub dataset.
A `GlobalStep` which creates a `datasets.Dataset` with the input data and pushes
it to the Hugging Face Hub.
Attributes:
repo_id: The Hugging Face Hub repository ID where the dataset will be uploaded.
split: The split of the dataset that will be pushed. Defaults to `"train"`.
private: Whether the dataset to be pushed should be private or not. Defaults to
`False`.
token: The token used to authenticate in the Hub. If not provided, it will be read
from the `HF_TOKEN` environment variable; failing that, the `huggingface_hub` library
will fall back to the token stored by the local Hugging Face CLI configuration.
Defaults to `None`.
Runtime parameters:
- `repo_id`: The Hugging Face Hub repository ID where the dataset will be uploaded.
- `split`: The split of the dataset that will be pushed.
- `private`: Whether the dataset to be pushed should be private or not.
- `token`: The token that will be used to authenticate in the Hub.
Input columns:
- dynamic (`all`): all columns from the input will be used to create the dataset.
Categories:
- save
- dataset
- huggingface
Examples:
Push batches of your dataset to the Hugging Face Hub repository:
```python
from distilabel.steps import PushToHub
push = PushToHub(repo_id="path_to/repo")
push.load()
result = next(
push.process(
[
{
"instruction": "instruction ",
"generation": "generation"
}
],
)
)
# >>> result
# [{'instruction': 'instruction ', 'generation': 'generation'}]
```
"""
repo_id: RuntimeParameter[str] = Field(
default=None,
description="The Hugging Face Hub repository ID where the dataset will be uploaded.",
)
split: RuntimeParameter[str] = Field(
default="train",
description="The split of the dataset that will be pushed. Defaults to 'train'.",
)
private: RuntimeParameter[bool] = Field(
default=False,
description="Whether the dataset to be pushed should be private or not. Defaults"
" to `False`.",
)
token: Optional[RuntimeParameter[str]] = Field(
default=None,
description="The token that will be used to authenticate in the Hub. If not provided,"
" the token will be tried to be obtained from the environment variable `HF_TOKEN`."
" If not provided using one of the previous methods, then `huggingface_hub` library"
" will try to use the token from the local Hugging Face CLI configuration. Defaults"
" to `None`",
)
def process(self, inputs: StepInput) -> "StepOutput": # type: ignore
"""Method that processes the input data, respecting the `datasets.Dataset` formatting,
and pushes it to the Hugging Face Hub based on the `RuntimeParameter`s attributes.
Args:
inputs: the whole input data gathered in a single batch (as this is a `GlobalStep`),
which will be transformed into a `datasets.Dataset`.
Yields:
Propagates the received inputs so that the `Distiset` can be generated if this is
the last step of the `Pipeline`, or if this is not a leaf step and has follow up
steps.
"""
dataset_dict = defaultdict(list)
for input in inputs:
for key, value in input.items():
dataset_dict[key].append(value)
dataset_dict = dict(dataset_dict)
dataset = Dataset.from_dict(dataset_dict)
dataset.push_to_hub(
self.repo_id, # type: ignore
split=self.split,
private=self.private,
token=self.token or os.getenv("HF_TOKEN"),
)
yield inputs
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import TYPE_CHECKING, Any, Dict, List, Union
from pydantic import Field, PrivateAttr, SecretStr
from distilabel.models.mixins.cuda_device_placement import CudaDevicePlacementMixin
from distilabel.steps.base import Step, StepInput
from distilabel.utils.huggingface import HF_TOKEN_ENV_VAR
if TYPE_CHECKING:
import torch
from transformers import PreTrainedModel, PreTrainedTokenizer
from distilabel.typing import ChatType, StepColumns, StepOutput
class RewardModelScore(Step, CudaDevicePlacementMixin):
"""Assign a score to a response using a Reward Model.
`RewardModelScore` is a `Step` that, using a Reward Model (RM) loaded with `transformers`,
assigns a score to a response generated for an instruction, or to a multi-turn
conversation.
Attributes:
model: the model Hugging Face Hub repo id or a path to a directory containing the
model weights and configuration files.
revision: if `model` refers to a Hugging Face Hub repository, then the revision
(e.g. a branch name or a commit id) to use. Defaults to `"main"`.
torch_dtype: the torch dtype to use for the model e.g. "float16", "float32", etc.
Defaults to `"auto"`.
trust_remote_code: whether to allow fetching and executing remote code fetched
from the repository in the Hub. Defaults to `False`.
device_map: a dictionary mapping each layer of the model to a device, or a mode like `"sequential"` or `"auto"`. Defaults to `None`.
token: the Hugging Face Hub token that will be used to authenticate to the Hugging
Face Hub. If not provided, the `HF_TOKEN` environment or `huggingface_hub` package
local configuration will be used. Defaults to `None`.
truncation: whether to truncate sequences at the maximum length. Defaults to `False`.
max_length: maximum length to use for padding or truncation. Defaults to `None`.
Input columns:
- instruction (`str`, optional): the instruction used to generate a `response`.
If provided, then `response` must be provided too.
- response (`str`, optional): the response generated for `instruction`. If provided,
then `instruction` must be provided too.
- conversation (`ChatType`, optional): a multi-turn conversation. If not provided,
then `instruction` and `response` columns must be provided.
Output columns:
- score (`float`): the score given by the reward model for the instruction-response
pair or the conversation.
Categories:
- scorer
Examples:
Assigning a score to an instruction-response pair:
```python
from distilabel.steps import RewardModelScore
step = RewardModelScore(
model="RLHFlow/ArmoRM-Llama3-8B-v0.1", device_map="auto", trust_remote_code=True
)
step.load()
result = next(
step.process(
inputs=[
{
"instruction": "How much is 2+2?",
"response": "The output of 2+2 is 4",
},
{"instruction": "How much is 2+2?", "response": "4"},
]
)
)
# [
# {'instruction': 'How much is 2+2?', 'response': 'The output of 2+2 is 4', 'score': 0.11690367758274078},
# {'instruction': 'How much is 2+2?', 'response': '4', 'score': 0.10300665348768234}
# ]
```
Assigning a score to a multi-turn conversation:
```python
from distilabel.steps import RewardModelScore
step = RewardModelScore(
model="RLHFlow/ArmoRM-Llama3-8B-v0.1", device_map="auto", trust_remote_code=True
)
step.load()
result = next(
step.process(
inputs=[
{
"conversation": [
{"role": "user", "content": "How much is 2+2?"},
{"role": "assistant", "content": "The output of 2+2 is 4"},
],
},
{
"conversation": [
{"role": "user", "content": "How much is 2+2?"},
{"role": "assistant", "content": "4"},
],
},
]
)
)
# [
# {'conversation': [{'role': 'user', 'content': 'How much is 2+2?'}, {'role': 'assistant', 'content': 'The output of 2+2 is 4'}], 'score': 0.11690367758274078},
# {'conversation': [{'role': 'user', 'content': 'How much is 2+2?'}, {'role': 'assistant', 'content': '4'}], 'score': 0.10300665348768234}
# ]
```
"""
model: str
revision: str = "main"
torch_dtype: str = "auto"
trust_remote_code: bool = False
device_map: Union[str, Dict[str, Any], None] = None
token: Union[SecretStr, None] = Field(
default_factory=lambda: os.getenv(HF_TOKEN_ENV_VAR),
description="The Hugging Face Hub token used to authenticate to the Hugging Face Hub."
" If not provided, the `HF_TOKEN` environment variable or the local `huggingface_hub`"
" configuration will be used. Defaults to `None`.",
)
truncation: bool = False
max_length: Union[int, None] = None
_model: Union["PreTrainedModel", None] = PrivateAttr(None)
_tokenizer: Union["PreTrainedTokenizer", None] = PrivateAttr(None)
def load(self) -> None:
super().load()
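# Only configure CUDA device placement when the model may be placed on a GPU.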
if self.device_map in ["cuda", "auto"]:
CudaDevicePlacementMixin.load(self)
try:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
except ImportError as e:
raise ImportError(
"`transformers` is not installed. Please install it using `pip install 'distilabel[hf-transformers]'`."
) from e
token = self.token.get_secret_value() if self.token is not None else self.token
self._model = AutoModelForSequenceClassification.from_pretrained(
self.model,
revision=self.revision,
torch_dtype=self.torch_dtype,
trust_remote_code=self.trust_remote_code,
device_map=self.device_map,
token=token,
)
self._tokenizer = AutoTokenizer.from_pretrained(
self.model,
revision=self.revision,
torch_dtype=self.torch_dtype,
trust_remote_code=self.trust_remote_code,
token=token,
)
@property
def inputs(self) -> "StepColumns":
"""Either `response` and `instruction`, or a `conversation` columns."""
return {
"response": False,
"instruction": False,
"conversation": False,
}
@property
def outputs(self) -> "StepColumns":
"""The `score` given by the reward model."""
return ["score"]
def _prepare_conversation(self, input: Dict[str, Any]) -> "ChatType":
if "instruction" in input and "response" in input:
return [
{"role": "user", "content": input["instruction"]},
{"role": "assistant", "content": input["response"]},
]
return input["conversation"]
def _prepare_inputs(self, inputs: List[Dict[str, Any]]) -> "torch.Tensor":
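# Apply the tokenizer's chat template to every conversation and move the resulting
# batch of token ids to the device the model lives on.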
return self._tokenizer.apply_chat_template( # type: ignore
[self._prepare_conversation(input) for input in inputs], # type: ignore
return_tensors="pt",
padding=True,
truncation=self.truncation,
max_length=self.max_length,
).to(self._model.device) # type: ignore
def _inference(self, inputs: List[Dict[str, Any]]) -> List[float]:
import torch
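# Tokenize the conversations and run the reward model without gradient tracking,
# returning one score per input.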
input_ids = self._prepare_inputs(inputs)
with torch.no_grad():
output = self._model(input_ids) # type: ignore
logits = output.logits
if logits.shape == (2, 1):
logits = logits.squeeze(-1)
return logits.tolist()
def process(self, inputs: StepInput) -> "StepOutput": # type: ignore
scores = self._inference(inputs)
for input, score in zip(inputs, scores):
input["score"] = score
yield inputs
def unload(self) -> None:
if self.device_map in ["cuda", "auto"]:
CudaDevicePlacementMixin.unload(self)
super().unload()
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from distilabel.steps.tasks.apigen.execution_checker import APIGenExecutionChecker
from distilabel.steps.tasks.apigen.generator import APIGenGenerator
from distilabel.steps.tasks.apigen.semantic_checker import APIGenSemanticChecker
from distilabel.steps.tasks.argilla_labeller import ArgillaLabeller
from distilabel.steps.tasks.base import GeneratorTask, ImageTask, Task
from distilabel.steps.tasks.clair import CLAIR
from distilabel.steps.tasks.complexity_scorer import ComplexityScorer
from distilabel.steps.tasks.decorator import task
from distilabel.steps.tasks.evol_instruct.base import EvolInstruct
from distilabel.steps.tasks.evol_instruct.evol_complexity.base import EvolComplexity
from distilabel.steps.tasks.evol_instruct.evol_complexity.generator import (
EvolComplexityGenerator,
)
from distilabel.steps.tasks.evol_instruct.generator import EvolInstructGenerator
from distilabel.steps.tasks.evol_quality.base import EvolQuality
from distilabel.steps.tasks.generate_embeddings import GenerateEmbeddings
from distilabel.steps.tasks.genstruct import Genstruct
from distilabel.steps.tasks.image_generation import ImageGeneration
from distilabel.steps.tasks.improving_text_embeddings import (
BitextRetrievalGenerator,
EmbeddingTaskGenerator,
GenerateLongTextMatchingData,
GenerateShortTextMatchingData,
GenerateTextClassificationData,
GenerateTextRetrievalData,
MonolingualTripletGenerator,
)
from distilabel.steps.tasks.instruction_backtranslation import (
InstructionBacktranslation,
)
from distilabel.steps.tasks.magpie.base import Magpie
from distilabel.steps.tasks.magpie.generator import MagpieGenerator
from distilabel.steps.tasks.math_shepherd.completer import MathShepherdCompleter
from distilabel.steps.tasks.math_shepherd.generator import MathShepherdGenerator
from distilabel.steps.tasks.math_shepherd.utils import FormatPRM
from distilabel.steps.tasks.pair_rm import PairRM
from distilabel.steps.tasks.prometheus_eval import PrometheusEval
from distilabel.steps.tasks.quality_scorer import QualityScorer
from distilabel.steps.tasks.self_instruct import SelfInstruct
from distilabel.steps.tasks.sentence_transformers import GenerateSentencePair
from distilabel.steps.tasks.structured_generation import StructuredGeneration
from distilabel.steps.tasks.text_classification import TextClassification
from distilabel.steps.tasks.text_generation import ChatGeneration, TextGeneration
from distilabel.steps.tasks.text_generation_with_image import TextGenerationWithImage
from distilabel.steps.tasks.ultrafeedback import UltraFeedback
from distilabel.steps.tasks.urial import URIAL
from distilabel.typing import ChatItem, ChatType
__all__ = [
"CLAIR",
"URIAL",
"APIGenExecutionChecker",
"APIGenGenerator",
"APIGenSemanticChecker",
"ArgillaLabeller",
"ArgillaLabeller",
"BitextRetrievalGenerator",
"ChatGeneration",
"ChatItem",
"ChatType",
"ComplexityScorer",
"EmbeddingTaskGenerator",
"EvolComplexity",
"EvolComplexityGenerator",
"EvolInstruct",
"EvolInstructGenerator",
"EvolQuality",
"FormatPRM",
"GenerateEmbeddings",
"GenerateLongTextMatchingData",
"GenerateSentencePair",
"GenerateShortTextMatchingData",
"GenerateTextClassificationData",
"GenerateTextRetrievalData",
"GeneratorTask",
"Genstruct",
"ImageGeneration",
"ImageTask",
"InstructionBacktranslation",
"Magpie",
"MagpieGenerator",
"MathShepherdCompleter",
"MathShepherdGenerator",
"MonolingualTripletGenerator",
"MonolingualTripletGenerator",
"PairRM",
"PrometheusEval",
"QualityScorer",
"SelfInstruct",
"StructuredGeneration",
"Task",
"Task",
"TextClassification",
"TextGeneration",
"TextGenerationWithImage",
"UltraFeedback",
"task",
]