Commit 4d4d8f59 authored by chenzk (v1.0). Pipeline #2741 canceled with stages.
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING
from distilabel.constants import DISTILABEL_METADATA_KEY
from distilabel.steps.base import Step, StepInput
from distilabel.steps.columns.utils import merge_distilabel_metadata
if TYPE_CHECKING:
from distilabel.typing import StepOutput
class CombineOutputs(Step):
"""Combine the outputs of several upstream steps.
`CombineOutputs` is a `Step` that takes the outputs of several upstream steps and combines
them to generate a new dictionary with all keys/columns of the upstream steps' outputs.
Input columns:
- dynamic (based on the upstream `Step`s): All the columns of the upstream steps' outputs.
Output columns:
- dynamic (based on the upstream `Step`s): All the columns of the upstream steps' outputs.
Categories:
- columns
Examples:
Combine dictionaries of a dataset:
```python
from distilabel.steps import CombineOutputs
combine_outputs = CombineOutputs()
combine_outputs.load()
result = next(
combine_outputs.process(
[{"a": 1, "b": 2}, {"a": 3, "b": 4}],
[{"c": 5, "d": 6}, {"c": 7, "d": 8}],
)
)
# [
# {"a": 1, "b": 2, "c": 5, "d": 6},
# {"a": 3, "b": 4, "c": 7, "d": 8},
# ]
```
Combine upstream steps outputs in a pipeline:
```python
from distilabel.pipeline import Pipeline
from distilabel.steps import CombineOutputs
with Pipeline() as pipeline:
step_1 = ...
step_2 = ...
step_3 = ...
combine = CombineOutputs()
[step_1, step_2, step_3] >> combine
```
"""
def process(self, *inputs: StepInput) -> "StepOutput":
combined_outputs = []
for output_dicts in zip(*inputs):
combined_dict = {}
for output_dict in output_dicts:
combined_dict.update(
{
k: v
for k, v in output_dict.items()
if k != DISTILABEL_METADATA_KEY
}
)
if any(
DISTILABEL_METADATA_KEY in output_dict for output_dict in output_dicts
):
combined_dict[DISTILABEL_METADATA_KEY] = merge_distilabel_metadata(
*output_dicts
)
combined_outputs.append(combined_dict)
yield combined_outputs
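To make the metadata handling in `process` concrete, here is a minimal sketch (the step names `statistics_step_a` and `statistics_step_b` are hypothetical) showing that non-metadata keys are merged directly while `distilabel_metadata` goes through `merge_distilabel_metadata`:
```python
from distilabel.steps import CombineOutputs

combine_outputs = CombineOutputs()
combine_outputs.load()
result = next(
    combine_outputs.process(
        [{"a": 1, "distilabel_metadata": {"statistics_step_a": {"input_tokens": [10]}}}],
        [{"b": 2, "distilabel_metadata": {"statistics_step_b": {"input_tokens": [5]}}}],
    )
)
# [
#     {
#         "a": 1,
#         "b": 2,
#         "distilabel_metadata": {
#             "statistics_step_a": {"input_tokens": [10]},
#             "statistics_step_b": {"input_tokens": [5]},
#         },
#     }
# ]
```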
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from itertools import zip_longest
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union
from pydantic import field_validator, model_validator
from typing_extensions import Self
from distilabel.steps.base import Step, StepInput
if TYPE_CHECKING:
from distilabel.typing import StepColumns, StepOutput
class ExpandColumns(Step):
"""Expand columns that contain lists into multiple rows.
`ExpandColumns` is a `Step` that takes a list of columns and expands them into multiple
rows. The new rows will have the same data as the original row, except for the expanded
column, which will contain a single item from the original list.
Attributes:
columns: A dictionary that maps the column to be expanded to the new column name
or a list of columns to be expanded. If a list is provided, the new column name
will be the same as the column name.
encoded: A bool informing whether the columns are JSON-encoded lists. If this value is
set to True, the columns will be decoded before expanding. Alternatively, a list of
column names can be provided to specify which columns are encoded; in that case, the
listed columns must be a subset of the columns selected for expansion.
split_statistics: A bool informing whether the statistics in the `distilabel_metadata`
column should be split into multiple rows.
If we want to expand some columns containing a list of strings that come from
having parsed the output of an LLM, the tokens in the `statistics_{step_name}`
entries of the `distilabel_metadata` column should be split to avoid multiplying
them if we aggregate the data afterwards. For example, for a task that is supposed
to generate a list of N instructions, where we want each of those N instructions on
a different row, we should split the statistics by N.
In such a case, set this value to True.
Input columns:
- dynamic (determined by `columns` attribute): The columns to be expanded into
multiple rows.
Output columns:
- dynamic (determined by `columns` attribute): The expanded columns.
Categories:
- columns
Examples:
Expand the selected columns into multiple rows:
```python
from distilabel.steps import ExpandColumns
expand_columns = ExpandColumns(
columns=["generation"],
)
expand_columns.load()
result = next(
expand_columns.process(
[
{
"instruction": "instruction 1",
"generation": ["generation 1", "generation 2"]}
],
)
)
# >>> result
# [{'instruction': 'instruction 1', 'generation': 'generation 1'}, {'instruction': 'instruction 1', 'generation': 'generation 2'}]
```
Expand the selected columns which are JSON encoded into multiple rows:
```python
from distilabel.steps import ExpandColumns
expand_columns = ExpandColumns(
columns=["generation"],
encoded=True, # It can also be a list of columns that are encoded, i.e. ["generation"]
)
expand_columns.load()
result = next(
expand_columns.process(
[
{
"instruction": "instruction 1",
"generation": '["generation 1", "generation 2"]'}
],
)
)
# >>> result
# [{'instruction': 'instruction 1', 'generation': 'generation 1'}, {'instruction': 'instruction 1', 'generation': 'generation 2'}]
```
Expand the selected columns and split the statistics in the `distilabel_metadata` column:
```python
from distilabel.steps import ExpandColumns
expand_columns = ExpandColumns(
columns=["generation"],
split_statistics=True,
)
expand_columns.load()
result = next(
expand_columns.process(
[
{
"instruction": "instruction 1",
"generation": ["generation 1", "generation 2"],
"distilabel_metadata": {
"statistics_generation": {
"input_tokens": [12],
"output_tokens": [12],
},
},
}
],
)
)
# >>> result
# [{'instruction': 'instruction 1', 'generation': 'generation 1', 'distilabel_metadata': {'statistics_generation': {'input_tokens': [6], 'output_tokens': [6]}}}, {'instruction': 'instruction 1', 'generation': 'generation 2', 'distilabel_metadata': {'statistics_generation': {'input_tokens': [6], 'output_tokens': [6]}}}]
```
"""
columns: Union[Dict[str, str], List[str]]
encoded: Union[bool, List[str]] = False
split_statistics: bool = False
@field_validator("columns")
@classmethod
def always_dict(cls, value: Union[Dict[str, str], List[str]]) -> Dict[str, str]:
"""Ensure that the columns are always a dictionary.
Args:
value: The columns to be expanded.
Returns:
The columns to be expanded as a dictionary.
"""
if isinstance(value, list):
return {col: col for col in value}
return value
@model_validator(mode="after")
def is_subset(self) -> Self:
"""Ensure the "encoded" column names are a subset of the "columns" selected.
Returns:
The "encoded" attribute updated to work internally.
"""
if isinstance(self.encoded, list):
if not set(self.encoded).issubset(set(self.columns.keys())):
raise ValueError(
"The 'encoded' columns must be a subset of the 'columns' selected for expansion."
)
if isinstance(self.encoded, bool):
self.encoded = list(self.columns.keys()) if self.encoded else []
return self
@property
def inputs(self) -> "StepColumns":
"""The columns to be expanded."""
return list(self.columns.keys())
@property
def outputs(self) -> "StepColumns":
"""The expanded columns."""
return [
new_column if new_column else expand_column
for expand_column, new_column in self.columns.items()
]
def process(self, inputs: StepInput) -> "StepOutput": # type: ignore
"""Expand the columns in the input data.
Args:
inputs: The input data.
Yields:
The expanded rows.
"""
if self.encoded:
for input in inputs:
for column in self.encoded:
input[column] = json.loads(input[column])
yield [row for input in inputs for row in self._expand_columns(input)]
def _expand_columns(self, input: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Expand the columns in the input data.
Args:
input: The input data.
Returns:
The expanded rows.
"""
metadata_visited = False
expanded_rows = []
# Update the columns here to avoid doing the validation on the `inputs`, as
# `distilabel_metadata` is not defined in the DAG at `Pipeline` creation time.
columns = self.columns
if self.split_statistics:
columns["distilabel_metadata"] = "distilabel_metadata"
for expand_column, new_column in columns.items(): # type: ignore
data = input.get(expand_column)
input, metadata_visited = self._split_metadata(
input, len(data), metadata_visited
)
rows = []
for item, expanded in zip_longest(*[data, expanded_rows], fillvalue=input):
rows.append({**expanded, new_column: item})
expanded_rows = rows
return expanded_rows
def _split_metadata(
self, input: Dict[str, Any], n: int, metadata_visited: bool = False
) -> Tuple[Dict[str, Any], bool]:
"""Helper method to split the statistics in the `distilabel_metadata` column.
Args:
input: The input data.
n: Number of splits to apply to the tokens (if we have 12 tokens and want to split
them 3 times, n==3).
metadata_visited: Bool to prevent updating the data more than once.
Returns:
Updated input with the `distilabel_metadata` updated.
"""
# - If we want to split the statistics, we need to ensure that the metadata is present.
# - Metadata can only be visited once per row to avoid successive splitting.
# TODO: For an odd number of tokens, this will miss 1, we have to fix it.
if (
self.split_statistics
and (metadata := input.get("distilabel_metadata", {}))
and not metadata_visited
):
for k, v in metadata.items():
# Skip empty values (e.g. the key was missing due to an upstream error)
if not v:
continue
if k.startswith("statistics_") and (
"input_tokens" in v and "output_tokens" in v
):
# For num_generations>1 we assume all the tokens should be divided by n
# TODO: The tokens should always come as a list, but there can
# be differences
if isinstance(v["input_tokens"], list):
input_tokens = [value // n for value in v["input_tokens"]]
else:
input_tokens = [v["input_tokens"] // n]
if isinstance(v["input_tokens"], list):
output_tokens = [value // n for value in v["output_tokens"]]
else:
output_tokens = [v["output_tokens"] // n]
input["distilabel_metadata"][k] = {
"input_tokens": input_tokens,
"output_tokens": output_tokens,
}
metadata_visited = True
# Once the metadata has been updated, create a list out of it so that the
# following section can expand it like any other column.
if isinstance(input["distilabel_metadata"], dict):
input["distilabel_metadata"] = [input["distilabel_metadata"]] * n
return input, metadata_visited
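As a complement to the examples above, a minimal sketch of the dictionary form of `columns`, which renames the expanded column; note that, per `_expand_columns`, the original list column is kept in each row when a new column name is provided:
```python
from distilabel.steps import ExpandColumns

expand_columns = ExpandColumns(columns={"generation": "single_generation"})
expand_columns.load()
result = next(
    expand_columns.process(
        [{"instruction": "instruction 1", "generation": ["generation 1", "generation 2"]}]
    )
)
# [
#     {'instruction': 'instruction 1', 'generation': ['generation 1', 'generation 2'], 'single_generation': 'generation 1'},
#     {'instruction': 'instruction 1', 'generation': ['generation 1', 'generation 2'], 'single_generation': 'generation 2'},
# ]
```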
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Optional
from typing_extensions import override
from distilabel.steps.base import Step, StepInput
from distilabel.steps.columns.utils import group_columns
if TYPE_CHECKING:
from distilabel.typing import StepOutput
class GroupColumns(Step):
"""Combines columns from a list of `StepInput`.
`GroupColumns` is a `Step` that implements the `process` method that calls the `group_columns`
function to handle and combine a list of `StepInput`. Also `GroupColumns` provides two attributes
`columns` and `output_columns` to specify the columns to group and the output columns
which will override the default value for the properties `inputs` and `outputs`, respectively.
Attributes:
columns: List of strings with the names of the columns to group.
output_columns: Optional list of strings with the names of the output columns.
Input columns:
- dynamic (determined by `columns` attribute): The columns to group.
Output columns:
- dynamic (determined by `columns` and `output_columns` attributes): The columns
that were grouped.
Categories:
- columns
Examples:
Group columns of a dataset:
```python
from distilabel.steps import GroupColumns
group_columns = GroupColumns(
name="group_columns",
columns=["generation", "model_name"],
)
group_columns.load()
result = next(
group_columns.process(
[{"generation": "AI generated text"}, {"model_name": "my_model"}],
[{"generation": "Other generated text", "model_name": "my_model"}]
)
)
# >>> result
# [{'grouped_generation': ['AI generated text', 'Other generated text'], 'grouped_model_name': ['my_model']}]
```
Specify the name of the output columns:
```python
from distilabel.steps import GroupColumns
group_columns = GroupColumns(
name="group_columns",
columns=["generation", "model_name"],
output_columns=["generations", "generation_models"]
)
group_columns.load()
result = next(
group_columns.process(
[{"generation": "AI generated text"}, {"model_name": "my_model"}],
[{"generation": "Other generated text", "model_name": "my_model"}]
)
)
# >>> result
# [{'generations': ['AI generated text', 'Other generated text'], 'generation_models': ['my_model']}]
```
"""
columns: List[str]
output_columns: Optional[List[str]] = None
@property
def inputs(self) -> List[str]:
"""The inputs for the task are the column names in `columns`."""
return self.columns
@property
def outputs(self) -> List[str]:
"""The outputs for the task are the column names in `output_columns` or
`grouped_{column}` for each column in `columns`."""
return (
self.output_columns
if self.output_columns is not None
else [f"grouped_{column}" for column in self.columns]
)
@override
def process(self, *inputs: StepInput) -> "StepOutput":
"""The `process` method calls the `group_dicts` function to handle and combine a list of `StepInput`.
Args:
*inputs: A list of `StepInput` to be combined.
Yields:
A `StepOutput` with the combined `StepInput` using the `group_columns` function.
"""
yield group_columns(
*inputs,
group_columns=self.inputs,
output_group_columns=self.outputs,
)
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List
from typing_extensions import override
from distilabel.steps.base import Step, StepInput
if TYPE_CHECKING:
from distilabel.typing import StepColumns, StepOutput
class KeepColumns(Step):
"""Keeps selected columns in the dataset.
`KeepColumns` is a `Step` that implements the `process` method that keeps only the columns
specified in the `columns` attribute. Also `KeepColumns` provides an attribute `columns` to
specify the columns to keep which will override the default value for the properties `inputs`
and `outputs`.
Note:
The order in which the columns are provided is important, as the output will be sorted
using the provided order, which is useful before pushing either a `datasets.Dataset` via
the `PushToHub` step or a `distilabel.Distiset` via the `Pipeline.run` output variable.
Attributes:
columns: List of strings with the names of the columns to keep.
Input columns:
- dynamic (determined by `columns` attribute): The columns to keep.
Output columns:
- dynamic (determined by `columns` attribute): The columns that were kept.
Categories:
- columns
Examples:
Select the columns to keep:
```python
from distilabel.steps import KeepColumns
keep_columns = KeepColumns(
columns=["instruction", "generation"],
)
keep_columns.load()
result = next(
keep_columns.process(
[{"instruction": "What's the brightest color?", "generation": "white", "model_name": "my_model"}],
)
)
# >>> result
# [{'instruction': "What's the brightest color?", 'generation': 'white'}]
```
"""
columns: List[str]
@property
def inputs(self) -> "StepColumns":
"""The inputs for the task are the column names in `columns`."""
return self.columns
@property
def outputs(self) -> "StepColumns":
"""The outputs for the task are the column names in `columns`."""
return self.columns
@override
def process(self, *inputs: StepInput) -> "StepOutput":
"""The `process` method keeps only the columns specified in the `columns` attribute.
Args:
*inputs: A list of dictionaries with the input data.
Yields:
A list of dictionaries with the output data.
"""
for input in inputs:
outputs = []
for item in input:
outputs.append({col: item[col] for col in self.columns})
yield outputs
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Optional
from typing_extensions import override
from distilabel.steps.base import Step, StepInput
from distilabel.steps.columns.utils import merge_columns
if TYPE_CHECKING:
from distilabel.typing import StepColumns, StepOutput
class MergeColumns(Step):
"""Merge columns from a row.
`MergeColumns` is a `Step` that implements the `process` method that calls the `merge_columns`
function to handle and combine columns in a `StepInput`. `MergeColumns` provides two attributes
`columns` and `output_column` to specify the columns to merge and the resulting output column.
This step can be useful if you have a `Task` that generates instructions, for example, and
you want more examples of those. In such a case, you could use another `Task` to multiply
your instructions synthetically, which would yield two separate columns. Using `MergeColumns`
you can merge them and use them as a single column in your dataset for further processing.
Attributes:
columns: List of strings with the names of the columns to merge.
output_column: Optional name of the output column. Defaults to "merged_column" when not provided.
Input columns:
- dynamic (determined by `columns` attribute): The columns to merge.
Output columns:
- dynamic (determined by `columns` and `output_column` attributes): The columns
that were merged.
Categories:
- columns
Examples:
Combine columns in rows of a dataset:
```python
from distilabel.steps import MergeColumns
combiner = MergeColumns(
columns=["queries", "multiple_queries"],
output_column="queries",
)
combiner.load()
result = next(
combiner.process(
[
{
"queries": "How are you?",
"multiple_queries": ["What's up?", "Everything ok?"]
}
],
)
)
# >>> result
# [{'queries': ['How are you?', "What's up?", 'Everything ok?']}]
```
"""
columns: List[str]
output_column: Optional[str] = None
@property
def inputs(self) -> "StepColumns":
return self.columns
@property
def outputs(self) -> "StepColumns":
return [self.output_column] if self.output_column else ["merged_column"]
@override
def process(self, inputs: StepInput) -> "StepOutput":
combined = []
for input in inputs:
combined.append(
merge_columns(
input,
columns=self.columns,
new_column=self.outputs[0],
)
)
yield combined
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from distilabel.constants import DISTILABEL_METADATA_KEY
if TYPE_CHECKING:
from distilabel.steps.base import StepInput
def merge_distilabel_metadata(
*output_dicts: Dict[str, Any],
) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
"""
Merge the `DISTILABEL_METADATA_KEY` from multiple output dictionaries. `DISTILABEL_METADATA_KEY`
can be either a dictionary containing metadata keys or a list containing dictionaries
of metadata keys.
Args:
*output_dicts: Variable number of dictionaries or lists containing distilabel metadata.
Returns:
A merged dictionary or list containing all the distilabel metadata.
"""
merged_metadata = defaultdict(list)
for output_dict in output_dicts:
metadata = output_dict.get(DISTILABEL_METADATA_KEY, {})
# If the value under `DISTILABEL_METADATA_KEY` is a `list` then it contains dictionaries
# with the metadata per `num_generations` created when `group_generations==True`
if isinstance(metadata, list):
if not isinstance(merged_metadata, list):
merged_metadata = []
merged_metadata.extend(metadata)
else:
for key, value in metadata.items():
merged_metadata[key].append(value)
if isinstance(merged_metadata, list):
return merged_metadata
final_metadata = {}
for key, value_list in merged_metadata.items():
if len(value_list) == 1:
final_metadata[key] = value_list[0]
else:
final_metadata[key] = value_list
return final_metadata
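A minimal sketch of the merge behaviour for dictionary metadata, assuming a hypothetical `statistics_step` key: values appearing under the same key in several dictionaries are collected into a list, while keys appearing only once keep their single value:
```python
from distilabel.steps.columns.utils import merge_distilabel_metadata

merged = merge_distilabel_metadata(
    {"distilabel_metadata": {"statistics_step": {"input_tokens": [10]}}},
    {"distilabel_metadata": {"statistics_step": {"input_tokens": [5]}}},
)
# {'statistics_step': [{'input_tokens': [10]}, {'input_tokens': [5]}]}
```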
def group_columns(
*inputs: "StepInput",
group_columns: List[str],
output_group_columns: Optional[List[str]] = None,
) -> "StepInput":
"""Groups multiple list of dictionaries into a single list of dictionaries on the
specified `group_columns`. If `group_columns` are provided, then it will also rename
`group_columns`.
Args:
inputs: list of dictionaries to combine.
group_columns: list of keys to merge on.
output_group_columns: list of keys to rename the merge keys to. Defaults to `None`.
Returns:
A list of dictionaries where the values of the `group_columns` are combined into a
list and renamed to `output_group_columns`.
"""
if output_group_columns is not None and len(output_group_columns) != len(
group_columns
):
raise ValueError(
"The length of `output_group_columns` must be the same as the length of `group_columns`."
)
if output_group_columns is None:
output_group_columns = [f"grouped_{key}" for key in group_columns]
group_columns_dict = dict(zip(group_columns, output_group_columns))
result = []
# Use zip to iterate over lists based on their index
for dicts_at_index in zip(*inputs):
combined_dict = {}
metadata_dicts = []
# Iterate over dicts at the same index
for d in dicts_at_index:
# Extract metadata for merging
if DISTILABEL_METADATA_KEY in d:
metadata_dicts.append(
{DISTILABEL_METADATA_KEY: d[DISTILABEL_METADATA_KEY]}
)
# Iterate over key-value pairs in each dict
for key, value in d.items():
if key == DISTILABEL_METADATA_KEY:
continue
# If the key is one of the columns to group, append the value to its list
if key in group_columns_dict.keys():
combined_dict.setdefault(group_columns_dict[key], []).append(value)
# Otherwise, keep it as a regular key-value pair (the last value wins)
else:
combined_dict[key] = value
if metadata_dicts:
combined_dict[DISTILABEL_METADATA_KEY] = merge_distilabel_metadata(
*metadata_dicts
)
result.append(combined_dict)
return result
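A standalone sketch of `group_columns` with hypothetical rows: grouped keys are collected into lists under their `grouped_*` names, while columns not listed in `group_columns` keep the value of the last input that defines them:
```python
from distilabel.steps.columns.utils import group_columns

result = group_columns(
    [{"generation": "text A", "model_name": "model-1"}],
    [{"generation": "text B", "model_name": "model-2"}],
    group_columns=["generation"],
)
# [{'grouped_generation': ['text A', 'text B'], 'model_name': 'model-2'}]
```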
def merge_columns(
row: Dict[str, Any], columns: List[str], new_column: str = "combined_key"
) -> Dict[str, Any]:
"""Merge columns in a dictionary into a single column on the specified `new_column`.
Args:
row: Dictionary corresponding to a row in a dataset.
columns: List of keys to merge.
new_column: Name of the new key created.
Returns:
Dictionary with the new merged key.
"""
result = row.copy() # preserve the original dictionary
combined = []
for key in columns:
to_combine = result.pop(key)
if not isinstance(to_combine, list):
to_combine = [to_combine]
combined += to_combine
result[new_column] = combined
return result
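A standalone sketch of `merge_columns`, mirroring the `MergeColumns` example above: non-list values are wrapped in a list before concatenation, and the merged keys are removed from the row:
```python
from distilabel.steps.columns.utils import merge_columns

row = {"queries": "How are you?", "multiple_queries": ["What's up?", "Everything ok?"]}
merged = merge_columns(row, columns=["queries", "multiple_queries"], new_column="queries")
# {'queries': ['How are you?', "What's up?", 'Everything ok?']}
```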
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
from typing import (
TYPE_CHECKING,
Any,
Callable,
Literal,
Type,
Union,
overload,
)
from pydantic import create_model
from distilabel.mixins.runtime_parameters import _RUNTIME_PARAMETER_ANNOTATION
from distilabel.steps.base import (
_STEP_INPUT_ANNOTATION,
GeneratorStep,
GlobalStep,
Step,
)
from distilabel.utils.typing_ import is_parameter_annotated_with
if TYPE_CHECKING:
from distilabel.steps.base import _Step
from distilabel.typing import GeneratorStepOutput, StepColumns, StepOutput
_STEP_MAPPING = {
"normal": Step,
"global": GlobalStep,
"generator": GeneratorStep,
}
ProcessingFunc = Callable[..., Union["StepOutput", "GeneratorStepOutput"]]
@overload
def step(
inputs: Union["StepColumns", None] = None,
outputs: Union["StepColumns", None] = None,
step_type: Literal["normal"] = "normal",
) -> Callable[..., Type["Step"]]: ...
@overload
def step(
inputs: Union["StepColumns", None] = None,
outputs: Union["StepColumns", None] = None,
step_type: Literal["global"] = "global",
) -> Callable[..., Type["GlobalStep"]]: ...
@overload
def step(
inputs: None = None,
outputs: Union["StepColumns", None] = None,
step_type: Literal["generator"] = "generator",
) -> Callable[..., Type["GeneratorStep"]]: ...
def step(
inputs: Union["StepColumns", None] = None,
outputs: Union["StepColumns", None] = None,
step_type: Literal["normal", "global", "generator"] = "normal",
) -> Callable[..., Type["_Step"]]:
"""Creates an `Step` from a processing function.
Args:
inputs: a list containing the name of the input columns/keys required by the step,
or a dictionary where the keys are the columns and the values are booleans
indicating whether the column is required or not. If not provided the default
will be an empty list `[]` and it will be assumed that the step doesn't need
any specific columns. Defaults to `None`.
outputs: a list containing the name of the output columns/keys or a dictionary
where the keys are the columns and the values are booleans indicating whether
the column will be generated or not. If not provided the default will be an
empty list `[]` and it will be assumed that the step doesn't generate any
specific columns. Defaults to `None`.
step_type: the kind of step to create. Valid choices are: "normal" (`Step`),
"global" (`GlobalStep`) or "generator" (`GeneratorStep`). Defaults to
`"normal"`.
Returns:
A callable that will generate the type given the processing function.
Example:
```python
# Normal step
@step(inputs=["instruction"], outputs=["generation"])
def GenerationStep(inputs: StepInput, dummy_generation: RuntimeParameter[str]) -> StepOutput:
for input in inputs:
input["generation"] = dummy_generation
yield inputs
# Global step
@step(inputs=["instruction"], step_type="global")
def FilteringStep(inputs: StepInput, max_length: RuntimeParameter[int] = 256) -> StepOutput:
yield [
input
for input in inputs
if len(input["instruction"]) <= max_length
]
# Generator step
@step(outputs=["num"], step_type="generator")
def RowGenerator(num_rows: RuntimeParameter[int] = 500) -> GeneratorStepOutput:
data = list(range(num_rows))
for i in range(0, len(data), 100):
last_batch = i + 100 >= len(data)
yield [{"num": num} for num in data[i : i + 100]], last_batch
```
"""
inputs = inputs or []
outputs = outputs or []
def decorator(func: ProcessingFunc) -> Type["_Step"]:
if step_type not in _STEP_MAPPING:
raise ValueError(
f"Invalid step type '{step_type}'. Please, review the '{func.__name__}'"
" function decorated with the `@step` decorator and provide a valid"
" `step_type`. Valid choices are: 'normal', 'global' or 'generator'."
)
BaseClass = _STEP_MAPPING[step_type]
signature = inspect.signature(func)
runtime_parameters = {}
step_input_parameter = None
for name, param in signature.parameters.items():
if is_parameter_annotated_with(param, _RUNTIME_PARAMETER_ANNOTATION):
runtime_parameters[name] = (
param.annotation,
param.default if param.default != param.empty else None,
)
if not step_type == "generator" and is_parameter_annotated_with(
param, _STEP_INPUT_ANNOTATION
):
if step_input_parameter is not None:
raise ValueError(
f"Function '{func.__name__}' has more than one parameter annotated"
f" with `StepInput`. Please, review the '{func.__name__}' function"
" decorated with the `@step` decorator and provide only one"
" argument annotated with `StepInput`."
)
step_input_parameter = param
RuntimeParametersModel = create_model( # type: ignore
"RuntimeParametersModel",
**runtime_parameters, # type: ignore
)
def inputs_property(self) -> "StepColumns":
return inputs
def outputs_property(self) -> "StepColumns":
return outputs
def process(
self, *args: Any, **kwargs: Any
) -> Union["StepOutput", "GeneratorStepOutput"]:
return func(*args, **kwargs)
return type( # type: ignore
func.__name__,
(
BaseClass,
RuntimeParametersModel,
),
{
"process": process,
"inputs": property(inputs_property),
"outputs": property(outputs_property),
"__module__": func.__module__,
"__doc__": func.__doc__,
"_built_from_decorator": True,
# Override the `get_process_step_input` method to return the parameter
# of the original function annotated with `StepInput`.
"get_process_step_input": lambda self: step_input_parameter,
},
)
return decorator
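A minimal end-to-end sketch of using the decorator, assuming `step` and `StepInput` are exported from `distilabel.steps` and `StepOutput` from `distilabel.typing`, as in the examples above; `UpperCaseStep` is a hypothetical step, and the generated class is instantiated and called like any handwritten `Step`:
```python
from distilabel.steps import StepInput, step
from distilabel.typing import StepOutput


@step(inputs=["instruction"], outputs=["upper_instruction"])
def UpperCaseStep(inputs: StepInput) -> StepOutput:
    # The `StepInput`-annotated parameter receives the batch of rows to process.
    for input in inputs:
        input["upper_instruction"] = input["instruction"].upper()
    yield inputs


upper_case = UpperCaseStep()
upper_case.load()
result = next(upper_case.process([{"instruction": "hello"}]))
# [{'instruction': 'hello', 'upper_instruction': 'HELLO'}]
```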
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING, List, Literal
import numpy as np
from pydantic import Field
from distilabel.mixins.runtime_parameters import RuntimeParameter
from distilabel.steps.base import GlobalStep, StepInput
if TYPE_CHECKING:
from distilabel.steps.typing import StepOutput
class DeitaFiltering(GlobalStep):
"""Filter dataset rows using DEITA filtering strategy.
Filter the dataset based on the DEITA score and the cosine distance between the embeddings.
It's an implementation of the filtering step from the paper 'What Makes Good Data
for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning'.
Attributes:
data_budget: The desired size of the dataset after filtering.
diversity_threshold: If a row has a cosine distance with respect to its nearest
neighbor greater than this value, it will be included in the filtered dataset.
Defaults to `0.9`.
normalize_embeddings: Whether to normalize the embeddings before computing the cosine
distance. Defaults to `True`.
distance_metric: The distance metric to use, either `"cosine"` or `"manhattan"`.
Defaults to `"cosine"`.
Runtime parameters:
- `data_budget`: The desired size of the dataset after filtering.
- `diversity_threshold`: If a row has a cosine distance with respect to its nearest
neighbor greater than this value, it will be included in the filtered dataset.
- `normalize_embeddings`: Whether to normalize the embeddings before computing the
cosine distance. Defaults to `True`.
- `distance_metric`: The distance metric to use, either `"cosine"` or `"manhattan"`.
Defaults to `"cosine"`.
Input columns:
- evol_instruction_score (`float`): The score of the instruction generated by
`ComplexityScorer` step.
- evol_response_score (`float`): The score of the response generated by
`QualityScorer` step.
- embedding (`List[float]`): The embedding generated for the conversation of the
instruction-response pair using `GenerateEmbeddings` step.
Output columns:
- deita_score (`float`): The DEITA score for the instruction-response pair.
- deita_score_computed_with (`List[str]`): The scores used to compute the DEITA
score.
- nearest_neighbor_distance (`float`): The cosine distance between the embeddings
of the instruction-response pair.
Categories:
- filtering
References:
- [`What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning`](https://arxiv.org/abs/2312.15685)
Examples:
Filter the dataset based on the DEITA score and the cosine distance between the embeddings:
```python
from distilabel.steps import DeitaFiltering
deita_filtering = DeitaFiltering(data_budget=1)
deita_filtering.load()
result = next(
deita_filtering.process(
[
{
"evol_instruction_score": 0.5,
"evol_response_score": 0.5,
"embedding": [-8.12729941, -5.24642847, -6.34003029],
},
{
"evol_instruction_score": 0.6,
"evol_response_score": 0.6,
"embedding": [2.99329242, 0.7800932, 0.7799726],
},
{
"evol_instruction_score": 0.7,
"evol_response_score": 0.7,
"embedding": [10.29041806, 14.33088073, 13.00557506],
},
],
)
)
# >>> result
# [{'evol_instruction_score': 0.5, 'evol_response_score': 0.5, 'embedding': [-8.12729941, -5.24642847, -6.34003029], 'deita_score': 0.25, 'deita_score_computed_with': ['evol_instruction_score', 'evol_response_score'], 'nearest_neighbor_distance': 1.9042812683723933}]
```
Citations:
```
@misc{liu2024makesgooddataalignment,
title={What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning},
author={Wei Liu and Weihao Zeng and Keqing He and Yong Jiang and Junxian He},
year={2024},
eprint={2312.15685},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2312.15685},
}
```
"""
data_budget: RuntimeParameter[int] = Field(
default=None, description="The desired size of the dataset after filtering."
)
diversity_threshold: RuntimeParameter[float] = Field(
default=0.9,
description="If a row has a cosine distance with respect to it's nearest neighbor"
" greater than this value, it will be included in the filtered dataset.",
)
normalize_embeddings: RuntimeParameter[bool] = Field(
default=True,
description="Whether to normalize the embeddings before computing the cosine distance.",
)
distance_metric: RuntimeParameter[Literal["cosine", "manhattan"]] = Field(
default="cosine",
description="The distance metric to use. Currently only 'cosine' is supported.",
)
@property
def inputs(self) -> List[str]:
return ["evol_instruction_score", "evol_response_score", "embedding"]
@property
def outputs(self) -> List[str]:
return ["deita_score", "nearest_neighbor_distance", "deita_score_computed_with"]
def process(self, inputs: StepInput) -> "StepOutput": # type: ignore
"""Filter the dataset based on the DEITA score and the cosine distance between the
embeddings.
Args:
inputs: The input data.
Yields:
The filtered dataset.
"""
inputs = self._compute_deita_score(inputs)
inputs = self._compute_nearest_neighbor(inputs)
inputs.sort(key=lambda x: x["deita_score"], reverse=True)
selected_rows = []
for input in inputs:
if len(selected_rows) >= self.data_budget: # type: ignore
break
if input["nearest_neighbor_distance"] >= self.diversity_threshold:
selected_rows.append(input)
yield selected_rows
def _compute_deita_score(self, inputs: StepInput) -> StepInput:
"""Computes the DEITA score for each instruction-response pair. The DEITA score is
the product of the instruction score and the response score.
Args:
inputs: The input data.
Returns:
The input data with the DEITA score computed.
"""
for input_ in inputs:
evol_instruction_score = input_.get("evol_instruction_score")
evol_response_score = input_.get("evol_response_score")
if evol_instruction_score and evol_response_score:
deita_score = evol_instruction_score * evol_response_score
score_computed_with = ["evol_instruction_score", "evol_response_score"]
elif evol_instruction_score:
self._logger.warning(
"Response score is missing for the instruction-response pair. Using"
" instruction score as DEITA score."
)
deita_score = evol_instruction_score
score_computed_with = ["evol_instruction_score"]
elif evol_response_score:
self._logger.warning(
"Instruction score is missing for the instruction-response pair. Using"
" response score as DEITA score."
)
deita_score = evol_response_score
score_computed_with = ["evol_response_score"]
else:
self._logger.warning(
"Instruction and response scores are missing for the instruction-response"
" pair. Setting DEITA score to 0."
)
deita_score = 0
score_computed_with = []
input_.update(
{
"deita_score": deita_score,
"deita_score_computed_with": score_computed_with,
}
)
return inputs
def _compute_nearest_neighbor(self, inputs: StepInput) -> StepInput:
"""Computes the cosine distance between the embeddings of the instruction-response
pairs and the nearest neighbor.
Args:
inputs: The input data.
Returns:
The input data with the cosine distance computed.
"""
embeddings = np.array([input["embedding"] for input in inputs])
if self.normalize_embeddings:
embeddings = self._normalize_embeddings(embeddings)
self._logger.info("📏 Computing nearest neighbor distance...")
if self.distance_metric == "cosine":
self._logger.info("📏 Using cosine distance.")
distances = self._cosine_distance(embeddings)
else:
self._logger.info("📏 Using manhattan distance.")
distances = self._manhattan_distance(embeddings)
for distance, input in zip(distances, inputs):
input["nearest_neighbor_distance"] = distance
return inputs
def _normalize_embeddings(self, embeddings: np.ndarray) -> np.ndarray:
"""Normalize the embeddings.
Args:
embeddings: The embeddings to normalize.
Returns:
The normalized embeddings.
"""
self._logger.info("⚖️ Normalizing embeddings...")
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
return embeddings / norms
def _cosine_distance(self, embeddings: np.ndarray) -> np.ndarray:
"""Computes the cosine distance between the embeddings.
Args:
embeddings: The embeddings.
Returns:
The cosine distance between the embeddings.
"""
# With normalized embeddings (see `normalize_embeddings`), the dot product is
# the cosine similarity, so `1 - similarity` is the cosine distance.
cosine_similarity = np.dot(embeddings, embeddings.T)
cosine_distance = 1 - cosine_similarity
# Ignore self-distance
np.fill_diagonal(cosine_distance, np.inf)
return np.min(cosine_distance, axis=1)
def _manhattan_distance(self, embeddings: np.ndarray) -> np.ndarray:
"""Computes the manhattan distance between the embeddings.
Args:
embeddings: The embeddings.
Returns:
The manhattan distance between the embeddings.
"""
manhattan_distance = np.abs(embeddings[:, None] - embeddings).sum(-1)
# Ignore self-distance
np.fill_diagonal(manhattan_distance, np.inf)
return np.min(manhattan_distance, axis=1)
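To make the distance computation above concrete, a standalone numpy trace of the `_normalize_embeddings` and `_cosine_distance` logic on toy 2-D embeddings:
```python
import numpy as np

embeddings = np.array([[1.0, 0.0], [0.0, 1.0], [0.8, 0.6]])
# Normalize rows to unit length so the dot product equals cosine similarity.
normalized = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
cosine_distance = 1 - normalized @ normalized.T
np.fill_diagonal(cosine_distance, np.inf)  # ignore self-distance
print(np.min(cosine_distance, axis=1))  # [0.2 0.4 0.2] -> each row's nearest neighbour distance
```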
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING
from distilabel.models.embeddings.base import Embeddings
from distilabel.steps.base import Step, StepInput
if TYPE_CHECKING:
from distilabel.typing import StepColumns, StepOutput
class EmbeddingGeneration(Step):
"""Generate embeddings using an `Embeddings` model.
`EmbeddingGeneration` is a `Step` that uses an `Embeddings` model to generate sentence
embeddings for the provided input texts.
Attributes:
embeddings: the `Embeddings` model used to generate the sentence embeddings.
Input columns:
- text (`str`): The text for which the sentence embedding has to be generated.
Output columns:
- embedding (`List[Union[float, int]]`): the generated sentence embedding.
- model_name (`str`): the name of the model used to generate the embedding.
Categories:
- embedding
Examples:
Generate sentence embeddings with Sentence Transformers:
```python
from distilabel.models import SentenceTransformerEmbeddings
from distilabel.steps import EmbeddingGeneration
embedding_generation = EmbeddingGeneration(
embeddings=SentenceTransformerEmbeddings(
model="mixedbread-ai/mxbai-embed-large-v1",
)
)
embedding_generation.load()
result = next(embedding_generation.process([{"text": "Hello, how are you?"}]))
# [{'text': 'Hello, how are you?', 'embedding': [0.06209656596183777, -0.015797119587659836, ...]}]
```
"""
embeddings: Embeddings
@property
def inputs(self) -> "StepColumns":
return ["text"]
@property
def outputs(self) -> "StepColumns":
return ["embedding", "model_name"]
def load(self) -> None:
"""Loads the `Embeddings` model."""
super().load()
self.embeddings.load()
def process(self, inputs: StepInput) -> "StepOutput": # type: ignore
embeddings = self.embeddings.encode(inputs=[input["text"] for input in inputs])
for input, embedding in zip(inputs, embeddings):
input["embedding"] = embedding
input["model_name"] = self.embeddings.model_name
yield inputs
def unload(self) -> None:
super().unload()
self.embeddings.unload()
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib.util
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
import numpy as np
from datasets import Dataset
from pydantic import Field
from distilabel.mixins.runtime_parameters import RuntimeParameter
from distilabel.steps import GlobalStep, StepInput
if TYPE_CHECKING:
from distilabel.typing import StepOutput
class FaissNearestNeighbour(GlobalStep):
"""Create a `faiss` index to get the nearest neighbours.
`FaissNearestNeighbour` is a `GlobalStep` that creates a `faiss` index using the Hugging
Face `datasets` library integration, and then gets the nearest neighbours and the scores
or distance of the nearest neighbours for each input row.
Attributes:
device: the CUDA device ID or a list of IDs to be used. If negative integer, it
will use all the available GPUs. Defaults to `None`.
string_factory: the name of the factory to be used to build the `faiss` index.
Available string factories can be checked here: https://github.com/facebookresearch/faiss/wiki/Faiss-indexes.
Defaults to `None`.
metric_type: the metric to be used to measure the distance between the points. It's
an integer and the recommended way to pass it is importing `faiss` and then passing
one of the `faiss.METRIC_x` variables. Defaults to `None`.
k: the number of nearest neighbours to search for each input row. Defaults to `1`.
search_batch_size: the number of rows to include in a search batch. The value can
be adjusted to maximize the resources usage or to avoid OOM issues. Defaults
to `50`.
train_size: If the index needs a training step, specifies how many vectors will be
used to train the index.
Runtime parameters:
- `device`: the CUDA device ID or a list of IDs to be used. If negative integer,
it will use all the available GPUs. Defaults to `None`.
- `string_factory`: the name of the factory to be used to build the `faiss` index.
Available string factories can be checked here: https://github.com/facebookresearch/faiss/wiki/Faiss-indexes.
Defaults to `None`.
- `metric_type`: the metric to be used to measure the distance between the points.
It's an integer and the recommended way to pass it is importing `faiss` and then
passing one of the `faiss.METRIC_x` variables. Defaults to `None`.
- `k`: the number of nearest neighbours to search for each input row. Defaults to `1`.
- `search_batch_size`: the number of rows to include in a search batch. The value
can be adjusted to maximize the resources usage or to avoid OOM issues. Defaults
to `50`.
- `train_size`: If the index needs a training step, specifies how many vectors will
be used to train the index.
Input columns:
- embedding (`List[Union[float, int]]`): a sentence embedding.
Output columns:
- nn_indices (`List[int]`): a list containing the indices of the `k` nearest neighbours
in the inputs for the row.
- nn_scores (`List[float]`): a list containing the score or distance to each `k`
nearest neighbour in the inputs.
Categories:
- embedding
References:
- [`The Faiss library`](https://arxiv.org/abs/2401.08281)
Examples:
Generating embeddings and getting the nearest neighbours:
```python
from distilabel.models import SentenceTransformerEmbeddings
from distilabel.pipeline import Pipeline
from distilabel.steps import EmbeddingGeneration, FaissNearestNeighbour, LoadDataFromHub
with Pipeline(name="hello") as pipeline:
load_data = LoadDataFromHub(output_mappings={"prompt": "text"})
embeddings = EmbeddingGeneration(
embeddings=SentenceTransformerEmbeddings(
model="mixedbread-ai/mxbai-embed-large-v1"
)
)
nearest_neighbours = FaissNearestNeighbour()
load_data >> embeddings >> nearest_neighbours
if __name__ == "__main__":
distiset = pipeline.run(
parameters={
load_data.name: {
"repo_id": "distilabel-internal-testing/instruction-dataset-mini",
"split": "test",
},
},
use_cache=False,
)
```
Citations:
```
@misc{douze2024faisslibrary,
title={The Faiss library},
author={Matthijs Douze and Alexandr Guzhva and Chengqi Deng and Jeff Johnson and Gergely Szilvasy and Pierre-Emmanuel Mazaré and Maria Lomeli and Lucas Hosseini and Hervé Jégou},
year={2024},
eprint={2401.08281},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2401.08281},
}
```
"""
device: Optional[RuntimeParameter[Union[int, List[int]]]] = Field(
default=None,
description="The CUDA device ID or a list of IDs to be used. If negative integer,"
" it will use all the available GPUs.",
)
string_factory: Optional[RuntimeParameter[str]] = Field(
default=None,
description="The name of the factory to be used to build the `faiss` index."
"Available string factories can be checked here: https://github.com/facebookresearch/faiss/wiki/Faiss-indexes.",
)
metric_type: Optional[RuntimeParameter[int]] = Field(
default=None,
description="The metric to be used to measure the distance between the points. It's"
" an integer and the recommend way to pass it is importing `faiss` and thenpassing"
" one of `faiss.METRIC_x` variables.",
)
k: Optional[RuntimeParameter[int]] = Field(
default=1,
description="The number of nearest neighbours to search for each input row.",
)
search_batch_size: Optional[RuntimeParameter[int]] = Field(
default=50,
description="The number of rows to include in a search batch. The value can be adjusted"
" to maximize the resources usage or to avoid OOM issues.",
)
train_size: Optional[RuntimeParameter[int]] = Field(
default=None,
description="If the index needs a training step, specifies how many vectors will be used to train the index.",
)
def load(self) -> None:
super().load()
if importlib.util.find_spec("faiss") is None:
raise ImportError(
"`faiss` package is not installed. Please install it using `pip install"
" 'distilabel[faiss-cpu]' or 'distilabel[faiss-gpu]'`."
)
@property
def inputs(self) -> List[str]:
return ["embedding"]
@property
def outputs(self) -> List[str]:
return ["nn_indices", "nn_scores"]
def _build_index(self, inputs: List[Dict[str, Any]]) -> Dataset:
"""Builds a `faiss` index using `datasets` integration.
Args:
inputs: a list of dictionaries.
Returns:
The built `datasets.Dataset` with its `faiss` index.
"""
dataset = Dataset.from_list(inputs)
if self.train_size is not None and self.string_factory:
self._logger.info("🏋️‍♀️ Starting Faiss index training...")
dataset.add_faiss_index(
column="embedding",
device=self.device, # type: ignore
string_factory=self.string_factory,
metric_type=self.metric_type,
train_size=self.train_size,
)
return dataset
def _save_index(self, dataset: Dataset) -> None:
"""Save the generated Faiss index as an artifact of the step.
Args:
dataset: the dataset with the `faiss` index built.
"""
self.save_artifact(
name="faiss_index",
write_function=lambda path: dataset.save_faiss_index(
index_name="embedding", file=path / "index.faiss"
),
metadata={
"num_rows": len(dataset),
"embedding_dim": len(dataset[0]["embedding"]),
},
)
def _search(self, dataset: Dataset) -> Dataset:
"""Search the top `k` nearest neighbours for each row in the dataset.
Args:
dataset: the dataset with the `faiss` index built.
Returns:
The updated dataset containing the top `k` nearest neighbours for each row,
as well as the score or distance.
"""
def add_search_results(examples: Dict[str, List[Any]]) -> Dict[str, List[Any]]:
queries = np.array(examples["embedding"])
results = dataset.search_batch(
index_name="embedding",
queries=queries,
k=self.k + 1, # type: ignore
)
examples["nn_indices"] = [indices[1:] for indices in results.total_indices]
examples["nn_scores"] = [scores[1:] for scores in results.total_scores]
return examples
return dataset.map(
add_search_results, batched=True, batch_size=self.search_batch_size
)
def process(self, inputs: StepInput) -> "StepOutput": # type: ignore
dataset = self._build_index(inputs)
dataset_with_search_results = self._search(dataset)
self._save_index(dataset)
yield dataset_with_search_results.to_list()