import json
import pickle
from collections import defaultdict
from glob import glob
from multiprocessing import Pool
from pathlib import Path
import pandas as pd
import yaml
import numpy as np
import pyarrow as pa
from tqdm import tqdm
from index_kits.indexer import IndexV2Builder
from index_kits.bucket import build_multi_resolution_bucket
from index_kits.dataset.config_parse import DatasetConfig
def get_table(arrow_file):
return pa.ipc.RecordBatchFileReader(pa.memory_map(arrow_file, "r")).read_all()
def get_indices(arrow_file, repeat_times, filter_fn, repeat_fn, callback=None):
"""
Get valid indices from a single arrow_file.
Parameters
----------
arrow_file: str
repeat_times: int
Repeat remain indices multiple times.
filter_fn
callback
Returns
-------
"""
try:
table = pa.ipc.RecordBatchFileReader(pa.memory_map(arrow_file, "r")).read_all()
except Exception as e:
print(arrow_file, e)
raise e
length = len(table)
if len(table) == 0:
print(f"Warning: Empty table: {arrow_file}")
indices = []
stats = {}
else:
# Apply filter_fn if available
if filter_fn is not None:
mask, stats, md5s = filter_fn(arrow_file, table)
else:
mask = pd.Series([True] * length)
stats = {}
md5s = None
# Apply callback function if available
if callback is not None:
mask, stats = callback(arrow_file, table, mask, stats, md5s)
# Get indices
if mask is not None:
indices = np.where(mask)[0].tolist()
else:
indices = list(range(length))
# Apply indices repeat
if repeat_fn is not None:
indices, repeat_stats = repeat_fn(
arrow_file, table, indices, repeat_times, md5s
)
stats.update(repeat_stats)
return arrow_file, length, indices, stats
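# Filter sketch (illustrative; `BAD_MD5S` and the blacklist logic are assumptions,
# not part of this module). A filter_fn receives the arrow file path and its
# table, and returns (mask, stats, md5s):
#   def my_filter(arrow_file, table):
#       md5s = table["md5"].to_pandas()
#       mask = ~md5s.isin(BAD_MD5S)          # keep rows not in the blacklist
#       stats = {"bad_md5": int((~mask).sum())}
#       return mask, stats, md5s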
def load_md5_files(files, name=None):
if isinstance(files, str):
files = [files]
md5s = set()
for file in files:
md5s.update(Path(file).read_text().splitlines())
print(f" {name} md5s: {len(md5s):,}")
return md5s
def load_md52cls_files(files, name=None):
if isinstance(files, str):
files = [files]
md52cls = {}
for file in files:
with Path(file).open() as f:
md52cls.update(json.load(f))
print(f" {name} md52cls: {len(md52cls):,}")
return md52cls
def merge_and_build_index(data_type, src, dconfig, save_path):
if isinstance(src, str):
files = list(sorted(glob(src)))
else:
files = list(sorted(src))
print(f"Found {len(files):,} temp pickle files.")
for fname in files:
print(f" {fname}")
arrow_files = []
table_lengths = []
indices_list = []
bad_stats_total = defaultdict(int)
total_indices = 0
total_processed_length = 0
for file_name in tqdm(files):
with Path(file_name).open("rb") as f:
data = pickle.load(f)
for arrow_file, table_length, indices, *args in tqdm(data, leave=False):
arrow_files.append(arrow_file)
table_lengths.append(table_length)
total_processed_length += table_length
indices_list.append(indices)
total_indices += len(indices)
if len(args) > 0 and args[0]:
bad_stats = args[0]
for k, v in bad_stats.items():
bad_stats_total[k] += v
if len(bad_stats_total):
stats_save_dir = Path(save_path).parent
stats_save_dir.mkdir(parents=True, exist_ok=True)
stats_save_path = stats_save_dir / (Path(save_path).stem + "_stats.txt")
stats_save_path.write_text(
"\n".join([f"{k:>50s} {v}" for k, v in bad_stats_total.items()]) + "\n"
)
print(f"Save stats to {stats_save_path}")
print(f"Arrow files: {len(arrow_files):,}")
print(f"Processed indices: {total_processed_length:,}")
print(f"Valid indices: {total_indices:,}")
cum_length = 0
total_indices = []
cum_lengths = []
group_lengths = []
existed = set()
print(f"Accumulating indices...")
pbar = tqdm(
zip(arrow_files, table_lengths, indices_list),
total=len(arrow_files),
mininterval=1,
)
_count = 0
for arrow_file, table_length, indices in pbar:
if len(indices) > 0 and dconfig.remove_md5_dup:
new_indices = []
table = get_table(arrow_file)
if "md5" not in table.column_names:
                raise ValueError(
                    f"Column 'md5' not found in {arrow_file}. "
                    f"When `remove_md5_dup: true` is set, the md5 column is required."
                )
md5s = table["md5"].to_pandas()
for i in indices:
md5 = md5s[i]
if md5 in existed:
continue
existed.add(md5)
new_indices.append(i)
indices = new_indices
total_indices.extend([int(i + cum_length) for i in indices])
cum_length += table_length
cum_lengths.append(cum_length)
group_lengths.append(len(indices))
_count += 1
if _count % 100 == 0:
pbar.set_description(f"Indices: {len(total_indices):,}")
builder = IndexV2Builder(
data_type=data_type,
arrow_files=arrow_files,
cum_length=cum_lengths,
group_length=group_lengths,
indices=total_indices,
config_file=dconfig.config_file,
)
builder.build(save_path)
print(
f"Build index finished!\n\n"
f" Save path: {Path(save_path).absolute()}\n"
f" Number of indices: {len(total_indices)}\n"
f"Number of arrow files: {len(arrow_files)}\n"
)
def worker_startup(rank, world_size, dconfig, prefix, work_dir, callback=None):
# Prepare names for this worker
num = (len(dconfig.names) + world_size - 1) // world_size
arrow_names = dconfig.names[rank * num : (rank + 1) * num]
print(f"Rank {rank} has {len(arrow_names):,} names.")
# Run get indices
print(f"Start getting indices...")
indices = []
for arrow_name, repeat_times in tqdm(
arrow_names, position=rank, desc=f"#{rank}: ", leave=False
):
indices.append(
get_indices(
arrow_name, repeat_times, dconfig.filter, dconfig.repeater, callback
)
)
# Save to a temp file
temp_save_path = (
work_dir / f"data/temp_pickles/{prefix}-{rank + 1}_of_{world_size}.pkl"
)
temp_save_path.parent.mkdir(parents=True, exist_ok=True)
with temp_save_path.open("wb") as f:
pickle.dump(indices, f)
print(f"Rank {rank} finished. Write temporary data to {temp_save_path}")
return temp_save_path
def startup(
config_file,
save,
world_size=1,
work_dir=".",
callback=None,
use_cache=False,
):
work_dir = Path(work_dir)
save_path = Path(save)
if save_path.suffix != ".json":
save_path = save_path.parent / (save_path.name + ".json")
print(f"Using save_path: {save_path}")
prefix = f"{save_path.stem}"
# Parse dataset config and build the data_type list
dconfig = DatasetConfig(work_dir, config_file)
data_type = []
for k, v in dconfig.data_type.items():
data_type.extend(v)
print(f"{k}:")
for x in v:
print(f" {x}")
if dconfig.remove_md5_dup:
data_type.append("Remove md5 duplicates.")
else:
data_type.append("Keep md5 duplicates.")
# Start processing
if not use_cache:
temp_pickles = []
if world_size == 1:
print(f"\nRunning in single process mode...")
temp_pickles.append(
worker_startup(
rank=0,
world_size=1,
dconfig=dconfig,
prefix=prefix,
work_dir=work_dir,
callback=callback,
)
)
else:
print(f"\nRunning in multi-process mode (world_size={world_size})...")
p = Pool(world_size)
temp_pickles_ = []
for i in range(world_size):
temp_pickles_.append(
p.apply_async(
worker_startup,
args=(i, world_size, dconfig, prefix, work_dir, callback),
)
)
for res in temp_pickles_:
temp_pickles.append(res.get())
# close
p.close()
p.join()
else:
temp_pickles = glob(
f"{work_dir}/data/temp_pickles/{prefix}-*_of_{world_size}.pkl"
)
# Merge temp pickles and build index
merge_and_build_index(
data_type,
temp_pickles,
dconfig,
save_path,
)
def make_multireso(
target,
config_file=None,
src=None,
base_size=None,
reso_step=None,
target_ratios=None,
align=None,
min_size=None,
md5_file=None,
):
if config_file is not None:
with Path(config_file).open() as f:
config = yaml.safe_load(f)
else:
config = {}
src = config.get("src", src)
base_size = config.get("base_size", base_size)
reso_step = config.get("reso_step", reso_step)
target_ratios = config.get("target_ratios", target_ratios)
align = config.get("align", align)
min_size = config.get("min_size", min_size)
md5_file = config.get("md5_file", md5_file)
if src is None:
raise ValueError("src must be provided in either config file or command line.")
if base_size is None:
raise ValueError("base_size must be provided.")
if reso_step is None and target_ratios is None:
raise ValueError("Either reso_step or target_ratios must be provided.")
if md5_file is not None:
with open(md5_file, "rb") as f:
md5_hw = pickle.load(f)
print(f"Md5 to height and width: {len(md5_hw):,}")
else:
md5_hw = None
build_multi_resolution_bucket(
config_file=config_file,
base_size=base_size,
reso_step=reso_step,
target_ratios=target_ratios,
align=align,
min_size=min_size,
src_index_files=src,
save_file=target,
md5_hw=md5_hw,
)
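# Usage sketch (illustrative; the config path, save path, and bucket settings
# below are assumptions, not part of this module):
if __name__ == "__main__":
    # Build a v2 index from a dataset config using 8 worker processes.
    startup(
        config_file="configs/demo.yaml",
        save="data/jsons/demo.json",
        world_size=8,
    )
    # Derive a multi-resolution bucket index from the base index.
    make_multireso(
        target="data/jsons/demo_mt.json",
        src="data/jsons/demo.json",
        base_size=1024,
        reso_step=64,
    )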
import bisect
import io
import json
import random
from pathlib import Path
import ast
from itertools import chain
from collections import defaultdict
from functools import partial
from glob import glob
import numpy as np
import pyarrow as pa
from PIL import Image
from tqdm import tqdm
def get_table(arrow_file):
"""
Read an arrow file and return an arrow table.
"""
return pa.ipc.RecordBatchFileReader(pa.memory_map(f"{arrow_file}", "r")).read_all()
def assert_type(data, dtype, msg=""):
if not isinstance(data, dtype):
raise ValueError(f"Expected {msg} type {dtype}, got {type(data)}.")
def ndarray_to_list(data):
if isinstance(data, np.ndarray):
data = data.tolist()
elif isinstance(data, dict):
data = {k: ndarray_to_list(v) for k, v in data.items()}
elif isinstance(data, (list, tuple)):
        # Convert every element to a Python int: numpy integers cannot be
        # serialized to JSON.
        data = [int(x) for x in data]
else:
raise ValueError(
f"Expected data type list, tuple, dict or np.ndarray, got {type(data)}."
)
return data
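# Examples (illustrative): ndarray_to_list(np.array([1, 2])) -> [1, 2];
# ndarray_to_list({"x": np.arange(3)}) -> {"x": [0, 1, 2]};
# ndarray_to_list((np.int64(5),)) -> [5].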
class ArrowIndexV2(object):
"""
ArrowIndexV2 is a new version of ArrowIndex.
Parameters
----------
index_file: str or pathlib.Path
The path of index file. Either index_file or res_dict should be provided.
res_dict: dict
The index dict. Either index_file or res_dict should be provided.
align: int
Align the length of indices to be a multiple of align. Generally align should be the batch size * world_size.
shadow_file_fn: callable or dict
A callable function to map shadow file path to a new path. If None, the shadow file path will not be
changed. If a dict is provided, the keys are the shadow names to call the function, and the values are the
callable functions to map the shadow file path to a new path. If a callable function is provided, the key
is 'default'.
Examples
--------
>>> index_file = 'data.json'
>>> indexObj = ArrowIndexV2(index_file)
>>> pil_image = indexObj.get_image(0)
>>> text = indexObj.get_attribute(0, column='text_zh')
"""
def __init__(
self, index_file=None, res_dict=None, align=1, shadow_file_fn=None, **kwargs
):
if index_file is not None:
with open(index_file, "r") as f:
res_dict = json.load(f)
elif res_dict is not None:
pass
else:
raise ValueError(f"Either index_file or res_dict should be provided.")
self.shadow_file_fn = {}
if shadow_file_fn is not None:
if not callable(shadow_file_fn) and not isinstance(shadow_file_fn, dict):
raise ValueError(
"shadow_file_fn should be a callable function or a dict."
)
if callable(shadow_file_fn):
self.shadow_file_fn["default"] = shadow_file_fn
else:
for k, v in shadow_file_fn.items():
if not callable(v):
raise ValueError(f"{k} should be a callable function.")
self.shadow_file_fn[k] = v
self._data = res_dict
self.data_type = res_dict["data_type"]
self.arrow_files = res_dict["arrow_files"]
self.cum_length = res_dict["cum_length"]
self.group_length = res_dict["group_length"]
error_msg = f"Expected group_length type list, got {type(self.group_length)}."
if isinstance(self.group_length, dict):
raise ValueError(
f"{error_msg}\nNote: You may using a multi-resolution index file. "
"Try `MultiResolutionBucketIndexV2` instead."
)
elif not isinstance(self.group_length, list):
raise ValueError(error_msg)
self.indices = res_dict["indices"]
if "indices_file" in res_dict:
self.indices_file = res_dict["indices_file"]
if self.indices_file != "":
indices_file = Path(index_file).parent / self.indices_file
if Path(indices_file).exists():
self.indices = np.load(indices_file)["x"]
else:
raise ValueError(
f"This Index file contains an extra file {indices_file} which is missed."
)
else:
self.indices_file = ""
if not isinstance(self.indices, list) and not isinstance(
self.indices, np.ndarray
):
raise ValueError(
f"Expected indices type list or np.ndarray, got {type(self.indices)}."
)
if align > 1:
if isinstance(self.indices, np.ndarray):
self.indices = self.indices.tolist()
self.align(align)
self.indices = np.asarray(self.indices, int)
if len(self.arrow_files) != len(self.cum_length):
raise ValueError(
f"Length of arrow_files and cum_length does not match. {len(self.arrow_files)} != {len(self.cum_length)}"
)
if len(self.arrow_files) != len(self.group_length):
raise ValueError(
f"Length of arrow_files and group_length does not match. {len(self.arrow_files)} != {len(self.group_length)}"
)
if len(self.indices) == 0:
raise ValueError(f"No indices found in index_dict.")
if (
isinstance(self.indices, list)
and self.indices[-1] > self.cum_length[-1] - 1
):
raise ValueError(f"Indices exceed cum_length.")
        # Warning: indices must form an increasing array. No check is performed
        # here because it would be slow for indices containing hundreds of
        # millions of data points.
self.bias = self.cum_length
self._cur_arrow_file = None
self._cur_table_map = None
self._cur_table = None
self._index_bias = 0
self.last_index = -1
self._shadow_cur_arrow_file = {}
self._shadow_cur_table_map = {}
self._shadow_cur_table = {}
self._shadow_index_bias = {}
self.shadow_last_index = {}
for k in self.shadow_file_fn.keys():
self._shadow_cur_arrow_file[k] = None
self._shadow_cur_table_map[k] = None
self._shadow_cur_table[k] = None
self._shadow_index_bias[k] = 0
self.shadow_last_index[k] = -1
def __len__(self):
return len(self.indices)
def __repr__(self):
return f"""
ArrowIndexV2(
data_type {self.data_type}
indices_file {self.indices_file}
arrow_files Count={len(self.arrow_files):,} ({self.arrow_files[0]}, ...)
cum_length Count={len(self.cum_length):,} ({self.cum_length[0]}, ...)
group_length Count={len(self.group_length):,} ({self.group_length[0]}, ...)
indices Count={len(self.indices):,}
example_indices Count={len(self._data['example_indices']):,}
)
"""
def check_exists(self):
for arrow_file in tqdm(self.arrow_files):
if not Path(arrow_file).exists():
print(arrow_file)
def align(self, align):
"""
        Repeat indices so that the total length is a multiple of align (generally batch_size * world_size).
"""
if len(self) % align == 0:
return
repeat_num = align - len(self) % align
if repeat_num >= len(self):
repeat_n = repeat_num // len(self)
repeat_times = [repeat_n + 1 for _ in self.indices]
group_length_new = [ll * (repeat_n + 1) for ll in self.group_length]
repeat_num -= repeat_n * len(self)
else:
repeat_times = [1 for _ in range(repeat_num)]
group_length_new = [ll for ll in self.group_length]
for i in range(repeat_num):
repeat_times[-i - 1] += 1
repeat_start_idx = len(self) - len(repeat_times)
group_id = -1
while group_length_new[group_id] == 0:
group_id -= 1
        # Allocate the remaining indices that need to be repeated, counting how
        # many indices have been checked; once the count reaches the current
        # group's length, move on to the previous non-empty group. group_length
        # must be tracked because it has to be updated in step with the
        # repeated indices.
group_acc = 0
for i in range(repeat_num):
group_length_new[group_id] += 1
group_acc += 1
if group_acc == self.group_length[group_id]:
group_id -= 1
while group_length_new[group_id] == 0:
group_id -= 1
group_acc = 0
temp = []
for i, value in enumerate(self.indices[repeat_start_idx:]):
temp.extend([value] * repeat_times[i])
self.indices = np.concatenate([self.indices[:repeat_start_idx], temp])
self.group_length = group_length_new
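    # Worked example (illustrative): with indices [0, 1, 2, 3, 4] and align(4),
    # repeat_num = 4 - 5 % 4 = 3 < len(self), so the last three indices are each
    # duplicated once, giving [0, 1, 2, 2, 3, 3, 4, 4] (length 8, a multiple of
    # 4); group_length is increased in step for the affected groups.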
def shuffle(self, seed=None, fast=False):
"""
        It takes about 30 seconds for an index consisting of 100,000 arrow files.
"""
if fast:
return self.shuffle_fast(seed)
indices = self.indices.tolist()
if seed is not None:
state = random.getstate()
random.seed(seed)
indices_group_list = []
group_cum_len = 0
for group_len in self.group_length:
indices_group = indices[group_cum_len : group_cum_len + group_len]
random.shuffle(indices_group)
indices_group_list.append((indices_group, group_len))
group_cum_len += group_len
random.shuffle(indices_group_list)
self.group_length = [x[1] for x in indices_group_list]
self.indices = np.asarray(
list(chain.from_iterable([x[0] for x in indices_group_list]))
)
if seed is not None:
random.setstate(state)
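    # Note: shuffle() permutes rows within each arrow file and then permutes the
    # order of the file groups, so rows from the same file stay adjacent and the
    # cached memory-mapped tables are reused across consecutive reads;
    # shuffle_fast() below is a plain global permutation without this property.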
def shuffle_fast(self, seed=None):
if seed is not None:
sampler = np.random.RandomState(seed)
sampler.shuffle(self.indices)
else:
np.random.shuffle(self.indices)
def get_table(self, arrow_file, shadow=None):
"""
Read an arrow file and return an arrow table.
"""
if shadow is None:
if self._cur_table is not None:
if self._cur_arrow_file == arrow_file:
# This is the same arrow file. Return the cached table.
return self._cur_table
else:
# This is a different arrow file. Clear the cache.
self._cur_table_map.close()
self._cur_table = None
self._cur_arrow_file = arrow_file
self._cur_table_map = pa.memory_map(f"{arrow_file}", "r")
self._cur_table = pa.ipc.RecordBatchFileReader(
self._cur_table_map
).read_all()
return self._cur_table
else:
if self._shadow_cur_table[shadow] is not None:
if self._shadow_cur_arrow_file[shadow] == arrow_file:
return self._shadow_cur_table[shadow]
else:
self._shadow_cur_table_map[shadow].close()
self._shadow_cur_table[shadow] = None
self._shadow_cur_arrow_file[shadow] = arrow_file
self._shadow_cur_table_map[shadow] = pa.memory_map(f"{arrow_file}", "r")
self._shadow_cur_table[shadow] = pa.ipc.RecordBatchFileReader(
self._shadow_cur_table_map[shadow]
).read_all()
return self._shadow_cur_table[shadow]
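    # Note: cum_length[i] is the total row count of arrow files 0..i, so file i
    # covers global indices [cum_length[i-1], cum_length[i]); bisect_right below
    # returns the first file whose cumulative length exceeds the given index.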
def get_arrow_file_by_index(self, index, return_index_bias=False, shadow=None):
i = bisect.bisect_right(self.cum_length, index)
arrow_file = self.arrow_files[i]
if return_index_bias:
if i == 0:
index_bias = 0
else:
index_bias = self.cum_length[i - 1]
return arrow_file, index_bias
return arrow_file
def get_arrow_file(self, ind, shadow=None):
"""
Get arrow file by in-dataset index.
Parameters
----------
ind: int
The in-dataset index.
shadow: str
The shadow name. If None, return the main arrow file. If not None, return the shadow arrow file.
Returns
-------
arrow_file: str
The arrow file path.
"""
index = self.indices[ind]
return self.get_arrow_file_by_index(index, shadow=shadow)
def load_table_by_index(self, index, shadow=None):
if shadow is None:
if index == self.last_index:
return self._cur_table
arrow_file, self._index_bias = self.get_arrow_file_by_index(
index, return_index_bias=True
)
self._cur_table = self.get_table(arrow_file)
self.last_index = index
return self._cur_table
else:
if index == self.shadow_last_index[shadow]:
return self._shadow_cur_table[shadow]
shadow_arrow_file, _shadow_index_bias = self.get_arrow_file_by_index(
index, return_index_bias=True, shadow=shadow
)
self._shadow_index_bias[shadow] = _shadow_index_bias
self._shadow_cur_table[shadow] = self.get_table(
shadow_arrow_file, shadow=shadow
)
self.shadow_last_index[shadow] = index
return self._shadow_cur_table[shadow]
def get_data_by_index(
self, index, columns=None, allow_missing=False, return_meta=True, shadow=None
):
table = self.load_table_by_index(index, shadow=shadow)
if isinstance(columns, str):
columns = [columns]
if columns is None:
columns = list(table.column_names)
index_bias = (
self._index_bias if shadow is None else self._shadow_index_bias[shadow]
)
in_arrow_index = index - index_bias
if return_meta:
cur_arrow_file = (
self._cur_arrow_file
if shadow is None
else self._shadow_cur_arrow_file[shadow]
)
data = {
"index": index,
"in_arrow_index": in_arrow_index,
"arrow_name": cur_arrow_file,
}
else:
data = {}
if allow_missing:
for col in columns:
if col in table.column_names:
data[col] = table[col][in_arrow_index].as_py()
else:
for col in columns:
data[col] = table[col][in_arrow_index].as_py()
return data
def get_data(
self, ind, columns=None, allow_missing=False, return_meta=True, shadow=None
):
"""
Get data by in-dataset index.
Parameters
----------
ind: int
The in-dataset index.
columns: str or list
The columns to be returned. If None, return all columns.
allow_missing: bool
If True, omit missing columns. If False, raise an error if the column is missing.
return_meta: bool
If True, the resulting dict will contain some meta information:
in-json index, in-arrow index, and arrow_name.
shadow: str
The shadow name. If None, return the main data. If not None, return the shadow data.
Returns
-------
data: dict
A dict containing the data.
"""
index = self.indices[ind]
return self.get_data_by_index(
index,
columns,
allow_missing=allow_missing,
return_meta=return_meta,
shadow=shadow,
)
def get_attribute_by_index(self, index, column, shadow=None):
table = self.load_table_by_index(index, shadow=shadow)
index_bias = (
self._index_bias if shadow is None else self._shadow_index_bias[shadow]
)
return table[column][index - index_bias].as_py()
def get_attribute(self, ind, column, shadow=None):
"""
Get single attribute by in-dataset index.
Parameters
----------
ind: int
The in-dataset index.
column: str
The column name.
shadow: str
The shadow name. If None, return the main data. If not None, return the shadow data.
Returns
-------
data: can be any type
"""
index = self.indices[ind]
return self.get_attribute_by_index(index, column, shadow=shadow)
def get_image_by_index(
self, index, column="image", ret_type="pil", max_size=-1, shadow=None
):
table = self.load_table_by_index(index, shadow=shadow)
index_bias = (
self._index_bias if shadow is None else self._shadow_index_bias[shadow]
)
col = "image" if "image" in table.column_names else "binary"
temp = table[col][index - index_bias].as_py()
image_bytes = io.BytesIO(temp)
image_bytes.seek(0)
try:
            # convert("RGB") serves two purposes:
            # 1. Normalize channels: some images are in grayscale/RGBA mode, which
            #    would cause channel inconsistency in subsequent processing.
            # 2. Avoid blurry resizes: images in P mode are forced to the NEAREST
            #    resample method in resize (even if LANCZOS is specified).
            pil_image = Image.open(image_bytes).convert("RGB")
except Exception as e:
print(
f"get_image_by_index | Error: {e} ({self.get_arrow_file_by_index(index), index - index_bias})"
)
pil_image = Image.new("RGB", (256, 256), (255, 255, 255))
if max_size > 0:
# Resize the image to max_size. max_size is the size of long edge
w, h = pil_image.size
if w > h:
new_w = max_size
new_h = int(h * max_size / w)
else:
new_h = max_size
new_w = int(w * max_size / h)
pil_image = pil_image.resize((new_w, new_h))
if ret_type == "numpy":
return np.array(pil_image)
return pil_image
def get_image(self, ind, column="image", ret_type="pil", max_size=-1, shadow=None):
"""
Get image by in-dataset index.
Parameters
----------
ind: int
The in-dataset index.
column: str
[Deprecated] The column name of the image. Default to 'image'.
ret_type: str
The return type. Can be 'pil' or 'numpy'. Default to 'pil'.
max_size: int
If not -1, resize the image to max_size. max_size is the size of long edge.
shadow: str
The shadow name. If None, return the main image. If not None, return the shadow image.
Returns
-------
image: PIL.Image.Image or np.ndarray
"""
index = self.indices[ind]
return self.get_image_by_index(index, column, ret_type, max_size, shadow=shadow)
def get_md5_by_index(self, index, shadow=None):
table = self.load_table_by_index(index, shadow=shadow)
index_bias = (
self._index_bias if shadow is None else self._shadow_index_bias[shadow]
)
return table["md5"][index - index_bias].as_py()
def get_md5(self, ind, shadow=None):
index = self.indices[ind]
return self.get_md5_by_index(index, shadow=shadow)
def get_columns_by_index(self, index, shadow=None):
table = self.load_table_by_index(index, shadow=shadow)
return table.column_names
def get_columns(self, ind, shadow=None):
index = self.indices[ind]
return self.get_columns_by_index(index, shadow=shadow)
def source_distribution(self, save_path=None, shadow=None):
sources = defaultdict(int)
for index in tqdm(self.indices):
source = self.get_attribute_by_index(index, "source", shadow=shadow)
sources[source] += 1
sources = sorted(sources.items(), key=lambda x: x[1], reverse=True)
for k, v in sources:
print(f"{k:20s} {v:10d}")
if save_path is not None:
Path(save_path).write_text(
"\n".join([f"{k:20s} {v:10d}" for k, v in sources])
)
def save(self, save_path):
"""
Save the index to a json file.
Parameters
----------
save_path: str or pathlib.Path
The path to save the index file.
"""
builder = IndexV2Builder(
data_type=self.data_type,
arrow_files=self.arrow_files,
cum_length=self.cum_length,
indices=self.indices,
)
builder.build(save_path)
def sample_batch_indices(self, n):
return np.random.choice(self.indices, n)
def sample_batch(self, n, columns, progress=True, shadow=None):
if isinstance(n, int):
indices = self.sample_batch_indices(n)
else:
indices = n
if progress:
pbar = tqdm(indices)
else:
pbar = indices
batch_data = []
for i in pbar:
batch_data.append(self.get_data_by_index(i, columns, shadow=shadow))
return batch_data
@staticmethod
def resize_and_crop(image, target_size, resample=Image.LANCZOS, crop_type="random"):
"""
Resize image without changing aspect ratio, then crop the center/random part.
Parameters
----------
image: PIL.Image.Image
The input image to be resized and cropped.
target_size: tuple
The target size of the image.
resample:
The resample method. See PIL.Image.Image.resize for details. Default to Image.LANCZOS.
crop_type: str
'center' or 'random'. If 'center', crop the center part of the image. If 'random',
crop a random part of the image. Default to 'random'.
Returns
-------
image: PIL.Image.Image
The resized and cropped image.
crop_pos: tuple
The position of the cropped part. (crop_left, crop_top)
"""
tw, th = target_size
w, h = image.size
tr = th / tw
r = h / w
# resize
if r < tr:
resize_height = th
resize_width = int(round(th / h * w))
else:
resize_width = tw
resize_height = int(round(tw / w * h))
image = image.resize((resize_width, resize_height), resample=resample)
if crop_type == "center":
crop_top = int(round((resize_height - th) / 2.0))
crop_left = int(round((resize_width - tw) / 2.0))
elif crop_type == "random":
crop_top = random.randint(0, resize_height - th)
crop_left = random.randint(0, resize_width - tw)
else:
raise ValueError(f"crop_type must be center or random, but got {crop_type}")
image = image.crop((crop_left, crop_top, crop_left + tw, crop_top + th))
return image, (crop_left, crop_top)
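# Usage sketch (illustrative; 'data.json' is an assumed index path, mirroring
# the class docstring example):
#   idx = ArrowIndexV2('data.json')
#   pil_image = idx.get_image(0)
#   cropped, (left, top) = ArrowIndexV2.resize_and_crop(
#       pil_image, (1024, 1024), crop_type='center')
#   text = idx.get_attribute(0, column='text_zh')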
class IndexV2Builder(object):
def __init__(
self,
arrow_files,
indices=None,
cum_length=None,
group_length=None,
data_type=None,
max_indices=5_000_000,
example_num=1000,
config_file=None,
):
"""
Build index v2 from an index dict.
Parameters
----------
arrow_files: list
A list of arrow files.
indices: list or dict
A list of indices or a dict of indices.
If not provided, it will be specified as range(cum_length[-1]).
cum_length: list
A list of cumulative length of arrow files.
If not provided, it will be calculated from arrow files.
group_length: list
A list of group length or a dict of group length for each arrow file.
If not provided, it will be calculated.
data_type: str or list
Some custom information of this index.
max_indices: int
If the number of indices is larger than max_indices, the indices will be saved in a separate file.
Default to 5_000_000.
example_num: int
The number of examples to be saved in the index file. Default to 1000.
config_file: str
The path of config file.
Examples
--------
        >>> builder = IndexV2Builder(
        ...     data_type='gold',
        ...     arrow_files=arrow_files,
        ...     cum_length=cum_length,
        ...     indices=indices,
        ... )
        >>> builder.build(save_path)
"""
self.arrow_files = arrow_files
self.indices = indices
self.cum_length = cum_length
self.group_length = group_length
self.data_type = data_type
self.max_indices = max_indices
self.example_num = example_num
self.config_file = config_file
if isinstance(arrow_files, str):
if "*" in arrow_files or "?" in arrow_files:
self.arrow_files = list(glob(arrow_files))
else:
self.arrow_files = [arrow_files]
elif isinstance(self.arrow_files, tuple):
self.arrow_files = list(self.arrow_files)
if not isinstance(self.arrow_files, list):
raise ValueError(
f"Expected arrow_files to be a list, got {type(self.arrow_files)}."
)
if self.cum_length is None:
continuous = False
if self.indices is None:
self.group_length = []
continuous = True
print(f"Calculating cum_length...")
self.cum_length = []
cur_cum_length = 0
pbar = tqdm(self.arrow_files)
for arrow_file in pbar:
table_length = len(get_table(arrow_file))
cur_cum_length += table_length
self.cum_length.append(cur_cum_length)
pbar.set_description(f"{self.cum_length[-1]:>12d}")
if continuous:
self.group_length.append(table_length)
if self.indices is None:
self.indices = list(range(self.cum_length[-1]))
if self.group_length is None:
self.group_length = []
if self.data_type is None:
self.data_type = ["Made by IndexV2Builder"]
elif isinstance(self.data_type, str):
self.data_type = [self.data_type]
assert_type(self.data_type, list, "data_type")
assert_type(self.cum_length, (list, np.ndarray), "cum_length")
assert_type(self.group_length, (list, dict, np.ndarray), "group_length")
assert_type(self.indices, (list, dict, np.ndarray), "indices")
self.cum_length = ndarray_to_list(self.cum_length)
self.group_length = ndarray_to_list(self.group_length)
self.indices = ndarray_to_list(self.indices)
if isinstance(self.indices, dict):
for k, v in self.indices.items():
assert_type(v, list, f"indices[{k}]")
if len(self.arrow_files) != len(self.cum_length):
raise ValueError(
f"Length of arrow_files and cum_length does not match. {len(self.arrow_files)} != {len(self.cum_length)}"
)
if len(self.indices) == 0:
raise ValueError(f"No indices found in index_dict.")
if (
isinstance(self.indices, list)
and self.indices[-1] > self.cum_length[-1] - 1
):
raise ValueError(
f"Indices exceed cum_length. {self.indices[-1]} > {self.cum_length[-1] - 1}"
)
if len(self.group_length) > 0:
if len(self.arrow_files) != len(self.group_length):
raise ValueError(
f"Length of arrow_files and group_length does not match. {len(self.arrow_files)} != {len(self.group_length)}"
)
if sum(self.group_length) != len(self.indices):
raise ValueError(
f"Sum of group_length does not match length of indices. {sum(self.group_length)} != {len(self.indices)}"
)
def encode(self):
        # Encode arrow files (currently a pass-through; the loop is kept as an
        # extension point for shortening paths).
        print("Encoding arrow files...")
        arrow_files = []
        for arrow_file in tqdm(self.arrow_files):
            shortname = arrow_file
            arrow_files.append(shortname)
self.arrow_files = arrow_files
# Calculate group_length
print("Calculating group length...")
if isinstance(self.indices, list):
if len(self.group_length) == 0:
self.group_length = self.calc_group_length(
self.indices, self.cum_length
)
else:
print("Group length already calculated, skip.")
elif isinstance(self.indices, dict):
if not isinstance(self.group_length, dict):
self.group_length = {}
for k, v in self.indices.items():
print(f"Calculating group length for {k}...")
if k not in self.group_length or len(self.group_length[k]) == 0:
self.group_length[k] = self.calc_group_length(v, self.cum_length)
else:
print("Group length already calculated, skip.")
else:
raise ValueError(
f"Expected indices type list or dict, got {type(self.indices)}."
)
return {
"data_type": self.data_type,
"config_file": self.config_file if self.config_file is not None else "",
"indices_file": "",
"arrow_files": self.arrow_files,
"cum_length": self.cum_length,
"group_length": self.group_length,
"indices": self.indices,
"example_indices": [],
}
def to_index_v2(self):
return ArrowIndexV2(res_dict=self.encode())
def build(self, save_path):
return self.save(save_path)
def save(self, save_path):
"""
Make index v2 from an index dict.
Parameters
----------
save_path: str or pathlib.Path
The path to save the index file.
"""
index_dict = self.encode()
        # Ensure the indices are either a list or a dict.
save_path = Path(save_path)
save_path.parent.mkdir(exist_ok=True, parents=True)
        if (
            isinstance(index_dict["indices"], list)
            and len(index_dict["indices"]) > self.max_indices
        ):
            # Keep a small example sample in the json; the full list goes to a
            # side .npz file.
            index_dict["example_indices"] = index_dict["indices"][: self.example_num]
            indices_to_save = {"x": index_dict["indices"]}
            index_dict["indices"] = []
        elif isinstance(index_dict["indices"], dict):
            indices_to_save = index_dict["indices"]
            index_dict["indices"] = {}
            num_keys = len(indices_to_save)
            example_num_per_key = max(self.example_num // num_keys, 10)
            index_dict["example_indices"] = {
                # Sample from indices_to_save: index_dict["indices"] was just emptied.
                k: v[:example_num_per_key]
                for k, v in indices_to_save.items()
            }
else:
indices_to_save = None
# save indices
if indices_to_save is not None:
indices_file = save_path.parent / f"{save_path.stem}.index"
indices_dict = {k: np.array(v) for k, v in indices_to_save.items()}
np.savez_compressed(indices_file, **indices_dict)
index_dict["indices_file"] = indices_file.name + ".npz"
with save_path.open("w") as f:
json.dump(index_dict, f, indent=4, ensure_ascii=False)
@staticmethod
def calc_group_length(indices, cum_length):
group_lengths = []
cum_ind = 0
count = 0
for index in tqdm(indices):
if index < cum_length[cum_ind]:
# index is still in the current group
count += 1
else:
# index has exceeded the current group, need to switch to the next group
group_lengths.append(count)
cum_ind += 1
# if the index exceeds the next group, continue to switch to the next group
while index >= cum_length[cum_ind]:
group_lengths.append(0)
cum_ind += 1
count = 1
# The indices array is exhausted, and the last group containing the index should also be added.
group_lengths.append(count)
assert len(group_lengths) <= len(cum_length), (
len(group_lengths),
len(cum_length),
)
        # If there are fewer groups than entries in cum_length, the trailing
        # arrow files contributed no indices; pad those groups with zeros.
if len(group_lengths) < len(cum_length):
group_lengths.extend([0] * (len(cum_length) - len(group_lengths)))
return group_lengths
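# Worked example (illustrative): with cum_length = [3, 5, 9] (three arrow files
# holding 3, 2 and 4 rows) and indices = [0, 2, 5, 8], calc_group_length returns
# [2, 0, 2]: two kept rows fall in the first file, none in the second (its rows
# were filtered out), and two in the third.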
import math
import torch.distributed as dist
from torch.utils.data.distributed import DistributedSampler
class BlockDistributedSampler(DistributedSampler):
def __init__(
self,
dataset,
num_replicas=None,
rank=None,
shuffle=True,
seed=0,
drop_last=False,
batch_size=-1,
start_index=0,
):
super().__init__(dataset, num_replicas, rank, shuffle, seed, drop_last)
if num_replicas is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
num_replicas = dist.get_world_size()
if rank is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
rank = dist.get_rank()
if rank >= num_replicas or rank < 0:
raise ValueError(
"Invalid rank {}, rank should be in the interval"
" [0, {}]".format(rank, num_replicas - 1)
)
if batch_size == -1:
raise ValueError("batch_size should be specified")
self.dataset = dataset
self.num_replicas = num_replicas
self.rank = rank
self.epoch = 0
self.drop_last = drop_last
self.shuffle = shuffle
self.seed = seed
self.batch_size = batch_size
self._start_index = start_index
self.recompute_sizes()
@property
def start_index(self):
return self._start_index
@start_index.setter
def start_index(self, value):
self._start_index = value
self.recompute_sizes()
def recompute_sizes(self):
self.num_samples = (
len(self.dataset) // self.batch_size * self.batch_size // self.num_replicas
- self._start_index
)
self.total_size = self.num_samples * self.num_replicas
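    # Worked example (illustrative): len(dataset)=1000, batch_size=32,
    # num_replicas=4 -> 1000 // 32 * 32 = 992 usable samples, i.e. 248 per rank;
    # with start_index=48, each rank yields the remaining 200 samples.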
def __iter__(self):
indices = list(range(len(self.dataset))) # type: ignore[arg-type]
raw_num_samples = (
len(indices) // self.batch_size * self.batch_size // self.num_replicas
)
raw_total_size = raw_num_samples * self.num_replicas
indices = indices[:raw_total_size]
        # We require that the dataset size is divisible by batch_size * num_replicas.
        # This is naturally satisfied when using index_kits.
        # In the future, this assertion may be removed.
assert len(indices) == raw_total_size, f"{len(indices)} vs {raw_total_size}"
# subsample with start_index
indices = indices[
self.rank * raw_num_samples
+ self.start_index : (self.rank + 1) * raw_num_samples
]
assert (
len(indices) + self.start_index == raw_num_samples
), f"{len(indices) + self.start_index} vs {raw_num_samples}"
# This is a sequential sampler. The shuffle operation is done by the dataset itself.
return iter(indices)
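# Usage sketch (illustrative; variable names are assumptions). Each rank reads a
# contiguous block of the dataset, so shuffling is left to the dataset itself:
#   sampler = BlockDistributedSampler(dataset, num_replicas=8, rank=rank,
#                                     batch_size=32, start_index=0)
#   loader = torch.utils.data.DataLoader(dataset, batch_size=32, sampler=sampler)
#   # To resume mid-epoch, set sampler.start_index before creating the loader.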
class DistributedSamplerWithStartIndex(DistributedSampler):
def __init__(
self,
dataset,
num_replicas=None,
rank=None,
shuffle=True,
seed=0,
drop_last=False,
start_index=0,
):
super().__init__(dataset, num_replicas, rank, shuffle, seed, drop_last)
if num_replicas is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
num_replicas = dist.get_world_size()
if rank is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
rank = dist.get_rank()
if rank >= num_replicas or rank < 0:
raise ValueError(
"Invalid rank {}, rank should be in the interval"
" [0, {}]".format(rank, num_replicas - 1)
)
self.dataset = dataset
self.num_replicas = num_replicas
self.rank = rank
self.epoch = 0
self.drop_last = drop_last
self._start_index = start_index
self.recompute_sizes()
self.shuffle = shuffle
self.seed = seed
@property
def start_index(self):
return self._start_index
@start_index.setter
def start_index(self, value):
self._start_index = value
self.recompute_sizes()
def recompute_sizes(self):
# If the dataset length is evenly divisible by # of replicas, then there
# is no need to drop any data, since the dataset will be split equally.
if self.drop_last and (len(self.dataset) - self._start_index) % self.num_replicas != 0: # type: ignore[arg-type]
# Split to nearest available length that is evenly divisible.
# This is to ensure each rank receives the same amount of data when
# using this Sampler.
self.num_samples = math.ceil(
((len(self.dataset) - self._start_index) - self.num_replicas) / self.num_replicas # type: ignore[arg-type]
)
else:
self.num_samples = math.ceil((len(self.dataset) - self._start_index) / self.num_replicas) # type: ignore[arg-type]
self.total_size = self.num_samples * self.num_replicas
def __iter__(self):
indices = list(range(self._start_index, len(self.dataset))) # type: ignore[arg-type]
if not self.drop_last:
# add extra samples to make it evenly divisible
padding_size = self.total_size - len(indices)
if padding_size <= len(indices):
indices += indices[:padding_size]
else:
indices += (indices * math.ceil(padding_size / len(indices)))[
:padding_size
]
else:
# remove tail of data to make it evenly divisible.
indices = indices[: self.total_size]
assert len(indices) == self.total_size
# subsample with start_index
indices = indices[self.rank : self.total_size : self.num_replicas]
assert len(indices) == self.num_samples
return iter(indices)
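# Usage sketch (illustrative): unlike BlockDistributedSampler above, this sampler
# strides across ranks (rank, rank + num_replicas, ...) and pads or trims the
# tail so every rank yields exactly num_samples indices:
#   sampler = DistributedSamplerWithStartIndex(dataset, num_replicas=8, rank=rank,
#                                              start_index=10_000)
#   loader = torch.utils.data.DataLoader(dataset, batch_size=32, sampler=sampler)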
import re
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
with open("index_kits/__init__.py", "r") as file:
regex_version = r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]'
version = re.search(regex_version, file.read(), re.MULTILINE).group(1)
setup(
name="index_kits",
version=version,
author="jarvizhang",
author_email="jarvizhang@tencent.com",
description="An index kits for streaming reading arrow data.",
packages=["index_kits", "index_kits/dataset"],
scripts=["bin/idk"],
install_requires=[
"pillow>=9.3.0",
"tqdm>=4.60.0",
"pyarrow>=10.0.1",
"torch>=1.9",
],
python_requires=">=3.8.12",
)
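# Install sketch (assumed workflow, not stated in this file): running
# `pip install -e .` from the repo root installs the index_kits package and
# places the `idk` script from bin/ on PATH.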
TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT
Tencent Hunyuan DiT Release Date: 14 May 2024
THIS LICENSE AGREEMENT DOES NOT APPLY IN THE EUROPEAN UNION AND IS EXPRESSLY LIMITED TO THE TERRITORY, AS DEFINED BELOW.
By clicking to agree or by using, reproducing, modifying, distributing, performing or displaying any portion or element of the Tencent Hunyuan Works, including via any Hosted Service, You will be deemed to have recognized and accepted the content of this Agreement, which is effective immediately.
1. DEFINITIONS.
a. “Acceptable Use Policy” shall mean the policy made available by Tencent as set forth in the Exhibit A.
b. “Agreement” shall mean the terms and conditions for use, reproduction, distribution, modification, performance and displaying of Tencent Hunyuan Works or any portion or element thereof set forth herein.
c. “Documentation” shall mean the specifications, manuals and documentation for Tencent Hunyuan made publicly available by Tencent.
d. “Hosted Service” shall mean a hosted service offered via an application programming interface (API), web access, or any other electronic or remote means.
e. “Licensee,” “You” or “Your” shall mean a natural person or legal entity exercising the rights granted by this Agreement and/or using the Tencent Hunyuan Works for any purpose and in any field of use.
f. “Materials” shall mean, collectively, Tencent’s proprietary Tencent Hunyuan and Documentation (and any portion thereof) as made available by Tencent under this Agreement.
g. “Model Derivatives” shall mean all: (i) modifications to Tencent Hunyuan or any Model Derivative of Tencent Hunyuan; (ii) works based on Tencent Hunyuan or any Model Derivative of Tencent Hunyuan; or (iii) any other machine learning model which is created by transfer of patterns of the weights, parameters, operations, or Output of Tencent Hunyuan or any Model Derivative of Tencent Hunyuan, to that model in order to cause that model to perform similarly to Tencent Hunyuan or a Model Derivative of Tencent Hunyuan, including distillation methods, methods that use intermediate data representations, or methods based on the generation of synthetic data Outputs by Tencent Hunyuan or a Model Derivative of Tencent Hunyuan for training that model. For clarity, Outputs by themselves are not deemed Model Derivatives.
h. “Output” shall mean the information and/or content output of Tencent Hunyuan or a Model Derivative that results from operating or otherwise using Tencent Hunyuan or a Model Derivative, including via a Hosted Service.
i. “Tencent,” “We” or “Us” shall mean THL A29 Limited.
j. “Tencent Hunyuan” shall mean the large language models, text/image/video/audio/3D generation models, and multimodal large language models and their software and algorithms, including trained model weights, parameters (including optimizer states), machine-learning model code, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing made publicly available by Us, including, without limitation to, Tencent Hunyuan DiT released at https://huggingface.co/Tencent-Hunyuan/HunyuanDiT.
k. “Tencent Hunyuan Works” shall mean: (i) the Materials; (ii) Model Derivatives; and (iii) all derivative works thereof.
l. “Territory” shall mean the worldwide territory, excluding the territory of the European Union.
m. “Third Party” or “Third Parties” shall mean individuals or legal entities that are not under common control with Us or You.
n. “including” shall mean including but not limited to.
2. GRANT OF RIGHTS.
We grant You, for the Territory only, a non-exclusive, non-transferable and royalty-free limited license under Tencent’s intellectual property or other rights owned by Us embodied in or utilized by the Materials to use, reproduce, distribute, create derivative works of (including Model Derivatives), and make modifications to the Materials, only in accordance with the terms of this Agreement and the Acceptable Use Policy, and You must not violate (or encourage or permit anyone else to violate) any term of this Agreement or the Acceptable Use Policy.
3. DISTRIBUTION.
You may, subject to Your compliance with this Agreement, distribute or make available to Third Parties the Tencent Hunyuan Works, exclusively in the Territory, provided that You meet all of the following conditions:
a. You must provide all such Third Party recipients of the Tencent Hunyuan Works or products or services using them a copy of this Agreement;
b. You must cause any modified files to carry prominent notices stating that You changed the files;
c. You are encouraged to: (i) publish at least one technology introduction blogpost or one public statement expressing Your experience of using the Tencent Hunyuan Works; and (ii) mark the products or services developed by using the Tencent Hunyuan Works to indicate that the product/service is “Powered by Tencent Hunyuan”; and
d. All distributions to Third Parties (other than through a Hosted Service) must be accompanied by a “Notice” text file that contains the following notice: “Tencent Hunyuan is licensed under the Tencent Hunyuan Community License Agreement, Copyright © 2024 Tencent. All Rights Reserved. The trademark rights of “Tencent Hunyuan” are owned by Tencent or its affiliate.”
You may add Your own copyright statement to Your modifications and, except as set forth in this Section and in Section 5, may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Model Derivatives as a whole, provided Your use, reproduction, modification, distribution, performance and display of the work otherwise complies with the terms and conditions of this Agreement (including as regards the Territory). If You receive Tencent Hunyuan Works from a Licensee as part of an integrated end user product, then this Section 3 of this Agreement will not apply to You.
4. ADDITIONAL COMMERCIAL TERMS.
If, on the Tencent Hunyuan version release date, the monthly active users of all products or services made available by or for Licensee is greater than 100 million monthly active users in the preceding calendar month, You must request a license from Tencent, which Tencent may grant to You in its sole discretion, and You are not authorized to exercise any of the rights under this Agreement unless or until Tencent otherwise expressly grants You such rights.
5. RULES OF USE.
a. Your use of the Tencent Hunyuan Works must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Tencent Hunyuan Works, which is hereby incorporated by reference into this Agreement. You must include the use restrictions referenced in these Sections 5(a) and 5(b) as an enforceable provision in any agreement (e.g., license agreement, terms of use, etc.) governing the use and/or distribution of Tencent Hunyuan Works and You must provide notice to subsequent users to whom You distribute that Tencent Hunyuan Works are subject to the use restrictions in these Sections 5(a) and 5(b).
b. You must not use the Tencent Hunyuan Works or any Output or results of the Tencent Hunyuan Works to improve any other large language model (other than Tencent Hunyuan or Model Derivatives thereof).
c. You must not use, reproduce, modify, distribute, or display the Tencent Hunyuan Works, Output or results of the Tencent Hunyuan Works outside the Territory. Any such use outside the Territory is unlicensed and unauthorized under this Agreement.
6. INTELLECTUAL PROPERTY.
a. Subject to Tencent’s ownership of Tencent Hunyuan Works made by or for Tencent and intellectual property rights therein, conditioned upon Your compliance with the terms and conditions of this Agreement, as between You and Tencent, You will be the owner of any derivative works and modifications of the Materials and any Model Derivatives that are made by or for You.
b. No trademark licenses are granted under this Agreement, and in connection with the Tencent Hunyuan Works, Licensee may not use any name or mark owned by or associated with Tencent or any of its affiliates, except as required for reasonable and customary use in describing and distributing the Tencent Hunyuan Works. Tencent hereby grants You a license to use “Tencent Hunyuan” (the “Mark”) in the Territory solely as required to comply with the provisions of Section 3(c), provided that You comply with any applicable laws related to trademark protection. All goodwill arising out of Your use of the Mark will inure to the benefit of Tencent.
c. If You commence a lawsuit or other proceedings (including a cross-claim or counterclaim in a lawsuit) against Us or any person or entity alleging that the Materials or any Output, or any portion of any of the foregoing, infringe any intellectual property or other right owned or licensable by You, then all licenses granted to You under this Agreement shall terminate as of the date such lawsuit or other proceeding is filed. You will defend, indemnify and hold harmless Us from and against any claim by any Third Party arising out of or related to Your or the Third Party’s use or distribution of the Tencent Hunyuan Works.
d. Tencent claims no rights in Outputs You generate. You and Your users are solely responsible for Outputs and their subsequent uses.
7. DISCLAIMERS OF WARRANTY AND LIMITATIONS OF LIABILITY.
a. We are not obligated to support, update, provide training for, or develop any further version of the Tencent Hunyuan Works or to grant any license thereto.
b. UNLESS AND ONLY TO THE EXTENT REQUIRED BY APPLICABLE LAW, THE TENCENT HUNYUAN WORKS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED “AS IS” WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND INCLUDING ANY WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, COURSE OF DEALING, USAGE OF TRADE, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING, REPRODUCING, MODIFYING, PERFORMING, DISPLAYING OR DISTRIBUTING ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS AND ASSUME ANY AND ALL RISKS ASSOCIATED WITH YOUR OR A THIRD PARTY’S USE OR DISTRIBUTION OF ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS AND YOUR EXERCISE OF RIGHTS AND PERMISSIONS UNDER THIS AGREEMENT.
c. TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL TENCENT OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, FOR ANY DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, CONSEQUENTIAL OR PUNITIVE DAMAGES, OR LOST PROFITS OF ANY KIND ARISING FROM THIS AGREEMENT OR RELATED TO ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS, EVEN IF TENCENT OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
8. SURVIVAL AND TERMINATION.
a. The term of this Agreement shall commence upon Your acceptance of this Agreement or access to the Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein.
b. We may terminate this Agreement if You breach any of the terms or conditions of this Agreement. Upon termination of this Agreement, You must promptly delete and cease use of the Tencent Hunyuan Works. Sections 6(a), 6(c), 7 and 9 shall survive the termination of this Agreement.
9. GOVERNING LAW AND JURISDICTION.
a. This Agreement and any dispute arising out of or relating to it will be governed by the laws of the Hong Kong Special Administrative Region of the People’s Republic of China, without regard to conflict of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement.
b. Exclusive jurisdiction and venue for any dispute arising out of or relating to this Agreement will be a court of competent jurisdiction in the Hong Kong Special Administrative Region of the People’s Republic of China, and Tencent and Licensee consent to the exclusive jurisdiction of such court with respect to any such dispute.
EXHIBIT A
ACCEPTABLE USE POLICY
Tencent reserves the right to update this Acceptable Use Policy from time to time.
Last modified: [insert date]
Tencent endeavors to promote safe and fair use of its tools and features, including Tencent Hunyuan. You agree not to use Tencent Hunyuan or Model Derivatives:
1. Outside the Territory;
2. In any way that violates any applicable national, federal, state, local, international or any other law or regulation;
3. To harm Yourself or others;
4. To repurpose or distribute output from Tencent Hunyuan or any Model Derivatives to harm Yourself or others;
5. To override or circumvent the safety guardrails and safeguards We have put in place;
6. For the purpose of exploiting, harming or attempting to exploit or harm minors in any way;
7. To generate or disseminate verifiably false information and/or content with the purpose of harming others or influencing elections;
8. To generate or facilitate false online engagement, including fake reviews and other means of fake online engagement;
9. To intentionally defame, disparage or otherwise harass others;
10. To generate and/or disseminate malware (including ransomware) or any other content to be used for the purpose of harming electronic systems;
11. To generate or disseminate personal identifiable information with the purpose of harming others;
12. To generate or disseminate information (including images, code, posts, articles), and place the information in any public context (including through the use of bot-generated tweets), without expressly and conspicuously identifying that the information and/or content is machine generated;
13. To impersonate another individual without consent, authorization, or legal right;
14. To make high-stakes automated decisions in domains that affect an individual’s safety, rights or wellbeing (e.g., law enforcement, migration, medicine/health, management of critical infrastructure, safety components of products, essential services, credit, employment, housing, education, social scoring, or insurance);
15. In a manner that violates or disrespects the social ethics and moral standards of other countries or regions;
16. To perform, facilitate, threaten, incite, plan, promote or encourage violent extremism or terrorism;
17. For any use intended to discriminate against or harm individuals or groups based on protected characteristics or categories, online or offline social behavior or known or predicted personal or personality characteristics;
18. To intentionally exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm;
19. For military purposes;
20. To engage in the unauthorized or unlicensed practice of any profession including, but not limited to, financial, legal, medical/health, or other professional practices.
Usage and Legal Notices:
Tencent is pleased to support the open source community by making Tencent Hunyuan available.
Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. The below software and/or models in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All Tencent Modifications are Copyright (C) THL A29 Limited.
Tencent Hunyuan is licensed under the Tencent Hunyuan Community License Agreement except for the third-party components listed below. Tencent Hunyuan does not impose any additional limitations beyond what is outlined in the respective licenses of these third-party components. Users must comply with all terms and conditions of original licenses of these third-party components and must ensure that the usage of the third party components adheres to all relevant laws and regulations.
For avoidance of doubts, Tencent Hunyuan means the large language models and their software and algorithms, including trained model weights, parameters (including optimizer states), machine-learning model code, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing made publicly available by Tencent in accordance with Tencent Hunyuan Community License Agreement.
Other dependencies and licenses:
Open Source Software Licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. torch
Copyright (c) 2016- Facebook, Inc (Adam Paszke)
Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
Copyright (c) 2011-2013 NYU (Clement Farabet)
Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
Terms of the BSD 3-Clause:
--------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
For the license of other third party components, please refer to the following URL:
https://github.com/pytorch/pytorch/blob/v1.13.1/NOTICE
Open Source Software Licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. pandas
Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.
Copyright (c) 2011-2023, Open source contributors.
A copy of the BSD 3-Clause is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/pandas-dev/pandas/tree/v2.0.3/LICENSES
Open Source Software Licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. numpy
Copyright (c) 2005-2022, NumPy Developers.
All rights reserved.
A copy of the BSD 3-Clause is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/numpy/numpy/blob/v1.24.4/LICENSES_bundled.txt
Open Source Software/Model Licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. Megatron-LM
Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
A copy of the BSD 3-Clause is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/NVIDIA/Megatron-LM/blob/main/LICENSE
Open Source Software/Models Licensed under the Apache License Version 2.0:
The below software in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2023 THL A29 Limited.
--------------------------------------------------------------------
1. diffusers
Copyright (c) diffusers original author and authors
Please note this software has been modified by Tencent in this distribution.
2. transformers
Copyright (c) transformers original author and authors
3. timm
Copyright 2019 Ross Wightman
4. text-to-text-transfer-transformer
Copyright (c) text-to-text-transfer-transformer original author and authors
Please note this software has been modified by Tencent in this distribution.
5. pytorch-fid
Copyright (c) pytorch-fid original author and authors
Please note this software has been modified by Tencent in this distribution.
6. Image-Quality-Assessment-Toolbox
Copyright 2021 Qunliang Xing
7. accelerate
Copyright (c) accelerate original author and authors
8. IP-Adapter
Copyright (c) IP-Adapter original author and authors
Please note this software has been modified by Tencent in this distribution.
9. mT5
Copyright (c) mT5 original author and authors
10. Mistral-7B
Copyright (c) 2024 Mistral AI, All rights reserved
11. peft
Copyright 2023 The HuggingFace Team. All rights reserved.
Terms of the Apache License Version 2.0:
--------------------------------------------------------------------
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
You must give any other recipients of the Work or Derivative Works a copy of this License; and
You must cause any modified files to carry prominent notices stating that You changed the files; and
You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Open Source Software/Model Licensed under the BSD 3-Clause License:
--------------------------------------------------------------------
1. torchvision
Copyright (c) Soumith Chintala 2016,
All rights reserved.
2. flash_attn
Copyright (c) 2022, the respective contributors, as shown by the AUTHORS file.
All rights reserved.
3. apex
Copyright (c) apex original author and authors
A copy of the BSD 3-Clause is included in this file.
Open Source Software Licensed under the HPND License:
--------------------------------------------------------------------
1. Pillow
Copyright © 2010-2023 by Jeffrey A. Clark (Alex) and contributors.
Terms of the HPND License:
--------------------------------------------------------------------
The Python Imaging Library (PIL) is
Copyright © 1997-2011 by Secret Labs AB
Copyright © 1995-2011 by Fredrik Lundh
Pillow is the friendly PIL fork. It is
Copyright © 2010-2023 by Jeffrey A. Clark (Alex) and contributors.
Like PIL, Pillow is licensed under the open source HPND License:
By obtaining, using, and/or copying this software and/or its associated
documentation, you agree that you have read, understood, and will comply
with the following terms and conditions:
Permission to use, copy, modify and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appears in all copies, and that
both that copyright notice and this permission notice appear in supporting
documentation, and that the name of Secret Labs AB or the author not be
used in advertising or publicity pertaining to distribution of the software
without specific, written prior permission.
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL,
INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
Open Source Software/Model Licensed under the MIT License:
The below software in this distribution may have been modified by Tencent.
--------------------------------------------------------------------
1. einops
Copyright (c) 2018 Alex Rogozhnikov
2. loguru
Copyright (c) 2017
3. Chinese-CLIP
Copyright (c) 2012-2022 OFA-Sys Team
Copyright (c) 2012-2022 Gabriel Ilharco, Mitchell Wortsman, Nicholas Carlini, Rohan Taori, Achal Dave, Vaishaal Shankar, John Miller, Hongseok Namkoong, Hannaneh Hajishirzi, Ali Farhadi, Ludwig Schmidt
4. DeepSpeed
Copyright (c) Microsoft Corporation.
5. glid-3-xl
Copyright (c) 2021 OpenAI
6. lazysizes
Copyright (c) 2015 Alexander Farkas
7. thingsvision
Copyright (c) 2021 Vision and Computational Cognition Group
8. sd-vae-ft-ema
Copyright (c) sd-vae-ft-ema original author and authors
9. ComfyUI-Diffusers
Copyright (c) 2023 Limitex
10. glide-text2im
Copyright (c) 2021 OpenAI
11. improved-diffusion
Copyright (c) 2021 OpenAI
Terms of the MIT License:
--------------------------------------------------------------------
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Open Source Software Licensed under the MIT License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. tqdm
Copyright (c) 2013 noamraph
A copy of the MIT is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/tqdm/tqdm/blob/v4.66.1/LICENCE
Open Source Software/Model Licensed under the MIT License and Other Licenses of the Third-Party Components therein:
The below software in this distribution may have been modified by Tencent.
--------------------------------------------------------------------
1. generative-models
Copyright (c) 2023 Stability AI
A copy of the MIT is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/Stability-AI/generative-models/blob/main/LICENSE-CODE
https://github.com/Stability-AI/generative-models/tree/main/model_licenses
Open Source Software/Model Licensed under the Apache License Version 2.0 and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. pyarrow
Copyright 2016-2024 The Apache Software Foundation
A copy of the Apache License Version 2.0 is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/apache/arrow/blob/main/NOTICE.txt
Open Source Software Licensed under the MIT License and Other Licenses of the Third-Party Components therein:
The below software in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2023 THL A29 Limited.
--------------------------------------------------------------------
1. opencv-python
Copyright (c) Olli-Pekka Heinisuo
A copy of the MIT is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/opencv/opencv-python/blob/4.x/LICENSE-3RD-PARTY.txt
Open Source Software Licensed under the MIT License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. onnxruntime
Copyright (c) Microsoft Corporation.
A copy of the MIT is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/microsoft/onnxruntime/blob/v1.16.3/ThirdPartyNotices.txt
Open Source Software/Model Licensed under the Apache License Version 2.0:
The below software in this distribution may have been modified by Tencent.
--------------------------------------------------------------------
1. dwpose
Copyright 2018-2020 Open-MMLab.
Please note this software has been modified by Tencent in this distribution.
A copy of the Apache License Version 2.0 is included in this file.
Open Source Software Licensed under the License agreement for matplotlib and later and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. matplotlib
Copyright (c) 2012- Matplotlib Development Team; All Rights Reserved
Terms of the License agreement for matplotlib versions 1.3.0 and later:
--------------------------------------------------------------------
License agreement for matplotlib versions 1.3.0 and later
=========================================================
1. This LICENSE AGREEMENT is between the Matplotlib Development Team
("MDT"), and the Individual or Organization ("Licensee") accessing and
otherwise using matplotlib software in source or binary form and its
associated documentation.
2. Subject to the terms and conditions of this License Agreement, MDT
hereby grants Licensee a nonexclusive, royalty-free, world-wide license
to reproduce, analyze, test, perform and/or display publicly, prepare
derivative works, distribute, and otherwise use matplotlib
alone or in any derivative version, provided, however, that MDT's
License Agreement and MDT's notice of copyright, i.e., "Copyright (c)
2012- Matplotlib Development Team; All Rights Reserved" are retained in
matplotlib alone or in any derivative version prepared by
Licensee.
3. In the event Licensee prepares a derivative work that is based on or
incorporates matplotlib or any part thereof, and wants to
make the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to matplotlib.
4. MDT is making matplotlib available to Licensee on an "AS
IS" basis. MDT MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, MDT MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB
WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
5. MDT SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR
LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING
MATPLOTLIB, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF
THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between MDT and
Licensee. This License Agreement does not grant permission to use MDT
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using matplotlib,
Licensee agrees to be bound by the terms and conditions of this License
Agreement.
License agreement for matplotlib versions prior to 1.3.0
========================================================
1. This LICENSE AGREEMENT is between John D. Hunter ("JDH"), and the
Individual or Organization ("Licensee") accessing and otherwise using
matplotlib software in source or binary form and its associated
documentation.
2. Subject to the terms and conditions of this License Agreement, JDH
hereby grants Licensee a nonexclusive, royalty-free, world-wide license
to reproduce, analyze, test, perform and/or display publicly, prepare
derivative works, distribute, and otherwise use matplotlib
alone or in any derivative version, provided, however, that JDH's
License Agreement and JDH's notice of copyright, i.e., "Copyright (c)
2002-2011 John D. Hunter; All Rights Reserved" are retained in
matplotlib alone or in any derivative version prepared by
Licensee.
3. In the event Licensee prepares a derivative work that is based on or
incorporates matplotlib or any part thereof, and wants to
make the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to matplotlib.
4. JDH is making matplotlib available to Licensee on an "AS
IS" basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB
WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR
LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING
MATPLOTLIB, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF
THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between JDH and
Licensee. This License Agreement does not grant permission to use JDH
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using matplotlib,
Licensee agrees to be bound by the terms and conditions of this License
Agreement.
For the license of other third party components, please refer to the following URL:
https://github.com/matplotlib/matplotlib/blob/v3.7.5/LICENSE
### Reproduction Guide 🔥🔥🔥
```shell
# 1. Environment setup
docker pull image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.1-rc5-rocblas101839-0811-das1.6-py3.10-20250908-rc1
# 2. Create the container
docker run -it \
--network=host \
--hostname=localhost \
--name=HUNYUAN \
-v /opt/hyhal:/opt/hyhal:ro \
-v $PWD:/workspace \
--ipc=host \
--device=/dev/kfd \
--device=/dev/mkfd \
--device=/dev/dri \
--shm-size=512G \
--privileged \
--group-add video \
--cap-add=SYS_PTRACE \
--security-opt seccomp=unconfined \
image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.1-rc5-rocblas101839-0811-das1.6-py3.10-20250908-rc1 \
/bin/bash
# 3. Clone the code
git clone http://developer.sourcefind.cn/codes/bw_bestperf/hunyuan-dit.git
# 4. Fetch & install dependencies
# Apex
curl -f -C - -o apex-1.5.0+das.opt1.dtk25041-cp310-cp310-linux_x86_64.whl https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/e759f4e7fbb64b10
# Lightop
curl -f -C - -o lightop-0.5.0+das.dtk25041.unknown-cp310-cp310-linux_x86_64.whl https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/3ca9654a8fc1b0b5
# DeepSpeed
wget https://download.sourcefind.cn:65024/directlink/4/deepspeed/DAS1.6/deepspeed-0.14.2+das.opt1.dtk25041-cp310-cp310-manylinux_2_28_x86_64.whl
pip install apex-1.5.0+das.opt1.dtk25041-cp310-cp310-linux_x86_64.whl
pip install lightop-0.5.0+das.dtk25041.unknown-cp310-cp310-linux_x86_64.whl
pip install deepspeed-0.14.2+das.opt1.dtk25041-cp310-cp310-manylinux_2_28_x86_64.whl
pip install -r requirements.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
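# (Optional) sanity-check that the wheels installed above import cleanly
python -c "import apex, deepspeed; print(deepspeed.__version__)"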
# 5. Download optimization packages
# hipblaslt
curl -f -C - -o hipblaslt-install0925.tar.gz https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/5857030947151012
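# Extract the archive; the test commands below assume it unpacks to a local hipblaslt-install/ directory
tar -xzf hipblaslt-install0925.tar.gz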
# miopen
curl -f -C - -o package_0915_ubuntu.tar.gz https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/0c80d0e60b9af80d
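# Extract the archive; the test commands below assume it unpacks to package/miopen/
tar -xzf package_0915_ubuntu.tar.gz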
# 6. Download the model
# Model card: https://modelscope.cn/models/dengcao/HunyuanDiT-v1.2
pip install modelscope
modelscope download --model dengcao/HunyuanDiT-v1.2 --local_dir ./HunyuanDiT-v1.2
# The VAE, tokenizer, and text encoder must also be downloaded
cd HunyuanDiT-v1.2
wget https://dit.hunyuan.tencent.com/download/HunyuanDiT/tokenizer.zip
wget https://dit.hunyuan.tencent.com/download/HunyuanDiT/sdxl-vae-fp16-fix.zip
wget https://dit.hunyuan.tencent.com/download/HunyuanDiT/clip_text_encoder.zip
# After downloading, the model directory structure is as follows:
```
<p align="center">
<img src="19115934112c36d5d67394265d1498e2.png" height=300>
</p>
## Test Command
```shell
export LD_LIBRARY_PATH=/workspace/OEM_ADVTG_TEST/hunyuan/hipblaslt-install/lib/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/workspace/OEM_ADVTG_TEST/hunyuan/package/miopen/lib/:$LD_LIBRARY_PATH
python sample_t2i_dcu.py --model-root /workspace/OEM_ADVTG_TEST/hunyuan/HunyuanDiT-v1.2/ --batch-size 4 --infer-mode fa --prompt "青花瓷风格,一只可爱的哈士奇" --no-enhance --load-key module --image-size 1024 1024 --infer-steps 20
```
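The Gradio app below embeds the generation parameters (prompt, seed, CFG scale, sampler, ...) into every saved PNG as text chunks. A minimal sketch for reading them back, assuming only that Pillow is installed; the file path is hypothetical:

```python
from PIL import Image

# Hypothetical path; substitute a file actually produced by the app.
img = Image.open("results/generated_image_2024-01-01_00-00-00_42.png")
print(img.text)  # dict of embedded parameters: prompt, seed, cfg_scale, sampler, ...
```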
import gradio as gr
import pandas as pd
from pathlib import Path
from PIL import Image, PngImagePlugin
import sys
import numpy as np
import torch
from torchvision import transforms as T
sys.path.insert(0, str(Path(__file__).parent.parent))
import datetime
from hydit.constants import SAMPLER_FACTORY
from sample_t2i import inferencer
import os
ROOT = Path(__file__).parent.parent
SAMPLERS = list(SAMPLER_FACTORY.keys())
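# Normalization for control images: maps [0, 1] tensors to [-1, 1] via (x - 0.5) / 0.5.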
norm_transform = T.Compose(
[
T.ToTensor(),
T.Normalize([0.5], [0.5]),
]
)
def get_strings(lang):
lang_file = Path(f"app/lang/{lang}.csv")
strings = pd.read_csv(lang_file, header=0)
strings = strings.set_index("key")["value"].to_dict()
return strings
def get_files_with_extension(path, extension):
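    """Return a {file stem: full path} mapping for files in `path` matching the given extensions."""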
return {
os.path.splitext(file)[0]: os.path.join(path, file)
for file in os.listdir(path)
if os.path.isfile(os.path.join(path, file))
and any(file.endswith(ext) for ext in extension)
}
args, gen, enhancer = inferencer()
output_dir = ROOT / f"{args.output_img_path}"
os.makedirs(output_dir, exist_ok=True)
strings = get_strings(args.lang)
controlnet_list = get_files_with_extension(
args.model_root + "/t2i/controlnet",
[".pt", ".safetensors"],
)
module_list = get_files_with_extension(
args.model_root + "/t2i/model",
[".pt", ".safetensors"],
)
lora_list = get_files_with_extension(
args.model_root + "/t2i/lora",
[".pt", ".safetensors"],
)
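# Swap in a different DiT checkpoint when the user picks one from the model dropdown.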
def upgrade_dit_model_load(model):
model_path = module_list[model]
gen.args.dit_weight = model_path
gen.load_torch_weights()
def generate_metadata(
prompt,
negative_prompt,
seed,
cfg_scale,
infer_steps,
sampler,
imgW,
imgH,
controlnet_module,
control_weight,
lora_ctrls,
):
"""生成图像元数据。"""
return {
"parameters": "Power by HunYun",
"prompt": prompt,
"negative_prompt": negative_prompt,
"seed": seed,
"cfg_scale": cfg_scale,
"infer_steps": infer_steps,
"sampler": sampler,
"imgW": imgW,
"imgH": imgH,
"controlnet_module": controlnet_module,
"control_weight": control_weight,
"lora_ctrls": [
{
"lora_enabled": lora_ctrl[0],
"lora_model": lora_ctrl[1],
"lora_weight": lora_ctrl[2],
}
for lora_ctrl in zip(*[iter(lora_ctrls)] * 3)
],
"model_name": gen.model_name,
}
def infer(
prompt,
negative_prompt,
seed,
cfg_scale,
infer_steps,
sampler,
imgW,
imgH,
input_image,
controlnet_module,
control_weight,
enhance,
img_crop_type,
*lora_ctrls,
):
if enhance and enhancer is not None:
success, enhanced_prompt = enhancer(prompt)
if not success:
fail_image = Image.open(ROOT / "app/fail.png")
return fail_image
else:
enhanced_prompt = None
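    # Gradio passes `lora_ctrls` as flat (enabled, model, weight) triples, one per
    # LoRA row in the UI; keep only the rows whose checkbox is enabled.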
active_loras = [
{"model": lora_ctrls[i + 1], "weight": lora_ctrls[i + 2]}
for i in range(0, len(lora_ctrls), 3)
if lora_ctrls[i]
]
if input_image is not None:
        # Gradio delivers the input image as a NumPy array; convert it to a PIL image.
if isinstance(input_image, np.ndarray):
input_image = Image.fromarray(input_image).convert("RGB")
input_image = gen.pixel_perfect_resolution(
input_image, imgH, imgW, img_crop_type
)
# Apply the normalization transform
input_image = norm_transform(input_image)
# Add batch dimension and move to GPU (if available)
input_image = (
input_image.unsqueeze(0).cuda()
if torch.cuda.is_available()
else input_image.unsqueeze(0)
)
results = gen.predict(
prompt,
image=input_image,
height=imgH,
width=imgW,
seed=seed,
enhanced_prompt=enhanced_prompt,
negative_prompt=negative_prompt,
infer_steps=infer_steps,
guidance_scale=cfg_scale,
batch_size=1,
src_size_cond=None,
sampler=sampler,
control_weight=control_weight,
controlnet=controlnet_module,
lora_ctrls=active_loras,
)
image = results["images"][0]
seed = results["seed"]
metadata = generate_metadata(
prompt,
negative_prompt,
seed,
cfg_scale,
infer_steps,
sampler,
imgW,
imgH,
controlnet_module,
control_weight,
active_loras,
)
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output_path = output_dir.joinpath(f"generated_image_{timestamp}_{seed}.png")
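    # Embed the generation parameters as PNG text chunks so they can be recovered from the file.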
png_info = PngImagePlugin.PngInfo()
for k, v in metadata.items():
png_info.add_text(k, str(v))
image.save(
output_path,
pnginfo=png_info,
)
return image
def ui():
block = gr.Blocks()
description = f"""
# {strings['title']}
## {strings['desc']}
"""
with block:
with gr.Row():
gr.Markdown(description)
with gr.Row():
with gr.Column():
prompt = gr.Textbox(
label=strings["prompt"], value=strings["default prompt"], lines=3
)
with gr.Row():
imgW = gr.Slider(
label=strings["width"],
minimum=64,
maximum=4096,
value=1024,
step=64,
)
imgH = gr.Slider(
label=strings["height"],
minimum=64,
maximum=4096,
value=1024,
step=64,
)
with gr.Row():
infer_steps = gr.Slider(
label=strings["infer steps"],
minimum=1,
maximum=200,
value=100,
step=1,
)
seed = gr.Number(
label=strings["seed"],
minimum=-1,
maximum=1_000_000_000,
value=0,
step=1,
precision=0,
)
enhance = gr.Checkbox(
label=strings["enhance"],
value=enhancer is not None,
interactive=True,
)
with gr.Accordion(strings["accordion"], open=False):
with gr.Row():
negative_prompt = gr.Textbox(
label=strings["negative_prompt"],
value=gen.default_negative_prompt,
lines=2,
)
with gr.Row():
sampler = gr.Dropdown(
SAMPLERS, label=strings["sampler"], value="ddpm"
)
cfg_scale = gr.Slider(
label=strings["cfg"],
minimum=1.0,
maximum=16.0,
value=6.0,
step=1,
)
with gr.Accordion(strings["model_list"], open=False):
with gr.Row():
dit_model = gr.Dropdown(
label=strings["dit_model"],
choices=[
name
for name, path in get_files_with_extension(
args.model_root + "/t2i/model",
[".pt", ".safetensors"],
).items()
],
value=f"pytorch_model_{args.load_key}",
)
dit_model.change(
fn=upgrade_dit_model_load,
inputs=dit_model,
outputs=None,
)
with gr.Accordion(strings["lora_list"], open=False):
lora_ctrls = []
for i in range(5):
with gr.Row():
lora_enabled = gr.Checkbox(
label="Enable",
value=False,
)
lora_model = gr.Dropdown(
label=f"Lora{i+1}",
choices=["none"]
+ [name for name, path in lora_list.items()],
value="none",
)
lora_weight = gr.Slider(
label="weight",
minimum=-1,
maximum=2,
step=0.01,
value=0,
scale=5,
)
lora_ctrls += [lora_enabled, lora_model, lora_weight]
with gr.Accordion(strings["controlnet"], open=False):
with gr.Row():
controlnet_module = gr.Dropdown(
label=strings["controlnet_model"],
choices=["None"]
+ [name for name, path in controlnet_list.items()],
value="None",
)
control_weight = gr.Slider(
label=strings["Control_Weight"],
minimum=0.0,
maximum=2.0,
value=1.0,
step=0.1,
)
input_image = gr.Image(label=strings["input image"])
with gr.Row():
img_crop_type = gr.Radio(
label=strings["Crop_mode"],
choices=[
(strings["Resize"], "Resize"),
(strings["Crop_and_Resize"], "Crop_and_Resize"),
(strings["Resize_and_Fill"], "Resize_and_Fill"),
],
value="Crop_and_Resize",
)
with gr.Row():
advanced_button = gr.Button(strings["run"])
with gr.Column():
default_img = Image.open(ROOT / "app/default.png")
output_img = gr.Image(
label=strings["generated image"],
interactive=False,
format="png",
value=default_img,
)
advanced_button.click(
fn=infer,
inputs=[
prompt,
negative_prompt,
seed,
cfg_scale,
infer_steps,
sampler,
imgW,
imgH,
input_image,
controlnet_module,
control_weight,
enhance,
img_crop_type,
*lora_ctrls,
],
outputs=output_img,
)
with gr.Row():
gr.Examples(
[
["一只小猫"],
[
"现实主义风格,画面主要描述一个巴洛克风格的花瓶,带有金色的装饰边框,花瓶上盛开着各种色彩鲜艳的花,白色背景"
],
["一只聪明的狐狸走在阔叶树林里, 旁边是一条小溪, 细节真实, 摄影"],
["飞流直下三千尺,疑是银河落九天"],
[
"一只长靴猫手持亮银色的宝剑,身着铠甲,眼神坚毅,站在一堆金币上,背景是暗色调的洞穴,图像上有金币的光影点缀。"
],
["麻婆豆腐"],
["苏州园林"],
[
"一颗新鲜的草莓特写,红色的外表,表面布满许多种子,背景是淡绿色的叶子"
],
["请将“杞人忧天”的样子画出来"],
["枯藤老树昏鸦,小桥流水人家"],
[
"湖水清澈,天空湛蓝,阳光灿烂。一只优雅的白天鹅在湖边游泳。它周围有几只小鸭子,看起来非常可爱,整个画面给人一种宁静祥和的感觉。"
],
["一朵鲜艳的红色玫瑰花,花瓣撒有一些水珠,晶莹剔透,特写镜头"],
["臭豆腐"],
["九寨沟"],
["俗语“鲤鱼跃龙门”"],
[
"风格是写实,画面主要描述一个亚洲戏曲艺术家正在表演,她穿着华丽的戏服,脸上戴着精致的面具,身姿优雅,背景是古色古香的舞台,镜头是近景"
],
],
[prompt],
label=strings["examples"],
)
return block
if __name__ == "__main__":
interface = ui()
interface.launch(
server_name=args.server_name,
server_port=args.server_port,
share=args.gradio_share,
)
key,value
size,Size
sampler,Sampler
prompt,Prompt
default prompt,"A cute cat"
negative_prompt,Negative Prompt
seed,Seed
cfg,CFG Scale
infer steps,Sampling Steps
batch size,Batch Size
width cond,Width Cond
height cond,Height Cond
enhance,Prompt Enhancement
run,Submit
square,Square(1024x1024)
landscape,Landscape(1280x768)
portrait,Portrait(768x1280)
accordion,Advanced Options
generated image,HunYuanDiT Generated Image
examples,More Examples
title,Hunyuan-DiT
desc,A Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding
controlnet,controlnet
controlnet_model,model list
Control_Weight,Control Weight
input image,input image
width,width
height,height
model_list,model list
dit_model,dit model
Crop_mode,Crop mode
Resize,Resize
Crop_and_Resize,Crop and Resize
Resize_and_Fill,Resize and Fill
lora_list,lora
key,value
size,尺寸
sampler,采样器
prompt,文本描述
default prompt,"一只可爱的猫"
negative_prompt,负向词
seed,种子
cfg,CFG系数
infer steps,采样步数
batch size,批大小
width cond,宽度条件
height cond,高度条件
enhance,文本增强
run,提交生成
square,方形(1024x1024)
portrait,竖屏(1216x832)
landscape,横屏(832x1216)
accordion,高级设置
generated image,生成
examples,更多示例
title,HunYuanDiT
desc,具有细粒度中文理解的高性能多分辨率 Diffusion Transformer 模型
controlnet,条件控制网络
controlnet_model,模型列表
Control_Weight,控制网络权重
input image,输入图片
model_list,模型列表
dit_model,dit模型
width,width
height,height
Crop_mode,裁剪方式
Resize,仅缩放
Crop_and_Resize,裁剪并缩放
Resize_and_Fill,缩放并填充
lora_list,lora
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import gradio as gr
from PIL import Image
import sys
import os
sys.path.append(os.getcwd())
import json
import numpy as np
from pathlib import Path
import io
import hashlib
import requests
import base64
import pandas as pd
from sample_t2i import inferencer
from mllm.dialoggen_demo import init_dialoggen_model, eval_model
SIZES = {
"正方形(square, 1024x1024)": (1024, 1024),
"风景(landscape, 1280x768)": (768, 1280),
"人像(portrait, 768x1280)": (1280, 768),
}
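# Values are (height, width); the label text shows width x height.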
global_seed = np.random.randint(0, 10000)
# Helper Functions
def image_to_base64(image_path):
with open(image_path, "rb") as image_file:
encoded_image = base64.b64encode(image_file.read()).decode()
return encoded_image
def get_strings(lang):
lang_file = Path(f"app/lang/{lang}.csv")
strings = pd.read_csv(lang_file, header=0)
strings = strings.set_index("key")["value"].to_dict()
return strings
def get_image_md5(image):
image_data = io.BytesIO()
image.save(image_data, format="PNG")
image_data = image_data.getvalue()
md5_hash = hashlib.md5(image_data).hexdigest()
return md5_hash
# Call the DialogGen MLLM service
def request_dialogGen(
server_url="http://0.0.0.0:8080",
history_messages=[],
question="画一个木制的鸟",
image="",
):
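    """POST one chat turn to the DialogGen HTTP service.

    The request body is {"text": ..., "image": base64 string or "", "history": [...]};
    the JSON response carries the reply under "result" and the updated history under "history".
    """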
if image != "":
image = base64.b64encode(open(image, "rb").read()).decode()
print("history_messages before request", history_messages)
headers = {"accept": "application/json", "Content-Type": "application/json"}
data = {
"text": question,
"image": image, # "image为空字符串,则进行文本对话"
"history": history_messages,
}
response = requests.post(server_url, headers=headers, json=data)
print("response", response)
response = response.json()
print(response)
response_text = response["result"]
history_messages = response["history"]
print("history_messages before request", history_messages)
return history_messages, response_text
# Image generation
def image_generation(prompt, infer_steps, seed, image_size):
print(
f"prompt sent to T2I model: {prompt}, infer_steps: {infer_steps}, seed: {seed}, size: {image_size}"
)
height, width = SIZES[image_size]
results = gen.predict(
prompt,
height=height,
width=width,
seed=seed,
infer_steps=infer_steps,
batch_size=1,
)
image = results["images"][0]
file_name = get_image_md5(image)
# Save images
save_dir = Path("results")
save_dir.mkdir(exist_ok=True)
save_path = f"results/multiRound_{file_name}.png"
image.save(save_path)
encoded_image = image_to_base64(save_path)
return encoded_image
# Text-and-image chat
def chat(history_messages, input_text):
history_messages, response_text = request_dialogGen(
history_messages=history_messages, question=input_text
)
return history_messages, response_text
# Main pipeline: enhance each user turn and branch on the <画图> (draw) intent tag.
def pipeline(input_text, state, infer_steps, seed, image_size):
    # Ignore empty input
if len(input_text) == 0:
return state, state[0]
conversation = state[0]
history_messages = state[1]
system_prompt = "请先判断用户的意图,若为画图则在输出前加入<画图>:"
print(f"input history:{history_messages}")
if not isinstance(history_messages, list) and len(history_messages.messages) >= 2:
response, history_messages = enhancer(
input_text, return_history=True, history=history_messages, skip_special=True
)
else:
response, history_messages = enhancer(
input_text,
return_history=True,
history=history_messages,
skip_special=False,
)
history_messages.messages[-1][-1] = response
if "<画图>" in response:
intention_draw = True
else:
intention_draw = False
print(f"response:{response}")
print("-" * 80)
print(f"history_messages:{history_messages}")
print(f"intention_draw:{intention_draw}")
if intention_draw:
prompt = response.split("<画图>")[-1]
        # Generate the image
image_url = image_generation(prompt, infer_steps, seed, image_size)
response = f'<img src="data:image/png;base64,{image_url}" style="display: inline-block;"><p style="font-size: 14px; color: #555; margin-top: 0;">{prompt}</p>'
    conversation += [(input_text, response)]
return [conversation, history_messages], conversation
# Page layout
def upload_image(state, image_input):
conversation = state[0]
history_messages = state[1]
input_image = Image.open(image_input.name).resize((224, 224)).convert("RGB")
input_image.save(image_input.name) # Overwrite with smaller image.
system_prompt = "请先判断用户的意图,若为画图则在输出前加入<画图>:"
history_messages, response = request_dialogGen(
question="这张图描述了什么?",
history_messages=history_messages,
image=image_input.name,
)
conversation += [
(
f'<img src="./file={image_input.name}" style="display: inline-block;">',
response,
)
]
print("conversation", conversation)
print("history_messages after uploading image", history_messages)
return [conversation, history_messages], conversation
def reset():
global global_seed
global_seed = np.random.randint(0, 10000)
return [[], []], []
def reset_last(state):
conversation, history = state[0], state[1]
conversation = conversation[:-1]
history.messages = history.messages[:-2]
return [conversation, history], conversation
if __name__ == "__main__":
# Initialize dialoggen and HunyuanDiT model
args, gen, enhancer = inferencer()
strings = get_strings(args.lang)
css = """
#chatbot { min-height: 800px; }
#save-btn {
background-image: linear-gradient(to right bottom, rgba(130,217,244, 0.9), rgba(158,231,214, 1.0));
}
#save-btn:hover {
background-image: linear-gradient(to right bottom, rgba(110,197,224, 0.9), rgba(138,211,194, 1.0));
}
#share-btn {
background-image: linear-gradient(to right bottom, rgba(130,217,244, 0.9), rgba(158,231,214, 1.0));
}
#share-btn:hover {
background-image: linear-gradient(to right bottom, rgba(110,197,224, 0.9), rgba(138,211,194, 1.0));
}
#gallery { z-index: 999999; }
#gallery img:hover {transform: scale(2.3); z-index: 999999; position: relative; padding-right: 30%; padding-bottom: 30%;}
#gallery button img:hover {transform: none; z-index: 999999; position: relative; padding-right: 0; padding-bottom: 0;}
@media (hover: none) {
    #gallery img:hover {transform: none; z-index: 999999; position: relative; padding-right: 0; padding-bottom: 0;}
}
.html2canvas-container { width: 3000px !important; height: 3000px !important; }
"""
with gr.Blocks(css=css) as demo:
DESCRIPTION = """# <a style="color: black; text-decoration: none;">多轮对话绘图 Multi-turn Text2Image Generation</a>
你可以参照[DialogGen](https://arxiv.org/abs/2403.08857),通过简单的交互式语句来进行历史图片的修改,例如:主体编辑、增加主体、删除主体、背景更换、风格转换、镜头转换、图像合并。
(You can modify previously generated images through simple interactive statements, as described in [DialogGen](https://arxiv.org/abs/2403.08857), such as: entity edit, add object, remove object, change background, change style, change lens, and combine images.)
例如,主体编辑 (For example, entity edit):
```none
Round1: 画一个木制的鸟
(Round1: draw a wooden bird)
Round2: 变成玻璃的
(Round2: turn into glass)
```
"""
gr.Markdown(DESCRIPTION)
gr_state = gr.State([[], []]) # conversation, chat_history
with gr.Row():
with gr.Column(scale=1, min_width=1000):
with gr.Row():
chatbot = gr.Chatbot(
elem_id="chatbot", label="DialogGen&HunyuanDiT"
)
with gr.Row():
infer_steps = gr.Slider(
label="采样步数(sampling steps)",
minimum=1,
maximum=200,
value=100,
step=1,
)
seed = gr.Number(
label="种子(seed)",
minimum=-1,
maximum=1_000_000_000,
value=666,
step=1,
precision=0,
)
size_dropdown = gr.Dropdown(
choices=[
"正方形(square, 1024x1024)",
"风景(landscape, 1280x768)",
"人像(portrait, 768x1280)",
],
value="正方形(square, 1024x1024)",
label="图片尺寸(Image Size)",
)
with gr.Row():
# image_btn = gr.UploadButton("🖼️ Upload Image", file_types=["image"])
text_input = gr.Textbox(
label="提示词(prompt)", placeholder="输入提示词(Type a prompt)"
)
with gr.Column():
submit_btn = gr.Button(
"提交(Submit)", interactive=True, variant="primary"
)
clear_last_btn = gr.Button("回退(Undo)")
clear_btn = gr.Button("全部重置(Reset All)")
with gr.Row():
gr.Examples(
[
["画一个木制的鸟"],
["一只小猫"],
[
"现实主义风格,画面主要描述一个巴洛克风格的花瓶,带有金色的装饰边框,花瓶上盛开着各种色彩鲜艳的花,白色背景"
],
[
"一只聪明的狐狸走在阔叶树林里, 旁边是一条小溪, 细节真实, 摄影"
],
["飞流直下三千尺,疑是银河落九天"],
[
"一只长靴猫手持亮银色的宝剑,身着铠甲,眼神坚毅,站在一堆金币上,背景是暗色调的洞穴,图像上有金币的光影点缀。"
],
["麻婆豆腐"],
["苏州园林"],
[
"一颗新鲜的草莓特写,红色的外表,表面布满许多种子,背景是淡绿色的叶子"
],
["枯藤老树昏鸦,小桥流水人家"],
[
"湖水清澈,天空湛蓝,阳光灿烂。一只优雅的白天鹅在湖边游泳。它周围有几只小鸭子,看起来非常可爱,整个画面给人一种宁静祥和的感觉。"
],
[
"一朵鲜艳的红色玫瑰花,花瓣撒有一些水珠,晶莹剔透,特写镜头"
],
["臭豆腐"],
["九寨沟"],
["俗语“鲤鱼跃龙门”"],
[
"风格是写实,画面主要描述一个亚洲戏曲艺术家正在表演,她穿着华丽的戏服,脸上戴着精致的面具,身姿优雅,背景是古色古香的舞台,镜头是近景"
],
],
[text_input],
label=strings["examples"],
)
gr.Markdown(
"""<p style="font-size: 20px; color: #888;">powered by <a href="https://github.com/Centaurusalpha/DialogGen" target="_blank">DialogGen</a> and <a href="https://github.com/Tencent/HunyuanDiT" target="_blank">HunyuanDiT</a></p>"""
)
text_input.submit(
pipeline,
[text_input, gr_state, infer_steps, seed, size_dropdown],
[gr_state, chatbot],
)
text_input.submit(lambda: "", None, text_input) # Reset chatbox.
submit_btn.click(
pipeline,
[text_input, gr_state, infer_steps, seed, size_dropdown],
[gr_state, chatbot],
)
submit_btn.click(lambda: "", None, text_input) # Reset chatbox.
# image_btn.upload(upload_image, [gr_state, image_btn], [gr_state, chatbot])
clear_last_btn.click(reset_last, [gr_state], [gr_state, chatbot])
clear_btn.click(reset, [], [gr_state, chatbot])
interface = demo
interface.launch(server_name="0.0.0.0", server_port=443, share=False)