import json
import pickle
from collections import defaultdict
from glob import glob
from multiprocessing import Pool
from pathlib import Path
import pandas as pd
import yaml
import numpy as np
import pyarrow as pa
from tqdm import tqdm
from index_kits.indexer import IndexV2Builder
from index_kits.bucket import build_multi_resolution_bucket
from index_kits.dataset.config_parse import DatasetConfig
def get_table(arrow_file):
return pa.ipc.RecordBatchFileReader(pa.memory_map(arrow_file, "r")).read_all()
def get_indices(arrow_file, repeat_times, filter_fn, repeat_fn, callback=None):
"""
Get valid indices from a single arrow_file.
Parameters
----------
arrow_file: str
repeat_times: int
Repeat remain indices multiple times.
filter_fn
callback
Returns
-------
"""
try:
table = pa.ipc.RecordBatchFileReader(pa.memory_map(arrow_file, "r")).read_all()
except Exception as e:
print(arrow_file, e)
raise e
length = len(table)
if len(table) == 0:
print(f"Warning: Empty table: {arrow_file}")
indices = []
stats = {}
else:
# Apply filter_fn if available
if filter_fn is not None:
mask, stats, md5s = filter_fn(arrow_file, table)
else:
mask = pd.Series([True] * length)
stats = {}
md5s = None
# Apply callback function if available
if callback is not None:
mask, stats = callback(arrow_file, table, mask, stats, md5s)
# Get indices
if mask is not None:
indices = np.where(mask)[0].tolist()
else:
indices = list(range(length))
# Apply indices repeat
if repeat_fn is not None:
indices, repeat_stats = repeat_fn(
arrow_file, table, indices, repeat_times, md5s
)
stats.update(repeat_stats)
return arrow_file, length, indices, stats
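# Filter sketch (illustrative; `BAD_MD5S` and the blacklist logic are assumptions,
# not part of this module). A filter_fn receives the arrow file path and its
# table, and returns (mask, stats, md5s):
#   def my_filter(arrow_file, table):
#       md5s = table["md5"].to_pandas()
#       mask = ~md5s.isin(BAD_MD5S)          # keep rows not in the blacklist
#       stats = {"bad_md5": int((~mask).sum())}
#       return mask, stats, md5s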
def load_md5_files(files, name=None):
if isinstance(files, str):
files = [files]
md5s = set()
for file in files:
md5s.update(Path(file).read_text().splitlines())
print(f" {name} md5s: {len(md5s):,}")
return md5s
def load_md52cls_files(files, name=None):
if isinstance(files, str):
files = [files]
md52cls = {}
for file in files:
with Path(file).open() as f:
md52cls.update(json.load(f))
print(f" {name} md52cls: {len(md52cls):,}")
return md52cls
def merge_and_build_index(data_type, src, dconfig, save_path):
if isinstance(src, str):
files = list(sorted(glob(src)))
else:
files = list(sorted(src))
print(f"Found {len(files):,} temp pickle files.")
for fname in files:
print(f" {fname}")
arrow_files = []
table_lengths = []
indices_list = []
bad_stats_total = defaultdict(int)
total_indices = 0
total_processed_length = 0
for file_name in tqdm(files):
with Path(file_name).open("rb") as f:
data = pickle.load(f)
for arrow_file, table_length, indices, *args in tqdm(data, leave=False):
arrow_files.append(arrow_file)
table_lengths.append(table_length)
total_processed_length += table_length
indices_list.append(indices)
total_indices += len(indices)
if len(args) > 0 and args[0]:
bad_stats = args[0]
for k, v in bad_stats.items():
bad_stats_total[k] += v
if len(bad_stats_total):
stats_save_dir = Path(save_path).parent
stats_save_dir.mkdir(parents=True, exist_ok=True)
stats_save_path = stats_save_dir / (Path(save_path).stem + "_stats.txt")
stats_save_path.write_text(
"\n".join([f"{k:>50s} {v}" for k, v in bad_stats_total.items()]) + "\n"
)
print(f"Save stats to {stats_save_path}")
print(f"Arrow files: {len(arrow_files):,}")
print(f"Processed indices: {total_processed_length:,}")
print(f"Valid indices: {total_indices:,}")
cum_length = 0
total_indices = []
cum_lengths = []
group_lengths = []
existed = set()
print(f"Accumulating indices...")
pbar = tqdm(
zip(arrow_files, table_lengths, indices_list),
total=len(arrow_files),
mininterval=1,
)
_count = 0
for arrow_file, table_length, indices in pbar:
if len(indices) > 0 and dconfig.remove_md5_dup:
new_indices = []
table = get_table(arrow_file)
if "md5" not in table.column_names:
                raise ValueError(
                    f"Column 'md5' not found in {arrow_file}. "
                    f"When `remove_md5_dup: true` is set, the md5 column is required."
                )
md5s = table["md5"].to_pandas()
for i in indices:
md5 = md5s[i]
if md5 in existed:
continue
existed.add(md5)
new_indices.append(i)
indices = new_indices
total_indices.extend([int(i + cum_length) for i in indices])
cum_length += table_length
cum_lengths.append(cum_length)
group_lengths.append(len(indices))
_count += 1
if _count % 100 == 0:
pbar.set_description(f"Indices: {len(total_indices):,}")
builder = IndexV2Builder(
data_type=data_type,
arrow_files=arrow_files,
cum_length=cum_lengths,
group_length=group_lengths,
indices=total_indices,
config_file=dconfig.config_file,
)
builder.build(save_path)
print(
f"Build index finished!\n\n"
f" Save path: {Path(save_path).absolute()}\n"
f" Number of indices: {len(total_indices)}\n"
f"Number of arrow files: {len(arrow_files)}\n"
)
def worker_startup(rank, world_size, dconfig, prefix, work_dir, callback=None):
# Prepare names for this worker
num = (len(dconfig.names) + world_size - 1) // world_size
arrow_names = dconfig.names[rank * num : (rank + 1) * num]
print(f"Rank {rank} has {len(arrow_names):,} names.")
# Run get indices
print(f"Start getting indices...")
indices = []
for arrow_name, repeat_times in tqdm(
arrow_names, position=rank, desc=f"#{rank}: ", leave=False
):
indices.append(
get_indices(
arrow_name, repeat_times, dconfig.filter, dconfig.repeater, callback
)
)
# Save to a temp file
temp_save_path = (
work_dir / f"data/temp_pickles/{prefix}-{rank + 1}_of_{world_size}.pkl"
)
temp_save_path.parent.mkdir(parents=True, exist_ok=True)
with temp_save_path.open("wb") as f:
pickle.dump(indices, f)
print(f"Rank {rank} finished. Write temporary data to {temp_save_path}")
return temp_save_path
def startup(
config_file,
save,
world_size=1,
work_dir=".",
callback=None,
use_cache=False,
):
work_dir = Path(work_dir)
save_path = Path(save)
if save_path.suffix != ".json":
save_path = save_path.parent / (save_path.name + ".json")
print(f"Using save_path: {save_path}")
prefix = f"{save_path.stem}"
# Parse dataset config and build the data_type list
dconfig = DatasetConfig(work_dir, config_file)
data_type = []
for k, v in dconfig.data_type.items():
data_type.extend(v)
print(f"{k}:")
for x in v:
print(f" {x}")
if dconfig.remove_md5_dup:
data_type.append("Remove md5 duplicates.")
else:
data_type.append("Keep md5 duplicates.")
# Start processing
if not use_cache:
temp_pickles = []
if world_size == 1:
print(f"\nRunning in single process mode...")
temp_pickles.append(
worker_startup(
rank=0,
world_size=1,
dconfig=dconfig,
prefix=prefix,
work_dir=work_dir,
callback=callback,
)
)
else:
print(f"\nRunning in multi-process mode (world_size={world_size})...")
p = Pool(world_size)
temp_pickles_ = []
for i in range(world_size):
temp_pickles_.append(
p.apply_async(
worker_startup,
args=(i, world_size, dconfig, prefix, work_dir, callback),
)
)
for res in temp_pickles_:
temp_pickles.append(res.get())
# close
p.close()
p.join()
else:
temp_pickles = glob(
f"{work_dir}/data/temp_pickles/{prefix}-*_of_{world_size}.pkl"
)
# Merge temp pickles and build index
merge_and_build_index(
data_type,
temp_pickles,
dconfig,
save_path,
)
def make_multireso(
target,
config_file=None,
src=None,
base_size=None,
reso_step=None,
target_ratios=None,
align=None,
min_size=None,
md5_file=None,
):
if config_file is not None:
with Path(config_file).open() as f:
config = yaml.safe_load(f)
else:
config = {}
src = config.get("src", src)
base_size = config.get("base_size", base_size)
reso_step = config.get("reso_step", reso_step)
target_ratios = config.get("target_ratios", target_ratios)
align = config.get("align", align)
min_size = config.get("min_size", min_size)
md5_file = config.get("md5_file", md5_file)
if src is None:
raise ValueError("src must be provided in either config file or command line.")
if base_size is None:
raise ValueError("base_size must be provided.")
if reso_step is None and target_ratios is None:
raise ValueError("Either reso_step or target_ratios must be provided.")
if md5_file is not None:
with open(md5_file, "rb") as f:
md5_hw = pickle.load(f)
print(f"Md5 to height and width: {len(md5_hw):,}")
else:
md5_hw = None
build_multi_resolution_bucket(
config_file=config_file,
base_size=base_size,
reso_step=reso_step,
target_ratios=target_ratios,
align=align,
min_size=min_size,
src_index_files=src,
save_file=target,
md5_hw=md5_hw,
)
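# Usage sketch (illustrative; the config path, save path, and bucket settings
# below are assumptions, not part of this module):
if __name__ == "__main__":
    # Build a v2 index from a dataset config using 8 worker processes.
    startup(
        config_file="configs/demo.yaml",
        save="data/jsons/demo.json",
        world_size=8,
    )
    # Derive a multi-resolution bucket index from the base index.
    make_multireso(
        target="data/jsons/demo_mt.json",
        src="data/jsons/demo.json",
        base_size=1024,
        reso_step=64,
    )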
import bisect
import io
import json
import random
from pathlib import Path
import ast
from itertools import chain
from collections import defaultdict
from functools import partial
from glob import glob
import numpy as np
import pyarrow as pa
from PIL import Image
from tqdm import tqdm
def get_table(arrow_file):
"""
Read an arrow file and return an arrow table.
"""
return pa.ipc.RecordBatchFileReader(pa.memory_map(f"{arrow_file}", "r")).read_all()
def assert_type(data, dtype, msg=""):
if not isinstance(data, dtype):
raise ValueError(f"Expected {msg} type {dtype}, got {type(data)}.")
def ndarray_to_list(data):
if isinstance(data, np.ndarray):
data = data.tolist()
elif isinstance(data, dict):
data = {k: ndarray_to_list(v) for k, v in data.items()}
elif isinstance(data, (list, tuple)):
        # Convert every element to a Python int: numpy integers cannot be
        # serialized to JSON.
        data = [int(x) for x in data]
else:
raise ValueError(
f"Expected data type list, tuple, dict or np.ndarray, got {type(data)}."
)
return data
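# Examples (illustrative): ndarray_to_list(np.array([1, 2])) -> [1, 2];
# ndarray_to_list({"x": np.arange(3)}) -> {"x": [0, 1, 2]};
# ndarray_to_list((np.int64(5),)) -> [5].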
class ArrowIndexV2(object):
"""
ArrowIndexV2 is a new version of ArrowIndex.
Parameters
----------
index_file: str or pathlib.Path
The path of index file. Either index_file or res_dict should be provided.
res_dict: dict
The index dict. Either index_file or res_dict should be provided.
align: int
Align the length of indices to be a multiple of align. Generally align should be the batch size * world_size.
shadow_file_fn: callable or dict
A callable function to map shadow file path to a new path. If None, the shadow file path will not be
changed. If a dict is provided, the keys are the shadow names to call the function, and the values are the
callable functions to map the shadow file path to a new path. If a callable function is provided, the key
is 'default'.
Examples
--------
>>> index_file = 'data.json'
>>> indexObj = ArrowIndexV2(index_file)
>>> pil_image = indexObj.get_image(0)
>>> text = indexObj.get_attribute(0, column='text_zh')
"""
def __init__(
self, index_file=None, res_dict=None, align=1, shadow_file_fn=None, **kwargs
):
if index_file is not None:
with open(index_file, "r") as f:
res_dict = json.load(f)
elif res_dict is not None:
pass
else:
raise ValueError(f"Either index_file or res_dict should be provided.")
self.shadow_file_fn = {}
if shadow_file_fn is not None:
if not callable(shadow_file_fn) and not isinstance(shadow_file_fn, dict):
raise ValueError(
"shadow_file_fn should be a callable function or a dict."
)
if callable(shadow_file_fn):
self.shadow_file_fn["default"] = shadow_file_fn
else:
for k, v in shadow_file_fn.items():
if not callable(v):
raise ValueError(f"{k} should be a callable function.")
self.shadow_file_fn[k] = v
self._data = res_dict
self.data_type = res_dict["data_type"]
self.arrow_files = res_dict["arrow_files"]
self.cum_length = res_dict["cum_length"]
self.group_length = res_dict["group_length"]
error_msg = f"Expected group_length type list, got {type(self.group_length)}."
if isinstance(self.group_length, dict):
raise ValueError(
f"{error_msg}\nNote: You may using a multi-resolution index file. "
"Try `MultiResolutionBucketIndexV2` instead."
)
elif not isinstance(self.group_length, list):
raise ValueError(error_msg)
self.indices = res_dict["indices"]
if "indices_file" in res_dict:
self.indices_file = res_dict["indices_file"]
if self.indices_file != "":
indices_file = Path(index_file).parent / self.indices_file
if Path(indices_file).exists():
self.indices = np.load(indices_file)["x"]
else:
raise ValueError(
f"This Index file contains an extra file {indices_file} which is missed."
)
else:
self.indices_file = ""
if not isinstance(self.indices, list) and not isinstance(
self.indices, np.ndarray
):
raise ValueError(
f"Expected indices type list or np.ndarray, got {type(self.indices)}."
)
if align > 1:
if isinstance(self.indices, np.ndarray):
self.indices = self.indices.tolist()
self.align(align)
self.indices = np.asarray(self.indices, int)
if len(self.arrow_files) != len(self.cum_length):
raise ValueError(
f"Length of arrow_files and cum_length does not match. {len(self.arrow_files)} != {len(self.cum_length)}"
)
if len(self.arrow_files) != len(self.group_length):
raise ValueError(
f"Length of arrow_files and group_length does not match. {len(self.arrow_files)} != {len(self.group_length)}"
)
if len(self.indices) == 0:
raise ValueError(f"No indices found in index_dict.")
if (
isinstance(self.indices, list)
and self.indices[-1] > self.cum_length[-1] - 1
):
raise ValueError(f"Indices exceed cum_length.")
        # Warning: indices must form an increasing array. No check is performed
        # here because it would be slow for indices containing hundreds of
        # millions of data points.
self.bias = self.cum_length
self._cur_arrow_file = None
self._cur_table_map = None
self._cur_table = None
self._index_bias = 0
self.last_index = -1
self._shadow_cur_arrow_file = {}
self._shadow_cur_table_map = {}
self._shadow_cur_table = {}
self._shadow_index_bias = {}
self.shadow_last_index = {}
for k in self.shadow_file_fn.keys():
self._shadow_cur_arrow_file[k] = None
self._shadow_cur_table_map[k] = None
self._shadow_cur_table[k] = None
self._shadow_index_bias[k] = 0
self.shadow_last_index[k] = -1
def __len__(self):
return len(self.indices)
def __repr__(self):
return f"""
ArrowIndexV2(
data_type {self.data_type}
indices_file {self.indices_file}
arrow_files Count={len(self.arrow_files):,} ({self.arrow_files[0]}, ...)
cum_length Count={len(self.cum_length):,} ({self.cum_length[0]}, ...)
group_length Count={len(self.group_length):,} ({self.group_length[0]}, ...)
indices Count={len(self.indices):,}
example_indices Count={len(self._data['example_indices']):,}
)
"""
def check_exists(self):
for arrow_file in tqdm(self.arrow_files):
if not Path(arrow_file).exists():
print(arrow_file)
def align(self, align):
"""
        Repeat indices so that the total length is a multiple of align (generally batch_size * world_size).
"""
if len(self) % align == 0:
return
repeat_num = align - len(self) % align
if repeat_num >= len(self):
repeat_n = repeat_num // len(self)
repeat_times = [repeat_n + 1 for _ in self.indices]
group_length_new = [ll * (repeat_n + 1) for ll in self.group_length]
repeat_num -= repeat_n * len(self)
else:
repeat_times = [1 for _ in range(repeat_num)]
group_length_new = [ll for ll in self.group_length]
for i in range(repeat_num):
repeat_times[-i - 1] += 1
repeat_start_idx = len(self) - len(repeat_times)
group_id = -1
while group_length_new[group_id] == 0:
group_id -= 1
        # Allocate the remaining indices that need to be repeated, counting how
        # many indices have been checked; once the count reaches the current
        # group's length, move on to the previous non-empty group. group_length
        # must be tracked because it has to be updated in step with the
        # repeated indices.
group_acc = 0
for i in range(repeat_num):
group_length_new[group_id] += 1
group_acc += 1
if group_acc == self.group_length[group_id]:
group_id -= 1
while group_length_new[group_id] == 0:
group_id -= 1
group_acc = 0
temp = []
for i, value in enumerate(self.indices[repeat_start_idx:]):
temp.extend([value] * repeat_times[i])
self.indices = np.concatenate([self.indices[:repeat_start_idx], temp])
self.group_length = group_length_new
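    # Worked example (illustrative): with indices [0, 1, 2, 3, 4] and align(4),
    # repeat_num = 4 - 5 % 4 = 3 < len(self), so the last three indices are each
    # duplicated once, giving [0, 1, 2, 2, 3, 3, 4, 4] (length 8, a multiple of
    # 4); group_length is increased in step for the affected groups.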
def shuffle(self, seed=None, fast=False):
"""
        It takes about 30 seconds for an index consisting of 100,000 arrow files.
"""
if fast:
return self.shuffle_fast(seed)
indices = self.indices.tolist()
if seed is not None:
state = random.getstate()
random.seed(seed)
indices_group_list = []
group_cum_len = 0
for group_len in self.group_length:
indices_group = indices[group_cum_len : group_cum_len + group_len]
random.shuffle(indices_group)
indices_group_list.append((indices_group, group_len))
group_cum_len += group_len
random.shuffle(indices_group_list)
self.group_length = [x[1] for x in indices_group_list]
self.indices = np.asarray(
list(chain.from_iterable([x[0] for x in indices_group_list]))
)
if seed is not None:
random.setstate(state)
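    # Note: shuffle() permutes rows within each arrow file and then permutes the
    # order of the file groups, so rows from the same file stay adjacent and the
    # cached memory-mapped tables are reused across consecutive reads;
    # shuffle_fast() below is a plain global permutation without this property.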
def shuffle_fast(self, seed=None):
if seed is not None:
sampler = np.random.RandomState(seed)
sampler.shuffle(self.indices)
else:
np.random.shuffle(self.indices)
def get_table(self, arrow_file, shadow=None):
"""
Read an arrow file and return an arrow table.
"""
if shadow is None:
if self._cur_table is not None:
if self._cur_arrow_file == arrow_file:
# This is the same arrow file. Return the cached table.
return self._cur_table
else:
# This is a different arrow file. Clear the cache.
self._cur_table_map.close()
self._cur_table = None
self._cur_arrow_file = arrow_file
self._cur_table_map = pa.memory_map(f"{arrow_file}", "r")
self._cur_table = pa.ipc.RecordBatchFileReader(
self._cur_table_map
).read_all()
return self._cur_table
else:
if self._shadow_cur_table[shadow] is not None:
if self._shadow_cur_arrow_file[shadow] == arrow_file:
return self._shadow_cur_table[shadow]
else:
self._shadow_cur_table_map[shadow].close()
self._shadow_cur_table[shadow] = None
self._shadow_cur_arrow_file[shadow] = arrow_file
self._shadow_cur_table_map[shadow] = pa.memory_map(f"{arrow_file}", "r")
self._shadow_cur_table[shadow] = pa.ipc.RecordBatchFileReader(
self._shadow_cur_table_map[shadow]
).read_all()
return self._shadow_cur_table[shadow]
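    # Note: cum_length[i] is the total row count of arrow files 0..i, so file i
    # covers global indices [cum_length[i-1], cum_length[i]); bisect_right below
    # returns the first file whose cumulative length exceeds the given index.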
def get_arrow_file_by_index(self, index, return_index_bias=False, shadow=None):
i = bisect.bisect_right(self.cum_length, index)
arrow_file = self.arrow_files[i]
if return_index_bias:
if i == 0:
index_bias = 0
else:
index_bias = self.cum_length[i - 1]
return arrow_file, index_bias
return arrow_file
def get_arrow_file(self, ind, shadow=None):
"""
Get arrow file by in-dataset index.
Parameters
----------
ind: int
The in-dataset index.
shadow: str
The shadow name. If None, return the main arrow file. If not None, return the shadow arrow file.
Returns
-------
arrow_file: str
The arrow file path.
"""
index = self.indices[ind]
return self.get_arrow_file_by_index(index, shadow=shadow)
def load_table_by_index(self, index, shadow=None):
if shadow is None:
if index == self.last_index:
return self._cur_table
arrow_file, self._index_bias = self.get_arrow_file_by_index(
index, return_index_bias=True
)
self._cur_table = self.get_table(arrow_file)
self.last_index = index
return self._cur_table
else:
if index == self.shadow_last_index[shadow]:
return self._shadow_cur_table[shadow]
shadow_arrow_file, _shadow_index_bias = self.get_arrow_file_by_index(
index, return_index_bias=True, shadow=shadow
)
self._shadow_index_bias[shadow] = _shadow_index_bias
self._shadow_cur_table[shadow] = self.get_table(
shadow_arrow_file, shadow=shadow
)
self.shadow_last_index[shadow] = index
return self._shadow_cur_table[shadow]
def get_data_by_index(
self, index, columns=None, allow_missing=False, return_meta=True, shadow=None
):
table = self.load_table_by_index(index, shadow=shadow)
if isinstance(columns, str):
columns = [columns]
if columns is None:
columns = list(table.column_names)
index_bias = (
self._index_bias if shadow is None else self._shadow_index_bias[shadow]
)
in_arrow_index = index - index_bias
if return_meta:
cur_arrow_file = (
self._cur_arrow_file
if shadow is None
else self._shadow_cur_arrow_file[shadow]
)
data = {
"index": index,
"in_arrow_index": in_arrow_index,
"arrow_name": cur_arrow_file,
}
else:
data = {}
if allow_missing:
for col in columns:
if col in table.column_names:
data[col] = table[col][in_arrow_index].as_py()
else:
for col in columns:
data[col] = table[col][in_arrow_index].as_py()
return data
def get_data(
self, ind, columns=None, allow_missing=False, return_meta=True, shadow=None
):
"""
Get data by in-dataset index.
Parameters
----------
ind: int
The in-dataset index.
columns: str or list
The columns to be returned. If None, return all columns.
allow_missing: bool
If True, omit missing columns. If False, raise an error if the column is missing.
return_meta: bool
If True, the resulting dict will contain some meta information:
in-json index, in-arrow index, and arrow_name.
shadow: str
The shadow name. If None, return the main data. If not None, return the shadow data.
Returns
-------
data: dict
A dict containing the data.
"""
index = self.indices[ind]
return self.get_data_by_index(
index,
columns,
allow_missing=allow_missing,
return_meta=return_meta,
shadow=shadow,
)
def get_attribute_by_index(self, index, column, shadow=None):
table = self.load_table_by_index(index, shadow=shadow)
index_bias = (
self._index_bias if shadow is None else self._shadow_index_bias[shadow]
)
return table[column][index - index_bias].as_py()
def get_attribute(self, ind, column, shadow=None):
"""
Get single attribute by in-dataset index.
Parameters
----------
ind: int
The in-dataset index.
column: str
The column name.
shadow: str
The shadow name. If None, return the main data. If not None, return the shadow data.
Returns
-------
data: can be any type
"""
index = self.indices[ind]
return self.get_attribute_by_index(index, column, shadow=shadow)
def get_image_by_index(
self, index, column="image", ret_type="pil", max_size=-1, shadow=None
):
table = self.load_table_by_index(index, shadow=shadow)
index_bias = (
self._index_bias if shadow is None else self._shadow_index_bias[shadow]
)
col = "image" if "image" in table.column_names else "binary"
temp = table[col][index - index_bias].as_py()
image_bytes = io.BytesIO(temp)
image_bytes.seek(0)
try:
            # convert("RGB") serves two purposes:
            # 1. Normalize channels: some images are in grayscale/RGBA mode, which
            #    would cause channel inconsistency in subsequent processing.
            # 2. Avoid blurry resizes: images in P mode are forced to the NEAREST
            #    resample method in resize (even if LANCZOS is specified).
            pil_image = Image.open(image_bytes).convert("RGB")
except Exception as e:
print(
f"get_image_by_index | Error: {e} ({self.get_arrow_file_by_index(index), index - index_bias})"
)
pil_image = Image.new("RGB", (256, 256), (255, 255, 255))
if max_size > 0:
# Resize the image to max_size. max_size is the size of long edge
w, h = pil_image.size
if w > h:
new_w = max_size
new_h = int(h * max_size / w)
else:
new_h = max_size
new_w = int(w * max_size / h)
pil_image = pil_image.resize((new_w, new_h))
if ret_type == "numpy":
return np.array(pil_image)
return pil_image
def get_image(self, ind, column="image", ret_type="pil", max_size=-1, shadow=None):
"""
Get image by in-dataset index.
Parameters
----------
ind: int
The in-dataset index.
column: str
[Deprecated] The column name of the image. Default to 'image'.
ret_type: str
The return type. Can be 'pil' or 'numpy'. Default to 'pil'.
max_size: int
If not -1, resize the image to max_size. max_size is the size of long edge.
shadow: str
The shadow name. If None, return the main image. If not None, return the shadow image.
Returns
-------
image: PIL.Image.Image or np.ndarray
"""
index = self.indices[ind]
return self.get_image_by_index(index, column, ret_type, max_size, shadow=shadow)
def get_md5_by_index(self, index, shadow=None):
table = self.load_table_by_index(index, shadow=shadow)
index_bias = (
self._index_bias if shadow is None else self._shadow_index_bias[shadow]
)
return table["md5"][index - index_bias].as_py()
def get_md5(self, ind, shadow=None):
index = self.indices[ind]
return self.get_md5_by_index(index, shadow=shadow)
def get_columns_by_index(self, index, shadow=None):
table = self.load_table_by_index(index, shadow=shadow)
return table.column_names
def get_columns(self, ind, shadow=None):
index = self.indices[ind]
return self.get_columns_by_index(index, shadow=shadow)
def source_distribution(self, save_path=None, shadow=None):
sources = defaultdict(int)
for index in tqdm(self.indices):
source = self.get_attribute_by_index(index, "source", shadow=shadow)
sources[source] += 1
sources = sorted(sources.items(), key=lambda x: x[1], reverse=True)
for k, v in sources:
print(f"{k:20s} {v:10d}")
if save_path is not None:
Path(save_path).write_text(
"\n".join([f"{k:20s} {v:10d}" for k, v in sources])
)
def save(self, save_path):
"""
Save the index to a json file.
Parameters
----------
save_path: str or pathlib.Path
The path to save the index file.
"""
builder = IndexV2Builder(
data_type=self.data_type,
arrow_files=self.arrow_files,
cum_length=self.cum_length,
indices=self.indices,
)
builder.build(save_path)
def sample_batch_indices(self, n):
return np.random.choice(self.indices, n)
def sample_batch(self, n, columns, progress=True, shadow=None):
if isinstance(n, int):
indices = self.sample_batch_indices(n)
else:
indices = n
if progress:
pbar = tqdm(indices)
else:
pbar = indices
batch_data = []
for i in pbar:
batch_data.append(self.get_data_by_index(i, columns, shadow=shadow))
return batch_data
@staticmethod
def resize_and_crop(image, target_size, resample=Image.LANCZOS, crop_type="random"):
"""
Resize image without changing aspect ratio, then crop the center/random part.
Parameters
----------
image: PIL.Image.Image
The input image to be resized and cropped.
target_size: tuple
The target size of the image.
resample:
The resample method. See PIL.Image.Image.resize for details. Default to Image.LANCZOS.
crop_type: str
'center' or 'random'. If 'center', crop the center part of the image. If 'random',
crop a random part of the image. Default to 'random'.
Returns
-------
image: PIL.Image.Image
The resized and cropped image.
crop_pos: tuple
The position of the cropped part. (crop_left, crop_top)
"""
tw, th = target_size
w, h = image.size
tr = th / tw
r = h / w
# resize
if r < tr:
resize_height = th
resize_width = int(round(th / h * w))
else:
resize_width = tw
resize_height = int(round(tw / w * h))
image = image.resize((resize_width, resize_height), resample=resample)
if crop_type == "center":
crop_top = int(round((resize_height - th) / 2.0))
crop_left = int(round((resize_width - tw) / 2.0))
elif crop_type == "random":
crop_top = random.randint(0, resize_height - th)
crop_left = random.randint(0, resize_width - tw)
else:
raise ValueError(f"crop_type must be center or random, but got {crop_type}")
image = image.crop((crop_left, crop_top, crop_left + tw, crop_top + th))
return image, (crop_left, crop_top)
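# Usage sketch (illustrative; 'data.json' is an assumed index path, mirroring
# the class docstring example):
#   idx = ArrowIndexV2('data.json')
#   pil_image = idx.get_image(0)
#   cropped, (left, top) = ArrowIndexV2.resize_and_crop(
#       pil_image, (1024, 1024), crop_type='center')
#   text = idx.get_attribute(0, column='text_zh')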
class IndexV2Builder(object):
def __init__(
self,
arrow_files,
indices=None,
cum_length=None,
group_length=None,
data_type=None,
max_indices=5_000_000,
example_num=1000,
config_file=None,
):
"""
Build index v2 from an index dict.
Parameters
----------
arrow_files: list
A list of arrow files.
indices: list or dict
A list of indices or a dict of indices.
If not provided, it will be specified as range(cum_length[-1]).
cum_length: list
A list of cumulative length of arrow files.
If not provided, it will be calculated from arrow files.
group_length: list
A list of group length or a dict of group length for each arrow file.
If not provided, it will be calculated.
data_type: str or list
Some custom information of this index.
max_indices: int
If the number of indices is larger than max_indices, the indices will be saved in a separate file.
Default to 5_000_000.
example_num: int
The number of examples to be saved in the index file. Default to 1000.
config_file: str
The path of config file.
Examples
--------
        >>> builder = IndexV2Builder(
        ...     data_type='gold',
        ...     arrow_files=arrow_files,
        ...     cum_length=cum_length,
        ...     indices=indices,
        ... )
        >>> builder.build(save_path)
"""
self.arrow_files = arrow_files
self.indices = indices
self.cum_length = cum_length
self.group_length = group_length
self.data_type = data_type
self.max_indices = max_indices
self.example_num = example_num
self.config_file = config_file
if isinstance(arrow_files, str):
if "*" in arrow_files or "?" in arrow_files:
self.arrow_files = list(glob(arrow_files))
else:
self.arrow_files = [arrow_files]
elif isinstance(self.arrow_files, tuple):
self.arrow_files = list(self.arrow_files)
if not isinstance(self.arrow_files, list):
raise ValueError(
f"Expected arrow_files to be a list, got {type(self.arrow_files)}."
)
if self.cum_length is None:
continuous = False
if self.indices is None:
self.group_length = []
continuous = True
print(f"Calculating cum_length...")
self.cum_length = []
cur_cum_length = 0
pbar = tqdm(self.arrow_files)
for arrow_file in pbar:
table_length = len(get_table(arrow_file))
cur_cum_length += table_length
self.cum_length.append(cur_cum_length)
pbar.set_description(f"{self.cum_length[-1]:>12d}")
if continuous:
self.group_length.append(table_length)
if self.indices is None:
self.indices = list(range(self.cum_length[-1]))
if self.group_length is None:
self.group_length = []
if self.data_type is None:
self.data_type = ["Made by IndexV2Builder"]
elif isinstance(self.data_type, str):
self.data_type = [self.data_type]
assert_type(self.data_type, list, "data_type")
assert_type(self.cum_length, (list, np.ndarray), "cum_length")
assert_type(self.group_length, (list, dict, np.ndarray), "group_length")
assert_type(self.indices, (list, dict, np.ndarray), "indices")
self.cum_length = ndarray_to_list(self.cum_length)
self.group_length = ndarray_to_list(self.group_length)
self.indices = ndarray_to_list(self.indices)
if isinstance(self.indices, dict):
for k, v in self.indices.items():
assert_type(v, list, f"indices[{k}]")
if len(self.arrow_files) != len(self.cum_length):
raise ValueError(
f"Length of arrow_files and cum_length does not match. {len(self.arrow_files)} != {len(self.cum_length)}"
)
if len(self.indices) == 0:
raise ValueError(f"No indices found in index_dict.")
if (
isinstance(self.indices, list)
and self.indices[-1] > self.cum_length[-1] - 1
):
raise ValueError(
f"Indices exceed cum_length. {self.indices[-1]} > {self.cum_length[-1] - 1}"
)
if len(self.group_length) > 0:
if len(self.arrow_files) != len(self.group_length):
raise ValueError(
f"Length of arrow_files and group_length does not match. {len(self.arrow_files)} != {len(self.group_length)}"
)
if sum(self.group_length) != len(self.indices):
raise ValueError(
f"Sum of group_length does not match length of indices. {sum(self.group_length)} != {len(self.indices)}"
)
def encode(self):
        # Encode arrow files (currently a pass-through; the loop is kept as an
        # extension point for shortening paths).
        print("Encoding arrow files...")
        arrow_files = []
        for arrow_file in tqdm(self.arrow_files):
            shortname = arrow_file
            arrow_files.append(shortname)
self.arrow_files = arrow_files
# Calculate group_length
print("Calculating group length...")
if isinstance(self.indices, list):
if len(self.group_length) == 0:
self.group_length = self.calc_group_length(
self.indices, self.cum_length
)
else:
print("Group length already calculated, skip.")
elif isinstance(self.indices, dict):
if not isinstance(self.group_length, dict):
self.group_length = {}
for k, v in self.indices.items():
print(f"Calculating group length for {k}...")
if k not in self.group_length or len(self.group_length[k]) == 0:
self.group_length[k] = self.calc_group_length(v, self.cum_length)
else:
print("Group length already calculated, skip.")
else:
raise ValueError(
f"Expected indices type list or dict, got {type(self.indices)}."
)
return {
"data_type": self.data_type,
"config_file": self.config_file if self.config_file is not None else "",
"indices_file": "",
"arrow_files": self.arrow_files,
"cum_length": self.cum_length,
"group_length": self.group_length,
"indices": self.indices,
"example_indices": [],
}
def to_index_v2(self):
return ArrowIndexV2(res_dict=self.encode())
def build(self, save_path):
return self.save(save_path)
def save(self, save_path):
"""
Make index v2 from an index dict.
Parameters
----------
save_path: str or pathlib.Path
The path to save the index file.
"""
index_dict = self.encode()
        # Ensure the indices are either a list or a dict.
save_path = Path(save_path)
save_path.parent.mkdir(exist_ok=True, parents=True)
        if (
            isinstance(index_dict["indices"], list)
            and len(index_dict["indices"]) > self.max_indices
        ):
            # Keep a small example sample in the json; the full list goes to a
            # side .npz file.
            index_dict["example_indices"] = index_dict["indices"][: self.example_num]
            indices_to_save = {"x": index_dict["indices"]}
            index_dict["indices"] = []
        elif isinstance(index_dict["indices"], dict):
            indices_to_save = index_dict["indices"]
            index_dict["indices"] = {}
            num_keys = len(indices_to_save)
            example_num_per_key = max(self.example_num // num_keys, 10)
            index_dict["example_indices"] = {
                # Sample from indices_to_save: index_dict["indices"] was just emptied.
                k: v[:example_num_per_key]
                for k, v in indices_to_save.items()
            }
else:
indices_to_save = None
# save indices
if indices_to_save is not None:
indices_file = save_path.parent / f"{save_path.stem}.index"
indices_dict = {k: np.array(v) for k, v in indices_to_save.items()}
np.savez_compressed(indices_file, **indices_dict)
index_dict["indices_file"] = indices_file.name + ".npz"
with save_path.open("w") as f:
json.dump(index_dict, f, indent=4, ensure_ascii=False)
@staticmethod
def calc_group_length(indices, cum_length):
group_lengths = []
cum_ind = 0
count = 0
for index in tqdm(indices):
if index < cum_length[cum_ind]:
# index is still in the current group
count += 1
else:
# index has exceeded the current group, need to switch to the next group
group_lengths.append(count)
cum_ind += 1
# if the index exceeds the next group, continue to switch to the next group
while index >= cum_length[cum_ind]:
group_lengths.append(0)
cum_ind += 1
count = 1
# The indices array is exhausted, and the last group containing the index should also be added.
group_lengths.append(count)
assert len(group_lengths) <= len(cum_length), (
len(group_lengths),
len(cum_length),
)
        # If there are fewer groups than entries in cum_length, the trailing
        # arrow files contributed no indices; pad those groups with zeros.
if len(group_lengths) < len(cum_length):
group_lengths.extend([0] * (len(cum_length) - len(group_lengths)))
return group_lengths
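# Worked example (illustrative): with cum_length = [3, 5, 9] (three arrow files
# holding 3, 2 and 4 rows) and indices = [0, 2, 5, 8], calc_group_length returns
# [2, 0, 2]: two kept rows fall in the first file, none in the second (its rows
# were filtered out), and two in the third.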
import math
import torch.distributed as dist
from torch.utils.data.distributed import DistributedSampler
class BlockDistributedSampler(DistributedSampler):
def __init__(
self,
dataset,
num_replicas=None,
rank=None,
shuffle=True,
seed=0,
drop_last=False,
batch_size=-1,
start_index=0,
):
super().__init__(dataset, num_replicas, rank, shuffle, seed, drop_last)
if num_replicas is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
num_replicas = dist.get_world_size()
if rank is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
rank = dist.get_rank()
if rank >= num_replicas or rank < 0:
raise ValueError(
"Invalid rank {}, rank should be in the interval"
" [0, {}]".format(rank, num_replicas - 1)
)
if batch_size == -1:
raise ValueError("batch_size should be specified")
self.dataset = dataset
self.num_replicas = num_replicas
self.rank = rank
self.epoch = 0
self.drop_last = drop_last
self.shuffle = shuffle
self.seed = seed
self.batch_size = batch_size
self._start_index = start_index
self.recompute_sizes()
@property
def start_index(self):
return self._start_index
@start_index.setter
def start_index(self, value):
self._start_index = value
self.recompute_sizes()
def recompute_sizes(self):
self.num_samples = (
len(self.dataset) // self.batch_size * self.batch_size // self.num_replicas
- self._start_index
)
self.total_size = self.num_samples * self.num_replicas
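    # Worked example (illustrative): len(dataset)=1000, batch_size=32,
    # num_replicas=4 -> 1000 // 32 * 32 = 992 usable samples, i.e. 248 per rank;
    # with start_index=48, each rank yields the remaining 200 samples.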
def __iter__(self):
indices = list(range(len(self.dataset))) # type: ignore[arg-type]
raw_num_samples = (
len(indices) // self.batch_size * self.batch_size // self.num_replicas
)
raw_total_size = raw_num_samples * self.num_replicas
indices = indices[:raw_total_size]
        # We require that the dataset size is divisible by batch_size * num_replicas.
        # This is naturally satisfied when using index_kits.
        # In the future, this assertion may be removed.
assert len(indices) == raw_total_size, f"{len(indices)} vs {raw_total_size}"
# subsample with start_index
indices = indices[
self.rank * raw_num_samples
+ self.start_index : (self.rank + 1) * raw_num_samples
]
assert (
len(indices) + self.start_index == raw_num_samples
), f"{len(indices) + self.start_index} vs {raw_num_samples}"
# This is a sequential sampler. The shuffle operation is done by the dataset itself.
return iter(indices)
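# Usage sketch (illustrative; variable names are assumptions). Each rank reads a
# contiguous block of the dataset, so shuffling is left to the dataset itself:
#   sampler = BlockDistributedSampler(dataset, num_replicas=8, rank=rank,
#                                     batch_size=32, start_index=0)
#   loader = torch.utils.data.DataLoader(dataset, batch_size=32, sampler=sampler)
#   # To resume mid-epoch, set sampler.start_index before creating the loader.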
class DistributedSamplerWithStartIndex(DistributedSampler):
def __init__(
self,
dataset,
num_replicas=None,
rank=None,
shuffle=True,
seed=0,
drop_last=False,
start_index=0,
):
super().__init__(dataset, num_replicas, rank, shuffle, seed, drop_last)
if num_replicas is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
num_replicas = dist.get_world_size()
if rank is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
rank = dist.get_rank()
if rank >= num_replicas or rank < 0:
raise ValueError(
"Invalid rank {}, rank should be in the interval"
" [0, {}]".format(rank, num_replicas - 1)
)
self.dataset = dataset
self.num_replicas = num_replicas
self.rank = rank
self.epoch = 0
self.drop_last = drop_last
self._start_index = start_index
self.recompute_sizes()
self.shuffle = shuffle
self.seed = seed
@property
def start_index(self):
return self._start_index
@start_index.setter
def start_index(self, value):
self._start_index = value
self.recompute_sizes()
def recompute_sizes(self):
# If the dataset length is evenly divisible by # of replicas, then there
# is no need to drop any data, since the dataset will be split equally.
if self.drop_last and (len(self.dataset) - self._start_index) % self.num_replicas != 0: # type: ignore[arg-type]
# Split to nearest available length that is evenly divisible.
# This is to ensure each rank receives the same amount of data when
# using this Sampler.
self.num_samples = math.ceil(
((len(self.dataset) - self._start_index) - self.num_replicas) / self.num_replicas # type: ignore[arg-type]
)
else:
self.num_samples = math.ceil((len(self.dataset) - self._start_index) / self.num_replicas) # type: ignore[arg-type]
self.total_size = self.num_samples * self.num_replicas
def __iter__(self):
indices = list(range(self._start_index, len(self.dataset))) # type: ignore[arg-type]
if not self.drop_last:
# add extra samples to make it evenly divisible
padding_size = self.total_size - len(indices)
if padding_size <= len(indices):
indices += indices[:padding_size]
else:
indices += (indices * math.ceil(padding_size / len(indices)))[
:padding_size
]
else:
# remove tail of data to make it evenly divisible.
indices = indices[: self.total_size]
assert len(indices) == self.total_size
# subsample with start_index
indices = indices[self.rank : self.total_size : self.num_replicas]
assert len(indices) == self.num_samples
return iter(indices)
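# Usage sketch (illustrative): unlike BlockDistributedSampler above, this sampler
# strides across ranks (rank, rank + num_replicas, ...) and pads or trims the
# tail so every rank yields exactly num_samples indices:
#   sampler = DistributedSamplerWithStartIndex(dataset, num_replicas=8, rank=rank,
#                                              start_index=10_000)
#   loader = torch.utils.data.DataLoader(dataset, batch_size=32, sampler=sampler)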
import re
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
with open("index_kits/__init__.py", "r") as file:
regex_version = r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]'
version = re.search(regex_version, file.read(), re.MULTILINE).group(1)
setup(
name="index_kits",
version=version,
author="jarvizhang",
author_email="jarvizhang@tencent.com",
description="An index kits for streaming reading arrow data.",
packages=["index_kits", "index_kits/dataset"],
scripts=["bin/idk"],
install_requires=[
"pillow>=9.3.0",
"tqdm>=4.60.0",
"pyarrow>=10.0.1",
"torch>=1.9",
],
python_requires=">=3.8.12",
)
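# Install sketch (assumed workflow, not stated in this file): running
# `pip install -e .` from the repo root installs the index_kits package and
# places the `idk` script from bin/ on PATH.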
TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT
Tencent Hunyuan DiT Release Date: 14 May 2024
THIS LICENSE AGREEMENT DOES NOT APPLY IN THE EUROPEAN UNION AND IS EXPRESSLY LIMITED TO THE TERRITORY, AS DEFINED BELOW.
By clicking to agree or by using, reproducing, modifying, distributing, performing or displaying any portion or element of the Tencent Hunyuan Works, including via any Hosted Service, You will be deemed to have recognized and accepted the content of this Agreement, which is effective immediately.
1. DEFINITIONS.
a. “Acceptable Use Policy” shall mean the policy made available by Tencent as set forth in the Exhibit A.
b. “Agreement” shall mean the terms and conditions for use, reproduction, distribution, modification, performance and displaying of Tencent Hunyuan Works or any portion or element thereof set forth herein.
c. “Documentation” shall mean the specifications, manuals and documentation for Tencent Hunyuan made publicly available by Tencent.
d. “Hosted Service” shall mean a hosted service offered via an application programming interface (API), web access, or any other electronic or remote means.
e. “Licensee,” “You” or “Your” shall mean a natural person or legal entity exercising the rights granted by this Agreement and/or using the Tencent Hunyuan Works for any purpose and in any field of use.
f. “Materials” shall mean, collectively, Tencent’s proprietary Tencent Hunyuan and Documentation (and any portion thereof) as made available by Tencent under this Agreement.
g. “Model Derivatives” shall mean all: (i) modifications to Tencent Hunyuan or any Model Derivative of Tencent Hunyuan; (ii) works based on Tencent Hunyuan or any Model Derivative of Tencent Hunyuan; or (iii) any other machine learning model which is created by transfer of patterns of the weights, parameters, operations, or Output of Tencent Hunyuan or any Model Derivative of Tencent Hunyuan, to that model in order to cause that model to perform similarly to Tencent Hunyuan or a Model Derivative of Tencent Hunyuan, including distillation methods, methods that use intermediate data representations, or methods based on the generation of synthetic data Outputs by Tencent Hunyuan or a Model Derivative of Tencent Hunyuan for training that model. For clarity, Outputs by themselves are not deemed Model Derivatives.
h. “Output” shall mean the information and/or content output of Tencent Hunyuan or a Model Derivative that results from operating or otherwise using Tencent Hunyuan or a Model Derivative, including via a Hosted Service.
i. “Tencent,” “We” or “Us” shall mean THL A29 Limited.
j. “Tencent Hunyuan” shall mean the large language models, text/image/video/audio/3D generation models, and multimodal large language models and their software and algorithms, including trained model weights, parameters (including optimizer states), machine-learning model code, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing made publicly available by Us, including, without limitation to, Tencent Hunyuan DiT released at https://huggingface.co/Tencent-Hunyuan/HunyuanDiT.
k. “Tencent Hunyuan Works” shall mean: (i) the Materials; (ii) Model Derivatives; and (iii) all derivative works thereof.
l. “Territory” shall mean the worldwide territory, excluding the territory of the European Union.
m. “Third Party” or “Third Parties” shall mean individuals or legal entities that are not under common control with Us or You.
n. “including” shall mean including but not limited to.
2. GRANT OF RIGHTS.
We grant You, for the Territory only, a non-exclusive, non-transferable and royalty-free limited license under Tencent’s intellectual property or other rights owned by Us embodied in or utilized by the Materials to use, reproduce, distribute, create derivative works of (including Model Derivatives), and make modifications to the Materials, only in accordance with the terms of this Agreement and the Acceptable Use Policy, and You must not violate (or encourage or permit anyone else to violate) any term of this Agreement or the Acceptable Use Policy.
3. DISTRIBUTION.
You may, subject to Your compliance with this Agreement, distribute or make available to Third Parties the Tencent Hunyuan Works, exclusively in the Territory, provided that You meet all of the following conditions:
a. You must provide all such Third Party recipients of the Tencent Hunyuan Works or products or services using them a copy of this Agreement;
b. You must cause any modified files to carry prominent notices stating that You changed the files;
c. You are encouraged to: (i) publish at least one technology introduction blogpost or one public statement expressing Your experience of using the Tencent Hunyuan Works; and (ii) mark the products or services developed by using the Tencent Hunyuan Works to indicate that the product/service is “Powered by Tencent Hunyuan”; and
d. All distributions to Third Parties (other than through a Hosted Service) must be accompanied by a “Notice” text file that contains the following notice: “Tencent Hunyuan is licensed under the Tencent Hunyuan Community License Agreement, Copyright © 2024 Tencent. All Rights Reserved. The trademark rights of “Tencent Hunyuan” are owned by Tencent or its affiliate.”
You may add Your own copyright statement to Your modifications and, except as set forth in this Section and in Section 5, may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Model Derivatives as a whole, provided Your use, reproduction, modification, distribution, performance and display of the work otherwise complies with the terms and conditions of this Agreement (including as regards the Territory). If You receive Tencent Hunyuan Works from a Licensee as part of an integrated end user product, then this Section 3 of this Agreement will not apply to You.
4. ADDITIONAL COMMERCIAL TERMS.
If, on the Tencent Hunyuan version release date, the monthly active users of all products or services made available by or for Licensee is greater than 100 million monthly active users in the preceding calendar month, You must request a license from Tencent, which Tencent may grant to You in its sole discretion, and You are not authorized to exercise any of the rights under this Agreement unless or until Tencent otherwise expressly grants You such rights.
5. RULES OF USE.
a. Your use of the Tencent Hunyuan Works must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Tencent Hunyuan Works, which is hereby incorporated by reference into this Agreement. You must include the use restrictions referenced in these Sections 5(a) and 5(b) as an enforceable provision in any agreement (e.g., license agreement, terms of use, etc.) governing the use and/or distribution of Tencent Hunyuan Works and You must provide notice to subsequent users to whom You distribute that Tencent Hunyuan Works are subject to the use restrictions in these Sections 5(a) and 5(b).
b. You must not use the Tencent Hunyuan Works or any Output or results of the Tencent Hunyuan Works to improve any other large language model (other than Tencent Hunyuan or Model Derivatives thereof).
c. You must not use, reproduce, modify, distribute, or display the Tencent Hunyuan Works, Output or results of the Tencent Hunyuan Works outside the Territory. Any such use outside the Territory is unlicensed and unauthorized under this Agreement.
6. INTELLECTUAL PROPERTY.
a. Subject to Tencent’s ownership of Tencent Hunyuan Works made by or for Tencent and intellectual property rights therein, conditioned upon Your compliance with the terms and conditions of this Agreement, as between You and Tencent, You will be the owner of any derivative works and modifications of the Materials and any Model Derivatives that are made by or for You.
b. No trademark licenses are granted under this Agreement, and in connection with the Tencent Hunyuan Works, Licensee may not use any name or mark owned by or associated with Tencent or any of its affiliates, except as required for reasonable and customary use in describing and distributing the Tencent Hunyuan Works. Tencent hereby grants You a license to use “Tencent Hunyuan” (the “Mark”) in the Territory solely as required to comply with the provisions of Section 3(c), provided that You comply with any applicable laws related to trademark protection. All goodwill arising out of Your use of the Mark will inure to the benefit of Tencent.
c. If You commence a lawsuit or other proceedings (including a cross-claim or counterclaim in a lawsuit) against Us or any person or entity alleging that the Materials or any Output, or any portion of any of the foregoing, infringe any intellectual property or other right owned or licensable by You, then all licenses granted to You under this Agreement shall terminate as of the date such lawsuit or other proceeding is filed. You will defend, indemnify and hold harmless Us from and against any claim by any Third Party arising out of or related to Your or the Third Party’s use or distribution of the Tencent Hunyuan Works.
d. Tencent claims no rights in Outputs You generate. You and Your users are solely responsible for Outputs and their subsequent uses.
7. DISCLAIMERS OF WARRANTY AND LIMITATIONS OF LIABILITY.
a. We are not obligated to support, update, provide training for, or develop any further version of the Tencent Hunyuan Works or to grant any license thereto.
b. UNLESS AND ONLY TO THE EXTENT REQUIRED BY APPLICABLE LAW, THE TENCENT HUNYUAN WORKS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED “AS IS” WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND INCLUDING ANY WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, COURSE OF DEALING, USAGE OF TRADE, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING, REPRODUCING, MODIFYING, PERFORMING, DISPLAYING OR DISTRIBUTING ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS AND ASSUME ANY AND ALL RISKS ASSOCIATED WITH YOUR OR A THIRD PARTY’S USE OR DISTRIBUTION OF ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS AND YOUR EXERCISE OF RIGHTS AND PERMISSIONS UNDER THIS AGREEMENT.
c. TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL TENCENT OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, FOR ANY DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, CONSEQUENTIAL OR PUNITIVE DAMAGES, OR LOST PROFITS OF ANY KIND ARISING FROM THIS AGREEMENT OR RELATED TO ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS, EVEN IF TENCENT OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
8. SURVIVAL AND TERMINATION.
a. The term of this Agreement shall commence upon Your acceptance of this Agreement or access to the Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein.
b. We may terminate this Agreement if You breach any of the terms or conditions of this Agreement. Upon termination of this Agreement, You must promptly delete and cease use of the Tencent Hunyuan Works. Sections 6(a), 6(c), 7 and 9 shall survive the termination of this Agreement.
9. GOVERNING LAW AND JURISDICTION.
a. This Agreement and any dispute arising out of or relating to it will be governed by the laws of the Hong Kong Special Administrative Region of the People’s Republic of China, without regard to conflict of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement.
b. Exclusive jurisdiction and venue for any dispute arising out of or relating to this Agreement will be a court of competent jurisdiction in the Hong Kong Special Administrative Region of the People’s Republic of China, and Tencent and Licensee consent to the exclusive jurisdiction of such court with respect to any such dispute.
EXHIBIT A
ACCEPTABLE USE POLICY
Tencent reserves the right to update this Acceptable Use Policy from time to time.
Last modified: [insert date]
Tencent endeavors to promote safe and fair use of its tools and features, including Tencent Hunyuan. You agree not to use Tencent Hunyuan or Model Derivatives:
1. Outside the Territory;
2. In any way that violates any applicable national, federal, state, local, international or any other law or regulation;
3. To harm Yourself or others;
4. To repurpose or distribute output from Tencent Hunyuan or any Model Derivatives to harm Yourself or others;
5. To override or circumvent the safety guardrails and safeguards We have put in place;
6. For the purpose of exploiting, harming or attempting to exploit or harm minors in any way;
7. To generate or disseminate verifiably false information and/or content with the purpose of harming others or influencing elections;
8. To generate or facilitate false online engagement, including fake reviews and other means of fake online engagement;
9. To intentionally defame, disparage or otherwise harass others;
10. To generate and/or disseminate malware (including ransomware) or any other content to be used for the purpose of harming electronic systems;
11. To generate or disseminate personal identifiable information with the purpose of harming others;
12. To generate or disseminate information (including images, code, posts, articles), and place the information in any public context (including through the use of bot-generated tweets), without expressly and conspicuously identifying that the information and/or content is machine generated;
13. To impersonate another individual without consent, authorization, or legal right;
14. To make high-stakes automated decisions in domains that affect an individual’s safety, rights or wellbeing (e.g., law enforcement, migration, medicine/health, management of critical infrastructure, safety components of products, essential services, credit, employment, housing, education, social scoring, or insurance);
15. In a manner that violates or disrespects the social ethics and moral standards of other countries or regions;
16. To perform, facilitate, threaten, incite, plan, promote or encourage violent extremism or terrorism;
17. For any use intended to discriminate against or harm individuals or groups based on protected characteristics or categories, online or offline social behavior or known or predicted personal or personality characteristics;
18. To intentionally exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm;
19. For military purposes;
20. To engage in the unauthorized or unlicensed practice of any profession including, but not limited to, financial, legal, medical/health, or other professional practices.
Usage and Legal Notices:
Tencent is pleased to support the open source community by making Tencent Hunyuan available.
Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. The below software and/or models in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All Tencent Modifications are Copyright (C) THL A29 Limited.
Tencent Hunyuan is licensed under the Tencent Hunyuan Community License Agreement except for the third-party components listed below. Tencent Hunyuan does not impose any additional limitations beyond what is outlined in the respective licenses of these third-party components. Users must comply with all terms and conditions of original licenses of these third-party components and must ensure that the usage of the third party components adheres to all relevant laws and regulations.
For avoidance of doubts, Tencent Hunyuan means the large language models and their software and algorithms, including trained model weights, parameters (including optimizer states), machine-learning model code, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing made publicly available by Tencent in accordance with Tencent Hunyuan Community License Agreement.
Other dependencies and licenses:
Open Source Software Licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. torch
Copyright (c) 2016- Facebook, Inc (Adam Paszke)
Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
Copyright (c) 2011-2013 NYU (Clement Farabet)
Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
Terms of the BSD 3-Clause:
--------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
For the license of other third party components, please refer to the following URL:
https://github.com/pytorch/pytorch/blob/v1.13.1/NOTICE
Open Source Software Licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. pandas
Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.
Copyright (c) 2011-2023, Open source contributors.
A copy of the BSD 3-Clause is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/pandas-dev/pandas/tree/v2.0.3/LICENSES
Open Source Software Licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. numpy
Copyright (c) 2005-2022, NumPy Developers.
All rights reserved.
A copy of the BSD 3-Clause is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/numpy/numpy/blob/v1.24.4/LICENSES_bundled.txt
Open Source Software/Model Licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. Megatron-LM
Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
A copy of the BSD 3-Clause is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/NVIDIA/Megatron-LM/blob/main/LICENSE
Open Source Software/Models Licensed under the Apache License Version 2.0:
The below software in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2023 THL A29 Limited.
--------------------------------------------------------------------
1. diffusers
Copyright (c) diffusers original author and authors
Please note this software has been modified by Tencent in this distribution.
2. transformers
Copyright (c) transformers original author and authors
3. timm
Copyright 2019 Ross Wightman
4. text-to-text-transfer-transformer
Copyright (c) text-to-text-transfer-transformer original author and authors
Please note this software has been modified by Tencent in this distribution.
5. pytorch-fid
Copyright (c) pytorch-fid original author and authors
Please note this software has been modified by Tencent in this distribution.
6. Image-Quality-Assessment-Toolbox
Copyright 2021 Qunliang Xing
7. accelerate
Copyright (c) accelerate original author and authors
8. IP-Adapter
Copyright (c) IP-Adapter original author and authors
Please note this software has been modified by Tencent in this distribution.
9. mT5
Copyright (c) mT5 original author and authors
10. Mistral-7B
Copyright (c) 2024 Mistral AI, All rights reserved
11. peft
Copyright 2023 The HuggingFace Team. All rights reserved.
Terms of the Apache License Version 2.0:
--------------------------------------------------------------------
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
You must give any other recipients of the Work or Derivative Works a copy of this License; and
You must cause any modified files to carry prominent notices stating that You changed the files; and
You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Open Source Software/Model Licensed under the BSD 3-Clause License:
--------------------------------------------------------------------
1. torchvision
Copyright (c) Soumith Chintala 2016,
All rights reserved.
2. flash_attn
Copyright (c) 2022, the respective contributors, as shown by the AUTHORS file.
All rights reserved.
3. apex
Copyright (c) apex original author and authors
A copy of the BSD 3-Clause is included in this file.
Open Source Software Licensed under the HPND License:
--------------------------------------------------------------------
1. Pillow
Copyright © 2010-2023 by Jeffrey A. Clark (Alex) and contributors.
Terms of the HPND License:
--------------------------------------------------------------------
The Python Imaging Library (PIL) is
Copyright © 1997-2011 by Secret Labs AB
Copyright © 1995-2011 by Fredrik Lundh
Pillow is the friendly PIL fork. It is
Copyright © 2010-2023 by Jeffrey A. Clark (Alex) and contributors.
Like PIL, Pillow is licensed under the open source HPND License:
By obtaining, using, and/or copying this software and/or its associated
documentation, you agree that you have read, understood, and will comply
with the following terms and conditions:
Permission to use, copy, modify and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appears in all copies, and that
both that copyright notice and this permission notice appear in supporting
documentation, and that the name of Secret Labs AB or the author not be
used in advertising or publicity pertaining to distribution of the software
without specific, written prior permission.
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL,
INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
Open Source Software/Model Licensed under the MIT License:
The below software in this distribution may have been modified by Tencent.
--------------------------------------------------------------------
1. einops
Copyright (c) 2018 Alex Rogozhnikov
2. loguru
Copyright (c) 2017
3. Chinese-CLIP
Copyright (c) 2012-2022 OFA-Sys Team
Copyright (c) 2012-2022 Gabriel Ilharco, Mitchell Wortsman, Nicholas Carlini, Rohan Taori, Achal Dave, Vaishaal Shankar, John Miller, Hongseok Namkoong, Hannaneh Hajishirzi, Ali Farhadi, Ludwig Schmidt
4. DeepSpeed
Copyright (c) Microsoft Corporation.
5. glid-3-xl
Copyright (c) 2021 OpenAI
6. lazysizes
Copyright (c) 2015 Alexander Farkas
7. thingsvision
Copyright (c) 2021 Vision and Computational Cognition Group
8. sd-vae-ft-ema
Copyright (c) sd-vae-ft-ema original author and authors
9. ComfyUI-Diffusers
Copyright (c) 2023 Limitex
10. glide-text2im
Copyright (c) 2021 OpenAI
11. improved-diffusion
Copyright (c) 2021 OpenAI
Terms of the MIT License:
--------------------------------------------------------------------
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Open Source Software Licensed under the MIT License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. tqdm
Copyright (c) 2013 noamraph
A copy of the MIT is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/tqdm/tqdm/blob/v4.66.1/LICENCE
Open Source Software/Model Licensed under the MIT License and Other Licenses of the Third-Party Components therein:
The below software in this distribution may have been modified by Tencent.
--------------------------------------------------------------------
1. generative-models
Copyright (c) 2023 Stability AI
A copy of the MIT is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/Stability-AI/generative-models/blob/main/LICENSE-CODE
https://github.com/Stability-AI/generative-models/tree/main/model_licenses
Open Source Software/Model Licensed under the Apache License Version 2.0 and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. pyarrow
Copyright 2016-2024 The Apache Software Foundation
A copy of the Apache License Version 2.0 is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/apache/arrow/blob/main/NOTICE.txt
Open Source Software Licensed under the MIT License and Other Licenses of the Third-Party Components therein:
The below software in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2023 THL A29 Limited.
--------------------------------------------------------------------
1. opencv-python
Copyright (c) Olli-Pekka Heinisuo
A copy of the MIT is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/opencv/opencv-python/blob/4.x/LICENSE-3RD-PARTY.txt
Open Source Software Licensed under the MIT License and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. onnxruntime
Copyright (c) Microsoft Corporation.
A copy of the MIT is included in this file.
For the license of other third party components, please refer to the following URL:
https://github.com/microsoft/onnxruntime/blob/v1.16.3/ThirdPartyNotices.txt
Open Source Software/Model Licensed under the Apache License Version 2.0:
The below software in this distribution may have been modified by Tencent.
--------------------------------------------------------------------
1. dwpose
Copyright 2018-2020 Open-MMLab.
Please note this software has been modified by Tencent in this distribution.
A copy of the Apache License Version 2.0 is included in this file.
Open Source Software Licensed under the License agreement for matplotlib and later and Other Licenses of the Third-Party Components therein:
--------------------------------------------------------------------
1. matplotlib
Copyright (c) 2012- Matplotlib Development Team; All Rights Reserved
Terms of the License agreement for matplotlib versions 1.3.0 and later:
--------------------------------------------------------------------
License agreement for matplotlib versions 1.3.0 and later
=========================================================
1. This LICENSE AGREEMENT is between the Matplotlib Development Team
("MDT"), and the Individual or Organization ("Licensee") accessing and
otherwise using matplotlib software in source or binary form and its
associated documentation.
2. Subject to the terms and conditions of this License Agreement, MDT
hereby grants Licensee a nonexclusive, royalty-free, world-wide license
to reproduce, analyze, test, perform and/or display publicly, prepare
derivative works, distribute, and otherwise use matplotlib
alone or in any derivative version, provided, however, that MDT's
License Agreement and MDT's notice of copyright, i.e., "Copyright (c)
2012- Matplotlib Development Team; All Rights Reserved" are retained in
matplotlib alone or in any derivative version prepared by
Licensee.
3. In the event Licensee prepares a derivative work that is based on or
incorporates matplotlib or any part thereof, and wants to
make the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to matplotlib.
4. MDT is making matplotlib available to Licensee on an "AS
IS" basis. MDT MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, MDT MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB
WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
5. MDT SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR
LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING
MATPLOTLIB, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF
THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between MDT and
Licensee. This License Agreement does not grant permission to use MDT
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using matplotlib,
Licensee agrees to be bound by the terms and conditions of this License
Agreement.
License agreement for matplotlib versions prior to 1.3.0
========================================================
1. This LICENSE AGREEMENT is between John D. Hunter ("JDH"), and the
Individual or Organization ("Licensee") accessing and otherwise using
matplotlib software in source or binary form and its associated
documentation.
2. Subject to the terms and conditions of this License Agreement, JDH
hereby grants Licensee a nonexclusive, royalty-free, world-wide license
to reproduce, analyze, test, perform and/or display publicly, prepare
derivative works, distribute, and otherwise use matplotlib
alone or in any derivative version, provided, however, that JDH's
License Agreement and JDH's notice of copyright, i.e., "Copyright (c)
2002-2011 John D. Hunter; All Rights Reserved" are retained in
matplotlib alone or in any derivative version prepared by
Licensee.
3. In the event Licensee prepares a derivative work that is based on or
incorporates matplotlib or any part thereof, and wants to
make the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to matplotlib.
4. JDH is making matplotlib available to Licensee on an "AS
IS" basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB
WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR
LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING
MATPLOTLIB, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF
THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between JDH and
Licensee. This License Agreement does not grant permission to use JDH
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using matplotlib,
Licensee agrees to be bound by the terms and conditions of this License
Agreement.
For the license of other third party components, please refer to the following URL:
https://github.com/matplotlib/matplotlib/blob/v3.7.5/LICENSE
### Reproduction Guide 🔥🔥🔥
```shell
# 1. Environment setup
docker pull image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.1-rc5-rocblas101839-0811-das1.6-py3.10-20250908-rc1
# 2. Create the container
docker run -it \
--network=host \
--hostname=localhost \
--name=HUNYUAN \
-v /opt/hyhal:/opt/hyhal:ro \
-v $PWD:/workspace \
--ipc=host \
--device=/dev/kfd \
--device=/dev/mkfd \
--device=/dev/dri \
--shm-size=512G \
--privileged \
--group-add video \
--cap-add=SYS_PTRACE \
--security-opt seccomp=unconfined \
image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.1-rc5-rocblas101839-0811-das1.6-py3.10-20250908-rc1 \
/bin/bash
# 3. Clone the code
git clone http://developer.sourcefind.cn/codes/bw_bestperf/hunyuan-dit.git
# 4. Fetch & install dependencies
# Apex
curl -f -C - -o apex-1.5.0+das.opt1.dtk25041-cp310-cp310-linux_x86_64.whl https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/e759f4e7fbb64b10
# Lightop
curl -f -C - -o lightop-0.5.0+das.dtk25041.unknown-cp310-cp310-linux_x86_64.whl https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/3ca9654a8fc1b0b5
# DeepSpeed
wget https://download.sourcefind.cn:65024/directlink/4/deepspeed/DAS1.6/deepspeed-0.14.2+das.opt1.dtk25041-cp310-cp310-manylinux_2_28_x86_64.whl
pip install apex-1.5.0+das.opt1.dtk25041-cp310-cp310-linux_x86_64.whl
pip install lightop-0.5.0+das.dtk25041.unknown-cp310-cp310-linux_x86_64.whl
pip install deepspeed-0.14.2+das.opt1.dtk25041-cp310-cp310-manylinux_2_28_x86_64.whl
pip install -r requirements.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
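# (Optional) sanity-check that the wheels installed above import cleanly
python -c "import apex, deepspeed; print(deepspeed.__version__)"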
# 5. Download optimization packages
# hipblaslt
curl -f -C - -o hipblaslt-install0925.tar.gz https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/5857030947151012
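# Extract the archive; the test commands below assume it unpacks to a local hipblaslt-install/ directory
tar -xzf hipblaslt-install0925.tar.gz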
# miopen
curl -f -C - -o package_0915_ubuntu.tar.gz https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/0c80d0e60b9af80d
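# Extract the archive; the test commands below assume it unpacks to package/miopen/
tar -xzf package_0915_ubuntu.tar.gz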
# 6. Download the model
# Model card: https://modelscope.cn/models/dengcao/HunyuanDiT-v1.2
pip install modelscope
modelscope download --model dengcao/HunyuanDiT-v1.2 --local_dir ./HunyuanDiT-v1.2
# The VAE, tokenizer, and text encoder must also be downloaded
cd HunyuanDiT-v1.2
wget https://dit.hunyuan.tencent.com/download/HunyuanDiT/tokenizer.zip
wget https://dit.hunyuan.tencent.com/download/HunyuanDiT/sdxl-vae-fp16-fix.zip
wget https://dit.hunyuan.tencent.com/download/HunyuanDiT/clip_text_encoder.zip
# After downloading, the model directory structure is as follows:
```
<p align="center">
<img src="19115934112c36d5d67394265d1498e2.png" height=300>
</p>
## Test Command
```shell
export LD_LIBRARY_PATH=/workspace/OEM_ADVTG_TEST/hunyuan/hipblaslt-install/lib/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/workspace/OEM_ADVTG_TEST/hunyuan/package/miopen/lib/:$LD_LIBRARY_PATH
python sample_t2i_dcu.py --model-root /workspace/OEM_ADVTG_TEST/hunyuan/HunyuanDiT-v1.2/ --batch-size 4 --infer-mode fa --prompt "青花瓷风格,一只可爱的哈士奇" --no-enhance --load-key module --image-size 1024 1024 --infer-steps 20
```
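The Gradio app below embeds the generation parameters (prompt, seed, CFG scale, sampler, ...) into every saved PNG as text chunks. A minimal sketch for reading them back, assuming only that Pillow is installed; the file path is hypothetical:

```python
from PIL import Image

# Hypothetical path; substitute a file actually produced by the app.
img = Image.open("results/generated_image_2024-01-01_00-00-00_42.png")
print(img.text)  # dict of embedded parameters: prompt, seed, cfg_scale, sampler, ...
```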
import gradio as gr
import pandas as pd
from pathlib import Path
from PIL import Image, PngImagePlugin
import sys
import numpy as np
import torch
from torchvision import transforms as T
sys.path.insert(0, str(Path(__file__).parent.parent))
import datetime
from hydit.constants import SAMPLER_FACTORY
from sample_t2i import inferencer
import os
ROOT = Path(__file__).parent.parent
SAMPLERS = list(SAMPLER_FACTORY.keys())
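# Normalization for control images: maps [0, 1] tensors to [-1, 1] via (x - 0.5) / 0.5.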
norm_transform = T.Compose(
[
T.ToTensor(),
T.Normalize([0.5], [0.5]),
]
)
def get_strings(lang):
lang_file = Path(f"app/lang/{lang}.csv")
strings = pd.read_csv(lang_file, header=0)
strings = strings.set_index("key")["value"].to_dict()
return strings
def get_files_with_extension(path, extension):
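    """Return a {file stem: full path} mapping for files in `path` matching the given extensions."""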
return {
os.path.splitext(file)[0]: os.path.join(path, file)
for file in os.listdir(path)
if os.path.isfile(os.path.join(path, file))
and any(file.endswith(ext) for ext in extension)
}
args, gen, enhancer = inferencer()
output_dir = ROOT / f"{args.output_img_path}"
os.makedirs(output_dir, exist_ok=True)
strings = get_strings(args.lang)
controlnet_list = get_files_with_extension(
args.model_root + "/t2i/controlnet",
[".pt", ".safetensors"],
)
module_list = get_files_with_extension(
args.model_root + "/t2i/model",
[".pt", ".safetensors"],
)
lora_list = get_files_with_extension(
args.model_root + "/t2i/lora",
[".pt", ".safetensors"],
)
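# Swap in a different DiT checkpoint when the user picks one from the model dropdown.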
def upgrade_dit_model_load(model):
model_path = module_list[model]
gen.args.dit_weight = model_path
gen.load_torch_weights()
def generate_metadata(
prompt,
negative_prompt,
seed,
cfg_scale,
infer_steps,
sampler,
imgW,
imgH,
controlnet_module,
control_weight,
lora_ctrls,
):
"""生成图像元数据。"""
return {
"parameters": "Power by HunYun",
"prompt": prompt,
"negative_prompt": negative_prompt,
"seed": seed,
"cfg_scale": cfg_scale,
"infer_steps": infer_steps,
"sampler": sampler,
"imgW": imgW,
"imgH": imgH,
"controlnet_module": controlnet_module,
"control_weight": control_weight,
"lora_ctrls": [
{
"lora_enabled": lora_ctrl[0],
"lora_model": lora_ctrl[1],
"lora_weight": lora_ctrl[2],
}
for lora_ctrl in zip(*[iter(lora_ctrls)] * 3)
],
"model_name": gen.model_name,
}
def infer(
prompt,
negative_prompt,
seed,
cfg_scale,
infer_steps,
sampler,
imgW,
imgH,
input_image,
controlnet_module,
control_weight,
enhance,
img_crop_type,
*lora_ctrls,
):
if enhance and enhancer is not None:
success, enhanced_prompt = enhancer(prompt)
if not success:
fail_image = Image.open(ROOT / "app/fail.png")
return fail_image
else:
enhanced_prompt = None
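    # Gradio passes `lora_ctrls` as flat (enabled, model, weight) triples, one per
    # LoRA row in the UI; keep only the rows whose checkbox is enabled.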
active_loras = [
{"model": lora_ctrls[i + 1], "weight": lora_ctrls[i + 2]}
for i in range(0, len(lora_ctrls), 3)
if lora_ctrls[i]
]
if input_image is not None:
        # Gradio delivers the input image as a NumPy array; convert it to a PIL image.
if isinstance(input_image, np.ndarray):
input_image = Image.fromarray(input_image).convert("RGB")
input_image = gen.pixel_perfect_resolution(
input_image, imgH, imgW, img_crop_type
)
# Apply the normalization transform
input_image = norm_transform(input_image)
# Add batch dimension and move to GPU (if available)
input_image = (
input_image.unsqueeze(0).cuda()
if torch.cuda.is_available()
else input_image.unsqueeze(0)
)
results = gen.predict(
prompt,
image=input_image,
height=imgH,
width=imgW,
seed=seed,
enhanced_prompt=enhanced_prompt,
negative_prompt=negative_prompt,
infer_steps=infer_steps,
guidance_scale=cfg_scale,
batch_size=1,
src_size_cond=None,
sampler=sampler,
control_weight=control_weight,
controlnet=controlnet_module,
lora_ctrls=active_loras,
)
image = results["images"][0]
seed = results["seed"]
metadata = generate_metadata(
prompt,
negative_prompt,
seed,
cfg_scale,
infer_steps,
sampler,
imgW,
imgH,
controlnet_module,
control_weight,
active_loras,
)
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output_path = output_dir.joinpath(f"generated_image_{timestamp}_{seed}.png")
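    # Embed the generation parameters as PNG text chunks so they can be recovered from the file.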
png_info = PngImagePlugin.PngInfo()
for k, v in metadata.items():
png_info.add_text(k, str(v))
image.save(
output_path,
pnginfo=png_info,
)
return image
def ui():
block = gr.Blocks()
description = f"""
# {strings['title']}
## {strings['desc']}
"""
with block:
with gr.Row():
gr.Markdown(description)
with gr.Row():
with gr.Column():
prompt = gr.Textbox(
label=strings["prompt"], value=strings["default prompt"], lines=3
)
with gr.Row():
imgW = gr.Slider(
label=strings["width"],
minimum=64,
maximum=4096,
value=1024,
step=64,
)
imgH = gr.Slider(
label=strings["height"],
minimum=64,
maximum=4096,
value=1024,
step=64,
)
with gr.Row():
infer_steps = gr.Slider(
label=strings["infer steps"],
minimum=1,
maximum=200,
value=100,
step=1,
)
seed = gr.Number(
label=strings["seed"],
minimum=-1,
maximum=1_000_000_000,
value=0,
step=1,
precision=0,
)
enhance = gr.Checkbox(
label=strings["enhance"],
value=enhancer is not None,
interactive=True,
)
with gr.Accordion(strings["accordion"], open=False):
with gr.Row():
negative_prompt = gr.Textbox(
label=strings["negative_prompt"],
value=gen.default_negative_prompt,
lines=2,
)
with gr.Row():
sampler = gr.Dropdown(
SAMPLERS, label=strings["sampler"], value="ddpm"
)
cfg_scale = gr.Slider(
label=strings["cfg"],
minimum=1.0,
maximum=16.0,
value=6.0,
step=1,
)
with gr.Accordion(strings["model_list"], open=False):
with gr.Row():
dit_model = gr.Dropdown(
label=strings["dit_model"],
choices=[
name
for name, path in get_files_with_extension(
args.model_root + "/t2i/model",
[".pt", ".safetensors"],
).items()
],
value=f"pytorch_model_{args.load_key}",
)
dit_model.change(
fn=upgrade_dit_model_load,
inputs=dit_model,
outputs=None,
)
with gr.Accordion(strings["lora_list"], open=False):
lora_ctrls = []
for i in range(5):
with gr.Row():
lora_enabled = gr.Checkbox(
label="Enable",
value=False,
)
lora_model = gr.Dropdown(
label=f"Lora{i+1}",
choices=["none"]
+ [name for name, path in lora_list.items()],
value="none",
)
lora_weight = gr.Slider(
label="weight",
minimum=-1,
maximum=2,
step=0.01,
value=0,
scale=5,
)
lora_ctrls += [lora_enabled, lora_model, lora_weight]
with gr.Accordion(strings["controlnet"], open=False):
with gr.Row():
controlnet_module = gr.Dropdown(
label=strings["controlnet_model"],
choices=["None"]
+ [name for name, path in controlnet_list.items()],
value="None",
)
control_weight = gr.Slider(
label=strings["Control_Weight"],
minimum=0.0,
maximum=2.0,
value=1.0,
step=0.1,
)
input_image = gr.Image(label=strings["input image"])
with gr.Row():
img_crop_type = gr.Radio(
label=strings["Crop_mode"],
choices=[
(strings["Resize"], "Resize"),
(strings["Crop_and_Resize"], "Crop_and_Resize"),
(strings["Resize_and_Fill"], "Resize_and_Fill"),
],
value="Crop_and_Resize",
)
with gr.Row():
advanced_button = gr.Button(strings["run"])
with gr.Column():
default_img = Image.open(ROOT / "app/default.png")
output_img = gr.Image(
label=strings["generated image"],
interactive=False,
format="png",
value=default_img,
)
advanced_button.click(
fn=infer,
inputs=[
prompt,
negative_prompt,
seed,
cfg_scale,
infer_steps,
sampler,
imgW,
imgH,
input_image,
controlnet_module,
control_weight,
enhance,
img_crop_type,
*lora_ctrls,
],
outputs=output_img,
)
with gr.Row():
gr.Examples(
[
["一只小猫"],
[
"现实主义风格,画面主要描述一个巴洛克风格的花瓶,带有金色的装饰边框,花瓶上盛开着各种色彩鲜艳的花,白色背景"
],
["一只聪明的狐狸走在阔叶树林里, 旁边是一条小溪, 细节真实, 摄影"],
["飞流直下三千尺,疑是银河落九天"],
[
"一只长靴猫手持亮银色的宝剑,身着铠甲,眼神坚毅,站在一堆金币上,背景是暗色调的洞穴,图像上有金币的光影点缀。"
],
["麻婆豆腐"],
["苏州园林"],
[
"一颗新鲜的草莓特写,红色的外表,表面布满许多种子,背景是淡绿色的叶子"
],
["请将“杞人忧天”的样子画出来"],
["枯藤老树昏鸦,小桥流水人家"],
[
"湖水清澈,天空湛蓝,阳光灿烂。一只优雅的白天鹅在湖边游泳。它周围有几只小鸭子,看起来非常可爱,整个画面给人一种宁静祥和的感觉。"
],
["一朵鲜艳的红色玫瑰花,花瓣撒有一些水珠,晶莹剔透,特写镜头"],
["臭豆腐"],
["九寨沟"],
["俗语“鲤鱼跃龙门”"],
[
"风格是写实,画面主要描述一个亚洲戏曲艺术家正在表演,她穿着华丽的戏服,脸上戴着精致的面具,身姿优雅,背景是古色古香的舞台,镜头是近景"
],
],
[prompt],
label=strings["examples"],
)
return block
if __name__ == "__main__":
interface = ui()
interface.launch(
server_name=args.server_name,
server_port=args.server_port,
share=args.gradio_share,
)
key,value
size,Size
sampler,Sampler
prompt,Prompt
default prompt,"A cute cat"
negative_prompt,Negative Prompt
seed,Seed
cfg,CFG Scale
infer steps,Sampling Steps
batch size,Batch Size
width cond,Width Cond
height cond,Height Cond
enhance,Prompt Enhancement
run,Submit
square,Square(1024x1024)
landscape,Landscape(1280x768)
portrait,Portrait(768x1280)
accordion,Advanced Options
generated image,HunYuanDiT Generated Image
examples,More Examples
title,Hunyuan-DiT
desc,A Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding
controlnet,controlnet
controlnet_model,model list
Control_Weight,Control Weight
input image,input image
width,width
height,height
model_list,model list
dit_model,dit model
Crop_mode,Crop mode
Resize,Resize
Crop_and_Resize,Crop and Resize
Resize_and_Fill,Resize and Fill
lora_list,lora
key,value
size,尺寸
sampler,采样器
prompt,文本描述
default prompt,"一只可爱的猫"
negative_prompt,负向词
seed,种子
cfg,CFG系数
infer steps,采样步数
batch size,批大小
width cond,宽度条件
height cond,高度条件
enhance,文本增强
run,提交生成
square,方形(1024x1024)
portrait,竖屏(1216x832)
landscape,横屏(832x1216)
accordion,高级设置
generated image,生成
examples,更多示例
title,HunYuanDiT
desc,具有细粒度中文理解的高性能多分辨率 Diffusion Transformer 模型
controlnet,条件控制网络
controlnet_model,模型列表
Control_Weight,控制网络权重
input image,输入图片
model_list,模型列表
dit_model,dit模型
width,width
height,height
Crop_mode,裁剪方式
Resize,仅缩放
Crop_and_Resize,裁剪并缩放
Resize_and_Fill,缩放并填充
lora_list,lora
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import gradio as gr
from PIL import Image
import sys
import os
sys.path.append(os.getcwd())
import json
import numpy as np
from pathlib import Path
import io
import hashlib
import requests
import base64
import pandas as pd
from sample_t2i import inferencer
from mllm.dialoggen_demo import init_dialoggen_model, eval_model
SIZES = {
"正方形(square, 1024x1024)": (1024, 1024),
"风景(landscape, 1280x768)": (768, 1280),
"人像(portrait, 768x1280)": (1280, 768),
}
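# Values are (height, width); the label text shows width x height.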
global_seed = np.random.randint(0, 10000)
# Helper Functions
def image_to_base64(image_path):
with open(image_path, "rb") as image_file:
encoded_image = base64.b64encode(image_file.read()).decode()
return encoded_image
def get_strings(lang):
lang_file = Path(f"app/lang/{lang}.csv")
strings = pd.read_csv(lang_file, header=0)
strings = strings.set_index("key")["value"].to_dict()
return strings
def get_image_md5(image):
image_data = io.BytesIO()
image.save(image_data, format="PNG")
image_data = image_data.getvalue()
md5_hash = hashlib.md5(image_data).hexdigest()
return md5_hash
# Call the DialogGen MLLM service
def request_dialogGen(
server_url="http://0.0.0.0:8080",
history_messages=[],
question="画一个木制的鸟",
image="",
):
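    """POST one chat turn to the DialogGen HTTP service.

    The request body is {"text": ..., "image": base64 string or "", "history": [...]};
    the JSON response carries the reply under "result" and the updated history under "history".
    """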
if image != "":
image = base64.b64encode(open(image, "rb").read()).decode()
print("history_messages before request", history_messages)
headers = {"accept": "application/json", "Content-Type": "application/json"}
data = {
"text": question,
"image": image, # "image为空字符串,则进行文本对话"
"history": history_messages,
}
response = requests.post(server_url, headers=headers, json=data)
print("response", response)
response = response.json()
print(response)
response_text = response["result"]
history_messages = response["history"]
print("history_messages before request", history_messages)
return history_messages, response_text
# Image generation
def image_generation(prompt, infer_steps, seed, image_size):
print(
f"prompt sent to T2I model: {prompt}, infer_steps: {infer_steps}, seed: {seed}, size: {image_size}"
)
height, width = SIZES[image_size]
results = gen.predict(
prompt,
height=height,
width=width,
seed=seed,
infer_steps=infer_steps,
batch_size=1,
)
image = results["images"][0]
file_name = get_image_md5(image)
# Save images
save_dir = Path("results")
save_dir.mkdir(exist_ok=True)
save_path = f"results/multiRound_{file_name}.png"
image.save(save_path)
encoded_image = image_to_base64(save_path)
return encoded_image
# Text-and-image chat
def chat(history_messages, input_text):
history_messages, response_text = request_dialogGen(
history_messages=history_messages, question=input_text
)
return history_messages, response_text
# Main pipeline: enhance each user turn and branch on the <画图> (draw) intent tag.
def pipeline(input_text, state, infer_steps, seed, image_size):
    # Ignore empty input
if len(input_text) == 0:
return state, state[0]
conversation = state[0]
history_messages = state[1]
system_prompt = "请先判断用户的意图,若为画图则在输出前加入<画图>:"
print(f"input history:{history_messages}")
if not isinstance(history_messages, list) and len(history_messages.messages) >= 2:
response, history_messages = enhancer(
input_text, return_history=True, history=history_messages, skip_special=True
)
else:
response, history_messages = enhancer(
input_text,
return_history=True,
history=history_messages,
skip_special=False,
)
history_messages.messages[-1][-1] = response
if "<画图>" in response:
intention_draw = True
else:
intention_draw = False
print(f"response:{response}")
print("-" * 80)
print(f"history_messages:{history_messages}")
print(f"intention_draw:{intention_draw}")
if intention_draw:
prompt = response.split("<画图>")[-1]
        # Generate the image
image_url = image_generation(prompt, infer_steps, seed, image_size)
response = f'<img src="data:image/png;base64,{image_url}" style="display: inline-block;"><p style="font-size: 14px; color: #555; margin-top: 0;">{prompt}</p>'
    conversation += [(input_text, response)]
return [conversation, history_messages], conversation
# Page layout
def upload_image(state, image_input):
conversation = state[0]
history_messages = state[1]
input_image = Image.open(image_input.name).resize((224, 224)).convert("RGB")
input_image.save(image_input.name) # Overwrite with smaller image.
system_prompt = "请先判断用户的意图,若为画图则在输出前加入<画图>:"
history_messages, response = request_dialogGen(
question="这张图描述了什么?",
history_messages=history_messages,
image=image_input.name,
)
conversation += [
(
f'<img src="./file={image_input.name}" style="display: inline-block;">',
response,
)
]
print("conversation", conversation)
print("history_messages after uploading image", history_messages)
return [conversation, history_messages], conversation
def reset():
global global_seed
global_seed = np.random.randint(0, 10000)
return [[], []], []
def reset_last(state):
conversation, history = state[0], state[1]
conversation = conversation[:-1]
history.messages = history.messages[:-2]
return [conversation, history], conversation
if __name__ == "__main__":
# Initialize dialoggen and HunyuanDiT model
args, gen, enhancer = inferencer()
strings = get_strings(args.lang)
css = """
#chatbot { min-height: 800px; }
#save-btn {
background-image: linear-gradient(to right bottom, rgba(130,217,244, 0.9), rgba(158,231,214, 1.0));
}
#save-btn:hover {
background-image: linear-gradient(to right bottom, rgba(110,197,224, 0.9), rgba(138,211,194, 1.0));
}
#share-btn {
background-image: linear-gradient(to right bottom, rgba(130,217,244, 0.9), rgba(158,231,214, 1.0));
}
#share-btn:hover {
background-image: linear-gradient(to right bottom, rgba(110,197,224, 0.9), rgba(138,211,194, 1.0));
}
#gallery { z-index: 999999; }
#gallery img:hover {transform: scale(2.3); z-index: 999999; position: relative; padding-right: 30%; padding-bottom: 30%;}
#gallery button img:hover {transform: none; z-index: 999999; position: relative; padding-right: 0; padding-bottom: 0;}
@media (hover: none) {
    #gallery img:hover {transform: none; z-index: 999999; position: relative; padding-right: 0; padding-bottom: 0;}
}
.html2canvas-container { width: 3000px !important; height: 3000px !important; }
"""
with gr.Blocks(css=css) as demo:
DESCRIPTION = """# <a style="color: black; text-decoration: none;">多轮对话绘图 Multi-turn Text2Image Generation</a>
你可以参照[DialogGen](https://arxiv.org/abs/2403.08857),通过简单的交互式语句来进行历史图片的修改,例如:主体编辑、增加主体、删除主体、背景更换、风格转换、镜头转换、图像合并。
(You can modify previously generated images through simple interactive statements, as described in [DialogGen](https://arxiv.org/abs/2403.08857), such as: entity edit, add object, remove object, change background, change style, change lens, and combine images.)
例如,主体编辑 (For example, entity edit):
```none
Round1: 画一个木制的鸟
(Round1: draw a wooden bird)
Round2: 变成玻璃的
(Round2: turn into glass)
```
"""
gr.Markdown(DESCRIPTION)
gr_state = gr.State([[], []]) # conversation, chat_history
with gr.Row():
with gr.Column(scale=1, min_width=1000):
with gr.Row():
chatbot = gr.Chatbot(
elem_id="chatbot", label="DialogGen&HunyuanDiT"
)
with gr.Row():
infer_steps = gr.Slider(
label="采样步数(sampling steps)",
minimum=1,
maximum=200,
value=100,
step=1,
)
seed = gr.Number(
label="种子(seed)",
minimum=-1,
maximum=1_000_000_000,
value=666,
step=1,
precision=0,
)
size_dropdown = gr.Dropdown(
choices=[
"正方形(square, 1024x1024)",
"风景(landscape, 1280x768)",
"人像(portrait, 768x1280)",
],
value="正方形(square, 1024x1024)",
label="图片尺寸(Image Size)",
)
with gr.Row():
# image_btn = gr.UploadButton("🖼️ Upload Image", file_types=["image"])
text_input = gr.Textbox(
label="提示词(prompt)", placeholder="输入提示词(Type a prompt)"
)
with gr.Column():
submit_btn = gr.Button(
"提交(Submit)", interactive=True, variant="primary"
)
clear_last_btn = gr.Button("回退(Undo)")
clear_btn = gr.Button("全部重置(Reset All)")
with gr.Row():
gr.Examples(
[
["画一个木制的鸟"],
["一只小猫"],
[
"现实主义风格,画面主要描述一个巴洛克风格的花瓶,带有金色的装饰边框,花瓶上盛开着各种色彩鲜艳的花,白色背景"
],
[
"一只聪明的狐狸走在阔叶树林里, 旁边是一条小溪, 细节真实, 摄影"
],
["飞流直下三千尺,疑是银河落九天"],
[
"一只长靴猫手持亮银色的宝剑,身着铠甲,眼神坚毅,站在一堆金币上,背景是暗色调的洞穴,图像上有金币的光影点缀。"
],
["麻婆豆腐"],
["苏州园林"],
[
"一颗新鲜的草莓特写,红色的外表,表面布满许多种子,背景是淡绿色的叶子"
],
["枯藤老树昏鸦,小桥流水人家"],
[
"湖水清澈,天空湛蓝,阳光灿烂。一只优雅的白天鹅在湖边游泳。它周围有几只小鸭子,看起来非常可爱,整个画面给人一种宁静祥和的感觉。"
],
[
"一朵鲜艳的红色玫瑰花,花瓣撒有一些水珠,晶莹剔透,特写镜头"
],
["臭豆腐"],
["九寨沟"],
["俗语“鲤鱼跃龙门”"],
[
"风格是写实,画面主要描述一个亚洲戏曲艺术家正在表演,她穿着华丽的戏服,脸上戴着精致的面具,身姿优雅,背景是古色古香的舞台,镜头是近景"
],
],
[text_input],
label=strings["examples"],
)
gr.Markdown(
"""<p style="font-size: 20px; color: #888;">powered by <a href="https://github.com/Centaurusalpha/DialogGen" target="_blank">DialogGen</a> and <a href="https://github.com/Tencent/HunyuanDiT" target="_blank">HunyuanDiT</a></p>"""
)
text_input.submit(
pipeline,
[text_input, gr_state, infer_steps, seed, size_dropdown],
[gr_state, chatbot],
)
text_input.submit(lambda: "", None, text_input) # Reset chatbox.
submit_btn.click(
pipeline,
[text_input, gr_state, infer_steps, seed, size_dropdown],
[gr_state, chatbot],
)
submit_btn.click(lambda: "", None, text_input) # Reset chatbox.
# image_btn.upload(upload_image, [gr_state, image_btn], [gr_state, chatbot])
clear_last_btn.click(reset_last, [gr_state], [gr_state, chatbot])
clear_btn.click(reset, [], [gr_state, chatbot])
interface = demo
interface.launch(server_name="0.0.0.0", server_port=443, share=False)