Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# HLLM
HLLM, the two-tier LLM model proposed by ByteDance, matches the performance of conventional ID-based methods with only 1/6 to 1/4 of their training data, and improves over the SOTA model by 0.705%.
## Paper
`HLLM: Enhancing Sequential Recommendations via Hierarchical Large Language Models for Item and User Modeling`
- https://arxiv.org/pdf/2409.12740
## Model Structure
Both the Item LLM and the User LLM used for feature extraction adopt TinyLlama-1.1B or Baichuan2-7B; the LLM architecture follows the Llama design that is standard for large language models.
<div align=center>
<img src="./doc/structure.png"/>
</div>
## Algorithm
HLLM decouples item modeling from user modeling. The Item LLM first extracts item features, compressing complex text descriptions into embeddings; the User LLM then models the user profile on top of these item features, thereby leveraging the world knowledge of pre-trained LLMs.
<div align=center>
<img src="./doc/algorithm.png"/>
</div>
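To make the two-tier design concrete, here is a minimal, hypothetical sketch of the forward pass (the module names, the last-token pooling choice, and the projection layer are illustrative assumptions, not the repository's actual implementation):
```python
import torch.nn as nn

class HLLMSketch(nn.Module):
    """Illustrative two-tier HLLM forward pass; not the repository's actual code."""

    def __init__(self, item_llm, user_llm, hidden_size):
        super().__init__()
        self.item_llm = item_llm  # e.g. a TinyLlama-1.1B backbone (HF-style model)
        self.user_llm = user_llm  # same architecture, separate weights
        self.item_proj = nn.Linear(hidden_size, hidden_size)  # assumed projection

    def encode_items(self, input_ids, attention_mask):
        # Item LLM: compress each item's text (title / tag / description)
        # into a single embedding, here taken as the last token's hidden state.
        out = self.item_llm(input_ids=input_ids, attention_mask=attention_mask)
        item_emb = out.last_hidden_state[:, -1, :]        # [num_items, hidden]
        return self.item_proj(item_emb)

    def forward(self, input_ids, attention_mask):
        # User LLM: consume the sequence of item embeddings instead of word
        # tokens and output a user representation used to score the next item.
        item_emb = self.encode_items(input_ids, attention_mask)
        user_out = self.user_llm(inputs_embeds=item_emb.unsqueeze(0))
        return user_out.last_hidden_state[:, -1, :]       # [1, hidden]
```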
## Environment Setup
```
mv HLLM_pytorch HLLM # drop the framework suffix from the directory name
```
### Docker (Option 1)
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.3.0-py3.10-dtk24.04.3-ubuntu20.04
# Replace <your IMAGE ID> with the ID of the image pulled above; for this image it is b272aae8ec72
docker run -it --shm-size=64G -v $PWD/HLLM:/home/HLLM -v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name hllm <your IMAGE ID> bash
cd /home/HLLM
pip install -r requirements.txt
```
### Dockerfile (Option 2)
```
cd /home/HLLM/docker
docker build --no-cache -t hllm:latest .
docker run --shm-size=64G --name hllm -v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video -v $PWD/../../HLLM:/home/HLLM -it hllm bash
# If installing the environment via the Dockerfile takes too long, comment out the pip install step inside it and install the Python packages after the container starts: pip install -r requirements.txt
```
### Anaconda (Option 3)
1. The DCU-specific deep learning libraries required by this project can be downloaded and installed from the 光合 (Guanghe) developer community:
- https://developer.hpccube.com/tool/
```
DTK driver: dtk24.04.3
python: python3.10
torch: 2.3.0
torchvision: 0.18.1
torchaudio: 2.1.2
triton: 2.1.0
flash-attn: 2.6.1
deepspeed: 0.14.2
xformers: 0.0.25
transformers: 4.41.1
```
`Tip: the versions of the DTK driver, python, torch and the other DCU-related tools above must correspond to one another exactly.`
2. Install the remaining, non-DCU-specific libraries according to requirements.txt:
```
cd /home/HLLM
pip install -r requirements.txt
```
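After installation, the core stack can be sanity-checked with a short script (a minimal sketch; the printed versions should match the table above):
```python
# Quick post-install sanity check of the core dependencies.
import torch
import transformers
import deepspeed

print("torch:", torch.__version__)
print("transformers:", transformers.__version__)
print("deepspeed:", deepspeed.__version__)
# On DCU, the ROCm build of PyTorch still exposes devices through the CUDA API.
print("device available:", torch.cuda.is_available())
```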
## Datasets
`PixelRec:`
[interaction](http://113.200.138.88:18080/aidatasets/pixelrec/interaction.git) | [iteminfo](http://113.200.138.88:18080/aidatasets/pixelrec/iteminfo.git)
`Amazon Book Reviews:`
[HLLM](http://113.200.138.88:18080/aidatasets/bytedance/HLLM.git)
The steps below use `Pixel200K` as an example; `Pixel200K` alone is sufficient to run through the whole procedure.
The complete data directory layout is shown below; after downloading the datasets, organize them according to this structure:
```
/home/HLLM/
├── dataset # Store Interactions
│ ├── amazon_books.csv
│ ├── Pixel1M.csv
│ ├── Pixel200K.csv
│ └── Pixel8M.csv
└── information # Store Item Information
├── amazon_books.csv
├── Pixel1M.csv
├── Pixel200K.csv
└── Pixel8M.csv
```
## Training
### Single node, multiple cards
```
cd /home/HLLM
sh train.sh # example using the Pixel200K dataset and the TinyLlama-1.1B-Chat-v1.0 pre-trained model
```
To validate separately on the validation set (the training above already includes validation):
```
# After training, `zero_to_fp32.py` is generated in the checkpoint folder `code/saved_path/HLLM-0.pth/`; follow its instructions to convert the weights from .pt to .bin
cd code
python saved_path/HLLM-0.pth/zero_to_fp32.py saved_path/HLLM-0.pth saved_path/pytorch_model.bin
# Then run validation only:
cd /home/HLLM
sh infer.sh
```
For more details, refer to the upstream project's [`README_origin`](./README_origin.md).
## Inference
## Results
### Accuracy
Dataset: Pixel200K, 5 epochs, training framework: PyTorch.
| device | nce_top1_acc |
|:----------:|:-------:|
| DCU K100AI | 0.164 |
| GPU A800 | 0.164 |
## Application Scenarios
### Algorithm Category
`Recommender systems`
### Key Application Industries
`Retail, media, finance, telecommunications`
## Pre-trained Weights
Fast download center for pre-trained weights: [SCNet AIModels](http://113.200.138.88:18080/aimodels). The pre-trained weights used in this project can be fetched through the fast download channel: [HLLM](http://113.200.138.88:18080/aidatasets/bytedance/HLLM.git)
Hugging Face download: [HLLM](https://huggingface.co/ByteDance/HLLM)
## Source Repository & Issue Feedback
- http://developer.sourcefind.cn/codes/modelzoo/HLLM_pytorch.git
## References
- https://github.com/bytedance/HLLM.git
# [HLLM: Enhancing Sequential Recommendations via Hierarchical Large Language Models for Item and User Modeling](https://arxiv.org/abs/2409.12740)
<div align="center">
[![arXiv](https://img.shields.io/badge/arXiv%20paper-2409.12740-da282a.svg)](https://arxiv.org/abs/2409.12740)
[![huggingface weights](https://img.shields.io/badge/%F0%9F%A4%97%20Weights-ByteDance/HLLM-yellow)](https://huggingface.co/ByteDance/HLLM)
[![Recommendation](https://img.shields.io/badge/Task-Recommendation-blue)]()
</div>
## 🔥 Update
- [2024.09.20] Code and weights are released!
## Installation
1. Install packages via `pip3 install -r requirements.txt`.
Some basic packages are shown below:
```
pytorch==2.1.0
deepspeed==0.14.2
transformers==4.41.1
lightning==2.4.0
flash-attn==2.5.9post1
fbgemm-gpu==0.5.0 [optional for HSTU]
sentencepiece==0.2.0 [optional for Baichuan2]
```
2. Prepare `PixelRec` and `Amazon Book Reviews` Datasets:
1. Download `PixelRec` Interactions and Item Information from [PixelRec](https://github.com/westlake-repl/PixelRec) and put them into the `dataset` and `information` folders.
2. Download `Amazon Book Reviews` [Interactions](http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/ratings_Books.csv) and [Item Information](http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Books.json.gz), process them with `process_books.py`, and put them into the `dataset` and `information` folders. We also provide the processed [Interactions](https://huggingface.co/ByteDance/HLLM/resolve/main/Interactions/amazon_books.csv) and [Item Information](https://huggingface.co/ByteDance/HLLM/resolve/main/ItemInformation/amazon_books.csv) for Books.
3. Please note that Interactions and Item Information should be put into two folders like:
```bash
├── dataset # Store Interactions
│ ├── amazon_books.csv
│ ├── Pixel1M.csv
│ ├── Pixel200K.csv
│ └── Pixel8M.csv
└── information # Store Item Information
├── amazon_books.csv
├── Pixel1M.csv
├── Pixel200K.csv
└── Pixel8M.csv
```
Here `dataset` represents **data_path**, and `information` represents **text_path**.
3. Prepare pre-trained LLM models, such as [TinyLlama](https://github.com/jzhang38/TinyLlama), [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base).
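For example, the TinyLlama checkpoint can be fetched with `huggingface_hub` (a sketch; the repo id and target directory are assumptions, and any Llama-style checkpoint directory works the same way):
```python
from huggingface_hub import snapshot_download

# Download the assumed TinyLlama repo into a local directory that is later
# passed as --item_pretrain_dir / --user_pretrain_dir.
snapshot_download(
    repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    local_dir="item_pretrain_dir",
)
```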
## Training
To train HLLM on PixelRec / Amazon Book Reviews, you can run the following command.
> Set `master_addr`, `master_port`, `nproc_per_node`, `nnodes` and `node_rank` in environment variables for multinodes training.
> All hyper-parameters (except model's config) can be found in code/REC/utils/argument_list.py and passed through CLI. More model's hyper-parameters are in `IDNet/*` or `HLLM/*`.
```bash
# Item and User LLM are initialized by specific pretrain_dir.
python3 main.py \
--config_file overall/LLM_deepspeed.yaml HLLM/HLLM.yaml \ # We use deepspeed for training by default.
--loss nce \
--epochs 5 \
--dataset {Pixel200K / Pixel1M / Pixel8M / amazon_books} \
--train_batch_size 16 \
--MAX_TEXT_LENGTH 256 \
--MAX_ITEM_LIST_LENGTH 10 \
--checkpoint_dir saved_path \
--optim_args.learning_rate 1e-4 \
--item_pretrain_dir item_pretrain_dir \ # Set to LLM dir.
--user_pretrain_dir user_pretrain_dir \ # Set to LLM dir.
--text_path text_path \ # Use absolute path to text files.
--text_keys '[\"title\", \"tag\", \"description\"]' # Please remove tag in books dataset.
```
> You can use `--gradient_checkpointing True` and `--stage 3` with deepspeed to save memory.
You can also train ID-based models by the following command.
```bash
python3 main.py \
--config_file overall/ID.yaml IDNet/{hstu / sasrec / llama_id}.yaml \
--loss nce \
--epochs 201 \
--dataset {Pixel200K / Pixel1M / Pixel8M / amazon_books} \
--train_batch_size 64 \
--MAX_ITEM_LIST_LENGTH 10 \
--optim_args.learning_rate 1e-4
```
To reproduce our experiments on Pixel8M and Books, run the scripts in the `reproduce` folder. You should be able to reproduce the following results.
> For ID-based models, we follow the hyper-parameters from [PixelRec](https://github.com/westlake-repl/PixelRec) and [HSTU](https://github.com/facebookresearch/generative-recommenders/tree/main).
| Method | Dataset | Negatives | R@10 | R@50 | R@200 | N@10 | N@50 | N@200 |
| ------------- | ------- |---------- | ---------- | --------- |---------- | --------- | --------- | --------- |
| HSTU | Pixel8M | 5632 | 4.83 | 10.30 | 18.28 | 2.75 | 3.94 | 5.13 |
| SASRec | Pixel8M | 5632 | 5.08 | 10.62 | 18.64 | 2.92 | 4.12 | 5.32 |
| HLLM-1B | Pixel8M | 5632 | **6.13** | **12.48** | **21.18** | **3.54** | **4.92** | **6.22** |
| HSTU-large | Books | 512 | 5.00 | 11.29 | 20.13 | 2.78 | 4.14 | 5.47 |
| SASRec | Books | 512 | 5.35 | 11.91 | 21.02 | 2.98 | 4.40 | 5.76 |
| HLLM-1B | Books | 512 | **6.97** | **14.61** | **24.78** | **3.98** | **5.64** | **7.16** |
| HSTU-large | Books | 28672 | 6.50 | 12.22 | 19.93 | 4.04 | 5.28 | 6.44 |
| HLLM-1B | Books | 28672 | 9.28 | 17.34 | 27.22 | 5.65 | 7.41 | 8.89 |
| HLLM-7B | Books | 28672 | **9.39** | **17.65** | **27.59** | **5.69** | **7.50** | **8.99** |
## Inference
We provide fine-tuned HLLM models for evaluation; you can download them from the links below or from Hugging Face. Remember to put the weights in `checkpoint_dir`.
| Model | Dataset | Weights |
|:---|:---|:---|
|HLLM-1B | Pixel8M | [HLLM-1B-Pixel8M](https://huggingface.co/ByteDance/HLLM/resolve/main/1B_Pixel8M/pytorch_model.bin)
|HLLM-1B | Books | [HLLM-1B-Books-neg512](https://huggingface.co/ByteDance/HLLM/resolve/main/1B_books_neg512/pytorch_model.bin)
|HLLM-1B | Books | [HLLM-1B-Books](https://huggingface.co/ByteDance/HLLM/resolve/main/1B_books/pytorch_model.bin)
|HLLM-7B | Books | [HLLM-7B-Books](https://huggingface.co/ByteDance/HLLM/resolve/main/7B_books/pytorch_model.bin)
> Please ensure compliance with the respective licenses of [TinyLlama-1.1B](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md) and [Baichuan2-7B](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) when using corresponding weights.
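For instance, a single checkpoint can be pulled into the checkpoint directory with `huggingface_hub` (a sketch; the filename is inferred from the resolve URLs in the table above):
```python
from huggingface_hub import hf_hub_download

# Downloads 1B_Pixel8M/pytorch_model.bin from ByteDance/HLLM into saved_path/.
hf_hub_download(
    repo_id="ByteDance/HLLM",
    filename="1B_Pixel8M/pytorch_model.bin",
    local_dir="saved_path",  # should match --checkpoint_dir
)
```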
Then you can evaluate models with the following command (the same as training, but with `--val_only True`).
```bash
python3 main.py \
--config_file overall/LLM_deepspeed.yaml HLLM/HLLM.yaml \ # We use deepspeed for training by default.
--loss nce \
--epochs 5 \
--dataset {Pixel200K / Pixel1M / Pixel8M / amazon_books} \
--train_batch_size 16 \
--MAX_TEXT_LENGTH 256 \
--MAX_ITEM_LIST_LENGTH 10 \
--checkpoint_dir saved_path \
--optim_args.learning_rate 1e-4 \
--item_pretrain_dir item_pretrain_dir \ # Set to LLM dir.
--user_pretrain_dir user_pretrain_dir \ # Set to LLM dir.
--text_path text_path \ # Use absolute path to text files.
--text_keys '[\"title\", \"tag\", \"description\"]' \ # Please remove tag in books dataset.
--val_only True # Add this for evaluation
```
## Citation
If our work has been helpful to yours, feel free to give us a star ⭐ or cite us using:
```
@article{HLLM,
title={HLLM: Enhancing Sequential Recommendations via Hierarchical Large Language Models for Item and User Modeling},
author={Junyi Chen and Lu Chi and Bingyue Peng and Zehuan Yuan},
journal={arXiv preprint arXiv:2409.12740},
year={2024}
}
```
> Thanks to the excellent code repository [RecBole](https://github.com/RUCAIBox/RecBole), [VisRec](https://github.com/ialab-puc/VisualRecSys-Tutorial-IUI2021), [PixelRec](https://github.com/westlake-repl/PixelRec) and [HSTU](https://github.com/facebookresearch/generative-recommenders/tree/main) !
> HLLM is released under the Apache License 2.0, some codes are modified from HSTU and PixelRec, which are released under the Apache License 2.0 and MIT License, respectively.
#!/bin/bash
set -x
start_time="$(date +%FT%T)"
if [[ "$master_addr" == "" && "$master_port" == "" ]]; then
nnodes=1
node_rank=0
master_port=12345
nproc_per_node=4
torchrun \
--master_port=$master_port \
--node_rank=$node_rank \
--nproc_per_node=$nproc_per_node \
--nnodes=$nnodes \
"$@"
else
torchrun \
--master_addr=$master_addr \
--master_port=$master_port \
--node_rank=$node_rank \
--nproc_per_node=$nproc_per_node \
--nnodes=$nnodes \
"$@"
fi
echo "start_time: $start_time"
model: HLLM
item_pretrain_dir: item_pretrain_dir # Item and User LLM are initialized by pretrain_dir
item_llm_init: True
user_pretrain_dir: user_pretrain_dir
user_llm_init: True
use_ft_flash_attn: True
model: HSTU
n_layers: 2
n_heads: 4
item_embedding_size: 512
hstu_embedding_size: 512
hidden_dropout_prob: 0.1
attn_dropout_prob: 0.1
hidden_act: 'silu'
enable_relative_attention_bias: True
model: LLMIDRec
user_pretrain_dir: user_pretrain_dir
user_llm_init: False
use_ft_flash_attn: True
item_embed_dim: 512
show_progress: True
# better to use cosine scheduler
scheduler_args: {
type: cosine,
warmup: 0.0
}
model: SASRec
n_layers: 2
n_heads: 4
embedding_size: 512
inner_size: 1
hidden_dropout_prob: 0.1
attn_dropout_prob: 0.1
hidden_act: 'gelu'
layer_norm_eps: 1e-12
initializer_range: 0.02
from .configurator import Config
# Copyright (c) 2024 westlake-repl
# SPDX-License-Identifier: MIT
import re
import os
import sys
import yaml
import torch
from logging import getLogger
from enum import Enum
from REC.evaluator import metric_types, smaller_metrics
from REC.utils import get_model, \
general_arguments, training_arguments, evaluation_arguments, dataset_arguments, set_color
class Config(object):
def __init__(self, config_file_list=None):
self._init_parameters_category()
self.yaml_loader = self._build_yaml_loader()
self.final_config_dict = self._load_config_files(config_file_list)
self.model_class = get_model(self.model)
self._set_default_parameters()
def _init_parameters_category(self):
self.parameters = dict()
self.parameters['General'] = general_arguments
self.parameters['Training'] = training_arguments
self.parameters['Evaluation'] = evaluation_arguments
self.parameters['Dataset'] = dataset_arguments
def _build_yaml_loader(self):
loader = yaml.FullLoader
loader.add_implicit_resolver(
u'tag:yaml.org,2002:float',
re.compile(
u'''^(?:
[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
|[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
|\\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
|[-+]?\\.(?:inf|Inf|INF)
|\\.(?:nan|NaN|NAN))$''', re.X
), list(u'-+0123456789.')
)
return loader
def _convert_config_dict(self, config_dict):
r"""This function convert the str parameters to their original type.
"""
for key in config_dict:
param = config_dict[key]
if not isinstance(param, str):
continue
try:
value = eval(param)
if value is not None and not isinstance(value, (str, int, float, list, tuple, dict, bool, Enum)):
value = param
except (NameError, SyntaxError, TypeError):
if isinstance(param, str):
if param.lower() == "true":
value = True
elif param.lower() == "false":
value = False
else:
value = param
else:
value = param
config_dict[key] = value
return config_dict
def _load_config_files(self, file_list):
file_config_dict = dict()
if file_list:
for file in file_list:
with open(file, 'r', encoding='utf-8') as f:
file_config_dict.update(yaml.load(f.read(), Loader=self.yaml_loader))
return file_config_dict
def _load_variable_config_dict(self, config_dict):
# HyperTuning may set the parameters such as mlp_hidden_size in NeuMF in the format of ['[]', '[]']
# then config_dict will receive a str '[]', but indeed it's a list []
# temporarily use _convert_config_dict to solve this problem
return self._convert_config_dict(config_dict) if config_dict else dict()
def _update_internal_config_dict(self, file):
with open(file, 'r', encoding='utf-8') as f:
config_dict = yaml.load(f.read(), Loader=self.yaml_loader)
if config_dict is not None:
self.internal_config_dict.update(config_dict)
return config_dict
def _set_default_parameters(self):
if hasattr(self.model_class, 'input_type'):
self.final_config_dict['MODEL_INPUT_TYPE'] = self.model_class.input_type
metrics = self.final_config_dict['metrics']
if isinstance(metrics, str):
self.final_config_dict['metrics'] = [metrics]
eval_type = set()
for metric in self.final_config_dict['metrics']:
if metric.lower() in metric_types:
eval_type.add(metric_types[metric.lower()])
else:
raise NotImplementedError(f"There is no metric named '{metric}'")
if len(eval_type) > 1:
raise RuntimeError('Ranking metrics and value metrics can not be used at the same time.')
self.final_config_dict['eval_type'] = eval_type.pop()
valid_metric = self.final_config_dict['valid_metric'].split('@')[0]
self.final_config_dict['valid_metric_bigger'] = False if valid_metric.lower() in smaller_metrics else True
topk = self.final_config_dict['topk']
if isinstance(topk, (int, list)):
if isinstance(topk, int):
topk = [topk]
for k in topk:
if k <= 0:
raise ValueError(
f'topk must be a positive integer or a list of positive integers, but got `{k}`'
)
self.final_config_dict['topk'] = topk
else:
raise TypeError(f'The topk [{topk}] must be an integer or a list of integers')
def __setitem__(self, key, value):
if not isinstance(key, str):
raise TypeError("index must be a str.")
self.final_config_dict[key] = value
def __getattr__(self, item):
if 'final_config_dict' not in self.__dict__:
raise AttributeError(f"'Config' object has no attribute 'final_config_dict'")
if item in self.final_config_dict:
return self.final_config_dict[item]
raise AttributeError(f"'Config' object has no attribute '{item}'")
def __getitem__(self, item):
if item in self.final_config_dict:
return self.final_config_dict[item]
else:
return None
def get(self, key, default=None):
res = self[key]
if res is None:
return default
return res
def __contains__(self, key):
if not isinstance(key, str):
raise TypeError("index must be a str.")
return key in self.final_config_dict
def __str__(self):
args_info = '\n'
for category in self.parameters:
args_info += set_color(category + ' Hyper Parameters:\n', 'pink')
args_info += '\n'.join([(set_color("{}", 'cyan') + " =" + set_color(" {}", 'yellow')).format(arg, value)
for arg, value in self.final_config_dict.items()
if arg in self.parameters[category]])
args_info += '\n\n'
args_info += set_color('Other Hyper Parameters: \n', 'pink')
args_info += '\n'.join([
(set_color("{}", 'cyan') + " = " + set_color("{}", 'yellow')).format(arg, value)
for arg, value in self.final_config_dict.items()
if arg not in {
_ for args in self.parameters.values() for _ in args
}.union({'model', 'dataset', 'config_files'})
])
args_info += '\n\n'
return args_info
def __repr__(self):
return self.__str__()
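# Hypothetical usage sketch (not part of the original module). Later files in
# config_file_list overwrite keys from earlier ones, because _load_config_files
# merges them in order with dict.update:
#
#   cfg = Config(config_file_list=['overall/LLM_deepspeed.yaml', 'HLLM/HLLM.yaml'])
#   cfg['train_batch_size']          # __getitem__ returns None for missing keys
#   cfg.get('MAX_TEXT_LENGTH', 256)  # get() falls back to the given default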
from .utils import *
__all__ = ['load_data', 'bulid_dataloader']
# Copyright (c) 2024 westlake-repl
# Copyright (c) 2024 Bytedance Ltd. and/or its affiliate
# SPDX-License-Identifier: MIT
# This file has been modified by Junyi Chen.
#
# Original file was released under MIT, with the full license text
# available at https://choosealicense.com/licenses/mit/.
#
# This modified file is released under the same license.
import copy
import pickle
import os
import yaml
from collections import Counter
from logging import getLogger
import numpy as np
import pandas as pd
import torch
from REC.utils import set_color
from REC.utils.enum_type import InputType
from torch_geometric.utils import degree
class Data:
def __init__(self, config):
self.config = config
self.dataset_path = config['data_path']
self.dataset_name = config['dataset']
self.data_split = config['data_split']
self.item_data = config['item_data']
self.logger = getLogger()
self._from_scratch()
def _from_scratch(self):
self.logger.info(set_color(f'Loading {self.__class__} from scratch with {self.data_split = }.', 'green'))
self._load_inter_feat(self.dataset_name, self.dataset_path, self.item_data)
self._data_processing()
def _load_inter_feat(self, token, dataset_path, item_data=None):
inter_feat_path = os.path.join(dataset_path, f'{token}.csv')
if not os.path.isfile(inter_feat_path):
raise ValueError(f'File {inter_feat_path} does not exist.')
df = pd.read_csv(
inter_feat_path, delimiter=',', dtype={'item_id': str, 'user_id': str, 'timestamp': int}, header=0, names=['item_id', 'user_id', 'timestamp']
)
self.logger.info(f'Interaction feature loaded successfully from [{inter_feat_path}].')
self.inter_feat = df
if item_data:
item_data_path = os.path.join(dataset_path, f'{item_data}.csv')
item_df = pd.read_csv(
item_data_path, delimiter=',', dtype={'item_id': str, 'user_id': str, 'timestamp': int}, header=0, names=['item_id', 'user_id', 'timestamp']
)
self.item_feat = item_df
self.logger.info(f'Item feature loaded successfully from [{item_data}].')
def _data_processing(self):
self.id2token = {}
self.token2id = {}
remap_list = ['user_id', 'item_id']
for feature in remap_list:
if feature == 'item_id' and self.item_data:
feats = self.item_feat[feature]
feats_raw = self.inter_feat[feature]
else:
feats = self.inter_feat[feature]
new_ids_list, mp = pd.factorize(feats)
mp = ['[PAD]'] + list(mp)
token_id = {t: i for i, t in enumerate(mp)}
if feature == 'item_id' and self.item_data:
_, raw_mp = pd.factorize(feats_raw)
for x in raw_mp:
if x not in token_id:
token_id[x] = len(token_id)
mp.append(x)
mp = np.array(mp)
self.id2token[feature] = mp
self.token2id[feature] = token_id
self.inter_feat[feature] = self.inter_feat[feature].map(token_id)
self.user_num = len(self.id2token['user_id'])
self.item_num = len(self.id2token['item_id'])
self.logger.info(f"{self.user_num = } {self.item_num = }")
self.logger.info(f"{self.inter_feat['item_id'].isna().any() = } {self.inter_feat['user_id'].isna().any() = }")
self.inter_num = len(self.inter_feat)
self.uid_field = 'user_id'
self.iid_field = 'item_id'
self.user_seq = None
self.train_feat = None
self.feat_name_list = ['inter_feat'] # self.inter_feat
def build(self):
self.logger.info(f"build {self.dataset_name} dataload")
self.sort(by='timestamp')
user_list = self.inter_feat['user_id'].values
item_list = self.inter_feat['item_id'].values
timestamp_list = self.inter_feat['timestamp'].values
grouped_index = self._grouped_index(user_list)
user_seq = {}
time_seq = {}
for uid, index in grouped_index.items():
user_seq[uid] = item_list[index]
time_seq[uid] = timestamp_list[index]
self.user_seq = user_seq
self.time_seq = time_seq
train_feat = dict()
indices = []
for index in grouped_index.values():
indices.extend(list(index)[:-2])
for k in self.inter_feat:
train_feat[k] = self.inter_feat[k].values[indices]
if self.config['MODEL_INPUT_TYPE'] == InputType.AUGSEQ:
train_feat = self._build_aug_seq(train_feat)
elif self.config['MODEL_INPUT_TYPE'] == InputType.SEQ:
train_feat = self._build_seq(train_feat)
self.train_feat = train_feat
def _grouped_index(self, group_by_list):
index = {}
for i, key in enumerate(group_by_list):
if key not in index:
index[key] = [i]
else:
index[key].append(i)
return index
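# _build_seq splits each user's chronologically ordered interactions into training
# sequences of at most MAX_ITEM_LIST_LENGTH + 1 items. When data_split is unset or
# True and a history is longer than that, the oldest (length % (MAX_ITEM_LIST_LENGTH + 1))
# items are dropped so the remainder divides into full-length chunks; e.g. with
# MAX_ITEM_LIST_LENGTH = 3, a 9-item history yields two 4-item sequences and the
# very first (oldest) interaction is discarded.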
def _build_seq(self, train_feat):
max_item_list_len = self.config['MAX_ITEM_LIST_LENGTH']+1
uid_list, item_list_index = [], []
seq_start = 0
save = False
user_list = train_feat['user_id']
user_list = np.append(user_list, -1)
last_uid = user_list[0]
for i, uid in enumerate(user_list):
if last_uid != uid:
save = True
if save:
if (self.data_split is None or self.data_split == True) and i - seq_start > max_item_list_len:
offset = (i - seq_start) % max_item_list_len
seq_start += offset
x = torch.arange(seq_start, i)
sx = torch.split(x, max_item_list_len)
for sub in sx:
uid_list.append(last_uid)
item_list_index.append(slice(sub[0], sub[-1]+1))
else:
uid_list.append(last_uid)
item_list_index.append(slice(seq_start, i)) # maybe too long but will be truncated in dataloader
save = False
last_uid = uid
seq_start = i
seq_train_feat = {}
seq_train_feat['user_id'] = np.array(uid_list)
seq_train_feat['item_seq'] = []
seq_train_feat['time_seq'] = []
for index in item_list_index:
seq_train_feat['item_seq'].append(train_feat['item_id'][index])
seq_train_feat['time_seq'].append(train_feat['timestamp'][index])
return seq_train_feat
def _build_aug_seq(self, train_feat):
max_item_list_len = self.config['MAX_ITEM_LIST_LENGTH']+1
# by = ['user_id', 'timestamp']
# ascending = [True, True]
# for b, a in zip(by[::-1], ascending[::-1]):
# index = np.argsort(train_feat[b], kind='stable')
# if not a:
# index = index[::-1]
# for k in train_feat:
# train_feat[k] = train_feat[k][index]
uid_list, item_list_index = [], []
seq_start = 0
save = False
user_list = train_feat['user_id']
user_list = np.append(user_list, -1)
last_uid = user_list[0]
for i, uid in enumerate(user_list):
if last_uid != uid:
save = True
if save:
if i - seq_start > max_item_list_len:
offset = (i - seq_start) % max_item_list_len
seq_start += offset
x = torch.arange(seq_start, i)
sx = torch.split(x, max_item_list_len)
for sub in sx:
uid_list.append(last_uid)
item_list_index.append(slice(sub[0], sub[-1]+1))
else:
uid_list.append(last_uid)
item_list_index.append(slice(seq_start, i))
save = False
last_uid = uid
seq_start = i
seq_train_feat = {}
aug_uid_list = []
aug_item_list = []
for uid, item_index in zip(uid_list, item_list_index):
st = item_index.start
ed = item_index.stop
lens = ed - st
for sub_idx in range(1, lens):
aug_item_list.append(train_feat['item_id'][slice(st, st+sub_idx+1)])
aug_uid_list.append(uid)
seq_train_feat['user_id'] = np.array(aug_uid_list)
seq_train_feat['item_seq'] = aug_item_list
return seq_train_feat
def sort(self, by, ascending=True):
if isinstance(self.inter_feat, pd.DataFrame):
self.inter_feat.sort_values(by=by, ascending=ascending, inplace=True)
else:
if isinstance(by, str):
by = [by]
if isinstance(ascending, bool):
ascending = [ascending]
if len(by) != len(ascending):
if len(ascending) == 1:
ascending = ascending * len(by)
else:
raise ValueError(f'by [{by}] and ascending [{ascending}] should have same length.')
for b, a in zip(by[::-1], ascending[::-1]):
index = np.argsort(self.inter_feat[b], kind='stable')
if not a:
index = index[::-1]
for k in self.inter_feat:
self.inter_feat[k] = self.inter_feat[k][index]
@property
def avg_actions_of_users(self):
"""Get the average number of users' interaction records.
Returns:
numpy.float64: Average number of users' interaction records.
"""
if isinstance(self.inter_feat, pd.DataFrame):
return np.mean(self.inter_feat.groupby(self.uid_field).size())
else:
return np.mean(list(Counter(self.inter_feat[self.uid_field]).values()))
@property
def avg_actions_of_items(self):
"""Get the average number of items' interaction records.
Returns:
numpy.float64: Average number of items' interaction records.
"""
if isinstance(self.inter_feat, pd.DataFrame):
return np.mean(self.inter_feat.groupby(self.iid_field).size())
else:
return np.mean(list(Counter(self.inter_feat[self.iid_field]).values()))
@property
def sparsity(self):
"""Get the sparsity of this dataset.
Returns:
float: Sparsity of this dataset.
"""
return 1 - self.inter_num / self.user_num / self.item_num
def __repr__(self):
return self.__str__()
def __str__(self):
info = [set_color(self.dataset_name, 'pink')]
if self.uid_field:
info.extend([
set_color('The number of users', 'blue') + f': {self.user_num}',
set_color('Average actions of users', 'blue') + f': {self.avg_actions_of_users}'
])
if self.iid_field:
info.extend([
set_color('The number of items', 'blue') + f': {self.item_num}',
set_color('Average actions of items', 'blue') + f': {self.avg_actions_of_items}'
])
info.append(set_color('The number of inters', 'blue') + f': {self.inter_num}')
if self.uid_field and self.iid_field:
info.append(set_color('The sparsity of the dataset', 'blue') + f': {self.sparsity * 100}%')
return '\n'.join(info)
def copy(self, new_inter_feat):
"""Given a new interaction feature, return a new :class:`Dataset` object,
whose interaction feature is updated with ``new_inter_feat``, and all the other attributes the same.
Args:
new_inter_feat (Interaction): The new interaction feature that needs to be updated.
Returns:
:class:`~Dataset`: the new :class:`~Dataset` object, whose interaction feature has been updated.
"""
nxt = copy.copy(self)
nxt.inter_feat = new_inter_feat
return nxt
def counter(self, field):
if isinstance(self.inter_feat, pd.DataFrame):
return Counter(self.inter_feat[field].values)
else:
return Counter(self.inter_feat[field])
@property
def user_counter(self):
return self.counter('user_id')
@property
def item_counter(self):
return self.counter('item_id')
def get_norm_adj_mat(self):
r"""Get the normalized interaction matrix of users and items.
Construct the square matrix from the training data and normalize it
using the laplace matrix.
.. math::
A_{hat} = D^{-0.5} \times A \times D^{-0.5}
Returns:
The normalized interaction matrix in Tensor.
"""
row = torch.tensor(self.train_feat[self.uid_field])
col = torch.tensor(self.train_feat[self.iid_field]) + self.user_num
edge_index1 = torch.stack([row, col])
edge_index2 = torch.stack([col, row])
edge_index = torch.cat([edge_index1, edge_index2], dim=1)
deg = degree(edge_index[0], self.user_num + self.item_num)
norm_deg = 1. / torch.sqrt(torch.where(deg == 0, torch.ones([1]), deg))
edge_weight = norm_deg[edge_index[0]] * norm_deg[edge_index[1]]
return edge_index, edge_weight
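# Typical call sequence (descriptive, mirroring how the dataloaders below use this
# class): Data(config) reads <data_path>/<dataset>.csv and remaps raw user/item
# tokens to contiguous ids; build() then sorts by timestamp, groups interactions
# per user, and materializes train_feat, holding out the last two interactions of
# every user for validation and test.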
from .trainset import *
from .evalset import *
from .batchset import *
from .collate_fn import *