init commit for opensource

Signed-off-by: huteng.ht <huteng.ht@bytedance.com>

init commit for opensource
Signed-off-by: huteng.ht <huteng.ht@bytedance.com>
615e9cbf · huteng.ht · 615e9cbf · 615e9cbf · 615e9cbf · 615e9cbf
Commit 615e9cbf authored Feb 21, 2024 by huteng.ht
20 changed files
--- a/.clang-format
+++ b/.clang-format
+---
+Language:        Cpp
+# BasedOnStyle:  Microsoft
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignConsecutiveMacros: false
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Right
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: true
+AllowAllConstructorInitializersOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortLambdasOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: MultiLine
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterCaseLabel:  false
+  AfterClass:      true
+  AfterControlStatement: true
+  AfterEnum:       true
+  AfterFunction:   true
+  AfterNamespace:  true
+  AfterObjCDeclaration: true
+  AfterStruct:     true
+  AfterUnion:      false
+  AfterExternBlock: true
+  BeforeCatch:     true
+  BeforeElse:      true
+  IndentBraces:    false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+BreakBeforeInheritanceComma: false
+BreakInheritanceList: BeforeColon
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit:     120
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DeriveLineEnding: true
+DerivePointerAlignment: false
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IncludeBlocks:   Preserve
+IncludeCategories:
+  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
+    Priority:        2
+    SortPriority:    0
+  - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
+    Priority:        3
+    SortPriority:    0
+  - Regex:           '.*'
+    Priority:        1
+    SortPriority:    0
+IncludeIsMainRegex: '(Test)?$'
+IncludeIsMainSourceRegex: ''
+IndentCaseLabels: false
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 1000
+PointerAlignment: Right
+ReflowComments:  true
+SortIncludes:    false
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+Standard:        Latest
+StatementMacros:
+  - Q_UNUSED
+  - QT_REQUIRE_VERSION
+TabWidth:        4
+UseCRLF:         false
+UseTab:          Never
+...
--- a/.gitignore
+++ b/.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# local build cache
+build
+dist
+*.pt
+veturbo/ops/lib/
+veturbo/lego_pipeline/lib/
+# cmake
+CMakeFiles/
+CMakeCache.txt
+CMakeScripts/
+CMakeTmp/
+cmake_install.cmake
+Makefile
+cmake-build-debug/
+cmake-build-release/
+cmake-build-relwithdebinfo/
+cmake-build-minsize/
+# library
+!veturboio/ops/csrc/lib/
+!veturboio/ops/csrc/lib/*.so
+# vscode
+.vscode
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
+# Changelog
+All notable changes to this project will be documented in this file. See [conventional commits](https://www.conventionalcommits.org/) for commit guidelines.
+---
+## [0.1.2] - 2024-01-25
+### Bug Fixes
+- **(saver)** add return to remove repetitive writing 
+- **(security)** socket path and ut bug 
+- MANIFEST does not contain all fastcrypto lib files 
+### Documentation
+- update readme 
+### Features
+- **(security)** fetch key and iv 
+- **(security)** get and refresh sfcs aksk from datapipe 
+- **(security)** get namenode ip from datapipe and fix write xml bug 
+- **(sfcs)** decide load use sfcs sdk from environ 
+## [0.1.1] - 2023-11-17
+### Bug Fixes
+- **(sfcs)** keep in consistent with reading when open for writing 
+- **(ut)** delete potential residual test file before testing 
+- fix ci release and update readme for pip install 
+### Documentation
+- use index-url as default install method 
+### Features
+- **(ci)** add import format tool in ci 
+- **(saver)** introduce saver class to aggregate save operations 
+- **(security)** add cipher in sfcs sdk 
+- **(sfcs)** load and save pt 
+- load pt file in parallel from sfcs 
+### Miscellaneous Chores
+- bump version to v0.1.0 
+- bump version to v0.1.1 
+### Performance
+- make the read usage with good alignment. 
+<!-- generated by git-cliff -->
--- a/LICENSE
+++ b/LICENSE
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
--- a/MANIFEST.in
+++ b/MANIFEST.in
+include veturboio/ops/csrc/lib/*.so
+include veturboio/ops/csrc/lib/*.so.*
+include veturboio/ops/csrc/include/*.h
--- a/README.md
+++ b/README.md
+# veTurboIO
+火山引擎研发的一款用于高性能读写 PyTorch 模型文件的 Python 库。该库主要基于 safetensors 文件格式，实现了张量数据的高效存储与读取。
+## 安装
+可以直接通过以下方式进行安装：
+```bash
+pip install veturboio -f https://veturbo-cn-beijing.tos-cn-beijing.volces.com/veturboio/index.html
+```
+Tips: 该指令会优先下载与当前 Python、PyTorch 版本匹配的 whl 文件，如果没有找到匹配的 whl 文件，会自动下载源码进行编译安装。
+当使用源码安装时，可增加 `--no-build-isolation` 来使用当前的运行环境进行编译并安装（否则会尝试创建虚拟环境）。
+如果安装失败，可以尝试下载源码后进行安装：
+```bash
+cd veturboio
+python setup.py install
+```
+## 快速开始
+```python
+import torch
+import veturboio
+tensors = {
+   "weight1": torch.zeros((1024, 1024)),
+   "weight2": torch.zeros((1024, 1024))
+}
+veturboio.save_file(tensors, "model.safetensors")
+reloaded_tensor = veturboio.load("model.safetensors", map_location="cpu")
+# check if the tensors are the same
+for k, v in tensors.items():
+    assert torch.allclose(v, reloaded_tensor[k])
+```
+### 使用锁页内存加速连续加载数据到GPU
+```python
+import torch
+import veturboio
+tensors1 = {
+   "weight1": torch.zeros((1024, 1024)),
+   "weight2": torch.zeros((1024, 1024))
+}
+veturboio.save_file(tensors1, "model1.safetensors")
+tensors2 = {
+   "weight1": torch.zeros((1024, 1024)),
+   "weight2": torch.zeros((1024, 1024))
+}
+veturboio.save_file(tensors2, "model2.safetensors")
+helper = veturboio.init_io_helper()
+reloaded_tensor1 = veturboio.load("model1.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper)
+# the map_location may be different
+reloaded_tensor2 = veturboio.load("model2.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper) 
+# check if the tensors are the same
+for k, v in tensors1.items():
+    assert torch.allclose(v.cuda(), reloaded_tensor1[k])
+for k, v in tensors2.items():
+    assert torch.allclose(v.cuda(), reloaded_tensor2[k])
+```
+### 使用SFCS读写模型并启用加解密
+该库可以读写 SFCS 上的模型文件，在此情况下可以启用加解密能力。读写 SFCS 上的模型文件和加解密所需的敏感信息，有两种获取方式：(1) 火山引擎可信服务的 unix domain socket，(2) 环境变量。在没有挂载可信服务 uds 的情况下，可以使用下面的环境变量：
+| 环境变量名                     | 含义                              |
+| ------------------------------ | --------------------------------- |
+| SFCS_ACCESS_KEY                | SFCS文件系统的AK                  |
+| SFCS_SECRET_KEY                | SFCS文件系统的SK                  |
+| SFCS_NAMENODE_ENDPOINT_ADDRESS | SFCS文件系统name节点地址          |
+| VETUROIO_KEY                   | 加解密的128位数据密钥的base64编码 |
+| VETUROIO_IV                    | 加解密的128位初始向量的base64编码 |
+挂载可信服务 uds 或者配置好环境变量后，可以参考下面代码在读写 SFCS 上的模型文件时启用加解密：
+```python
+import torch
+import veturboio
+tensors = {
+   "weight1": torch.zeros((1024, 1024)),
+   "weight2": torch.zeros((1024, 1024))
+}
+# use cpu to encrypt
+veturboio.save_file(tensors, "sfcs://model.safetensors", use_cipher=True)
+# use cpu to decrypt if map_location is cpu
+reloaded_tensor1 = veturboio.load("sfcs://model.safetensors", map_location="cpu", use_cipher=True)
+# use gpu to decrypt if map_location is cuda
+reloaded_tensor2 = veturboio.load("sfcs://model.safetensors", map_location="cuda:0", use_cipher=True)
+# check if the tensors are the same
+for k, v in tensors.items():
+    assert torch.allclose(v, reloaded_tensor1[k])
+for k, v in tensors.items():
+    assert torch.allclose(v, reloaded_tensor2[k])
+```
+### 转换现有的 PyTorch 文件
+```bash
+python -m veturboio.convert -i model.pt -o model.safetensors
+```
+## 性能测试
+直接运行
+```bash
+bash bench/io_bench.sh
+```
+可以得到如下结果
+```
+fs_name    tensor_size     veturboio load_time(s)             torch load_time(s)            
+shm        1073741824      0.08                               0.63                              
+shm        2147483648      0.19                               1.26                              
+shm        4294967296      0.36                               2.32    
+```
+也可以进一步根据以下命令的参数说明调整使用参数
+```bash
+python bench/io_bench.py -h
+```
+## 特性
+- [x] 多线程高性能读取文件；
+- [x] zero-copy 读取，不额外花费内存；
+- [x] 支持直接加载到 CUDA；
+- [x] 支持 bfloat16 数值类型；
+- [x] 支持固定 pin-memory 用于让 GPU 快速反复读取大文件；
+- [x] 兼容 PyTorch 标准格式（无性能提升）；
+- [x] 兼容 safetensors 格式；
+- [x] 特殊加密格式存储；
+## 收益
+标准的 PyTorch 模型文件会经过 zip 与 pickle 两次操作，这两个操作极大地限制了读取速度，同时 unpickle 也会带来潜在的安全风险。我们使用一种自定义的模型格式来存储 tensor 数据，希望可以改善 PyTorch 标准格式所存在的这些问题。目前已经实现的优点有：
+- 多线程读取：当前文件对象主要存放在云端存储上，单线程读取无法达到云存储的带宽上限，必须使用多线程读取才能达到最大的读取速度。PyTorch 标准格式的读取速度受限于 pickle 解析速度，远无法达到云存储的速度上限；
+- 云端适配：基于火山引擎的云端存储（vePFS、SFCS）特性，最大化地利用了云端存储的带宽；
+- 安全性：不再使用 pickle 对象，避免了 pickle 的安全性问题。
+## 更新记录
+前往 [CHANGELOG](./CHANGELOG.md) 了解更多。
\ No newline at end of file
--- a/bench/io_bench.py
+++ b/bench/io_bench.py
+'''
+Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import argparse
+import os
+import time
+from functools import lru_cache
+import numpy as np
+import torch
+import veturboio
def human_read_to_byte(size):
    """Convert a human-readable size such as ``"4GB"`` into a byte count.

    Units are binary multiples (``1KB`` is 1024 bytes) and are matched
    case-insensitively, so ``"4GB"`` and ``"4gb"`` give the same result.
    A value without a unit is interpreted as a plain number of bytes.

    Args:
        size: The size to convert, e.g. ``"512"``, ``"10B"`` or ``"64MB"``.
            Non-string values are converted with ``str()`` first, so a plain
            ``int`` is accepted as well.

    Returns:
        The size in bytes as an ``int``.

    Raises:
        ValueError: If the numeric part is missing or is not an integer.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
    factors = {unit: 1024**power for power, unit in enumerate(units)}

    normalized = str(size).strip().upper()
    # Try the two-character units before the bare "B" so that "KB" is never
    # read as a count followed by "B".
    for width in (2, 1):
        factor = factors.get(normalized[-width:])
        if factor is not None:
            return factor * int(normalized[:-width])
    return int(normalized)
def parse_args(argv=None):
    """Parse the benchmark's command-line options.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the process command line, which is
            the original behaviour.

    Returns:
        An ``argparse.Namespace`` with the attributes ``begin``, ``end``,
        ``base_dir``, ``fs_name``, ``gen_data``, ``map_location``,
        ``use_pinmem`` and ``load_mode``. ``begin`` and ``end`` are returned
        as the raw strings given on the command line.
    """
    parser = argparse.ArgumentParser(
        description='benchmark veturboio, notice to clear page cache manually when benchmarking for existing file'
    )
    parser.add_argument(
        '--begin',
        default='1048576',
        dest='begin',
        help='specify the minimum file size to benchmark in bytes or in format like xxKB/MB/GB',
    )
    parser.add_argument(
        '--end',
        default='1048576',
        dest='end',
        help='specify the maximum file size to benchmark in bytes or in format like xxKB/MB/GB',
    )
    parser.add_argument('--base_dir', dest='base_dir', help='specify the base dir of files to be benchmarked')
    parser.add_argument('--fs_name', default='local_fs', help='file system name that would be displayed in the result')
    parser.add_argument(
        '--gen_data',
        default=False,
        action=argparse.BooleanOptionalAction,
        dest='gen_data',
        help='generate the benchmark files before loading them',
    )
    parser.add_argument(
        '--map_location', default='cpu', dest='map_location', help='map location of tensor to be loaded'
    )
    parser.add_argument(
        '--use_pinmem',
        default=False,
        action=argparse.BooleanOptionalAction,
        dest='use_pinmem',
        help='pass use_pinmem=True to veturboio.load',
    )
    parser.add_argument(
        '--load_mode', default='veturboio', dest='load_mode', help='load modes specified, separated by comma'
    )
    return parser.parse_args(argv)
def print_header(load_modes):
    """Print the column titles of the benchmark result table.

    Args:
        load_modes: Iterable of load-mode names; each one gets its own
            ``<mode> load_time(s)`` column.
    """
    # Each mode title is the mode name followed by a label padded to 25 chars.
    time_label = (' load_time(s)' + ' ').ljust(25)
    mode_titles = [f"{mode}{time_label}" for mode in load_modes]
    leading_titles = f"{'fs_name' + ' ':<10} {'tensor_size' + ' ':<15}"
    print(leading_titles, ' '.join(mode_titles))
def print_load_time(fs_name, tensor_size, load_times):
    """Print one result row: file system name, tensor size and load times.

    Args:
        fs_name: Label of the file system, left-aligned in 10 characters.
        tensor_size: Size of the benchmarked tensor; it is converted with
            ``str()`` and left-aligned in 15 characters.
        load_times: Iterable of already formatted load times, one per mode.
    """
    # Every load time is followed by a fixed 30-space gap.
    gap = ' '.ljust(30)
    cells = [f"{elapsed}{gap}" for elapsed in load_times]
    row_prefix = f"{fs_name:<10} {str(tensor_size):<15}"
    print(row_prefix, ' '.join(cells))
def main():
    """Run the load benchmark described by the command-line options.

    Starting at ``--begin`` and doubling until ``--end`` is exceeded, every
    requested load mode is timed once per tensor size and one result row is
    printed per size. With ``--gen_data`` the benchmark files are written
    first; otherwise they are expected to exist already under ``--base_dir``.

    Raises:
        ValueError: If ``--load_mode`` names a mode other than ``veturboio``
            or ``torch``, or if ``--begin`` is not a positive size.
    """
    args = parse_args()
    load_modes = [mode.strip() for mode in args.load_mode.split(',')]

    # Reject unknown modes up front: otherwise the timing code below would run
    # with an unset (or stale) start time and report a meaningless number.
    supported_modes = ('veturboio', 'torch')
    unknown_modes = [mode for mode in load_modes if mode not in supported_modes]
    if unknown_modes:
        raise ValueError(
            f"unsupported load mode(s) {unknown_modes}; expected a comma-separated subset of {list(supported_modes)}"
        )

    tensor_size = human_read_to_byte(args.begin)
    end_size = human_read_to_byte(args.end)
    # The size is doubled every round, so a non-positive start never terminates.
    if tensor_size <= 0:
        raise ValueError(f"--begin must be a positive size, got {args.begin!r}")

    base_dir = args.base_dir if args.base_dir else ""

    # warmup GPU otherwise the first case would be slow
    device = torch.device(args.map_location)
    if device.type == "cuda":
        file_path = os.path.join(base_dir, 'warmup.safetensors')
        tensors = {"weight": torch.randn(10)}
        veturboio.save_file(tensors, file_path)
        veturboio.load(file_path, map_location=args.map_location, use_pinmem=args.use_pinmem)

    print_header(load_modes)
    while tensor_size <= end_size:
        if args.gen_data:
            # np.dtype(float) is 8 bytes wide; the "* 2" presumably compensates
            # for torch.randn producing 4-byte float32 values by default, so
            # that the tensor occupies about tensor_size bytes - TODO confirm.
            numel = tensor_size // np.dtype(float).itemsize * 2
            tensors = {"weight": torch.randn(numel)}

        load_times = []
        for mode in load_modes:
            if mode == 'veturboio':
                file_path = os.path.join(base_dir, f'{tensor_size}.safetensors')
                if args.gen_data:
                    veturboio.save_file(tensors, file_path)
                # perf_counter is monotonic, unlike time.time, so the measured
                # interval cannot be distorted by wall-clock adjustments.
                start = time.perf_counter()
                loaded_tensor = veturboio.load(file_path, map_location=args.map_location, use_pinmem=args.use_pinmem)
            else:  # mode == 'torch': the .pt file is also read via veturboio.load
                file_path = os.path.join(base_dir, f'{tensor_size}.pt')
                if args.gen_data:
                    veturboio.save_pt(tensors, file_path)
                start = time.perf_counter()
                loaded_tensor = veturboio.load(file_path, map_location=args.map_location)
            end = time.perf_counter()
            load_times.append("%.2f" % (end - start))

            # Drop the loaded tensor before the next run so GPU memory from
            # this round is not still held while the next file is loaded.
            if device.type == "cuda":
                del loaded_tensor
                torch.cuda.empty_cache()

        print_load_time(args.fs_name, tensor_size, load_times)
        tensor_size = tensor_size * 2
+if __name__ == '__main__':
+    main()
--- a/bench/io_bench.sh
+++ b/bench/io_bench.sh
+###
+ # Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
+ # 
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # 
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ # 
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+###
# Run the veturboio-vs-torch load benchmark on generated 1GB-4GB files.
#   $1: base directory (or URI prefix) holding the benchmark files
#   $2: file system label shown in the result table
run_bench() {
    python bench/io_bench.py --load_mode=veturboio,torch --base_dir="$1" --begin=1GB --end=4GB --gen_data --fs_name="$2"
}

# shm
shm_dir=/dev/shm/test_files
mkdir -p "$shm_dir"
run_bench "$shm_dir" shm

# sfcs
run_bench sfcs:// sfcs
--- a/docs/api.md
+++ b/docs/api.md
+# API
+::: veturboio.io
--- a/docs/dynamic_load.md
+++ b/docs/dynamic_load.md
+# 动态加载
--- a/docs/index.md
+++ b/docs/index.md
+# veTurboIO
+火山引擎研发的一款用于高性能读写 PyTorch 模型文件的 Python 库。该库主要基于 safetensors 文件格式，实现了张量数据的高效存储与读取。
+## 安装
+```bash
+cd veturboio
+python setup.py install
+```
+## 快速开始
+```python
+import torch
+import veturboio
+tensors = {
+   "weight1": torch.zeros((1024, 1024)),
+   "weight2": torch.zeros((1024, 1024))
+}
+veturboio.save_file(tensors, "model.safetensors")
+reloaded_tensor = veturboio.load("model.safetensors", map_location="cpu")
+# check if the tensors are the same
+for k, v in tensors.items():
+    assert torch.allclose(v, reloaded_tensor[k])
+```
+### 使用锁页内存加速连续加载数据到GPU
+```python
+import torch
+import veturboio
+tensors1 = {
+   "weight1": torch.zeros((1024, 1024)),
+   "weight2": torch.zeros((1024, 1024))
+}
+veturboio.save_file(tensors1, "model1.safetensors")
+tensors2 = {
+   "weight1": torch.zeros((1024, 1024)),
+   "weight2": torch.zeros((1024, 1024))
+}
+veturboio.save_file(tensors2, "model2.safetensors")
+helper = veturboio.init_io_helper()
+reloaded_tensor1 = veturboio.load("model1.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper)
+# the map_location may be different
+reloaded_tensor2 = veturboio.load("model2.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper) 
+# check if the tensors are the same
+for k, v in tensors1.items():
+    assert torch.allclose(v.cuda(), reloaded_tensor1[k])
+for k, v in tensors2.items():
+    assert torch.allclose(v.cuda(), reloaded_tensor2[k])
+```
+### 转换现有的 PyTorch 文件
+```bash
+python -m veturboio.convert -i model.pt -o model.safetensors
+```
+## 特性
+- 多线程读取文件；
+- zero-copy 读取，不额外花费内存；
+- 支持直接加载到 CUDA；
+- BFloat16 数值支持；
+- 固定 pinmem 用于快速反复读取；
+- 兼容 PyTorch 标准格式（无性能提升）；
+- 兼容 safetensors 格式；
+## 收益
+标准的 PyTorch 模型文件会经过 zip 与 pickle 两次操作，这两个操作极大地限制了读取速度，同时 unpickle 也会带来潜在的安全风险。我们使用一种自定义的模型格式来存储 tensor 数据，希望可以改善 PyTorch 标准格式所存在的这些问题。目前已经实现的优点有：
+- 多线程读取：当前文件主要存放在云端存储上，单线程读取无法达到云存储的带宽上限，必须使用多线程读取才能达到最大的读取速度。PyTorch 标准格式的读取速度受限于 pickle 解析速度，远远无法达到云存储的速度上限；
+- 云端适配：基于火山引擎的云端存储（vePFS、SFCS）特性，最大化地利用了云端存储的带宽；
+- 安全性：不再使用 pickle 对象，避免了 pickle 的安全性问题；
--- a/docs/release.md
+++ b/docs/release.md
--- a/docs/sfcs_support.md
+++ b/docs/sfcs_support.md
+# SFCS 加载优化
--- a/mkdocs.yml
+++ b/mkdocs.yml
+site_name: "veTurboIO"
+theme:
+  name: "material"
+docs_dir: docs
+nav: 
+  - 首页: index.md
+  - 最佳实践:
+    - 动态加载: dynamic_load.md
+    - SFCS 加载优化: sfcs_support.md
+  - API: api.md
+  - 发布日志: release.md
+plugins:
+- mkdocstrings:
+    default_handler: python
\ No newline at end of file
--- a/pyproject.toml
+++ b/pyproject.toml
+[tool.isort]
+profile = "black"  # black-compatible
+line_length = 119  # should match black parameters
+py_version = 310  # python 3.10 as a target version
+sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
+default_section = "THIRDPARTY"
+[tool.black]
+line_length = 119
+skip_string_normalization = true
--- a/setup.py
+++ b/setup.py
+'''
+Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import os
+import torch
+from setuptools import find_packages, setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension
+import platform
+from pkg_resources import parse_version
+# initialize variables for compilation
+IS_LINUX = platform.system() == "Linux"
+IS_DARWIN = platform.system() == "Darwin"
+IS_WINDOWS = platform.system() == "Windows"
+def get_version():
+    import importlib.util
+    spec = importlib.util.spec_from_file_location("version", os.path.join("veturboio", "version.py"))
+    m = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(m)
+    return m.__version__
+def make_relative_rpath(path):
+    if IS_DARWIN:
+        return '-Wl,-rpath,@loader_path/' + path
+    elif IS_WINDOWS:
+        return ''
+    else:
+        return '-Wl,-rpath,$ORIGIN/' + path
+def get_veturboio_extension():
+    # prevent ninja from using too many resources
+    try:
+        import psutil
+        num_cpu = len(psutil.Process().cpu_affinity())
+        cpu_use = max(4, num_cpu - 1)
+    except (ModuleNotFoundError, AttributeError):
+        cpu_use = 4
+    os.environ.setdefault("MAX_JOBS", str(cpu_use))
+    # os.environ.setdefault("TORCH_CUDA_ARCH_LIST", "8.0;8.6")
+    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
+    define_macros = []
+    # Before PyTorch1.8.0, when compiling CUDA code, `cxx` is a
+    # required key passed to PyTorch. Even if there is no flag passed
+    # to cxx, users also need to pass an empty list to PyTorch.
+    # Since PyTorch1.8.0, it has a default value so users do not need
+    # to pass an empty list anymore.
+    # More details at https://github.com/pytorch/pytorch/pull/45956
+    extra_compile_args = {'cxx': [], 'nvcc': ['-O3']}
+    if parse_version(torch.__version__) <= parse_version('1.12.1'):
+        extra_compile_args['cxx'] = ['-std=c++14']
+    else:
+        extra_compile_args['cxx'] = ['-std=c++17']
+    include_dirs = ["veturboio/ops/csrc/include"]
+    library_dirs = ["veturboio/ops/csrc/lib"]
+    libraries = ["cfs", "fastcrypto"]
+    extra_link_args = [make_relative_rpath("veturboio/ops/csrc/lib")]
+    return CUDAExtension(
+        name="veturboio_ext",
+        sources=[
+            "veturboio/ops/csrc/pybind.cpp",
+            "veturboio/ops/csrc/load_utils.cpp",
+            "veturboio/ops/csrc/sfcs.cpp",
+            "veturboio/ops/csrc/io_helper.cu",
+        ],
+        define_macros=define_macros,
+        include_dirs=include_dirs,
+        library_dirs=library_dirs,
+        libraries=libraries,
+        extra_compile_args=extra_compile_args,
+        extra_link_args=extra_link_args,
+    )
+setup(
+    name="veturboio",
+    version=get_version(),
+    description="Effcient PyTorch IO libraray on Volcanic Engine",
+    author="AML Team",
+    ext_modules=[get_veturboio_extension()],
+    packages=find_packages(exclude=("veturboio.ops.csrc.common.sfcs.lib")),
+    install_requires=[
+        "safetensors",
+        "numpy",
+        "loguru",
+        "requests-unixsocket",
+    ],
+    include_package_data=True,
+    cmdclass={"build_ext": BuildExtension},
+)
--- a/tests/test_assert_exceptions.py
+++ b/tests/test_assert_exceptions.py
+'''
+Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import os
+import tempfile
+import unittest
+from unittest import TestCase
+import torch
+import veturboio
+class TestAssertException(TestCase):
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_modify_use_pinmem_attr(self):
+        helper = veturboio.init_io_helper()
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            filepath = os.path.join(tmpdirname, "model.safetensors")
+            veturboio.save_file(self.tensors, filepath)
+            with self.assertRaises(Exception) as context:
+                veturboio.load(filepath, map_location="cuda:0", use_pinmem=False, helper=helper)
+                veturboio.load(filepath, map_location="cuda:0", use_pinmem=True, helper=helper)
+            self.assertTrue(
+                'use_pinmem attribute of an exising IOHelper should not be changed' in str(context.exception)
+            )
+    @classmethod
+    def setUpClass(cls):
+        cls.tensors = {
+            "weight1": torch.randn(20, 10),
+            "weight2": torch.randn(20, 10),
+        }
--- a/tests/test_convert.py
+++ b/tests/test_convert.py
+'''
+Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import os
+import tempfile
+from unittest import TestCase
+import torch
+import veturboio
+class TestConvertUtil(TestCase):
+    def test_convert(self):
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            filepath = os.path.join(tmpdirname, "model.pt")
+            torch.save(self.tensors, filepath)
+            convertpath = os.path.join(tmpdirname, "model.safetensors")
+            print(f"python -m veturboio.convert -i {filepath} -o {convertpath}")
+            os.system(f"python -m veturboio.convert -i {filepath} -o {convertpath}")
+            loaded_tensors = veturboio.load(convertpath)
+            for key in self.tensors.keys():
+                self.assertTrue(torch.allclose(self.tensors[key], loaded_tensors[key]))
+    @classmethod
+    def setUpClass(cls):
+        cls.tensors = {
+            "weight1": torch.randn(20, 10),
+            "weight2": torch.randn(20, 10),
+        }
--- a/tests/test_fetch_cipher.py
+++ b/tests/test_fetch_cipher.py
+'''
+Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import base64
+import http.server
+import json
+import os
+import socketserver
+import tempfile
+import threading
+from datetime import datetime, timedelta
+from time import sleep
+from unittest import TestCase
+import numpy as np
+from veturboio.ops.cipher import CipherInfo, DataPipeClient
+from veturboio.ops.sfcs_utils import (
+    SFCS_OPT_ENV_LIST,
+    SFCS_PROPERTIES,
+    SFCS_REQ_ENV_LIST,
+    credentials_helper,
+    init_sfcs_conf,
+)
+class UnixSocketHttpServer(socketserver.UnixStreamServer):
+    def get_request(self):
+        request, client_address = super().get_request()
+        return (request, ["local", 0])
+class DatapipeHandler(http.server.SimpleHTTPRequestHandler):
+    def do_GET(self):
+        action = self.headers.get('X-Datapipe-Task-Type')
+        if action == 'encrypt-key':
+            self.send_response(200)
+            self.send_header('Content-Type', 'application/json')
+            self.end_headers()
+            self.wfile.write(
+                bytes(
+                    json.dumps({'Key': 'YWJjZGVmZ2gxMjM0NTY3OA==', 'IV': 'MTIzNDU2Nzg4NzY1NDMyMQ=='}), encoding='ascii'
+                )
+            )
+            return
+        if action == 'sfcs-sts':
+            self.send_response(200)
+            self.send_header('Content-Type', 'application/json')
+            self.end_headers()
+            date_now = datetime.now()
+            date_exp = date_now + timedelta(seconds=4)
+            res = {
+                'Cred': {
+                    'CurrentTime': date_now.isoformat(),
+                    'ExpiredTime': date_exp.isoformat(),
+                    'AccessKeyId': 'AKTPODg0MzV**2ZDcxMDg',
+                    'SecretAccessKey': 'TVRNNVlqRmxPR1**mRoTkdWbE1ESQ==',
+                    'SessionToken': 'STSeyJBY2NvdW50SW**kXXXXXXX',  # fake SessionToken real one is longer
+                },
+                'SfcsNameNodeAddress': '100.67.19.231',
+            }
+            self.wfile.write(bytes(json.dumps(res), encoding='ascii'))
+            return
+        self.send_response(400)
+        self.end_headers()
+        return
+class TestCipherInfo(TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.sock_dir = tempfile.TemporaryDirectory()
+        cls.server_address = os.path.join(cls.sock_dir.name, 'datapipe.sock')
+        cls.server = UnixSocketHttpServer(cls.server_address, DatapipeHandler, bind_and_activate=True)
+        def run():
+            cls.server.serve_forever()
+        cls.thread = threading.Thread(target=run)
+        cls.thread.start()
+        cls.target_key = np.frombuffer(b'abcdefgh12345678', dtype=np.byte)
+        cls.target_iv = np.frombuffer(b'1234567887654321', dtype=np.byte)
+    def test_fetch_from_datapipe(self):
+        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
+        info = CipherInfo(True)
+        self.assertTrue(info.use_cipher)
+        self.assertTrue(np.array_equal(info.key, self.target_key))
+        self.assertTrue(np.array_equal(info.iv, self.target_iv))
+    def test_fetch_from_env(self):
+        DataPipeClient.DATAPIPE_SOCKET_PATH = '/path/not/exist'
+        os.environ['VETUROIO_KEY'] = base64.b64encode(b'abcdefgh12345678').decode('ascii')
+        os.environ['VETUROIO_IV'] = base64.b64encode(b'1234567887654321').decode('ascii')
+        info = CipherInfo(True)
+        self.assertTrue(info.use_cipher)
+        self.assertTrue(np.array_equal(info.key, self.target_key))
+        self.assertTrue(np.array_equal(info.iv, self.target_iv))
+    def test_fallback(self):
+        DataPipeClient.DATAPIPE_SOCKET_PATH = '/path/not/exist'
+        os.environ['VETUROIO_KEY'] = base64.b64encode(b'abcdefgh12').decode('ascii')
+        os.environ['VETUROIO_IV'] = base64.b64encode(b'1234567887').decode('ascii')
+        info = CipherInfo(True)
+        self.assertFalse(info.use_cipher)
+    @classmethod
+    def tearDownClass(cls):
+        os.environ.pop('VETUROIO_KEY', None)
+        os.environ.pop('VETUROIO_IV', None)
+        cls.server.shutdown()
+        cls.server.server_close()
+        cls.thread.join()
+        cls.sock_dir.cleanup()
+class TestCredentials(TestCase):
+    """Tests SFCS credential retrieval and ``init_sfcs_conf`` against a fake datapipe server."""
+    @classmethod
+    def setUpClass(cls):
+        """Serve ``DatapipeHandler`` on a Unix socket in a temporary directory, on its own thread."""
+        cls.sock_dir = tempfile.TemporaryDirectory()
+        cls.server_address = os.path.join(cls.sock_dir.name, 'datapipe.sock')
+        cls.server = UnixSocketHttpServer(cls.server_address, DatapipeHandler, bind_and_activate=True)
+        def run():
+            cls.server.serve_forever()
+        cls.thread = threading.Thread(target=run)
+        cls.thread.start()
+    def test_sfcs_sts(self):
+        """``get_sfcs_ak_sk_st`` must return the canned values served by ``DatapipeHandler``."""
+        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
+        client = DataPipeClient()
+        cred = client.get_sfcs_ak_sk_st()
+        self.assertIsNotNone(cred)
+        self.assertEqual(cred['SfcsNameNodeAddress'], '100.67.19.231')
+        cred = cred['Cred']
+        self.assertEqual(cred['AccessKeyId'], 'AKTPODg0MzV**2ZDcxMDg')
+        self.assertEqual(cred['SecretAccessKey'], 'TVRNNVlqRmxPR1**mRoTkdWbE1ESQ==')
+        self.assertEqual(cred['SessionToken'], 'STSeyJBY2NvdW50SW**kXXXXXXX')
+    def test_sfcs_conf(self):
+        """Run ``init_sfcs_conf`` through three scenarios that build on each other's state.
+        The cases mutate ``os.environ`` and ``SFCS_PROPERTIES`` and must run in this order.
+        """
+        # case 1: a xml file already exists, do nothing
+        with tempfile.NamedTemporaryFile() as sfcs_conf:
+            os.environ['LIBCFS_CONF'] = sfcs_conf.name
+            init_sfcs_conf()
+            self.assertFalse(credentials_helper.running)
+        # Give every required variable a placeholder value.
+        # NOTE(review): the 'test-value' suffix of the namenode property key asserted below
+        # presumably comes from one of these variables - confirm against sfcs_utils.
+        for e in SFCS_REQ_ENV_LIST:
+            os.environ[e] = 'test-value'
+        # case 2: env SFCS_ACCESS_KEY and SFCS_SECRET_KEY and SFCS_NAMENODE_ENDPOINT_ADDRESS exists
+        with tempfile.TemporaryDirectory() as conf_dir:
+            conf_path = os.path.join(conf_dir, 'libcfs.xml')
+            os.environ['LIBCFS_CONF'] = conf_path
+            os.environ['SFCS_ACCESS_KEY'] = 'AKTPODg0MzV**2ZDcxMDg'
+            os.environ['SFCS_SECRET_KEY'] = 'TVRNNVlqRmxPR1**mRoTkdWbE1ESQ=='
+            os.environ['SFCS_NAMENODE_ENDPOINT_ADDRESS'] = '100.67.19.231'
+            init_sfcs_conf()
+            self.assertEqual(SFCS_PROPERTIES['cfs.access.key'], 'AKTPODg0MzV**2ZDcxMDg')
+            self.assertEqual(SFCS_PROPERTIES['cfs.secret.key'], 'TVRNNVlqRmxPR1**mRoTkdWbE1ESQ==')
+            self.assertEqual(SFCS_PROPERTIES['cfs.namenode.endpoint.address.test-value'], '100.67.19.231')
+            self.assertFalse(credentials_helper.running)
+            self.assertTrue(os.path.exists(conf_path))
+        # case 3: use datapipe socket to get and refresh ak, sk, st and namenode_ip
+        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
+        with tempfile.TemporaryDirectory() as conf_dir:
+            conf_path = os.path.join(conf_dir, 'libcfs.xml')
+            os.environ['LIBCFS_CONF'] = conf_path
+            # Remove the credentials that case 2 put into the environment and into
+            # SFCS_PROPERTIES, so the values asserted below cannot be leftovers from case 2.
+            os.environ.pop('SFCS_ACCESS_KEY', None)
+            os.environ.pop('SFCS_SECRET_KEY', None)
+            os.environ.pop('SFCS_NAMENODE_ENDPOINT_ADDRESS', None)
+            SFCS_PROPERTIES.pop('cfs.access.key')
+            SFCS_PROPERTIES.pop('cfs.secret.key')
+            SFCS_PROPERTIES.pop('cfs.namenode.endpoint.address.test-value')
+            init_sfcs_conf()
+            self.assertEqual(SFCS_PROPERTIES['cfs.access.key'], 'AKTPODg0MzV**2ZDcxMDg')
+            self.assertEqual(SFCS_PROPERTIES['cfs.secret.key'], 'TVRNNVlqRmxPR1**mRoTkdWbE1ESQ==')
+            self.assertEqual(SFCS_PROPERTIES['cfs.namenode.endpoint.address.test-value'], '100.67.19.231')
+            self.assertEqual(SFCS_PROPERTIES['cfs.security.token'], 'STSeyJBY2NvdW50SW**kXXXXXXX')
+            self.assertTrue(credentials_helper.running)
+            self.assertTrue(os.path.exists(conf_path))
+            # The fake server issues credentials that expire after 4 seconds, so within
+            # 3 seconds current_time is expected to have advanced.
+            # NOTE(review): presumably the helper refreshes shortly before expiry - confirm.
+            t1 = credentials_helper.current_time
+            sleep(3)
+            t2 = credentials_helper.current_time
+            self.assertTrue(t1 < t2)
+            credentials_helper.stop()
+    @classmethod
+    def tearDownClass(cls):
+        """Remove the environment variables and property set by the tests, then stop the server."""
+        os.environ.pop('LIBCFS_CONF', None)
+        for e in SFCS_REQ_ENV_LIST:
+            os.environ.pop(e, None)
+        for e in SFCS_OPT_ENV_LIST:
+            os.environ.pop(e, None)
+        SFCS_PROPERTIES.pop('cfs.security.token', None)
+        cls.server.shutdown()
+        cls.server.server_close()
+        cls.thread.join()
+        cls.sock_dir.cleanup()
--- a/tests/test_load_op.py
+++ b/tests/test_load_op.py
+'''
+Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import os
+import tempfile
+import unittest
+from copy import deepcopy
+from unittest import TestCase
+import torch
+import veturboio
+class TestLoad(TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.tempdir = tempfile.TemporaryDirectory()
+        cls.tensors_0 = {
+            "weight1": torch.randn(2000, 10),
+            "weight2": torch.randn(2000, 10),
+        }
+        cls.tensors_1 = {
+            "weight1": torch.randn(2000, 10),
+            "weight2": torch.randn(2000, 10),
+            "weight3": torch.randn(2000, 10),
+        }
+        cls.filepath_0 = os.path.join(cls.tempdir.name, "model_0.safetensors")
+        cls.filepath_1 = os.path.join(cls.tempdir.name, "model_1.safetensors")
+        veturboio.save_file(cls.tensors_0, cls.filepath_0)
+        veturboio.save_file(cls.tensors_1, cls.filepath_1)
+        cls.pt_filepath = os.path.join(cls.tempdir.name, "model.pt")
+        torch.save(cls.tensors_0, cls.pt_filepath)
+        if torch.cuda.is_available():
+            cls.cuda_tensors_0 = deepcopy(cls.tensors_0)
+            cls.cuda_tensors_1 = deepcopy(cls.tensors_1)
+            for key in cls.cuda_tensors_0.keys():
+                cls.cuda_tensors_0[key] = cls.cuda_tensors_0[key].cuda()
+            for key in cls.cuda_tensors_1.keys():
+                cls.cuda_tensors_1[key] = cls.cuda_tensors_1[key].cuda()
+    @classmethod
+    def tearDownClass(cls):
+        """Delete the temporary directory created in ``setUpClass``."""
+        cls.tempdir.cleanup()
+    def _run_pipeline(self, tensors, filepath, map_location):
+        loaded_tensors = veturboio.load(filepath, map_location=map_location)
+        for key in tensors.keys():
+            self.assertTrue(torch.allclose(tensors[key], loaded_tensors[key]))
+        return loaded_tensors
+    def test_pipeline_cpu(self):
+        """Load the first safetensors file on CPU and compare it with the saved tensors."""
+        self._run_pipeline(self.tensors_0, self.filepath_0, "cpu")
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_pipeline_cuda(self):
+        """Load the first safetensors file onto ``cuda:0`` and compare it with the CUDA copies."""
+        self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0")
+    def test_read_multi_state_dict_cpu(self):
+        load_tensor_0 = self._run_pipeline(self.tensors_0, self.filepath_0, "cpu")
+        load_tensor_1 = self._run_pipeline(self.tensors_1, self.filepath_1, "cpu")
+        self.assertEqual(len(load_tensor_0), 2)
+        self.assertEqual(len(load_tensor_1), 3)
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_read_multi_state_dict_cuda(self):
+        load_tensor_0 = self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0")
+        load_tensor_1 = self._run_pipeline(self.cuda_tensors_1, self.filepath_1, "cuda:0")
+        self.assertEqual(len(load_tensor_0), 2)
+        self.assertEqual(len(load_tensor_1), 3)
+    def test_load_pt_cpu(self):
+        loaded_tensors = veturboio.load(self.pt_filepath, map_location="cpu")
+        for key in self.tensors_0.keys():
+            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_load_pt_cuda(self):
+        loaded_tensors = veturboio.load(self.pt_filepath, map_location="cuda:0")
+        for key in self.tensors_0.keys():
+            self.assertTrue(torch.allclose(self.cuda_tensors_0[key], loaded_tensors[key]))