Commit 12a8520e authored by wangkx1's avatar wangkx1
Browse files

init

parents
#include <torch/library.h>
#include <ATen/ATen.h>
#include <Python.h>
#include <torch/all.h>
#include "test_ops.h"
// Indirection macro; expands directly to TORCH_LIBRARY.
#define TORCH_LIBRARY_EXPAND(NAME, MODULE) TORCH_LIBRARY(NAME, MODULE)
// NOTE(review): defined unconditionally here; it gates the CUDA registration
// block in test_ops_impl.cpp — confirm this is intended on CPU-only builds.
#define TORCH_HAS_CUDA
// Python module initialization function: exports a minimal extension module
// named "_C" with no methods. Importing the shared object runs its static
// initializers, which perform the TORCH_LIBRARY registration below.
extern "C" {
PyObject *PyInit__C(void) {
static struct PyModuleDef module_def = {
PyModuleDef_HEAD_INIT, "_C", /* name of module */
NULL, /* module documentation, may be NULL */
-1, /* size of per-interpreter state of the module,
or -1 if the module keeps state in global variables. */
NULL, /* methods */
};
return PyModule_Create(&module_def);
}
}
// Declare the operator schemas in TORCH_LIBRARY only, without implementations;
// the CPU/CUDA kernels are registered separately via TORCH_LIBRARY_IMPL
// (see test_ops_impl.cpp).
TORCH_LIBRARY_EXPAND(test_ops, ops) {
ops.def("add_one(Tensor input) -> Tensor");
ops.def("multiply_by_two(Tensor input) -> Tensor");
}
\ No newline at end of file
import os
import sys

import torch

# Make the directory containing this script importable BEFORE importing the
# compiled extension. The original inserted the path *after* `import _C`,
# which defeated the purpose of the insert: the import only succeeded when
# the interpreter happened to be launched from this directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Importing the compiled extension registers the custom operators under
# torch.ops.test_ops as a side effect of loading the shared object.
import _C as test_ops  # noqa: E402 - deliberate import after path setup
def test_add_one():
    """Verify that the custom add_one operator adds 1 to every element."""
    print("\n测试 add_one 操作符:")
    # Fixed sample input.
    sample = torch.tensor([1.0, 2.0, 3.0])
    print(f"输入: {sample}")
    # Invoke the custom operator registered by the compiled extension.
    result = torch.ops.test_ops.add_one(sample)
    print(f"输出: {result}")
    # Compare against the plain eager-mode computation.
    reference = sample + 1
    assert torch.allclose(result, reference), f"结果不匹配: {result} vs {reference}"
    print("✓ 测试通过")
def test_multiply_by_two():
    """Verify that the custom multiply_by_two operator doubles every element."""
    print("\n测试 multiply_by_two 操作符:")
    # Fixed sample input.
    sample = torch.tensor([1.0, 2.0, 3.0])
    print(f"输入: {sample}")
    # Invoke the custom operator registered by the compiled extension.
    result = torch.ops.test_ops.multiply_by_two(sample)
    print(f"输出: {result}")
    # Compare against the plain eager-mode computation.
    reference = sample * 2
    assert torch.allclose(result, reference), f"结果不匹配: {result} vs {reference}"
    print("✓ 测试通过")
def test_cuda_support():
    """Run both custom operators on a CUDA tensor when a GPU is available.

    Prints a skip message and returns early when torch.cuda.is_available()
    is False.
    """
    # Guard clause instead of wrapping the whole body in the if-branch.
    if not torch.cuda.is_available():
        print("\nCUDA 不可用,跳过 CUDA 测试")
        return
    print("\n测试 CUDA 支持:")
    x = torch.tensor([1.0, 2.0, 3.0]).cuda()
    print(f"CUDA 输入: {x}")
    # add_one on the GPU.
    y = torch.ops.test_ops.add_one(x)
    print(f"add_one 输出: {y}")
    # Fixed: the original assert messages were f-strings with no placeholders
    # (lint F541); include the offending values so failures are diagnosable.
    assert torch.allclose(y, x + 1), f"CUDA add_one 结果不匹配: {y} vs {x + 1}"
    # multiply_by_two on the GPU.
    z = torch.ops.test_ops.multiply_by_two(x)
    print(f"multiply_by_two 输出: {z}")
    assert torch.allclose(z, x * 2), f"CUDA multiply_by_two 结果不匹配: {z} vs {x * 2}"
    print("✓ CUDA 测试通过")
if __name__ == "__main__":
    print("测试 TORCH_LIBRARY_EXPAND 示例")
    print("=" * 50)
    # Run every test case in order; each raises AssertionError on failure.
    for case in (test_add_one, test_multiply_by_two, test_cuda_support):
        case()
    print("\n" + "=" * 50)
    print("所有测试通过!")
import torch
import os
import sys
# Add the directory of this script to the Python path so the locally built
# extension module sitting next to it can be found by `import test_ops`.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Try to load the custom-operator extension; fail fast with a hint when it
# has not been compiled yet.
try:
    # NOTE: the C++ extension must be compiled first; importing it has the
    # side effect of registering the operators under torch.ops.test_ops.
    import test_ops
    print("成功加载 test_ops 扩展")
except ImportError as e:
    print(f"加载扩展失败: {e}")
    print("请先编译 C++ 扩展")
    sys.exit(1)
def test_add_one():
    """Verify that the custom add_one operator adds 1 to every element."""
    print("\n测试 add_one 操作符:")
    # Fixed sample input.
    sample = torch.tensor([1.0, 2.0, 3.0])
    print(f"输入: {sample}")
    # Invoke the custom operator registered by the compiled extension.
    result = torch.ops.test_ops.add_one(sample)
    print(f"输出: {result}")
    # Compare against the plain eager-mode computation.
    reference = sample + 1
    assert torch.allclose(result, reference), f"结果不匹配: {result} vs {reference}"
    print("✓ 测试通过")
def test_multiply_by_two():
    """Verify that the custom multiply_by_two operator doubles every element."""
    print("\n测试 multiply_by_two 操作符:")
    # Fixed sample input.
    sample = torch.tensor([1.0, 2.0, 3.0])
    print(f"输入: {sample}")
    # Invoke the custom operator registered by the compiled extension.
    result = torch.ops.test_ops.multiply_by_two(sample)
    print(f"输出: {result}")
    # Compare against the plain eager-mode computation.
    reference = sample * 2
    assert torch.allclose(result, reference), f"结果不匹配: {result} vs {reference}"
    print("✓ 测试通过")
def test_cuda_support():
    """Run both custom operators on a CUDA tensor when a GPU is available.

    Prints a skip message and returns early when torch.cuda.is_available()
    is False.
    """
    # Guard clause instead of wrapping the whole body in the if-branch.
    if not torch.cuda.is_available():
        print("\nCUDA 不可用,跳过 CUDA 测试")
        return
    print("\n测试 CUDA 支持:")
    x = torch.tensor([1.0, 2.0, 3.0]).cuda()
    print(f"CUDA 输入: {x}")
    # add_one on the GPU.
    y = torch.ops.test_ops.add_one(x)
    print(f"add_one 输出: {y}")
    # Fixed: the original assert messages were f-strings with no placeholders
    # (lint F541); include the offending values so failures are diagnosable.
    assert torch.allclose(y, x + 1), f"CUDA add_one 结果不匹配: {y} vs {x + 1}"
    # multiply_by_two on the GPU.
    z = torch.ops.test_ops.multiply_by_two(x)
    print(f"multiply_by_two 输出: {z}")
    assert torch.allclose(z, x * 2), f"CUDA multiply_by_two 结果不匹配: {z} vs {x * 2}"
    print("✓ CUDA 测试通过")
if __name__ == "__main__":
    print("测试 TORCH_LIBRARY_EXPAND 示例")
    print("=" * 50)
    # Run every test case in order; each raises AssertionError on failure.
    for case in (test_add_one, test_multiply_by_two, test_cuda_support):
        case()
    print("\n" + "=" * 50)
    print("所有测试通过!")
cmake_minimum_required(VERSION 3.18)
project(test_torch_library_expand)
# C++ standard required by the PyTorch headers used here.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
# Locate Python (generic FindPython module rather than FindPythonLibs).
find_package(Python 3.10 COMPONENTS Interpreter Development REQUIRED)
# Point CMake at the Torch config shipped inside the pip-installed torch.
# NOTE(review): hard-coded to a specific interpreter layout — confirm this
# path on other machines / Python versions.
set(Torch_DIR /usr/local/lib/python3.10/dist-packages/torch/share/cmake/Torch)
# Find and load the Torch package.
find_package(Torch REQUIRED)
# Build the extension library from both translation units:
# operator schema definitions + operator implementations.
add_library(test_ops SHARED
test_torch_library_expand.cpp
test_ops_impl.cpp
)
# Link against PyTorch and the Python runtime.
target_link_libraries(test_ops PRIVATE
${TORCH_LIBRARIES}
Python::Python
)
# Name the output exactly "test_ops.so" (no "lib" prefix) so that Python's
# `import test_ops` can find it.
set_target_properties(test_ops PROPERTIES
PREFIX ""
SUFFIX ".so"
)
# Header search paths.
target_include_directories(test_ops PRIVATE
${TORCH_INCLUDE_DIRS}
${Python_INCLUDE_DIRS}
)
# Select CUDA architectures (only relevant when a CUDA build is configured).
if (TORCH_CUDA_ARCH_LIST)
set(CUDA_ARCH_LIST ${TORCH_CUDA_ARCH_LIST})
else()
set(CUDA_ARCH_LIST "6.0;6.1;7.0;7.5;8.0;8.6")
endif()
# Diagnostics.
message(STATUS "PyTorch 版本: ${Torch_VERSION}")
# NOTE(review): TORCH_CUDA_AVAILABLE is not a variable TorchConfig.cmake is
# documented to set — confirm it is defined, otherwise this prints empty and
# the block below never runs.
message(STATUS "CUDA 可用: ${TORCH_CUDA_AVAILABLE}")
if (TORCH_CUDA_AVAILABLE)
message(STATUS "CUDA 版本: ${CUDA_VERSION_STRING}")
message(STATUS "CUDA 架构: ${CUDA_ARCH_LIST}")
endif()
\ No newline at end of file
#!/bin/bash
# Build the test_ops extension with CMake and run the Python test driver.
#
# Fixed: the original script had no error handling (a failed cmake or cp
# still ran the tests) and used unquoted variable expansions.
set -euo pipefail

BUILD_DIR="./build"

# Configure and compile in an out-of-source build directory.
mkdir -p "$BUILD_DIR"
cd "$BUILD_DIR"
cmake ..
cmake --build .

# Copy the built library next to the test script so Python can import it.
cp test_ops.so ..
cd ..

# Run the tests against the freshly built extension.
python test_torch_library_expand.py
Processing /data/wkx/develop/llm-infer-opt/vllm/workspace_4part
Preparing metadata (pyproject.toml): started
Preparing metadata (pyproject.toml): finished with status 'done'
Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from test_ops==0.1.0) (2.5.1+das.opt1.dtk25042)
Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->test_ops==0.1.0) (3.20.1)
Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->test_ops==0.1.0) (4.15.0)
Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->test_ops==0.1.0) (3.4.2)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->test_ops==0.1.0) (3.1.6)
Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->test_ops==0.1.0) (2025.10.0)
Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->test_ops==0.1.0) (1.13.1)
Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=1.10.0->test_ops==0.1.0) (1.3.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->test_ops==0.1.0) (3.0.3)
Building wheels for collected packages: test_ops
Building wheel for test_ops (pyproject.toml): started
Building wheel for test_ops (pyproject.toml): finished with status 'done'
Created wheel for test_ops: filename=test_ops-0.1.0-cp310-cp310-linux_x86_64.whl size=2411654 sha256=6a501c3539b689e504aee2a10cfe217131ba065c353c5670ae987181b4a19390
Stored in directory: /tmp/pip-ephem-wheel-cache-iorcxf73/wheels/c0/05/cc/eead000af8b8cafeb5b86d18cf5c5281da267de35757b7851d
Successfully built test_ops
Installing collected packages: test_ops
Attempting uninstall: test_ops
Found existing installation: test_ops 0.1.0
Can't uninstall 'test_ops'. No files were found to uninstall.
Successfully installed test_ops-0.0.1
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.
[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: python3 -m pip install --upgrade pip
import os
import torch
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension

# Name of the extension module produced by this build.
library_name = "test_ops"
# Directory containing this setup.py; sources are addressed relative to it.
current_dir = os.path.dirname(os.path.abspath(__file__))
# C++ translation units: operator schema definitions + implementations.
sources = [
    os.path.join(current_dir, "test_torch_library_expand.cpp"),
    os.path.join(current_dir, "test_ops_impl.cpp"),
]
# Pick the extension type based on CUDA availability *on the build machine*.
# NOTE(review): torch.cuda.is_available() needs a visible GPU at build time;
# building on a CPU-only box silently produces a CPU-only extension — confirm
# that is acceptable.
use_cuda = torch.cuda.is_available()
extension = CUDAExtension if use_cuda else CppExtension
if use_cuda:
    # Also compile any .cu kernel files sitting next to this script.
    import glob
    cuda_files = glob.glob(os.path.join(current_dir, "*.cu"))
    sources.extend(cuda_files)
    print(f"CUDA files found: {cuda_files}")
# Per-toolchain compiler flags.
extra_compile_args = {
    'cxx': ['-O2', '-std=c++17'],
}
if use_cuda:
    extra_compile_args['nvcc'] = ['-O2']
setup(
    name=library_name,
    version='0.1.0',
    ext_modules=[
        extension(
            name=library_name,
            sources=sources,
            extra_compile_args=extra_compile_args,
            include_dirs=[current_dir],
        )
    ],
    cmdclass={
        # BuildExtension wires in the PyTorch-specific compilation steps.
        'build_ext': BuildExtension
    },
    install_requires=['torch>=1.10.0'],
)
\ No newline at end of file
#pragma once
#include <torch/library.h>
#include <ATen/ATen.h>
// Declarations of the operator implementations; the definitions live in
// test_ops_impl.cpp and are registered there via TORCH_LIBRARY_IMPL.
namespace test_ops_impl {
// Returns input + 1, element-wise.
at::Tensor add_one(at::Tensor input);
// Returns input * 2, element-wise.
at::Tensor multiply_by_two(at::Tensor input);
}
\ No newline at end of file
#include <torch/library.h>
#include <ATen/ATen.h>
#include "test_ops.h"
// Thin wrapper so the implementation-registration site reads symmetrically
// with the TORCH_LIBRARY_EXPAND macro used at the schema-definition site.
#define TORCH_LIBRARY_IMPL_EXPAND(NAME, DEVICE, MODULE) \
TORCH_LIBRARY_IMPL(NAME, DEVICE, MODULE)
// NOTE(review): defined unconditionally, so the CUDA registration block
// below always compiles — confirm this is intended on CPU-only builds.
#define TORCH_HAS_CUDA
namespace test_ops_impl {

// Concrete operator kernels; ATen ops dispatch on the input's device.

// Element-wise: out[i] = input[i] + 1.
at::Tensor add_one(at::Tensor input) {
  auto result = at::add(input, 1);
  return result;
}

// Element-wise: out[i] = input[i] * 2.
at::Tensor multiply_by_two(at::Tensor input) {
  auto result = at::mul(input, 2);
  return result;
}

}  // namespace test_ops_impl
// Register the CPU implementations for the operators whose schemas were
// declared in TORCH_LIBRARY(test_ops, ...).
TORCH_LIBRARY_IMPL_EXPAND(test_ops, CPU, cpu_ops) {
cpu_ops.impl("add_one", &test_ops_impl::add_one);
cpu_ops.impl("multiply_by_two", &test_ops_impl::multiply_by_two);
}
// Register the CUDA implementations (guarded by TORCH_HAS_CUDA, which is
// defined unconditionally above).
#ifdef TORCH_HAS_CUDA
TORCH_LIBRARY_IMPL_EXPAND(test_ops, CUDA, cuda_ops) {
// NOTE: CPU and CUDA reuse the same implementation functions here, which
// works because the ATen `+`/`*` used inside them dispatch on the input
// tensor's device; register dedicated CUDA kernels here if ever needed.
cuda_ops.impl("add_one", &test_ops_impl::add_one);
cuda_ops.impl("multiply_by_two", &test_ops_impl::multiply_by_two);
}
#endif
\ No newline at end of file
#include <torch/library.h>
#include <ATen/ATen.h>
#include <Python.h>
#include <torch/all.h>
#include "test_ops.h"
// Indirection macro; expands directly to TORCH_LIBRARY.
#define TORCH_LIBRARY_EXPAND(NAME, MODULE) TORCH_LIBRARY(NAME, MODULE)
#define TORCH_HAS_CUDA
// // Python module initialization function (deliberately disabled in this
// // variant; without a PyInit_<name> symbol, `import test_ops` fails with
// // "dynamic module does not define module export function").
// extern "C" {
// PyObject *PyInit_test_ops(void) {
// static struct PyModuleDef module_def = {
// PyModuleDef_HEAD_INIT,
// "test_ops", // module name
// "Test operations module", // docstring
// -1,
// NULL // method table
// };
// return PyModule_Create(&module_def);
// }
// }
// Declare the operator schemas in TORCH_LIBRARY only, without
// implementations; kernels are registered via TORCH_LIBRARY_IMPL elsewhere.
TORCH_LIBRARY_EXPAND(test_ops, ops) {
ops.def("add_one(Tensor input) -> Tensor");
ops.def("multiply_by_two(Tensor input) -> Tensor");
}
\ No newline at end of file
import torch
import os
import sys
# Add the directory of this script to the Python path so the locally built
# extension module sitting next to it can be found by `import test_ops`.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Try to load the custom-operator extension; fail fast with a hint when it
# has not been compiled yet.
try:
    # NOTE: the C++ extension must be compiled first; importing it has the
    # side effect of registering the operators under torch.ops.test_ops.
    import test_ops
    print("成功加载 test_ops 扩展")
except ImportError as e:
    print(f"加载扩展失败: {e}")
    print("请先编译 C++ 扩展")
    sys.exit(1)
def test_add_one():
    """Verify that the custom add_one operator adds 1 to every element."""
    print("\n测试 add_one 操作符:")
    # Fixed sample input.
    sample = torch.tensor([1.0, 2.0, 3.0])
    print(f"输入: {sample}")
    # Invoke the custom operator registered by the compiled extension.
    result = torch.ops.test_ops.add_one(sample)
    print(f"输出: {result}")
    # Compare against the plain eager-mode computation.
    reference = sample + 1
    assert torch.allclose(result, reference), f"结果不匹配: {result} vs {reference}"
    print("✓ 测试通过")
def test_multiply_by_two():
    """Verify that the custom multiply_by_two operator doubles every element."""
    print("\n测试 multiply_by_two 操作符:")
    # Fixed sample input.
    sample = torch.tensor([1.0, 2.0, 3.0])
    print(f"输入: {sample}")
    # Invoke the custom operator registered by the compiled extension.
    result = torch.ops.test_ops.multiply_by_two(sample)
    print(f"输出: {result}")
    # Compare against the plain eager-mode computation.
    reference = sample * 2
    assert torch.allclose(result, reference), f"结果不匹配: {result} vs {reference}"
    print("✓ 测试通过")
def test_cuda_support():
    """Run both custom operators on a CUDA tensor when a GPU is available.

    Prints a skip message and returns early when torch.cuda.is_available()
    is False.
    """
    # Guard clause instead of wrapping the whole body in the if-branch.
    if not torch.cuda.is_available():
        print("\nCUDA 不可用,跳过 CUDA 测试")
        return
    print("\n测试 CUDA 支持:")
    x = torch.tensor([1.0, 2.0, 3.0]).cuda()
    print(f"CUDA 输入: {x}")
    # add_one on the GPU.
    y = torch.ops.test_ops.add_one(x)
    print(f"add_one 输出: {y}")
    # Fixed: the original assert messages were f-strings with no placeholders
    # (lint F541); include the offending values so failures are diagnosable.
    assert torch.allclose(y, x + 1), f"CUDA add_one 结果不匹配: {y} vs {x + 1}"
    # multiply_by_two on the GPU.
    z = torch.ops.test_ops.multiply_by_two(x)
    print(f"multiply_by_two 输出: {z}")
    assert torch.allclose(z, x * 2), f"CUDA multiply_by_two 结果不匹配: {z} vs {x * 2}"
    print("✓ CUDA 测试通过")
if __name__ == "__main__":
    print("测试 TORCH_LIBRARY_EXPAND 示例")
    print("=" * 50)
    # Run every test case in order; each raises AssertionError on failure.
    for case in (test_add_one, test_multiply_by_two, test_cuda_support):
        case()
    print("\n" + "=" * 50)
    print("所有测试通过!")
Pytorch 自定义算子的实现
- 1part:
test_torch_library_expand.cpp 中,在同一个 TORCH_LIBRARY 中同时存在 define 和 impl;
PyTorch 的设计允许:
简单情况:直接在 TORCH_LIBRARY 中同时完成定义和默认实现
复杂情况:使用 TORCH_LIBRARY_IMPL 为不同设备(CPU/CUDA)或后端提供专门的实现
- 2part:
> TORCH_LIBRARY 和 TORCH_LIBRARY_IMPL 分布在多个 CPP 文件中实现;
test_torch_library_expand.cpp 包含 PyInit_test_ops 的初始化以及创建;
支持2种编译方法:
1. `bash build.sh`
2. pip 安装:
```bash
pip install --no-build-isolation .
python3 test_torch_library_expand.py
```
- 3part:
PyInit 的模块名字发生变化;
build.sh 方式编译的导入到python需要使用 test_torch_library_expand_build_sh.py
- 4part:
test_torch_library_expand.cpp 没有包含
```c++
// // Python模块初始化函数
// extern "C" {
// PyObject *PyInit_test_ops(void) {
// static struct PyModuleDef module_def = {
// PyModuleDef_HEAD_INIT,
// "test_ops", // 模块名
// "Test operations module", // 文档
// -1,
// NULL // 方法定义
// };
// return PyModule_Create(&module_def);
// }
// }
```
报错:
```
加载扩展失败: dynamic module does not define module export function (PyInit_test_ops)
请先编译 C++ 扩展
```
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment