Commit b2735663 authored by liucong8560's avatar liucong8560

Merge branch 'develop' into 'master'

Develop

See merge request !2
parents 5a36110c f6ecdce1
#! /bin/sh
############### Ubuntu ###############
# Reference: https://docs.opencv.org/3.4.11/d7/d9f/tutorial_linux_install.html
# apt-get install build-essential -y
# apt-get install cmake git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev -y
# apt-get install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libjasper-dev libdc1394-22-dev -y # packages needed for image processing, optional
############### CentOS ###############
yum install gcc gcc-c++ gtk2-devel gimp-devel gimp-devel-tools gimp-help-browser zlib-devel libtiff-devel libjpeg-devel libpng-devel gstreamer-devel libavc1394-devel libraw1394-devel libdc1394-devel jasper-devel jasper-utils swig python libtool nasm -y
\ No newline at end of file
......@@ -2,7 +2,7 @@
cmake_minimum_required(VERSION 3.5)
# Set the project name
project(MIGraphX_Samples)
project(GPT2)
# Set the compiler
set(CMAKE_CXX_COMPILER g++)
......@@ -12,7 +12,6 @@ set(CMAKE_BUILD_TYPE release)
# Add header file paths
set(INCLUDE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Src/
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/
${CMAKE_CURRENT_SOURCE_DIR}/Src/NLP/GPT2/
$ENV{DTKROOT}/include/
${CMAKE_CURRENT_SOURCE_DIR}/depend/include/)
include_directories(${INCLUDE_PATH})
......@@ -23,11 +22,7 @@ set(LIBRARY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/depend/lib64/
link_directories(${LIBRARY_PATH})
# Add dependent libraries
set(LIBRARY opencv_core
opencv_imgproc
opencv_imgcodecs
opencv_dnn
migraphx_ref
set(LIBRARY migraphx_ref
migraphx
migraphx_c
migraphx_device
......@@ -37,12 +32,10 @@ link_libraries(${LIBRARY})
# Add source files
set(SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/Src/main.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Sample.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/NLP/GPT2/tokenization.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/NLP/GPT2/utf8proc.c
${CMAKE_CURRENT_SOURCE_DIR}/Src/NLP/GPT2/GPT2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/CommonUtility.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/GPT2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/tokenization.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/utf8proc.c
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/Filesystem.cpp)
# Add the executable target
add_executable(MIGraphX_Samples ${SOURCE_FILES})
add_executable(GPT2 ${SOURCE_FILES})
Doc/Images/GPT_04.png changed (35.3 KB to 14.1 KB)
......@@ -12,9 +12,9 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠transforme
| GPT-2 | February 2019 | 1.5 billion | 40GB |
| GPT-3 | May 2020 | 175 billion | 45TB |
This sample uses the GPT-2 model for classical poem generation. Model download link: https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw, extraction code: 4pmh. Save the GPT2_shici.onnx model file in the Resource/Models/NLP/GPT2 folder. The overall model structure is shown in the figure below; you can also view the GPT-2 structure with the netron tool: https://netron.app/.
This sample uses the GPT-2 model for classical poem generation. Model download link: https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw, extraction code: 4pmh. Save the GPT2_shici.onnx model file in the Resource/ folder. The overall model structure is shown in the figure below; you can also view the GPT-2 structure with the netron tool: https://netron.app/.
<img src="../Images/GPT_01.png" style="zoom:100%;" align=middle>
<img src="./Images/GPT_01.png" style="zoom:100%;" align=middle>
## Preprocessing
......@@ -27,11 +27,11 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠transforme
First, load the vocabulary from the given vocabulary path with cuBERT::FullTokenizer(); it is used later to encode the input text. Then store the vocabulary entries one by one in the vector container output, which is used for decoding during data postprocessing.
```c++
cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/Models/NLP/GPT2/vocab_shici.txt");
cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/vocab_shici.txt");
std::ifstream infile;
std::string buf;
std::vector<std::string> output;
infile.open("../Resource/Models/NLP/GPT2/vocab_shici.txt");
infile.open("../Resource/vocab_shici.txt");
while (std::getline(infile,buf))
{
output.push_back(buf);
......@@ -46,7 +46,7 @@ ErrorCode GPT2::Preprocessing(cuBERT::FullTokenizer tokenizer,
std::vector<long unsigned int> &input_id)
{
// Tokenize the question
int max_seq_length =1024;
int max_seq_length =1000;
std::vector<std::string> tokens_question;
tokens_question.reserve(max_seq_length);
tokenizer.tokenize(question, &tokens_question, max_seq_length);
......@@ -65,7 +65,7 @@ ErrorCode GPT2::Preprocessing(cuBERT::FullTokenizer tokenizer,
For a generative language model like GPT-2, a single inference pass is not enough; the model must run inference repeatedly to produce the final answer. As shown in the figure below, each GPT-2 inference step generates only one token. The generated token is concatenated with the input data and fed back into the model for the next step, and inference stops when the loop limit is reached or the [SEP] end token is generated.
<img src="../Images/GPT_02.png" style="zoom:70%;" align=middle>
<img src="./Images/GPT_02.png" style="zoom:70%;" align=middle>
The GPT-2 inference itself is shown in the code below. First, gpt2.Inference() implements the actual inference details and stores the result in outputs. Then each result is checked: if it is the [SEP] end token, the loop ends and inference is complete; otherwise outputs is appended to the input data input_id and the next inference step runs.
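Before looking inside Inference(), the calling loop gives the overall picture. The sketch below is condensed from Src/Sample.cpp in this merge request (it assumes, as the sample code does, that token id 102 is the [SEP] end token of the bundled vocabulary):

```c++
// Outer generation loop, condensed from Src/Sample.cpp.
std::vector<long unsigned int> score;
for (int i = 0; i < 50; ++i)                        // generate at most 50 tokens
{
    long unsigned int outputs = gpt2.Inference(input_id);
    if (outputs == 102)                             // [SEP] end token: stop generating
    {
        break;
    }
    input_id.push_back(outputs);                    // feed the new token back as input
    score.push_back(outputs);                       // keep it for decoding later
}
```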
......@@ -97,12 +97,12 @@ long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id
input[0][j] = input_id[j];
}
// Set the input shape
// Input shape
std::vector<std::vector<std::size_t>> inputShapes;
inputShapes.push_back({1,input_id.size()});
// Input data
migraphx::parameter_map inputData;
std::unordered_map<std::string, migraphx::argument> inputData;
inputData[inputName]=migraphx::argument{migraphx::shape(inputShape.type(),inputShapes[0]),(long unsigned int*)input};
// Inference
......
......@@ -12,9 +12,9 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠的transfo
| GPT-2 | February 2019 | 1.5 billion | 40GB |
| GPT-3 | May 2020 | 175 billion | 45TB |
This sample uses the GPT-2 model for classical poem generation. Model download link: https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , extraction code: 4pmh. Save the GPT2_shici.onnx model file in the Resource/Models/NLP/GPT2 folder. The overall model structure is shown in the figure below; you can also view the GPT-2 structure with the netron tool: https://netron.app/.
This sample uses the GPT-2 model for classical poem generation. Model download link: https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , extraction code: 4pmh. Save the GPT2_shici.onnx model file in the Resource/ folder. The overall model structure is shown in the figure below; you can also view the GPT-2 structure with the netron tool: https://netron.app/.
<img src="../Images/GPT_01.png" style="zoom:100%;" align=middle>
<img src="./Images/GPT_01.png" style="zoom:100%;" align=middle>
## Preprocessing
......@@ -28,7 +28,7 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠的transfo
```python
# Load the vocabulary
vocab_file = os.path.join('../../../Resource/Models/NLP/GPT2', 'vocab_shici.txt')
vocab_file = os.path.join('../Resource/', 'vocab_shici.txt')
tokenizer = BertTokenizerFast(vocab_file, sep_token="[SEP]", pad_token="[PAD]", cls_token="[CLS]")
```
......@@ -63,7 +63,7 @@ maxInput={"input":[1,1000]}
for _ in range(max_len):
# Inference
result = model.run({inputName: migraphx.argument(input_ids)})
result = model.run({inputName: input_ids})
logits = [float(x) for x in result[0].tolist()]
...
......@@ -83,7 +83,7 @@ for _ in range(max_len):
2. Loop inference. Unlike other models, GPT-2 does not finish after a single inference run; it has to run inference in a loop. First, inference is placed inside a for loop: the input data input_ids is passed to model.run({...}), which generates the id of one token. Next, the result is appended to input_ids and the next iteration runs. Finally, when the loop ends or the generated token is the [SEP] end token, the overall GPT-2 inference is complete. The figure below shows one complete GPT-2 inference pass.
<img src="../Images/GPT_02.png" style="zoom:70%;" align=middle>
<img src="./Images/GPT_02.png" style="zoom:70%;" align=middle>
## Data Postprocessing
......
......@@ -5,7 +5,7 @@ import migraphx
# Load the vocabulary
print("INFO: Complete loading the vocabulary")
vocab_file = os.path.join('../../../Resource/Models/NLP/GPT2', 'vocab_shici.txt')
vocab_file = os.path.join('../Resource/', 'vocab_shici.txt')
tokenizer = BertTokenizerFast(vocab_file, sep_token="[SEP]", pad_token="[PAD]", cls_token="[CLS]")
# Set the maximum input shape
......@@ -13,7 +13,7 @@ maxInput={"input":[1,1000]}
# Load the model
print("INFO: Parsing and compiling the model")
model = migraphx.parse_onnx("../../../Resource/Models/NLP/GPT2/GPT2_shici.onnx", map_input_dims=maxInput)
model = migraphx.parse_onnx("../Resource/GPT2_shici.onnx", map_input_dims=maxInput)
inputName=model.get_parameter_names()[0]
inputShape=model.get_parameter_shapes()[inputName].lens()
print("inputName:{0} \ninputShape:{1}".format(inputName,inputShape))
......
......@@ -6,30 +6,72 @@ GPT2模型:第二代生成式预训练模型(Generative Pre-Training2)。
## Model Structure
GPT2 mainly uses the Transformer Decoder module as its feature extractor and makes some changes to it: the original Decoder contains two Multi-Head Attention blocks, while GPT2 keeps only the Masked Multi-Head Attention.
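As a rough illustration of what the mask does (a generic sketch, not code from this repository): before the attention softmax, a causal mask adds a large negative value to every score where a position would attend to a later position, so each token can only look at itself and the tokens before it.

```c++
// Generic sketch of a causal (lower-triangular) attention mask:
// position i may only attend to positions j <= i.
#include <cstdio>
#include <vector>

int main()
{
    const int seqLen = 4;
    std::vector<std::vector<float>> mask(seqLen, std::vector<float>(seqLen, 0.0f));
    for (int i = 0; i < seqLen; ++i)
        for (int j = i + 1; j < seqLen; ++j)
            mask[i][j] = -1e9f;                 // added to attention scores before softmax
    for (int i = 0; i < seqLen; ++i)            // print the mask row by row
    {
        for (int j = 0; j < seqLen; ++j)
            std::printf("%12.0f", mask[i][j]);
        std::printf("\n");
    }
    return 0;
}
```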
## Build and Install
## Python Inference
A docker image for inference can be pulled from the SourceFind (光源) registry; the image for GPT2 model inference is shown below:
This sample uses the GPT-2 model for classical poem generation. Model download link: https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , extraction code: 4pmh. Save the GPT2_shici.onnx model file in the Resource/ folder. The following describes how to run the Python sample; a detailed walkthrough of the Python sample is in Tutorial_Python.md under the Doc directory.
### Pull the Image
Pull the MIGraphX image from the SourceFind (光源) registry:
```python
docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:ort1.14.0_migraphx3.0.0-dtk22.10.1
```
### Install OpenCV Dependencies
### Set the Python Environment Variable
```
export PYTHONPATH=/opt/dtk/lib:$PYTHONPATH
```
### Install Dependencies
```python
cd <path_to_migraphx_samples>
sh ./3rdParty/InstallOpenCVDependences.sh
# Enter the gpt2 migraphx project root directory
cd <path_to_gpt2_migraphx>
# Enter the sample program directory
cd ./Python/
# Install dependencies
pip install -r requirements.txt
```
### Modify CMakeLists.txt
### Set Dynamic Shape Mode
```python
export MIGRAPHX_DYNAMIC_SHAPE=1
```
### Run the Sample
Run the sample program from the Python directory with the following command:
```python
python gpt2.py
```
As shown below, the sample runs interactively: you type the opening line of a poem, and the GPT2 model generates the following verses.
<img src="./Doc/Images/GPT_03.png" style="zoom:80%;" align=middle>
## C++ Inference
This sample uses the GPT-2 model for classical poem generation. Model download link: https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , extraction code: 4pmh. Save the GPT2_shici.onnx model file in the Resource/ folder. The following describes how to run the C++ sample; a detailed walkthrough of the C++ sample is in Tutorial_Cpp.md under the Doc directory.
### Pull the Image
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:ort1.14.0_migraphx3.0.0-dtk22.10.1
```
- If you are using an Ubuntu system, you need to change the dependency library path in CMakeLists.txt:
Change "${CMAKE_CURRENT_SOURCE_DIR}/depend/lib64/" to "${CMAKE_CURRENT_SOURCE_DIR}/depend/lib/"
### Modify CMakeLists.txt
- **MIGraphX 2.3.0 and later require C++17**
If you are using an Ubuntu system, you need to change the dependency library path in CMakeLists.txt:
Change "${CMAKE_CURRENT_SOURCE_DIR}/depend/lib64/" to "${CMAKE_CURRENT_SOURCE_DIR}/depend/lib/"
### Install OpenCV and Build the Project
### Build the Project
```
rbuild build -d depend
......@@ -42,13 +84,13 @@ rbuild build -d depend
**CentOS**:
```
export LD_LIBRARY_PATH=<path_to_migraphx_samples>/depend/lib64/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=<path_to_gpt2_migraphx>/depend/lib64/:$LD_LIBRARY_PATH
```
**Ubuntu**:
```
export LD_LIBRARY_PATH=<path_to_migraphx_samples>/depend/lib/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=<path_to_gpt2_migraphx>/depend/lib/:$LD_LIBRARY_PATH
```
Then run:
......@@ -57,61 +99,23 @@ export LD_LIBRARY_PATH=<path_to_migraphx_samples>/depend/lib/:$LD_LIBRARY_PATH
source ~/.bashrc
```
## Inference
This sample uses the GPT-2 model for classical poem generation. Model download link: https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , extraction code: 4pmh. Save the GPT2_shici.onnx model file in the model folder. The following describes how to run the Python and C++ samples; a detailed walkthrough of the inference code is provided in the Doc directory.
### Python Inference
1. Install MIGraphX following the installation instructions in the MIGraphX tutorial (《MIGraphX教程》) and set PYTHONPATH
2. Install dependencies:
```python
# Enter the migraphx samples project root directory
cd <path_to_migraphx_samples>
# Enter the sample program directory
cd Python/NLP/GPT2
### Set Dynamic Shape Mode
# Install dependencies
pip install -r requirements.txt
```
3. Set environment variables:
```python
# Set dynamic shape mode
export MIGRAPHX_DYNAMIC_SHAPE=1
```
4. Run the sample program from the Python/NLP/GPT2 directory with the following command:
### Run the Sample
```python
python gpt2.py
```
As shown below, the sample runs interactively: you type the opening line of a poem, and the GPT2 model generates the following verses.
<img src="./Doc/Images/GPT_03.png" style="zoom:80%;" align=middle>
### C++ Inference
Switch to the build directory and run the following commands:
# Enter the gpt2 migraphx project root directory
cd <path_to_gpt2_migraphx>
```python
# Enter the build directory
cd ./build/
./MIGraphX_Samples
```
Select and run the GPT2 sample program as prompted
```python
# Set dynamic shape mode
export MIGRAPHX_DYNAMIC_SHAPE=1
# Run the sample
./MIGraphX_Samples 0
# Run the sample program
./GPT2
```
As shown below, the sample runs interactively: you type the opening line of a poem, and the GPT2 model infers the following verses.
......@@ -124,6 +128,4 @@ https://developer.hpccube.com/codes/modelzoo/gpt2_migraphx
## References
https://github.com/yangjianxin1/GPT2-chitchat
https://github.com/Morizeyao/GPT2-Chinese
\ No newline at end of file
<?xml version="1.0" encoding="GB2312"?>
<opencv_storage>
<!--GPT2-->
<GPT2>
<ModelPath>"../Resource/Models/NLP/GPT2/GPT2_shici.onnx"</ModelPath>
</GPT2>
</opencv_storage>
#include <fstream>
#include <sstream>
#include <GPT2.h>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/reshape2.hpp>
#include <CommonUtility.h>
#include <Filesystem.h>
#include <SimpleLog.h>
#include <algorithm>
#include <string>
#include <vector>
#include <stdexcept>
#include <GPT2.h>
#include <tokenization.h>
namespace migraphxSamples
{
GPT2::GPT2():logFile(NULL)
GPT2::GPT2()
{
}
GPT2::~GPT2()
{
configurationFile.release();
}
ErrorCode GPT2::Initialize(InitializationParameterOfNLP initParamOfNLPGPT2)
ErrorCode GPT2::Initialize()
{
// Initialization (get the log file, load the configuration file, etc.)
ErrorCode errorCode=DoCommonInitialization(initParamOfNLPGPT2);
if(errorCode!=SUCCESS)
{
LOG_ERROR(logFile,"fail to DoCommonInitialization\n");
return errorCode;
}
LOG_INFO(logFile,"succeed to DoCommonInitialization\n");
// Get configuration file parameters
FileNode netNode = configurationFile["GPT2"];
std::string modelPath=initializationParameter.parentPath+(std::string)netNode["ModelPath"];
// Get the model file
std::string modelPath="../Resource/GPT2_shici.onnx";
// Set the maximum input shape
migraphx::onnx_options onnx_options;
......@@ -51,16 +34,16 @@ ErrorCode GPT2::Initialize(InitializationParameterOfNLP initParamOfNLPGPT2)
// Load the model
if(Exists(modelPath)==false)
{
LOG_ERROR(logFile,"%s not exist!\n",modelPath.c_str());
LOG_ERROR(stdout,"%s not exist!\n",modelPath.c_str());
return MODEL_NOT_EXIST;
}
net = migraphx::parse_onnx(modelPath, onnx_options);
LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str());
LOG_INFO(stdout,"succeed to load model: %s\n",GetFileName(modelPath).c_str());
// Get the model input attributes
std::pair<std::string, migraphx::shape> inputAttribute=*(net.get_parameter_shapes().begin());
inputName=inputAttribute.first;
inputShape=inputAttribute.second;
std::unordered_map<std::string, migraphx::shape> inputMap=net.get_parameter_shapes();
inputName=inputMap.begin()->first;
inputShape=inputMap.begin()->second;
// Set the model to GPU mode
migraphx::target gpuTarget = migraphx::gpu::target{};
......@@ -70,41 +53,8 @@ ErrorCode GPT2::Initialize(InitializationParameterOfNLP initParamOfNLPGPT2)
options.device_id=0; // Set the GPU device; device 0 by default
options.offload_copy=true; // Enable offload_copy
net.compile(gpuTarget,options);
LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());
return SUCCESS;
}
LOG_INFO(stdout,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());
ErrorCode GPT2::DoCommonInitialization(InitializationParameterOfNLP initParamOfNLPGPT2)
{
initializationParameter = initParamOfNLPGPT2;
// Get the log file
logFile=LogManager::GetInstance()->GetLogFile(initializationParameter.logName);
// Load the configuration file
std::string configFilePath=initializationParameter.configFilePath;
if(!Exists(configFilePath))
{
LOG_ERROR(logFile, "no configuration file!\n");
return CONFIG_FILE_NOT_EXIST;
}
if(!configurationFile.open(configFilePath, FileStorage::READ))
{
LOG_ERROR(logFile, "fail to open configuration file\n");
return FAIL_TO_OPEN_CONFIG_FILE;
}
LOG_INFO(logFile, "succeed to open configuration file\n");
// Fix up the parent path (ensure a trailing separator)
std::string &parentPath = initializationParameter.parentPath;
if (!parentPath.empty())
{
if(!IsPathSeparator(parentPath[parentPath.size() - 1]))
{
parentPath+=PATH_SEPARATOR;
}
}
return SUCCESS;
}
......@@ -126,8 +76,8 @@ long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id
std::vector<std::vector<std::size_t>> inputShapes;
inputShapes.push_back({1,input_id.size()});
// Input data
migraphx::parameter_map inputData;
// Create the input data
std::unordered_map<std::string, migraphx::argument> inputData;
inputData[inputName]=migraphx::argument{migraphx::shape(inputShape.type(),inputShapes[0]),(long unsigned int*)input};
// Inference
......@@ -163,7 +113,7 @@ ErrorCode GPT2::Preprocessing(cuBERT::FullTokenizer tokenizer,
std::vector<long unsigned int> &input_id)
{
// Tokenization
int max_seq_length =1024;
int max_seq_length =1000;
std::vector<std::string> tokens_question;
tokens_question.reserve(max_seq_length);
tokenizer.tokenize(question, &tokens_question, max_seq_length);
......
#ifndef GPT2_H
#define GPT2_H
#ifndef __GPT2_H__
#define __GPT2_H__
#include <cstdint>
#include <string>
#include <migraphx/program.hpp>
#include <CommonDefinition.h>
#include <tokenization.h>
using namespace cuBERT;
namespace migraphxSamples
{
typedef enum _ErrorCode
{
SUCCESS=0,
MODEL_NOT_EXIST,
CONFIG_FILE_NOT_EXIST,
FAIL_TO_LOAD_MODEL,
FAIL_TO_OPEN_CONFIG_FILE,
}ErrorCode;
typedef struct _Predictions
{
long unsigned int index;
......@@ -24,7 +31,7 @@ public:
~GPT2();
ErrorCode Initialize(InitializationParameterOfNLP initParamOfNLPGPT2);
ErrorCode Initialize();
ErrorCode Preprocessing(cuBERT::FullTokenizer tokenizer,
char *question,
......@@ -33,13 +40,6 @@ public:
long unsigned int Inference(const std::vector<long unsigned int> &input_id);
private:
ErrorCode DoCommonInitialization(InitializationParameterOfNLP initParamOfNLPGPT2);
private:
FILE *logFile;
cv::FileStorage configurationFile;
InitializationParameterOfNLP initializationParameter;
migraphx::program net;
std::string inputName;
migraphx::shape inputShape;
......
#include <Sample.h>
#include <SimpleLog.h>
#include <GPT2.h>
#include <tokenization.h>
#include <fstream>
using namespace std;
using namespace migraphx;
using namespace migraphxSamples;
void Sample_GPT2()
{
// Load the GPT2 model
GPT2 gpt2;
InitializationParameterOfNLP initParamOfNLPGPT2;
initParamOfNLPGPT2.parentPath = "";
initParamOfNLPGPT2.configFilePath = CONFIG_FILE;
initParamOfNLPGPT2.logName = "";
ErrorCode errorCode = gpt2.Initialize(initParamOfNLPGPT2);
if (errorCode != SUCCESS)
{
LOG_ERROR(stdout, "fail to initialize GPT2!\n");
exit(-1);
}
LOG_INFO(stdout, "succeed to initialize GPT2\n");
// Load the vocabulary, used for encoding and decoding
cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/Models/NLP/GPT2/vocab_shici.txt");
std::ifstream infile;
std::string buf;
std::vector<std::string> output;
infile.open("../Resource/Models/NLP/GPT2/vocab_shici.txt");
while (std::getline(infile,buf))
{
output.push_back(buf);
}
std::vector<long unsigned int> input_id;
char question[100];
std::vector<long unsigned int> score;
std::vector<std::string> result;
std::cout << "开始和GPT2对诗,输入CTRL + Z以退出" << std::endl;
while (true)
{
// Data preprocessing
std::cout << "question: ";
cin.getline(question, 100);
gpt2.Preprocessing(tokenizer, question, input_id);
// Inference
for(int i=0;i<50;++i)
{
long unsigned int outputs = gpt2.Inference(input_id);
if(outputs == 102)
{
break;
}
input_id.push_back(outputs);
score.push_back(outputs);
}
// Map the ids back to characters
for(int i=0;i<score.size();++i)
{
result.push_back(output[score[i]]);
}
// Print the result
std::cout << "chatbot: ";
std::cout << question;
for(int j=0; j<result.size();++j)
{
std::cout << result[j];
}
std::cout << std::endl;
// Clear the data
input_id.clear();
result.clear();
score.clear();
}
}
// Sample programs
#ifndef __SAMPLE_H__
#define __SAMPLE_H__
// GPT2 Dynamic sample
void Sample_GPT2();
#endif
\ No newline at end of file
// Common data types and macro definitions
#ifndef __COMMON_DEFINITION_H__
#define __COMMON_DEFINITION_H__
#include <string>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
namespace migraphxSamples
{
// Path separator (Linux: '/', Windows: '\\')
#ifdef _WIN32
#define PATH_SEPARATOR '\\'
#else
#define PATH_SEPARATOR '/'
#endif
#define CONFIG_FILE "../Resource/Configuration.xml"
typedef struct __Time
{
string year;
string month;
string day;
string hour;
string minute;
string second;
string millisecond; // ms
string microsecond; // us
string weekDay;
}_Time;
typedef enum _ErrorCode
{
SUCCESS=0, // 0
MODEL_NOT_EXIST, // model does not exist
CONFIG_FILE_NOT_EXIST, // configuration file does not exist
FAIL_TO_LOAD_MODEL, // failed to load the model
FAIL_TO_OPEN_CONFIG_FILE, // failed to open the configuration file
IMAGE_ERROR, // image error
}ErrorCode;
typedef struct _ResultOfPrediction
{
float confidence;
int label;
_ResultOfPrediction():confidence(0.0f),label(0){}
}ResultOfPrediction;
typedef struct _ResultOfDetection
{
Rect boundingBox;
float confidence;
int classID;
string className;
bool exist;
_ResultOfDetection():confidence(0.0f),classID(0),exist(true){}
}ResultOfDetection;
typedef struct _InitializationParameterOfDetector
{
std::string parentPath;
std::string configFilePath;
cv::Size inputSize;
std::string logName;
}InitializationParameterOfDetector;
typedef struct _InitializationParameterOfDetector InitializationParameterOfClassifier;
typedef struct _InitializationParameterOfDetector InitializationParameterOfSuperresolution;
typedef struct _InitializationParameterOfDetector InitializationParameterOfSegmentation;
typedef struct _InitializationParameterOfDetector InitializationParameterOfNLP;
typedef struct _InitializationParameterOfDetector InitializationParameterOfOcr;
typedef struct _InitializationParameterOfDetector InitializationParameterOfDB;
typedef struct _InitializationParameterOfDetector InitializationParameterOfSVTR;
}
#endif
#include <CommonUtility.h>
#include <assert.h>
#include <ctype.h>
#include <time.h>
#include <stdlib.h>
#include <algorithm>
#include <sstream>
#include <vector>
#ifdef _WIN32
#include <io.h>
#include <direct.h>
#include <Windows.h>
#else
#include <unistd.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/time.h>
#endif
#include <SimpleLog.h>
namespace migraphxSamples
{
_Time GetCurrentTime3()
{
_Time currentTime;
#if (defined WIN32 || defined _WIN32)
SYSTEMTIME systemTime;
GetLocalTime(&systemTime);
char temp[8] = { 0 };
sprintf(temp, "%04d", systemTime.wYear);
currentTime.year=string(temp);
sprintf(temp, "%02d", systemTime.wMonth);
currentTime.month=string(temp);
sprintf(temp, "%02d", systemTime.wDay);
currentTime.day=string(temp);
sprintf(temp, "%02d", systemTime.wHour);
currentTime.hour=string(temp);
sprintf(temp, "%02d", systemTime.wMinute);
currentTime.minute=string(temp);
sprintf(temp, "%02d", systemTime.wSecond);
currentTime.second=string(temp);
sprintf(temp, "%03d", systemTime.wMilliseconds);
currentTime.millisecond=string(temp);
sprintf(temp, "%d", systemTime.wDayOfWeek);
currentTime.weekDay=string(temp);
#else
struct timeval tv;
struct tm *p;
gettimeofday(&tv, NULL);
p = localtime(&tv.tv_sec);
char temp[8]={0};
sprintf(temp,"%04d",1900+p->tm_year);
currentTime.year=string(temp);
sprintf(temp,"%02d",1+p->tm_mon);
currentTime.month=string(temp);
sprintf(temp,"%02d",p->tm_mday);
currentTime.day=string(temp);
sprintf(temp,"%02d",p->tm_hour);
currentTime.hour=string(temp);
sprintf(temp,"%02d",p->tm_min);
currentTime.minute=string(temp);
sprintf(temp,"%02d",p->tm_sec);
currentTime.second=string(temp);
sprintf(temp,"%03d",tv.tv_usec/1000);
currentTime.millisecond = string(temp);
sprintf(temp, "%03d", tv.tv_usec % 1000);
currentTime.microsecond = string(temp);
sprintf(temp, "%d", p->tm_wday);
currentTime.weekDay = string(temp);
#endif
return currentTime;
}
std::vector<std::string> SplitString(std::string str, std::string separator)
{
std::string::size_type pos;
std::vector<std::string> result;
str+=separator; // extend the string to make the scan easier
int size=str.size();
for(int i=0; i<size; i++)
{
pos=str.find(separator,i);
if(pos<size)
{
std::string s=str.substr(i,pos-i);
result.push_back(s);
i=pos+separator.size()-1;
}
}
return result;
}
bool CompareConfidence(const ResultOfDetection &L,const ResultOfDetection &R)
{
return L.confidence > R.confidence;
}
bool CompareArea(const ResultOfDetection &L,const ResultOfDetection &R)
{
return L.boundingBox.area() > R.boundingBox.area();
}
void NMS(vector<ResultOfDetection> &detections, float IOUThreshold)
{
// sort
std::sort(detections.begin(), detections.end(), CompareConfidence);
for (int i = 0; i<detections.size(); ++i)
{
if (detections[i].exist)
{
for (int j = i + 1; j<detections.size(); ++j)
{
if (detections[j].exist)
{
// compute IOU
float intersectionArea = (detections[i].boundingBox & detections[j].boundingBox).area();
float intersectionRate = intersectionArea / (detections[i].boundingBox.area() + detections[j].boundingBox.area() - intersectionArea);
if (intersectionRate>IOUThreshold)
{
detections[j].exist = false;
}
}
}
}
}
}
}
// Common utilities
#ifndef __COMMON_UTILITY_H__
#define __COMMON_UTILITY_H__
#include <mutex>
#include <string>
#include <vector>
#include <CommonDefinition.h>
using namespace std;
namespace migraphxSamples
{
// Split a string
std::vector<std::string> SplitString(std::string str,std::string separator);
// Sorting rules: sort by confidence or by area
bool CompareConfidence(const ResultOfDetection &L,const ResultOfDetection &R);
bool CompareArea(const ResultOfDetection &L,const ResultOfDetection &R);
void NMS(std::vector<ResultOfDetection> &detections, float IOUThreshold);
}
#endif
......@@ -11,12 +11,7 @@
#include <unistd.h>
#include <dirent.h>
#endif
#include <CommonUtility.h>
#include <opencv2/opencv.hpp>
#include <SimpleLog.h>
using namespace cv;
// Path separator (Linux: '/', Windows: '\\')
#ifdef _WIN32
#define PATH_SEPARATOR '\\'
......@@ -24,9 +19,31 @@ using namespace cv;
#define PATH_SEPARATOR '/'
#endif
using namespace std;
namespace migraphxSamples
{
static std::vector<std::string> SplitString(std::string str, std::string separator)
{
std::string::size_type pos;
std::vector<std::string> result;
str+=separator; // extend the string to make the scan easier
int size=str.size();
for(int i=0; i<size; i++)
{
pos=str.find(separator,i);
if(pos<size)
{
std::string s=str.substr(i,pos-i);
result.push_back(s);
i=pos+separator.size()-1;
}
}
return result;
}
#if defined _WIN32 || defined WINCE
const char dir_separators[] = "/\\";
......@@ -293,7 +310,7 @@ namespace migraphxSamples
}
else
{
LOG_INFO(stdout, "could not open directory: %s", directory.c_str());
printf("could not open directory: %s", directory.c_str());
}
}
......@@ -390,7 +407,7 @@ namespace migraphxSamples
#endif
if (!result)
{
LOG_INFO(stdout, "can't remove directory: %s\n", path.c_str());
printf("can't remove directory: %s\n", path.c_str());
}
}
else
......@@ -402,7 +419,7 @@ namespace migraphxSamples
#endif
if (!result)
{
LOG_INFO(stdout, "can't remove file: %s\n", path.c_str());
printf("can't remove file: %s\n", path.c_str());
}
}
}
......@@ -438,7 +455,7 @@ namespace migraphxSamples
{
RemoveAll(path);
++numberOfFiles;
LOG_INFO(stdout, "%s deleted! number of deleted files:%d\n", path.c_str(), numberOfFiles);
printf("%s deleted! number of deleted files:%d\n", path.c_str(), numberOfFiles);
}
}
......@@ -452,7 +469,7 @@ namespace migraphxSamples
}
else
{
LOG_INFO(stdout, "could not open directory: %s", directory.c_str());
printf("could not open directory: %s", directory.c_str());
}
// call RemoveAll to delete the directory
......@@ -592,17 +609,17 @@ namespace migraphxSamples
if(!srcFile.is_open())
{
LOG_ERROR(stdout,"can not open %s\n",srcPath.c_str());
printf("can not open %s\n",srcPath.c_str());
return false;
}
if(!dstFile.is_open())
{
LOG_ERROR(stdout, "can not open %s\n", dstPath.c_str());
printf("can not open %s\n", dstPath.c_str());
return false;
}
if(srcPath==dstPath)
{
LOG_ERROR(stdout, "src can not be same with dst\n");
printf("src can not be same with dst\n");
return false;
}
char buffer[2048];
......@@ -622,7 +639,7 @@ namespace migraphxSamples
{
if(srcPath==dstPath)
{
LOG_ERROR(stdout, "src can not be same with dst\n");
printf("src can not be same with dst\n");
return false;
}
......@@ -662,9 +679,9 @@ namespace migraphxSamples
// process
double process = (1.0*(i + 1) / fileNameList.size()) * 100;
LOG_INFO(stdout, "%s done! %f% \n", GetFileName(fileNameList[i]).c_str(), process);
printf("%s done! %f% \n", GetFileName(fileNameList[i]).c_str(), process);
}
LOG_INFO(stdout, "all done!(the number of files:%d)\n", fileNameList.size());
printf("all done!(the number of files:%d)\n", fileNameList.size());
return true;
......
......@@ -3,10 +3,8 @@
#ifndef __FILE_SYSTEM_H__
#define __FILE_SYSTEM_H__
#include <vector>
#include <string>
using namespace std;
#include <vector>
namespace migraphxSamples
{
......@@ -21,7 +19,7 @@ bool IsDirectory(const std::string &path);
bool IsPathSeparator(char c);
// Join paths
string JoinPath(const std::string &base, const std::string &path);
std::string JoinPath(const std::string &base, const std::string &path);
// Create nested directories. Note: when creating nested directories, the target directory must not already contain a file
bool CreateDirectories(const std::string &directoryPath);
......@@ -49,14 +47,13 @@ void Remove(const std::string &directory, const std::string &extension="");
/** Get the file name and extension from a path
*
* Example: if path is D:/1/1.txt, then GetFileName() is 1.txt, GetFileName_NoExtension() is 1, GetExtension() is .txt, and GetParentPath() is D:/1/
*/
string GetFileName(const std::string &path); // 1.txt
string GetFileName_NoExtension(const std::string &path); // 1
string GetExtension(const std::string &path);// .txt
string GetParentPath(const std::string &path);// D:/1/
std::string GetFileName(const std::string &path);
std::string GetFileName_NoExtension(const std::string &path);
std::string GetExtension(const std::string &path);
std::string GetParentPath(const std::string &path);
// Copy a file: CopyFile("D:/1.txt","D:/2.txt"); copies 1.txt to 2.txt
// Copy a file
bool CopyFile(const std::string srcPath,const std::string dstPath);
/** Copy a directory
......