精简代码

5c098b75 · liucong · 33430a56 · 5c098b75 · 5c098b75 · 5c098b75
Commit 5c098b75 authored Jun 13, 2023 by liucong
16 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,7 +12,6 @@ set(CMAKE_BUILD_TYPE release)
 # 添加头文件路径
 set(INCLUDE_PATH    ${CMAKE_CURRENT_SOURCE_DIR}/Src/
                    ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/ 
-                    ${CMAKE_CURRENT_SOURCE_DIR}/Src/GPT2/
                    $ENV{DTKROOT}/include/
                    ${CMAKE_CURRENT_SOURCE_DIR}/depend/include/)
 include_directories(${INCLUDE_PATH})
@@ -33,10 +32,9 @@ link_libraries(${LIBRARY})

 # 添加源文件
 set(SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/Src/main.cpp
+                ${CMAKE_CURRENT_SOURCE_DIR}/Src/GPT2.cpp
                ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/tokenization.cpp
                ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/utf8proc.c
-                ${CMAKE_CURRENT_SOURCE_DIR}/Src/GPT2/gpt2.cpp
-                ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/CommonUtility.cpp
                ${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/Filesystem.cpp)

 # 添加可执行目标

--- a/Doc/Tutorial_Cpp/GPT2.md
+++ b/Doc/Tutorial_Cpp/GPT2.md
@@ -12,9 +12,9 @@ GPT（Generative Pre-trained Transformer）系列模型以不断堆叠transforme
 | GPT-2 | 2019 年 2 月 | 15 亿    | 40GB         |
 | GPT-3 | 2020 年 5 月 | 1,750 亿 | 45TB         |

-本次采用GPT-2模型进行诗词生成任务，模型文件下载链接：https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw, 提取码：4pmh。将GPT2_shici.onnx模型文件保存在Resource/Models/NLP/GPT2文件夹下。整体模型结构如下图所示，也可以通过netron工具：https://netron.app/ 查看GPT-2的模型结构。
+本次采用GPT-2模型进行诗词生成任务，模型文件下载链接：https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw, 提取码：4pmh。将GPT2_shici.onnx模型文件保存在Resource/文件夹下。整体模型结构如下图所示，也可以通过netron工具：https://netron.app/ 查看GPT-2的模型结构。

-<img src="../Images/GPT_01.png" style="zoom:100%;" align=middle>
+<img src="./Images/GPT_01.png" style="zoom:100%;" align=middle>

 ## 预处理

@@ -27,11 +27,11 @@ GPT（Generative Pre-trained Transformer）系列模型以不断堆叠transforme
 首先，根据提供的词汇表路径，通过cuBERT::FullTokenizer()函数加载词汇表，用于后续对输入文本的编码操作。其次，将词汇表中的内容依次保存到vector容器output中，用于数据后处理中的解码操作。

 ```c++
-cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/Models/vocab_shici.txt");
+cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/vocab_shici.txt");
 std::ifstream infile;
 std::string buf;
 std::vector<std::string> output;
-infile.open("../Resource/Models/vocab_shici.txt");
+infile.open("../Resource/vocab_shici.txt");
 while (std::getline(infile,buf))
 {
    output.push_back(buf);
@@ -65,7 +65,7 @@ ErrorCode GPT2::Preprocessing(cuBERT::FullTokenizer tokenizer,

 对于GPT-2这种生成式语言模型来说，模型不是仅执行一次推理就结束，而是需要执行多次推理，才能得到最终的答案。如下图所示，GPT-2模型每次推理仅生成一个词，通过将生成的词与输入数据拼接，输入到模型中继续下一次的推理，直到循环结束或者生成[SEP]结束标识符才结束推理。

-<img src="../Images/GPT_02.png" style="zoom:70%;" align=middle>
+<img src="./Images/GPT_02.png" style="zoom:70%;" align=middle>

 具体GPT-2模型的推理，如下代码所示。首先，通过gpt2.Inference()函数实现模型的具体推理细节，推理结果保存在outputs中。其次，对每次推理结果进行判断，当判断为[SEP]结束标志符时，结束循环完成推理，否则就将推理结果outputs加入到输入数据input_id中，继续下一次的模型推理。

@@ -97,7 +97,7 @@ long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id
        input[0][j] = input_id[j];
    }

-    // 设置输入shape
+    // 输入shape
    std::vector<std::vector<std::size_t>> inputShapes;
    inputShapes.push_back({1,input_id.size()});


--- a/Doc/Tutorial_Python/GPT2.md
+++ b/Doc/Tutorial_Python/GPT2.md
@@ -12,9 +12,9 @@ GPT（Generative Pre-trained Transformer）系列模型以不断堆叠的transfo
 | GPT-2 | 2019 年 2 月 | 15 亿    | 40GB         |
 | GPT-3 | 2020 年 5 月 | 1,750 亿 | 45TB         |

-本次采用GPT-2模型进行诗词生成任务，模型文件下载链接：https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , 提取码：4pmh 。将GPT2_shici.onnx模型文件保存在Resource/Models/NLP/GPT2文件夹下。整体模型结构如下图所示，也可以通过netron工具：https://netron.app/ 查看GPT-2的模型结构。
+本次采用GPT-2模型进行诗词生成任务，模型文件下载链接：https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , 提取码：4pmh 。将GPT2_shici.onnx模型文件保存在Resource/文件夹下。整体模型结构如下图所示，也可以通过netron工具：https://netron.app/ 查看GPT-2的模型结构。

-<img src="../Images/GPT_01.png" style="zoom:100%;" align=middle>
+<img src="./Images/GPT_01.png" style="zoom:100%;" align=middle>

 ## 预处理

@@ -28,7 +28,7 @@ GPT（Generative Pre-trained Transformer）系列模型以不断堆叠的transfo

 ```python
 # 加载词汇表
-vocab_file = os.path.join('../../../Resource/Models', 'vocab_shici.txt')
+vocab_file = os.path.join('../Resource/', 'vocab_shici.txt')
 tokenizer = BertTokenizerFast(vocab_file, sep_token="[SEP]", pad_token="[PAD]", cls_token="[CLS]")
 ```

@@ -83,7 +83,7 @@ for _ in range(max_len):

 2.循环推理，GPT-2模型不像其他模型一样只需要执行一次推理，而是需要循环执行多次推理才能完成。首先，模型推理限定在for循环中，将输入数据input_ids，输入到model.run({...})中执行推理，生成一个token的id。其次，将推理结果拼接到输入数据input_ids中，执行下一次循环。最后，当循环结束或者生成的词为[SEP]结束标志符时，完成GPT-2模型的整体推理。如下图所示，为GPT-2模型的一次完整推理过程。

-<img src="../Images/GPT_02.png" style="zoom:70%;" align=middle>
+<img src="./Images/GPT_02.png" style="zoom:70%;" align=middle>

 ## 数据后处理


--- a/Python/gpt2.py
+++ b/Python/gpt2.py
@@ -5,7 +5,7 @@ import migraphx

 # 加载词汇表
 print("INFO: Complete loading the vocabulary")
-vocab_file = os.path.join('../Resource/Models', 'vocab_shici.txt')
+vocab_file = os.path.join('../Resource/', 'vocab_shici.txt')
 tokenizer = BertTokenizerFast(vocab_file, sep_token="[SEP]", pad_token="[PAD]", cls_token="[CLS]")

 # 设置最大输入shape
@@ -13,7 +13,7 @@ maxInput={"input":[1,1000]}

 # 加载模型
 print("INFO: Parsing and compiling the model")
-model = migraphx.parse_onnx("../Resource/Models/GPT2_shici.onnx", map_input_dims=maxInput)
+model = migraphx.parse_onnx("../Resource/GPT2_shici.onnx", map_input_dims=maxInput)
 inputName=model.get_parameter_names()[0]
 inputShape=model.get_parameter_shapes()[inputName].lens()
 print("inputName:{0} \ninputShape:{1}".format(inputName,inputShape))

--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ GPT2主要使用Transformer的Decoder模块为特征提取器，并对Transforme

 ## Python版本推理

-本次采用GPT-2模型进行诗词生成任务，模型文件下载链接：https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , 提取码：4pmh ，并将GPT2_shici.onnx模型文件保存在Resource/model文件夹下。下面介绍如何运行python代码示例，具体推理代码解析，在Doc/Tutorial_Python目录中有详细说明。
+本次采用GPT-2模型进行诗词生成任务，模型文件下载链接：https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , 提取码：4pmh ，并将GPT2_shici.onnx模型文件保存在Resource/文件夹下。下面介绍如何运行python代码示例，具体推理代码解析，在Doc/Tutorial_Python目录中有详细说明。

 ### 安装镜像

@@ -20,13 +20,13 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:ort1.14.0_migraphx3.0

 ### 执行推理

-1.参考《MIGraphX教程》中的安装方法安装MIGraphX并设置好PYTHONPATH
+1.参考《MIGraphX教程》设置好PYTHONPATH

 2.安装依赖：

 ```python
-# 进入migraphx samples工程根目录
-cd <path_to_migraphx_samples> 
+# 进入gpt2 migraphx工程根目录
+cd <path_to_gpt2_migraphx> 

 # 进入示例程序目录
 cd ./Python/
@@ -79,13 +79,13 @@ rbuild build -d depend
 **Centos**:

 ```
-export LD_LIBRARY_PATH=<path_to_migraphx_samples>/depend/lib64/:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=<path_to_gpt2_migraphx>/depend/lib64/:$LD_LIBRARY_PATH
 ```

 **Ubuntu**:

 ```
-export LD_LIBRARY_PATH=<path_to_migraphx_samples>/depend/lib/:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=<path_to_gpt2_migraphx>/depend/lib/:$LD_LIBRARY_PATH
 ```

 然后执行:
@@ -99,8 +99,8 @@ source ~/.bashrc
 运行GPT2示例程序，具体执行如下命令：

 ```python
-# 进入migraphx samples工程根目录
-cd <path_to_migraphx_samples> 
+# 进入gpt2 migraphx工程根目录
+cd <path_to_gpt2_migraphx> 

 # 进入build目录
 cd ./build/

--- a/Resource/Models/vocab_shici.txt
+++ b/Resource/Models/vocab_shici.txt
--- a/Src/GPT2/gpt2.cpp
+++ b/Src/GPT2/gpt2.cpp
-#include <fstream>
-#include <sstream>
+#include <GPT2.h>
+
 #include <migraphx/onnx.hpp>
 #include <migraphx/gpu/target.hpp>
-#include <migraphx/reshape2.hpp>
+
+#include <Filesystem.h>
 #include <SimpleLog.h>
 #include <algorithm>
-#include <string>
 #include <stdexcept>
-#include <gpt2.h>
 #include <tokenization.h>

 namespace migraphxSamples
@@ -25,16 +24,21 @@ GPT2::~GPT2()

 ErrorCode GPT2::Initialize()
 {
-    // 设置模型路径
-    std::string modelPath="../Resource/Models/GPT2_shici.onnx";
+    // 获取模型文件
+    std::string modelPath="../Resource/GPT2_shici.onnx";

    // 设置最大输入shape
    migraphx::onnx_options onnx_options;
    onnx_options.map_input_dims["input"]={1,1000};

    // 加载模型
+    if(Exists(modelPath)==false)
+    {
+        LOG_ERROR(stdout,"%s not exist!\n",modelPath.c_str());
+        return MODEL_NOT_EXIST;
+    }
    net = migraphx::parse_onnx(modelPath, onnx_options);        
-    LOG_INFO(stdout,"succeed to load model: GPT2_shici\n");
+    LOG_INFO(stdout,"succeed to load model: %s\n",GetFileName(modelPath).c_str());

    // 获取模型输入属性
    std::unordered_map<std::string, migraphx::shape> inputMap=net.get_parameter_shapes();
@@ -47,9 +51,9 @@ ErrorCode GPT2::Initialize()
    // 编译模型
    migraphx::compile_options options;
    options.device_id=0;                          // 设置GPU设备，默认为0号设备
-    options.offload_copy=true;                    
+    options.offload_copy=true;                    // 设置offload_copy
    net.compile(gpuTarget,options);
-    LOG_INFO(stdout,"succeed to compile model: %s\n");                       
+    LOG_INFO(stdout,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());                     

    return SUCCESS;
 }
@@ -72,7 +76,7 @@ long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id
    std::vector<std::vector<std::size_t>> inputShapes;
    inputShapes.push_back({1,input_id.size()});

-    // 输入数据
+    // 创建输入数据
    std::unordered_map<std::string, migraphx::argument> inputData;
    inputData[inputName]=migraphx::argument{migraphx::shape(inputShape.type(),inputShapes[0]),(long unsigned int*)input};


--- a/Src/GPT2/gpt2.h
+++ b/Src/GPT2/gpt2.h
-#ifndef GPT2_H
-#define GPT2_H
+#ifndef __GPT2_H__
+#define __GPT2_H__

 #include <cstdint>
 #include <string>
 #include <migraphx/program.hpp>
-#include <CommonDefinition.h>
 #include <tokenization.h>

 namespace migraphxSamples
 {
+    // Status codes returned by the sample's initialization routines.
+    typedef enum _ErrorCode
+    {
+        SUCCESS=0, // no error; main() compares the result of GPT2::Initialize() against this value
+        MODEL_NOT_EXIST, // returned by GPT2::Initialize() when the model file is not found
+        CONFIG_FILE_NOT_EXIST, // not used in the visible code; presumably a missing configuration file
+        FAIL_TO_LOAD_MODEL, // not used in the visible code; presumably a model parsing failure
+        FAIL_TO_OPEN_CONFIG_FILE, // not used in the visible code; presumably an unreadable configuration file
+    }ErrorCode;
+
    typedef struct _Predictions
    {
        long unsigned int index;

--- a/Src/Utility/CommonDefinition.h
+++ b/Src/Utility/CommonDefinition.h
-// 常用数据类型和宏定义
+// 常用定义

 #ifndef __COMMON_DEFINITION_H__
 #define __COMMON_DEFINITION_H__

-#include <string>
 #include <opencv2/opencv.hpp>

-using namespace std;
-using namespace cv;
-
 namespace migraphxSamples
 {
   
@@ -21,20 +17,7 @@ namespace migraphxSamples

 #define CONFIG_FILE                                                     "../Resource/Configuration.xml"

-typedef struct  __Time
-{
-    string year;
-    string month;
-    string day;
-    string hour;
-    string minute;
-    string second;
-    string millisecond; // ms
-    string microsecond; // us
-    string weekDay;
-}_Time;
-
-typedef enum  _ErrorCode
+typedef enum _ErrorCode
 {
    SUCCESS=0,  // 0
    MODEL_NOT_EXIST, // 模型不存在
@@ -44,7 +27,7 @@ typedef enum  _ErrorCode
    IMAGE_ERROR, // 图像错误
 }ErrorCode;

-typedef struct  _ResultOfPrediction
+typedef struct _ResultOfPrediction
 {
    float confidence;
    int label;
@@ -52,24 +35,22 @@ typedef struct  _ResultOfPrediction

 }ResultOfPrediction;

-typedef struct  _ResultOfDetection
+typedef struct _ResultOfDetection
 {
-    Rect boundingBox;
+    cv::Rect boundingBox;
    float confidence;
    int classID;
-    string className;
+    std::string className;
    bool exist;

    _ResultOfDetection():confidence(0.0f),classID(0),exist(true){}

 }ResultOfDetection;

-typedef struct  _InitializationParameterOfDetector
+typedef struct _InitializationParameterOfDetector
 {
    std::string parentPath;
    std::string configFilePath;
-    cv::Size inputSize;
-    std::string logName;
 }InitializationParameterOfDetector;

 typedef struct _InitializationParameterOfDetector InitializationParameterOfClassifier;

--- a/Src/Utility/CommonUtility.cpp
+++ b/Src/Utility/CommonUtility.cpp
+#include <CommonUtility.h>
+
+namespace migraphxSamples
+{
+
+// Ordering predicate for std::sort: true when L is strictly more confident than R,
+// which sorts detections from highest to lowest confidence.
+bool CompareConfidence(const ResultOfDetection &L,const ResultOfDetection &R)
+{
+    const bool leftIsMoreConfident = R.confidence < L.confidence;
+    return leftIsMoreConfident;
+}
+
+// Ordering predicate for std::sort: true when the bounding box of L is strictly
+// larger than the bounding box of R (largest area first).
+bool CompareArea(const ResultOfDetection &L,const ResultOfDetection &R)
+{
+    const bool leftIsLarger = R.boundingBox.area() < L.boundingBox.area();
+    return leftIsLarger;
+}
+
+// Non-maximum suppression, performed in place.
+// detections is sorted by descending confidence; every detection whose IOU with
+// an earlier, still-active detection exceeds IOUThreshold gets exist=false.
+// Nothing is erased from the vector: callers are expected to test the exist flag.
+void NMS(std::vector<ResultOfDetection> &detections, float IOUThreshold)
+{
+    std::sort(detections.begin(), detections.end(), CompareConfidence);
+
+    for (int kept = 0; kept<detections.size(); ++kept)
+    {
+        if (!detections[kept].exist)
+        {
+            continue;
+        }
+
+        for (int candidate = kept + 1; candidate<detections.size(); ++candidate)
+        {
+            if (!detections[candidate].exist)
+            {
+                continue;
+            }
+
+            // IOU = intersection / (areaA + areaB - intersection)
+            float overlap = (detections[kept].boundingBox & detections[candidate].boundingBox).area();
+            float iou = overlap / (detections[kept].boundingBox.area() + detections[candidate].boundingBox.area() - overlap);
+
+            if (iou>IOUThreshold)
+            {
+                detections[candidate].exist = false;
+            }
+        }
+    }
+}
+
+}
--- a/Src/Utility/CommonUtility.h
+++ b/Src/Utility/CommonUtility.h
+// 常用工具
+
+#ifndef __COMMON_UTILITY_H__
+#define __COMMON_UTILITY_H__
+
+#include <CommonDefinition.h>
+
+namespace migraphxSamples
+{
+
+// 排序规则: 按照置信度或者按照面积排序
+bool CompareConfidence(const ResultOfDetection &L,const ResultOfDetection &R);
+bool CompareArea(const ResultOfDetection &L,const ResultOfDetection &R);
+
+// 非极大抑制
+void NMS(std::vector<ResultOfDetection> &detections, float IOUThreshold);
+
+}
+
+#endif
--- a/Src/Utility/Filesystem.cpp
+++ b/Src/Utility/Filesystem.cpp
+#include <Filesystem.h>
+#include <algorithm>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fstream>
+#ifdef _WIN32
+#include <io.h>
+#include <direct.h>
+#include <Windows.h>
+#else
+#include <unistd.h>
+#include <dirent.h>
+#endif
+
+// 路径分隔符(Linux:‘/’,Windows:’\\’)
+#ifdef _WIN32
+#define  PATH_SEPARATOR '\\'
+#else
+#define  PATH_SEPARATOR '/'
+#endif
+
+using namespace std;
+
+namespace migraphxSamples
+{
+
+// Splits str at every occurrence of separator and returns the pieces in order.
+// Empty pieces are kept: "a,,b" -> {"a","","b"} and "" -> {""}, so an empty
+// pattern string still produces one (empty) pattern.
+// An empty separator cannot split anything; the whole string is returned as a
+// single piece (the previous implementation never terminated in that case).
+static std::vector<std::string> SplitString(std::string str, std::string separator)
+{
+    std::vector<std::string> result;
+    if(separator.empty())
+    {
+        result.push_back(str);
+        return result;
+    }
+
+    std::string::size_type begin=0;
+    for(;;)
+    {
+        const std::string::size_type pos=str.find(separator,begin);
+        if(pos==std::string::npos)
+        {
+            // tail after the last separator (may be empty)
+            result.push_back(str.substr(begin));
+            break;
+        }
+        result.push_back(str.substr(begin,pos-begin));
+        begin=pos+separator.size();
+    }
+    return result;
+}
+
+#if defined _WIN32 || defined WINCE
+    const char dir_separators[] = "/\\";
+
+	// Windows-only stand-in for the POSIX dirent structure; only the entry name is provided.
+	struct dirent
+	{
+		const char* d_name;
+	};
+
+	// Windows-only stand-in for the POSIX DIR handle, built on the FindFirstFile/FindNextFile API.
+	struct DIR
+	{
+		// data of the entry most recently returned by the Find* calls
+#ifdef WINRT
+		WIN32_FIND_DATAW data;
+#else
+        WIN32_FIND_DATAA data;
+#endif
+		HANDLE handle; // search handle, released by closedir()
+		dirent ent; // entry handed back to the caller by readdir()
+#ifdef WINRT
+		// under WINRT readdir() stores a heap-allocated name in ent.d_name, so it is freed here
+		DIR() { }
+		~DIR()
+		{
+			if (ent.d_name)
+				delete[] ent.d_name;
+		}
+#endif
+	};
+
+	// Windows replacement for POSIX opendir(): starts a search for "<path>\*".
+	// Returns a heap-allocated DIR that must be released with closedir(),
+	// or 0 when the search could not be started.
+	DIR* opendir(const char* path)
+	{
+		DIR* dir = new DIR;
+		// d_name == 0 tells readdir() that the first entry has not been returned yet
+		dir->ent.d_name = 0;
+#ifdef WINRT
+		string full_path = string(path) + "\\*";
+		wchar_t wfull_path[MAX_PATH];
+		size_t copied = mbstowcs(wfull_path, full_path.c_str(), MAX_PATH);
+		CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+		dir->handle = ::FindFirstFileExW(wfull_path, FindExInfoStandard,
+			&dir->data, FindExSearchNameMatch, NULL, 0);
+#else
+		dir->handle = ::FindFirstFileExA((string(path) + "\\*").c_str(),
+			FindExInfoStandard, &dir->data, FindExSearchNameMatch, NULL, 0);
+#endif
+		if (dir->handle == INVALID_HANDLE_VALUE)
+		{
+			/* no search handle was opened, so only the DIR object itself is released */
+			delete dir;
+			return 0;
+		}
+		return dir;
+	}
+
+	// Windows replacement for POSIX readdir(): returns the next entry of the
+	// search started by opendir(), or 0 when there are no more entries.
+	// The first call returns the entry already fetched by opendir(); later calls
+	// advance the search first. The returned pointer refers to storage inside
+	// dir and is overwritten by the next call.
+	dirent* readdir(DIR* dir)
+	{
+#ifdef WINRT
+		if (dir->ent.d_name != 0)
+		{
+			if (::FindNextFileW(dir->handle, &dir->data) != TRUE)
+				return 0;
+		}
+		size_t asize = wcstombs(NULL, dir->data.cFileName, 0);
+		CV_Assert((asize != 0) && (asize != (size_t)-1));
+		char* aname = new char[asize + 1];
+		aname[asize] = 0;
+		wcstombs(aname, dir->data.cFileName, asize);
+		// release the name allocated for the previous entry; without this every
+		// call leaked one buffer and only the last one was freed by ~DIR()
+		delete[] dir->ent.d_name;
+		dir->ent.d_name = aname;
+#else
+		if (dir->ent.d_name != 0)
+		{
+			if (::FindNextFileA(dir->handle, &dir->data) != TRUE)
+				return 0;
+		}
+		// the name lives inside dir->data, nothing to allocate or free
+		dir->ent.d_name = dir->data.cFileName;
+#endif
+		return &dir->ent;
+	}
+
+	// Windows replacement for POSIX closedir(): closes the search handle and frees the DIR object.
+	void closedir(DIR* dir)
+	{
+		::FindClose(dir->handle);
+		delete dir;
+	}
+#else
+# include <dirent.h>
+# include <sys/stat.h>
+	const char dir_separators[] = "/";
+#endif
+
+	// Returns true when path refers to a directory.
+	// On Windows, a non-null dir means "use the attributes of the entry most
+	// recently read from dir" instead of querying path; on other platforms dir
+	// is ignored and path is examined with stat().
+	static bool isDir(const string &path, DIR* dir)
+	{
+#if defined _WIN32 || defined WINCE
+		DWORD attributes;
+		BOOL status = TRUE;
+		if (dir)
+			attributes = dir->data.dwFileAttributes;
+		else
+		{
+			WIN32_FILE_ATTRIBUTE_DATA all_attrs;
+#ifdef WINRT
+			wchar_t wpath[MAX_PATH];
+			size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
+			CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+			status = ::GetFileAttributesExW(wpath, GetFileExInfoStandard, &all_attrs);
+#else
+			status = ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &all_attrs);
+#endif
+			attributes = all_attrs.dwFileAttributes;
+		}
+
+		// a failed attribute query (status == FALSE) is reported as "not a directory"
+		return status && ((attributes & FILE_ATTRIBUTE_DIRECTORY) != 0);
+#else
+		(void)dir;
+		struct stat stat_buf;
+		// stat() failure (e.g. the path does not exist) is reported as "not a directory"
+		if (0 != stat(path.c_str(), &stat_buf))
+			return false;
+		int is_dir = S_ISDIR(stat_buf.st_mode);
+		return is_dir != 0;
+#endif
+	}
+
+	// Returns true when path refers to a directory; the path itself is always queried.
+	bool IsDirectory(const string &path)
+	{
+		return isDir(path, NULL);
+	}
+
+	// Returns true when path can be queried: GetFileAttributesEx succeeds on
+	// Windows, stat() succeeds elsewhere. Files and directories both count.
+	bool Exists(const string& path)
+	{
+
+#if defined _WIN32 || defined WINCE
+			BOOL status = TRUE;
+		{
+			WIN32_FILE_ATTRIBUTE_DATA all_attrs;
+#ifdef WINRT
+			wchar_t wpath[MAX_PATH];
+			size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
+			CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+			status = ::GetFileAttributesExW(wpath, GetFileExInfoStandard, &all_attrs);
+#else
+			status = ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &all_attrs);
+#endif
+		}
+
+		// convert BOOL to bool
+		return !!status;
+#else
+		struct stat stat_buf;
+		return (0 == stat(path.c_str(), &stat_buf));
+#endif
+	}
+
+    // True when c is '/' or '\\'. Both characters are accepted on every platform.
+    bool IsPathSeparator(char c)
+    {
+        switch (c)
+        {
+        case '/':
+        case '\\':
+            return true;
+        default:
+            return false;
+        }
+    }
+
+	// Concatenates base and path so that exactly one separator sits between them
+	// whenever both are non-empty: a missing separator is inserted (PATH_SEPARATOR)
+	// and a doubled one is collapsed. If either side is empty the other side is
+	// returned unchanged.
+	string JoinPath(const string& base, const string& path)
+	{
+		if (base.empty())
+			return path;
+		if (path.empty())
+			return base;
+
+		// how many separators meet at the joint: 0, 1 or 2
+		const int separatorsAtJoint = (IsPathSeparator(base[base.size() - 1]) ? 1 : 0)
+			+ (IsPathSeparator(path[0]) ? 1 : 0);
+
+		switch (separatorsAtJoint)
+		{
+		case 0:
+			return base + PATH_SEPARATOR + path;
+		case 2:
+			// drop the leading separator of path
+			return base + path.substr(1);
+		default:
+			return base + path;
+		}
+	}
+
+	// Matches the text `string` against the wildcard pattern `wild`.
+	// '*' matches any run of characters (including none), '?' matches exactly one
+	// character, everything else must match literally (case-sensitive).
+	// Returns true only when the whole text is covered by the whole pattern.
+	static bool wildcmp(const char *string, const char *wild)
+	{
+		// mp: pattern position right after the most recent '*'
+		// cp: text position to retry from when the part after that '*' fails to match
+		const char *cp = 0, *mp = 0;
+
+		// match the literal prefix in front of the first '*'
+		while ((*string) && (*wild != '*'))
+		{
+			if ((*wild != *string) && (*wild != '?'))
+			{
+				return false;
+			}
+
+			wild++;
+			string++;
+		}
+
+		// if text is left here the pattern is positioned on a '*', so mp/cp are set
+		// in the first iteration before the backtracking branch can use them
+		while (*string)
+		{
+			if (*wild == '*')
+			{
+				// a trailing '*' matches the rest of the text
+				if (!*++wild)
+				{
+					return true;
+				}
+
+				mp = wild;
+				cp = string + 1;
+			}
+			else if ((*wild == *string) || (*wild == '?'))
+			{
+				wild++;
+				string++;
+			}
+			else
+			{
+				// mismatch: let the last '*' absorb one more character and retry
+				wild = mp;
+				string = cp++;
+			}
+		}
+
+		// text exhausted: any remaining pattern may only consist of '*'
+		while (*wild == '*')
+		{
+			wild++;
+		}
+
+		return *wild == 0;
+	}
+
+	// Appends to result the entries of directory whose name matches wildchart
+	// (an empty wildchart matches everything).
+	//   recursive          - also descend into sub-directories
+	//   includeDirectories - when false, directory entries are never added
+	//   pathPrefix         - prefix joined in front of every reported name
+	// "." and ".." are skipped. If the directory cannot be opened a message is
+	// printed and result is left unchanged.
+	static void glob_rec(const string &directory, const string& wildchart, std::vector<string>& result,
+		bool recursive, bool includeDirectories, const string& pathPrefix)
+	{
+		DIR *dir;
+
+		if ((dir = opendir(directory.c_str())) != 0)
+		{
+			/* find all the files and directories within directory */
+			try
+			{
+				struct dirent *ent;
+				while ((ent = readdir(dir)) != 0)
+				{
+					const char* name = ent->d_name;
+					// skip empty names, "." and ".."
+					if ((name[0] == 0) || (name[0] == '.' && name[1] == 0) || (name[0] == '.' && name[1] == '.' && name[2] == 0))
+						continue;
+
+					// path: where the entry really is; entry: the name reported to the caller
+					string path = JoinPath(directory, name);
+					string entry = JoinPath(pathPrefix, name);
+
+					if (isDir(path, dir))
+					{
+						if (recursive)
+							glob_rec(path, wildchart, result, recursive, includeDirectories, entry);
+						if (!includeDirectories)
+							continue;
+					}
+
+					if (wildchart.empty() || wildcmp(name, wildchart.c_str()))
+						result.push_back(entry);
+				}
+			}
+			catch (...)
+			{
+				// close the directory handle before propagating the exception
+				closedir(dir);
+				throw;
+			}
+			closedir(dir);
+		}
+		else
+		{
+			printf("could not open directory: %s", directory.c_str());
+		}
+	}
+
+	// Fills result with the files found in directory that match pattern.
+	// pattern is a comma-separated list of wildcard patterns; directories are
+	// never returned. With addPath the stored names are prefixed with directory,
+	// otherwise only the bare file names are stored. The previous content of
+	// result is discarded and the final list is sorted.
+	void GetFileNameList(const string &directory, const string &pattern, std::vector<string>& result, bool recursive, bool addPath)
+	{
+		const vector<string> patternList = SplitString(pattern, ",");
+
+		result.clear();
+
+		for (vector<string>::const_iterator onePattern = patternList.begin(); onePattern != patternList.end(); ++onePattern)
+		{
+			std::vector<string> matches;
+			glob_rec(directory, *onePattern, matches, recursive, true, directory);
+
+			for (std::vector<string>::const_iterator match = matches.begin(); match != matches.end(); ++match)
+			{
+				// directories are collected by glob_rec but filtered out here
+				if (IsDirectory(*match))
+					continue;
+
+				if (addPath)
+					result.push_back(*match);
+				else
+					result.push_back(GetFileName(*match));
+			}
+		}
+		std::sort(result.begin(), result.end());
+	}
+
+	// Like GetFileNameList, but with addPath set sub-directories are reported as
+	// well: a directory path gets a trailing "/" and all of its separators are
+	// normalised to '/'. Without addPath only bare file names are stored and
+	// directories are left out. result is cleared first and sorted at the end.
+	void GetFileNameList2(const string &directory, const string &pattern, std::vector<string>& result, bool recursive, bool addPath)
+	{
+		const vector<string> patternList = SplitString(pattern, ",");
+
+		result.clear();
+
+		for (vector<string>::const_iterator onePattern = patternList.begin(); onePattern != patternList.end(); ++onePattern)
+		{
+			std::vector<string> matches;
+			glob_rec(directory, *onePattern, matches, recursive, true, directory);
+
+			for (std::vector<string>::const_iterator match = matches.begin(); match != matches.end(); ++match)
+			{
+				string entry = *match;
+				if (IsDirectory(entry))
+				{
+					// mark directories with a trailing '/' and use '/' throughout
+					entry = entry + "/";
+					std::replace_if(entry.begin(), entry.end(), IsPathSeparator, '/');
+				}
+
+				if (addPath)
+				{
+					result.push_back(entry);
+				}
+				else if (!IsDirectory(entry))
+				{
+					result.push_back(GetFileName(entry));
+				}
+			}
+		}
+		std::sort(result.begin(), result.end());
+	}
+
+	// Deletes path. A file is unlinked; a directory is emptied recursively and
+	// then removed. A path that does not exist is silently ignored; a failed
+	// removal only prints a message.
+	void RemoveAll(const string& path)
+	{
+		if (!Exists(path))
+			return;
+
+		if (!IsDirectory(path))
+		{
+#ifdef _MSC_VER
+			const bool removed = _unlink(path.c_str()) == 0;
+#else
+			const bool removed = unlink(path.c_str()) == 0;
+#endif
+			if (!removed)
+			{
+				printf("can't remove file: %s\n", path.c_str());
+			}
+			return;
+		}
+
+		// directory: delete every direct child first (each child recurses on its own)
+		std::vector<string> children;
+		GetFileNameList2(path, string(), children, false, true);
+		for (std::vector<string>::const_iterator child = children.begin(); child != children.end(); ++child)
+		{
+			RemoveAll(*child);
+		}
+
+#ifdef _MSC_VER
+		const bool removed = _rmdir(path.c_str()) == 0;
+#else
+		const bool removed = rmdir(path.c_str()) == 0;
+#endif
+		if (!removed)
+		{
+			printf("can't remove directory: %s\n", path.c_str());
+		}
+	}
+
+	// Walks directory recursively, deletes every entry whose name matches the
+	// wildcard extension (an empty extension matches everything) and prints a
+	// line per deleted entry.
+	// NOTE(review): the final RemoveAll(directory) deletes the directory with all
+	// of its remaining content whatever extension is, so the filter only changes
+	// what is printed and counted - confirm this is intended.
+	void Remove(const string &directory, const string &extension)
+	{
+
+		DIR *dir;
+
+		// static: the counter keeps growing across recursive and later calls
+		static int numberOfFiles = 0;
+
+		if ((dir = opendir(directory.c_str())) != 0)
+		{
+			/* find all the files and directories within directory */
+			try
+			{
+				struct dirent *ent;
+				while ((ent = readdir(dir)) != 0)
+				{
+					const char* name = ent->d_name;
+					// skip empty names, "." and ".."
+					if ((name[0] == 0) || (name[0] == '.' && name[1] == 0) || (name[0] == '.' && name[1] == '.' && name[2] == 0))
+						continue;
+
+					string path = JoinPath(directory, name);
+
+					if (isDir(path, dir))
+					{
+						Remove(path, extension);
+					}
+
+					// check the extension
+					if (extension.empty() || wildcmp(name, extension.c_str()))
+					{
+						RemoveAll(path);
+						++numberOfFiles;
+						printf("%s deleted! number of deleted files:%d\n", path.c_str(), numberOfFiles);
+					}
+
+				}
+			}
+			catch (...)
+			{
+				// close the directory handle before propagating the exception
+				closedir(dir);
+				throw;
+			}
+			closedir(dir);
+		}
+		else
+		{
+			printf("could not open directory: %s", directory.c_str());
+		}
+
+		// call RemoveAll to delete the directory itself
+		RemoveAll(directory);
+	}
+	// Returns the part of path after the last path separator, e.g.
+	// "D:/1/1.txt" -> "1.txt". A path without any separator is returned whole;
+	// a path ending in a separator yields an empty string.
+	string GetFileName(const string &path)
+	{
+		// move backwards until the character in front of `start` is a separator
+		string::size_type start = path.size();
+		while (start > 0 && !IsPathSeparator(path[start - 1]))
+		{
+			--start;
+		}
+		return path.substr(start);
+	}
+    // Returns the file name of path without its extension, e.g.
+    // "D:/1/1.txt" -> "1" and "a/b.tar.gz" -> "b.tar".
+    // A name without any '.' is returned unchanged (it used to come back as an
+    // empty string). A name whose only '.' is the first character, such as
+    // ".bashrc", still yields "" because GetExtension() reports the whole name
+    // as the extension in that case.
+    string GetFileName_NoExtension(const string &path)
+    {
+        const string fileName=GetFileName(path);
+        const string::size_type dot=fileName.rfind('.');
+        if(dot==string::npos)
+        {
+            // no extension to strip
+            return fileName;
+        }
+        return fileName.substr(0,dot);
+    }
+
+	// Returns the extension of the last path component including the leading
+	// '.', e.g. "D:/1/1.txt" -> ".txt". Returns an empty string when the last
+	// component has no '.'. The search stops at the first path separator, so a
+	// dot in a parent directory ("dir.v1/file") is no longer mistaken for an
+	// extension.
+	string GetExtension(const string &path)
+	{
+		for (string::size_type i = path.size(); i > 0; --i)
+		{
+			const char c = path[i - 1];
+			if (IsPathSeparator(c))
+			{
+				// reached the directory part without finding a '.'
+				break;
+			}
+			if (c == '.')
+			{
+				return path.substr(i - 1);
+			}
+		}
+
+		return string();
+	}
+
+	// Returns everything up to and including the last path separator, e.g.
+	// "D:/1/1.txt" -> "D:/1/". A path without any separator yields an empty string.
+	string GetParentPath(const string &path)
+	{
+		string::size_type end = path.size();
+		while (end > 0 && !IsPathSeparator(path[end - 1]))
+		{
+			--end;
+		}
+		return path.substr(0, end);
+	}
+
+    // Creates the single directory path; parent directories are not created here.
+    // Returns true when the directory was created or when creation failed but
+    // path already is a directory. On unsupported platforms nothing is created.
+    static bool CreateDirectory(const string &path)
+    {
+    #if defined WIN32 || defined _WIN32 || defined WINCE
+    #ifdef WINRT
+            wchar_t wpath[MAX_PATH];
+            size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
+            CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
+            // wpath is a wide string, so the W variant is required (the A variant
+            // takes a narrow string); "::" selects the Win32 API explicitly
+            int result = ::CreateDirectoryW(wpath, NULL) ? 0 : -1;
+    #else
+            int result = _mkdir(path.c_str());
+    #endif
+    #elif defined __linux__ || defined __APPLE__
+            int result = mkdir(path.c_str(), 0777);
+    #else
+            int result = -1;
+    #endif
+
+            if (result == -1)
+            {
+                // failure is acceptable when the directory is already there
+                return IsDirectory(path);
+            }
+            return true;
+    }
+
+	// Creates directoryPath together with any missing parent directories.
+	// Trailing path separators are ignored. Returns true when the directory
+	// exists afterwards (or when there is nothing to create), false otherwise.
+	bool CreateDirectories(const string &directoryPath)
+	{
+		// strip all trailing separators
+		string path = directoryPath;
+		while (!path.empty() && IsPathSeparator(path[path.length() - 1]))
+		{
+			path.erase(path.length() - 1);
+		}
+
+		const bool nothingToCreate = path.empty() || path == "./" || path == ".\\" || path == ".";
+		if (nothingToCreate || IsDirectory(path))
+			return true;
+
+		// the last '/' is preferred; '\\' is only looked for when there is no '/'
+		size_t lastSeparator = path.rfind('/');
+		if (lastSeparator == string::npos)
+			lastSeparator = path.rfind('\\');
+
+		if (lastSeparator != string::npos)
+		{
+			// make sure the parent exists before creating this level
+			const string parent = path.substr(0, lastSeparator);
+			if (!parent.empty() && !CreateDirectories(parent))
+				return false;
+		}
+
+		return CreateDirectory(path);
+	}
+
+    // Copies the file srcPath to dstPath in binary mode, replacing dstPath.
+    // Returns false, after printing a message, when both paths are equal, when
+    // either file cannot be opened or when writing the destination fails.
+    // The paths are compared as text only; two different spellings of the same
+    // file are not detected.
+    bool CopyFile(const string srcPath, const string dstPath)
+    {
+        // Must be checked before dstPath is opened: opening the output stream
+        // truncates the file, which would wipe the source when both are the same.
+        if(srcPath==dstPath)
+        {
+            printf("src can not be same with dst\n");
+            return false;
+        }
+
+        std::ifstream srcFile(srcPath,ios::binary);
+        if(!srcFile.is_open())
+        {
+            printf("can not open %s\n",srcPath.c_str());
+            return false;
+        }
+
+        // opened only after the source is known to be readable, so a failed
+        // copy does not leave an empty destination file behind
+        std::ofstream dstFile(dstPath,ios::binary);
+        if(!dstFile.is_open())
+        {
+            printf("can not open %s\n", dstPath.c_str());
+            return false;
+        }
+
+        char buffer[2048];
+        while(srcFile)
+        {
+            // the last read may be short; gcount() is the number of bytes actually read
+            srcFile.read(buffer,sizeof(buffer));
+            dstFile.write(buffer,srcFile.gcount());
+        }
+        srcFile.close();
+        dstFile.close();
+
+        // a failed write or flush leaves the stream in an error state
+        if(!dstFile)
+        {
+            printf("can not write %s\n", dstPath.c_str());
+            return false;
+        }
+        return true;
+    }
+
+    // Copies the directory srcPath, including its sub-directories, into dstPath:
+    // CopyDirectories("D:/0/1/2/","E:/3/") produces E:/3/2/...
+    // srcPath may be given with or without trailing path separators.
+    // Progress is printed per file. Returns false only when srcPath equals
+    // dstPath; failures of individual files are reported by CopyFile() and do
+    // not stop the copy.
+    bool CopyDirectories(string srcPath, const string dstPath)
+    {
+        if(srcPath==dstPath)
+        {
+            printf("src can not be same with dst\n");
+            return false;
+        }
+
+        // Remove trailing path separators. Only separators are removed: the old
+        // code always dropped the last character, which damaged a path that was
+        // passed without a trailing separator.
+        while(!srcPath.empty() && IsPathSeparator(srcPath[srcPath.size()-1]))
+        {
+            srcPath.erase(srcPath.size()-1);
+        }
+
+        vector<string> fileNameList;
+        GetFileNameList2(srcPath, "", fileNameList, true, true);
+
+        // everything after the parent of srcPath is re-created below dstPath
+        const string parentPathOfSrc=GetParentPath(srcPath);
+        const string::size_type length=parentPathOfSrc.size();
+
+        // create all directories
+        for(size_t i=0;i<fileNameList.size();++i)
+        {
+            const string dstFilePath=dstPath+fileNameList[i].substr(length);
+            CreateDirectories(GetParentPath(dstFilePath));
+        }
+
+        // copy file
+        for(size_t i=0;i<fileNameList.size();++i)
+        {
+            const string &srcFilePath=fileNameList[i];
+            if (IsDirectory(srcFilePath))
+            {
+                continue;
+            }
+            const string dstFilePath=dstPath+srcFilePath.substr(length);
+
+            CopyFile(srcFilePath,dstFilePath);
+
+            // progress in percent; "%%" prints a literal percent sign
+            const double process = (1.0*(i + 1) / fileNameList.size()) * 100;
+            printf("%s done! %f%% \n", GetFileName(srcFilePath).c_str(), process);
+        }
+        // size() is a size_t, so it is converted explicitly to match %d
+        printf("all done!(the number of files:%d)\n", static_cast<int>(fileNameList.size()));
+
+        return true;
+    }
+
+}
+
+
--- a/Src/Utility/Filesystem.h
+++ b/Src/Utility/Filesystem.h
+// 文件以及目录处理
+
+#ifndef __FILE_SYSTEM_H__
+#define __FILE_SYSTEM_H__
+
+#include <string>
+#include <vector>
+ 
+namespace migraphxSamples
+{
+
+// 路径是否存在
+bool Exists(const std::string &path);
+
+// 路径是否为目录
+bool IsDirectory(const std::string &path);
+
+// 是否是路径分隔符(Linux:‘/’,Windows:’\\’)
+bool IsPathSeparator(char c);
+
+// 路径拼接
+std::string JoinPath(const std::string &base, const std::string &path);
+
+// 创建多级目录,注意：创建多级目录的时候，目标目录是不能有文件存在的
+bool CreateDirectories(const std::string &directoryPath);
+
+/** 生成符合指定模式的文件名列表(支持递归遍历)
+* 
+* pattern: 模式,比如"*.jpg","*.png","*.jpg,*.png"
+* addPath：是否包含父路径
+* 注意：
+    1. 多个模式使用","分割,比如"*.jpg,*.png"
+    2. 支持通配符'*','?' ,比如第一个字符是7的所有文件名:"7*.*", 以512结尾的所有jpg文件名："*512.jpg"
+    3. 使用"*.jpg"，而不是".jpg"
+    4. 空string表示返回所有结果
+    5. 不能返回子目录名
+*
+*/
+void GetFileNameList(const std::string &directory, const std::string &pattern, std::vector<std::string> &result, bool recursive, bool addPath);
+
+// 与GetFileNameList的区别在于如果有子目录，在addPath为true的时候会返回子目录路径(目录名最后有"/")
+void GetFileNameList2(const std::string &directory, const std::string &pattern, std::vector<std::string> &result, bool recursive, bool addPath);
+
+// 删除文件或者目录,支持递归删除
+void Remove(const std::string &directory, const std::string &extension="");
+
+/** 获取路径的文件名和扩展名
+ * 
+ *  示例：path为D:/1/1.txt,则GetFileName()为1.txt,GetFileName_NoExtension()为1,GetExtension()为.txt,GetParentPath()为D:/1/
+*/
+std::string GetFileName(const std::string &path);
+std::string GetFileName_NoExtension(const std::string &path); 
+std::string GetExtension(const std::string &path);
+std::string GetParentPath(const std::string &path);
+
+// 拷贝文件
+bool CopyFile(const std::string srcPath,const std::string dstPath);
+
+/** 拷贝目录
+ * 
+ * 示例：CopyDirectories("D:/0/1/2/","E:/3/");实现把D:/0/1/2/目录拷贝到E:/3/目录中(即拷贝完成后的目录结构为E:/3/2/)
+ * 注意：
+    1.第一个参数应以"/"结尾(与上面的示例一致)
+    2.不能拷贝隐藏文件
+*/
+bool CopyDirectories(std::string srcPath,const std::string dstPath);
+
+}
+
+#endif
--- a/Src/Utility/SimpleLog.h
+++ b/Src/Utility/SimpleLog.h
@@ -19,7 +19,7 @@ using namespace std;

 /** 简易日志
 * 
- * 轻量级日志系统，不依赖于其他第三方库，只需要包含一个头文件就可以使用。提供了4种日志级别，包括INFO,DEBUG,WARN和ERROR。
+ * 不依赖于其他第三方库，只需要包含一个头文件就可以使用。提供了4种日志级别，包括INFO,DEBUG,WARN和ERROR。
 * 
 * 示例1:
    // 初始化日志，在./Log/目录下创建两个日志文件log1.log和log2.log(注意：目录./Log/需要存在，否则日志创建失败)

--- a/Src/main.cpp
+++ b/Src/main.cpp
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <gpt2.h>
+#include <GPT2.h>
 #include <fstream>
 #include <SimpleLog.h>
+#include <Filesystem.h>
 #include <tokenization.h>

 int main()
@@ -11,7 +12,7 @@ int main()
    // 加载GPT2模型
    migraphxSamples::GPT2 gpt2;
    migraphxSamples::ErrorCode errorCode = gpt2.Initialize();
-    if (errorCode != SUCCESS)
+    if (errorCode != migraphxSamples::SUCCESS)
    {
        LOG_ERROR(stdout, "fail to initialize GPT2!\n");
        exit(-1);
@@ -19,11 +20,11 @@ int main()
    LOG_INFO(stdout, "succeed to initialize GPT2\n");

    // 加载词汇表，用于编码和解码
-    cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/Models/vocab_shici.txt");
+    cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/vocab_shici.txt");
    std::ifstream infile;
    std::string buf;
    std::vector<std::string> output;
-    infile.open("../Resource/Models/vocab_shici.txt");
+    infile.open("../Resource/vocab_shici.txt");
    while (std::getline(infile,buf))
    {
        output.push_back(buf);

--- a/requirements.txt
+++ b/requirements.txt
-./3rdParty/opencv-3.4.11_mini.tar.gz
-