Commit 5c098b75 authored by liucong's avatar liucong
Browse files

精简代码

parent 33430a56
......@@ -12,7 +12,6 @@ set(CMAKE_BUILD_TYPE release)
# 添加头文件路径
set(INCLUDE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Src/
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/
${CMAKE_CURRENT_SOURCE_DIR}/Src/GPT2/
$ENV{DTKROOT}/include/
${CMAKE_CURRENT_SOURCE_DIR}/depend/include/)
include_directories(${INCLUDE_PATH})
......@@ -33,10 +32,9 @@ link_libraries(${LIBRARY})
# 添加源文件
set(SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/Src/main.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/GPT2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/tokenization.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/utf8proc.c
${CMAKE_CURRENT_SOURCE_DIR}/Src/GPT2/gpt2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/CommonUtility.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Src/Utility/Filesystem.cpp)
# 添加可执行目标
......
......@@ -12,9 +12,9 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠transforme
| GPT-2 | 2019 年 2 月 | 15 亿 | 40GB |
| GPT-3 | 2020 年 5 月 | 1,750 亿 | 45TB |
本次采用GPT-2模型进行诗词生成任务,模型文件下载链接:https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw, 提取码:4pmh。将GPT2_shici.onnx模型文件保存在Resource/Models/NLP/GPT2文件夹下。整体模型结构如下图所示,也可以通过netron工具:https://netron.app/ 查看GPT-2的模型结构。
本次采用GPT-2模型进行诗词生成任务,模型文件下载链接:https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw, 提取码:4pmh。将GPT2_shici.onnx模型文件保存在Resource/文件夹下。整体模型结构如下图所示,也可以通过netron工具:https://netron.app/ 查看GPT-2的模型结构。
<img src="../Images/GPT_01.png" style="zoom:100%;" align=middle>
<img src="./Images/GPT_01.png" style="zoom:100%;" align=middle>
## 预处理
......@@ -27,11 +27,11 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠transforme
首先,根据提供的词汇表路径,通过cuBERT::FullTokenizer()函数加载词汇表,用于后续对输入文本的编码操作。其次,将词汇表中的内容依次保存到vector容器output中,用于数据后处理中的解码操作。
```c++
cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/Models/vocab_shici.txt");
cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/vocab_shici.txt");
std::ifstream infile;
std::string buf;
std::vector<std::string> output;
infile.open("../Resource/Models/vocab_shici.txt");
infile.open("../Resource/vocab_shici.txt");
while (std::getline(infile,buf))
{
output.push_back(buf);
......@@ -65,7 +65,7 @@ ErrorCode GPT2::Preprocessing(cuBERT::FullTokenizer tokenizer,
对于GPT-2这种生成式语言模型来说,模型不是仅执行一次推理就结束,而是需要执行多次推理,才能得到最终的答案。如下图所示,GPT-2模型每次推理仅生成一个词,通过将生成的词与输入数据拼接,输入到模型中继续下一次的推理,直到循环结束或者生成[SEP]结束标识符才结束推理。
<img src="../Images/GPT_02.png" style="zoom:70%;" align=middle>
<img src="./Images/GPT_02.png" style="zoom:70%;" align=middle>
具体GPT-2模型的推理,如下代码所示。首先,通过gpt2.Inference()函数实现模型的具体推理细节,推理结果保存在outputs中。其次,对每次推理结果进行判断,当判断为[SEP]结束标志符时,结束循环完成推理,否则就将推理结果outputs加入到输入数据input_id中,继续下一次的模型推理。
......@@ -97,7 +97,7 @@ long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id
input[0][j] = input_id[j];
}
// 设置输入shape
// 输入shape
std::vector<std::vector<std::size_t>> inputShapes;
inputShapes.push_back({1,input_id.size()});
......
......@@ -12,9 +12,9 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠的transfo
| GPT-2 | 2019 年 2 月 | 15 亿 | 40GB |
| GPT-3 | 2020 年 5 月 | 1,750 亿 | 45TB |
本次采用GPT-2模型进行诗词生成任务,模型文件下载链接:https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , 提取码:4pmh 。将GPT2_shici.onnx模型文件保存在Resource/Models/NLP/GPT2文件夹下。整体模型结构如下图所示,也可以通过netron工具:https://netron.app/ 查看GPT-2的模型结构。
本次采用GPT-2模型进行诗词生成任务,模型文件下载链接:https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , 提取码:4pmh 。将GPT2_shici.onnx模型文件保存在Resource/文件夹下。整体模型结构如下图所示,也可以通过netron工具:https://netron.app/ 查看GPT-2的模型结构。
<img src="../Images/GPT_01.png" style="zoom:100%;" align=middle>
<img src="./Images/GPT_01.png" style="zoom:100%;" align=middle>
## 预处理
......@@ -28,7 +28,7 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠的transfo
```python
# 加载词汇表
vocab_file = os.path.join('../../../Resource/Models', 'vocab_shici.txt')
vocab_file = os.path.join('../Resource/', 'vocab_shici.txt')
tokenizer = BertTokenizerFast(vocab_file, sep_token="[SEP]", pad_token="[PAD]", cls_token="[CLS]")
```
......@@ -83,7 +83,7 @@ for _ in range(max_len):
2.循环推理,GPT-2模型不像其他模型一样只需要执行一次推理,而是需要循环执行多次推理才能完成。首先,模型推理限定在for循环中,将输入数据input_ids,输入到model.run({...})中执行推理,生成一个token的id。其次,将推理结果拼接到输入数据input_ids中,执行下一次循环。最后,当循环结束或者生成的词为[SEP]结束标志符时,完成GPT-2模型的整体推理。如下图所示,为GPT-2模型的一次完整推理过程。
<img src="../Images/GPT_02.png" style="zoom:70%;" align=middle>
<img src="./Images/GPT_02.png" style="zoom:70%;" align=middle>
## 数据后处理
......
......@@ -5,7 +5,7 @@ import migraphx
# 加载词汇表
print("INFO: Complete loading the vocabulary")
vocab_file = os.path.join('../Resource/Models', 'vocab_shici.txt')
vocab_file = os.path.join('../Resource/', 'vocab_shici.txt')
tokenizer = BertTokenizerFast(vocab_file, sep_token="[SEP]", pad_token="[PAD]", cls_token="[CLS]")
# 设置最大输入shape
......@@ -13,7 +13,7 @@ maxInput={"input":[1,1000]}
# 加载模型
print("INFO: Parsing and compiling the model")
model = migraphx.parse_onnx("../Resource/Models/GPT2_shici.onnx", map_input_dims=maxInput)
model = migraphx.parse_onnx("../Resource/GPT2_shici.onnx", map_input_dims=maxInput)
inputName=model.get_parameter_names()[0]
inputShape=model.get_parameter_shapes()[inputName].lens()
print("inputName:{0} \ninputShape:{1}".format(inputName,inputShape))
......
......@@ -8,7 +8,7 @@ GPT2主要使用Transformer的Decoder模块为特征提取器,并对Transforme
## Python版本推理
本次采用GPT-2模型进行诗词生成任务,模型文件下载链接:https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , 提取码:4pmh ,并将GPT2_shici.onnx模型文件保存在Resource/model文件夹下。下面介绍如何运行python代码示例,具体推理代码解析,在Doc/Tutorial_Python目录中有详细说明。
本次采用GPT-2模型进行诗词生成任务,模型文件下载链接:https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , 提取码:4pmh ,并将GPT2_shici.onnx模型文件保存在Resource/文件夹下。下面介绍如何运行python代码示例,具体推理代码解析,在Doc/Tutorial_Python.md文件中有详细说明。
### 安装镜像
......@@ -20,13 +20,13 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:ort1.14.0_migraphx3.0
### 执行推理
1.参考《MIGraphX教程》中的安装方法安装MIGraphX并设置好PYTHONPATH
1.参考《MIGraphX教程》设置好PYTHONPATH
2.安装依赖:
```python
# 进入migraphx samples工程根目录
cd <path_to_migraphx_samples>
# 进入gpt2 migraphx工程根目录
cd <path_to_gpt2_migraphx>
# 进入示例程序目录
cd ./Python/
......@@ -79,13 +79,13 @@ rbuild build -d depend
**Centos**:
```
export LD_LIBRARY_PATH=<path_to_migraphx_samples>/depend/lib64/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=<path_to_gpt2_migraphx>/depend/lib64/:$LD_LIBRARY_PATH
```
**Ubuntu**:
```
export LD_LIBRARY_PATH=<path_to_migraphx_samples>/depend/lib/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=<path_to_gpt2_migraphx>/depend/lib/:$LD_LIBRARY_PATH
```
然后执行:
......@@ -99,8 +99,8 @@ source ~/.bashrc
运行GPT2示例程序,具体执行如下命令:
```python
# 进入migraphx samples工程根目录
cd <path_to_migraphx_samples>
# 进入gpt2 migraphx工程根目录
cd <path_to_gpt2_migraphx>
# 进入build目录
cd ./build/
......
#include <fstream>
#include <sstream>
#include <GPT2.h>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/reshape2.hpp>
#include <Filesystem.h>
#include <SimpleLog.h>
#include <algorithm>
#include <string>
#include <stdexcept>
#include <gpt2.h>
#include <tokenization.h>
namespace migraphxSamples
......@@ -25,16 +24,21 @@ GPT2::~GPT2()
ErrorCode GPT2::Initialize()
{
// 设置模型路径
std::string modelPath="../Resource/Models/GPT2_shici.onnx";
// 获取模型文件
std::string modelPath="../Resource/GPT2_shici.onnx";
// 设置最大输入shape
migraphx::onnx_options onnx_options;
onnx_options.map_input_dims["input"]={1,1000};
// 加载模型
if(Exists(modelPath)==false)
{
LOG_ERROR(stdout,"%s not exist!\n",modelPath.c_str());
return MODEL_NOT_EXIST;
}
net = migraphx::parse_onnx(modelPath, onnx_options);
LOG_INFO(stdout,"succeed to load model: GPT2_shici\n");
LOG_INFO(stdout,"succeed to load model: %s\n",GetFileName(modelPath).c_str());
// 获取模型输入属性
std::unordered_map<std::string, migraphx::shape> inputMap=net.get_parameter_shapes();
......@@ -47,9 +51,9 @@ ErrorCode GPT2::Initialize()
// 编译模型
migraphx::compile_options options;
options.device_id=0; // 设置GPU设备,默认为0号设备
options.offload_copy=true;
options.offload_copy=true; // 设置offload_copy
net.compile(gpuTarget,options);
LOG_INFO(stdout,"succeed to compile model: %s\n");
LOG_INFO(stdout,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());
return SUCCESS;
}
......@@ -72,7 +76,7 @@ long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id
std::vector<std::vector<std::size_t>> inputShapes;
inputShapes.push_back({1,input_id.size()});
// 输入数据
// 创建输入数据
std::unordered_map<std::string, migraphx::argument> inputData;
inputData[inputName]=migraphx::argument{migraphx::shape(inputShape.type(),inputShapes[0]),(long unsigned int*)input};
......
#ifndef GPT2_H
#define GPT2_H
#ifndef __GPT2_H__
#define __GPT2_H__
#include <cstdint>
#include <string>
#include <migraphx/program.hpp>
#include <CommonDefinition.h>
#include <tokenization.h>
namespace migraphxSamples
{
typedef enum _ErrorCode
{
SUCCESS=0,
MODEL_NOT_EXIST,
CONFIG_FILE_NOT_EXIST,
FAIL_TO_LOAD_MODEL,
FAIL_TO_OPEN_CONFIG_FILE,
}ErrorCode;
typedef struct _Predictions
{
long unsigned int index;
......
// 常用数据类型和宏定义
// 常用定义
#ifndef __COMMON_DEFINITION_H__
#define __COMMON_DEFINITION_H__
#include <string>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
namespace migraphxSamples
{
......@@ -21,20 +17,7 @@ namespace migraphxSamples
#define CONFIG_FILE "../Resource/Configuration.xml"
typedef struct __Time
{
string year;
string month;
string day;
string hour;
string minute;
string second;
string millisecond; // ms
string microsecond; // us
string weekDay;
}_Time;
typedef enum _ErrorCode
typedef enum _ErrorCode
{
SUCCESS=0, // 0
MODEL_NOT_EXIST, // 模型不存在
......@@ -44,7 +27,7 @@ typedef enum _ErrorCode
IMAGE_ERROR, // 图像错误
}ErrorCode;
typedef struct _ResultOfPrediction
typedef struct _ResultOfPrediction
{
float confidence;
int label;
......@@ -52,24 +35,22 @@ typedef struct _ResultOfPrediction
}ResultOfPrediction;
typedef struct _ResultOfDetection
typedef struct _ResultOfDetection
{
Rect boundingBox;
cv::Rect boundingBox;
float confidence;
int classID;
string className;
std::string className;
bool exist;
_ResultOfDetection():confidence(0.0f),classID(0),exist(true){}
}ResultOfDetection;
typedef struct _InitializationParameterOfDetector
typedef struct _InitializationParameterOfDetector
{
std::string parentPath;
std::string configFilePath;
cv::Size inputSize;
std::string logName;
}InitializationParameterOfDetector;
typedef struct _InitializationParameterOfDetector InitializationParameterOfClassifier;
......
#include <CommonUtility.h>
namespace migraphxSamples
{
// Ordering predicate for std::sort: yields true when L should be placed
// before R, i.e. when L carries the strictly higher confidence.
bool CompareConfidence(const ResultOfDetection &L,const ResultOfDetection &R)
{
    const bool leftIsMoreConfident = R.confidence < L.confidence;
    return leftIsMoreConfident;
}
// Ordering predicate for std::sort: yields true when L should be placed
// before R, i.e. when L's bounding box has the strictly larger area.
bool CompareArea(const ResultOfDetection &L,const ResultOfDetection &R)
{
    const auto leftArea = L.boundingBox.area();
    const auto rightArea = R.boundingBox.area();
    return rightArea < leftArea;
}
void NMS(std::vector<ResultOfDetection> &detections, float IOUThreshold)
{
// sort
std::sort(detections.begin(), detections.end(), CompareConfidence);
for (int i = 0; i<detections.size(); ++i)
{
if (detections[i].exist)
{
for (int j = i + 1; j<detections.size(); ++j)
{
if (detections[j].exist)
{
// compute IOU
float intersectionArea = (detections[i].boundingBox & detections[j].boundingBox).area();
float intersectionRate = intersectionArea / (detections[i].boundingBox.area() + detections[j].boundingBox.area() - intersectionArea);
if (intersectionRate>IOUThreshold)
{
detections[j].exist = false;
}
}
}
}
}
}
}
// Common utilities
#ifndef __COMMON_UTILITY_H__
#define __COMMON_UTILITY_H__
#include <CommonDefinition.h>
namespace migraphxSamples
{
// Sort predicates: each returns true when L should come before R, ordering
// detections by descending confidence or by descending bounding-box area.
bool CompareConfidence(const ResultOfDetection &L,const ResultOfDetection &R);
bool CompareArea(const ResultOfDetection &L,const ResultOfDetection &R);
// Non-maximum suppression: sorts detections by confidence and marks overlapping
// lower-confidence ones (IOU above IOUThreshold) by setting exist=false.
void NMS(std::vector<ResultOfDetection> &detections, float IOUThreshold);
}
#endif
#include <Filesystem.h>
#include <algorithm>
#include <sys/stat.h>
#include <sys/types.h>
#include <fstream>
#ifdef _WIN32
#include <io.h>
#include <direct.h>
#include <Windows.h>
#else
#include <unistd.h>
#include <dirent.h>
#endif
// 路径分隔符(Linux:‘/’,Windows:’\\’)
#ifdef _WIN32
#define PATH_SEPARATOR '\\'
#else
#define PATH_SEPARATOR '/'
#endif
using namespace std;
namespace migraphxSamples
{
// Splits `str` on every occurrence of `separator`.
//
// Empty fields are preserved: "a,,b" -> {"a", "", "b"}, and an empty input
// yields a single empty field {""}.
//
// An empty separator cannot advance the scan (std::string::find("") matches at
// every position), which previously caused an endless loop; in that case the
// whole input is returned as the only field.
static std::vector<std::string> SplitString(std::string str, std::string separator)
{
    std::vector<std::string> result;

    if (separator.empty())
    {
        result.push_back(str);
        return result;
    }

    // Append a trailing separator so the last field is terminated like the others.
    str += separator;

    const std::string::size_type total = str.size();
    std::string::size_type fieldStart = 0;
    while (fieldStart < total)
    {
        const std::string::size_type fieldEnd = str.find(separator, fieldStart);
        if (fieldEnd == std::string::npos)
        {
            // No further separator can be found from a later position either.
            break;
        }
        result.push_back(str.substr(fieldStart, fieldEnd - fieldStart));
        fieldStart = fieldEnd + separator.size();
    }

    return result;
}
// Windows builds: provide minimal dirent/DIR/opendir/readdir/closedir replacements
// implemented on top of the Win32 FindFirstFileEx/FindNextFile API.
// Other platforms use the system <dirent.h> (see the #else branch at the end).
#if defined _WIN32 || defined WINCE
const char dir_separators[] = "/\\";
// Directory entry: only the entry name is exposed.
struct dirent
{
const char* d_name;
};
// Directory stream state: the Win32 find data, the find handle, and the entry
// object handed back by readdir().
struct DIR
{
#ifdef WINRT
WIN32_FIND_DATAW data;
#else
WIN32_FIND_DATAA data;
#endif
HANDLE handle;
dirent ent;
#ifdef WINRT
DIR() { }
// In the WINRT build readdir() allocates d_name with new[], so it is released here.
~DIR()
{
if (ent.d_name)
delete[] ent.d_name;
}
#endif
};
// Starts enumerating "<path>\*". Returns a heap-allocated DIR, or 0 when the
// find handle could not be obtained.
DIR* opendir(const char* path)
{
DIR* dir = new DIR;
// d_name == 0 marks "no entry returned yet"; readdir() relies on this.
dir->ent.d_name = 0;
#ifdef WINRT
string full_path = string(path) + "\\*";
wchar_t wfull_path[MAX_PATH];
size_t copied = mbstowcs(wfull_path, full_path.c_str(), MAX_PATH);
// NOTE(review): CV_Assert is an OpenCV macro; no OpenCV include is visible in this file - confirm it is in scope for WINRT builds.
CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
dir->handle = ::FindFirstFileExW(wfull_path, FindExInfoStandard,
&dir->data, FindExSearchNameMatch, NULL, 0);
#else
dir->handle = ::FindFirstFileExA((string(path) + "\\*").c_str(),
FindExInfoStandard, &dir->data, FindExSearchNameMatch, NULL, 0);
#endif
if (dir->handle == INVALID_HANDLE_VALUE)
{
/* no valid find handle was obtained, so only the DIR object itself is released */
delete dir;
return 0;
}
return dir;
}
// Returns the next entry of the stream, or 0 when FindNextFile reports no more.
// The first call returns the entry already fetched by opendir().
dirent* readdir(DIR* dir)
{
#ifdef WINRT
if (dir->ent.d_name != 0)
{
if (::FindNextFileW(dir->handle, &dir->data) != TRUE)
return 0;
}
size_t asize = wcstombs(NULL, dir->data.cFileName, 0);
CV_Assert((asize != 0) && (asize != (size_t)-1));
// NOTE(review): the previous d_name buffer is overwritten without delete[]; only the last one is freed by ~DIR().
char* aname = new char[asize + 1];
aname[asize] = 0;
wcstombs(aname, dir->data.cFileName, asize);
dir->ent.d_name = aname;
#else
if (dir->ent.d_name != 0)
{
if (::FindNextFileA(dir->handle, &dir->data) != TRUE)
return 0;
}
// Points into dir->data, so the name is only valid until the next readdir() on this DIR.
dir->ent.d_name = dir->data.cFileName;
#endif
return &dir->ent;
}
// Closes the find handle and frees the DIR allocated by opendir().
void closedir(DIR* dir)
{
::FindClose(dir->handle);
delete dir;
}
#else
# include <dirent.h>
# include <sys/stat.h>
const char dir_separators[] = "/";
#endif
// Returns true when `path` refers to a directory.
//
// Windows: if `dir` is non-null, the attributes of the entry most recently read
// from that directory stream are used instead of querying `path`; otherwise the
// attributes are fetched with GetFileAttributesEx.
// Other platforms: `dir` is ignored and stat() on `path` decides; a failing
// stat() yields false.
static bool isDir(const string &path, DIR* dir)
{
#if defined _WIN32 || defined WINCE
DWORD attributes;
BOOL status = TRUE;
if (dir)
attributes = dir->data.dwFileAttributes;
else
{
WIN32_FILE_ATTRIBUTE_DATA all_attrs;
#ifdef WINRT
wchar_t wpath[MAX_PATH];
size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
status = ::GetFileAttributesExW(wpath, GetFileExInfoStandard, &all_attrs);
#else
status = ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &all_attrs);
#endif
attributes = all_attrs.dwFileAttributes;
}
return status && ((attributes & FILE_ATTRIBUTE_DIRECTORY) != 0);
#else
// The directory stream is not needed on this platform; silence the unused-parameter warning.
(void)dir;
struct stat stat_buf;
if (0 != stat(path.c_str(), &stat_buf))
return false;
int is_dir = S_ISDIR(stat_buf.st_mode);
return is_dir != 0;
#endif
}
bool IsDirectory(const string &path)
{
return isDir(path, NULL);
}
// Returns true when `path` exists (file or directory).
//
// Windows: success of GetFileAttributesEx on the path.
// Other platforms: success of stat() on the path.
bool Exists(const string& path)
{
#if defined _WIN32 || defined WINCE
BOOL status = TRUE;
{
WIN32_FILE_ATTRIBUTE_DATA all_attrs;
#ifdef WINRT
wchar_t wpath[MAX_PATH];
size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
status = ::GetFileAttributesExW(wpath, GetFileExInfoStandard, &all_attrs);
#else
status = ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &all_attrs);
#endif
}
// Normalise the Win32 BOOL to a C++ bool.
return !!status;
#else
struct stat stat_buf;
return (0 == stat(path.c_str(), &stat_buf));
#endif
}
// Returns true for either path separator character, '/' or '\\',
// regardless of the platform the code is built for.
bool IsPathSeparator(char c)
{
    switch (c)
    {
    case '/':
    case '\\':
        return true;
    default:
        return false;
    }
}
// Joins two path fragments with exactly one separator between them.
//
// - If either fragment is empty, the other one is returned unchanged.
// - If both sides already meet at a separator, the leading one of `path` is dropped.
// - If neither side has one, PATH_SEPARATOR is inserted.
// - Otherwise the fragments are concatenated as they are.
string JoinPath(const string& base, const string& path)
{
    if (base.empty())
    {
        return path;
    }
    if (path.empty())
    {
        return base;
    }

    const bool baseEndsWithSeparator = IsPathSeparator(*base.rbegin());
    const bool pathStartsWithSeparator = IsPathSeparator(*path.begin());

    // Exactly one separator at the junction: plain concatenation is already correct.
    if (baseEndsWithSeparator != pathStartsWithSeparator)
    {
        return base + path;
    }

    string joined(base);
    if (baseEndsWithSeparator)
    {
        // Two separators meet: skip the first character of `path`.
        joined.append(path, 1, string::npos);
    }
    else
    {
        joined += PATH_SEPARATOR;
        joined += path;
    }
    return joined;
}
// Wildcard match of `string` against the pattern `wild`.
//
// '*' in the pattern matches any run of characters (including none) and '?'
// matches exactly one character; every other character must match literally.
// Returns true only when the whole string is consumed by the whole pattern.
static bool wildcmp(const char *string, const char *wild)
{
    // Positions to resume from when a match attempt after the last '*' fails.
    const char *resumeText = 0;
    const char *resumePattern = 0;

    // Phase 1: everything before the first '*' has to match one-to-one.
    for (; *string && *wild != '*'; ++wild, ++string)
    {
        const bool sameChar = (*wild == *string);
        const bool anyChar = (*wild == '?');
        if (!sameChar && !anyChar)
        {
            return false;
        }
    }

    // Phase 2: consume the rest of the text, backtracking to the latest '*'.
    while (*string)
    {
        if (*wild == '*')
        {
            ++wild;
            if (!*wild)
            {
                // A trailing '*' swallows whatever text is left.
                return true;
            }
            resumePattern = wild;
            resumeText = string + 1;
            continue;
        }

        if (*wild == '?' || *wild == *string)
        {
            ++wild;
            ++string;
            continue;
        }

        // Mismatch: let the last '*' absorb one more character and retry.
        wild = resumePattern;
        string = resumeText;
        ++resumeText;
    }

    // Text exhausted: only '*' may remain in the pattern.
    for (; *wild == '*'; ++wild)
    {
    }
    return *wild == 0;
}
static void glob_rec(const string &directory, const string& wildchart, std::vector<string>& result,
bool recursive, bool includeDirectories, const string& pathPrefix)
{
DIR *dir;
if ((dir = opendir(directory.c_str())) != 0)
{
/* find all the files and directories within directory */
try
{
struct dirent *ent;
while ((ent = readdir(dir)) != 0)
{
const char* name = ent->d_name;
if ((name[0] == 0) || (name[0] == '.' && name[1] == 0) || (name[0] == '.' && name[1] == '.' && name[2] == 0))
continue;
string path = JoinPath(directory, name);
string entry = JoinPath(pathPrefix, name);
if (isDir(path, dir))
{
if (recursive)
glob_rec(path, wildchart, result, recursive, includeDirectories, entry);
if (!includeDirectories)
continue;
}
if (wildchart.empty() || wildcmp(name, wildchart.c_str()))
result.push_back(entry);
}
}
catch (...)
{
closedir(dir);
throw;
}
closedir(dir);
}
else
{
printf("could not open directory: %s", directory.c_str());
}
}
// Builds the sorted list of files in `directory` that match `pattern`.
//
// pattern   one or more wildcard patterns separated by ','
// result    cleared first, then filled; directories are never included
// recursive descend into sub-directories when true
// addPath   when true entries keep their path, otherwise only the file name is stored
void GetFileNameList(const string &directory, const string &pattern, std::vector<string>& result, bool recursive, bool addPath)
{
    const vector<string> patternList = SplitString(pattern, ",");

    result.clear();
    for (size_t p = 0; p < patternList.size(); ++p)
    {
        std::vector<string> matches;
        glob_rec(directory, patternList[p], matches, recursive, true, directory);

        for (size_t m = 0; m < matches.size(); ++m)
        {
            const string &entry = matches[m];
            if (IsDirectory(entry))
            {
                continue;
            }
            result.push_back(addPath ? entry : GetFileName(entry));
        }
    }

    std::sort(result.begin(), result.end());
}
// Variant of GetFileNameList that can also report directories.
//
// With addPath == true, directories are included as well; each directory path
// gets a trailing '/' and all of its separators are normalised to '/'.
// With addPath == false, only the file names of non-directory entries are stored.
// `result` is cleared first and returned sorted.
void GetFileNameList2(const string &directory, const string &pattern, std::vector<string>& result, bool recursive, bool addPath)
{
    const vector<string> patternList = SplitString(pattern, ",");

    result.clear();
    for (size_t p = 0; p < patternList.size(); ++p)
    {
        std::vector<string> matches;
        glob_rec(directory, patternList[p], matches, recursive, true, directory);

        for (size_t m = 0; m < matches.size(); ++m)
        {
            string entry = matches[m];

            if (IsDirectory(entry))
            {
                // Mark directories with a trailing '/' and use '/' throughout.
                entry += "/";
                for (string::iterator ch = entry.begin(); ch != entry.end(); ++ch)
                {
                    if (IsPathSeparator(*ch))
                    {
                        *ch = '/';
                    }
                }
            }

            if (addPath)
            {
                result.push_back(entry);
                continue;
            }

            if (!IsDirectory(entry))
            {
                result.push_back(GetFileName(entry));
            }
        }
    }

    std::sort(result.begin(), result.end());
}
// Deletes `path`. A file is unlinked; a directory has its direct entries
// removed recursively through RemoveAll and is then removed itself.
// A non-existent path is silently ignored; a failed removal is reported on stdout.
void RemoveAll(const string& path)
{
    if (!Exists(path))
    {
        return;
    }

    if (!IsDirectory(path))
    {
#ifdef _MSC_VER
        const bool removed = (_unlink(path.c_str()) == 0);
#else
        const bool removed = (unlink(path.c_str()) == 0);
#endif
        if (!removed)
        {
            printf("can't remove file: %s\n", path.c_str());
        }
        return;
    }

    // Empty the directory first: list its direct children (files and directories).
    std::vector<string> children;
    GetFileNameList2(path, string(), children, false, true);
    for (size_t c = 0; c < children.size(); ++c)
    {
        RemoveAll(children[c]);
    }

#ifdef _MSC_VER
    const bool removed = (_rmdir(path.c_str()) == 0);
#else
    const bool removed = (rmdir(path.c_str()) == 0);
#endif
    if (!removed)
    {
        printf("can't remove directory: %s\n", path.c_str());
    }
}
// Recursively deletes the entries of `directory` whose names match the wildcard
// `extension` (an empty pattern matches every entry), printing a line per
// deleted entry, and finally calls RemoveAll(directory).
//
// NOTE(review): because of that final RemoveAll(directory), the directory and
// everything left in it is removed even when `extension` filtered some entries
// out - confirm this is the intended behaviour.
void Remove(const string &directory, const string &extension)
{
DIR *dir;
// Function-local static: the count keeps accumulating across recursive calls
// and across separate top-level calls.
static int numberOfFiles = 0;
if ((dir = opendir(directory.c_str())) != 0)
{
/* find all the files and directories within directory */
try
{
struct dirent *ent;
while ((ent = readdir(dir)) != 0)
{
const char* name = ent->d_name;
// Skip empty names and the "." / ".." pseudo entries.
if ((name[0] == 0) || (name[0] == '.' && name[1] == 0) || (name[0] == '.' && name[1] == '.' && name[2] == 0))
continue;
string path = JoinPath(directory, name);
if (isDir(path, dir))
{
Remove(path, extension);
}
// Check the name against the extension pattern
if (extension.empty() || wildcmp(name, extension.c_str()))
{
RemoveAll(path);
++numberOfFiles;
printf("%s deleted! number of deleted files:%d\n", path.c_str(), numberOfFiles);
}
}
}
catch (...)
{
// Close the directory stream before propagating the exception.
closedir(dir);
throw;
}
closedir(dir);
}
else
{
printf("could not open directory: %s", directory.c_str());
}
// Call RemoveAll to delete the directory itself
RemoveAll(directory);
}
string GetFileName(const string &path)
{
string fileName;
int indexOfPathSeparator = -1;
for (int i = path.size() - 1; i >= 0; --i)
{
if (IsPathSeparator(path[i]))
{
fileName = path.substr(i + 1, path.size() - i - 1);
indexOfPathSeparator = i;
break;
}
}
if (indexOfPathSeparator == -1)
{
fileName = path;
}
return fileName;
}
// Returns the file name of `path` with its last extension removed.
// Example: "D:/1/1.txt" -> "1", "a.tar.gz" -> "a.tar".
//
// A name without an extension is returned unchanged ("README" -> "README").
// A leading dot is not treated as an extension separator (".bashrc" -> ".bashrc").
// Previously both of these cases produced an empty string.
std::string GetFileName_NoExtension(const std::string &path)
{
    const std::string fileName = GetFileName(path);

    const std::string::size_type lastDot = fileName.rfind('.');
    if (lastDot == std::string::npos || lastDot == 0)
    {
        return fileName;
    }
    return fileName.substr(0, lastDot);
}
// Returns the extension of `path` including the leading dot, or an empty
// string when the last path component has none.
// Example: "D:/1/1.txt" -> ".txt".
//
// Only the last path component is examined, so dots in parent directories
// ("../Resource/GPT2", "dir.v2/file") no longer produce a bogus extension.
std::string GetExtension(const std::string &path)
{
    // Both '/' and '\\' count as separators, matching IsPathSeparator().
    const std::string::size_type lastSeparator = path.find_last_of("/\\");
    const std::string::size_type nameStart = (lastSeparator == std::string::npos) ? 0 : lastSeparator + 1;

    const std::string::size_type lastDot = path.rfind('.');
    if (lastDot == std::string::npos || lastDot < nameStart)
    {
        return std::string();
    }
    return path.substr(lastDot);
}
// Returns the part of `path` up to and including its last path separator,
// or an empty string when the path contains no separator.
// Example: "D:/1/1.txt" -> "D:/1/".
string GetParentPath(const string &path)
{
    for (string::size_type length = path.size(); length > 0; --length)
    {
        if (IsPathSeparator(path[length - 1]))
        {
            return path.substr(0, length);
        }
    }
    return string();
}
// Creates the single directory `path` (parents must already exist).
// Returns true when the directory was created, or when creation failed but
// `path` is a directory anyway (e.g. it already existed).
// On platforms other than Windows, Linux and Apple no creation is attempted.
static bool CreateDirectory(const string &path)
{
#if defined WIN32 || defined _WIN32 || defined WINCE
#ifdef WINRT
wchar_t wpath[MAX_PATH];
size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
// NOTE(review): a wide-character path is passed to the ANSI function CreateDirectoryA - confirm whether CreateDirectoryW was intended.
int result = CreateDirectoryA(wpath, NULL) ? 0 : -1;
#else
int result = _mkdir(path.c_str());
#endif
#elif defined __linux__ || defined __APPLE__
int result = mkdir(path.c_str(), 0777);
#else
int result = -1;
#endif
if (result == -1)
{
// Creation failed: still report success if the directory is present.
return IsDirectory(path);
}
return true;
}
// Creates `directoryPath` together with any missing parent directories.
// Returns true when the directory exists afterwards (or nothing had to be
// created), false when a parent or the directory itself could not be created.
bool CreateDirectories(const string &directoryPath)
{
    // Work on a copy with all trailing separators removed.
    string target = directoryPath;
    while (!target.empty() && IsPathSeparator(target[target.length() - 1]))
    {
        target.erase(target.length() - 1);
    }

    const bool nothingToCreate = target.empty() || target == "./" || target == ".\\" || target == ".";
    if (nothingToCreate || IsDirectory(target))
    {
        return true;
    }

    // Locate the parent: prefer the last '/', fall back to the last '\\'.
    size_t cut = target.rfind('/');
    if (cut == string::npos)
    {
        cut = target.rfind('\\');
    }

    // A separator at index 0 means the parent part is empty, so there is nothing to recurse into.
    if (cut != string::npos && cut > 0)
    {
        const string parent = target.substr(0, cut);
        if (!CreateDirectories(parent))
        {
            return false;
        }
    }

    return CreateDirectory(target);
}
// Copies the file `srcPath` to `dstPath` in binary mode, overwriting `dstPath`.
//
// Returns false (after printing a message) when the two paths are identical,
// when either file cannot be opened, or when reading/writing fails.
//
// The checks run before the destination is opened: opening an ofstream
// truncates the file, so the previous order destroyed the file when
// srcPath == dstPath and left an empty destination behind when the source
// could not be opened.
bool CopyFile(const std::string srcPath, const std::string dstPath)
{
    if (srcPath == dstPath)
    {
        printf("src can not be same with dst\n");
        return false;
    }

    std::ifstream srcFile(srcPath, std::ios::binary);
    if (!srcFile.is_open())
    {
        printf("can not open %s\n", srcPath.c_str());
        return false;
    }

    std::ofstream dstFile(dstPath, std::ios::binary);
    if (!dstFile.is_open())
    {
        printf("can not open %s\n", dstPath.c_str());
        return false;
    }

    char buffer[2048];
    while (srcFile)
    {
        // The final read is usually short; gcount() gives the bytes actually read.
        srcFile.read(buffer, sizeof(buffer));
        dstFile.write(buffer, srcFile.gcount());
    }

    // eof/fail are expected at the end of the source; only badbit signals a real read error.
    const bool readFailed = srcFile.bad();
    srcFile.close();
    dstFile.close();
    if (readFailed || dstFile.fail())
    {
        printf("fail to copy %s to %s\n", srcPath.c_str(), dstPath.c_str());
        return false;
    }
    return true;
}
bool CopyDirectories(string srcPath, const string dstPath)
{
if(srcPath==dstPath)
{
printf("src can not be same with dst\n");
return false;
}
// ȥ������·���ָ���
srcPath = srcPath.substr(0, srcPath.size() - 1);
vector<string> fileNameList;
GetFileNameList2(srcPath, "", fileNameList, true, true);
string parentPathOfSrc=GetParentPath(srcPath);
int length=parentPathOfSrc.size();
// create all directories
for(int i=0;i<fileNameList.size();++i)
{
// create directory
string srcFilePath=fileNameList[i];
string subStr=srcFilePath.substr(length,srcFilePath.size()-length);
string dstFilePath=dstPath+subStr;
string parentPathOfDst=GetParentPath(dstFilePath);
CreateDirectories(parentPathOfDst);
}
// copy file
for(int i=0;i<fileNameList.size();++i)
{
string srcFilePath=fileNameList[i];
if (IsDirectory(srcFilePath))
{
continue;
}
string subStr=srcFilePath.substr(length,srcFilePath.size()-length);
string dstFilePath=dstPath+subStr;
// copy file
CopyFile(srcFilePath,dstFilePath);
// process
double process = (1.0*(i + 1) / fileNameList.size()) * 100;
printf("%s done! %f% \n", GetFileName(fileNameList[i]).c_str(), process);
}
printf("all done!(the number of files:%d)\n", fileNameList.size());
return true;
}
}
// File and directory handling
#ifndef __FILE_SYSTEM_H__
#define __FILE_SYSTEM_H__
#include <string>
#include <vector>
namespace migraphxSamples
{
// Whether the path exists
bool Exists(const std::string &path);
// Whether the path is a directory
bool IsDirectory(const std::string &path);
// Whether c is a path separator ('/' or '\\')
bool IsPathSeparator(char c);
// Join two path fragments
std::string JoinPath(const std::string &base, const std::string &path);
// Create a directory including missing parent directories. Note: no file may
// already exist at the target directory path.
bool CreateDirectories(const std::string &directoryPath);
/** Build the list of file names matching a pattern (supports recursive traversal)
*
* pattern: the pattern, e.g. "*.jpg", "*.png", "*.jpg,*.png"
* addPath: whether the returned names include the parent path
* Notes:
1. Separate multiple patterns with ",", e.g. "*.jpg,*.png"
2. The wildcards '*' and '?' are supported, e.g. all names starting with 7: "7*.*", all jpg names ending in 512: "*512.jpg"
3. Use "*.jpg", not ".jpg"
4. An empty string returns all results
5. Sub-directory names are not returned
*
*/
void GetFileNameList(const std::string &directory, const std::string &pattern, std::vector<std::string> &result, bool recursive, bool addPath);
// Differs from GetFileNameList in that, when addPath is true, sub-directory paths are returned as well (directory names end with "/")
void GetFileNameList2(const std::string &directory, const std::string &pattern, std::vector<std::string> &result, bool recursive, bool addPath);
// Delete files or directories; deletion is recursive
void Remove(const std::string &directory, const std::string &extension="");
/** Get the file name and extension parts of a path
*
* Example: for the path D:/1/1.txt, GetFileName() is 1.txt, GetFileName_NoExtension() is 1, GetExtension() is .txt, GetParentPath() is D:/1/
*/
std::string GetFileName(const std::string &path);
std::string GetFileName_NoExtension(const std::string &path);
std::string GetExtension(const std::string &path);
std::string GetParentPath(const std::string &path);
// Copy a file
bool CopyFile(const std::string srcPath,const std::string dstPath);
/** Copy a directory
*
* Example: CopyDirectories("D:/0/1/2/","E:/3/"); copies the directory D:/0/1/2/ into E:/3/ (the resulting layout is E:/3/2/)
* Notes:
1. The original note says the first argument must not end with "/".
   NOTE(review): this contradicts the example above, which passes a trailing "/" - confirm which form callers should use.
2. Hidden files are not copied, according to the original note (not verified against the implementation).
*/
bool CopyDirectories(std::string srcPath,const std::string dstPath);
}
#endif
......@@ -19,7 +19,7 @@ using namespace std;
/** 简易日志
*
* 轻量级日志系统,不依赖于其他第三方库,只需要包含一个头文件就可以使用。提供了4种日志级别,包括INFO,DEBUG,WARN和ERROR。
* 不依赖于其他第三方库,只需要包含一个头文件就可以使用。提供了4种日志级别,包括INFO,DEBUG,WARN和ERROR。
*
* 示例1:
// 初始化日志,在./Log/目录下创建两个日志文件log1.log和log2.log(注意:目录./Log/需要存在,否则日志创建失败)
......
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <gpt2.h>
#include <GPT2.h>
#include <fstream>
#include <SimpleLog.h>
#include <Filesystem.h>
#include <tokenization.h>
int main()
......@@ -11,7 +12,7 @@ int main()
// 加载GPT2模型
migraphxSamples::GPT2 gpt2;
migraphxSamples::ErrorCode errorCode = gpt2.Initialize();
if (errorCode != SUCCESS)
if (errorCode != migraphxSamples::SUCCESS)
{
LOG_ERROR(stdout, "fail to initialize GPT2!\n");
exit(-1);
......@@ -19,11 +20,11 @@ int main()
LOG_INFO(stdout, "succeed to initialize GPT2\n");
// 加载词汇表,用于编码和解码
cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/Models/vocab_shici.txt");
cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/vocab_shici.txt");
std::ifstream infile;
std::string buf;
std::vector<std::string> output;
infile.open("../Resource/Models/vocab_shici.txt");
infile.open("../Resource/vocab_shici.txt");
while (std::getline(infile,buf))
{
output.push_back(buf);
......
./3rdParty/opencv-3.4.11_mini.tar.gz
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment