Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
GPT2_migraphx
Commits
c99ee00b
Commit
c99ee00b
authored
Jun 09, 2023
by
liucong
Browse files
修改部分代码和文档
parent
9cffb74e
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
74 additions
and
1171 deletions
+74
-1171
3rdParty/InstallOpenCVDependences.sh
3rdParty/InstallOpenCVDependences.sh
+0
-10
3rdParty/opencv-3.4.11_mini.tar.gz
3rdParty/opencv-3.4.11_mini.tar.gz
+0
-0
CMakeLists.txt
CMakeLists.txt
+7
-12
Doc/Tutorial_Cpp/GPT2.md
Doc/Tutorial_Cpp/GPT2.md
+3
-3
Doc/Tutorial_Python/GPT2.md
Doc/Tutorial_Python/GPT2.md
+2
-2
Python/gpt2.py
Python/gpt2.py
+2
-2
Python/requirements.txt
Python/requirements.txt
+0
-0
README.md
README.md
+47
-49
Resource/Configuration.xml
Resource/Configuration.xml
+0
-9
Resource/Models/vocab_shici.txt
Resource/Models/vocab_shici.txt
+0
-0
Src/GPT2/gpt2.cpp
Src/GPT2/gpt2.cpp
+12
-66
Src/GPT2/gpt2.h
Src/GPT2/gpt2.h
+1
-9
Src/Sample.cpp
Src/Sample.cpp
+0
-84
Src/Sample.h
Src/Sample.h
+0
-9
Src/Utility/CommonUtility.cpp
Src/Utility/CommonUtility.cpp
+0
-140
Src/Utility/CommonUtility.h
Src/Utility/CommonUtility.h
+0
-27
Src/Utility/Filesystem.cpp
Src/Utility/Filesystem.cpp
+0
-676
Src/Utility/Filesystem.h
Src/Utility/Filesystem.h
+0
-73
Src/Utility/tokenization.cpp
Src/Utility/tokenization.cpp
+0
-0
Src/Utility/tokenization.h
Src/Utility/tokenization.h
+0
-0
No files found.
3rdParty/InstallOpenCVDependences.sh
deleted
100644 → 0
View file @
9cffb74e
#! /bin/sh
# Installs the system packages needed to build OpenCV.
# Only the CentOS (yum) command is active; the Ubuntu (apt-get) commands are
# kept commented out for reference.

############### Ubuntu ###############
# Reference: https://docs.opencv.org/3.4.11/d7/d9f/tutorial_linux_install.html
# apt-get install build-essential -y
# apt-get install cmake git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev -y
# apt-get install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libjasper-dev libdc1394-22-dev -y # packages needed for image processing, optional

############### CentOS ###############
yum install gcc gcc-c++ gtk2-devel gimp-devel gimp-devel-tools gimp-help-browser zlib-devel libtiff-devel libjpeg-devel libpng-devel gstreamer-devel libavc1394-devel libraw1394-devel libdc1394-devel jasper-devel jasper-utils swig python libtool nasm -y
\ No newline at end of file
3rdParty/opencv-3.4.11_mini.tar.gz
deleted
100644 → 0
View file @
9cffb74e
File deleted
CMakeLists.txt
View file @
c99ee00b
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
cmake_minimum_required
(
VERSION 3.5
)
cmake_minimum_required
(
VERSION 3.5
)
# 设置项目名
# 设置项目名
project
(
MIGraphX_Samples
)
project
(
GPT2
)
# 设置编译器
# 设置编译器
set
(
CMAKE_CXX_COMPILER g++
)
set
(
CMAKE_CXX_COMPILER g++
)
...
@@ -12,7 +12,7 @@ set(CMAKE_BUILD_TYPE release)
...
@@ -12,7 +12,7 @@ set(CMAKE_BUILD_TYPE release)
# 添加头文件路径
# 添加头文件路径
set
(
INCLUDE_PATH
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/
set
(
INCLUDE_PATH
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/Utility/
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/Utility/
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/
NLP/
GPT2/
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/GPT2/
$ENV{DTKROOT}/include/
$ENV{DTKROOT}/include/
${
CMAKE_CURRENT_SOURCE_DIR
}
/depend/include/
)
${
CMAKE_CURRENT_SOURCE_DIR
}
/depend/include/
)
include_directories
(
${
INCLUDE_PATH
}
)
include_directories
(
${
INCLUDE_PATH
}
)
...
@@ -23,11 +23,7 @@ set(LIBRARY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/depend/lib64/
...
@@ -23,11 +23,7 @@ set(LIBRARY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/depend/lib64/
link_directories
(
${
LIBRARY_PATH
}
)
link_directories
(
${
LIBRARY_PATH
}
)
# 添加依赖库
# 添加依赖库
set
(
LIBRARY opencv_core
set
(
LIBRARY migraphx_ref
opencv_imgproc
opencv_imgcodecs
opencv_dnn
migraphx_ref
migraphx
migraphx
migraphx_c
migraphx_c
migraphx_device
migraphx_device
...
@@ -37,12 +33,11 @@ link_libraries(${LIBRARY})
...
@@ -37,12 +33,11 @@ link_libraries(${LIBRARY})
# 添加源文件
# 添加源文件
set
(
SOURCE_FILES
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/main.cpp
set
(
SOURCE_FILES
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/main.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/Sample.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/Utility/tokenization.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/NLP/GPT2/tokenization.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/Utility/utf8proc.c
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/NLP/GPT2/utf8proc.c
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/GPT2/gpt2.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/NLP/GPT2/GPT2.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/Utility/CommonUtility.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/Utility/CommonUtility.cpp
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/Utility/Filesystem.cpp
)
${
CMAKE_CURRENT_SOURCE_DIR
}
/Src/Utility/Filesystem.cpp
)
# 添加可执行目标
# 添加可执行目标
add_executable
(
MIGraphX_Samples
${
SOURCE_FILES
}
)
add_executable
(
GPT2
${
SOURCE_FILES
}
)
Doc/Tutorial_Cpp/GPT2.md
View file @
c99ee00b
...
@@ -27,11 +27,11 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠transforme
...
@@ -27,11 +27,11 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠transforme
首先,根据提供的词汇表路径,通过cuBERT::FullTokenizer()函数加载词汇表,用于后续对输入文本的编码操作。其次,将词汇表中的内容依次保存到vector容器output中,用于数据后处理中的解码操作。
首先,根据提供的词汇表路径,通过cuBERT::FullTokenizer()函数加载词汇表,用于后续对输入文本的编码操作。其次,将词汇表中的内容依次保存到vector容器output中,用于数据后处理中的解码操作。
```
c++
```
c++
cuBERT
::
FullTokenizer
tokenizer
=
cuBERT
::
FullTokenizer
(
"../Resource/Models/
NLP/GPT2/
vocab_shici.txt"
);
cuBERT
::
FullTokenizer
tokenizer
=
cuBERT
::
FullTokenizer
(
"../Resource/Models/vocab_shici.txt"
);
std
::
ifstream
infile
;
std
::
ifstream
infile
;
std
::
string
buf
;
std
::
string
buf
;
std
::
vector
<
std
::
string
>
output
;
std
::
vector
<
std
::
string
>
output
;
infile
.
open
(
"../Resource/Models/
NLP/GPT2/
vocab_shici.txt"
);
infile
.
open
(
"../Resource/Models/vocab_shici.txt"
);
while
(
std
::
getline
(
infile
,
buf
))
while
(
std
::
getline
(
infile
,
buf
))
{
{
output
.
push_back
(
buf
);
output
.
push_back
(
buf
);
...
@@ -102,7 +102,7 @@ long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id
...
@@ -102,7 +102,7 @@ long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id
inputShapes
.
push_back
({
1
,
input_id
.
size
()});
inputShapes
.
push_back
({
1
,
input_id
.
size
()});
// 输入数据
// 输入数据
migraphx
::
p
ar
ameter_map
inputData
;
std
::
unordered_map
<
std
::
string
,
migraphx
::
ar
gument
>
inputData
;
inputData
[
inputName
]
=
migraphx
::
argument
{
migraphx
::
shape
(
inputShape
.
type
(),
inputShapes
[
0
]),(
long
unsigned
int
*
)
input
};
inputData
[
inputName
]
=
migraphx
::
argument
{
migraphx
::
shape
(
inputShape
.
type
(),
inputShapes
[
0
]),(
long
unsigned
int
*
)
input
};
// 推理
// 推理
...
...
Doc/Tutorial_Python/GPT2.md
View file @
c99ee00b
...
@@ -28,7 +28,7 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠的transfo
...
@@ -28,7 +28,7 @@ GPT(Generative Pre-trained Transformer)系列模型以不断堆叠的transfo
```
python
```
python
# 加载词汇表
# 加载词汇表
vocab_file
=
os
.
path
.
join
(
'../../../Resource/Models
/NLP/GPT2
'
,
'vocab_shici.txt'
)
vocab_file
=
os
.
path
.
join
(
'../../../Resource/Models'
,
'vocab_shici.txt'
)
tokenizer
=
BertTokenizerFast
(
vocab_file
,
sep_token
=
"[SEP]"
,
pad_token
=
"[PAD]"
,
cls_token
=
"[CLS]"
)
tokenizer
=
BertTokenizerFast
(
vocab_file
,
sep_token
=
"[SEP]"
,
pad_token
=
"[PAD]"
,
cls_token
=
"[CLS]"
)
```
```
...
@@ -63,7 +63,7 @@ maxInput={"input":[1,1000]}
...
@@ -63,7 +63,7 @@ maxInput={"input":[1,1000]}
for _ in range(max_len):
for _ in range(max_len):
# 推理
# 推理
result = model.run({inputName:
migraphx.argument(
input_ids
)
})
result = model.run({inputName: input_ids})
logits = [float(x) for x in result[0].tolist()]
logits = [float(x) for x in result[0].tolist()]
...
...
...
...
Python/
NLP/GPT2/
gpt2.py
→
Python/gpt2.py
View file @
c99ee00b
...
@@ -5,7 +5,7 @@ import migraphx
...
@@ -5,7 +5,7 @@ import migraphx
# 加载词汇表
# 加载词汇表
print
(
"INFO: Complete loading the vocabulary"
)
print
(
"INFO: Complete loading the vocabulary"
)
vocab_file
=
os
.
path
.
join
(
'../
../../
Resource/Models
/NLP/GPT2
'
,
'vocab_shici.txt'
)
vocab_file
=
os
.
path
.
join
(
'../Resource/Models'
,
'vocab_shici.txt'
)
tokenizer
=
BertTokenizerFast
(
vocab_file
,
sep_token
=
"[SEP]"
,
pad_token
=
"[PAD]"
,
cls_token
=
"[CLS]"
)
tokenizer
=
BertTokenizerFast
(
vocab_file
,
sep_token
=
"[SEP]"
,
pad_token
=
"[PAD]"
,
cls_token
=
"[CLS]"
)
# 设置最大输入shape
# 设置最大输入shape
...
@@ -13,7 +13,7 @@ maxInput={"input":[1,1000]}
...
@@ -13,7 +13,7 @@ maxInput={"input":[1,1000]}
# 加载模型
# 加载模型
print
(
"INFO: Parsing and compiling the model"
)
print
(
"INFO: Parsing and compiling the model"
)
model
=
migraphx
.
parse_onnx
(
"../
../../
Resource/Models/
NLP/GPT2/
GPT2_shici.onnx"
,
map_input_dims
=
maxInput
)
model
=
migraphx
.
parse_onnx
(
"../Resource/Models/GPT2_shici.onnx"
,
map_input_dims
=
maxInput
)
inputName
=
model
.
get_parameter_names
()[
0
]
inputName
=
model
.
get_parameter_names
()[
0
]
inputShape
=
model
.
get_parameter_shapes
()[
inputName
].
lens
()
inputShape
=
model
.
get_parameter_shapes
()[
inputName
].
lens
()
print
(
"inputName:{0}
\n
inputShape:{1}"
.
format
(
inputName
,
inputShape
))
print
(
"inputName:{0}
\n
inputShape:{1}"
.
format
(
inputName
,
inputShape
))
...
...
Python/
NLP/GPT2/
requirements.txt
→
Python/requirements.txt
View file @
c99ee00b
File moved
README.md
View file @
c99ee00b
...
@@ -6,7 +6,11 @@ GPT2模型:第二代生成式预训练模型(Generative Pre-Training2)。
...
@@ -6,7 +6,11 @@ GPT2模型:第二代生成式预训练模型(Generative Pre-Training2)。
## 模型结构
## 模型结构
GPT2主要使用Transformer的Decoder模块为特征提取器,并对Transformer Decoder进行了一些改动,原本的Decoder包含了两个Multi-Head Attention结构,而GPT2只保留了Mask Multi-Head Attention。
GPT2主要使用Transformer的Decoder模块为特征提取器,并对Transformer Decoder进行了一些改动,原本的Decoder包含了两个Multi-Head Attention结构,而GPT2只保留了Mask Multi-Head Attention。
## 构建安装
## Python版本推理
本次采用GPT-2模型进行诗词生成任务,模型文件下载链接:https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , 提取码:4pmh ,并将GPT2_shici.onnx模型文件保存在Resource/Models文件夹下。下面介绍如何运行python代码示例,具体推理代码解析,在Doc/Tutorial_Python目录中有详细说明。
### 安装镜像
在光源可拉取推理的docker镜像,GPT2模型推理的镜像如下:
在光源可拉取推理的docker镜像,GPT2模型推理的镜像如下:
...
@@ -14,13 +18,46 @@ GPT2主要使用Transformer的Decoder模块为特征提取器,并对Transforme
...
@@ -14,13 +18,46 @@ GPT2主要使用Transformer的Decoder模块为特征提取器,并对Transforme
docker
pull
image
.
sourcefind
.
cn
:
5000
/
dcu
/
admin
/
base
/
custom
:
ort1
.
14.0
_migraphx3
.
0.0
-
dtk22
.
10.1
docker
pull
image
.
sourcefind
.
cn
:
5000
/
dcu
/
admin
/
base
/
custom
:
ort1
.
14.0
_migraphx3
.
0.0
-
dtk22
.
10.1
```
```
### 安装Opencv依赖
### 执行推理
1.
参考《MIGraphX教程》中的安装方法安装MIGraphX并设置好PYTHONPATH
2.
安装依赖:
```
python
# 进入migraphx samples工程根目录
cd
<
path_to_migraphx_samples
>
# 进入示例程序目录
cd
.
/
Python
/
# 安装依赖
pip
install
-
r
requirements
.
txt
```
3.
设置环境变量:
```
python
```
python
cd
<
path_to_migraphx_samples
>
# 设置动态shape模式
sh
.
/
3
rdParty
/
InstallOpenCVDependences
.
sh
export
MIGRAPHX_DYNAMIC_SHAPE
=
1
```
4.
在Python目录下执行如下命令运行该示例程序:
```
python
python
gpt2
.
py
```
```
如下所示,采用交互式界面,通过输入开头诗词,GPT2模型可以生成后续的诗句。
<img
src=
"./Doc/Images/GPT_03.png"
style=
"zoom:80%;"
align=
middle
>
## C++版本推理
下面介绍如何运行C++代码示例,具体推理代码解析,在Doc/Tutorial_Cpp目录中有详细说明。
参考Python版本推理中的“安装镜像”一节,在光源中拉取推理的docker镜像。
### 修改CMakeLists.txt
### 修改CMakeLists.txt
-
如果使用ubuntu系统,需要修改CMakeLists.txt中依赖库路径:
-
如果使用ubuntu系统,需要修改CMakeLists.txt中依赖库路径:
...
@@ -29,7 +66,7 @@ sh ./3rdParty/InstallOpenCVDependences.sh
...
@@ -29,7 +66,7 @@ sh ./3rdParty/InstallOpenCVDependences.sh
-
**MIGraphX2.3.0及以上版本需要c++17**
-
**MIGraphX2.3.0及以上版本需要c++17**
###
安装OpenCV并
构建工程
### 构建工程
```
```
rbuild build -d depend
rbuild build -d depend
...
@@ -57,61 +94,22 @@ export LD_LIBRARY_PATH=<path_to_migraphx_samples>/depend/lib/:$LD_LIBRARY_PATH
...
@@ -57,61 +94,22 @@ export LD_LIBRARY_PATH=<path_to_migraphx_samples>/depend/lib/:$LD_LIBRARY_PATH
source ~/.bashrc
source ~/.bashrc
```
```
## 推理
### 执行推理
本次采用GPT-2模型进行诗词生成任务,模型文件下载链接:https://pan.baidu.com/s/1KWeoUuakCZ5dualK69qCcw , 提取码:4pmh ,并将GPT2_shici.onnx模型文件保存在model文件夹下。下面介绍如何运行python代码和C++代码示例,具体推理代码解析,在Doc目录中有详细说明。
### python版本推理
运行GPT2示例程序,具体执行如下命令:
1.
参考《MIGraphX教程》中的安装方法安装MIGraphX并设置好PYTHONPATH
2.
安装依赖:
```
python
```
python
# 进入migraphx samples工程根目录
# 进入migraphx samples工程根目录
cd
<
path_to_migraphx_samples
>
cd
<
path_to_migraphx_samples
>
# 进入示例程序目录
# 进入build目录
cd
Python
/
NLP
/
GPT2
# 安装依赖
pip
install
-
r
requirements
.
txt
```
3.
设置环境变量:
```
python
# 设置动态shape模式
export
MIGRAPHX_DYNAMIC_SHAPE
=
1
```
4.
在Python/NLP/GPT2目录下执行如下命令运行该示例程序:
```
python
python
gpt2
.
py
```
如下所示,采用交互式界面,通过输入开头诗词,GPT2模型可以生成后续的诗句。
<img
src=
"./Doc/Images/GPT_03.png"
style=
"zoom:80%;"
align=
middle
>
### C++版本推理
切换到build目录中,执行如下命令:
```
python
cd
.
/
build
/
cd
.
/
build
/
.
/
MIGraphX_Samples
```
根据提示选择运行GPT2模型的示例程序
```
python
# 设置动态shape模式
# 设置动态shape模式
export
MIGRAPHX_DYNAMIC_SHAPE
=
1
export
MIGRAPHX_DYNAMIC_SHAPE
=
1
#
运
行示例
#
执
行示例
程序
.
/
MIGraphX_Samples
0
.
/
GPT2
```
```
如下所示,采用交互式界面,通过输入开头诗词,GPT2模型可以推理出后续的诗句。
如下所示,采用交互式界面,通过输入开头诗词,GPT2模型可以推理出后续的诗句。
...
...
Resource/Configuration.xml
deleted
100644 → 0
View file @
9cffb74e
<?xml version="1.0" encoding="GB2312"?>
<opencv_storage>
<!--GPT2-->
<GPT2>
<ModelPath>
"../Resource/Models/NLP/GPT2/GPT2_shici.onnx"
</ModelPath>
</GPT2>
</opencv_storage>
Resource/Models/
NLP/GPT2/
vocab_shici.txt
→
Resource/Models/vocab_shici.txt
View file @
c99ee00b
File moved
Src/
NLP/
GPT2/
GPT
2.cpp
→
Src/GPT2/
gpt
2.cpp
View file @
c99ee00b
...
@@ -2,65 +2,44 @@
...
@@ -2,65 +2,44 @@
#include <sstream>
#include <sstream>
#include <migraphx/onnx.hpp>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/reshape2.hpp>
#include <migraphx/reshape2.hpp>
#include <CommonUtility.h>
#include <Filesystem.h>
#include <SimpleLog.h>
#include <SimpleLog.h>
#include <algorithm>
#include <algorithm>
#include <string>
#include <string>
#include <vector>
#include <stdexcept>
#include <stdexcept>
#include <
GPT
2.h>
#include <
gpt
2.h>
#include <tokenization.h>
#include <tokenization.h>
namespace
migraphxSamples
namespace
migraphxSamples
{
{
GPT2
::
GPT2
()
:
logFile
(
NULL
)
GPT2
::
GPT2
()
{
{
}
}
GPT2
::~
GPT2
()
GPT2
::~
GPT2
()
{
{
configurationFile
.
release
();
}
}
ErrorCode
GPT2
::
Initialize
(
InitializationParameterOfNLP
initParamOfNLPGPT2
)
ErrorCode
GPT2
::
Initialize
(
InitializationParameterOfNLP
initParamOfNLPGPT2
)
{
{
// 初始化(获取日志文件,加载配置文件等)
// 设置模型路径
ErrorCode
errorCode
=
DoCommonInitialization
(
initParamOfNLPGPT2
);
std
::
string
modelPath
=
"../Resource/Models/GPT2_shici.onnx"
;
if
(
errorCode
!=
SUCCESS
)
{
LOG_ERROR
(
logFile
,
"fail to DoCommonInitialization
\n
"
);
return
errorCode
;
}
LOG_INFO
(
logFile
,
"succeed to DoCommonInitialization
\n
"
);
// 获取配置文件参数
FileNode
netNode
=
configurationFile
[
"GPT2"
];
std
::
string
modelPath
=
initializationParameter
.
parentPath
+
(
std
::
string
)
netNode
[
"ModelPath"
];
// 设置最大输入shape
// 设置最大输入shape
migraphx
::
onnx_options
onnx_options
;
migraphx
::
onnx_options
onnx_options
;
onnx_options
.
map_input_dims
[
"input"
]
=
{
1
,
1000
};
onnx_options
.
map_input_dims
[
"input"
]
=
{
1
,
1000
};
// 加载模型
// 加载模型
if
(
Exists
(
modelPath
)
==
false
)
{
LOG_ERROR
(
logFile
,
"%s not exist!
\n
"
,
modelPath
.
c_str
());
return
MODEL_NOT_EXIST
;
}
net
=
migraphx
::
parse_onnx
(
modelPath
,
onnx_options
);
net
=
migraphx
::
parse_onnx
(
modelPath
,
onnx_options
);
LOG_INFO
(
logFile
,
"succeed to load model:
%s
\n
"
,
GetFileName
(
modelPath
).
c_str
()
);
LOG_INFO
(
stdout
,
"succeed to load model:
GPT2_shici
\n
"
);
// 获取模型输入属性
// 获取模型输入属性
std
::
pair
<
std
::
string
,
migraphx
::
shape
>
input
Attribute
=*
(
net
.
get_parameter_shapes
()
.
begin
())
;
std
::
unordered_map
<
std
::
string
,
migraphx
::
shape
>
input
Map
=
net
.
get_parameter_shapes
();
inputName
=
input
Attribute
.
first
;
inputName
=
input
Map
.
begin
()
->
first
;
inputShape
=
input
Attribute
.
second
;
inputShape
=
input
Map
.
begin
()
->
second
;
// 设置模型为GPU模式
// 设置模型为GPU模式
migraphx
::
target
gpuTarget
=
migraphx
::
gpu
::
target
{};
migraphx
::
target
gpuTarget
=
migraphx
::
gpu
::
target
{};
...
@@ -68,46 +47,13 @@ ErrorCode GPT2::Initialize(InitializationParameterOfNLP initParamOfNLPGPT2)
...
@@ -68,46 +47,13 @@ ErrorCode GPT2::Initialize(InitializationParameterOfNLP initParamOfNLPGPT2)
// 编译模型
// 编译模型
migraphx
::
compile_options
options
;
migraphx
::
compile_options
options
;
options
.
device_id
=
0
;
// 设置GPU设备,默认为0号设备
options
.
device_id
=
0
;
// 设置GPU设备,默认为0号设备
options
.
offload_copy
=
true
;
// 设置offload_copy
options
.
offload_copy
=
true
;
net
.
compile
(
gpuTarget
,
options
);
net
.
compile
(
gpuTarget
,
options
);
LOG_INFO
(
logFile
,
"succeed to compile model: %s
\n
"
,
GetFileName
(
modelPath
).
c_str
()
);
LOG_INFO
(
stdout
,
"succeed to compile model: %s
\n
"
);
return
SUCCESS
;
return
SUCCESS
;
}
}
ErrorCode
GPT2
::
DoCommonInitialization
(
InitializationParameterOfNLP
initParamOfNLPGPT2
)
{
initializationParameter
=
initParamOfNLPGPT2
;
// 获取日志文件
logFile
=
LogManager
::
GetInstance
()
->
GetLogFile
(
initializationParameter
.
logName
);
// 加载配置文件
std
::
string
configFilePath
=
initializationParameter
.
configFilePath
;
if
(
!
Exists
(
configFilePath
))
{
LOG_ERROR
(
logFile
,
"no configuration file!
\n
"
);
return
CONFIG_FILE_NOT_EXIST
;
}
if
(
!
configurationFile
.
open
(
configFilePath
,
FileStorage
::
READ
))
{
LOG_ERROR
(
logFile
,
"fail to open configuration file
\n
"
);
return
FAIL_TO_OPEN_CONFIG_FILE
;
}
LOG_INFO
(
logFile
,
"succeed to open configuration file
\n
"
);
// 修改父路径
std
::
string
&
parentPath
=
initializationParameter
.
parentPath
;
if
(
!
parentPath
.
empty
())
{
if
(
!
IsPathSeparator
(
parentPath
[
parentPath
.
size
()
-
1
]))
{
parentPath
+=
PATH_SEPARATOR
;
}
}
return
SUCCESS
;
}
static
bool
CompareM
(
Predictions
a
,
Predictions
b
)
static
bool
CompareM
(
Predictions
a
,
Predictions
b
)
{
{
return
a
.
predictionvalue
>
b
.
predictionvalue
;
return
a
.
predictionvalue
>
b
.
predictionvalue
;
...
@@ -127,7 +73,7 @@ long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id
...
@@ -127,7 +73,7 @@ long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id
inputShapes
.
push_back
({
1
,
input_id
.
size
()});
inputShapes
.
push_back
({
1
,
input_id
.
size
()});
// 输入数据
// 输入数据
migraphx
::
p
ar
ameter_map
inputData
;
std
::
unordered_map
<
std
::
string
,
migraphx
::
ar
gument
>
inputData
;
inputData
[
inputName
]
=
migraphx
::
argument
{
migraphx
::
shape
(
inputShape
.
type
(),
inputShapes
[
0
]),(
long
unsigned
int
*
)
input
};
inputData
[
inputName
]
=
migraphx
::
argument
{
migraphx
::
shape
(
inputShape
.
type
(),
inputShapes
[
0
]),(
long
unsigned
int
*
)
input
};
// 推理
// 推理
...
...
Src/
NLP/
GPT2/
GPT
2.h
→
Src/GPT2/
gpt
2.h
View file @
c99ee00b
...
@@ -6,7 +6,6 @@
...
@@ -6,7 +6,6 @@
#include <migraphx/program.hpp>
#include <migraphx/program.hpp>
#include <CommonDefinition.h>
#include <CommonDefinition.h>
#include <tokenization.h>
#include <tokenization.h>
using
namespace
cuBERT
;
namespace
migraphxSamples
namespace
migraphxSamples
{
{
...
@@ -24,7 +23,7 @@ public:
...
@@ -24,7 +23,7 @@ public:
~
GPT2
();
~
GPT2
();
ErrorCode
Initialize
(
InitializationParameterOfNLP
initParamOfNLPGPT2
);
ErrorCode
Initialize
();
ErrorCode
Preprocessing
(
cuBERT
::
FullTokenizer
tokenizer
,
ErrorCode
Preprocessing
(
cuBERT
::
FullTokenizer
tokenizer
,
char
*
question
,
char
*
question
,
...
@@ -33,13 +32,6 @@ public:
...
@@ -33,13 +32,6 @@ public:
long
unsigned
int
Inference
(
const
std
::
vector
<
long
unsigned
int
>
&
input_id
);
long
unsigned
int
Inference
(
const
std
::
vector
<
long
unsigned
int
>
&
input_id
);
private:
private:
ErrorCode
DoCommonInitialization
(
InitializationParameterOfNLP
initParamOfNLPGPT2
);
private:
FILE
*
logFile
;
cv
::
FileStorage
configurationFile
;
InitializationParameterOfNLP
initializationParameter
;
migraphx
::
program
net
;
migraphx
::
program
net
;
std
::
string
inputName
;
std
::
string
inputName
;
migraphx
::
shape
inputShape
;
migraphx
::
shape
inputShape
;
...
...
Src/Sample.cpp
deleted
100644 → 0
View file @
9cffb74e
#include <Sample.h>
#include <SimpleLog.h>
#include <GPT2.h>
#include <tokenization.h>
#include <fstream>
using
namespace
std
;
using
namespace
migraphx
;
using
namespace
migraphxSamples
;
// Interactive GPT2 poetry demo.
//
// Initializes the GPT2 model from the configuration file, loads the vocabulary
// (once into the tokenizer for encoding, once line-by-line into `output` for
// decoding), then repeatedly reads a prompt from stdin, generates up to 50
// tokens, and prints the prompt followed by the decoded tokens.
//
// The loop ends when stdin reaches end-of-file or becomes unreadable. Without
// that check a closed or failed stdin would make every later getline() return
// immediately and the loop would spin forever.
void Sample_GPT2()
{
    // Load the GPT2 model
    GPT2 gpt2;
    InitializationParameterOfNLP initParamOfNLPGPT2;
    initParamOfNLPGPT2.parentPath = "";
    initParamOfNLPGPT2.configFilePath = CONFIG_FILE;
    initParamOfNLPGPT2.logName = "";
    ErrorCode errorCode = gpt2.Initialize(initParamOfNLPGPT2);
    if (errorCode != SUCCESS)
    {
        LOG_ERROR(stdout, "fail to initialize GPT2!\n");
        exit(-1);
    }
    LOG_INFO(stdout, "succeed to initialize GPT2\n");

    // Load the vocabulary, used for both encoding and decoding
    cuBERT::FullTokenizer tokenizer = cuBERT::FullTokenizer("../Resource/Models/NLP/GPT2/vocab_shici.txt");
    std::ifstream infile;
    std::string buf;
    std::vector<std::string> output; // output[id] is the vocabulary entry on line `id`
    infile.open("../Resource/Models/NLP/GPT2/vocab_shici.txt");
    while (std::getline(infile, buf))
    {
        output.push_back(buf);
    }

    std::vector<long unsigned int> input_id;
    char question[100];
    std::vector<long unsigned int> score;
    std::vector<std::string> result;

    std::cout << "开始和GPT2对诗,输入CTRL + Z以退出" << std::endl;
    while (true)
    {
        // Preprocessing: read one prompt line
        std::cout << "question: ";
        if (!cin.getline(question, 100))
        {
            // End of input or an unrecoverable stream error: leave the loop.
            if (cin.eof() || cin.bad())
            {
                std::cout << std::endl;
                break;
            }
            // The line did not fit in the buffer: keep the truncated prompt,
            // reset the stream and drop the remainder of the line so that it
            // is not read as the next prompt.
            cin.clear();
            char discarded;
            while (cin.get(discarded) && discarded != '\n')
            {
            }
        }
        gpt2.Preprocessing(tokenizer, question, input_id);

        // Inference: generate at most 50 tokens, feeding each one back in
        for (int i = 0; i < 50; ++i)
        {
            long unsigned int outputs = gpt2.Inference(input_id);
            // 102 stops generation (presumably the [SEP] token id - TODO confirm)
            if (outputs == 102)
            {
                break;
            }
            input_id.push_back(outputs);
            score.push_back(outputs);
        }

        // Map token ids back to vocabulary strings; ids outside the loaded
        // vocabulary are skipped instead of indexing out of bounds
        for (size_t i = 0; i < score.size(); ++i)
        {
            if (score[i] < output.size())
            {
                result.push_back(output[score[i]]);
            }
        }

        // Print the result
        std::cout << "chatbot: ";
        std::cout << question;
        for (size_t j = 0; j < result.size(); ++j)
        {
            std::cout << result[j];
        }
        std::cout << std::endl;

        // Reset per-prompt state
        input_id.clear();
        result.clear();
        score.clear();
    }
}
Src/Sample.h
deleted
100644 → 0
View file @
9cffb74e
// 示例程序
#ifndef __SAMPLE_H__
#define __SAMPLE_H__
// GPT2 Dynamic sample
void
Sample_GPT2
();
#endif
\ No newline at end of file
Src/Utility/CommonUtility.cpp
deleted
100644 → 0
View file @
9cffb74e
#include <CommonUtility.h>
#include <assert.h>
#include <ctype.h>
#include <time.h>
#include <stdlib.h>
#include <algorithm>
#include <sstream>
#include <vector>
#ifdef _WIN32
#include <io.h>
#include <direct.h>
#include <Windows.h>
#else
#include <unistd.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/time.h>
#endif
#include <SimpleLog.h>
namespace
migraphxSamples
{
// Returns the current local time broken into zero-padded decimal strings
// (year, month, day, hour, minute, second, millisecond, weekday; the
// non-Windows branch additionally fills microsecond).
//
// Every field is formatted through a bounded snprintf with an explicit int
// argument: on Linux tv_usec is a long, so passing it straight to "%d" does
// not match the format specifier.
_Time GetCurrentTime3()
{
    _Time currentTime;

    // Formats one integer field with the given printf-style spec.
    auto formatField = [](const char *spec, int value) -> std::string
    {
        char temp[16] = { 0 };
        snprintf(temp, sizeof(temp), spec, value);
        return std::string(temp);
    };

#if (defined WIN32 || defined _WIN32)
    SYSTEMTIME systemTime;
    GetLocalTime(&systemTime);

    currentTime.year = formatField("%04d", static_cast<int>(systemTime.wYear));
    currentTime.month = formatField("%02d", static_cast<int>(systemTime.wMonth));
    currentTime.day = formatField("%02d", static_cast<int>(systemTime.wDay));
    currentTime.hour = formatField("%02d", static_cast<int>(systemTime.wHour));
    currentTime.minute = formatField("%02d", static_cast<int>(systemTime.wMinute));
    currentTime.second = formatField("%02d", static_cast<int>(systemTime.wSecond));
    currentTime.millisecond = formatField("%03d", static_cast<int>(systemTime.wMilliseconds));
    currentTime.weekDay = formatField("%d", static_cast<int>(systemTime.wDayOfWeek));
#else
    struct timeval tv;
    gettimeofday(&tv, NULL);

    // localtime_r writes into a caller-owned struct instead of the shared
    // static buffer used by localtime. If the conversion fails the fields
    // stay zero-initialized rather than dereferencing a null pointer.
    struct tm local = {};
    localtime_r(&tv.tv_sec, &local);

    currentTime.year = formatField("%04d", 1900 + local.tm_year);
    currentTime.month = formatField("%02d", 1 + local.tm_mon);
    currentTime.day = formatField("%02d", local.tm_mday);
    currentTime.hour = formatField("%02d", local.tm_hour);
    currentTime.minute = formatField("%02d", local.tm_min);
    currentTime.second = formatField("%02d", local.tm_sec);
    currentTime.millisecond = formatField("%03d", static_cast<int>(tv.tv_usec / 1000));
    currentTime.microsecond = formatField("%03d", static_cast<int>(tv.tv_usec % 1000));
    currentTime.weekDay = formatField("%d", local.tm_wday);
#endif
    return currentTime;
}
// Splits `str` into the substrings delimited by `separator`.
//
// Empty fields are kept: "a,,b" yields {"a", "", "b"} and an empty `str`
// yields a single empty field. An empty `separator` cannot delimit anything,
// so the whole string is returned as one field (nothing is returned when
// `str` is empty as well); previously that combination never terminated.
std::vector<std::string> SplitString(std::string str, std::string separator)
{
    std::vector<std::string> result;

    if (separator.empty())
    {
        if (!str.empty())
        {
            result.push_back(str);
        }
        return result;
    }

    // Append a trailing separator so the last field is terminated like the others
    str += separator;

    std::string::size_type start = 0;
    while (start < str.size())
    {
        const std::string::size_type pos = str.find(separator, start);
        if (pos == std::string::npos)
        {
            // Only reachable when the appended separator overlaps an earlier match
            break;
        }
        result.push_back(str.substr(start, pos - start));
        start = pos + separator.size();
    }
    return result;
}
// Ordering predicate: true when L's confidence is strictly greater than R's,
// so sorting with it places higher-confidence detections first.
bool CompareConfidence(const ResultOfDetection &L, const ResultOfDetection &R)
{
    return L.confidence > R.confidence;
}
// Ordering predicate: true when L's bounding-box area is strictly greater
// than R's, so sorting with it places larger boxes first.
bool CompareArea(const ResultOfDetection &L, const ResultOfDetection &R)
{
    return L.boundingBox.area() > R.boundingBox.area();
}
// Non-maximum suppression, in place.
//
// Sorts `detections` with CompareConfidence, then for every detection still
// flagged `exist` clears the `exist` flag of each later detection whose
// intersection-over-union with it is above `IOUThreshold`. Entries are never
// removed from the vector; callers are expected to check `exist`.
void NMS(vector<ResultOfDetection> &detections, float IOUThreshold)
{
    std::sort(detections.begin(), detections.end(), CompareConfidence);

    for (int keep = 0; keep < detections.size(); ++keep)
    {
        if (!detections[keep].exist)
        {
            continue;
        }

        for (int other = keep + 1; other < detections.size(); ++other)
        {
            if (!detections[other].exist)
            {
                continue;
            }

            // Intersection over union of the two boxes
            float overlap = (detections[keep].boundingBox & detections[other].boundingBox).area();
            float iou = overlap / (detections[keep].boundingBox.area() + detections[other].boundingBox.area() - overlap);

            if (iou > IOUThreshold)
            {
                detections[other].exist = false;
            }
        }
    }
}
}
Src/Utility/CommonUtility.h
deleted
100644 → 0
View file @
9cffb74e
// 常用工具
#ifndef __COMMON_UTILITY_H__
#define __COMMON_UTILITY_H__
#include <mutex>
#include <string>
#include <vector>
#include <CommonDefinition.h>
using
namespace
std
;
namespace migraphxSamples
{

// Split `str` into the substrings delimited by `separator`
std::vector<std::string> SplitString(std::string str, std::string separator);

// Sort predicates: order detections by confidence or by bounding-box area,
// larger value first
bool CompareConfidence(const ResultOfDetection &L, const ResultOfDetection &R);
bool CompareArea(const ResultOfDetection &L, const ResultOfDetection &R);

// Non-maximum suppression over `detections` using `IOUThreshold` as the
// intersection-over-union cut-off
void NMS(std::vector<ResultOfDetection> &detections, float IOUThreshold);

}
#endif
Src/Utility/Filesystem.cpp
deleted
100644 → 0
View file @
9cffb74e
#include <Filesystem.h>
#include <algorithm>
#include <sys/stat.h>
#include <sys/types.h>
#include <fstream>
#ifdef _WIN32
#include <io.h>
#include <direct.h>
#include <Windows.h>
#else
#include <unistd.h>
#include <dirent.h>
#endif
#include <CommonUtility.h>
#include <opencv2/opencv.hpp>
#include <SimpleLog.h>
using
namespace
cv
;
// Path separator (Linux: '/', Windows: '\\')
#ifdef _WIN32
#define PATH_SEPARATOR '\\'
#else
#define PATH_SEPARATOR '/'
#endif
namespace
migraphxSamples
{
#if defined _WIN32 || defined WINCE
const
char
dir_separators
[]
=
"/
\\
"
;
struct
dirent
{
const
char
*
d_name
;
};
// Windows stand-in for the POSIX directory stream, built on the Win32
// FindFirstFile/FindNextFile enumeration API.
struct DIR
{
#ifdef WINRT
    WIN32_FIND_DATAW data; // find data of the current entry (wide-character API)
#else
    WIN32_FIND_DATAA data; // find data of the current entry (ANSI API)
#endif
    HANDLE handle; // search handle returned by FindFirstFileEx
    dirent ent;    // entry handed back to callers of readdir()
#ifdef WINRT
    DIR() { }
    // On WINRT readdir() heap-allocates the converted name, so it is released here.
    ~DIR()
    {
        if (ent.d_name)
            delete[] ent.d_name;
    }
#endif
};
// Windows implementation of opendir(): starts enumerating "<path>\*".
// Returns a heap-allocated DIR on success (to be released with closedir()),
// or 0 when the search handle could not be created.
DIR* opendir(const char* path)
{
    DIR* dir = new DIR;
    dir->ent.d_name = 0; // marks "no entry returned yet" for readdir()
#ifdef WINRT
    string full_path = string(path) + "\\*";
    wchar_t wfull_path[MAX_PATH];
    size_t copied = mbstowcs(wfull_path, full_path.c_str(), MAX_PATH);
    CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
    dir->handle = ::FindFirstFileExW(wfull_path, FindExInfoStandard,
                    &dir->data, FindExSearchNameMatch, NULL, 0);
#else
    dir->handle = ::FindFirstFileExA((string(path) + "\\*").c_str(),
        FindExInfoStandard, &dir->data, FindExSearchNameMatch, NULL, 0);
#endif
    if(dir->handle == INVALID_HANDLE_VALUE)
    {
        /* no valid search handle to close: just free the DIR object */
        delete dir;
        return 0;
    }
    return dir;
}
// Windows implementation of readdir(): returns the next entry of `dir`, or 0
// when FindNextFile reports no further entry.
//
// The first call returns the entry already fetched by opendir() (d_name is
// still 0 at that point); later calls advance the search first. The returned
// pointer refers to storage inside `dir`, and its name is only valid until
// the next readdir()/closedir() on the same stream.
dirent* readdir(DIR* dir)
{
#ifdef WINRT
    if (dir->ent.d_name != 0)
    {
        if (::FindNextFileW(dir->handle, &dir->data) != TRUE)
            return 0;
    }
    // Convert the wide-character name to a freshly allocated multibyte string
    size_t asize = wcstombs(NULL, dir->data.cFileName, 0);
    CV_Assert((asize != 0) && (asize != (size_t)-1));
    char* aname = new char[asize+1];
    aname[asize] = 0;
    wcstombs(aname, dir->data.cFileName, asize);
    // Free the name handed out by the previous call; otherwise every entry but
    // the last one would leak (delete[] on a null pointer is a no-op).
    delete[] dir->ent.d_name;
    dir->ent.d_name = aname;
#else
    if (dir->ent.d_name != 0)
    {
        if (::FindNextFileA(dir->handle, &dir->data) != TRUE)
            return 0;
    }
    // The ANSI find data already holds a narrow string; point straight at it
    dir->ent.d_name = dir->data.cFileName;
#endif
    return &dir->ent;
}
// Windows implementation of closedir(): closes the search handle and frees
// the DIR object allocated by opendir().
void closedir(DIR* dir)
{
    ::FindClose(dir->handle);
    delete dir;
}
#else
# include <dirent.h>
# include <sys/stat.h>
const
char
dir_separators
[]
=
"/"
;
#endif
// Reports whether `path` refers to a directory.
//
// On Windows, a non-null `dir` means the attributes of the entry currently
// held by that directory stream are used instead of querying `path`. On other
// platforms `dir` is ignored and `path` is examined with stat().
// Returns false when the attributes cannot be obtained.
static bool isDir(const string &path, DIR *dir)
{
#if defined _WIN32 || defined WINCE
    if (dir)
    {
        // The enumeration already carries the attributes of the current entry
        return (dir->data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
    }

    WIN32_FILE_ATTRIBUTE_DATA all_attrs;
#ifdef WINRT
    wchar_t wpath[MAX_PATH];
    size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
    CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
    BOOL status = ::GetFileAttributesExW(wpath, GetFileExInfoStandard, &all_attrs);
#else
    BOOL status = ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &all_attrs);
#endif
    return status && ((all_attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0);
#else
    (void)dir;
    struct stat info;
    const bool statSucceeded = (stat(path.c_str(), &info) == 0);
    return statSucceeded && (S_ISDIR(info.st_mode) != 0);
#endif
}
// Public wrapper around isDir(): reports whether `path` is a directory,
// without an associated directory stream.
bool IsDirectory(const string &path)
{
    return isDir(path, NULL);
}
// Reports whether `path` exists, i.e. whether its attributes can be queried
// (GetFileAttributesEx on Windows, stat() elsewhere).
bool Exists(const string &path)
{
#if defined _WIN32 || defined WINCE
    WIN32_FILE_ATTRIBUTE_DATA all_attrs;
#ifdef WINRT
    wchar_t wpath[MAX_PATH];
    size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
    CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
    BOOL status = ::GetFileAttributesExW(wpath, GetFileExInfoStandard, &all_attrs);
#else
    BOOL status = ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &all_attrs);
#endif
    return status != 0;
#else
    struct stat info;
    const int rc = stat(path.c_str(), &info);
    return rc == 0;
#endif
}
// True when `c` is a forward slash or a backslash.
bool IsPathSeparator(char c)
{
    switch (c)
    {
    case '/':
    case '\\':
        return true;
    default:
        return false;
    }
}
// Concatenates `base` and `path` with exactly one separator between them.
//
// An empty operand yields the other operand unchanged. If both sides already
// provide a separator at the joint, the one from `path` is dropped; if neither
// does, PATH_SEPARATOR is inserted.
string JoinPath(const string &base, const string &path)
{
    if (base.empty())
        return path;
    if (path.empty())
        return base;

    const bool baseEndsWithSep = IsPathSeparator(base[base.size() - 1]);
    const bool pathStartsWithSep = IsPathSeparator(path[0]);

    // Exactly one side contributes the separator: plain concatenation
    if (baseEndsWithSep != pathStartsWithSep)
        return base + path;

    // Both contribute one: skip the leading separator of `path`
    if (baseEndsWithSep)
        return base + path.substr(1);

    // Neither contributes one: insert it
    return base + PATH_SEPARATOR + path;
}
// Matches the NUL-terminated `string` against the pattern `wild`, where '*'
// stands for any run of characters (including none) and '?' for exactly one
// character. Returns true only when the whole string matches the whole pattern.
static bool wildcmp(const char *string, const char *wild)
{
    // Phase 1: everything before the first '*' must match position by position
    for (; *string != '\0' && *wild != '*'; ++string, ++wild)
    {
        const bool sameChar = (*wild == *string) || (*wild == '?');
        if (!sameChar)
        {
            return false;
        }
    }

    // Phase 2: match with backtracking to the most recent '*'
    const char *resumeText = 0;    // where to restart in `string` after a mismatch
    const char *resumePattern = 0; // pattern position right after the last '*'
    while (*string != '\0')
    {
        if (*wild == '*')
        {
            ++wild;
            if (*wild == '\0')
            {
                // A trailing '*' swallows the rest of the string
                return true;
            }
            resumePattern = wild;
            resumeText = string + 1;
            continue;
        }

        if (*wild == *string || *wild == '?')
        {
            ++wild;
            ++string;
            continue;
        }

        // Mismatch: let the last '*' absorb one more character and retry
        wild = resumePattern;
        string = resumeText;
        ++resumeText;
    }

    // Phase 3: only '*' may remain in the pattern
    while (*wild == '*')
    {
        ++wild;
    }
    return *wild == '\0';
}
// Collects the entries of `directory` into `result`.
//
// Each entry is reported as `pathPrefix` joined with its name. An entry is
// added when `wildchart` is empty or its name matches the pattern.
// Directories are descended into when `recursive` is set and are themselves
// reported only when `includeDirectories` is set. The "." and ".." entries
// are always skipped. A directory that cannot be opened is logged, not fatal.
static void glob_rec(const string &directory, const string &wildchart, std::vector<string> &result,
                     bool recursive, bool includeDirectories, const string &pathPrefix)
{
    DIR *dir = opendir(directory.c_str());
    if (dir == 0)
    {
        LOG_INFO(stdout, "could not open directory: %s", directory.c_str());
        return;
    }

    /* walk all the files and directories within directory */
    try
    {
        for (struct dirent *ent = readdir(dir); ent != 0; ent = readdir(dir))
        {
            const char *name = ent->d_name;

            const bool isEmptyName = (name[0] == 0);
            const bool isSelf = (name[0] == '.' && name[1] == 0);
            const bool isParent = (name[0] == '.' && name[1] == '.' && name[2] == 0);
            if (isEmptyName || isSelf || isParent)
            {
                continue;
            }

            string path = JoinPath(directory, name);
            string entry = JoinPath(pathPrefix, name);

            if (isDir(path, dir))
            {
                if (recursive)
                {
                    glob_rec(path, wildchart, result, recursive, includeDirectories, entry);
                }
                if (!includeDirectories)
                {
                    continue;
                }
            }

            const bool accepted = wildchart.empty() || wildcmp(name, wildchart.c_str());
            if (accepted)
            {
                result.push_back(entry);
            }
        }
    }
    catch (...)
    {
        // Release the directory stream before propagating the exception
        closedir(dir);
        throw;
    }
    closedir(dir);
}
/**
 * List the files under `directory` whose names match `pattern`.
 *
 * @param directory directory to scan
 * @param pattern   one or more wildcards separated by ","
 * @param result    cleared, then filled with the matches in sorted order
 * @param recursive scan subdirectories as well
 * @param addPath   when true the entry path built from `directory` is stored,
 *                  otherwise only the file name
 *
 * Directories are never reported by this function.
 */
void GetFileNameList(const std::string &directory, const std::string &pattern, std::vector<std::string> &result,
                     bool recursive, bool addPath)
{
    // Each comma-separated pattern is globbed independently.
    std::vector<std::string> patterns = SplitString(pattern, ",");

    result.clear();
    for (const std::string &singlePattern : patterns)
    {
        std::vector<std::string> matches;
        glob_rec(directory, singlePattern, matches, recursive, true, directory);

        for (const std::string &match : matches)
        {
            if (IsDirectory(match))
            {
                continue;
            }
            result.push_back(addPath ? match : GetFileName(match));
        }
    }

    std::sort(result.begin(), result.end());
}
/**
 * Variant of GetFileNameList that can also report directories.
 *
 * @param directory directory to scan
 * @param pattern   one or more wildcards separated by ","
 * @param result    cleared, then filled with the matches in sorted order
 * @param recursive scan subdirectories as well
 * @param addPath   when true, entry paths are stored and matching directories are
 *                  included with a trailing "/" and every separator rewritten to '/';
 *                  when false, only the names of non-directory entries are stored
 */
void GetFileNameList2(const std::string &directory, const std::string &pattern, std::vector<std::string> &result,
                      bool recursive, bool addPath)
{
    // Each comma-separated pattern is globbed independently.
    std::vector<std::string> patterns = SplitString(pattern, ",");

    result.clear();
    for (const std::string &singlePattern : patterns)
    {
        std::vector<std::string> matches;
        glob_rec(directory, singlePattern, matches, recursive, true, directory);

        for (const std::string &match : matches)
        {
            std::string filePath = match;

            if (IsDirectory(filePath))
            {
                // Mark directories with a trailing slash and normalise their separators.
                filePath = filePath + "/";
                for (char &ch : filePath)
                {
                    if (IsPathSeparator(ch))
                    {
                        ch = '/';
                    }
                }
            }

            if (addPath)
            {
                result.push_back(filePath);
            }
            else if (!IsDirectory(filePath))
            {
                result.push_back(GetFileName(filePath));
            }
        }
    }

    std::sort(result.begin(), result.end());
}
/**
 * Delete a file, or a directory together with everything inside it.
 *
 * Does nothing when `path` does not exist. A failed removal is logged and
 * otherwise ignored.
 *
 * @param path file or directory to delete
 */
void RemoveAll(const std::string &path)
{
    if (!Exists(path))
    {
        return;
    }

    if (!IsDirectory(path))
    {
#ifdef _MSC_VER
        const int status = _unlink(path.c_str());
#else
        const int status = unlink(path.c_str());
#endif
        if (status != 0)
        {
            LOG_INFO(stdout, "can't remove file: %s\n", path.c_str());
        }
        return;
    }

    // Empty the directory first: remove each direct child, recursing into subdirectories.
    std::vector<std::string> children;
    GetFileNameList2(path, std::string(), children, false, true);
    for (const std::string &child : children)
    {
        RemoveAll(child);
    }

#ifdef _MSC_VER
    const int status = _rmdir(path.c_str());
#else
    const int status = rmdir(path.c_str());
#endif
    if (status != 0)
    {
        LOG_INFO(stdout, "can't remove directory: %s\n", path.c_str());
    }
}
/**
 * Delete `directory`, logging every entry whose name matches `extension`.
 *
 * @param directory path to delete
 * @param extension wildcard applied to entry names; an empty string matches every entry
 *
 * NOTE(review): the deletion counter is a function-local static, so the number
 * printed in the log keeps growing across separate calls.
 * NOTE(review): RemoveAll(directory) runs unconditionally at the end, so the
 * whole directory is removed even when `extension` matched only some entries —
 * confirm this is intended.
 */
void Remove(const std::string &directory, const std::string &extension)
{
    static int numberOfFiles = 0;

    DIR *handle = opendir(directory.c_str());
    if (handle == 0)
    {
        LOG_INFO(stdout, "could not open directory: %s", directory.c_str());
    }
    else
    {
        try
        {
            for (struct dirent *item = readdir(handle); item != 0; item = readdir(handle))
            {
                const char *name = item->d_name;

                // Ignore empty names and the "." / ".." pseudo-entries.
                const bool isBlank = (name[0] == 0);
                const bool isSelf = (name[0] == '.' && name[1] == 0);
                const bool isParent = (name[0] == '.' && name[1] == '.' && name[2] == 0);
                if (isBlank || isSelf || isParent)
                {
                    continue;
                }

                std::string entryPath = JoinPath(directory, name);
                if (isDir(entryPath, handle))
                {
                    Remove(entryPath, extension);
                }

                // Check the entry name against the requested extension pattern.
                const bool selected = extension.empty() || wildcmp(name, extension.c_str());
                if (selected)
                {
                    RemoveAll(entryPath);
                    ++numberOfFiles;
                    LOG_INFO(stdout, "%s deleted! number of deleted files:%d\n", entryPath.c_str(), numberOfFiles);
                }
            }
        }
        catch (...)
        {
            // Release the directory handle before letting the exception propagate.
            closedir(handle);
            throw;
        }
        closedir(handle);
    }

    // Finally call RemoveAll to delete the directory itself.
    RemoveAll(directory);
}
string
GetFileName
(
const
string
&
path
)
{
string
fileName
;
int
indexOfPathSeparator
=
-
1
;
for
(
int
i
=
path
.
size
()
-
1
;
i
>=
0
;
--
i
)
{
if
(
IsPathSeparator
(
path
[
i
]))
{
fileName
=
path
.
substr
(
i
+
1
,
path
.
size
()
-
i
-
1
);
indexOfPathSeparator
=
i
;
break
;
}
}
if
(
indexOfPathSeparator
==
-
1
)
{
fileName
=
path
;
}
return
fileName
;
}
/**
 * Return the file name of `path` with its last extension removed.
 *
 * Example: "D:/1/1.txt" -> "1", "a/b.tar.gz" -> "b.tar".
 *
 * A '.' in the first position is not treated as an extension separator. When
 * the file name has no extension (for example "README" or ".bashrc") the file
 * name itself is returned; previously an empty string was returned in that case.
 *
 * @param path file path
 * @return the file name without its extension
 */
std::string GetFileName_NoExtension(const std::string &path)
{
    const std::string fileName = GetFileName(path);

    const std::string::size_type dot = fileName.rfind('.');
    if (dot == std::string::npos || dot == 0)
    {
        // No extension to strip.
        return fileName;
    }
    return fileName.substr(0, dot);
}
/**
 * Return the extension of `path`, including the leading '.'.
 *
 * Example: "D:/1/1.txt" -> ".txt", "archive.tar.gz" -> ".gz".
 *
 * Only the last path component is inspected: the backwards scan stops at the
 * first '/' or '\\', so a '.' inside a directory name ("dir.v2/file") or a
 * relative prefix ("../data/file") is no longer mistaken for an extension.
 *
 * @param path file path
 * @return the extension, or an empty string when the last component contains no '.'
 */
std::string GetExtension(const std::string &path)
{
    for (std::string::size_type i = path.size(); i-- > 0;)
    {
        const char c = path[i];
        if (c == '/' || c == '\\')
        {
            // Reached the directory part without finding a dot.
            break;
        }
        if (c == '.')
        {
            return path.substr(i);
        }
    }
    return std::string();
}
/**
 * Return `path` up to and including its last path separator.
 *
 * Example: "D:/1/1.txt" -> "D:/1/". An empty string is returned when `path`
 * contains no separator.
 */
std::string GetParentPath(const std::string &path)
{
    // Shrink the prefix until it ends with a separator (or becomes empty).
    std::string::size_type end = path.size();
    while (end > 0 && !IsPathSeparator(path[end - 1]))
    {
        --end;
    }
    return path.substr(0, end);
}
/**
 * Create a single directory level.
 *
 * @param path directory to create; its parent is expected to exist already
 * @return true when the directory was created, or when creation failed but
 *         `path` is a directory anyway (for example it already existed)
 */
static bool CreateDirectory(const std::string &path)
{
#if defined WIN32 || defined _WIN32 || defined WINCE
#ifdef WINRT
    wchar_t wpath[MAX_PATH];
    size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH);
    CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1));
    // The path was converted to wide characters, so the wide-character API is
    // required; CreateDirectoryA takes a narrow string and does not accept wpath.
    int result = CreateDirectoryW(wpath, NULL) ? 0 : -1;
#else
    int result = _mkdir(path.c_str());
#endif
#elif defined __linux__ || defined __APPLE__
    int result = mkdir(path.c_str(), 0777);
#else
    int result = -1;
#endif

    if (result == -1)
    {
        // Creation failed; still report success if the directory is there.
        return IsDirectory(path);
    }
    return true;
}
/**
 * Create a directory together with any missing parent directories.
 *
 * @param directoryPath directory to create; trailing path separators are ignored
 * @return true when the directory exists on return (or there was nothing to
 *         create), false when a level could not be created
 */
bool CreateDirectories(const std::string &directoryPath)
{
    // Drop trailing separators so the last component can be split off below.
    std::string target = directoryPath;
    while (true)
    {
        const char tail = target.empty() ? '\0' : target[target.length() - 1];
        if (!IsPathSeparator(tail))
        {
            break;
        }
        target = target.substr(0, target.length() - 1);
    }

    const bool nothingToCreate = target.empty() || target == "./" || target == ".\\" || target == ".";
    if (nothingToCreate || IsDirectory(target))
    {
        return true;
    }

    // Split at the last '/', falling back to the last '\\' when there is no '/'.
    std::string::size_type split = target.rfind('/');
    if (split == std::string::npos)
    {
        split = target.rfind('\\');
    }

    if (split != std::string::npos)
    {
        std::string parent = target.substr(0, split);
        // Parents are created first; give up as soon as one level fails.
        if (!parent.empty() && !CreateDirectories(parent))
        {
            return false;
        }
    }

    return CreateDirectory(target);
}
/**
 * Copy the contents of one file to another (binary copy).
 *
 * Example: CopyFile("D:/1.txt", "D:/2.txt") copies 1.txt to 2.txt.
 *
 * @param srcPath file to read
 * @param dstPath file to create or overwrite
 * @return true on success; false when the two paths are equal, when either
 *         file cannot be opened, or when writing the destination fails
 */
bool CopyFile(const std::string srcPath, const std::string dstPath)
{
    // Must be checked before any stream is opened: opening the destination
    // truncates it, which would wipe the source when both paths are the same.
    if (srcPath == dstPath)
    {
        LOG_ERROR(stdout, "src can not be same with dst\n");
        return false;
    }

    std::ifstream srcFile(srcPath, std::ios::binary);
    if (!srcFile.is_open())
    {
        LOG_ERROR(stdout, "can not open %s\n", srcPath.c_str());
        return false;
    }

    // Opened only after the source is known to be readable, so a failed copy
    // does not leave an empty destination file behind.
    std::ofstream dstFile(dstPath, std::ios::binary);
    if (!dstFile.is_open())
    {
        LOG_ERROR(stdout, "can not open %s\n", dstPath.c_str());
        return false;
    }

    char buffer[2048];
    while (srcFile)
    {
        srcFile.read(buffer, sizeof(buffer));
        // gcount() is the number of bytes actually read; the final chunk may be short.
        dstFile.write(buffer, srcFile.gcount());
    }

    srcFile.close();
    dstFile.close();

    if (!dstFile)
    {
        LOG_ERROR(stdout, "can not write %s\n", dstPath.c_str());
        return false;
    }
    return true;
}
/**
 * Copy a directory tree into another directory.
 *
 * Example: CopyDirectories("D:/0/1/2/", "E:/3/") copies the directory
 * D:/0/1/2/ into E:/3/, giving E:/3/2/.
 *
 * @param srcPath directory to copy; a single trailing path separator is
 *                accepted and removed (previously the last character was
 *                dropped unconditionally, which broke paths given without one)
 * @param dstPath directory that receives the copy; it is concatenated directly
 *                with the relative path, so it should end with a separator
 * @return false when srcPath equals dstPath, true otherwise. Failures of
 *         individual file copies are logged by CopyFile and do not change
 *         the return value.
 */
bool CopyDirectories(std::string srcPath, const std::string dstPath)
{
    if (srcPath == dstPath)
    {
        LOG_ERROR(stdout, "src can not be same with dst\n");
        return false;
    }

    // Remove the trailing path separator, but only when there is one.
    if (!srcPath.empty())
    {
        const char tail = srcPath[srcPath.size() - 1];
        if (tail == '/' || tail == '\\')
        {
            srcPath.erase(srcPath.size() - 1);
        }
    }

    std::vector<std::string> fileNameList;
    GetFileNameList2(srcPath, "", fileNameList, true, true);

    // Everything after the parent of srcPath is the path relative to dstPath.
    const std::string parentPathOfSrc = GetParentPath(srcPath);
    const std::string::size_type prefixLength = parentPathOfSrc.size();

    // create all directories
    for (std::size_t i = 0; i < fileNameList.size(); ++i)
    {
        const std::string &srcFilePath = fileNameList[i];
        const std::string dstFilePath = dstPath + srcFilePath.substr(prefixLength);
        CreateDirectories(GetParentPath(dstFilePath));
    }

    // copy files
    for (std::size_t i = 0; i < fileNameList.size(); ++i)
    {
        const std::string &srcFilePath = fileNameList[i];
        if (IsDirectory(srcFilePath))
        {
            continue;
        }

        const std::string dstFilePath = dstPath + srcFilePath.substr(prefixLength);
        CopyFile(srcFilePath, dstFilePath);

        // Progress in percent; "%%" prints a literal percent sign.
        const double process = (1.0 * (i + 1) / fileNameList.size()) * 100;
        LOG_INFO(stdout, "%s done! %f%%\n", GetFileName(srcFilePath).c_str(), process);
    }

    // The list also contains directory entries; cast because "%d" expects an int.
    LOG_INFO(stdout, "all done!(the number of files:%d)\n", static_cast<int>(fileNameList.size()));
    return true;
}
}
Src/Utility/Filesystem.h
deleted
100644 → 0
View file @
9cffb74e
// File and directory handling utilities
#ifndef __FILE_SYSTEM_H__
#define __FILE_SYSTEM_H__
#include <vector>
#include <string>
using
namespace
std
;
namespace migraphxSamples
{

// Whether the path exists
bool Exists(const std::string &path);

// Whether the path is a directory
bool IsDirectory(const std::string &path);

// Whether c is a path separator (Linux: '/', Windows: '\\')
bool IsPathSeparator(char c);

// Join two path components
string JoinPath(const std::string &base, const std::string &path);

// Create a directory including all missing parent levels.
// Note: when creating nested directories, no file may already exist at the target directory path.
bool CreateDirectories(const std::string &directoryPath);

/** Build the list of file names that match the given pattern(s) (supports recursive traversal)
 *
 * pattern: the pattern, e.g. "*.jpg", "*.png", "*.jpg,*.png"
 * addPath: whether the returned names include the parent path
 * Notes:
    1. Separate multiple patterns with ",", e.g. "*.jpg,*.png"
    2. The wildcards '*' and '?' are supported, e.g. every file name whose first character is 7: "7*.*",
       every jpg file name ending in 512: "*512.jpg"
    3. Use "*.jpg", not ".jpg"
    4. An empty string returns all results
    5. Subdirectory names are never returned
 *
 */
void GetFileNameList(const std::string &directory, const std::string &pattern, std::vector<std::string> &result, bool recursive, bool addPath);

// Differs from GetFileNameList in that, when addPath is true, subdirectory paths are returned as well
// (directory names end with "/")
void GetFileNameList2(const std::string &directory, const std::string &pattern, std::vector<std::string> &result, bool recursive, bool addPath);

// Delete a file or a directory; deletion is recursive
void Remove(const std::string &directory, const std::string &extension = "");

/** Get the file name, extension and parent of a path
 *
 * Example: for path D:/1/1.txt, GetFileName() is 1.txt, GetFileName_NoExtension() is 1,
 * GetExtension() is .txt and GetParentPath() is D:/1/
 */
string GetFileName(const std::string &path); // 1.txt
string GetFileName_NoExtension(const std::string &path); // 1
string GetExtension(const std::string &path); // .txt
string GetParentPath(const std::string &path); // D:/1/

// Copy a file: CopyFile("D:/1.txt","D:/2.txt"); copies 1.txt to 2.txt
bool CopyFile(const std::string srcPath, const std::string dstPath);

/** Copy a directory
 *
 * Example: CopyDirectories("D:/0/1/2/","E:/3/"); copies the directory D:/0/1/2/ into E:/3/
 * (the resulting structure is E:/3/2/)
 * Notes:
    1. The original note said the first argument must not end with "/".
       NOTE(review): this contradicts the example above, which passes a trailing "/" — confirm
       which form callers should use.
    2. Hidden files cannot be copied (NOTE(review): stated by the original comment — confirm)
 */
bool CopyDirectories(std::string srcPath, const std::string dstPath);

}
#endif
Src/
NLP/GPT2
/tokenization.cpp
→
Src/
Utility
/tokenization.cpp
View file @
c99ee00b
File moved
Src/
NLP/GPT2
/tokenization.h
→
Src/
Utility
/tokenization.h
View file @
c99ee00b
File moved
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment