Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
fae6c92e
Unverified
Commit
fae6c92e
authored
Sep 05, 2023
by
Hongxin Liu
Committed by
GitHub
Sep 05, 2023
Browse files
Merge branch 'main' into feature/shardformer
parents
bd186784
ac178ca5
Changes
113
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
86 additions
and
97 deletions
+86
-97
docs/source/en/advanced_tutorials/train_vit_with_hybrid_parallelism.md
...n/advanced_tutorials/train_vit_with_hybrid_parallelism.md
+7
-6
docs/source/en/basics/engine_trainer.md
docs/source/en/basics/engine_trainer.md
+4
-3
docs/source/en/basics/model_checkpoint.md
docs/source/en/basics/model_checkpoint.md
+2
-1
docs/source/en/features/gradient_handler.md
docs/source/en/features/gradient_handler.md
+3
-2
docs/source/en/features/mixed_precision_training.md
docs/source/en/features/mixed_precision_training.md
+1
-1
docs/source/en/features/pipeline_parallel.md
docs/source/en/features/pipeline_parallel.md
+2
-1
docs/source/zh-Hans/advanced_tutorials/add_your_parallel.md
docs/source/zh-Hans/advanced_tutorials/add_your_parallel.md
+5
-4
docs/source/zh-Hans/advanced_tutorials/train_gpt_using_hybrid_parallelism.md
.../advanced_tutorials/train_gpt_using_hybrid_parallelism.md
+4
-3
docs/source/zh-Hans/advanced_tutorials/train_vit_using_pipeline_parallelism.md
...dvanced_tutorials/train_vit_using_pipeline_parallelism.md
+9
-8
docs/source/zh-Hans/advanced_tutorials/train_vit_with_hybrid_parallelism.md
...s/advanced_tutorials/train_vit_with_hybrid_parallelism.md
+7
-6
docs/source/zh-Hans/basics/engine_trainer.md
docs/source/zh-Hans/basics/engine_trainer.md
+4
-3
docs/source/zh-Hans/basics/model_checkpoint.md
docs/source/zh-Hans/basics/model_checkpoint.md
+2
-1
docs/source/zh-Hans/features/gradient_handler.md
docs/source/zh-Hans/features/gradient_handler.md
+3
-2
docs/source/zh-Hans/features/mixed_precision_training.md
docs/source/zh-Hans/features/mixed_precision_training.md
+1
-1
docs/source/zh-Hans/features/pipeline_parallel.md
docs/source/zh-Hans/features/pipeline_parallel.md
+2
-1
examples/language/gpt/titans/dataset/webtext.py
examples/language/gpt/titans/dataset/webtext.py
+1
-1
examples/language/gpt/titans/model/embed.py
examples/language/gpt/titans/model/embed.py
+1
-1
examples/language/gpt/titans/train_gpt.py
examples/language/gpt/titans/train_gpt.py
+1
-1
examples/tutorial/sequence_parallel/data/datasets/indexed_dataset.py
...torial/sequence_parallel/data/datasets/indexed_dataset.py
+26
-51
examples/tutorial/sequence_parallel/requirements.txt
examples/tutorial/sequence_parallel/requirements.txt
+1
-0
No files found.
docs/source/en/advanced_tutorials/train_vit_with_hybrid_parallelism.md
View file @
fae6c92e
...
@@ -79,7 +79,7 @@ from colossalai.core import global_context as gpc
...
@@ -79,7 +79,7 @@ from colossalai.core import global_context as gpc
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.nn.lr_scheduler
import
LinearWarmupLR
from
colossalai.nn.lr_scheduler
import
LinearWarmupLR
from
colossalai.nn.metric
import
Accuracy
from
colossalai.nn.metric
import
Accuracy
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
```
```
-
Other modules
-
Other modules
...
@@ -273,8 +273,8 @@ SEQ_LENGTH = (IMG_SIZE // PATCH_SIZE) ** 2 + 1 # add 1 for cls token
...
@@ -273,8 +273,8 @@ SEQ_LENGTH = (IMG_SIZE // PATCH_SIZE) ** 2 + 1 # add 1 for cls token
### Build pipeline model (`/hybrid_parallel/model/vit.py`)
### Build pipeline model (`/hybrid_parallel/model/vit.py`)
Colossal-AI provides two methods to build a pipeline model from the existing model.
Colossal-AI provides two methods to build a pipeline model from the existing model.
-
`colossalai.builder.build_pipeline_model_from_cfg`
-
`colossalai.
legacy.
builder.build_pipeline_model_from_cfg`
-
`colossalai.builder.build_pipeline_model`
-
`colossalai.
legacy.
builder.build_pipeline_model`
Besides, you can also build a pipeline model from scratch with Colossal-AI.
Besides, you can also build a pipeline model from scratch with Colossal-AI.
```
python
```
python
...
@@ -284,11 +284,11 @@ from typing import Callable
...
@@ -284,11 +284,11 @@ from typing import Callable
import
inspect
import
inspect
import
torch
import
torch
from
colossalai
import
nn
as
col_nn
from
colossalai
import
nn
as
col_nn
from
colossalai.registry
import
LAYERS
,
MODELS
from
colossalai.
legacy.
registry
import
LAYERS
,
MODELS
from
colossalai.logging
import
get_dist_logger
from
colossalai.logging
import
get_dist_logger
from
colossalai.core
import
global_context
as
gpc
from
colossalai.core
import
global_context
as
gpc
from
colossalai.context
import
ParallelMode
from
colossalai.context
import
ParallelMode
from
colossalai.builder.pipeline
import
partition_uniform
from
colossalai.
legacy.
builder.pipeline
import
partition_uniform
from
torch
import
dtype
,
nn
from
torch
import
dtype
,
nn
from
model_zoo.vit.vit
import
ViTBlock
,
ViTEmbedding
,
ViTHead
from
model_zoo.vit.vit
import
ViTBlock
,
ViTEmbedding
,
ViTHead
...
@@ -415,7 +415,7 @@ def build_pipeline_vit(num_layers, num_chunks, device=torch.device('cuda'), **kw
...
@@ -415,7 +415,7 @@ def build_pipeline_vit(num_layers, num_chunks, device=torch.device('cuda'), **kw
#### Import modules
#### Import modules
```
python
```
python
from
colossalai.engine.schedule
import
(
InterleavedPipelineSchedule
,
from
colossalai.
legacy.
engine.schedule
import
(
InterleavedPipelineSchedule
,
PipelineSchedule
)
PipelineSchedule
)
from
colossalai.utils
import
MultiTimer
from
colossalai.utils
import
MultiTimer
import
os
import
os
...
@@ -644,3 +644,4 @@ torchrun --standalone --nproc_per_node <NUM_GPUs> train_hybrid.py --config ./co
...
@@ -644,3 +644,4 @@ torchrun --standalone --nproc_per_node <NUM_GPUs> train_hybrid.py --config ./co
# If your torch >= 1.9.0
# If your torch >= 1.9.0
# python -m torch.distributed.run --standalone --nproc_per_node= <NUM_GPUs> train_hybrid.py --config ./configs/config_hybrid_parallel.py
# python -m torch.distributed.run --standalone --nproc_per_node= <NUM_GPUs> train_hybrid.py --config ./configs/config_hybrid_parallel.py
```
```
<!-- doc-test-command: echo -->
docs/source/en/basics/engine_trainer.md
View file @
fae6c92e
...
@@ -64,7 +64,7 @@ Trainer is a more high-level wrapper for the user to execute training with fewer
...
@@ -64,7 +64,7 @@ Trainer is a more high-level wrapper for the user to execute training with fewer
```
python
```
python
from
colossalai.logging
import
get_dist_logger
from
colossalai.logging
import
get_dist_logger
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
# build components and initialize with colossalai.initialize
# build components and initialize with colossalai.initialize
...
...
...
@@ -107,7 +107,7 @@ If you want to customize your own hook class, you can inherit `hooks.BaseHook` a
...
@@ -107,7 +107,7 @@ If you want to customize your own hook class, you can inherit `hooks.BaseHook` a
```
python
```
python
from
colossalai.logging
import
get_dist_logger
from
colossalai.logging
import
get_dist_logger
from
colossalai.trainer
import
hooks
from
colossalai.
legacy.
trainer
import
hooks
class
LogMessageHook
(
hooks
.
BaseHook
):
class
LogMessageHook
(
hooks
.
BaseHook
):
...
@@ -345,7 +345,7 @@ If you wish to train with a trainer object, you can follow the code snippet belo
...
@@ -345,7 +345,7 @@ If you wish to train with a trainer object, you can follow the code snippet belo
```
python
```
python
from
colossalai.nn.metric
import
Accuracy
from
colossalai.nn.metric
import
Accuracy
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
# create a trainer object
# create a trainer object
...
@@ -387,3 +387,4 @@ python -m torch.distributed.launch --nproc_per_node <num_gpus> --master_addr loc
...
@@ -387,3 +387,4 @@ python -m torch.distributed.launch --nproc_per_node <num_gpus> --master_addr loc
# with trainer
# with trainer
python
-m
torch.distributed.launch
--nproc_per_node
<num_gpus>
--master_addr
localhost
--master_port
29500 run_resnet_cifar10_with_trainer.py
python
-m
torch.distributed.launch
--nproc_per_node
<num_gpus>
--master_addr
localhost
--master_port
29500 run_resnet_cifar10_with_trainer.py
```
```
<!-- doc-test-command: echo -->
docs/source/en/basics/model_checkpoint.md
View file @
fae6c92e
...
@@ -41,7 +41,7 @@ for epoch in range(num_epochs):
...
@@ -41,7 +41,7 @@ for epoch in range(num_epochs):
#### Save when using trainer
#### Save when using trainer
```
python
```
python
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
model
=
...
model
=
...
engine
,
_
,
_
,
_
=
colossalai
.
initialize
(
model
=
model
,
...)
engine
,
_
,
_
,
_
=
colossalai
.
initialize
(
model
=
model
,
...)
trainer
=
Trainer
(
engine
,
...)
trainer
=
Trainer
(
engine
,
...)
...
@@ -61,3 +61,4 @@ model = ...
...
@@ -61,3 +61,4 @@ model = ...
load_checkpoint
(
'xxx.pt'
,
model
)
load_checkpoint
(
'xxx.pt'
,
model
)
...
# train or test
...
# train or test
```
```
<!-- doc-test-command: echo -->
docs/source/en/features/gradient_handler.md
View file @
fae6c92e
...
@@ -28,8 +28,8 @@ To implement a customized gradient handler, you need to follow these steps.
...
@@ -28,8 +28,8 @@ To implement a customized gradient handler, you need to follow these steps.
3.
implement
`handle_gradient`
method.
3.
implement
`handle_gradient`
method.
```
python
```
python
from
colossalai.registry
import
GRADIENT_HANDLER
from
colossalai.
legacy.
registry
import
GRADIENT_HANDLER
from
colossalai.engine.gradient_handler
import
BaseGradientHandler
from
colossalai.
legacy.
engine.gradient_handler
import
BaseGradientHandler
@
GRADIENT_HANDLER
.
register_module
@
GRADIENT_HANDLER
.
register_module
...
@@ -61,3 +61,4 @@ to demonstrate the use of gradient handler. In this example, we used `DataParall
...
@@ -61,3 +61,4 @@ to demonstrate the use of gradient handler. In this example, we used `DataParall
```
shell
```
shell
python
-m
torch.distributed.launch
--nproc_per_node
4
--master_addr
localhost
--master_port
29500 train_with_engine.py
python
-m
torch.distributed.launch
--nproc_per_node
4
--master_addr
localhost
--master_port
29500 train_with_engine.py
```
```
<!-- doc-test-command: echo -->
docs/source/en/features/mixed_precision_training.md
View file @
fae6c92e
...
@@ -267,7 +267,7 @@ from pathlib import Path
...
@@ -267,7 +267,7 @@ from pathlib import Path
from
colossalai.core
import
global_context
as
gpc
from
colossalai.core
import
global_context
as
gpc
from
colossalai.logging
import
get_dist_logger
from
colossalai.logging
import
get_dist_logger
from
colossalai.utils
import
get_dataloader
from
colossalai.utils
import
get_dataloader
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
from
colossalai.nn.lr_scheduler
import
LinearWarmupLR
from
colossalai.nn.lr_scheduler
import
LinearWarmupLR
from
timm.models
import
vit_base_patch16_224
from
timm.models
import
vit_base_patch16_224
from
torchvision
import
datasets
,
transforms
from
torchvision
import
datasets
,
transforms
...
...
docs/source/en/features/pipeline_parallel.md
View file @
fae6c92e
...
@@ -79,7 +79,7 @@ import colossalai.nn as col_nn
...
@@ -79,7 +79,7 @@ import colossalai.nn as col_nn
from
colossalai.core
import
global_context
as
gpc
from
colossalai.core
import
global_context
as
gpc
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
from
colossalai.utils
import
MultiTimer
,
get_dataloader
from
colossalai.utils
import
MultiTimer
,
get_dataloader
from
colossalai.context
import
ParallelMode
from
colossalai.context
import
ParallelMode
from
colossalai.pipeline.pipelinable
import
PipelinableContext
from
colossalai.pipeline.pipelinable
import
PipelinableContext
...
@@ -157,3 +157,4 @@ trainer.fit(train_dataloader=train_dataloader,
...
@@ -157,3 +157,4 @@ trainer.fit(train_dataloader=train_dataloader,
```
```
We use
`2`
pipeline stages and the batch will be split into
`4`
micro batches.
We use
`2`
pipeline stages and the batch will be split into
`4`
micro batches.
<!-- doc-test-command: echo -->
docs/source/zh-Hans/advanced_tutorials/add_your_parallel.md
View file @
fae6c92e
...
@@ -81,14 +81,14 @@ Colossal-AI 为用户提供了一个全局 context,使他们能够轻松地管
...
@@ -81,14 +81,14 @@ Colossal-AI 为用户提供了一个全局 context,使他们能够轻松地管
## 梯度 Handler
## 梯度 Handler
梯度 handler 是对参数的梯度执行 all-reduce 操作的对象。由于不同的 all-reduce 策略或许在不同的并行中被执行,用户可以继承
梯度 handler 是对参数的梯度执行 all-reduce 操作的对象。由于不同的 all-reduce 策略或许在不同的并行中被执行,用户可以继承
`colossalai.engine.gradient_handler.BaseGradientHandler`
来实现其策略。目前,Colossal-AI 使用普通的数据并行梯度 handler 在数据并行的 rank 间 all-reduce 梯度。
`colossalai.
legacy.
engine.gradient_handler.BaseGradientHandler`
来实现其策略。目前,Colossal-AI 使用普通的数据并行梯度 handler 在数据并行的 rank 间 all-reduce 梯度。
如果数据并行被检测到,梯度 handler 会被自动添加进 engine。
如果数据并行被检测到,梯度 handler 会被自动添加进 engine。
你可以添加你自己的梯度 handler,如下所示:
你可以添加你自己的梯度 handler,如下所示:
```
python
```
python
from
colossalai.registry
import
GRADIENT_HANDLER
from
colossalai.
legacy.
registry
import
GRADIENT_HANDLER
from
colossalai.engine
import
BaseGradientHandler
from
colossalai.
legacy.
engine
import
BaseGradientHandler
@
GRADIENT_HANDLER
.
register_module
@
GRADIENT_HANDLER
.
register_module
class
YourGradientHandler
(
BaseGradientHandler
):
class
YourGradientHandler
(
BaseGradientHandler
):
...
@@ -109,4 +109,5 @@ gradient_handlers = [
...
@@ -109,4 +109,5 @@ gradient_handlers = [
## Schedule
## Schedule
Schedule 包含了如何执行前向和后向计算。目前, Colossal-AI 提供了流水和非流水的 schedule。
Schedule 包含了如何执行前向和后向计算。目前, Colossal-AI 提供了流水和非流水的 schedule。
如果你想修改前向和后向计算的执行方式,你可以继承
`colossalai.engine.schedule.BaseSchedule`
并实现
`forward_back_step`
函数。
如果你想修改前向和后向计算的执行方式,你可以继承
`colossalai.legacy.engine.schedule.BaseSchedule`
并实现
`forward_back_step`
函数。
<!-- doc-test-command: echo -->
docs/source/zh-Hans/advanced_tutorials/train_gpt_using_hybrid_parallelism.md
View file @
fae6c92e
...
@@ -36,14 +36,14 @@ import torch
...
@@ -36,14 +36,14 @@ import torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
colossalai
import
nn
as
col_nn
from
colossalai
import
nn
as
col_nn
from
colossalai.amp
import
AMP_TYPE
from
colossalai.amp
import
AMP_TYPE
from
colossalai.builder.pipeline
import
partition_uniform
from
colossalai.
legacy.
builder.pipeline
import
partition_uniform
from
colossalai.context.parallel_mode
import
ParallelMode
from
colossalai.context.parallel_mode
import
ParallelMode
from
colossalai.core
import
global_context
as
gpc
from
colossalai.core
import
global_context
as
gpc
from
colossalai.engine.schedule
import
(
InterleavedPipelineSchedule
,
from
colossalai.
legacy.
engine.schedule
import
(
InterleavedPipelineSchedule
,
PipelineSchedule
)
PipelineSchedule
)
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.nn.layer.wrapper
import
PipelineSharedModuleWrapper
from
colossalai.nn.layer.wrapper
import
PipelineSharedModuleWrapper
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
from
colossalai.utils.timer
import
MultiTimer
from
colossalai.utils.timer
import
MultiTimer
from
model_zoo.gpt
import
GPTLMLoss
from
model_zoo.gpt
import
GPTLMLoss
from
torch.nn
import
functional
as
F
from
torch.nn
import
functional
as
F
...
@@ -273,3 +273,4 @@ def train():
...
@@ -273,3 +273,4 @@ def train():
return_output_label
=
False
,
return_output_label
=
False
,
)
)
```
```
<!-- doc-test-command: echo -->
docs/source/zh-Hans/advanced_tutorials/train_vit_using_pipeline_parallelism.md
View file @
fae6c92e
...
@@ -32,11 +32,11 @@ import colossalai
...
@@ -32,11 +32,11 @@ import colossalai
import
colossalai.nn
as
col_nn
import
colossalai.nn
as
col_nn
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
colossalai.builder
import
build_pipeline_model
from
colossalai.
legacy.
builder
import
build_pipeline_model
from
colossalai.engine.schedule
import
(
InterleavedPipelineSchedule
,
from
colossalai.
legacy.
engine.schedule
import
(
InterleavedPipelineSchedule
,
PipelineSchedule
)
PipelineSchedule
)
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
from
colossalai.utils
import
MultiTimer
,
get_dataloader
from
colossalai.utils
import
MultiTimer
,
get_dataloader
from
timm.models
import
vision_transformer
as
vit
from
timm.models
import
vision_transformer
as
vit
from
torchvision
import
transforms
from
torchvision
import
transforms
...
@@ -48,17 +48,17 @@ from torchvision.datasets import CIFAR10
...
@@ -48,17 +48,17 @@ from torchvision.datasets import CIFAR10
总的来说, 我们提供3种方法来建立一个流水并行的模型:
总的来说, 我们提供3种方法来建立一个流水并行的模型:
1.
`colossalai.builder.build_pipeline_model_from_cfg`
1.
`colossalai.
legacy.
builder.build_pipeline_model_from_cfg`
2.
`colossalai.builder.build_pipeline_model`
2.
`colossalai.
legacy.
builder.build_pipeline_model`
3.
自己按阶段拆分模型
3.
自己按阶段拆分模型
当你的内存能够容纳模型时,你可以使用前两种方法来建立你的模型,否则你必须自己分割模型。前两种方法首先在 CPU 上建立整个模型,然后分割模型,最后你可以直接把模型的相应部分移到 GPU 上。
当你的内存能够容纳模型时,你可以使用前两种方法来建立你的模型,否则你必须自己分割模型。前两种方法首先在 CPU 上建立整个模型,然后分割模型,最后你可以直接把模型的相应部分移到 GPU 上。
`colossalai.builder.build_pipeline_model_from_cfg()`
接收一个模型的配置文件,它可以均匀地(按层)或平衡地(按参数大小)分割模型。
`colossalai.
legacy.
builder.build_pipeline_model_from_cfg()`
接收一个模型的配置文件,它可以均匀地(按层)或平衡地(按参数大小)分割模型。
如果你熟悉
`PyTorch`
, 你可以使用
`colossalai.builder.build_pipeline_model()`
它接收一个
`torch.nn.Sequential`
模型并按层均匀分割。
如果你熟悉
`PyTorch`
, 你可以使用
`colossalai.
legacy.
builder.build_pipeline_model()`
它接收一个
`torch.nn.Sequential`
模型并按层均匀分割。
在本教程中,我们将修改
[
TIMM/ViT
](
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
)
to
`torch.nn.Sequential`
,然后使用
`colossalai.builder.build_pipeline_model()`
来建立流水线模型。
在本教程中,我们将修改
[
TIMM/ViT
](
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
)
to
`torch.nn.Sequential`
,然后使用
`colossalai.
legacy.
builder.build_pipeline_model()`
来建立流水线模型。
当数据是
**一个**
`Tensor`
, 你可以使用你的模型
`forward()`
中的位置参数来获得数据张量。对于流水线的第一阶段,
`forward()`
的第一个位置参数是从数据加载器加载的数据张量。对于其他阶段,
`forward()`
的第一个位置参数是上一阶段的输出张量。注意,如果该阶段不是最后一个阶段,则
`forward()`
的返回必须是一个
`Tensor`
。
当数据是
**一个**
`Tensor`
, 你可以使用你的模型
`forward()`
中的位置参数来获得数据张量。对于流水线的第一阶段,
`forward()`
的第一个位置参数是从数据加载器加载的数据张量。对于其他阶段,
`forward()`
的第一个位置参数是上一阶段的输出张量。注意,如果该阶段不是最后一个阶段,则
`forward()`
的返回必须是一个
`Tensor`
。
...
@@ -244,3 +244,4 @@ def train():
...
@@ -244,3 +244,4 @@ def train():
hooks
=
hook_list
,
hooks
=
hook_list
,
display_progress
=
True
)
display_progress
=
True
)
```
```
<!-- doc-test-command: echo -->
docs/source/zh-Hans/advanced_tutorials/train_vit_with_hybrid_parallelism.md
View file @
fae6c92e
...
@@ -74,7 +74,7 @@ from colossalai.core import global_context as gpc
...
@@ -74,7 +74,7 @@ from colossalai.core import global_context as gpc
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.nn.lr_scheduler
import
LinearWarmupLR
from
colossalai.nn.lr_scheduler
import
LinearWarmupLR
from
colossalai.nn.metric
import
Accuracy
from
colossalai.nn.metric
import
Accuracy
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
```
```
-
其他模块
-
其他模块
...
@@ -256,8 +256,8 @@ SEQ_LENGTH = (IMG_SIZE // PATCH_SIZE) ** 2 + 1 # add 1 for cls token
...
@@ -256,8 +256,8 @@ SEQ_LENGTH = (IMG_SIZE // PATCH_SIZE) ** 2 + 1 # add 1 for cls token
### 构建流水线模型 (`/hybrid_parallel/model/vit.py`)
### 构建流水线模型 (`/hybrid_parallel/model/vit.py`)
Colossal-AI 提供了两种从现有模型构建流水线模型的方法。
Colossal-AI 提供了两种从现有模型构建流水线模型的方法。
-
`colossalai.builder.build_pipeline_model_from_cfg`
-
`colossalai.
legacy.
builder.build_pipeline_model_from_cfg`
-
`colossalai.builder.build_pipeline_model`
-
`colossalai.
legacy.
builder.build_pipeline_model`
此外,您还可以使用 Colossal-AI 从头开始构建流水线模型。
此外,您还可以使用 Colossal-AI 从头开始构建流水线模型。
```
python
```
python
...
@@ -266,11 +266,11 @@ from typing import Callable
...
@@ -266,11 +266,11 @@ from typing import Callable
import
inspect
import
inspect
import
torch
import
torch
from
colossalai
import
nn
as
col_nn
from
colossalai
import
nn
as
col_nn
from
colossalai.registry
import
LAYERS
,
MODELS
from
colossalai.
legacy.
registry
import
LAYERS
,
MODELS
from
colossalai.logging
import
get_dist_logger
from
colossalai.logging
import
get_dist_logger
from
colossalai.core
import
global_context
as
gpc
from
colossalai.core
import
global_context
as
gpc
from
colossalai.context
import
ParallelMode
from
colossalai.context
import
ParallelMode
from
colossalai.builder.pipeline
import
partition_uniform
from
colossalai.
legacy.
builder.pipeline
import
partition_uniform
from
torch
import
dtype
,
nn
from
torch
import
dtype
,
nn
from
model_zoo.vit.vit
import
ViTBlock
,
ViTEmbedding
,
ViTHead
from
model_zoo.vit.vit
import
ViTBlock
,
ViTEmbedding
,
ViTHead
@
MODELS
.
register_module
@
MODELS
.
register_module
...
@@ -380,7 +380,7 @@ def build_pipeline_vit(num_layers, num_chunks, device=torch.device('cuda'), **kw
...
@@ -380,7 +380,7 @@ def build_pipeline_vit(num_layers, num_chunks, device=torch.device('cuda'), **kw
#### 导入模块
#### 导入模块
```
python
```
python
from
colossalai.engine.schedule
import
(
InterleavedPipelineSchedule
,
from
colossalai.
legacy.
engine.schedule
import
(
InterleavedPipelineSchedule
,
PipelineSchedule
)
PipelineSchedule
)
from
colossalai.utils
import
MultiTimer
from
colossalai.utils
import
MultiTimer
import
os
import
os
...
@@ -589,3 +589,4 @@ torchrun --standalone --nproc_per_node <NUM_GPUs> train_hybrid.py --config ./co
...
@@ -589,3 +589,4 @@ torchrun --standalone --nproc_per_node <NUM_GPUs> train_hybrid.py --config ./co
# If your torch >= 1.9.0
# If your torch >= 1.9.0
# python -m torch.distributed.run --standalone --nproc_per_node= <NUM_GPUs> train_hybrid.py --config ./configs/config_hybrid_parallel.py
# python -m torch.distributed.run --standalone --nproc_per_node= <NUM_GPUs> train_hybrid.py --config ./configs/config_hybrid_parallel.py
```
```
<!-- doc-test-command: echo -->
docs/source/zh-Hans/basics/engine_trainer.md
View file @
fae6c92e
...
@@ -61,7 +61,7 @@ Trainer 的参数 `schedule` 默认值是 `None` 。在大多数情况下,除
...
@@ -61,7 +61,7 @@ Trainer 的参数 `schedule` 默认值是 `None` 。在大多数情况下,除
```
python
```
python
from
colossalai.logging
import
get_dist_logger
from
colossalai.logging
import
get_dist_logger
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
# build components and initialize with colossalai.initialize
# build components and initialize with colossalai.initialize
...
...
...
@@ -104,7 +104,7 @@ trainer.fit(
...
@@ -104,7 +104,7 @@ trainer.fit(
```
python
```
python
from
colossalai.logging
import
get_dist_logger
from
colossalai.logging
import
get_dist_logger
from
colossalai.trainer
import
hooks
from
colossalai.
legacy.
trainer
import
hooks
class
LogMessageHook
(
hooks
.
BaseHook
):
class
LogMessageHook
(
hooks
.
BaseHook
):
...
@@ -341,7 +341,7 @@ for epoch in range(gpc.config.NUM_EPOCHS):
...
@@ -341,7 +341,7 @@ for epoch in range(gpc.config.NUM_EPOCHS):
```
python
```
python
from
colossalai.nn.metric
import
Accuracy
from
colossalai.nn.metric
import
Accuracy
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
# create a trainer object
# create a trainer object
...
@@ -384,3 +384,4 @@ python -m torch.distributed.launch --nproc_per_node <num_gpus> --master_addr loc
...
@@ -384,3 +384,4 @@ python -m torch.distributed.launch --nproc_per_node <num_gpus> --master_addr loc
# with trainer
# with trainer
python
-m
torch.distributed.launch
--nproc_per_node
<num_gpus>
--master_addr
localhost
--master_port
29500 run_resnet_cifar10_with_trainer.py
python
-m
torch.distributed.launch
--nproc_per_node
<num_gpus>
--master_addr
localhost
--master_port
29500 run_resnet_cifar10_with_trainer.py
```
```
<!-- doc-test-command: echo -->
docs/source/zh-Hans/basics/model_checkpoint.md
View file @
fae6c92e
...
@@ -41,7 +41,7 @@ for epoch in range(num_epochs):
...
@@ -41,7 +41,7 @@ for epoch in range(num_epochs):
#### 用 trainer 保存
#### 用 trainer 保存
```
python
```
python
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
model
=
...
model
=
...
engine
,
_
,
_
,
_
=
colossalai
.
initialize
(
model
=
model
,
...)
engine
,
_
,
_
,
_
=
colossalai
.
initialize
(
model
=
model
,
...)
trainer
=
Trainer
(
engine
,
...)
trainer
=
Trainer
(
engine
,
...)
...
@@ -61,3 +61,4 @@ model = ...
...
@@ -61,3 +61,4 @@ model = ...
load_checkpoint
(
'xxx.pt'
,
model
)
load_checkpoint
(
'xxx.pt'
,
model
)
...
# train or test
...
# train or test
```
```
<!-- doc-test-command: echo -->
docs/source/zh-Hans/features/gradient_handler.md
View file @
fae6c92e
...
@@ -25,8 +25,8 @@
...
@@ -25,8 +25,8 @@
3.
实现
`handle_gradient`
3.
实现
`handle_gradient`
```
python
```
python
from
colossalai.registry
import
GRADIENT_HANDLER
from
colossalai.
legacy.
registry
import
GRADIENT_HANDLER
from
colossalai.engine.gradient_handler
import
BaseGradientHandler
from
colossalai.
legacy.
engine.gradient_handler
import
BaseGradientHandler
@
GRADIENT_HANDLER
.
register_module
@
GRADIENT_HANDLER
.
register_module
...
@@ -57,3 +57,4 @@ gradient_handler = [dict(type='MyGradientHandler')]
...
@@ -57,3 +57,4 @@ gradient_handler = [dict(type='MyGradientHandler')]
```
shell
```
shell
python
-m
torch.distributed.launch
--nproc_per_node
4
--master_addr
localhost
--master_port
29500 train_with_engine.py
python
-m
torch.distributed.launch
--nproc_per_node
4
--master_addr
localhost
--master_port
29500 train_with_engine.py
```
```
<!-- doc-test-command: echo -->
docs/source/zh-Hans/features/mixed_precision_training.md
View file @
fae6c92e
...
@@ -245,7 +245,7 @@ from pathlib import Path
...
@@ -245,7 +245,7 @@ from pathlib import Path
from
colossalai.core
import
global_context
as
gpc
from
colossalai.core
import
global_context
as
gpc
from
colossalai.logging
import
get_dist_logger
from
colossalai.logging
import
get_dist_logger
from
colossalai.utils
import
get_dataloader
from
colossalai.utils
import
get_dataloader
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
from
colossalai.nn.lr_scheduler
import
LinearWarmupLR
from
colossalai.nn.lr_scheduler
import
LinearWarmupLR
from
timm.models
import
vit_base_patch16_224
from
timm.models
import
vit_base_patch16_224
from
torchvision
import
datasets
,
transforms
from
torchvision
import
datasets
,
transforms
...
...
docs/source/zh-Hans/features/pipeline_parallel.md
View file @
fae6c92e
...
@@ -78,7 +78,7 @@ import colossalai.nn as col_nn
...
@@ -78,7 +78,7 @@ import colossalai.nn as col_nn
from
colossalai.core
import
global_context
as
gpc
from
colossalai.core
import
global_context
as
gpc
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.
legacy.
trainer
import
Trainer
,
hooks
from
colossalai.utils
import
MultiTimer
,
get_dataloader
from
colossalai.utils
import
MultiTimer
,
get_dataloader
from
colossalai.context
import
ParallelMode
from
colossalai.context
import
ParallelMode
from
colossalai.pipeline.pipelinable
import
PipelinableContext
from
colossalai.pipeline.pipelinable
import
PipelinableContext
...
@@ -156,3 +156,4 @@ trainer.fit(train_dataloader=train_dataloader,
...
@@ -156,3 +156,4 @@ trainer.fit(train_dataloader=train_dataloader,
```
```
我们使用
`2`
个流水段,并且 batch 将被切分为
`4`
个 micro batches。
我们使用
`2`
个流水段,并且 batch 将被切分为
`4`
个 micro batches。
<!-- doc-test-command: echo -->
examples/language/gpt/titans/dataset/webtext.py
View file @
fae6c92e
...
@@ -6,7 +6,7 @@ import torch
...
@@ -6,7 +6,7 @@ import torch
from
torch.utils.data
import
Dataset
from
torch.utils.data
import
Dataset
from
transformers
import
GPT2Tokenizer
from
transformers
import
GPT2Tokenizer
from
colossalai.registry
import
DATASETS
from
colossalai.
legacy.
registry
import
DATASETS
@
DATASETS
.
register_module
@
DATASETS
.
register_module
...
...
examples/language/gpt/titans/model/embed.py
View file @
fae6c92e
...
@@ -8,11 +8,11 @@ from torch.nn.parameter import Parameter
...
@@ -8,11 +8,11 @@ from torch.nn.parameter import Parameter
from
colossalai.context
import
ParallelMode
,
seed
from
colossalai.context
import
ParallelMode
,
seed
from
colossalai.core
import
global_context
as
gpc
from
colossalai.core
import
global_context
as
gpc
from
colossalai.legacy.registry
import
LAYERS
,
LOSSES
,
MODELS
from
colossalai.nn.layer.base_layer
import
ParallelLayer
from
colossalai.nn.layer.base_layer
import
ParallelLayer
from
colossalai.nn.layer.parallel_1d._utils
import
gather_forward_split_backward
,
reduce_grad
,
reduce_input
from
colossalai.nn.layer.parallel_1d._utils
import
gather_forward_split_backward
,
reduce_grad
,
reduce_input
from
colossalai.nn.layer.parallel_1d.layers
import
Linear1D_Row
from
colossalai.nn.layer.parallel_1d.layers
import
Linear1D_Row
from
colossalai.nn.layer.utils
import
divide
from
colossalai.nn.layer.utils
import
divide
from
colossalai.registry
import
LAYERS
,
LOSSES
,
MODELS
from
colossalai.utils
import
get_current_device
from
colossalai.utils
import
get_current_device
...
...
examples/language/gpt/titans/train_gpt.py
View file @
fae6c92e
...
@@ -10,9 +10,9 @@ import colossalai
...
@@ -10,9 +10,9 @@ import colossalai
import
colossalai.utils
as
utils
import
colossalai.utils
as
utils
from
colossalai.context.parallel_mode
import
ParallelMode
from
colossalai.context.parallel_mode
import
ParallelMode
from
colossalai.core
import
global_context
as
gpc
from
colossalai.core
import
global_context
as
gpc
from
colossalai.legacy.trainer
import
Trainer
,
hooks
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.logging
import
disable_existing_loggers
,
get_dist_logger
from
colossalai.nn
import
LinearWarmupLR
from
colossalai.nn
import
LinearWarmupLR
from
colossalai.trainer
import
Trainer
,
hooks
from
colossalai.utils
import
colo_set_process_memory_fraction
,
is_using_pp
from
colossalai.utils
import
colo_set_process_memory_fraction
,
is_using_pp
from
colossalai.utils.timer
import
MultiTimer
from
colossalai.utils.timer
import
MultiTimer
from
colossalai.zero.legacy.init_ctx
import
ZeroInitContext
from
colossalai.zero.legacy.init_ctx
import
ZeroInitContext
...
...
examples/tutorial/sequence_parallel/data/datasets/indexed_dataset.py
View file @
fae6c92e
...
@@ -3,17 +3,16 @@
...
@@ -3,17 +3,16 @@
# This source code is licensed under the MIT license found in the
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# LICENSE file in the root directory of this source tree.
# copied from fairseq/fairseq/data/indexed_dataset.py
# copied from fairseq/fairseq/data/indexed_dataset.py
# Removed IndexedRawTextDataset since it relied on Fairseq dictionary
# Removed IndexedRawTextDataset since it relied on Fairseq dictionary
# other slight modifications to remove fairseq dependencies
# other slight modifications to remove fairseq dependencies
# Added document index to index file and made it accessible.
# Added document index to index file and made it accessible.
# An empty sentence no longer separates documents.
# An empty sentence no longer separates documents.
from
functools
import
lru_cache
import
os
import
os
import
shutil
import
shutil
import
struct
import
struct
from
functools
import
lru_cache
from
itertools
import
accumulate
from
itertools
import
accumulate
import
numpy
as
np
import
numpy
as
np
...
@@ -88,16 +87,7 @@ def write_longs(f, a):
...
@@ -88,16 +87,7 @@ def write_longs(f, a):
f
.
write
(
np
.
array
(
a
,
dtype
=
np
.
int64
))
f
.
write
(
np
.
array
(
a
,
dtype
=
np
.
int64
))
# Map of on-disk dtype codes (stored in the index-file header) to numpy
# dtypes; the inverse lookup is done by ``code()``.
# Code 6 maps to the builtin ``float``: ``np.float`` was only a deprecated
# alias of the builtin and was removed in NumPy 1.24, so it must not be used.
dtypes = {
    1: np.uint8,
    2: np.int8,
    3: np.int16,
    4: np.int32,
    5: np.int64,
    6: float,
    7: np.double,
    8: np.uint16,
}
def
code
(
dtype
):
def
code
(
dtype
):
...
@@ -136,10 +126,8 @@ class IndexedDataset(torch.utils.data.Dataset):
...
@@ -136,10 +126,8 @@ class IndexedDataset(torch.utils.data.Dataset):
def
read_index
(
self
,
path
):
def
read_index
(
self
,
path
):
with
open
(
index_file_path
(
path
),
'rb'
)
as
f
:
with
open
(
index_file_path
(
path
),
'rb'
)
as
f
:
magic
=
f
.
read
(
8
)
magic
=
f
.
read
(
8
)
assert
magic
==
self
.
_HDR_MAGIC
,
(
assert
magic
==
self
.
_HDR_MAGIC
,
(
'Index file doesn
\'
t match expected format. '
'Index file doesn
\'
t match expected format. '
'Make sure that --dataset-impl is configured properly.'
)
'Make sure that --dataset-impl is configured properly.'
)
version
=
f
.
read
(
8
)
version
=
f
.
read
(
8
)
assert
struct
.
unpack
(
'<Q'
,
version
)
==
(
1
,)
assert
struct
.
unpack
(
'<Q'
,
version
)
==
(
1
,)
code
,
self
.
element_size
=
struct
.
unpack
(
'<QQ'
,
f
.
read
(
16
))
code
,
self
.
element_size
=
struct
.
unpack
(
'<QQ'
,
f
.
read
(
16
))
...
@@ -198,13 +186,11 @@ class IndexedDataset(torch.utils.data.Dataset):
...
@@ -198,13 +186,11 @@ class IndexedDataset(torch.utils.data.Dataset):
@staticmethod
def exists(path):
    """Return True only when both the index and the data file for *path* exist."""
    # Short-circuit: no point checking the data file if the index is missing.
    if not os.path.exists(index_file_path(path)):
        return False
    return os.path.exists(data_file_path(path))
@property
def supports_prefetch(self):
    """Prefetching is intentionally disabled here to keep memory usage low."""
    return False
class
IndexedCachedDataset
(
IndexedDataset
):
class
IndexedCachedDataset
(
IndexedDataset
):
...
@@ -233,7 +219,7 @@ class IndexedCachedDataset(IndexedDataset):
...
@@ -233,7 +219,7 @@ class IndexedCachedDataset(IndexedDataset):
for
i
in
indices
:
for
i
in
indices
:
self
.
cache_index
[
i
]
=
ptx
self
.
cache_index
[
i
]
=
ptx
size
=
self
.
data_offsets
[
i
+
1
]
-
self
.
data_offsets
[
i
]
size
=
self
.
data_offsets
[
i
+
1
]
-
self
.
data_offsets
[
i
]
a
=
self
.
cache
[
ptx
:
ptx
+
size
]
a
=
self
.
cache
[
ptx
:
ptx
+
size
]
self
.
data_file
.
seek
(
self
.
data_offsets
[
i
]
*
self
.
element_size
)
self
.
data_file
.
seek
(
self
.
data_offsets
[
i
]
*
self
.
element_size
)
self
.
data_file
.
readinto
(
a
)
self
.
data_file
.
readinto
(
a
)
ptx
+=
size
ptx
+=
size
...
@@ -250,7 +236,7 @@ class IndexedCachedDataset(IndexedDataset):
...
@@ -250,7 +236,7 @@ class IndexedCachedDataset(IndexedDataset):
tensor_size
=
self
.
sizes
[
self
.
dim_offsets
[
i
]:
self
.
dim_offsets
[
i
+
1
]]
tensor_size
=
self
.
sizes
[
self
.
dim_offsets
[
i
]:
self
.
dim_offsets
[
i
+
1
]]
a
=
np
.
empty
(
tensor_size
,
dtype
=
self
.
dtype
)
a
=
np
.
empty
(
tensor_size
,
dtype
=
self
.
dtype
)
ptx
=
self
.
cache_index
[
i
]
ptx
=
self
.
cache_index
[
i
]
np
.
copyto
(
a
,
self
.
cache
[
ptx
:
ptx
+
a
.
size
])
np
.
copyto
(
a
,
self
.
cache
[
ptx
:
ptx
+
a
.
size
])
return
a
return
a
elif
isinstance
(
idx
,
slice
):
elif
isinstance
(
idx
,
slice
):
# Hack just to make this work, can optimizer later if necessary
# Hack just to make this work, can optimizer later if necessary
...
@@ -261,15 +247,7 @@ class IndexedCachedDataset(IndexedDataset):
...
@@ -261,15 +247,7 @@ class IndexedCachedDataset(IndexedDataset):
class
IndexedDatasetBuilder
(
object
):
class
IndexedDatasetBuilder
(
object
):
# Bytes per element used when serializing each supported dtype.
# Uses the builtin ``float`` as the key because ``np.float`` (a deprecated
# alias of the builtin) was removed in NumPy 1.24.
# NOTE(review): builtin ``float`` is mapped to 4 bytes here even though a
# Python float is a 64-bit double — this mirrors the historical on-disk
# format for dtype code 6; verify against the reader before changing.
element_sizes = {
    np.uint8: 1,
    np.int8: 1,
    np.int16: 2,
    np.int32: 4,
    np.int64: 8,
    float: 4,
    np.double: 8,
}
def
__init__
(
self
,
out_file
,
dtype
=
np
.
int32
):
def
__init__
(
self
,
out_file
,
dtype
=
np
.
int32
):
self
.
out_file
=
open
(
out_file
,
'wb'
)
self
.
out_file
=
open
(
out_file
,
'wb'
)
...
@@ -332,12 +310,15 @@ def _warmup_mmap_file(path):
...
@@ -332,12 +310,15 @@ def _warmup_mmap_file(path):
class
MMapIndexedDataset
(
torch
.
utils
.
data
.
Dataset
):
class
MMapIndexedDataset
(
torch
.
utils
.
data
.
Dataset
):
class
Index
(
object
):
class
Index
(
object
):
_HDR_MAGIC
=
b
'MMIDIDX
\x00\x00
'
_HDR_MAGIC
=
b
'MMIDIDX
\x00\x00
'
@
classmethod
@
classmethod
def
writer
(
cls
,
path
,
dtype
):
def
writer
(
cls
,
path
,
dtype
):
class
_Writer
(
object
):
class
_Writer
(
object
):
def
__enter__
(
self
):
def
__enter__
(
self
):
self
.
_file
=
open
(
path
,
'wb'
)
self
.
_file
=
open
(
path
,
'wb'
)
...
@@ -384,10 +365,8 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
...
@@ -384,10 +365,8 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
def
__init__
(
self
,
path
,
skip_warmup
=
False
):
def
__init__
(
self
,
path
,
skip_warmup
=
False
):
with
open
(
path
,
'rb'
)
as
stream
:
with
open
(
path
,
'rb'
)
as
stream
:
magic_test
=
stream
.
read
(
9
)
magic_test
=
stream
.
read
(
9
)
assert
self
.
_HDR_MAGIC
==
magic_test
,
(
assert
self
.
_HDR_MAGIC
==
magic_test
,
(
'Index file doesn
\'
t match expected format. '
'Index file doesn
\'
t match expected format. '
'Make sure that --dataset-impl is configured properly.'
)
'Make sure that --dataset-impl is configured properly.'
)
version
=
struct
.
unpack
(
'<Q'
,
stream
.
read
(
8
))
version
=
struct
.
unpack
(
'<Q'
,
stream
.
read
(
8
))
assert
(
1
,)
==
version
assert
(
1
,)
==
version
...
@@ -406,16 +385,16 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
...
@@ -406,16 +385,16 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
self
.
_bin_buffer_mmap
=
np
.
memmap
(
path
,
mode
=
'r'
,
order
=
'C'
)
self
.
_bin_buffer_mmap
=
np
.
memmap
(
path
,
mode
=
'r'
,
order
=
'C'
)
self
.
_bin_buffer
=
memoryview
(
self
.
_bin_buffer_mmap
)
self
.
_bin_buffer
=
memoryview
(
self
.
_bin_buffer_mmap
)
print
(
" reading sizes..."
)
print
(
" reading sizes..."
)
self
.
_sizes
=
np
.
frombuffer
(
self
.
_sizes
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
np
.
int32
,
count
=
self
.
_len
,
offset
=
offset
)
self
.
_bin_buffer
,
dtype
=
np
.
int32
,
count
=
self
.
_len
,
offset
=
offset
)
print
(
" reading pointers..."
)
print
(
" reading pointers..."
)
self
.
_pointers
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
np
.
int64
,
count
=
self
.
_len
,
self
.
_pointers
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
np
.
int64
,
count
=
self
.
_len
,
offset
=
offset
+
self
.
_sizes
.
nbytes
)
offset
=
offset
+
self
.
_sizes
.
nbytes
)
print
(
" reading document index..."
)
print
(
" reading document index..."
)
self
.
_doc_idx
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
np
.
int64
,
count
=
self
.
_doc_count
,
self
.
_doc_idx
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
np
.
int64
,
count
=
self
.
_doc_count
,
offset
=
offset
+
self
.
_sizes
.
nbytes
+
self
.
_pointers
.
nbytes
)
offset
=
offset
+
self
.
_sizes
.
nbytes
+
self
.
_pointers
.
nbytes
)
def
__del__
(
self
):
def
__del__
(
self
):
...
@@ -480,8 +459,7 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
...
@@ -480,8 +459,7 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
def
__getitem__
(
self
,
idx
):
def
__getitem__
(
self
,
idx
):
if
isinstance
(
idx
,
int
):
if
isinstance
(
idx
,
int
):
ptr
,
size
=
self
.
_index
[
idx
]
ptr
,
size
=
self
.
_index
[
idx
]
np_array
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
self
.
_index
.
dtype
,
np_array
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
self
.
_index
.
dtype
,
count
=
size
,
offset
=
ptr
)
count
=
size
,
offset
=
ptr
)
return
np_array
return
np_array
elif
isinstance
(
idx
,
slice
):
elif
isinstance
(
idx
,
slice
):
start
,
stop
,
step
=
idx
.
indices
(
len
(
self
))
start
,
stop
,
step
=
idx
.
indices
(
len
(
self
))
...
@@ -491,8 +469,7 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
...
@@ -491,8 +469,7 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
sizes
=
self
.
_index
.
_sizes
[
idx
]
sizes
=
self
.
_index
.
_sizes
[
idx
]
offsets
=
list
(
accumulate
(
sizes
))
offsets
=
list
(
accumulate
(
sizes
))
total_size
=
sum
(
sizes
)
total_size
=
sum
(
sizes
)
np_array
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
self
.
_index
.
dtype
,
np_array
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
self
.
_index
.
dtype
,
count
=
total_size
,
offset
=
ptr
)
count
=
total_size
,
offset
=
ptr
)
sents
=
np
.
split
(
np_array
,
offsets
[:
-
1
])
sents
=
np
.
split
(
np_array
,
offsets
[:
-
1
])
return
sents
return
sents
...
@@ -506,8 +483,7 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
...
@@ -506,8 +483,7 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
if
length
is
None
:
if
length
is
None
:
length
=
size
-
offset
length
=
size
-
offset
ptr
+=
offset
*
np
.
dtype
(
self
.
_index
.
dtype
).
itemsize
ptr
+=
offset
*
np
.
dtype
(
self
.
_index
.
dtype
).
itemsize
np_array
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
self
.
_index
.
dtype
,
np_array
=
np
.
frombuffer
(
self
.
_bin_buffer
,
dtype
=
self
.
_index
.
dtype
,
count
=
length
,
offset
=
ptr
)
count
=
length
,
offset
=
ptr
)
return
np_array
return
np_array
@
property
@
property
...
@@ -530,12 +506,11 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
...
@@ -530,12 +506,11 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
@staticmethod
def exists(path):
    """Return True only when both the index and the data file for *path* exist."""
    # Check the index first; bail out early if it is absent.
    if not os.path.exists(index_file_path(path)):
        return False
    return os.path.exists(data_file_path(path))
class
MMapIndexedDatasetBuilder
(
object
):
class
MMapIndexedDatasetBuilder
(
object
):
def
__init__
(
self
,
out_file
,
dtype
=
np
.
int64
):
def
__init__
(
self
,
out_file
,
dtype
=
np
.
int64
):
self
.
_data_file
=
open
(
out_file
,
'wb'
)
self
.
_data_file
=
open
(
out_file
,
'wb'
)
self
.
_dtype
=
dtype
self
.
_dtype
=
dtype
...
...
examples/tutorial/sequence_parallel/requirements.txt
View file @
fae6c92e
colossalai
colossalai
torch
torch
six
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment