Commit 01bcbb1e authored by lim's avatar lim
Browse files

Initial commit

parent 187361d1
Pipeline #3395 canceled with stages
{
"class": "GPTDataset",
"dataset_path": "/workspace/data/oscar/oscar-1GB_head-qwen_text_document",
"num_samples": 160,
"index_split": "test",
"random_seed": 1234,
"sequence_length": 4096,
"split": "949,50,1",
"split_matrix": [
[
0,
0.949
],
[
0.949,
0.999
],
[
0.999,
1.0
]
],
"tokenizer": {
"class": "megatron.core.tokenizers.text.models.default_tokenizer.DefaultTokenizerText",
"tokenizer_path": "/home/models/qwen3/Qwen3-8B",
"vocab_file": "None",
"merges_file": "None"
}
}
\ No newline at end of file
{
"class": "GPTDataset",
"dataset_path": "/workspace/data/oscar/oscar-1GB_head-qwen_text_document",
"num_samples": 160,
"index_split": "test",
"random_seed": 1234,
"sequence_length": 32768,
"split": "949,50,1",
"split_matrix": [
[
0,
0.949
],
[
0.949,
0.999
],
[
0.999,
1.0
]
],
"tokenizer": {
"class": "megatron.core.tokenizers.text.models.default_tokenizer.DefaultTokenizerText",
"tokenizer_path": "/home/models/qwen3/Qwen3-8B",
"vocab_file": "None",
"merges_file": "None"
}
}
\ No newline at end of file
{
"class": "GPTDataset",
"dataset_path": "/workspace/data/oscar/oscar-1GB_head-qwen_text_document",
"num_samples": 160,
"index_split": "valid",
"random_seed": 1234,
"sequence_length": 8192,
"split": "949,50,1",
"split_matrix": [
[
0,
0.949
],
[
0.949,
0.999
],
[
0.999,
1.0
]
],
"tokenizer": {
"class": "megatron.core.tokenizers.text.models.default_tokenizer.DefaultTokenizerText",
"tokenizer_path": "/home/models/qwen3/Qwen3-8B",
"vocab_file": "None",
"merges_file": "None"
}
}
\ No newline at end of file
{
"class": "GPTDataset",
"dataset_path": "/workspace/data/oscar/oscar-1GB_head-qwen_text_document",
"num_samples": 12800,
"index_split": "train",
"random_seed": 1234,
"sequence_length": 4096,
"split": "949,50,1",
"split_matrix": [
[
0,
0.949
],
[
0.949,
0.999
],
[
0.999,
1.0
]
],
"tokenizer": {
"class": "megatron.core.tokenizers.text.models.default_tokenizer.DefaultTokenizerText",
"tokenizer_path": "/home/models/qwen3/Qwen3-0.6B",
"vocab_file": "None",
"merges_file": "None"
}
}
\ No newline at end of file
This diff is collapsed.
from .adaptor import megatron_adaptor
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment