Commit 7f8094a3 authored by zhaoying1's avatar zhaoying1
Browse files

added baichuan2

parents
f14r1n19 slots=4
f14r2n00 slots=4
f14r2n01 slots=4
f14r2n02 slots=4
f14r2n03 slots=4
f14r2n04 slots=4
f14r2n05 slots=4
f14r2n06 slots=4
f14r2n07 slots=4
f14r2n08 slots=4
f14r2n09 slots=4
f14r2n10 slots=4
f14r2n11 slots=4
f14r2n12 slots=4
f14r2n13 slots=4
f14r2n14 slots=4
f14r2n15 slots=4
f14r2n16 slots=4
f14r2n17 slots=4
f14r2n18 slots=4
f14r2n19 slots=4
f14r3n00 slots=4
f14r3n01 slots=4
f14r3n02 slots=4
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import GenerationConfig
# Single-turn chat smoke test against a saved checkpoint directory.
# The tokenizer, the model weights and the generation config are all read
# from the same directory, so the path is bound to one name.
checkpoint_dir = "/public/home/zhaoying1/work/Baichuan2-main/fine-tune/slurm_script/output/checkpoint-420"

tokenizer = AutoTokenizer.from_pretrained(
    checkpoint_dir,
    use_fast=False,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint_dir,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model.generation_config = GenerationConfig.from_pretrained(checkpoint_dir)

# One user message; `chat` is not defined in this file
# (presumably supplied by the checkpoint's remote code — TODO confirm).
messages = [{"role": "user", "content": "解释一下“温故而知新”"}]
response = model.chat(tokenizer, messages)
print(response)
\ No newline at end of file
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.000419 11324 ProcessGroupNCCL.cpp:835] [Rank 19] NCCL watchdog thread started!
I1027 11:25:36.000407 10511 ProcessGroupNCCL.cpp:669] [Rank 19] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:35.994814 24977 ProcessGroupNCCL.cpp:835] [Rank 62] NCCL watchdog thread started!
I1027 11:25:35.994799 24232 ProcessGroupNCCL.cpp:669] [Rank 62] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.000789 26825 ProcessGroupNCCL.cpp:835] [Rank 22] NCCL watchdog thread started!
I1027 11:25:36.000782 26095 ProcessGroupNCCL.cpp:669] [Rank 22] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.003156 27045 ProcessGroupNCCL.cpp:835] [Rank 71] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.000727 11325 ProcessGroupNCCL.cpp:835] [Rank 18] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:35.998217 32364 ProcessGroupNCCL.cpp:835] [Rank 95] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.000821 26826 ProcessGroupNCCL.cpp:835] [Rank 23] NCCL watchdog thread started!
I1027 11:25:36.000818 26096 ProcessGroupNCCL.cpp:669] [Rank 23] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.003134 26319 ProcessGroupNCCL.cpp:669] [Rank 71] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.000705 10510 ProcessGroupNCCL.cpp:669] [Rank 18] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:35.998208 31587 ProcessGroupNCCL.cpp:669] [Rank 95] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.003333 27046 ProcessGroupNCCL.cpp:835] [Rank 70] NCCL watchdog thread started!
I1027 11:25:36.003324 26318 ProcessGroupNCCL.cpp:669] [Rank 70] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.006203 8400 ProcessGroupNCCL.cpp:835] [Rank 14] NCCL watchdog thread started!
I1027 11:25:36.006176 7503 ProcessGroupNCCL.cpp:669] [Rank 14] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.009217 32366 ProcessGroupNCCL.cpp:835] [Rank 94] NCCL watchdog thread started!
I1027 11:25:36.009191 31586 ProcessGroupNCCL.cpp:669] [Rank 94] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.012204 3415 ProcessGroupNCCL.cpp:835] [Rank 42] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.002801 21157 ProcessGroupNCCL.cpp:835] [Rank 91] NCCL watchdog thread started!
I1027 11:25:36.012202 2637 ProcessGroupNCCL.cpp:669] [Rank 42] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.002804 20260 ProcessGroupNCCL.cpp:669] [Rank 91] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.014935 24794 ProcessGroupNCCL.cpp:835] [Rank 30] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.012230 3416 ProcessGroupNCCL.cpp:835] [Rank 43] NCCL watchdog thread started!
I1027 11:25:36.012221 2638 ProcessGroupNCCL.cpp:669] [Rank 43] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.014866 23880 ProcessGroupNCCL.cpp:669] [Rank 30] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.018532 23508 ProcessGroupNCCL.cpp:835] [Rank 38] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.019204 14639 ProcessGroupNCCL.cpp:835] [Rank 57] NCCL watchdog thread started!
I1027 11:25:36.018523 22823 ProcessGroupNCCL.cpp:669] [Rank 38] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.019199 14036 ProcessGroupNCCL.cpp:669] [Rank 57] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.025336 21138 ProcessGroupNCCL.cpp:835] [Rank 79] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.018599 23509 ProcessGroupNCCL.cpp:835] [Rank 39] NCCL watchdog thread started!
I1027 11:25:36.018594 22824 ProcessGroupNCCL.cpp:669] [Rank 39] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.019300 14640 ProcessGroupNCCL.cpp:835] [Rank 56] NCCL watchdog thread started!
I1027 11:25:36.025269 20449 ProcessGroupNCCL.cpp:669] [Rank 79] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.019295 14034 ProcessGroupNCCL.cpp:669] [Rank 56] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.025368 21139 ProcessGroupNCCL.cpp:835] [Rank 78] NCCL watchdog thread started!
I1027 11:25:36.025368 20448 ProcessGroupNCCL.cpp:669] [Rank 78] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.035486 25002 ProcessGroupNCCL.cpp:835] [Rank 74] NCCL watchdog thread started!
I1027 11:25:36.035516 24084 ProcessGroupNCCL.cpp:669] [Rank 74] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.035588 25001 ProcessGroupNCCL.cpp:835] [Rank 75] NCCL watchdog thread started!
I1027 11:25:36.035571 24085 ProcessGroupNCCL.cpp:669] [Rank 75] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.032754 3251 ProcessGroupNCCL.cpp:835] [Rank 10] NCCL watchdog thread started!
I1027 11:25:36.032749 2343 ProcessGroupNCCL.cpp:669] [Rank 10] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.203402 10842 ProcessGroupNCCL.cpp:835] [Rank 86] NCCL watchdog thread started!
I1027 11:25:36.203394 10075 ProcessGroupNCCL.cpp:669] [Rank 86] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.196825 3253 ProcessGroupNCCL.cpp:835] [Rank 11] NCCL watchdog thread started!
I1027 11:25:36.196812 2344 ProcessGroupNCCL.cpp:669] [Rank 11] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.203168 8402 ProcessGroupNCCL.cpp:835] [Rank 15] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.202105 12499 ProcessGroupNCCL.cpp:835] [Rank 2] NCCL watchdog thread started!
I1027 11:25:36.202036 11396 ProcessGroupNCCL.cpp:669] [Rank 2] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.202195 12500 ProcessGroupNCCL.cpp:835] [Rank 3] NCCL watchdog thread started!
I1027 11:25:36.202183 11397 ProcessGroupNCCL.cpp:669] [Rank 3] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.203161 7504 ProcessGroupNCCL.cpp:669] [Rank 15] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.203104 32441 ProcessGroupNCCL.cpp:835] [Rank 67] NCCL watchdog thread started!
I1027 11:25:36.203104 31690 ProcessGroupNCCL.cpp:669] [Rank 67] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.196961 14643 ProcessGroupNCCL.cpp:835] [Rank 59] NCCL watchdog thread started!
I1027 11:25:36.196966 14038 ProcessGroupNCCL.cpp:669] [Rank 59] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.197103 14644 ProcessGroupNCCL.cpp:835] [Rank 58] NCCL watchdog thread started!
I1027 11:25:36.197136 14037 ProcessGroupNCCL.cpp:669] [Rank 58] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.204802 10844 ProcessGroupNCCL.cpp:835] [Rank 87] NCCL watchdog thread started!
I1027 11:25:36.204813 10076 ProcessGroupNCCL.cpp:669] [Rank 87] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.205030 24796 ProcessGroupNCCL.cpp:835] [Rank 31] NCCL watchdog thread started!
I1027 11:25:36.205034 23881 ProcessGroupNCCL.cpp:669] [Rank 31] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.202742 15657 ProcessGroupNCCL.cpp:835] [Rank 80] NCCL watchdog thread started!
I1027 11:25:36.202737 14727 ProcessGroupNCCL.cpp:669] [Rank 80] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.193079 21159 ProcessGroupNCCL.cpp:835] [Rank 90] NCCL watchdog thread started!
I1027 11:25:36.193073 20259 ProcessGroupNCCL.cpp:669] [Rank 90] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.196311 24979 ProcessGroupNCCL.cpp:835] [Rank 63] NCCL watchdog thread started!
I1027 11:25:36.196305 24233 ProcessGroupNCCL.cpp:669] [Rank 63] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.204185 32443 ProcessGroupNCCL.cpp:835] [Rank 66] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.203037 14731 ProcessGroupNCCL.cpp:669] [Rank 83] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.203060 15659 ProcessGroupNCCL.cpp:835] [Rank 83] NCCL watchdog thread started!
I1027 11:25:36.204182 31689 ProcessGroupNCCL.cpp:669] [Rank 66] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.203289 15658 ProcessGroupNCCL.cpp:835] [Rank 82] NCCL watchdog thread started!
I1027 11:25:36.203285 14730 ProcessGroupNCCL.cpp:669] [Rank 82] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.203330 15660 ProcessGroupNCCL.cpp:835] [Rank 81] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.202731 29614 ProcessGroupNCCL.cpp:835] [Rank 6] NCCL watchdog thread started!
I1027 11:25:36.203330 14729 ProcessGroupNCCL.cpp:669] [Rank 81] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.202728 28730 ProcessGroupNCCL.cpp:669] [Rank 6] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.202839 29615 ProcessGroupNCCL.cpp:835] [Rank 5] NCCL watchdog thread started!
I1027 11:25:36.202837 28729 ProcessGroupNCCL.cpp:669] [Rank 5] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.202972 29616 ProcessGroupNCCL.cpp:835] [Rank 4] NCCL watchdog thread started!
I1027 11:25:36.202955 28728 ProcessGroupNCCL.cpp:669] [Rank 4] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.203045 29617 ProcessGroupNCCL.cpp:835] [Rank 7] NCCL watchdog thread started!
I1027 11:25:36.203034 28731 ProcessGroupNCCL.cpp:669] [Rank 7] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.207926 16262 ProcessGroupNCCL.cpp:835] [Rank 35] NCCL watchdog thread started!
I1027 11:25:36.207923 15467 ProcessGroupNCCL.cpp:669] [Rank 35] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.207968 16263 ProcessGroupNCCL.cpp:835] [Rank 34] NCCL watchdog thread started!
I1027 11:25:36.207963 15466 ProcessGroupNCCL.cpp:669] [Rank 34] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.208457 20897 ProcessGroupNCCL.cpp:835] [Rank 27] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.208469 20898 ProcessGroupNCCL.cpp:835] [Rank 26] NCCL watchdog thread started!
I1027 11:25:36.208447 20095 ProcessGroupNCCL.cpp:669] [Rank 26] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.208431 20096 ProcessGroupNCCL.cpp:669] [Rank 27] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.211433 11294 ProcessGroupNCCL.cpp:835] [Rank 44] NCCL watchdog thread started!
I1027 11:25:36.211432 10427 ProcessGroupNCCL.cpp:669] [Rank 44] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.211643 11295 ProcessGroupNCCL.cpp:835] [Rank 46] NCCL watchdog thread started!
I1027 11:25:36.211635 10429 ProcessGroupNCCL.cpp:669] [Rank 46] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.211671 11296 ProcessGroupNCCL.cpp:835] [Rank 47] NCCL watchdog thread started!
I1027 11:25:36.211673 10430 ProcessGroupNCCL.cpp:669] [Rank 47] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.211741 11297 ProcessGroupNCCL.cpp:835] [Rank 45] NCCL watchdog thread started!
I1027 11:25:36.211752 10428 ProcessGroupNCCL.cpp:669] [Rank 45] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.210805 29328 ProcessGroupNCCL.cpp:669] [Rank 50] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:36.210826 29964 ProcessGroupNCCL.cpp:835] [Rank 50] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.210848 29965 ProcessGroupNCCL.cpp:835] [Rank 49] NCCL watchdog thread started!
I1027 11:25:36.210809 29327 ProcessGroupNCCL.cpp:669] [Rank 49] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.210841 29966 ProcessGroupNCCL.cpp:835] [Rank 51] NCCL watchdog thread started!
I1027 11:25:36.210840 29330 ProcessGroupNCCL.cpp:669] [Rank 51] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.211068 29967 ProcessGroupNCCL.cpp:835] [Rank 48] NCCL watchdog thread started!
I1027 11:25:36.211056 29325 ProcessGroupNCCL.cpp:669] [Rank 48] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.212285 22404 ProcessGroupNCCL.cpp:835] [Rank 52] NCCL watchdog thread started!
I1027 11:25:36.212198 21692 ProcessGroupNCCL.cpp:669] [Rank 52] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.212446 22405 ProcessGroupNCCL.cpp:835] [Rank 53] NCCL watchdog thread started!
I1027 11:25:36.212421 21694 ProcessGroupNCCL.cpp:669] [Rank 53] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.212572 22406 ProcessGroupNCCL.cpp:835] [Rank 54] NCCL watchdog thread started!
I1027 11:25:36.212548 21695 ProcessGroupNCCL.cpp:669] [Rank 54] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.212610 22407 ProcessGroupNCCL.cpp:835] [Rank 55] NCCL watchdog thread started!
I1027 11:25:36.212604 21696 ProcessGroupNCCL.cpp:669] [Rank 55] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.996578 32384 ProcessGroupNCCL.cpp:835] [Rank 93] NCCL watchdog thread started!
I1027 11:25:36.996528 31585 ProcessGroupNCCL.cpp:669] [Rank 93] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.996685 32385 ProcessGroupNCCL.cpp:835] [Rank 92] NCCL watchdog thread started!
I1027 11:25:36.996668 31583 ProcessGroupNCCL.cpp:669] [Rank 92] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.000815 21143 ProcessGroupNCCL.cpp:835] [Rank 76] NCCL watchdog thread started!
I1027 11:25:37.000811 20445 ProcessGroupNCCL.cpp:669] [Rank 76] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.000913 21144 ProcessGroupNCCL.cpp:835] [Rank 77] NCCL watchdog thread started!
I1027 11:25:37.000910 20447 ProcessGroupNCCL.cpp:669] [Rank 77] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.999774 3446 ProcessGroupNCCL.cpp:835] [Rank 41] NCCL watchdog thread started!
I1027 11:25:36.999768 2636 ProcessGroupNCCL.cpp:669] [Rank 41] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.999859 3447 ProcessGroupNCCL.cpp:835] [Rank 40] NCCL watchdog thread started!
I1027 11:25:36.999855 2634 ProcessGroupNCCL.cpp:669] [Rank 40] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.999356 26844 ProcessGroupNCCL.cpp:835] [Rank 20] NCCL watchdog thread started!
I1027 11:25:36.999341 26093 ProcessGroupNCCL.cpp:669] [Rank 20] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.999518 26845 ProcessGroupNCCL.cpp:835] [Rank 21] NCCL watchdog thread started!
I1027 11:25:36.999511 26094 ProcessGroupNCCL.cpp:669] [Rank 21] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.002300 10849 ProcessGroupNCCL.cpp:835] [Rank 84] NCCL watchdog thread started!
I1027 11:25:37.002291 10072 ProcessGroupNCCL.cpp:669] [Rank 84] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.999794 20901 ProcessGroupNCCL.cpp:835] [Rank 24] NCCL watchdog thread started!
I1027 11:25:36.999809 20092 ProcessGroupNCCL.cpp:669] [Rank 24] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.999497 11328 ProcessGroupNCCL.cpp:835] [Rank 16] NCCL watchdog thread started!
I1027 11:25:36.999490 10508 ProcessGroupNCCL.cpp:669] [Rank 16] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.001916 27074 ProcessGroupNCCL.cpp:835] [Rank 68] NCCL watchdog thread started!
I1027 11:25:37.001905 26315 ProcessGroupNCCL.cpp:669] [Rank 68] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.994484 23512 ProcessGroupNCCL.cpp:835] [Rank 37] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.001937 8405 ProcessGroupNCCL.cpp:835] [Rank 12] NCCL watchdog thread started!
I1027 11:25:36.994472 22822 ProcessGroupNCCL.cpp:669] [Rank 37] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:37.001932 7501 ProcessGroupNCCL.cpp:669] [Rank 12] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.002847 24817 ProcessGroupNCCL.cpp:835] [Rank 28] NCCL watchdog thread started!
I1027 11:25:37.002837 23877 ProcessGroupNCCL.cpp:669] [Rank 28] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.994053 24982 ProcessGroupNCCL.cpp:835] [Rank 60] NCCL watchdog thread started!
I1027 11:25:36.994030 24229 ProcessGroupNCCL.cpp:669] [Rank 60] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.995919 3256 ProcessGroupNCCL.cpp:835] [Rank 8] NCCL watchdog thread started!
I1027 11:25:36.995903 2340 ProcessGroupNCCL.cpp:669] [Rank 8] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.000293 20903 ProcessGroupNCCL.cpp:835] [Rank 25] NCCL watchdog thread started!
I1027 11:25:37.000286 20094 ProcessGroupNCCL.cpp:669] [Rank 25] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:36.991092 21162 ProcessGroupNCCL.cpp:835] [Rank 88] NCCL watchdog thread started!
I1027 11:25:36.991041 20256 ProcessGroupNCCL.cpp:669] [Rank 88] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.002975 10851 ProcessGroupNCCL.cpp:835] [Rank 85] NCCL watchdog thread started!
I1027 11:25:37.002985 10074 ProcessGroupNCCL.cpp:669] [Rank 85] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.002640 8407 ProcessGroupNCCL.cpp:835] [Rank 13] NCCL watchdog thread started!
I1027 11:25:37.002632 7502 ProcessGroupNCCL.cpp:669] [Rank 13] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.002166 32451 ProcessGroupNCCL.cpp:835] [Rank 64] NCCL watchdog thread started!
I1027 11:25:37.002157 31686 ProcessGroupNCCL.cpp:669] [Rank 64] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.201927 27076 ProcessGroupNCCL.cpp:835] [Rank 69] NCCL watchdog thread started!
I1027 11:25:37.201920 26317 ProcessGroupNCCL.cpp:669] [Rank 69] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.201634 12503 ProcessGroupNCCL.cpp:835] [Rank 1] NCCL watchdog thread started!
I1027 11:25:37.201622 11395 ProcessGroupNCCL.cpp:669] [Rank 1] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.200444 11330 ProcessGroupNCCL.cpp:835] [Rank 17] NCCL watchdog thread started!
I1027 11:25:37.200419 10509 ProcessGroupNCCL.cpp:669] [Rank 17] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.194712 24984 ProcessGroupNCCL.cpp:835] [Rank 61] NCCL watchdog thread started!
I1027 11:25:37.194701 24231 ProcessGroupNCCL.cpp:669] [Rank 61] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.195446 23514 ProcessGroupNCCL.cpp:835] [Rank 36] NCCL watchdog thread started!
I1027 11:25:37.195439 22820 ProcessGroupNCCL.cpp:669] [Rank 36] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.201769 16266 ProcessGroupNCCL.cpp:835] [Rank 33] NCCL watchdog thread started!
I1027 11:25:37.201746 15465 ProcessGroupNCCL.cpp:669] [Rank 33] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.197037 3258 ProcessGroupNCCL.cpp:835] [Rank 9] NCCL watchdog thread started!
I1027 11:25:37.197031 2342 ProcessGroupNCCL.cpp:669] [Rank 9] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.204128 25005 ProcessGroupNCCL.cpp:835] [Rank 72] NCCL watchdog thread started!
I1027 11:25:37.204124 24081 ProcessGroupNCCL.cpp:669] [Rank 72] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.202404 16268 ProcessGroupNCCL.cpp:835] [Rank 32] NCCL watchdog thread started!
I1027 11:25:37.202402 15463 ProcessGroupNCCL.cpp:669] [Rank 32] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.204619 24819 ProcessGroupNCCL.cpp:835] [Rank 29] NCCL watchdog thread started!
I1027 11:25:37.204551 23879 ProcessGroupNCCL.cpp:669] [Rank 29] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.204697 25007 ProcessGroupNCCL.cpp:835] [Rank 73] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.203425 32453 ProcessGroupNCCL.cpp:835] [Rank 65] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.192674 21164 ProcessGroupNCCL.cpp:835] [Rank 89] NCCL watchdog thread started!
I1027 11:25:37.203423 31688 ProcessGroupNCCL.cpp:669] [Rank 65] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:37.192667 20258 ProcessGroupNCCL.cpp:669] [Rank 89] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:25:37.204689 24083 ProcessGroupNCCL.cpp:669] [Rank 73] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:25:37.211710 12505 ProcessGroupNCCL.cpp:835] [Rank 0] NCCL watchdog thread started!
I1027 11:25:37.211706 11393 ProcessGroupNCCL.cpp:669] [Rank 0] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
pthread_mutex_timedlock() returned 110
Failed to initialize RSMI device mutex after 5 seconds. Previous execution may not have shutdown cleanly. To fix problem, stop all rocm_smi programs, and then delete the rocm_smi* shared memory files in /dev/shm.: Success
pthread_mutex_timedlock() returned 110
Failed to initialize RSMI device mutex after 5 seconds. Previous execution may not have shutdown cleanly. To fix problem, stop all rocm_smi programs, and then delete the rocm_smi* shared memory files in /dev/shm.: Success
pthread_mutex_timedlock() returned 110
Failed to initialize RSMI device mutex after 5 seconds. Previous execution may not have shutdown cleanly. To fix problem, stop all rocm_smi programs, and then delete the rocm_smi* shared memory files in /dev/shm.: Success
pthread_mutex_timedlock() returned 110
Failed to initialize RSMI device mutex after 5 seconds. Previous execution may not have shutdown cleanly. To fix problem, stop all rocm_smi programs, and then delete the rocm_smi* shared memory files in /dev/shm.: Success
rsmi_init() failed
rsmi_init() failed
rsmi_init() failed
rsmi_init() failed
I1027 11:25:45.175016 11393 ProcessGroupNCCL.cpp:1274] NCCL_DEBUG: INFO
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] 
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] 
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.18s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.26s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.18s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.27s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.19s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 
27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.17s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.18s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.19s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.25s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.25s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.20s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.25s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.20s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.20s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.20s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.21s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.21s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.30s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.28s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.29s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.29s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.23s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.22s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.20s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.20s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.23s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.18s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.17s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading 
checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.20s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.18s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.17s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.18s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.17s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.24s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.16s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.17s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.24s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.16s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.15s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.23s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.16s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.23s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.19s/it] Loading checkpoint shards: 
50%|█████ | 1/2 [00:27<00:27, 27.19s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.19s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.19s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.20s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.31s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.34s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.33s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.34s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.32s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.37s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.28s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.31s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.26s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.26s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.26s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.26s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.30s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.30s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.30s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.30s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.34s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.46s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.47s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.58s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.62s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.72s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.03s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.03s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.03s/it] Loading checkpoint shards: 
100%|██████████| 2/2 [00:42<00:00, 20.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.13s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.13s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.13s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.12s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.10s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.11s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.10s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.11s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.03s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.03s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.03s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.11s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.11s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.11s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.11s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.04s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.04s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.12s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.13s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.05s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.05s/it] Loading checkpoint shards: 100%|██████████| 2/2 
[00:42<00:00, 20.05s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.05s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.14s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.14s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.14s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.14s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 
100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.09s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 
[00:42<00:00, 21.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.06s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.10s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.06s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.10s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.06s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.10s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.06s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 19.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.04s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.14s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.10s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.05s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.05s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.05s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.12s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.12s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.12s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.12s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.06s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.17s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.15s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.15s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.15s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.14s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.14s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.14s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.14s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.16s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.05s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.18s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.22s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 20.14s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:42<00:00, 21.24s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
I1027 11:26:51.949496 7502 ProcessGroupNCCL.cpp:669] [Rank 13] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949605 9852 ProcessGroupNCCL.cpp:835] [Rank 13] NCCL watchdog thread started!
I1027 11:26:51.949599 7501 ProcessGroupNCCL.cpp:669] [Rank 12] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949707 9853 ProcessGroupNCCL.cpp:835] [Rank 12] NCCL watchdog thread started!
I1027 11:26:51.949003 1358 ProcessGroupNCCL.cpp:835] [Rank 66] NCCL watchdog thread started!
I1027 11:26:51.948947 31689 ProcessGroupNCCL.cpp:669] [Rank 66] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949792 9854 ProcessGroupNCCL.cpp:835] [Rank 14] NCCL watchdog thread started!
I1027 11:26:51.949082 1359 ProcessGroupNCCL.cpp:835] [Rank 65] NCCL watchdog thread started!
I1027 11:26:51.949018 31688 ProcessGroupNCCL.cpp:669] [Rank 65] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.947327 21695 ProcessGroupNCCL.cpp:669] [Rank 54] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.947432 23522 ProcessGroupNCCL.cpp:835] [Rank 54] NCCL watchdog thread started!
I1027 11:26:51.950186 20256 ProcessGroupNCCL.cpp:669] [Rank 88] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.950294 22552 ProcessGroupNCCL.cpp:835] [Rank 88] NCCL watchdog thread started!
I1027 11:26:51.949731 7503 ProcessGroupNCCL.cpp:669] [Rank 14] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949229 31690 ProcessGroupNCCL.cpp:669] [Rank 67] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.947433 21696 ProcessGroupNCCL.cpp:669] [Rank 55] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949116 20445 ProcessGroupNCCL.cpp:669] [Rank 76] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949229 22126 ProcessGroupNCCL.cpp:835] [Rank 76] NCCL watchdog thread started!
I1027 11:26:51.949371 1360 ProcessGroupNCCL.cpp:835] [Rank 67] NCCL watchdog thread started!
I1027 11:26:51.947487 21692 ProcessGroupNCCL.cpp:669] [Rank 52] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.950353 20260 ProcessGroupNCCL.cpp:669] [Rank 91] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949461 1361 ProcessGroupNCCL.cpp:835] [Rank 64] NCCL watchdog thread started!
I1027 11:26:51.947544 23523 ProcessGroupNCCL.cpp:835] [Rank 55] NCCL watchdog thread started!
I1027 11:26:51.950441 22553 ProcessGroupNCCL.cpp:835] [Rank 91] NCCL watchdog thread started!
I1027 11:26:51.949990 7504 ProcessGroupNCCL.cpp:669] [Rank 15] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949276 24231 ProcessGroupNCCL.cpp:669] [Rank 61] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949373 26315 ProcessGroupNCCL.cpp:835] [Rank 61] NCCL watchdog thread started!
I1027 11:26:51.949352 20449 ProcessGroupNCCL.cpp:669] [Rank 79] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949402 31686 ProcessGroupNCCL.cpp:669] [Rank 64] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.947618 23524 ProcessGroupNCCL.cpp:835] [Rank 52] NCCL watchdog thread started!
I1027 11:26:51.950549 22554 ProcessGroupNCCL.cpp:835] [Rank 89] NCCL watchdog thread started!
I1027 11:26:51.950112 9855 ProcessGroupNCCL.cpp:835] [Rank 15] NCCL watchdog thread started!
I1027 11:26:51.949306 24229 ProcessGroupNCCL.cpp:669] [Rank 60] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949427 26316 ProcessGroupNCCL.cpp:835] [Rank 60] NCCL watchdog thread started!
I1027 11:26:51.949481 22127 ProcessGroupNCCL.cpp:835] [Rank 79] NCCL watchdog thread started!
I1027 11:26:51.947643 21694 ProcessGroupNCCL.cpp:669] [Rank 53] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.947762 23525 ProcessGroupNCCL.cpp:835] [Rank 53] NCCL watchdog thread started!
I1027 11:26:51.950492 20258 ProcessGroupNCCL.cpp:669] [Rank 89] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949349 24233 ProcessGroupNCCL.cpp:669] [Rank 63] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949486 26317 ProcessGroupNCCL.cpp:835] [Rank 63] NCCL watchdog thread started!
I1027 11:26:51.949513 20448 ProcessGroupNCCL.cpp:669] [Rank 78] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.944944 14729 ProcessGroupNCCL.cpp:669] [Rank 81] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.945068 16997 ProcessGroupNCCL.cpp:835] [Rank 81] NCCL watchdog thread started!
I1027 11:26:51.947634 10508 ProcessGroupNCCL.cpp:669] [Rank 16] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.947746 12725 ProcessGroupNCCL.cpp:835] [Rank 16] NCCL watchdog thread started!
I1027 11:26:51.949496 26318 ProcessGroupNCCL.cpp:835] [Rank 62] NCCL watchdog thread started!
I1027 11:26:51.949404 24232 ProcessGroupNCCL.cpp:669] [Rank 62] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949640 22128 ProcessGroupNCCL.cpp:835] [Rank 78] NCCL watchdog thread started!
I1027 11:26:51.945032 14727 ProcessGroupNCCL.cpp:669] [Rank 80] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.947654 10509 ProcessGroupNCCL.cpp:669] [Rank 17] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.947773 12726 ProcessGroupNCCL.cpp:835] [Rank 17] NCCL watchdog thread started!
I1027 11:26:51.949669 20447 ProcessGroupNCCL.cpp:669] [Rank 77] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.945151 16998 ProcessGroupNCCL.cpp:835] [Rank 80] NCCL watchdog thread started!
I1027 11:26:51.947707 10511 ProcessGroupNCCL.cpp:669] [Rank 19] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949764 22129 ProcessGroupNCCL.cpp:835] [Rank 77] NCCL watchdog thread started!
I1027 11:26:51.945264 16999 ProcessGroupNCCL.cpp:835] [Rank 82] NCCL watchdog thread started!
I1027 11:26:51.947822 12727 ProcessGroupNCCL.cpp:835] [Rank 19] NCCL watchdog thread started!
I1027 11:26:51.945180 14730 ProcessGroupNCCL.cpp:669] [Rank 82] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.947847 10510 ProcessGroupNCCL.cpp:669] [Rank 18] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.945367 14731 ProcessGroupNCCL.cpp:669] [Rank 83] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.947969 12728 ProcessGroupNCCL.cpp:835] [Rank 18] NCCL watchdog thread started!
I1027 11:26:51.945502 17000 ProcessGroupNCCL.cpp:835] [Rank 83] NCCL watchdog thread started!
I1027 11:26:51.951151 20259 ProcessGroupNCCL.cpp:669] [Rank 90] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951236 22555 ProcessGroupNCCL.cpp:835] [Rank 90] NCCL watchdog thread started!
I1027 11:26:51.945559 26315 ProcessGroupNCCL.cpp:669] [Rank 68] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.945652 26319 ProcessGroupNCCL.cpp:669] [Rank 71] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.945685 28572 ProcessGroupNCCL.cpp:835] [Rank 68] NCCL watchdog thread started!
I1027 11:26:51.952735 23879 ProcessGroupNCCL.cpp:669] [Rank 29] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952816 26129 ProcessGroupNCCL.cpp:835] [Rank 29] NCCL watchdog thread started!
I1027 11:26:51.945760 26318 ProcessGroupNCCL.cpp:669] [Rank 70] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.945842 28574 ProcessGroupNCCL.cpp:835] [Rank 70] NCCL watchdog thread started!
I1027 11:26:51.945773 28573 ProcessGroupNCCL.cpp:835] [Rank 71] NCCL watchdog thread started!
I1027 11:26:51.945890 26317 ProcessGroupNCCL.cpp:669] [Rank 69] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.946013 28575 ProcessGroupNCCL.cpp:835] [Rank 69] NCCL watchdog thread started!
I1027 11:26:51.952797 23881 ProcessGroupNCCL.cpp:669] [Rank 31] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952850 23880 ProcessGroupNCCL.cpp:669] [Rank 30] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952908 26130 ProcessGroupNCCL.cpp:835] [Rank 31] NCCL watchdog thread started!
I1027 11:26:51.952955 26131 ProcessGroupNCCL.cpp:835] [Rank 30] NCCL watchdog thread started!
I1027 11:26:51.953125 26132 ProcessGroupNCCL.cpp:835] [Rank 28] NCCL watchdog thread started!
I1027 11:26:51.953084 23877 ProcessGroupNCCL.cpp:669] [Rank 28] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.950112 29328 ProcessGroupNCCL.cpp:669] [Rank 50] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.950232 31136 ProcessGroupNCCL.cpp:835] [Rank 50] NCCL watchdog thread started!
I1027 11:26:51.950434 29325 ProcessGroupNCCL.cpp:669] [Rank 48] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.950563 31137 ProcessGroupNCCL.cpp:835] [Rank 48] NCCL watchdog thread started!
I1027 11:26:51.950472 29327 ProcessGroupNCCL.cpp:669] [Rank 49] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.950568 31138 ProcessGroupNCCL.cpp:835] [Rank 49] NCCL watchdog thread started!
I1027 11:26:51.951445 10430 ProcessGroupNCCL.cpp:669] [Rank 47] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.950486 29330 ProcessGroupNCCL.cpp:669] [Rank 51] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.950613 31139 ProcessGroupNCCL.cpp:835] [Rank 51] NCCL watchdog thread started!
I1027 11:26:51.951589 12671 ProcessGroupNCCL.cpp:835] [Rank 47] NCCL watchdog thread started!
I1027 11:26:51.951678 10429 ProcessGroupNCCL.cpp:669] [Rank 46] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951462 15463 ProcessGroupNCCL.cpp:669] [Rank 32] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951588 17570 ProcessGroupNCCL.cpp:835] [Rank 32] NCCL watchdog thread started!
I1027 11:26:51.951716 10427 ProcessGroupNCCL.cpp:669] [Rank 44] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951812 12673 ProcessGroupNCCL.cpp:835] [Rank 44] NCCL watchdog thread started!
I1027 11:26:51.951828 12672 ProcessGroupNCCL.cpp:835] [Rank 46] NCCL watchdog thread started!
I1027 11:26:51.951530 15465 ProcessGroupNCCL.cpp:669] [Rank 33] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951676 17571 ProcessGroupNCCL.cpp:835] [Rank 33] NCCL watchdog thread started!
I1027 11:26:51.951695 17572 ProcessGroupNCCL.cpp:835] [Rank 34] NCCL watchdog thread started!
I1027 11:26:51.951634 15466 ProcessGroupNCCL.cpp:669] [Rank 34] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951701 15467 ProcessGroupNCCL.cpp:669] [Rank 35] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951804 17573 ProcessGroupNCCL.cpp:835] [Rank 35] NCCL watchdog thread started!
I1027 11:26:51.951908 10428 ProcessGroupNCCL.cpp:669] [Rank 45] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952037 12674 ProcessGroupNCCL.cpp:835] [Rank 45] NCCL watchdog thread started!
I1027 11:26:51.951092 4573 ProcessGroupNCCL.cpp:835] [Rank 10] NCCL watchdog thread started!
I1027 11:26:51.951079 2342 ProcessGroupNCCL.cpp:669] [Rank 9] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951072 2344 ProcessGroupNCCL.cpp:669] [Rank 11] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951015 2343 ProcessGroupNCCL.cpp:669] [Rank 10] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952960 22820 ProcessGroupNCCL.cpp:669] [Rank 36] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951189 4575 ProcessGroupNCCL.cpp:835] [Rank 9] NCCL watchdog thread started!
I1027 11:26:51.951203 4574 ProcessGroupNCCL.cpp:835] [Rank 11] NCCL watchdog thread started!
I1027 11:26:51.951225 2340 ProcessGroupNCCL.cpp:669] [Rank 8] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951320 4576 ProcessGroupNCCL.cpp:835] [Rank 8] NCCL watchdog thread started!
I1027 11:26:51.953114 22822 ProcessGroupNCCL.cpp:669] [Rank 37] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.953060 24875 ProcessGroupNCCL.cpp:835] [Rank 36] NCCL watchdog thread started!
I1027 11:26:51.953271 22823 ProcessGroupNCCL.cpp:669] [Rank 38] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.953379 24877 ProcessGroupNCCL.cpp:835] [Rank 38] NCCL watchdog thread started!
I1027 11:26:51.951431 28731 ProcessGroupNCCL.cpp:669] [Rank 7] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951532 31006 ProcessGroupNCCL.cpp:835] [Rank 7] NCCL watchdog thread started!
I1027 11:26:51.954536 24083 ProcessGroupNCCL.cpp:669] [Rank 73] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.954618 26340 ProcessGroupNCCL.cpp:835] [Rank 73] NCCL watchdog thread started!
I1027 11:26:51.954618 24081 ProcessGroupNCCL.cpp:669] [Rank 72] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.954713 26341 ProcessGroupNCCL.cpp:835] [Rank 72] NCCL watchdog thread started!
I1027 11:26:51.953224 24876 ProcessGroupNCCL.cpp:835] [Rank 37] NCCL watchdog thread started!
I1027 11:26:51.953326 22824 ProcessGroupNCCL.cpp:669] [Rank 39] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.953436 24878 ProcessGroupNCCL.cpp:835] [Rank 39] NCCL watchdog thread started!
I1027 11:26:51.948441 11393 ProcessGroupNCCL.cpp:669] [Rank 0] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.948542 16439 ProcessGroupNCCL.cpp:835] [Rank 0] NCCL watchdog thread started!
I1027 11:26:51.951676 28728 ProcessGroupNCCL.cpp:669] [Rank 4] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.954780 24084 ProcessGroupNCCL.cpp:669] [Rank 74] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951766 31007 ProcessGroupNCCL.cpp:835] [Rank 4] NCCL watchdog thread started!
I1027 11:26:51.951722 28729 ProcessGroupNCCL.cpp:669] [Rank 5] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.954936 26342 ProcessGroupNCCL.cpp:835] [Rank 74] NCCL watchdog thread started!
I1027 11:26:51.951814 31008 ProcessGroupNCCL.cpp:835] [Rank 5] NCCL watchdog thread started!
I1027 11:26:51.954890 24085 ProcessGroupNCCL.cpp:669] [Rank 75] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.948604 11397 ProcessGroupNCCL.cpp:669] [Rank 3] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.948693 16440 ProcessGroupNCCL.cpp:835] [Rank 3] NCCL watchdog thread started!
I1027 11:26:51.954980 26343 ProcessGroupNCCL.cpp:835] [Rank 75] NCCL watchdog thread started!
I1027 11:26:51.951839 28730 ProcessGroupNCCL.cpp:669] [Rank 6] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.951936 31009 ProcessGroupNCCL.cpp:835] [Rank 6] NCCL watchdog thread started!
I1027 11:26:51.951936 20096 ProcessGroupNCCL.cpp:669] [Rank 27] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952059 22487 ProcessGroupNCCL.cpp:835] [Rank 27] NCCL watchdog thread started!
I1027 11:26:51.953580 2638 ProcessGroupNCCL.cpp:669] [Rank 43] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952100 20092 ProcessGroupNCCL.cpp:669] [Rank 24] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.953667 2634 ProcessGroupNCCL.cpp:669] [Rank 40] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952216 22488 ProcessGroupNCCL.cpp:835] [Rank 24] NCCL watchdog thread started!
I1027 11:26:51.953668 4734 ProcessGroupNCCL.cpp:835] [Rank 43] NCCL watchdog thread started!
I1027 11:26:51.952178 20095 ProcessGroupNCCL.cpp:669] [Rank 26] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.953766 4735 ProcessGroupNCCL.cpp:835] [Rank 40] NCCL watchdog thread started!
I1027 11:26:51.952032 26093 ProcessGroupNCCL.cpp:669] [Rank 20] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952152 28321 ProcessGroupNCCL.cpp:835] [Rank 20] NCCL watchdog thread started!
I1027 11:26:51.952301 22489 ProcessGroupNCCL.cpp:835] [Rank 26] NCCL watchdog thread started!
I1027 11:26:51.953905 2637 ProcessGroupNCCL.cpp:669] [Rank 42] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952108 26095 ProcessGroupNCCL.cpp:669] [Rank 22] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949044 16442 ProcessGroupNCCL.cpp:835] [Rank 2] NCCL watchdog thread started!
I1027 11:26:51.948964 11396 ProcessGroupNCCL.cpp:669] [Rank 2] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952275 20094 ProcessGroupNCCL.cpp:669] [Rank 25] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.954031 4736 ProcessGroupNCCL.cpp:835] [Rank 42] NCCL watchdog thread started!
I1027 11:26:51.952210 28322 ProcessGroupNCCL.cpp:835] [Rank 22] NCCL watchdog thread started!
I1027 11:26:51.952401 22490 ProcessGroupNCCL.cpp:835] [Rank 25] NCCL watchdog thread started!
I1027 11:26:51.954025 2636 ProcessGroupNCCL.cpp:669] [Rank 41] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.949137 16443 ProcessGroupNCCL.cpp:835] [Rank 1] NCCL watchdog thread started!
I1027 11:26:51.949059 11395 ProcessGroupNCCL.cpp:669] [Rank 1] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.954125 4737 ProcessGroupNCCL.cpp:835] [Rank 41] NCCL watchdog thread started!
I1027 11:26:51.952327 26094 ProcessGroupNCCL.cpp:669] [Rank 21] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952345 26096 ProcessGroupNCCL.cpp:669] [Rank 23] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952450 28324 ProcessGroupNCCL.cpp:835] [Rank 23] NCCL watchdog thread started!
I1027 11:26:51.952440 28323 ProcessGroupNCCL.cpp:835] [Rank 21] NCCL watchdog thread started!
I1027 11:26:51.952198 14038 ProcessGroupNCCL.cpp:669] [Rank 59] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952296 14036 ProcessGroupNCCL.cpp:669] [Rank 57] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952248 14037 ProcessGroupNCCL.cpp:669] [Rank 58] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952311 15723 ProcessGroupNCCL.cpp:835] [Rank 59] NCCL watchdog thread started!
I1027 11:26:51.952401 15725 ProcessGroupNCCL.cpp:835] [Rank 57] NCCL watchdog thread started!
I1027 11:26:51.952426 14034 ProcessGroupNCCL.cpp:669] [Rank 56] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.952374 15724 ProcessGroupNCCL.cpp:835] [Rank 58] NCCL watchdog thread started!
I1027 11:26:51.952482 15726 ProcessGroupNCCL.cpp:835] [Rank 56] NCCL watchdog thread started!
I1027 11:26:51.955782 31583 ProcessGroupNCCL.cpp:669] [Rank 92] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.955814 31585 ProcessGroupNCCL.cpp:669] [Rank 93] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.955859 31587 ProcessGroupNCCL.cpp:669] [Rank 95] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.957005 10072 ProcessGroupNCCL.cpp:669] [Rank 84] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.957091 12176 ProcessGroupNCCL.cpp:835] [Rank 84] NCCL watchdog thread started!
I1027 11:26:51.955906 1173 ProcessGroupNCCL.cpp:835] [Rank 92] NCCL watchdog thread started!
I1027 11:26:51.957144 12177 ProcessGroupNCCL.cpp:835] [Rank 87] NCCL watchdog thread started!
I1027 11:26:51.955917 1174 ProcessGroupNCCL.cpp:835] [Rank 93] NCCL watchdog thread started!
I1027 11:26:51.955943 1175 ProcessGroupNCCL.cpp:835] [Rank 95] NCCL watchdog thread started!
I1027 11:26:51.956032 31586 ProcessGroupNCCL.cpp:669] [Rank 94] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.956175 1176 ProcessGroupNCCL.cpp:835] [Rank 94] NCCL watchdog thread started!
I1027 11:26:51.957084 10076 ProcessGroupNCCL.cpp:669] [Rank 87] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.957273 12178 ProcessGroupNCCL.cpp:835] [Rank 85] NCCL watchdog thread started!
I1027 11:26:51.957218 10074 ProcessGroupNCCL.cpp:669] [Rank 85] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:26:51.957412 12179 ProcessGroupNCCL.cpp:835] [Rank 86] NCCL watchdog thread started!
I1027 11:26:51.957370 10075 ProcessGroupNCCL.cpp:669] [Rank 86] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s]I1027 11:27:34.182696 19423 ProcessGroupNCCL.cpp:1274] NCCL_DEBUG: INFO
0%| | 1/420 [01:03<7:25:16, 63.76s/it] 0%| | 1/420 [01:03<7:25:13, 63.76s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:14, 63.76s/it] 0%| | 1/420 [01:03<7:25:15, 63.76s/it] 0%| | 1/420 [01:03<7:25:12, 63.75s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:14, 63.76s/it] 0%| | 1/420 [01:03<7:25:17, 63.77s/it] 0%| | 1/420 [01:03<7:25:15, 63.76s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:16, 63.76s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:17, 63.77s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:17, 63.77s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:16, 63.76s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:13, 63.76s/it] 0%| | 1/420 [01:03<7:25:15, 63.76s/it] 0%| | 1/420 [01:03<7:25:13, 63.76s/it] 0%| | 1/420 [01:03<7:25:15, 63.76s/it] 0%| | 1/420 [01:03<7:25:16, 63.76s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:17, 63.77s/it] 0%| | 1/420 [01:03<7:25:12, 63.75s/it] 0%| | 1/420 [01:03<7:25:16, 63.76s/it] 0%| | 1/420 [01:03<7:25:17, 63.77s/it] 0%| | 1/420 [01:03<7:25:13, 63.76s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:14, 63.76s/it] 0%| | 1/420 [01:03<7:25:17, 63.77s/it] 0%| | 1/420 [01:03<7:25:14, 63.76s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:13, 63.76s/it] 0%| | 1/420 [01:03<7:25:15, 63.76s/it] 0%| | 1/420 [01:03<7:25:15, 63.76s/it] 0%| | 1/420 [01:03<7:25:13, 63.76s/it] 0%| | 1/420 [01:03<7:25:16, 63.76s/it] 0%| | 1/420 [01:03<7:25:15, 63.76s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:18, 63.77s/it] 0%| | 1/420 [01:03<7:25:15, 63.76s/it]slurmstepd: error: *** JOB 45668270 ON e08r2n08 CANCELLED AT 2023-10-27T11:39:49 ***
This source diff could not be displayed because it is too large. You can view the blob instead.
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.694067 13004 ProcessGroupNCCL.cpp:835] [Rank 13] NCCL watchdog thread started!
I1027 11:40:59.694059 12119 ProcessGroupNCCL.cpp:669] [Rank 13] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.689687 14514 ProcessGroupNCCL.cpp:835] [Rank 66] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.701745 16391 ProcessGroupNCCL.cpp:835] [Rank 38] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.695235 16070 ProcessGroupNCCL.cpp:835] [Rank 78] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.699540 17423 ProcessGroupNCCL.cpp:835] [Rank 50] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.701830 15702 ProcessGroupNCCL.cpp:835] [Rank 46] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.695412 6383 ProcessGroupNCCL.cpp:835] [Rank 82] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.701686 17105 ProcessGroupNCCL.cpp:835] [Rank 58] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.701151 2628 ProcessGroupNCCL.cpp:835] [Rank 70] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.696023 21980 ProcessGroupNCCL.cpp:835] [Rank 6] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.689921 15651 ProcessGroupNCCL.cpp:835] [Rank 86] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.678339 14673 ProcessGroupNCCL.cpp:835] [Rank 26] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.690449 16988 ProcessGroupNCCL.cpp:835] [Rank 42] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.695075 6529 ProcessGroupNCCL.cpp:835] [Rank 30] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.695425 1875 ProcessGroupNCCL.cpp:669] [Rank 22] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.677758 6910 ProcessGroupNCCL.cpp:835] [Rank 10] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.696250 19750 ProcessGroupNCCL.cpp:669] [Rank 74] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.702153 18814 ProcessGroupNCCL.cpp:835] [Rank 54] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.699782 16112 ProcessGroupNCCL.cpp:669] [Rank 90] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.696225 20964 ProcessGroupNCCL.cpp:835] [Rank 18] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.702178 19957 ProcessGroupNCCL.cpp:669] [Rank 34] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.689674 13567 ProcessGroupNCCL.cpp:669] [Rank 66] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.701737 15593 ProcessGroupNCCL.cpp:669] [Rank 38] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.695227 15274 ProcessGroupNCCL.cpp:669] [Rank 78] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.699527 16693 ProcessGroupNCCL.cpp:669] [Rank 50] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.701823 14882 ProcessGroupNCCL.cpp:669] [Rank 46] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.695387 5793 ProcessGroupNCCL.cpp:669] [Rank 82] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.701678 16266 ProcessGroupNCCL.cpp:669] [Rank 58] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.701143 2012 ProcessGroupNCCL.cpp:669] [Rank 70] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.696029 21054 ProcessGroupNCCL.cpp:669] [Rank 6] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.689911 14827 ProcessGroupNCCL.cpp:669] [Rank 86] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.678334 13847 ProcessGroupNCCL.cpp:669] [Rank 26] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.690456 16148 ProcessGroupNCCL.cpp:669] [Rank 42] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.695065 5835 ProcessGroupNCCL.cpp:669] [Rank 30] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.695485 2521 ProcessGroupNCCL.cpp:835] [Rank 22] NCCL watchdog thread started!
I1027 11:40:59.677734 6029 ProcessGroupNCCL.cpp:669] [Rank 10] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.696282 20639 ProcessGroupNCCL.cpp:835] [Rank 74] NCCL watchdog thread started!
I1027 11:40:59.702147 18005 ProcessGroupNCCL.cpp:669] [Rank 54] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.699805 16959 ProcessGroupNCCL.cpp:835] [Rank 90] NCCL watchdog thread started!
I1027 11:40:59.696215 20256 ProcessGroupNCCL.cpp:669] [Rank 18] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.702189 20824 ProcessGroupNCCL.cpp:835] [Rank 34] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.690634 16989 ProcessGroupNCCL.cpp:835] [Rank 43] NCCL watchdog thread started!
I1027 11:40:59.690613 16146 ProcessGroupNCCL.cpp:669] [Rank 43] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.699772 23262 ProcessGroupNCCL.cpp:835] [Rank 62] NCCL watchdog thread started!
I1027 11:40:59.699754 22359 ProcessGroupNCCL.cpp:669] [Rank 62] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.699798 23263 ProcessGroupNCCL.cpp:835] [Rank 61] NCCL watchdog thread started!
I1027 11:40:59.699793 22361 ProcessGroupNCCL.cpp:669] [Rank 61] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.704620 6273 ProcessGroupNCCL.cpp:835] [Rank 94] NCCL watchdog thread started!
I1027 11:40:59.704631 5555 ProcessGroupNCCL.cpp:669] [Rank 94] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.706770 13006 ProcessGroupNCCL.cpp:835] [Rank 14] NCCL watchdog thread started!
I1027 11:40:59.706761 12117 ProcessGroupNCCL.cpp:669] [Rank 14] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.901784 18816 ProcessGroupNCCL.cpp:835] [Rank 55] NCCL watchdog thread started!
I1027 11:40:59.901772 18006 ProcessGroupNCCL.cpp:669] [Rank 55] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.896070 20257 ProcessGroupNCCL.cpp:669] [Rank 19] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.896072 20966 ProcessGroupNCCL.cpp:835] [Rank 19] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.878460 14675 ProcessGroupNCCL.cpp:835] [Rank 27] NCCL watchdog thread started!
I1027 11:40:59.878463 13849 ProcessGroupNCCL.cpp:669] [Rank 27] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.890164 14516 ProcessGroupNCCL.cpp:835] [Rank 67] NCCL watchdog thread started!
I1027 11:40:59.890167 13569 ProcessGroupNCCL.cpp:669] [Rank 67] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.901363 2011 ProcessGroupNCCL.cpp:669] [Rank 71] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.901387 2630 ProcessGroupNCCL.cpp:835] [Rank 71] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.900033 16692 ProcessGroupNCCL.cpp:669] [Rank 51] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.900041 17425 ProcessGroupNCCL.cpp:835] [Rank 51] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.890388 15653 ProcessGroupNCCL.cpp:835] [Rank 87] NCCL watchdog thread started!
I1027 11:40:59.890359 14824 ProcessGroupNCCL.cpp:669] [Rank 87] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.896361 21052 ProcessGroupNCCL.cpp:669] [Rank 7] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.902295 19958 ProcessGroupNCCL.cpp:669] [Rank 35] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.902237 17107 ProcessGroupNCCL.cpp:835] [Rank 59] NCCL watchdog thread started!
I1027 11:40:59.896369 21982 ProcessGroupNCCL.cpp:835] [Rank 7] NCCL watchdog thread started!
I1027 11:40:59.902320 20826 ProcessGroupNCCL.cpp:835] [Rank 35] NCCL watchdog thread started!
I1027 11:40:59.902222 16263 ProcessGroupNCCL.cpp:669] [Rank 59] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.896574 19751 ProcessGroupNCCL.cpp:669] [Rank 75] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.895948 16072 ProcessGroupNCCL.cpp:835] [Rank 79] NCCL watchdog thread started!
I1027 11:40:59.896596 20641 ProcessGroupNCCL.cpp:835] [Rank 75] NCCL watchdog thread started!
I1027 11:40:59.895939 15275 ProcessGroupNCCL.cpp:669] [Rank 79] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.896593 6275 ProcessGroupNCCL.cpp:835] [Rank 95] NCCL watchdog thread started!
I1027 11:40:59.896600 5556 ProcessGroupNCCL.cpp:669] [Rank 95] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.896472 13023 ProcessGroupNCCL.cpp:835] [Rank 15] NCCL watchdog thread started!
I1027 11:40:59.896476 12118 ProcessGroupNCCL.cpp:669] [Rank 15] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.896719 30864 ProcessGroupNCCL.cpp:835] [Rank 2] NCCL watchdog thread started!
I1027 11:40:59.896713 29772 ProcessGroupNCCL.cpp:669] [Rank 2] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.896756 1873 ProcessGroupNCCL.cpp:669] [Rank 23] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.879051 6912 ProcessGroupNCCL.cpp:835] [Rank 11] NCCL watchdog thread started!
I1027 11:40:59.896766 2524 ProcessGroupNCCL.cpp:835] [Rank 23] NCCL watchdog thread started!
I1027 11:40:59.879048 6030 ProcessGroupNCCL.cpp:669] [Rank 11] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.903501 15704 ProcessGroupNCCL.cpp:835] [Rank 47] NCCL watchdog thread started!
I1027 11:40:59.903491 14883 ProcessGroupNCCL.cpp:669] [Rank 47] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.903488 16393 ProcessGroupNCCL.cpp:835] [Rank 39] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.897190 22358 ProcessGroupNCCL.cpp:669] [Rank 63] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.903478 15590 ProcessGroupNCCL.cpp:669] [Rank 39] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.897241 23265 ProcessGroupNCCL.cpp:835] [Rank 63] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.897130 6385 ProcessGroupNCCL.cpp:835] [Rank 83] NCCL watchdog thread started!
I1027 11:40:59.897117 5796 ProcessGroupNCCL.cpp:669] [Rank 83] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.901348 16961 ProcessGroupNCCL.cpp:835] [Rank 91] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:40:59.896620 6532 ProcessGroupNCCL.cpp:835] [Rank 31] NCCL watchdog thread started!
I1027 11:40:59.901336 16114 ProcessGroupNCCL.cpp:669] [Rank 91] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:40:59.896611 5836 ProcessGroupNCCL.cpp:669] [Rank 31] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.688014 15656 ProcessGroupNCCL.cpp:835] [Rank 84] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.688114 14535 ProcessGroupNCCL.cpp:835] [Rank 64] NCCL watchdog thread started!
I1027 11:41:00.688097 13566 ProcessGroupNCCL.cpp:669] [Rank 64] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.687999 14825 ProcessGroupNCCL.cpp:669] [Rank 84] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.688239 14536 ProcessGroupNCCL.cpp:835] [Rank 65] NCCL watchdog thread started!
I1027 11:41:00.688216 13568 ProcessGroupNCCL.cpp:669] [Rank 65] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.697958 17004 ProcessGroupNCCL.cpp:835] [Rank 88] NCCL watchdog thread started!
I1027 11:41:00.697953 16113 ProcessGroupNCCL.cpp:669] [Rank 88] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.698066 17005 ProcessGroupNCCL.cpp:835] [Rank 89] NCCL watchdog thread started!
I1027 11:41:00.698074 16115 ProcessGroupNCCL.cpp:669] [Rank 89] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.688750 15658 ProcessGroupNCCL.cpp:835] [Rank 85] NCCL watchdog thread started!
I1027 11:41:00.688750 14826 ProcessGroupNCCL.cpp:669] [Rank 85] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.694737 21985 ProcessGroupNCCL.cpp:835] [Rank 5] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.694449 23268 ProcessGroupNCCL.cpp:835] [Rank 60] NCCL watchdog thread started!
I1027 11:41:00.694729 21051 ProcessGroupNCCL.cpp:669] [Rank 5] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.694425 22360 ProcessGroupNCCL.cpp:669] [Rank 60] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.700575 19959 ProcessGroupNCCL.cpp:669] [Rank 32] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.700656 20844 ProcessGroupNCCL.cpp:835] [Rank 32] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.700824 15707 ProcessGroupNCCL.cpp:835] [Rank 44] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.677176 14677 ProcessGroupNCCL.cpp:835] [Rank 24] NCCL watchdog thread started!
I1027 11:41:00.677165 13850 ProcessGroupNCCL.cpp:669] [Rank 24] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.694236 2526 ProcessGroupNCCL.cpp:835] [Rank 20] NCCL watchdog thread started!
I1027 11:41:00.700820 14885 ProcessGroupNCCL.cpp:669] [Rank 44] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.694229 1872 ProcessGroupNCCL.cpp:669] [Rank 20] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.676532 6032 ProcessGroupNCCL.cpp:669] [Rank 8] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.698663 16695 ProcessGroupNCCL.cpp:669] [Rank 48] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.698670 17428 ProcessGroupNCCL.cpp:835] [Rank 48] NCCL watchdog thread started!
I1027 11:41:00.676607 6915 ProcessGroupNCCL.cpp:835] [Rank 8] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.695044 20968 ProcessGroupNCCL.cpp:835] [Rank 16] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.695091 21987 ProcessGroupNCCL.cpp:835] [Rank 4] NCCL watchdog thread started!
I1027 11:41:00.695036 20254 ProcessGroupNCCL.cpp:669] [Rank 16] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.695093 21053 ProcessGroupNCCL.cpp:669] [Rank 4] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.705013 16075 ProcessGroupNCCL.cpp:835] [Rank 76] NCCL watchdog thread started!
I1027 11:41:00.705024 15273 ProcessGroupNCCL.cpp:669] [Rank 76] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.900769 18819 ProcessGroupNCCL.cpp:835] [Rank 52] NCCL watchdog thread started!
I1027 11:41:00.900777 18003 ProcessGroupNCCL.cpp:669] [Rank 52] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.900789 2633 ProcessGroupNCCL.cpp:835] [Rank 68] NCCL watchdog thread started!
I1027 11:41:00.900777 2010 ProcessGroupNCCL.cpp:669] [Rank 68] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.895587 6278 ProcessGroupNCCL.cpp:835] [Rank 92] NCCL watchdog thread started!
I1027 11:41:00.895591 5554 ProcessGroupNCCL.cpp:669] [Rank 92] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.895395 13026 ProcessGroupNCCL.cpp:835] [Rank 12] NCCL watchdog thread started!
I1027 11:41:00.895373 12120 ProcessGroupNCCL.cpp:669] [Rank 12] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.901515 17121 ProcessGroupNCCL.cpp:835] [Rank 56] NCCL watchdog thread started!
I1027 11:41:00.901512 16264 ProcessGroupNCCL.cpp:669] [Rank 56] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.894690 6570 ProcessGroupNCCL.cpp:835] [Rank 28] NCCL watchdog thread started!
I1027 11:41:00.894701 5834 ProcessGroupNCCL.cpp:669] [Rank 28] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.895599 29771 ProcessGroupNCCL.cpp:669] [Rank 1] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.895607 30867 ProcessGroupNCCL.cpp:835] [Rank 1] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.901806 15591 ProcessGroupNCCL.cpp:669] [Rank 36] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.901808 16396 ProcessGroupNCCL.cpp:835] [Rank 36] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.899602 17430 ProcessGroupNCCL.cpp:835] [Rank 49] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.901721 17122 ProcessGroupNCCL.cpp:835] [Rank 57] NCCL watchdog thread started!
I1027 11:41:00.899591 16694 ProcessGroupNCCL.cpp:669] [Rank 49] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.901712 16265 ProcessGroupNCCL.cpp:669] [Rank 57] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.896236 20650 ProcessGroupNCCL.cpp:835] [Rank 72] NCCL watchdog thread started!
I1027 11:41:00.896224 19752 ProcessGroupNCCL.cpp:669] [Rank 72] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.895573 16077 ProcessGroupNCCL.cpp:835] [Rank 77] NCCL watchdog thread started!
I1027 11:41:00.895565 15272 ProcessGroupNCCL.cpp:669] [Rank 77] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.896273 20651 ProcessGroupNCCL.cpp:835] [Rank 73] NCCL watchdog thread started!
I1027 11:41:00.896270 19749 ProcessGroupNCCL.cpp:669] [Rank 73] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.901351 2636 ProcessGroupNCCL.cpp:835] [Rank 69] NCCL watchdog thread started!
I1027 11:41:00.901343 2009 ProcessGroupNCCL.cpp:669] [Rank 69] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.877744 6922 ProcessGroupNCCL.cpp:835] [Rank 9] NCCL watchdog thread started!
I1027 11:41:00.877739 6031 ProcessGroupNCCL.cpp:669] [Rank 9] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.895627 1874 ProcessGroupNCCL.cpp:669] [Rank 21] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.895638 2528 ProcessGroupNCCL.cpp:835] [Rank 21] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.895241 6572 ProcessGroupNCCL.cpp:835] [Rank 29] NCCL watchdog thread started!
I1027 11:41:00.895221 5833 ProcessGroupNCCL.cpp:669] [Rank 29] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.902349 14884 ProcessGroupNCCL.cpp:669] [Rank 45] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.902395 15709 ProcessGroupNCCL.cpp:835] [Rank 45] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.902419 18821 ProcessGroupNCCL.cpp:835] [Rank 53] NCCL watchdog thread started!
I1027 11:41:00.902426 18004 ProcessGroupNCCL.cpp:669] [Rank 53] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.902416 20846 ProcessGroupNCCL.cpp:835] [Rank 33] NCCL watchdog thread started!
I1027 11:41:00.902411 19960 ProcessGroupNCCL.cpp:669] [Rank 33] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.890990 16999 ProcessGroupNCCL.cpp:835] [Rank 41] NCCL watchdog thread started!
I1027 11:41:00.890982 16145 ProcessGroupNCCL.cpp:669] [Rank 41] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.896215 5794 ProcessGroupNCCL.cpp:669] [Rank 80] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.896296 6388 ProcessGroupNCCL.cpp:835] [Rank 80] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.897186 29773 ProcessGroupNCCL.cpp:669] [Rank 3] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.897205 30869 ProcessGroupNCCL.cpp:835] [Rank 3] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.897157 6390 ProcessGroupNCCL.cpp:835] [Rank 81] NCCL watchdog thread started!
I1027 11:41:00.897150 5795 ProcessGroupNCCL.cpp:669] [Rank 81] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.892146 17001 ProcessGroupNCCL.cpp:835] [Rank 40] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.897753 6280 ProcessGroupNCCL.cpp:835] [Rank 93] NCCL watchdog thread started!
I1027 11:41:00.892140 16147 ProcessGroupNCCL.cpp:669] [Rank 40] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:00.897747 5553 ProcessGroupNCCL.cpp:669] [Rank 93] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:00.897836 20971 ProcessGroupNCCL.cpp:835] [Rank 17] NCCL watchdog thread started!
I1027 11:41:00.897830 20255 ProcessGroupNCCL.cpp:669] [Rank 17] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:01.677418 13848 ProcessGroupNCCL.cpp:669] [Rank 25] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1027 11:41:01.677438 14968 ProcessGroupNCCL.cpp:835] [Rank 25] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:01.702608 16478 ProcessGroupNCCL.cpp:835] [Rank 37] NCCL watchdog thread started!
I1027 11:41:01.702615 15592 ProcessGroupNCCL.cpp:669] [Rank 37] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1027 11:41:01.701503 31064 ProcessGroupNCCL.cpp:835] [Rank 0] NCCL watchdog thread started!
I1027 11:41:01.701501 29770 ProcessGroupNCCL.cpp:669] [Rank 0] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Traceback (most recent call last):
File "../fine-tune.py", line 159, in <module>
train()
File "../fine-tune.py", line 120, in train
model = transformers.AutoModelForCausalLM.from_pretrained(
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/models/auto/auto_factory.py", line 466, in from_pretrained
return model_class.from_pretrained(
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 685, in from_pretrained
return super(BaichuanForCausalLM, cls).from_pretrained(pretrained_model_name_or_path, *model_args,
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/modeling_utils.py", line 2629, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 555, in __init__
self.model = BaichuanModel(config)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 356, in __init__
self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/torch/nn/modules/sparse.py", line 141, in __init__
self.weight = Parameter(torch.empty((num_embeddings, embedding_dim), **factory_kwargs))
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 229, in wrapped_fn
tensor: Tensor = fn(*args, **kwargs)
RuntimeError: HIP error: initialization error
HIP kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing HIP_LAUNCH_BLOCKING=1.
Traceback (most recent call last):
File "../fine-tune.py", line 159, in <module>
train()
File "../fine-tune.py", line 120, in train
model = transformers.AutoModelForCausalLM.from_pretrained(
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/models/auto/auto_factory.py", line 466, in from_pretrained
return model_class.from_pretrained(
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 685, in from_pretrained
return super(BaichuanForCausalLM, cls).from_pretrained(pretrained_model_name_or_path, *model_args,
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/modeling_utils.py", line 2629, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 555, in __init__
self.model = BaichuanModel(config)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 356, in __init__
self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/torch/nn/modules/sparse.py", line 141, in __init__
self.weight = Parameter(torch.empty((num_embeddings, embedding_dim), **factory_kwargs))
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 229, in wrapped_fn
tensor: Tensor = fn(*args, **kwargs)
RuntimeError: HIP error: initialization error
HIP kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing HIP_LAUNCH_BLOCKING=1.
I1027 11:41:27.398515 14536 ProcessGroupNCCL.cpp:837] [Rank 65] NCCL watchdog thread terminated normally
I1027 11:41:27.399645 14535 ProcessGroupNCCL.cpp:837] [Rank 64] NCCL watchdog thread terminated normally
Traceback (most recent call last):
File "../fine-tune.py", line 159, in <module>
train()
File "../fine-tune.py", line 120, in train
model = transformers.AutoModelForCausalLM.from_pretrained(
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/models/auto/auto_factory.py", line 466, in from_pretrained
return model_class.from_pretrained(
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 685, in from_pretrained
return super(BaichuanForCausalLM, cls).from_pretrained(pretrained_model_name_or_path, *model_args,
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/modeling_utils.py", line 2629, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 555, in __init__
self.model = BaichuanModel(config)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 356, in __init__
self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/torch/nn/modules/sparse.py", line 141, in __init__
self.weight = Parameter(torch.empty((num_embeddings, embedding_dim), **factory_kwargs))
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 229, in wrapped_fn
tensor: Tensor = fn(*args, **kwargs)
RuntimeError: HIP error: initialization error
HIP kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing HIP_LAUNCH_BLOCKING=1.
I1027 11:41:28.484403 14514 ProcessGroupNCCL.cpp:837] [Rank 66] NCCL watchdog thread terminated normally
Traceback (most recent call last):
File "../fine-tune.py", line 159, in <module>
train()
File "../fine-tune.py", line 120, in train
model = transformers.AutoModelForCausalLM.from_pretrained(
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/models/auto/auto_factory.py", line 466, in from_pretrained
return model_class.from_pretrained(
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 685, in from_pretrained
return super(BaichuanForCausalLM, cls).from_pretrained(pretrained_model_name_or_path, *model_args,
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/modeling_utils.py", line 2629, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 555, in __init__
self.model = BaichuanModel(config)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 356, in __init__
self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/torch/nn/modules/sparse.py", line 141, in __init__
self.weight = Parameter(torch.empty((num_embeddings, embedding_dim), **factory_kwargs))
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 229, in wrapped_fn
tensor: Tensor = fn(*args, **kwargs)
RuntimeError: HIP error: initialization error
HIP kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing HIP_LAUNCH_BLOCKING=1.
I1027 11:41:29.061339 14968 ProcessGroupNCCL.cpp:837] [Rank 25] NCCL watchdog thread terminated normally
Traceback (most recent call last):
File "../fine-tune.py", line 159, in <module>
train()
File "../fine-tune.py", line 120, in train
model = transformers.AutoModelForCausalLM.from_pretrained(
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/models/auto/auto_factory.py", line 466, in from_pretrained
return model_class.from_pretrained(
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 685, in from_pretrained
return super(BaichuanForCausalLM, cls).from_pretrained(pretrained_model_name_or_path, *model_args,
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/modeling_utils.py", line 2629, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 555, in __init__
self.model = BaichuanModel(config)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 356, in __init__
self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/torch/nn/modules/sparse.py", line 141, in __init__
self.weight = Parameter(torch.empty((num_embeddings, embedding_dim), **factory_kwargs))
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 229, in wrapped_fn
tensor: Tensor = fn(*args, **kwargs)
RuntimeError: HIP error: initialization error
HIP kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing HIP_LAUNCH_BLOCKING=1.
I1027 11:41:29.699555 14673 ProcessGroupNCCL.cpp:837] [Rank 26] NCCL watchdog thread terminated normally
Traceback (most recent call last):
File "../fine-tune.py", line 159, in <module>
train()
File "../fine-tune.py", line 120, in train
model = transformers.AutoModelForCausalLM.from_pretrained(
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/models/auto/auto_factory.py", line 466, in from_pretrained
return model_class.from_pretrained(
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 685, in from_pretrained
return super(BaichuanForCausalLM, cls).from_pretrained(pretrained_model_name_or_path, *model_args,
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/transformers/modeling_utils.py", line 2629, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 555, in __init__
self.model = BaichuanModel(config)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/.cache/huggingface/modules/transformers_modules/baichuan2-7b-base/modeling_baichuan.py", line 356, in __init__
self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 382, in wrapper
f(module, *args, **kwargs)
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/torch/nn/modules/sparse.py", line 141, in __init__
self.weight = Parameter(torch.empty((num_embeddings, embedding_dim), **factory_kwargs))
File "/public/home/zhaoying1/anaconda3/envs/baichuan/lib/python3.8/site-packages/deepspeed/runtime/zero/partition_parameters.py", line 229, in wrapped_fn
tensor: Tensor = fn(*args, **kwargs)
RuntimeError: HIP error: initialization error
HIP kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing HIP_LAUNCH_BLOCKING=1.
I1027 11:41:31.941821 14677 ProcessGroupNCCL.cpp:837] [Rank 24] NCCL watchdog thread terminated normally
slurmstepd: error: *** JOB 45668680 ON b17r3n15 CANCELLED AT 2023-10-31T16:57:25 ***
START TIME: Fri Oct 27 11:39:55 CST 2023
b17r3n15
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================0
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================2
WORLD_SIZE*************96
LRANK===============================2
LRANK===============================1
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================0
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================2
WORLD_SIZE*************96
LRANK===============================2
LRANK===============================2
LRANK===============================2
LRANK===============================2
LRANK===============================2
WORLD_SIZE*************96
LRANK===============================2
WORLD_SIZE*************96
LRANK===============================2
LRANK===============================2
LRANK===============================2
LRANK===============================2
LRANK===============================2
LRANK===============================2
LRANK===============================2
LRANK===============================1
WORLD_SIZE*************96
LRANK===============================3
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
LRANK===============================2
WORLD_SIZE*************96
LRANK===============================2
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
LRANK===============================2
LRANK===============================2
LRANK===============================2
LRANK===============================2
WORLD_SIZE*************96
LRANK===============================2
LRANK===============================3
WORLD_SIZE*************96
LRANK===============================3
LRANK===============================3
LRANK===============================3
LRANK===============================3
LRANK===============================3
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
LRANK===============================2
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
LRANK===============================3
LRANK===============================3
WORLD_SIZE*************96
LRANK===============================3
LRANK===============================3
LRANK===============================3
LRANK===============================3
LRANK===============================3
LRANK===============================3
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
LRANK===============================3
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
LRANK===============================3
LRANK===============================3
LRANK===============================3
LRANK===============================3
LRANK===============================3
LRANK===============================3
LRANK===============================3
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
WORLD_SIZE*************96
LRANK===============================3
WORLD_SIZE*************96
LRANK===============================1
WORLD_SIZE*************96
[2023-10-27 11:40:51,867] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,890] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,886] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,888] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,891] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,890] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,867] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,889] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,875] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,889] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,885] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,886] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,886] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,892] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,885] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,886] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,886] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,890] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,892] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,869] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,882] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,888] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,878] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,891] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,891] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,868] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,889] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,876] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,890] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,886] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,886] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,886] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,893] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,886] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,886] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,888] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,890] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,892] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,869] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,882] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,888] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,878] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,891] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,893] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,868] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,889] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,877] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,891] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,888] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,894] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,888] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,888] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,890] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,888] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,892] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,869] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,882] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,889] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,878] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,894] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,892] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,878] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,891] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,888] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,889] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,889] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,894] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,889] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,891] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,888] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,893] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,869] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,887] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,883] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,889] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:51,878] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:59,475] [INFO] [comm.py:606:init_distributed] Not using the DeepSpeed or dist launchers, attempting to detect MPI environment...
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=0, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:622:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
[2023-10-27 11:40:59,691] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=20, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,698] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=32, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,674] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=24, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=92, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,696] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=48, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,698] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=56, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=4, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=80, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,698] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=44, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=60, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=1, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=21, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,698] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=33, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,675] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=25, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=93, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,696] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=49, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,698] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=57, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=5, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=81, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,698] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=45, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=61, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=72, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=16, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,696] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=88, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,697] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=68, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,687] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=40, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,686] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=65, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,691] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=29, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,686] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=85, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,698] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=37, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,698] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=53, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=77, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,674] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=9, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=73, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=17, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,696] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=89, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,697] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=69, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,687] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=41, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,686] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=64, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,691] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=28, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,686] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=84, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,698] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=36, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,698] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=52, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=76, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,674] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=8, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=13, local_rank=1, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,692] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=12, local_rank=0, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=2, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,694] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=94, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=78, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,697] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=50, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,676] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=26, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,699] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=58, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,694] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=74, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,700] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=38, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,699] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=70, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=82, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=3, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,699] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=71, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=62, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,700] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=46, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,687] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=66, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=22, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,675] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=10, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,694] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=6, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,688] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=42, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,687] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=86, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,697] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=90, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,700] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=54, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=30, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,694] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=18, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,699] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=34, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=14, local_rank=2, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=63, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,700] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=47, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,688] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=67, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=23, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,675] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=11, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,694] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=7, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,688] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=43, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,688] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=87, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,697] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=91, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,700] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=55, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=31, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,694] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=19, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,700] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=35, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=15, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,697] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=51, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,694] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=75, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,700] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=39, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,676] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=27, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=79, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,693] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=83, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,700] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=59, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
[2023-10-27 11:40:59,694] [INFO] [comm.py:656:mpi_discovery] Discovered MPI settings of world_rank=95, local_rank=3, world_size=96, master_addr=10.2.17.56, master_port=29500
b17r3n15:29770:29770 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.56<0>
b17r3n15:29770:29770 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n15:29770:29770 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
RCCL version 2.13.4+hip5.4 HEAD:82f11f7
b17r3n15:29770:31277 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.56<0>
b17r3n15:29770:31277 [0] NCCL INFO Using network IB
b17r3n15:29772:29772 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.56<0>
b17r3n15:29772:29772 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n15:29772:29772 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n15:29771:29771 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.56<0>
b17r3n15:29771:29771 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n15:29771:29771 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n15:29773:29773 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.56<0>
b17r3n15:29773:29773 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n15:29773:29773 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n05:16147:16147 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.66<0>
b17r4n05:16147:16147 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n13:19752:19752 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.74<0>
b17r4n13:19752:19752 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n04:15593:15593 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.65<0>
b17r4n04:15593:15593 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n04:15591:15591 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.65<0>
b17r4n04:15591:15591 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n08:18005:18005 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.69<0>
b17r4n08:18005:18005 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n13:19749:19749 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.74<0>
b17r4n13:19749:19749 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n16:21053:21053 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.57<0>
b17r3n16:21053:21053 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n18:5554:5554 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.79<0>
b17r4n18:5554:5554 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n07:16695:16695 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.68<0>
b17r4n07:16695:16695 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n18:5553:5553 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.79<0>
b17r4n14:15272:15272 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.75<0>
b17r4n14:15272:15272 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n18:5553:5553 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n17:16113:16113 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.78<0>
b17r4n17:16113:16113 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n05:16147:16147 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n02:5834:5834 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.63<0>
b17r4n02:5834:5834 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n17:16115:16115 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.78<0>
b17r4n13:19752:19752 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n17:16115:16115 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n17:6032:6032 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.58<0>
b17r3n17:6032:6032 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n17:6029:6029 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.58<0>
b17r3n17:6029:6029 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n13:19749:19749 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n14:15273:15273 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.75<0>
b17r4n14:15273:15273 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n13:19750:19750 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.74<0>
b17r4n13:19750:19750 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n07:16692:16692 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.68<0>
b17r4n07:16693:16693 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.68<0>
b17r4n07:16693:16693 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n07:16692:16692 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n08:18006:18006 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.69<0>
b17r4n05:16145:16145 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.66<0>
b17r4n04:15593:15593 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n08:18006:18006 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n05:16148:16148 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.66<0>
b17r4n18:5555:5555 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.79<0>
b17r4n05:16145:16145 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n18:5555:5555 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n12:2010:2010 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.73<0>
b17r4n12:2010:2010 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n05:16148:16148 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n13:19751:19751 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.74<0>
b17r4n13:19751:19751 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n04:15591:15591 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n06:14885:14885 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.67<0>
b17r4n06:14885:14885 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n13:19750:19750 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n08:18004:18004 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.69<0>
b17r4n05:16145:16145 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n05:16148:16148 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n09:16265:16265 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.70<0>
b17r4n09:16265:16265 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n08:18004:18004 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n16:14826:14826 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.77<0>
b17r4n16:14826:14826 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n02:5833:5833 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.63<0>
b17r4n02:5833:5833 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n13:19751:19751 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n17:6030:6030 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.58<0>
b17r4n08:18005:18005 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n17:6030:6030 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n17:6031:6031 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.58<0>
b17r3n17:6031:6031 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n00:1874:1874 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.61<0>
b17r4n00:1874:1874 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n08:18006:18006 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n08:18003:18003 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.69<0>
b17r4n06:14884:14884 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.67<0>
b17r4n08:18003:18003 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n06:14884:14884 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n12:2009:2009 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.73<0>
b17r4n12:2012:2012 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.73<0>
b17r4n12:2012:2012 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n12:2009:2009 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n18:5556:5556 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.79<0>
b17r4n08:18004:18004 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n18:5556:5556 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n05:16146:16146 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.66<0>
b17r4n05:16146:16146 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n16:21053:21053 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n08:18003:18003 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n18:5554:5554 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n18:5553:5553 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n06:14883:14883 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.67<0>
b17r4n06:14883:14883 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n05:16146:16146 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n14:15272:15272 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n17:16115:16115 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n18:5555:5555 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n02:5835:5835 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.63<0>
b17r4n02:5835:5835 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n07:16695:16695 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n09:16266:16266 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.70<0>
b17r4n14:15273:15273 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n09:16266:16266 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n17:16113:16113 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n18:5556:5556 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n16:14827:14827 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.77<0>
b17r4n07:16692:16692 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n16:14827:14827 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n09:16264:16264 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.70<0>
b17r4n09:16264:16264 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n06:14882:14882 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.67<0>
b17r4n06:14882:14882 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n02:5836:5836 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.63<0>
b17r4n02:5836:5836 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n00:1875:1875 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.61<0>
b17r4n07:16693:16693 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n00:1875:1875 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n16:21054:21054 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.57<0>
b17r3n16:21052:21052 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.57<0>
b17r3n16:21054:21054 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n04:15592:15592 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.65<0>
b17r4n04:15592:15592 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n16:21052:21052 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n07:16694:16694 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.68<0>
b17r4n16:14825:14825 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.77<0>
b17r4n07:16694:16694 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n16:14825:14825 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n15:5795:5795 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.76<0>
b17r4n15:5795:5795 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n14:15274:15274 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.75<0>
b17r4n14:15274:15274 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n02:5834:5834 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n16:21052:21052 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n16:21054:21054 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n04:15592:15592 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n04:15590:15590 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.65<0>
b17r4n12:2011:2011 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.73<0>
b17r4n04:15590:15590 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n07:16694:16694 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n12:2011:2011 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n17:6032:6032 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n14:15275:15275 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.75<0>
b17r4n14:15275:15275 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n14:15274:15274 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n02:5833:5833 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n17:6029:6029 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n04:15590:15590 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n14:15275:15275 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n17:6030:6030 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n02:5835:5835 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n15:5794:5794 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.76<0>
b17r4n15:5794:5794 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n17:6031:6031 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n02:5836:5836 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n12:2010:2010 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n12:2012:2012 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n06:14884:14884 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n06:14885:14885 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n09:16266:16266 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n09:16264:16264 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n00:1873:1873 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.61<0>
b17r4n00:1873:1873 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n12:2009:2009 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n06:14883:14883 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n17:16112:16112 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.78<0>
b17r4n09:16265:16265 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n16:14826:14826 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n17:16114:16114 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.78<0>
b17r4n17:16112:16112 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n17:16114:16114 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n00:1874:1874 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n00:1875:1875 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n00:1873:1873 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n06:14882:14882 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n12:2011:2011 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n16:14827:14827 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n17:16112:16112 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n17:16114:16114 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n16:14824:14824 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.77<0>
b17r4n16:14824:14824 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n16:14825:14825 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n00:1872:1872 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.61<0>
b17r4n00:1872:1872 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n16:14824:14824 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n09:16263:16263 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.70<0>
b17r4n09:16263:16263 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n00:1872:1872 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n03:19959:19959 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.64<0>
b17r4n03:19959:19959 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n09:16263:16263 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n15:5795:5795 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n15:5794:5794 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n15:5793:5793 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.76<0>
b17r4n03:19960:19960 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.64<0>
b17r4n03:19960:19960 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n15:5793:5793 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n15:5796:5796 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.76<0>
b17r4n15:5796:5796 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n15:5793:5793 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n15:5796:5796 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n03:19957:19957 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.64<0>
b17r4n03:19958:19958 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.64<0>
b17r4n03:19957:19957 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n03:19958:19958 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n03:19959:19959 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n03:19960:19960 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n03:19958:19958 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n03:19957:19957 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n16:21051:21051 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.57<0>
b17r3n16:21051:21051 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n16:21051:21051 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n10:22360:22360 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.71<0>
b17r4n10:22360:22360 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n10:22361:22361 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.71<0>
b17r4n10:22361:22361 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n10:22360:22360 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n10:22361:22361 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n10:22358:22358 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.71<0>
b17r4n10:22358:22358 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n10:22358:22358 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r4n10:22359:22359 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.71<0>
b17r4n10:22359:22359 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r4n10:22359:22359 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n15:29771:31477 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.56<0>
b17r3n15:29771:31477 [1] NCCL INFO Using network IB
b17r3n15:29772:31478 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.56<0>
b17r3n15:29772:31478 [2] NCCL INFO Using network IB
b17r3n15:29773:31483 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.56<0>
b17r3n15:29773:31483 [3] NCCL INFO Using network IB
b17r4n05:16148:17553 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.66<0>
b17r4n05:16148:17553 [2] NCCL INFO Using network IB
b17r4n05:16145:17555 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.66<0>
b17r4n05:16145:17555 [1] NCCL INFO Using network IB
b17r4n05:16146:17556 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.66<0>
b17r4n05:16146:17556 [3] NCCL INFO Using network IB
b17r4n05:16147:17546 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.66<0>
b17r4n05:16147:17546 [0] NCCL INFO Using network IB
b17r4n13:19752:21189 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.74<0>
b17r4n13:19752:21189 [0] NCCL INFO Using network IB
b17r4n13:19750:21192 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.74<0>
b17r4n13:19750:21192 [2] NCCL INFO Using network IB
b17r4n13:19749:21187 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.74<0>
b17r4n13:19749:21187 [1] NCCL INFO Using network IB
b17r4n13:19751:21190 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.74<0>
b17r4n13:19751:21190 [3] NCCL INFO Using network IB
b17r4n14:15272:16709 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.75<0>
b17r4n14:15272:16709 [1] NCCL INFO Using network IB
b17r4n14:15275:16716 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.75<0>
b17r4n14:15275:16716 [3] NCCL INFO Using network IB
b17r4n14:15273:16711 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.75<0>
b17r4n14:15273:16711 [0] NCCL INFO Using network IB
b17r4n14:15274:16717 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.75<0>
b17r4n14:15274:16717 [2] NCCL INFO Using network IB
b17r4n08:18006:19383 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.69<0>
b17r4n08:18006:19383 [3] NCCL INFO Using network IB
b17r4n08:18003:19384 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.69<0>
b17r4n08:18003:19384 [0] NCCL INFO Using network IB
b17r4n08:18004:19385 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.69<0>
b17r4n08:18004:19385 [1] NCCL INFO Using network IB
b17r4n08:18005:19379 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.69<0>
b17r4n08:18005:19379 [2] NCCL INFO Using network IB
b17r4n04:15592:16961 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.65<0>
b17r4n04:15592:16961 [1] NCCL INFO Using network IB
b17r4n04:15590:16962 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.65<0>
b17r4n04:15590:16962 [3] NCCL INFO Using network IB
b17r4n04:15591:16957 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.65<0>
b17r4n04:15591:16957 [0] NCCL INFO Using network IB
b17r4n04:15593:16956 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.65<0>
b17r4n04:15593:16956 [2] NCCL INFO Using network IB
b17r3n16:21052:22531 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.57<0>
b17r3n16:21052:22531 [3] NCCL INFO Using network IB
b17r3n16:21054:22535 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.57<0>
b17r3n16:21054:22535 [2] NCCL INFO Using network IB
b17r3n16:21053:22526 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.57<0>
b17r3n16:21053:22526 [0] NCCL INFO Using network IB
b17r4n07:16692:17986 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.68<0>
b17r4n07:16692:17986 [3] NCCL INFO Using network IB
b17r4n07:16694:17987 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.68<0>
b17r4n07:16694:17987 [1] NCCL INFO Using network IB
b17r4n07:16695:17983 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.68<0>
b17r4n07:16695:17983 [0] NCCL INFO Using network IB
b17r4n07:16693:17982 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.68<0>
b17r4n07:16693:17982 [2] NCCL INFO Using network IB
b17r4n17:16113:17593 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.78<0>
b17r4n17:16113:17593 [0] NCCL INFO Using network IB
b17r4n17:16115:17594 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.78<0>
b17r4n17:16115:17594 [1] NCCL INFO Using network IB
b17r4n17:16114:17602 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.78<0>
b17r4n17:16114:17602 [3] NCCL INFO Using network IB
b17r4n17:16112:17599 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.78<0>
b17r4n17:16112:17599 [2] NCCL INFO Using network IB
b17r4n18:5554:6818 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.79<0>
b17r4n18:5554:6818 [0] NCCL INFO Using network IB
b17r4n18:5553:6820 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.79<0>
b17r4n18:5553:6820 [1] NCCL INFO Using network IB
b17r4n18:5556:6821 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.79<0>
b17r4n18:5556:6821 [3] NCCL INFO Using network IB
b17r4n18:5555:6817 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.79<0>
b17r4n18:5555:6817 [2] NCCL INFO Using network IB
b17r3n17:6030:7475 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.58<0>
b17r3n17:6030:7475 [3] NCCL INFO Using network IB
b17r3n17:6032:7472 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.58<0>
b17r3n17:6032:7472 [0] NCCL INFO Using network IB
b17r3n17:6031:7471 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.58<0>
b17r3n17:6031:7471 [1] NCCL INFO Using network IB
b17r3n17:6029:7476 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.58<0>
b17r3n17:6029:7476 [2] NCCL INFO Using network IB
b17r4n02:5833:7113 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.63<0>
b17r4n02:5833:7113 [1] NCCL INFO Using network IB
b17r4n02:5835:7111 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.63<0>
b17r4n02:5835:7111 [2] NCCL INFO Using network IB
b17r4n02:5834:7112 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.63<0>
b17r4n02:5834:7112 [0] NCCL INFO Using network IB
b17r4n12:2009:3188 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.73<0>
b17r4n12:2009:3188 [1] NCCL INFO Using network IB
b17r4n02:5836:7114 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.63<0>
b17r4n02:5836:7114 [3] NCCL INFO Using network IB
b17r4n12:2012:3189 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.73<0>
b17r4n12:2012:3189 [2] NCCL INFO Using network IB
b17r4n12:2010:3191 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.73<0>
b17r4n12:2010:3191 [0] NCCL INFO Using network IB
b17r4n12:2011:3187 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.73<0>
b17r4n12:2011:3187 [3] NCCL INFO Using network IB
b17r4n06:14885:16285 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.67<0>
b17r4n06:14885:16285 [0] NCCL INFO Using network IB
b17r4n06:14884:16288 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.67<0>
b17r4n06:14884:16288 [1] NCCL INFO Using network IB
b17r4n06:14882:16289 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.67<0>
b17r4n06:14882:16289 [2] NCCL INFO Using network IB
b17r4n06:14883:16284 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.67<0>
b17r4n06:14883:16284 [3] NCCL INFO Using network IB
b17r4n16:14825:16209 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.77<0>
b17r4n16:14825:16209 [0] NCCL INFO Using network IB
b17r4n16:14824:16211 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.77<0>
b17r4n16:14824:16211 [3] NCCL INFO Using network IB
b17r4n16:14826:16210 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.77<0>
b17r4n16:14826:16210 [1] NCCL INFO Using network IB
b17r4n16:14827:16212 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.77<0>
b17r4n16:14827:16212 [2] NCCL INFO Using network IB
b17r4n00:1875:3314 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.61<0>
b17r4n00:1875:3314 [2] NCCL INFO Using network IB
b17r4n00:1872:3313 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.61<0>
b17r4n00:1872:3313 [0] NCCL INFO Using network IB
b17r4n00:1874:3315 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.61<0>
b17r4n00:1874:3315 [1] NCCL INFO Using network IB
b17r4n00:1873:3317 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.61<0>
b17r4n00:1873:3317 [3] NCCL INFO Using network IB
b17r4n09:16265:17674 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.70<0>
b17r4n09:16265:17674 [1] NCCL INFO Using network IB
b17r4n09:16264:17677 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.70<0>
b17r4n09:16264:17677 [0] NCCL INFO Using network IB
b17r4n09:16263:17675 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.70<0>
b17r4n09:16263:17675 [3] NCCL INFO Using network IB
b17r4n09:16266:17676 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.70<0>
b17r4n09:16266:17676 [2] NCCL INFO Using network IB
b17r4n15:5794:6948 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.76<0>
b17r4n15:5794:6948 [0] NCCL INFO Using network IB
b17r4n15:5795:6947 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.76<0>
b17r4n15:5795:6947 [1] NCCL INFO Using network IB
b17r4n15:5793:6951 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.76<0>
b17r4n15:5793:6951 [2] NCCL INFO Using network IB
b17r4n15:5796:6949 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.76<0>
b17r4n15:5796:6949 [3] NCCL INFO Using network IB
b17r3n16:21051:22546 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.57<0>
b17r3n16:21051:22546 [1] NCCL INFO Using network IB
b17r4n03:19959:21381 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.64<0>
b17r4n03:19959:21381 [0] NCCL INFO Using network IB
b17r4n03:19960:21384 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.64<0>
b17r4n03:19960:21384 [1] NCCL INFO Using network IB
b17r4n03:19957:21385 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.64<0>
b17r4n03:19957:21385 [2] NCCL INFO Using network IB
b17r4n03:19958:21383 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.64<0>
b17r4n03:19958:21383 [3] NCCL INFO Using network IB
b17r4n10:22360:23820 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.71<0>
b17r4n10:22360:23820 [0] NCCL INFO Using network IB
b17r4n10:22359:23823 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.71<0>
b17r4n10:22359:23823 [2] NCCL INFO Using network IB
b17r4n10:22361:23819 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.71<0>
b17r4n10:22361:23819 [1] NCCL INFO Using network IB
b17r4n10:22358:23822 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.71<0>
b17r4n10:22358:23822 [3] NCCL INFO Using network IB
b17r3n18:12120:12120 [0] NCCL INFO Bootstrap : Using ib0:11.2.17.59<0>
b17r3n18:12120:12120 [0] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n18:12118:12118 [3] NCCL INFO Bootstrap : Using ib0:11.2.17.59<0>
b17r3n18:12118:12118 [3] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n18:12120:12120 [0] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n18:12118:12118 [3] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n18:12119:12119 [1] NCCL INFO Bootstrap : Using ib0:11.2.17.59<0>
b17r3n18:12119:12119 [1] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n18:12119:12119 [1] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n18:12117:12117 [2] NCCL INFO Bootstrap : Using ib0:11.2.17.59<0>
b17r3n18:12117:12117 [2] NCCL INFO Plugin name set by env to librccl-net-none.so
b17r3n18:12117:12117 [2] NCCL INFO NET/Plugin : No plugin found (librccl-net-none.so), using internal implementation
b17r3n18:12118:13578 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.59<0>
b17r3n18:12118:13578 [3] NCCL INFO Using network IB
b17r3n18:12119:13584 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.59<0>
b17r3n18:12119:13584 [1] NCCL INFO Using network IB
b17r3n18:12117:13585 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.59<0>
b17r3n18:12117:13585 [2] NCCL INFO Using network IB
b17r3n18:12120:13577 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/IB [RO]; OOB ib0:11.2.17.59<0>
b17r3n18:12120:13577 [0] NCCL INFO Using network IB
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.738611 23192 ProcessGroupNCCL.cpp:835] [Rank 66] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.738819 23191 ProcessGroupNCCL.cpp:835] [Rank 65] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.740980 23190 ProcessGroupNCCL.cpp:835] [Rank 67] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.762174 1811 ProcessGroupNCCL.cpp:835] [Rank 29] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.764740 3052 ProcessGroupNCCL.cpp:835] [Rank 87] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.762187 1812 ProcessGroupNCCL.cpp:835] [Rank 31] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.764750 3053 ProcessGroupNCCL.cpp:835] [Rank 85] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.764799 3054 ProcessGroupNCCL.cpp:835] [Rank 84] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.762208 1813 ProcessGroupNCCL.cpp:835] [Rank 28] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.762266 1814 ProcessGroupNCCL.cpp:835] [Rank 30] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.747395 2780 ProcessGroupNCCL.cpp:835] [Rank 25] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.747427 2784 ProcessGroupNCCL.cpp:835] [Rank 24] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.747443 2783 ProcessGroupNCCL.cpp:835] [Rank 26] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.747448 2782 ProcessGroupNCCL.cpp:835] [Rank 27] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.780685 3051 ProcessGroupNCCL.cpp:835] [Rank 86] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.784382 31911 ProcessGroupNCCL.cpp:835] [Rank 79] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.785180 31910 ProcessGroupNCCL.cpp:835] [Rank 77] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.786365 31908 ProcessGroupNCCL.cpp:835] [Rank 78] NCCL watchdog thread started!
I1109 12:05:43.783609 907 ProcessGroupNCCL.cpp:669] [Rank 29] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.783619 909 ProcessGroupNCCL.cpp:669] [Rank 31] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.783638 906 ProcessGroupNCCL.cpp:669] [Rank 28] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.783648 908 ProcessGroupNCCL.cpp:669] [Rank 30] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.790745 7019 ProcessGroupNCCL.cpp:835] [Rank 92] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.787755 31909 ProcessGroupNCCL.cpp:835] [Rank 76] NCCL watchdog thread started!
I1109 12:05:43.776654 1829 ProcessGroupNCCL.cpp:669] [Rank 27] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.776679 1828 ProcessGroupNCCL.cpp:669] [Rank 26] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.776686 1826 ProcessGroupNCCL.cpp:669] [Rank 24] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.776700 1827 ProcessGroupNCCL.cpp:669] [Rank 25] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.795926 31187 ProcessGroupNCCL.cpp:669] [Rank 79] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.795939 31185 ProcessGroupNCCL.cpp:669] [Rank 77] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.795955 31186 ProcessGroupNCCL.cpp:669] [Rank 78] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.795971 31184 ProcessGroupNCCL.cpp:669] [Rank 76] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.799852 7017 ProcessGroupNCCL.cpp:835] [Rank 93] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.801586 7020 ProcessGroupNCCL.cpp:835] [Rank 94] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.811719 31384 ProcessGroupNCCL.cpp:835] [Rank 5] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.808806 26632 ProcessGroupNCCL.cpp:835] [Rank 82] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.812083 31383 ProcessGroupNCCL.cpp:835] [Rank 4] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.807608 31917 ProcessGroupNCCL.cpp:835] [Rank 61] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.809540 26630 ProcessGroupNCCL.cpp:835] [Rank 81] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.792016 16252 ProcessGroupNCCL.cpp:835] [Rank 2] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810068 26631 ProcessGroupNCCL.cpp:835] [Rank 83] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.798055 25435 ProcessGroupNCCL.cpp:835] [Rank 91] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.798069 25434 ProcessGroupNCCL.cpp:835] [Rank 89] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.798086 25436 ProcessGroupNCCL.cpp:835] [Rank 88] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.798103 25437 ProcessGroupNCCL.cpp:835] [Rank 90] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810381 26629 ProcessGroupNCCL.cpp:835] [Rank 80] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.766757 23189 ProcessGroupNCCL.cpp:835] [Rank 64] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.809396 31914 ProcessGroupNCCL.cpp:835] [Rank 62] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.809392 27283 ProcessGroupNCCL.cpp:835] [Rank 33] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.809792 30029 ProcessGroupNCCL.cpp:835] [Rank 49] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810344 30030 ProcessGroupNCCL.cpp:835] [Rank 51] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.809420 27285 ProcessGroupNCCL.cpp:835] [Rank 35] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810529 30032 ProcessGroupNCCL.cpp:835] [Rank 50] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.809432 27286 ProcessGroupNCCL.cpp:835] [Rank 34] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.796785 23899 ProcessGroupNCCL.cpp:835] [Rank 45] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810613 30031 ProcessGroupNCCL.cpp:835] [Rank 48] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.796801 23901 ProcessGroupNCCL.cpp:835] [Rank 46] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.796821 23900 ProcessGroupNCCL.cpp:835] [Rank 47] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.796831 23898 ProcessGroupNCCL.cpp:835] [Rank 44] NCCL watchdog thread started!
I1109 12:05:43.799458 24492 ProcessGroupNCCL.cpp:669] [Rank 90] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.799486 24491 ProcessGroupNCCL.cpp:669] [Rank 89] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810114 31915 ProcessGroupNCCL.cpp:835] [Rank 63] NCCL watchdog thread started!
I1109 12:05:43.799516 24490 ProcessGroupNCCL.cpp:669] [Rank 88] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.799510 24493 ProcessGroupNCCL.cpp:669] [Rank 91] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810142 27284 ProcessGroupNCCL.cpp:835] [Rank 32] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810518 31916 ProcessGroupNCCL.cpp:835] [Rank 60] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.806154 27872 ProcessGroupNCCL.cpp:835] [Rank 73] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.806176 27871 ProcessGroupNCCL.cpp:835] [Rank 75] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.806185 27873 ProcessGroupNCCL.cpp:835] [Rank 72] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.792865 1516 ProcessGroupNCCL.cpp:835] [Rank 13] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.806206 27874 ProcessGroupNCCL.cpp:835] [Rank 74] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810251 24060 ProcessGroupNCCL.cpp:835] [Rank 10] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810259 24061 ProcessGroupNCCL.cpp:835] [Rank 9] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810338 24058 ProcessGroupNCCL.cpp:835] [Rank 11] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810349 24059 ProcessGroupNCCL.cpp:835] [Rank 8] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810405 29034 ProcessGroupNCCL.cpp:835] [Rank 71] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810407 29033 ProcessGroupNCCL.cpp:835] [Rank 70] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810417 29035 ProcessGroupNCCL.cpp:835] [Rank 69] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.811352 31352 ProcessGroupNCCL.cpp:835] [Rank 43] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.810433 29036 ProcessGroupNCCL.cpp:835] [Rank 68] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.811365 31349 ProcessGroupNCCL.cpp:835] [Rank 42] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.811373 31351 ProcessGroupNCCL.cpp:835] [Rank 41] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.811383 31350 ProcessGroupNCCL.cpp:835] [Rank 40] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.811134 16727 ProcessGroupNCCL.cpp:835] [Rank 57] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.794016 1515 ProcessGroupNCCL.cpp:835] [Rank 14] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.795776 16250 ProcessGroupNCCL.cpp:835] [Rank 1] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.803664 7016 ProcessGroupNCCL.cpp:835] [Rank 95] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.792483 22750 ProcessGroupNCCL.cpp:835] [Rank 20] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.812626 12537 ProcessGroupNCCL.cpp:835] [Rank 36] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.794432 22749 ProcessGroupNCCL.cpp:835] [Rank 23] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.798696 22747 ProcessGroupNCCL.cpp:835] [Rank 22] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.805809 22748 ProcessGroupNCCL.cpp:835] [Rank 21] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.795354 1517 ProcessGroupNCCL.cpp:835] [Rank 15] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.813421 12538 ProcessGroupNCCL.cpp:835] [Rank 39] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.801244 17398 ProcessGroupNCCL.cpp:835] [Rank 55] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.813822 12535 ProcessGroupNCCL.cpp:835] [Rank 37] NCCL watchdog thread started!
I1109 12:05:43.809396 26996 ProcessGroupNCCL.cpp:669] [Rank 73] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.809410 26997 ProcessGroupNCCL.cpp:669] [Rank 74] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.809425 26998 ProcessGroupNCCL.cpp:669] [Rank 75] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.809437 26995 ProcessGroupNCCL.cpp:669] [Rank 72] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.818766 31382 ProcessGroupNCCL.cpp:835] [Rank 7] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.815119 12536 ProcessGroupNCCL.cpp:835] [Rank 38] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.803184 17399 ProcessGroupNCCL.cpp:835] [Rank 54] NCCL watchdog thread started!
I1109 12:05:43.815835 30379 ProcessGroupNCCL.cpp:669] [Rank 42] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.815008 28148 ProcessGroupNCCL.cpp:669] [Rank 70] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.815863 30380 ProcessGroupNCCL.cpp:669] [Rank 43] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.815024 28147 ProcessGroupNCCL.cpp:669] [Rank 69] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.815034 28145 ProcessGroupNCCL.cpp:669] [Rank 68] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.815855 30378 ProcessGroupNCCL.cpp:669] [Rank 41] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.815882 30377 ProcessGroupNCCL.cpp:669] [Rank 40] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.815049 28150 ProcessGroupNCCL.cpp:669] [Rank 71] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.797041 5585 ProcessGroupNCCL.cpp:835] [Rank 17] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.798377 5584 ProcessGroupNCCL.cpp:835] [Rank 19] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.816632 16726 ProcessGroupNCCL.cpp:835] [Rank 58] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.800619 16251 ProcessGroupNCCL.cpp:835] [Rank 0] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.805239 17396 ProcessGroupNCCL.cpp:835] [Rank 52] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.806950 17397 ProcessGroupNCCL.cpp:835] [Rank 53] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.803568 16249 ProcessGroupNCCL.cpp:835] [Rank 3] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.804039 5583 ProcessGroupNCCL.cpp:835] [Rank 16] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.804042 5586 ProcessGroupNCCL.cpp:835] [Rank 18] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.821933 16725 ProcessGroupNCCL.cpp:835] [Rank 56] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.804730 1514 ProcessGroupNCCL.cpp:835] [Rank 12] NCCL watchdog thread started!
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.824316 16724 ProcessGroupNCCL.cpp:835] [Rank 59] NCCL watchdog thread started!
I1109 12:05:43.824918 23099 ProcessGroupNCCL.cpp:669] [Rank 8] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.824932 23101 ProcessGroupNCCL.cpp:669] [Rank 9] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.824942 23103 ProcessGroupNCCL.cpp:669] [Rank 11] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.824954 23102 ProcessGroupNCCL.cpp:669] [Rank 10] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.827046 21865 ProcessGroupNCCL.cpp:669] [Rank 22] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.827071 21866 ProcessGroupNCCL.cpp:669] [Rank 23] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.827090 21864 ProcessGroupNCCL.cpp:669] [Rank 21] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.827100 21863 ProcessGroupNCCL.cpp:669] [Rank 20] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1109 12:05:43.846418 31381 ProcessGroupNCCL.cpp:835] [Rank 6] NCCL watchdog thread started!
I1109 12:05:43.847890 30601 ProcessGroupNCCL.cpp:669] [Rank 6] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.847877 30600 ProcessGroupNCCL.cpp:669] [Rank 5] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.847914 30599 ProcessGroupNCCL.cpp:669] [Rank 4] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.847930 30602 ProcessGroupNCCL.cpp:669] [Rank 7] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.915059 31209 ProcessGroupNCCL.cpp:669] [Rank 62] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.915076 31206 ProcessGroupNCCL.cpp:669] [Rank 60] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.915088 31207 ProcessGroupNCCL.cpp:669] [Rank 61] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.915100 31210 ProcessGroupNCCL.cpp:669] [Rank 63] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.902462 23025 ProcessGroupNCCL.cpp:669] [Rank 46] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.902482 23024 ProcessGroupNCCL.cpp:669] [Rank 45] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.902496 23026 ProcessGroupNCCL.cpp:669] [Rank 47] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.902506 23023 ProcessGroupNCCL.cpp:669] [Rank 44] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.903659 16527 ProcessGroupNCCL.cpp:669] [Rank 53] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.903671 16526 ProcessGroupNCCL.cpp:669] [Rank 52] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.919378 6048 ProcessGroupNCCL.cpp:669] [Rank 94] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.903695 16528 ProcessGroupNCCL.cpp:669] [Rank 54] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.903695 16529 ProcessGroupNCCL.cpp:669] [Rank 55] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.919389 6046 ProcessGroupNCCL.cpp:669] [Rank 92] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.919406 6049 ProcessGroupNCCL.cpp:669] [Rank 95] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.919417 6047 ProcessGroupNCCL.cpp:669] [Rank 93] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.915283 16038 ProcessGroupNCCL.cpp:669] [Rank 58] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.915297 16037 ProcessGroupNCCL.cpp:669] [Rank 57] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.915309 16039 ProcessGroupNCCL.cpp:669] [Rank 59] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.915318 16035 ProcessGroupNCCL.cpp:669] [Rank 56] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.898797 22275 ProcessGroupNCCL.cpp:669] [Rank 64] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.898820 22277 ProcessGroupNCCL.cpp:669] [Rank 66] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.898936 22276 ProcessGroupNCCL.cpp:669] [Rank 65] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.899005 22278 ProcessGroupNCCL.cpp:669] [Rank 67] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.921502 29372 ProcessGroupNCCL.cpp:669] [Rank 50] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.921516 29373 ProcessGroupNCCL.cpp:669] [Rank 51] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.921533 29369 ProcessGroupNCCL.cpp:669] [Rank 48] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.918622 26608 ProcessGroupNCCL.cpp:669] [Rank 32] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.921542 29371 ProcessGroupNCCL.cpp:669] [Rank 49] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.918643 26609 ProcessGroupNCCL.cpp:669] [Rank 33] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.918658 26611 ProcessGroupNCCL.cpp:669] [Rank 35] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.918660 26610 ProcessGroupNCCL.cpp:669] [Rank 34] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.906291 4756 ProcessGroupNCCL.cpp:669] [Rank 16] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.924923 11608 ProcessGroupNCCL.cpp:669] [Rank 37] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.924940 11609 ProcessGroupNCCL.cpp:669] [Rank 38] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.924954 11607 ProcessGroupNCCL.cpp:669] [Rank 36] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.924971 11610 ProcessGroupNCCL.cpp:669] [Rank 39] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.924710 2109 ProcessGroupNCCL.cpp:669] [Rank 85] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.924746 2111 ProcessGroupNCCL.cpp:669] [Rank 87] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.924723 2107 ProcessGroupNCCL.cpp:669] [Rank 84] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.924754 2110 ProcessGroupNCCL.cpp:669] [Rank 86] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.909556 4760 ProcessGroupNCCL.cpp:669] [Rank 19] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.911442 4758 ProcessGroupNCCL.cpp:669] [Rank 17] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.913373 4759 ProcessGroupNCCL.cpp:669] [Rank 18] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:44.008632 25919 ProcessGroupNCCL.cpp:669] [Rank 82] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:44.008651 25917 ProcessGroupNCCL.cpp:669] [Rank 80] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:44.008661 25918 ProcessGroupNCCL.cpp:669] [Rank 81] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:44.008675 25920 ProcessGroupNCCL.cpp:669] [Rank 83] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.989769 629 ProcessGroupNCCL.cpp:669] [Rank 15] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.989904 627 ProcessGroupNCCL.cpp:669] [Rank 13] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.989931 628 ProcessGroupNCCL.cpp:669] [Rank 14] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.989943 626 ProcessGroupNCCL.cpp:669] [Rank 12] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.992367 15187 ProcessGroupNCCL.cpp:669] [Rank 1] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.992380 15188 ProcessGroupNCCL.cpp:669] [Rank 2] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.992398 15185 ProcessGroupNCCL.cpp:669] [Rank 0] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:05:43.992398 15189 ProcessGroupNCCL.cpp:669] [Rank 3] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
I1109 12:05:48.245900 15185 ProcessGroupNCCL.cpp:1274] NCCL_DEBUG: INFO
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] 
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] 
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.61s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.61s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.61s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.61s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.68s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.69s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.69s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.69s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.80s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.79s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.80s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 
27.81s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.67s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.67s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.73s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.73s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.66s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.67s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.73s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.68s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.73s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.68s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.68s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.71s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.68s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.71s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.69s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.71s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.63s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.74s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.72s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.63s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.75s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.64s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.74s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.64s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.66s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.66s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.71s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.71s/it] Loading 
checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.66s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.74s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.71s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.73s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.71s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.73s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.76s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.73s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.72s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.72s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.72s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.72s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.67s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.68s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.68s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.64s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.68s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.69s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.69s/it] Loading checkpoint shards: 
50%|█████ | 1/2 [00:27<00:27, 27.65s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.69s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.69s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.62s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.62s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.62s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.62s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.71s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.70s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.71s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.71s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.64s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.63s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.64s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.64s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.78s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.77s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.78s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.77s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.69s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.86s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:27<00:27, 27.97s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:28<00:28, 28.08s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:28<00:28, 28.12s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:28<00:28, 28.14s/it] Loading checkpoint shards: 50%|█████ | 1/2 [00:28<00:28, 28.13s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.02s/it] Loading checkpoint shards: 
100%|██████████| 2/2 [00:44<00:00, 21.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.04s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.03s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.04s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.04s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 22.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 22.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 22.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 
[00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 22.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.96s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.96s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.96s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.96s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 
100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.96s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.96s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.96s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.96s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.96s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.97s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 
[00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.03s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 20.99s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.02s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.04s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 22.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.04s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.06s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.04s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.04s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.00s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.03s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.05s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.05s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.06s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.03s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 
100%|██████████| 2/2 [00:44<00:00, 21.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.03s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 20.98s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.01s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.01s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.02s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.06s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.06s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.11s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.07s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.13s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.08s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.14s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 21.06s/it] Loading checkpoint shards: 100%|██████████| 2/2 [00:44<00:00, 22.12s/it]
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (159 > 64). Running this sequence through the model will result in indexing errors
I1109 12:07:08.279536 488 ProcessGroupNCCL.cpp:835] [Rank 42] NCCL watchdog thread started!
I1109 12:07:08.279489 30379 ProcessGroupNCCL.cpp:669] [Rank 42] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.281185 21864 ProcessGroupNCCL.cpp:669] [Rank 21] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.281291 24365 ProcessGroupNCCL.cpp:835] [Rank 21] NCCL watchdog thread started!
I1109 12:07:08.281317 24366 ProcessGroupNCCL.cpp:835] [Rank 20] NCCL watchdog thread started!
I1109 12:07:08.281262 21863 ProcessGroupNCCL.cpp:669] [Rank 20] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.286374 1827 ProcessGroupNCCL.cpp:669] [Rank 25] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.286486 4426 ProcessGroupNCCL.cpp:835] [Rank 25] NCCL watchdog thread started!
I1109 12:07:08.286490 4427 ProcessGroupNCCL.cpp:835] [Rank 24] NCCL watchdog thread started!
I1109 12:07:08.286442 1826 ProcessGroupNCCL.cpp:669] [Rank 24] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.279827 489 ProcessGroupNCCL.cpp:835] [Rank 43] NCCL watchdog thread started!
I1109 12:07:08.286686 4428 ProcessGroupNCCL.cpp:835] [Rank 26] NCCL watchdog thread started!
I1109 12:07:08.279753 30380 ProcessGroupNCCL.cpp:669] [Rank 43] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.281432 21865 ProcessGroupNCCL.cpp:669] [Rank 22] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.286625 1828 ProcessGroupNCCL.cpp:669] [Rank 26] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.279861 30378 ProcessGroupNCCL.cpp:669] [Rank 41] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.281545 24367 ProcessGroupNCCL.cpp:835] [Rank 22] NCCL watchdog thread started!
I1109 12:07:08.280004 490 ProcessGroupNCCL.cpp:835] [Rank 41] NCCL watchdog thread started!
I1109 12:07:08.280022 492 ProcessGroupNCCL.cpp:835] [Rank 40] NCCL watchdog thread started!
I1109 12:07:08.281631 21866 ProcessGroupNCCL.cpp:669] [Rank 23] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.279981 30377 ProcessGroupNCCL.cpp:669] [Rank 40] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.281689 24368 ProcessGroupNCCL.cpp:835] [Rank 23] NCCL watchdog thread started!
I1109 12:07:08.286829 1829 ProcessGroupNCCL.cpp:669] [Rank 27] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.280354 28148 ProcessGroupNCCL.cpp:669] [Rank 70] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.280426 30691 ProcessGroupNCCL.cpp:835] [Rank 70] NCCL watchdog thread started!
I1109 12:07:08.281636 31187 ProcessGroupNCCL.cpp:669] [Rank 79] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.281746 822 ProcessGroupNCCL.cpp:835] [Rank 79] NCCL watchdog thread started!
I1109 12:07:08.286958 4429 ProcessGroupNCCL.cpp:835] [Rank 27] NCCL watchdog thread started!
I1109 12:07:08.281715 31186 ProcessGroupNCCL.cpp:669] [Rank 78] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.281827 823 ProcessGroupNCCL.cpp:835] [Rank 78] NCCL watchdog thread started!
I1109 12:07:08.281765 31185 ProcessGroupNCCL.cpp:669] [Rank 77] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.281867 824 ProcessGroupNCCL.cpp:835] [Rank 77] NCCL watchdog thread started!
I1109 12:07:08.280580 28150 ProcessGroupNCCL.cpp:669] [Rank 71] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.280694 30692 ProcessGroupNCCL.cpp:835] [Rank 71] NCCL watchdog thread started!
I1109 12:07:08.288532 4758 ProcessGroupNCCL.cpp:669] [Rank 17] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.288658 7214 ProcessGroupNCCL.cpp:835] [Rank 17] NCCL watchdog thread started!
I1109 12:07:08.288678 7215 ProcessGroupNCCL.cpp:835] [Rank 16] NCCL watchdog thread started!
I1109 12:07:08.288657 4756 ProcessGroupNCCL.cpp:669] [Rank 16] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.280786 30693 ProcessGroupNCCL.cpp:835] [Rank 68] NCCL watchdog thread started!
I1109 12:07:08.282076 31184 ProcessGroupNCCL.cpp:669] [Rank 76] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.288738 4759 ProcessGroupNCCL.cpp:669] [Rank 18] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.288847 7216 ProcessGroupNCCL.cpp:835] [Rank 18] NCCL watchdog thread started!
I1109 12:07:08.282176 825 ProcessGroupNCCL.cpp:835] [Rank 76] NCCL watchdog thread started!
I1109 12:07:08.280437 11608 ProcessGroupNCCL.cpp:669] [Rank 37] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.280534 14296 ProcessGroupNCCL.cpp:835] [Rank 37] NCCL watchdog thread started!
I1109 12:07:08.280761 28145 ProcessGroupNCCL.cpp:669] [Rank 68] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.288854 4760 ProcessGroupNCCL.cpp:669] [Rank 19] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.288935 7217 ProcessGroupNCCL.cpp:835] [Rank 19] NCCL watchdog thread started!
I1109 12:07:08.288336 15188 ProcessGroupNCCL.cpp:669] [Rank 2] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.280875 28147 ProcessGroupNCCL.cpp:669] [Rank 69] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.280987 30694 ProcessGroupNCCL.cpp:835] [Rank 69] NCCL watchdog thread started!
I1109 12:07:08.288458 15189 ProcessGroupNCCL.cpp:669] [Rank 3] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.288558 17945 ProcessGroupNCCL.cpp:835] [Rank 3] NCCL watchdog thread started!
I1109 12:07:08.280670 11609 ProcessGroupNCCL.cpp:669] [Rank 38] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.288497 17944 ProcessGroupNCCL.cpp:835] [Rank 2] NCCL watchdog thread started!
I1109 12:07:08.280702 11610 ProcessGroupNCCL.cpp:669] [Rank 39] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.288678 17946 ProcessGroupNCCL.cpp:835] [Rank 1] NCCL watchdog thread started!
I1109 12:07:08.288620 15187 ProcessGroupNCCL.cpp:669] [Rank 1] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.280814 14298 ProcessGroupNCCL.cpp:835] [Rank 39] NCCL watchdog thread started!
I1109 12:07:08.280835 14297 ProcessGroupNCCL.cpp:835] [Rank 38] NCCL watchdog thread started!
I1109 12:07:08.280948 11607 ProcessGroupNCCL.cpp:669] [Rank 36] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.281042 14299 ProcessGroupNCCL.cpp:835] [Rank 36] NCCL watchdog thread started!
I1109 12:07:08.289314 15185 ProcessGroupNCCL.cpp:669] [Rank 0] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.287496 26998 ProcessGroupNCCL.cpp:669] [Rank 75] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.287528 29506 ProcessGroupNCCL.cpp:835] [Rank 75] NCCL watchdog thread started!
I1109 12:07:08.289446 17947 ProcessGroupNCCL.cpp:835] [Rank 0] NCCL watchdog thread started!
I1109 12:07:08.287611 29507 ProcessGroupNCCL.cpp:835] [Rank 72] NCCL watchdog thread started!
I1109 12:07:08.287566 26995 ProcessGroupNCCL.cpp:669] [Rank 72] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.287781 29508 ProcessGroupNCCL.cpp:835] [Rank 73] NCCL watchdog thread started!
I1109 12:07:08.287703 26996 ProcessGroupNCCL.cpp:669] [Rank 73] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.287869 26997 ProcessGroupNCCL.cpp:669] [Rank 74] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.287973 29509 ProcessGroupNCCL.cpp:835] [Rank 74] NCCL watchdog thread started!
I1109 12:07:08.287228 6049 ProcessGroupNCCL.cpp:669] [Rank 95] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.287282 6047 ProcessGroupNCCL.cpp:669] [Rank 93] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.287370 8972 ProcessGroupNCCL.cpp:835] [Rank 93] NCCL watchdog thread started!
I1109 12:07:08.287458 8973 ProcessGroupNCCL.cpp:835] [Rank 92] NCCL watchdog thread started!
I1109 12:07:08.287418 6046 ProcessGroupNCCL.cpp:669] [Rank 92] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.287357 8971 ProcessGroupNCCL.cpp:835] [Rank 95] NCCL watchdog thread started!
I1109 12:07:08.287482 8974 ProcessGroupNCCL.cpp:835] [Rank 94] NCCL watchdog thread started!
I1109 12:07:08.287478 6048 ProcessGroupNCCL.cpp:669] [Rank 94] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.294358 25919 ProcessGroupNCCL.cpp:669] [Rank 82] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.294483 28023 ProcessGroupNCCL.cpp:835] [Rank 82] NCCL watchdog thread started!
I1109 12:07:08.294492 28024 ProcessGroupNCCL.cpp:835] [Rank 81] NCCL watchdog thread started!
I1109 12:07:08.294421 25918 ProcessGroupNCCL.cpp:669] [Rank 81] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.294567 25920 ProcessGroupNCCL.cpp:669] [Rank 83] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.294677 28025 ProcessGroupNCCL.cpp:835] [Rank 83] NCCL watchdog thread started!
I1109 12:07:08.294735 28026 ProcessGroupNCCL.cpp:835] [Rank 80] NCCL watchdog thread started!
I1109 12:07:08.294695 25917 ProcessGroupNCCL.cpp:669] [Rank 80] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.338613 2109 ProcessGroupNCCL.cpp:669] [Rank 85] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.338709 4860 ProcessGroupNCCL.cpp:835] [Rank 85] NCCL watchdog thread started!
I1109 12:07:08.338763 4861 ProcessGroupNCCL.cpp:835] [Rank 84] NCCL watchdog thread started!
I1109 12:07:08.338722 2107 ProcessGroupNCCL.cpp:669] [Rank 84] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.338838 2111 ProcessGroupNCCL.cpp:669] [Rank 87] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.338961 4862 ProcessGroupNCCL.cpp:835] [Rank 87] NCCL watchdog thread started!
I1109 12:07:08.338989 4863 ProcessGroupNCCL.cpp:835] [Rank 86] NCCL watchdog thread started!
I1109 12:07:08.338980 2110 ProcessGroupNCCL.cpp:669] [Rank 86] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.354326 26610 ProcessGroupNCCL.cpp:669] [Rank 34] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.354342 26609 ProcessGroupNCCL.cpp:669] [Rank 33] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.354434 28676 ProcessGroupNCCL.cpp:835] [Rank 34] NCCL watchdog thread started!
I1109 12:07:08.354470 28677 ProcessGroupNCCL.cpp:835] [Rank 33] NCCL watchdog thread started!
I1109 12:07:08.354533 26611 ProcessGroupNCCL.cpp:669] [Rank 35] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.354655 28678 ProcessGroupNCCL.cpp:835] [Rank 35] NCCL watchdog thread started!
I1109 12:07:08.354575 26608 ProcessGroupNCCL.cpp:669] [Rank 32] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.354676 28679 ProcessGroupNCCL.cpp:835] [Rank 32] NCCL watchdog thread started!
I1109 12:07:08.358031 16037 ProcessGroupNCCL.cpp:669] [Rank 57] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.358090 16039 ProcessGroupNCCL.cpp:669] [Rank 59] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.358119 18120 ProcessGroupNCCL.cpp:835] [Rank 57] NCCL watchdog thread started!
I1109 12:07:08.358218 18121 ProcessGroupNCCL.cpp:835] [Rank 59] NCCL watchdog thread started!
I1109 12:07:08.358311 16038 ProcessGroupNCCL.cpp:669] [Rank 58] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.358459 18122 ProcessGroupNCCL.cpp:835] [Rank 58] NCCL watchdog thread started!
I1109 12:07:08.358538 16035 ProcessGroupNCCL.cpp:669] [Rank 56] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.358603 18123 ProcessGroupNCCL.cpp:835] [Rank 56] NCCL watchdog thread started!
I1109 12:07:08.364681 31209 ProcessGroupNCCL.cpp:669] [Rank 62] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.364761 914 ProcessGroupNCCL.cpp:835] [Rank 62] NCCL watchdog thread started!
I1109 12:07:08.364889 3549 ProcessGroupNCCL.cpp:835] [Rank 28] NCCL watchdog thread started!
I1109 12:07:08.364820 906 ProcessGroupNCCL.cpp:669] [Rank 28] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.364773 31210 ProcessGroupNCCL.cpp:669] [Rank 63] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.364806 907 ProcessGroupNCCL.cpp:669] [Rank 29] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.364924 3548 ProcessGroupNCCL.cpp:835] [Rank 29] NCCL watchdog thread started!
I1109 12:07:08.364878 31207 ProcessGroupNCCL.cpp:669] [Rank 61] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.364895 915 ProcessGroupNCCL.cpp:835] [Rank 63] NCCL watchdog thread started!
I1109 12:07:08.364919 908 ProcessGroupNCCL.cpp:669] [Rank 30] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.364993 916 ProcessGroupNCCL.cpp:835] [Rank 61] NCCL watchdog thread started!
I1109 12:07:08.365032 3550 ProcessGroupNCCL.cpp:835] [Rank 30] NCCL watchdog thread started!
I1109 12:07:08.365072 917 ProcessGroupNCCL.cpp:835] [Rank 60] NCCL watchdog thread started!
I1109 12:07:08.364976 909 ProcessGroupNCCL.cpp:669] [Rank 31] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.365075 3551 ProcessGroupNCCL.cpp:835] [Rank 31] NCCL watchdog thread started!
I1109 12:07:08.365000 31206 ProcessGroupNCCL.cpp:669] [Rank 60] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.369995 16528 ProcessGroupNCCL.cpp:669] [Rank 54] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.370108 19035 ProcessGroupNCCL.cpp:835] [Rank 54] NCCL watchdog thread started!
I1109 12:07:08.370074 16527 ProcessGroupNCCL.cpp:669] [Rank 53] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.370175 19036 ProcessGroupNCCL.cpp:835] [Rank 53] NCCL watchdog thread started!
I1109 12:07:08.370452 16526 ProcessGroupNCCL.cpp:669] [Rank 52] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.370487 19038 ProcessGroupNCCL.cpp:835] [Rank 52] NCCL watchdog thread started!
I1109 12:07:08.370424 16529 ProcessGroupNCCL.cpp:669] [Rank 55] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.370527 19037 ProcessGroupNCCL.cpp:835] [Rank 55] NCCL watchdog thread started!
I1109 12:07:08.371943 22276 ProcessGroupNCCL.cpp:669] [Rank 65] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.372046 24734 ProcessGroupNCCL.cpp:835] [Rank 65] NCCL watchdog thread started!
I1109 12:07:08.372171 24735 ProcessGroupNCCL.cpp:835] [Rank 64] NCCL watchdog thread started!
I1109 12:07:08.372108 22275 ProcessGroupNCCL.cpp:669] [Rank 64] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.372331 22278 ProcessGroupNCCL.cpp:669] [Rank 67] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.372453 24736 ProcessGroupNCCL.cpp:835] [Rank 67] NCCL watchdog thread started!
I1109 12:07:08.372433 22277 ProcessGroupNCCL.cpp:669] [Rank 66] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.372527 24737 ProcessGroupNCCL.cpp:835] [Rank 66] NCCL watchdog thread started!
I1109 12:07:08.368604 627 ProcessGroupNCCL.cpp:669] [Rank 13] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.368707 3581 ProcessGroupNCCL.cpp:835] [Rank 13] NCCL watchdog thread started!
I1109 12:07:08.368700 3582 ProcessGroupNCCL.cpp:835] [Rank 12] NCCL watchdog thread started!
I1109 12:07:08.368662 626 ProcessGroupNCCL.cpp:669] [Rank 12] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.368832 628 ProcessGroupNCCL.cpp:669] [Rank 14] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.368942 3583 ProcessGroupNCCL.cpp:835] [Rank 14] NCCL watchdog thread started!
I1109 12:07:08.368960 3584 ProcessGroupNCCL.cpp:835] [Rank 15] NCCL watchdog thread started!
I1109 12:07:08.368876 629 ProcessGroupNCCL.cpp:669] [Rank 15] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.374657 30600 ProcessGroupNCCL.cpp:669] [Rank 5] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.374732 300 ProcessGroupNCCL.cpp:835] [Rank 5] NCCL watchdog thread started!
I1109 12:07:08.374866 30601 ProcessGroupNCCL.cpp:669] [Rank 6] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.375032 301 ProcessGroupNCCL.cpp:835] [Rank 6] NCCL watchdog thread started!
I1109 12:07:08.375046 302 ProcessGroupNCCL.cpp:835] [Rank 4] NCCL watchdog thread started!
I1109 12:07:08.374991 30599 ProcessGroupNCCL.cpp:669] [Rank 4] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.375012 30602 ProcessGroupNCCL.cpp:669] [Rank 7] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.375111 303 ProcessGroupNCCL.cpp:835] [Rank 7] NCCL watchdog thread started!
I1109 12:07:08.379796 24492 ProcessGroupNCCL.cpp:669] [Rank 90] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.379880 27105 ProcessGroupNCCL.cpp:835] [Rank 90] NCCL watchdog thread started!
I1109 12:07:08.373006 25692 ProcessGroupNCCL.cpp:835] [Rank 8] NCCL watchdog thread started!
I1109 12:07:08.372959 23099 ProcessGroupNCCL.cpp:669] [Rank 8] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.380694 25564 ProcessGroupNCCL.cpp:835] [Rank 45] NCCL watchdog thread started!
I1109 12:07:08.380645 23024 ProcessGroupNCCL.cpp:669] [Rank 45] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.379999 24491 ProcessGroupNCCL.cpp:669] [Rank 89] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.379987 24490 ProcessGroupNCCL.cpp:669] [Rank 88] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.380123 27107 ProcessGroupNCCL.cpp:835] [Rank 89] NCCL watchdog thread started!
I1109 12:07:08.380102 27106 ProcessGroupNCCL.cpp:835] [Rank 88] NCCL watchdog thread started!
I1109 12:07:08.373227 23101 ProcessGroupNCCL.cpp:669] [Rank 9] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.373335 25693 ProcessGroupNCCL.cpp:835] [Rank 9] NCCL watchdog thread started!
I1109 12:07:08.380844 23023 ProcessGroupNCCL.cpp:669] [Rank 44] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.380939 25565 ProcessGroupNCCL.cpp:835] [Rank 44] NCCL watchdog thread started!
I1109 12:07:08.380192 24493 ProcessGroupNCCL.cpp:669] [Rank 91] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.380986 25566 ProcessGroupNCCL.cpp:835] [Rank 46] NCCL watchdog thread started!
I1109 12:07:08.380287 27108 ProcessGroupNCCL.cpp:835] [Rank 91] NCCL watchdog thread started!
I1109 12:07:08.380942 23026 ProcessGroupNCCL.cpp:669] [Rank 47] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.373502 25694 ProcessGroupNCCL.cpp:835] [Rank 11] NCCL watchdog thread started!
I1109 12:07:08.380899 23025 ProcessGroupNCCL.cpp:669] [Rank 46] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.373447 23103 ProcessGroupNCCL.cpp:669] [Rank 11] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.381052 25567 ProcessGroupNCCL.cpp:835] [Rank 47] NCCL watchdog thread started!
I1109 12:07:08.373584 23102 ProcessGroupNCCL.cpp:669] [Rank 10] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.373701 25695 ProcessGroupNCCL.cpp:835] [Rank 10] NCCL watchdog thread started!
I1109 12:07:08.383754 29371 ProcessGroupNCCL.cpp:669] [Rank 49] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.383862 31528 ProcessGroupNCCL.cpp:835] [Rank 49] NCCL watchdog thread started!
I1109 12:07:08.384107 29373 ProcessGroupNCCL.cpp:669] [Rank 51] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.384075 29372 ProcessGroupNCCL.cpp:669] [Rank 50] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
I1109 12:07:08.384225 31529 ProcessGroupNCCL.cpp:835] [Rank 50] NCCL watchdog thread started!
I1109 12:07:08.384222 31530 ProcessGroupNCCL.cpp:835] [Rank 51] NCCL watchdog thread started!
I1109 12:07:08.384248 31531 ProcessGroupNCCL.cpp:835] [Rank 48] NCCL watchdog thread started!
I1109 12:07:08.384155 29369 ProcessGroupNCCL.cpp:669] [Rank 48] ProcessGroupNCCL initialized with following options:
NCCL_ASYNC_ERROR_HANDLING: 0
NCCL_DESYNC_DEBUG: 0
NCCL_BLOCKING_WAIT: 0
TIMEOUT(ms): 1800000
USE_HIGH_PRIORITY_STREAM: 0
0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s] 0%| | 0/420 [00:00<?, ?it/s]I1109 12:07:42.237160 19048 ProcessGroupNCCL.cpp:1274] NCCL_DEBUG: INFO
0%| | 1/420 [01:14<8:40:51, 74.59s/it] 0%| | 1/420 [01:14<8:40:29, 74.53s/it] 0%| | 1/420 [01:14<8:40:50, 74.58s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:50, 74.58s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:49, 74.58s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:50, 74.58s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:53, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:49, 74.58s/it] 0%| | 1/420 [01:14<8:40:51, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:50, 74.58s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:50, 74.58s/it] 0%| | 1/420 [01:14<8:40:53, 74.59s/it] 0%| | 1/420 [01:14<8:40:49, 74.58s/it] 0%| | 1/420 [01:14<8:40:50, 74.58s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:29, 74.53s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:49, 74.58s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 1/420 [01:14<8:40:52, 74.59s/it] 0%| | 2/420 [02:12<7:31:06, 64.75s/it] 0%| | 2/420 [02:12<7:31:01, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 
2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:04, 64.75s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:04, 64.75s/it] 0%| | 2/420 [02:12<7:30:57, 64.73s/it] 0%| | 2/420 [02:12<7:31:04, 64.75s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.75s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:05, 64.75s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:06, 64.75s/it] 0%| | 2/420 [02:12<7:31:06, 64.75s/it] 0%| | 2/420 [02:12<7:31:01, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.75s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:30:57, 64.73s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:05, 64.75s/it] 0%| | 2/420 [02:12<7:31:04, 64.75s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:06, 64.75s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:02, 64.74s/it] 0%| | 2/420 [02:12<7:31:04, 64.75s/it] 0%| | 2/420 [02:12<7:31:03, 64.74s/it] 0%| | 2/420 [02:12<7:31:04, 64.75s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 
[03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:38, 61.53s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:45, 61.55s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:40, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:42, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:42, 61.54s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:40, 61.54s/it] 1%| | 3/420 [03:10<7:07:44, 61.55s/it] 1%| | 3/420 [03:10<7:07:42, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:40, 61.54s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:44, 61.55s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:45, 61.55s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:38, 61.53s/it] 1%| | 3/420 [03:10<7:07:42, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:40, 61.54s/it] 1%| | 3/420 [03:10<7:07:40, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:42, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:40, 61.54s/it] 1%| | 3/420 [03:10<7:07:42, 61.54s/it] 1%| | 3/420 [03:10<7:07:41, 61.54s/it] 1%| | 3/420 [03:10<7:07:43, 61.54s/it]slurmstepd: error: *** JOB 46528794 ON e07r1n18 CANCELLED AT 2023-11-09T12:10:47 ***
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import host_subplot
import json
# Path of the trainer_state.json file produced by the fine-tuning run.
filename = "/public/home/zhaoying1/work/Baichuan2-main/fine-tune/slurm_script/output/trainer_state.json"
with open(filename, "r") as file:
    data = json.load(file)

# Fall back to an empty list when the key is absent so the loop below is a
# no-op instead of raising on None.
log_history_array = data.get("log_history") or []

# Build parallel step/loss lists. Records without a "loss" value are skipped
# so both lists stay aligned and contain only plottable numbers.
step_list = []
loss_list = []
for item in log_history_array:
    loss = item.get("loss")
    if loss is None:
        continue
    step_list.append(item.get("step"))
    loss_list.append(loss)

print("Step list:", step_list)
print("Loss list:", loss_list)
def plot_acc_loss(step_list, loss_list):
    """Plot the loss curve, save it as a JPEG, then display it.

    Args:
        step_list: x-axis values (training steps).
        loss_list: y-axis values (loss), one per entry of ``step_list``.

    The figure is written to
    ``baichuan2_7bbase_ft_96c_bs1_acum1_fp16_lr2e-5.jpg`` in the current
    working directory.
    """
    host = host_subplot(111)  # row=1 col=1 first pic
    plt.subplots_adjust(right=0.8)  # adjust the right boundary of the plot window
    # par1 = host.twinx()  # share the x axis

    # set labels
    host.set_xlabel("steps")
    host.set_ylabel("loss")

    # plot curves
    p1, = host.plot(step_list, loss_list, label="loss")
    host.legend(loc=5)

    # set label color to match the curve
    host.axis["left"].label.set_color(p1.get_color())
    # par1.axis["right"].label.set_color(p2.get_color())

    plt.title("baichuan2_7bbase_ft_96c_bs1_acum1_fp16_lr2e-5")
    # plt.title("6B_ds_ft_bs32_accum1_4cards_zero3_5e-5")
    plt.draw()

    # Save BEFORE show(): with an interactive backend show() blocks until the
    # window is closed, after which the figure may no longer be available and
    # savefig() would write an empty image.
    # plt.savefig("6B_ds_ft_bs32_accum1_4cards_zero3_5e-5.jpg",dpi = 600)
    plt.savefig("baichuan2_7bbase_ft_96c_bs1_acum1_fp16_lr2e-5.jpg", dpi=600)
    plt.show()
# Draw and save the loss curve from the step/loss lists collected above.
plot_acc_loss(step_list,loss_list)
#!/bin/bash
#SBATCH -p kshdnormal
#SBATCH -N 24
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=32
#SBATCH --gres=dcu:4
#SBATCH -J bc2-7b
#SBATCH -o logs/pt-%j.out
#SBATCH -e logs/pt-%j.err
#SBATCH --exclusive

# Slurm batch script for the full fine-tuning run.
# It expands the job's node list into an MPI hostfile (one line per node,
# a fixed number of slots each) and starts run_ft_single.sh once per slot
# through mpirun.

ulimit -u 200000

export HSA_FORCE_FINE_GRAIN_PCIE=1
export OMP_NUM_THREADS=1
export NCCL_DEBUG=INFO
export MIOPEN_FIND_MODE=3
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5
export NCCL_NET_PLUGIN=none
unset RCCL_NCHANNELS
unset NCCL_NET_GDR_LEVEL

# Both variables are required to build the hostfile; fail early with a
# clear message when the script is run outside a Slurm allocation.
: "${SLURM_JOB_ID:?must be run inside a Slurm job}"
: "${SLURM_JOB_NODELIST:?must be run inside a Slurm job}"

# Number of MPI ranks started on every node.
readonly slots_per_node=4

# Per-job files are keyed by the job id and truncated on write, so the
# directory does not need to be wiped (a blanket "rm -rf ./hostfile/*" would
# also delete the hostfiles of other jobs launched from this directory).
hostfile_dir=./hostfile
mkdir -p "${hostfile_dir}" || exit 1

echo "START TIME: $(date)"

hostfile=${hostfile_dir}/${SLURM_JOB_ID}
mpi_hostfile=${hostfile_dir}/hostfile-dl-${SLURM_JOB_ID}

if ! scontrol show hostnames "${SLURM_JOB_NODELIST}" > "${hostfile}"; then
  echo "failed to expand node list: ${SLURM_JOB_NODELIST}" >&2
  exit 1
fi

# One "<node> slots=N" line per allocated node.
while IFS= read -r node; do
  printf '%s slots=%d\n' "${node}" "${slots_per_node}"
done < "${hostfile}" > "${mpi_hostfile}"

# Total rank count = distinct nodes * slots per node.
np=$(sort -u "${hostfile}" | wc -l)
np=$((np * slots_per_node))

# First node of the allocation; only printed for reference.
dist_url=$(awk 'NR==1 {print $1}' "${hostfile}")
echo "${dist_url}"

mpirun -np "${np}" --hostfile "${mpi_hostfile}" --bind-to none "$(pwd)/run_ft_single.sh"
#!/bin/bash
# Per-rank launcher for the full fine-tuning run.
# Reads the Open MPI local rank from the environment, selects the UCX
# network device and NUMA node with the same index, and starts
# fine-tune.py under numactl.

export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_FIND_MODE=3
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5
export NCCL_IB_HCA=mlx5_0
export NCCL_DEBUG=INFO
export NCCL_NET_PLUGIN=none
# These two were previously exported and then immediately unset; the net
# effect is that they are absent from the environment.
unset RCCL_NCHANNELS
unset NCCL_NET_GDR_LEVEL

lrank=${OMPI_COMM_WORLD_LOCAL_RANK:-}
echo "LRANK===============================$lrank"
# NOTE(review): RANK and WORLD_SIZE are assigned but not exported here, so
# the training process only sees them if they were already exported by the
# caller -- confirm this is intended.
RANK=$OMPI_COMM_WORLD_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
echo "WORLD_SIZE*************$WORLD_SIZE"

export HIP_VISIBLE_DEVICES=0,1,2,3

# Training command as an array so every argument stays a single word
# regardless of its content.
app=(
  python3 ../fine-tune.py
  --deepspeed ../ds_config.json
  --report_to none
  --data_path ../data/belle_chat_ramdon_10k.json
  --model_name_or_path ../../baichuan2-7b-base
  --output_dir output
  --model_max_length 64
  --num_train_epochs 4
  --per_device_train_batch_size 1
  --gradient_accumulation_steps 1
  --save_strategy epoch
  --learning_rate 2e-5
  --lr_scheduler_type constant
  --adam_beta1 0.9
  --adam_beta2 0.98
  --adam_epsilon 1e-8
  --max_grad_norm 1.0
  --weight_decay 1e-4
  --warmup_ratio 0.0
  --logging_steps 1
  --gradient_checkpointing False
  --fp16
  --local_rank "${lrank}"
)

case "${lrank}" in
  0|1|2|3)
    # Local rank N uses network device mlx5_N and NUMA node N.
    export UCX_NET_DEVICES="mlx5_${lrank}:1"
    export UCX_IB_PCI_BW="mlx5_${lrank}:50Gbs"
    numactl --cpunodebind="${lrank}" --membind="${lrank}" "${app[@]}"
    ;;
  *)
    # Previously an unexpected rank silently did nothing and exited 0.
    echo "unsupported local rank '${lrank}' (expected 0-3)" >&2
    exit 1
    ;;
esac
#!/bin/bash
#SBATCH -p kshdnormal
#SBATCH -N 24
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=32
#SBATCH --gres=dcu:4
#SBATCH -J bc2-7b
#SBATCH -o logs/pt-%j.out
#SBATCH -e logs/pt-%j.err
#SBATCH --exclusive

# Slurm batch script for the LoRA fine-tuning run.
# It expands the job's node list into an MPI hostfile (one line per node,
# a fixed number of slots each) and starts run_lora_single.sh once per slot
# through mpirun.

ulimit -u 200000

export HSA_FORCE_FINE_GRAIN_PCIE=1
export OMP_NUM_THREADS=1
export NCCL_DEBUG=INFO
export MIOPEN_FIND_MODE=3
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5
export NCCL_NET_PLUGIN=none
unset RCCL_NCHANNELS
unset NCCL_NET_GDR_LEVEL

# Both variables are required to build the hostfile; fail early with a
# clear message when the script is run outside a Slurm allocation.
: "${SLURM_JOB_ID:?must be run inside a Slurm job}"
: "${SLURM_JOB_NODELIST:?must be run inside a Slurm job}"

# Number of MPI ranks started on every node.
readonly slots_per_node=4

# Per-job files are keyed by the job id and truncated on write, so the
# directory does not need to be wiped (a blanket "rm -rf ./hostfile/*" would
# also delete the hostfiles of other jobs launched from this directory).
hostfile_dir=./hostfile
mkdir -p "${hostfile_dir}" || exit 1

echo "START TIME: $(date)"

hostfile=${hostfile_dir}/${SLURM_JOB_ID}
mpi_hostfile=${hostfile_dir}/hostfile-dl-${SLURM_JOB_ID}

if ! scontrol show hostnames "${SLURM_JOB_NODELIST}" > "${hostfile}"; then
  echo "failed to expand node list: ${SLURM_JOB_NODELIST}" >&2
  exit 1
fi

# One "<node> slots=N" line per allocated node.
while IFS= read -r node; do
  printf '%s slots=%d\n' "${node}" "${slots_per_node}"
done < "${hostfile}" > "${mpi_hostfile}"

# Total rank count = distinct nodes * slots per node.
np=$(sort -u "${hostfile}" | wc -l)
np=$((np * slots_per_node))

# First node of the allocation; only printed for reference.
dist_url=$(awk 'NR==1 {print $1}' "${hostfile}")
echo "${dist_url}"

mpirun -np "${np}" --hostfile "${mpi_hostfile}" --bind-to none "$(pwd)/run_lora_single.sh"
#!/bin/bash
# Per-rank launcher for the LoRA fine-tuning run.
# Reads the Open MPI local rank from the environment, selects the UCX
# network device and NUMA node with the same index, and starts
# fine-tune.py (with --use_lora True) under numactl.

export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_FIND_MODE=3
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5
export NCCL_IB_HCA=mlx5_0
export NCCL_DEBUG=INFO
export NCCL_NET_PLUGIN=none
# These two were previously exported and then immediately unset; the net
# effect is that they are absent from the environment.
unset RCCL_NCHANNELS
unset NCCL_NET_GDR_LEVEL

lrank=${OMPI_COMM_WORLD_LOCAL_RANK:-}
echo "LRANK===============================$lrank"
# NOTE(review): RANK and WORLD_SIZE are assigned but not exported here, so
# the training process only sees them if they were already exported by the
# caller -- confirm this is intended.
RANK=$OMPI_COMM_WORLD_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
echo "WORLD_SIZE*************$WORLD_SIZE"

export HIP_VISIBLE_DEVICES=0,1,2,3

# Training command as an array so every argument stays a single word
# regardless of its content.
app=(
  python3 ../fine-tune.py
  --deepspeed ../ds_config.json
  --report_to none
  --data_path ../data/belle_chat_ramdon_10k.json
  --model_name_or_path ../../baichuan2-7b-base
  --output_dir output-lora
  --model_max_length 512
  --num_train_epochs 4
  --per_device_train_batch_size 1
  --gradient_accumulation_steps 1
  --save_strategy epoch
  --learning_rate 1e-5
  --lr_scheduler_type constant
  --adam_beta1 0.9
  --adam_beta2 0.98
  --adam_epsilon 1e-8
  --max_grad_norm 1.0
  --weight_decay 1e-4
  --warmup_ratio 0.0
  --logging_steps 1
  --gradient_checkpointing False
  --fp16
  --use_lora True
  --local_rank "${lrank}"
)

case "${lrank}" in
  0|1|2|3)
    # Local rank N uses network device mlx5_N and NUMA node N.
    export UCX_NET_DEVICES="mlx5_${lrank}:1"
    export UCX_IB_PCI_BW="mlx5_${lrank}:50Gbs"
    numactl --cpunodebind="${lrank}" --membind="${lrank}" "${app[@]}"
    ;;
  *)
    # Previously an unexpected rank silently did nothing and exited 0.
    echo "unsupported local rank '${lrank}' (expected 0-3)" >&2
    exit 1
    ;;
esac
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment