"...lm-evaluation-harness.git" did not exist on "68687d6df1ac20e60d547efb739ccc2d48a2a72f"
Commit 0bf5e500 authored by Tri Dao

Release training code

parent 9bc63d1e
# https://wandb.ai
wandb:
  _target_: pytorch_lightning.loggers.wandb.WandbLogger
  project: attention
  name: ${name}
  save_dir: "."
  mode: online # set offline to store all logs only locally
  id: ${oc.select:name} # pass correct id to resume experiment!
  # entity: "" # set to name of your wandb team or just remove it
  log_model: False
  prefix: ""
  job_type: "train"
  group: ""
  tags: []
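For reference, a minimal sketch (not the repo's code) of how Hydra turns the logger node above into a Lightning logger. The inline values stand in for interpolations such as ${name} and ${oc.select:name}, which only resolve once the full config is composed; mode is switched to offline so the snippet runs without a wandb account (requires pytorch_lightning and wandb installed):

from hydra.utils import instantiate
from omegaconf import OmegaConf

# Inline stand-in for the composed logger node, with values copied from the YAML above.
node = OmegaConf.create({
    "_target_": "pytorch_lightning.loggers.wandb.WandbLogger",
    "project": "attention",
    "name": "my-run",    # would normally come from ${name}
    "save_dir": ".",
    "mode": "offline",   # "online" in the config; offline needs no wandb login
    "job_type": "train",
})
wandb_logger = instantiate(node)  # same as calling WandbLogger(project="attention", ...)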
# @package eval.metrics
acc:
  _target_: src.metrics.accuracy.AccuracyMine
# @package eval.metrics
acc:
  _target_: torchmetrics.Accuracy
  ignore_index: -100
# @package eval.metrics
acctop5:
  _target_: src.metrics.accuracy.AccuracyMine
  top_k: 5
# @package eval.metrics
mse:
  _target_: torchmetrics.MeanSquaredError
# @package eval.metrics
num-tokens:
  _target_: src.metrics.num_tokens.NumTokens
# @package eval.metrics
ppl:
  _target_: src.metrics.perplexity.Perplexity
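Each file above contributes one entry to the eval.metrics group. A rough sketch (not the repo's training loop) of how such a group is typically instantiated into metric objects; the repo-specific src.metrics.* targets (AccuracyMine, NumTokens, Perplexity) would be handled the same way:

from hydra.utils import instantiate
from omegaconf import OmegaConf

# Stand-in for the composed eval.metrics node, using only a stock torchmetrics class.
metrics_cfg = OmegaConf.create({
    "mse": {"_target_": "torchmetrics.MeanSquaredError"},
})
metrics = {name: instantiate(node) for name, node in metrics_cfg.items()}
print(metrics["mse"])  # MeanSquaredError()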
# @package _global_
# run in debug mode with:
# `python run.py mode=debug`
defaults:
- override /trainer: debug.yaml
debug_mode: True
hydra:
  # sets level of all command line loggers to 'DEBUG'
  verbose: True
  # https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
  # sets level of only chosen command line loggers to 'DEBUG'
  # verbose: [src.train, src.utils.utils]
  # sets output paths for all file logs to 'logs/debug/'
  run:
    dir: ${oc.env:RESULT_DIR,${work_dir}/logs}/debug/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: ${oc.env:RESULT_DIR,${work_dir}/logs}/debug/multirun_${now:%Y-%m-%d_%H-%M-%S}
    subdir: ${hydra.job.num}
# disable rich config printing, since it will be already printed by hydra when `verbose: True`
print_config: False
# @package _global_
# default running mode
default_mode: True
hydra:
  # default output paths for all file logs
  run:
    dir: ${oc.env:RESULT_DIR,${work_dir}/logs}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: ${oc.env:RESULT_DIR,${work_dir}/logs}/multiruns/${now:%Y-%m-%d_%H-%M-%S}
    subdir: ${hydra.job.num}
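The ${oc.env:RESULT_DIR,...} pattern in these paths falls back to a default when the RESULT_DIR environment variable is unset. A small sketch of that resolver (the work_dir value is illustrative):

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "work_dir": "/tmp/project",  # illustrative value
    "log_dir": "${oc.env:RESULT_DIR,${work_dir}/logs}",
})
print(cfg.log_dir)  # "/tmp/project/logs" unless RESULT_DIR is set in the environment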
# @package _global_
# run in experiment mode with:
# `python run.py mode=exp name=experiment_name`
experiment_mode: True
# allows for custom naming of the experiment
name: ???
hydra:
  # sets output paths for all file logs to 'logs/experiments/${name}'
  run:
    dir: ${oc.env:RESULT_DIR,${work_dir}/logs}/experiments/${name}
  sweep:
    dir: ${oc.env:RESULT_DIR,${work_dir}/logs}/experiments/${name}
    subdir: ${hydra.job.num}
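The name: ??? entry is OmegaConf's marker for a mandatory value: the run fails if it is read before being supplied (e.g. via name=experiment_name on the command line). A minimal sketch:

from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

cfg = OmegaConf.create({"name": "???"})  # "???" is interpreted as MISSING
try:
    _ = cfg.name
except MissingMandatoryValue:
    print("supply it on the command line, e.g. python run.py mode=exp name=my_experiment")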
# @package _global_
# Run the Pytorch profiler
trainer:
  profiler:
    _target_: pytorch_lightning.profilers.PyTorchProfiler
    dirpath: ${hydra.run.dir}
    schedule:
      _target_: torch.profiler.schedule
      wait: 5
      warmup: 5
      active: 5
    use_cuda: True
  max_steps: 20
logger:
  wandb:
    mode: disabled
callbacks:
  model_checkpoint: null
  model_checkpoint_progress: null
  early_stopping: null
hydra:
  # sets output paths for all file logs to 'logs/profile/'
  run:
    dir: ${oc.env:RESULT_DIR,${work_dir}/logs}/profile/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: ${oc.env:RESULT_DIR,${work_dir}/logs}/profile/multirun_${now:%Y-%m-%d_%H-%M-%S}
    subdir: ${hydra.job.num}
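The profiler settings above are forwarded to torch.profiler. Outside of Lightning, a bare-bones equivalent with the same schedule would look roughly like this (the matmul is a stand-in for a training step):

import torch
from torch.profiler import ProfilerActivity, profile, schedule

prof_schedule = schedule(wait=5, warmup=5, active=5)  # same values as the YAML above
activities = [ProfilerActivity.CPU]
if torch.cuda.is_available():
    activities.append(ProfilerActivity.CUDA)          # what use_cuda: True asks for

with profile(activities=activities, schedule=prof_schedule) as prof:
    for step in range(20):                            # max_steps: 20 in the config
        torch.randn(64, 64) @ torch.randn(64, 64)     # stand-in for one training step
        prof.step()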
# @package _global_
# Smoke test: disable logging and model checkpointing
logger:
  wandb:
    mode: disabled
callbacks:
  model_checkpoint: null
  model_checkpoint_progress: null
hydra:
  # https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
  # sets level of only chosen command line loggers to 'DEBUG'
  # verbose: [src.train, src.utils.utils]
  # sets output paths for all file logs to 'logs/debug/'
  run:
    dir: ${oc.env:RESULT_DIR,${work_dir}/logs}/debug/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: ${oc.env:RESULT_DIR,${work_dir}/logs}/debug/multirun_${now:%Y-%m-%d_%H-%M-%S}
    subdir: ${hydra.job.num}
defaults:
- _self_
- gpt2model: gpt2-small
_target_: transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel
_recursive_: True
config:
  _target_: transformers.GPT2Config
  # Mistral's config: https://github.com/stanford-crfm/mistral/blob/main/conf/models/gpt2-small.yaml
  # However, reorder_and_upcast_attn slows things down
  reorder_and_upcast_attn: false
  scale_attn_by_inverse_layer_idx: true
  n_positions: ${datamodule.max_length}
defaults:
- _self_
- gpt2model: gpt2-small
_target_: flash_attn.models.gpt.GPTLMHeadModel
_recursive_: True
config:
  _target_: transformers.GPT2Config
  # Mistral's config: https://github.com/stanford-crfm/mistral/blob/main/conf/models/mistral-small.yaml
  # However, reorder_and_upcast_attn slows things down
  reorder_and_upcast_attn: false
  scale_attn_by_inverse_layer_idx: true
  n_positions: ${datamodule.max_length}
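Both model configs above build a transformers.GPT2Config and differ only in which LM-head class consumes it: the Hugging Face GPT2LMHeadModel versus flash_attn's GPTLMHeadModel. A hedged sketch with the gpt2-small sizes filled in by hand (the flash_attn constructor is an assumption and may differ across versions):

from transformers import GPT2Config, GPT2LMHeadModel

config = GPT2Config(
    n_embd=768, n_head=12, n_layer=12,  # gpt2-small values, see the gpt2model configs below
    n_positions=1024,                   # stands in for ${datamodule.max_length}
    reorder_and_upcast_attn=False,
    scale_attn_by_inverse_layer_idx=True,
)
model = GPT2LMHeadModel(config)

# flash_attn variant (assumed interface, same config object):
# from flash_attn.models.gpt import GPTLMHeadModel
# model = GPTLMHeadModel(config)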
# @package _global_
model:
  config:
    n_embd: 1280
    n_head: 20
    n_layer: 36
# @package _global_
model:
  config:
    n_embd: 1024
    n_head: 16
    n_layer: 24
# @package _global_
model:
  config:
    n_embd: 768
    n_head: 12
    n_layer: 12
# @package _global_
model:
  config:
    n_embd: 1600
    n_head: 25
    n_layer: 48
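For orientation, rough parameter-count arithmetic for the four sizes above (a sketch, not from the repo): each transformer block carries about 12 * n_embd^2 weights and the embeddings add roughly (n_vocab + n_ctx) * n_embd more:

# Biases and LayerNorm weights are ignored, so the counts are approximate.
def approx_params(n_layer, n_embd, n_vocab=50257, n_ctx=1024):
    return 12 * n_layer * n_embd ** 2 + (n_vocab + n_ctx) * n_embd

sizes = {
    "gpt2-small":  (768, 12, 12),
    "gpt2-medium": (1024, 16, 24),
    "gpt2-large":  (1280, 20, 36),
    "gpt2-xlarge": (1600, 25, 48),
}
for name, (n_embd, n_head, n_layer) in sizes.items():
    print(f"{name}: ~{approx_params(n_layer, n_embd) / 1e6:.0f}M parameters")
# roughly 124M / 354M / 773M / 1557M, matching the usual GPT-2 size names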
# @package train.optimizer
_target_: torch.optim.Adam
# @package train.optimizer
_target_: apex.contrib.optimizers.distributed_fused_adam.DistributedFusedAdam
adam_w_mode: True
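Neither optimizer config stores a params entry; a common Hydra pattern (sketched here, not necessarily the repo's exact code) is to pass the model parameters at instantiation time:

import torch.nn as nn
from hydra.utils import instantiate
from omegaconf import OmegaConf

model = nn.Linear(8, 8)  # stand-in model
opt_cfg = OmegaConf.create({"_target_": "torch.optim.Adam", "lr": 6e-4})  # lr is illustrative
optimizer = instantiate(opt_cfg, params=model.parameters())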