Blame · flash_attn/utils/pretrained.py · 7c2191542afe110c87f61f227f8df4e95d0ea0af · gaoqiong / flash-attention · GitLab

Switch branch/tag

flash-attention

flash_attn

utils

pretrained.py
Find file
Normal view · History · Permalink

pretrained.py

381 Bytes

Newer

Older

Tweak CrossEntropyLoss to take process_group in init

Tri Dao
committed
Dec 27, 2022

import torch

from transformers.utils import WEIGHTS_NAME
from transformers.utils.hub import cached_file

[Gen] Make generation work with Tensor Parallel

Tri Dao
committed
Jan 15, 2023

def state_dict_from_pretrained(model_name, device=None, dtype=None):
    """Load the pretrained weights of ``model_name`` as a state dict.

    The checkpoint file named ``WEIGHTS_NAME`` is looked up for
    ``model_name`` through ``cached_file`` and deserialized with
    ``torch.load``.

    Args:
        model_name: Model identifier handed to ``cached_file``.
        device: Forwarded to ``torch.load`` as ``map_location``. ``None``
            leaves the placement decision to ``torch.load``.
        dtype: When not ``None``, every entry of the loaded mapping is
            converted with ``.to(dtype)``, whatever its original dtype.

    Returns:
        The loaded object unchanged when ``dtype`` is ``None``; otherwise a
        new ``dict`` with the same keys and the converted values.

    Note:
        ``torch.load`` unpickles the file, so only load checkpoints from
        sources you trust.
    """
    checkpoint_path = cached_file(model_name, WEIGHTS_NAME)
    weights = torch.load(checkpoint_path, map_location=device)

    # No cast requested: return exactly what was deserialized.
    if dtype is None:
        return weights

    return {name: tensor.to(dtype) for name, tensor in weights.items()}