Commit 615e9cbf authored by huteng.ht's avatar huteng.ht
Browse files

Initial commit for open-source release


Signed-off-by: default avatarhuteng.ht <huteng.ht@bytedance.com>
parents
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import os
import tempfile
import unittest
from copy import deepcopy
from unittest import TestCase
import torch
from safetensors import safe_open
import veturboio
class TestSave(TestCase):
    """Round-trip tests for veturboio's safetensors and pytorch writers."""

    @classmethod
    def setUpClass(cls):
        # Shared fixtures: two random weight tensors and temp output paths.
        cls.tensors_0 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.randn(2000, 10),
        }
        cls.tempdir = tempfile.TemporaryDirectory()
        cls.filepath_0 = os.path.join(cls.tempdir.name, "model_0.safetensors")
        cls.filepath_1 = os.path.join(cls.tempdir.name, "model_0.pt")

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    def test_save_file(self):
        """Tensors written via save_file must be readable with stock safetensors."""
        veturboio.save_file(self.tensors_0, self.filepath_0)
        with safe_open(self.filepath_0, framework="pt", device="cpu") as fh:
            for name in fh.keys():
                self.assertTrue(torch.allclose(self.tensors_0[name], fh.get_tensor(name)))

    def test_save_pt(self):
        """Tensors written via save_pt must be readable with torch.load."""
        veturboio.save_pt(self.tensors_0, self.filepath_1)
        reloaded = torch.load(self.filepath_1)
        for name, expected in self.tensors_0.items():
            self.assertTrue(torch.allclose(expected, reloaded[name]))
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import base64
import os
import tempfile
import unittest
from copy import deepcopy
from unittest import TestCase
import numpy as np
import torch
import veturboio
import veturboio.ops.sfcs_utils as sfcs_utils
def init_sfcs_env():
    """Populate the process environment with the SFCS settings the CI tests use,
    then (re)generate the SFCS client configuration file.
    """
    # Drop any stale generated config so init_sfcs_conf starts fresh.
    stale_conf = f"{os.getcwd()}/libcfs.xml"
    if os.path.exists(stale_conf):
        os.remove(stale_conf)
    # AK/SK come from the CI secrets; everything else is fixed test config.
    os.environ.update(
        {
            'SFCS_FSNAME': 'byted-cpu-sfcs',
            'SFCS_REGION': 'cn-beijing',
            'SFCS_ACCESS_KEY': os.environ['CI_SFCS_AK'],
            'SFCS_SECRET_KEY': os.environ['CI_SFCS_SK'],
            'SFCS_AUTHENTICATION_SERVICE_NAME': 'cfs',
            'SFCS_NS_ID': '18014398509481988',
            'SFCS_UFS_PATH': 'tos://yinzq-bucket/',
            'SFCS_MULTI_NIC_WHITELIST': 'eth0',
            'SFCS_NETWORK_SEGMENT': '172.31.128.0/17',
            'SFCS_NAMENODE_ENDPOINT_ADDRESS': '100.67.19.231',
            'SFCS_LOG_SEVERITY': 'ERROR',
        }
    )
    sfcs_utils.init_sfcs_conf()
class TestSFCS(TestCase):
    """Smoke test for the raw SFCS write/size/read/delete helpers."""

    @classmethod
    def setUpClass(cls):
        init_sfcs_env()

    def _run_pipeline(self):
        """Write 1 MiB, check its size, read it back, compare, clean up."""
        path = "/data.bin"
        nbytes = 1024 * 1024
        # Start from a clean slate in case a previous run left the file behind.
        sfcs_utils.sfcs_delete_file(path)
        outgoing = np.empty([nbytes], dtype=np.byte)
        self.assertEqual(sfcs_utils.sfcs_write_file(path, outgoing, nbytes), nbytes)
        self.assertEqual(sfcs_utils.sfcs_get_file_size(path), nbytes)
        incoming = np.empty([nbytes], dtype=np.byte)
        self.assertEqual(sfcs_utils.sfcs_read_file(path, incoming, nbytes, 0), nbytes)
        self.assertTrue((outgoing == incoming).all())
        sfcs_utils.sfcs_delete_file(path)

    def test_pipeline(self):
        self._run_pipeline()
class TestSFCSLoad(TestCase):
    """End-to-end save/load round-trips over SFCS, with and without cipher."""

    @classmethod
    def setUpClass(cls):
        init_sfcs_env()
        # Key/IV consumed by CipherInfo when use_cipher=True.
        # NOTE(review): 'VETUROIO' (missing 'B') matches CipherInfo.ENV_KEY /
        # ENV_IV exactly — the misspelling is load-bearing; do not "fix" it
        # here without changing CipherInfo too.
        os.environ['VETUROIO_KEY'] = base64.b64encode(b'abcdefgh12345678').decode('ascii')
        os.environ['VETUROIO_IV'] = base64.b64encode(b'1234567887654321').decode('ascii')
        cls.filepath_0 = "sfcs://model.safetensors"
        cls.filepath_1 = "sfcs://model.pt"
        # mock /tmp as efs mount path
        # NOTE(review): the path below lives at "/" rather than /tmp — confirm
        # the comment above against the actual EFS mount convention.
        cls.filepath_2 = "/model.safetensors"
        cls.tensors_0 = {
            "weight1": torch.ones(50, 50),
            "weight2": torch.zeros(50, 50),
        }

        class MockModel(torch.nn.Module):
            # Minimal two-layer model used as a save_model fixture.
            def __init__(self) -> None:
                super().__init__()
                self.linear1 = torch.nn.Linear(50, 50)
                self.linear2 = torch.nn.Linear(50, 50)

        cls.model = MockModel()
        if torch.cuda.is_available():
            # Mirror the CPU fixtures on GPU for the cuda variant of the tests.
            cls.cuda_tensors_0 = deepcopy(cls.tensors_0)
            for key in cls.cuda_tensors_0.keys():
                cls.cuda_tensors_0[key] = cls.cuda_tensors_0[key].cuda()
            cls.cuda_model = MockModel().cuda()

    @classmethod
    def tearDownClass(cls):
        # file[6:] strips "sfcs:/" but keeps the leading "/" — the bare
        # absolute path the SFCS helpers expect.
        sfcs_utils.sfcs_delete_file(cls.filepath_0[6:])
        sfcs_utils.sfcs_delete_file(cls.filepath_1[6:])

    def _run_pipeline(self, tensors, model, map_location, use_cipher):
        """Save/load a tensor dict, a model and a .pt file over SFCS and
        verify the reloaded values match the originals."""
        # 1) raw tensor dict -> safetensors on SFCS
        veturboio.save_file(tensors, self.filepath_0, use_cipher=use_cipher)
        loaded_tensors = veturboio.load(self.filepath_0, map_location=map_location, use_cipher=use_cipher)
        for key in tensors.keys():
            self.assertTrue(torch.allclose(tensors[key], loaded_tensors[key]))
        # 2) model -> safetensors on SFCS
        veturboio.save_model(model, self.filepath_0, use_cipher=use_cipher)
        loaded_tensors = veturboio.load(self.filepath_0, map_location=map_location, use_cipher=use_cipher)
        state_dict = model.state_dict()
        for key in state_dict.keys():
            self.assertTrue(torch.allclose(state_dict[key], loaded_tensors[key]))
        # 3) state dict -> pytorch .pt on SFCS
        veturboio.save_pt(state_dict, self.filepath_1, use_cipher=use_cipher)
        loaded_tensors = veturboio.load(self.filepath_1, map_location=map_location, use_cipher=use_cipher)
        for key in state_dict.keys():
            self.assertTrue(torch.allclose(state_dict[key], loaded_tensors[key]))
        # 4) plain path forced through the SFCS SDK via the env switch; this
        # resolves to the same SFCS file written by save_model in step 2.
        os.environ['VETURBOIO_USE_SFCS_SDK'] = '1'
        loaded_tensors = veturboio.load(self.filepath_2, map_location=map_location, use_cipher=use_cipher)
        del os.environ['VETURBOIO_USE_SFCS_SDK']
        state_dict = model.state_dict()
        for key in state_dict.keys():
            self.assertTrue(torch.allclose(state_dict[key], loaded_tensors[key]))

    def test_pipeline_cpu(self):
        self._run_pipeline(self.tensors_0, self.model, "cpu", use_cipher=False)
        self._run_pipeline(self.tensors_0, self.model, "cpu", use_cipher=True)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_pipeline_cuda(self):
        self._run_pipeline(self.cuda_tensors_0, self.cuda_model, "cuda:0", use_cipher=False)
        self._run_pipeline(self.cuda_tensors_0, self.cuda_model, "cuda:0", use_cipher=True)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import os
import tempfile
from unittest import TestCase
import torch
import veturboio
class TestSharedTensorLoad(TestCase):
    """Checks that models with aliased (shared) parameters survive a
    save/load round trip."""

    @classmethod
    def setUpClass(cls):
        class MockModel(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.linear1 = torch.nn.Linear(10, 20)
                self.linear2 = torch.nn.Linear(20, 10)
                # linear3 aliases linear2, producing shared tensors in state_dict.
                self.linear3 = self.linear2

        cls.model = MockModel()

    def _assert_roundtrip(self, reloaded):
        # Every entry of the model's state_dict must match the reloaded copy.
        for name, expected in self.model.state_dict().items():
            self.assertTrue(torch.allclose(expected, reloaded[name]))

    def test_pipeline(self):
        with tempfile.TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "model.safetensors")
            veturboio.save_model(self.model, target)
            self._assert_roundtrip(veturboio.load(target, map_location="cpu"))

    def test_save_file(self):
        with tempfile.TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "model.safetensors")
            veturboio.save_file(self.model.state_dict(), target, force_save_shared_tensor=True)
            self._assert_roundtrip(veturboio.load(target, map_location="cpu"))
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from veturboio.io import load, save_file, save_model, save_pt
from veturboio.ops.load_utils import init_io_helper
__all__ = ["load", "save_file", "save_model", "init_io_helper", "save_pt"]
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import argparse
import torch
from veturboio import save_file
# Command-line tool: convert a pytorch checkpoint into a safetensors file.
parser = argparse.ArgumentParser()
parser.add_argument("--input", "-i", type=str, required=True)
parser.add_argument("--output", "-o", type=str, required=True)

if __name__ == "__main__":
    args = parser.parse_args()
    print(f"convert {args.input} to {args.output}")
    # Only safetensors output is supported; reject anything else up front.
    suffix = args.output.split(".")[-1]
    if suffix != "safetensors":
        raise ValueError("output file should be safetensors file")
    # force_save_shared_tensor dedups aliased tensors so safetensors accepts them.
    save_file(torch.load(args.input), args.output, force_save_shared_tensor=True)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import os
from typing import Dict, Optional
import torch
from safetensors.torch import _remove_duplicate_names
from safetensors.torch import save_file as safetenors_save_file
from safetensors.torch import save_model as safetensors_save_model
from veturboio.loader import FasterPosixLoader, PosixLoader, SfcsClientLoader
from veturboio.ops.load_utils import IOHelper
from veturboio.safetensors import SafetensorsFile
from veturboio.saver import PosixSaver, SfcsClientSaver
from veturboio.types import FILE_PATH
def is_sfcs_path(file: "FILE_PATH"):
    """Classify *file* as an SFCS path or a plain local path.

    Args:
        file (FILE_PATH): path, optionally prefixed with ``sfcs://``.

    Returns:
        Tuple[bool, str]: ``(use_sfcs_sdk, path)``. For ``sfcs://`` URLs the
        returned path strips only ``sfcs:/`` so it stays an absolute path
        ("sfcs://model.pt" -> "/model.pt").
    """
    # len(file) > 7 keeps the historical behavior that a bare "sfcs://"
    # (empty remainder) is NOT treated as an SFCS path.
    if file.startswith("sfcs://") and len(file) > 7:
        return True, file[6:]
    if os.environ.get("VETURBOIO_USE_SFCS_SDK", "0") == "1":
        # Env escape hatch: force plain paths through the SFCS SDK.
        return True, file
    return False, file
def load(
    file: FILE_PATH,
    map_location: Optional[str] = "cpu",
    enable_fast_mode: Optional[bool] = True,
    num_thread: Optional[int] = 32,
    helper: Optional[IOHelper] = None,
    use_pinmem: Optional[bool] = False,
    use_direct_io: Optional[bool] = False,
    use_cipher: Optional[bool] = False,
) -> Dict:
    """Load a state dict from a checkpoint file (safetensors or pytorch).

    Safetensors files are read through veturboio's accelerated path when
    ``enable_fast_mode`` is on and the native IOHelper is available. Paths
    prefixed with ``sfcs://`` (or any path when ``VETURBOIO_USE_SFCS_SDK=1``)
    are read through the SFCS SDK.

    Args:
        file (FILE_PATH): file path
        map_location (str, optional): map location. Defaults to "cpu".
        enable_fast_mode (bool, optional): enable fast mode. Defaults to True.
        num_thread (int, optional): number of threads. Defaults to 32.
        helper (IOHelper, optional): reuse an existing IOHelper; a new one
            is created when None and fast mode is enabled.
        use_pinmem (bool, optional): use pin memory. Defaults to False.
        use_direct_io (bool, optional): open file in direct io mode. Defaults to False.
        use_cipher (bool, optional): decrypt file when use sfcs sdk. Defaults to False.

    Returns:
        state_dict (Dict): state dict

    Examples:
        ```
        import veturboio
        state_dict = veturboio.load("model.safetensors")
        ```
    """
    # IOHelper is None when the native extension is unavailable; fall back to
    # the plain posix reader regardless of what the caller requested.
    if IOHelper is None:
        enable_fast_mode = False
    elif helper is None:
        helper = IOHelper()
    use_sfcs_sdk, file = is_sfcs_path(file)
    if not enable_fast_mode:
        loader = PosixLoader()
    elif use_sfcs_sdk:
        loader = SfcsClientLoader(
            helper,
            num_thread=num_thread,
            use_pinmem=use_pinmem,
            use_direct_io=use_direct_io,
            use_cipher=use_cipher,
        )
    else:
        loader = FasterPosixLoader(
            helper,
            num_thread=num_thread,
            use_pinmem=use_pinmem,
            use_direct_io=use_direct_io,
        )
    safetensors_file = SafetensorsFile(file, loader)
    return safetensors_file.load(map_location=map_location)
def save_file(
    state_dict: Dict[str, torch.Tensor],
    file: FILE_PATH,
    force_contiguous: bool = True,
    force_save_shared_tensor: bool = False,
    metadata: Dict[str, str] = None,
    use_cipher: Optional[bool] = False,
) -> None:
    """Save state dict object to safetensors file.

    Args:
        state_dict (Dict): state dict; never mutated, even when shared
            tensors are dropped (a shallow copy is modified instead).
        file (FILE_PATH): file path; "sfcs://" paths go through the SFCS SDK.
        force_contiguous (bool, optional): force contiguous. Defaults to True.
        force_save_shared_tensor (bool, optional): drop duplicate (aliased)
            tensors and record the alias in metadata so safetensors accepts
            the dict. Defaults to False.
        metadata (Dict[str, str], optional): metadata. Defaults to None.
        use_cipher (bool, optional): encrypt file when use sfcs sdk. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio
        state_dict = {"weight": torch.randn(10, 10)}
        veturboio.save_file(state_dict, "model.safetensors")
        ```
    """
    use_sfcs_sdk, file = is_sfcs_path(file)
    saver = SfcsClientSaver(use_cipher=use_cipher) if use_sfcs_sdk else PosixSaver()
    # TODO: there are some bugs while state_dict is loaded from veturboio
    if not force_save_shared_tensor:
        # Let the underlying ValueError (e.g. shared tensors detected)
        # propagate unchanged; re-wrapping it added nothing.
        saver.save_file(state_dict, file, metadata=metadata)
        return
    # Work on a shallow copy so deleting duplicate entries below does not
    # mutate the caller's dict.
    state_dict = dict(state_dict)
    to_removes = _remove_duplicate_names(state_dict)
    for kept_name, to_remove_group in to_removes.items():
        for to_remove in to_remove_group:
            if metadata is None:
                metadata = {}
            if to_remove not in metadata:
                # Do not override user data
                metadata[to_remove] = kept_name
            del state_dict[to_remove]
    if force_contiguous:
        state_dict = {k: v.contiguous() for k, v in state_dict.items()}
    return saver.save_file(state_dict, file, metadata=metadata)
def save_model(model: torch.nn.Module, file: FILE_PATH, use_cipher: Optional[bool] = False) -> None:
    """Serialize *model*'s parameters to a safetensors file.

    Args:
        model (torch.nn.Module): model whose state dict is written.
        file (FILE_PATH): destination path; "sfcs://" paths go through the SFCS SDK.
        use_cipher (bool, optional): encrypt file when use sfcs sdk. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio
        model = torch.nn.Linear(10, 10)
        veturboio.save_model(model, "model.safetensors")
        ```
    """
    use_sfcs_sdk, file = is_sfcs_path(file)
    saver = SfcsClientSaver(use_cipher=use_cipher) if use_sfcs_sdk else PosixSaver()
    return saver.save_model(model, file)
def save_pt(state_dict: Dict[str, torch.Tensor], file: FILE_PATH, use_cipher: Optional[bool] = False) -> None:
    """Save state dict object to pytorch file.

    Args:
        state_dict (Dict): state dict
        file (FILE_PATH): destination path; "sfcs://" paths go through the SFCS SDK.
        use_cipher (bool, optional): encrypt file when use sfcs sdk. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio
        state_dict = {"weight": torch.randn(10, 10)}
        veturboio.save_pt(state_dict, "model.pt")
        ```
    """
    use_sfcs_sdk, file = is_sfcs_path(file)
    saver = SfcsClientSaver(use_cipher=use_cipher) if use_sfcs_sdk else PosixSaver()
    return saver.save_pt(state_dict, file)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from veturboio.loader.base_loader import BaseLoader, PosixLoader
from veturboio.loader.faster_posix_loader import FasterPosixLoader
from veturboio.loader.sfcs_client_loader import SfcsClientLoader
__all__ = ["BaseLoader", "PosixLoader", "FasterPosixLoader", "SfcsClientLoader"]
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from typing import Any, Dict
import numpy as np
import torch
from numpy import ndarray
# from veturboio.safetensors import SafetensorsFile
from veturboio.types import FILE_PATH
# NOTE(review): presumably the header magic for veturboio-written safetensors
# files — confirm against the serializer/parser that consumes it.
SAFETENSORS_FILE_MAGIC_NUM = 123
# Byte alignment for CPU staging buffers allocated by init_aligned_tensor
# (4096 matches the usual page / direct-I/O block size).
BUF_ALIGN_SIZE = 4096


class BaseLoader:
    """Common interface for checkpoint readers.

    Concrete subclasses implement the raw byte reads (posix, SFCS client, ...);
    this base class carries the method tag and the aligned-buffer allocation
    shared by the fast loaders.
    """

    def __init__(self, method: str) -> None:
        # Human-readable tag of the underlying I/O method ("posix", "client", ...).
        self.method = method

    def load_to_bytes_array(self, file: "FILE_PATH", offset: int, count: int) -> ndarray:
        """Read *count* bytes starting at *offset* of *file* into a 1-D byte array."""
        raise NotImplementedError

    def load_safetensors(self, safetensors_file: Any, map_location: str = "cpu") -> Dict[str, torch.Tensor]:
        """Materialize all tensors of *safetensors_file* onto *map_location*."""
        raise NotImplementedError

    def init_aligned_tensor(self, device, device_id: int, file_size, base_offset: int) -> torch.Tensor:
        """Allocate a uint8 buffer of ``file_size - base_offset`` bytes.

        For CUDA destinations (``device_id != -1``) a plain device tensor is
        returned. For CPU the buffer is carved out of an over-allocated numpy
        array so that its start address is congruent to ``base_offset`` modulo
        ``BUF_ALIGN_SIZE`` — reads that begin at ``base_offset`` in the file
        then land on aligned memory (required e.g. for direct I/O).
        """
        if device_id != -1:
            # A failed CUDA allocation raises RuntimeError; let it propagate
            # with its original traceback (the old re-raise added nothing).
            return torch.empty(file_size - base_offset, dtype=torch.uint8, device=device)
        # Over-allocate by one alignment block, then pick the slice whose
        # start satisfies: addr % BUF_ALIGN_SIZE == base_offset % BUF_ALIGN_SIZE.
        array = np.empty(file_size - base_offset + BUF_ALIGN_SIZE, dtype=np.uint8)
        addr_mod = array.ctypes.data % BUF_ALIGN_SIZE
        want_mod = base_offset % BUF_ALIGN_SIZE
        if addr_mod > want_mod:
            shift = BUF_ALIGN_SIZE - addr_mod + want_mod
        else:
            shift = want_mod - addr_mod
        aligned = array[shift : shift + file_size - base_offset].view(dtype=np.uint8)
        return torch.from_numpy(aligned)
class PosixLoader(BaseLoader):
    """Baseline loader using plain filesystem reads (np.fromfile / np.memmap)."""

    def __init__(self) -> None:
        super().__init__(method="posix")

    def load_to_bytes_array(self, file: FILE_PATH, offset: int, count: int) -> ndarray:
        """Read *count* bytes at *offset* of *file* into a fresh byte array."""
        return np.fromfile(file, dtype=np.byte, offset=offset, count=count)

    def load_safetensors(self, safetensors_file: Any, map_location: str = "cpu") -> Dict[str, torch.Tensor]:
        """Map each tensor's byte span from the file and wrap it as a torch tensor."""
        target = torch.device(map_location)
        header_end = safetensors_file.tensor_offset
        state_dict = {}
        for meta in safetensors_file.meta.values():
            begin, end = meta.data_offsets
            # Memory-map only this tensor's span; no upfront bulk read.
            raw = np.memmap(
                safetensors_file.file,
                dtype=np.byte,
                mode="r",
                offset=header_end + begin,
                shape=end - begin,
            )
            tensor = torch.frombuffer(raw, dtype=meta.dtype).view(meta.shape)
            if target.type == "cuda":
                # Stage through pinned memory for an async host-to-device copy.
                tensor = tensor.pin_memory().to(device=target, non_blocking=True)
            state_dict[meta.name] = tensor
        return state_dict

    def load_pt(self, file: FILE_PATH, map_location: str = "cpu") -> Dict[str, torch.Tensor]:
        """Delegate pytorch checkpoints to torch.load."""
        return torch.load(file, map_location=map_location)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import os
from typing import Dict
import torch
from veturboio.ops.load_utils import IOHelper, load_file_to_tensor
from veturboio.safetensors import SafetensorsFile
from veturboio.types import FILE_PATH
from .base_loader import PosixLoader
class FasterPosixLoader(PosixLoader):
    """Posix loader that delegates the bulk tensor-section read to the
    native IOHelper (multi-threaded, optionally pinned/direct I/O)."""

    def __init__(
        self,
        helper: IOHelper,
        num_thread: int = 32,
        use_pinmem: bool = False,
        use_direct_io: bool = False,
    ) -> None:
        super().__init__()
        self.helper = helper
        self.num_thread = num_thread
        self.use_pinmem = use_pinmem
        self.use_direct_io = use_direct_io

    def load_safetensors(
        self, safetensors_file: SafetensorsFile, map_location: str = "cpu"
    ) -> Dict[str, torch.Tensor]:
        """Read everything after the header in one native call, then split
        the flat byte buffer into individual tensors."""
        target = torch.device(map_location)
        if target.type == "cuda":
            gpu_id = target.index if target.index is not None else torch.cuda.current_device()
        else:
            gpu_id = -1  # sentinel: CPU destination
        header_end = safetensors_file.tensor_offset
        total_bytes = os.path.getsize(safetensors_file.file)
        buffer = self.init_aligned_tensor(target, gpu_id, total_bytes, header_end)
        load_file_to_tensor(
            file_path=safetensors_file.file,
            total_tensor=buffer,
            sample_tensor=torch.ones([], dtype=torch.uint8),
            offset=header_end,
            helper=self.helper,
            device_id=gpu_id,
            num_thread=self.num_thread,
            use_pinmem=self.use_pinmem,
            use_sfcs_sdk=False,
            use_direct_io=self.use_direct_io,
        )
        return SafetensorsFile.split_tensor_to_state_dict(buffer, safetensors_file)

    def load_pt(self, file: FILE_PATH, map_location: str = "cpu") -> Dict[str, torch.Tensor]:
        """Pytorch checkpoints use the regular torch.load path."""
        return torch.load(file, map_location=map_location)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import os
from io import BytesIO
from typing import Dict
import numpy as np
import torch
from numpy import ndarray
from veturboio.loader.base_loader import BaseLoader
from veturboio.ops.cipher import CipherInfo
from veturboio.ops.load_utils import IOHelper, load_file_to_tensor
from veturboio.ops.sfcs_utils import init_sfcs_conf, sfcs_get_file_size, sfcs_read_file
from veturboio.safetensors import SafetensorsFile
from veturboio.types import FILE_PATH
class SfcsClientLoader(BaseLoader):
    """Loader that reads checkpoint bytes through the SFCS client SDK,
    optionally decrypting them according to :class:`CipherInfo`."""

    def __init__(
        self,
        helper: IOHelper,
        num_thread: int = 32,
        use_pinmem: bool = False,
        use_direct_io: bool = False,
        use_cipher: bool = False,
    ) -> None:
        """
        Args:
            helper: native IOHelper used by load_file_to_tensor.
            num_thread: reader threads for the bulk copy.
            use_pinmem: forwarded to the native reader.
            use_direct_io: forwarded to the native reader.
            use_cipher: decrypt data while reading; can also be forced on
                via the VETURBOIO_USE_CIPHER=1 env var.
        """
        super().__init__(method="client")
        self.helper = helper
        self.num_thread = num_thread
        self.use_pinmem = use_pinmem
        self.use_direct_io = use_direct_io
        # Env override: allows enabling the cipher without code changes.
        use_cipher = use_cipher or os.environ.get("VETURBOIO_USE_CIPHER", "0") == "1"
        self.cipher_info = CipherInfo(use_cipher)
        init_sfcs_conf()

    def load_to_bytes_array(self, file: FILE_PATH, offset: int, count: int) -> ndarray:
        """Read *count* bytes at *offset* of *file* into a new byte array via the SDK."""
        candidate = np.empty([count], dtype=np.byte)
        sfcs_read_file(
            file, candidate, length=count, offset=offset, num_thread=self.num_thread, cipher_info=self.cipher_info
        )
        return candidate

    def load_safetensors(
        self, safetensors_file: SafetensorsFile, map_location: str = "cpu"
    ) -> Dict[str, torch.Tensor]:
        """Bulk-read the tensor section over SFCS and split it into a state dict."""
        file_size = sfcs_get_file_size(safetensors_file.file)
        base_offset = safetensors_file.tensor_offset
        device = torch.device(map_location)
        if device.type == "cuda":
            # Resolve an explicit CUDA index; -1 is the sentinel for CPU.
            device_id = device.index if device.index is not None else torch.cuda.current_device()
        else:
            device_id = -1
        total_tensor = self.init_aligned_tensor(device, device_id, file_size, base_offset)
        load_file_to_tensor(
            file_path=safetensors_file.file,
            total_tensor=total_tensor,
            sample_tensor=torch.ones([], dtype=torch.uint8),
            offset=base_offset,
            helper=self.helper,
            device_id=device_id,
            num_thread=self.num_thread,
            use_pinmem=self.use_pinmem,
            use_sfcs_sdk=True,
            use_direct_io=self.use_direct_io,
            cipher_info=self.cipher_info,
        )
        return SafetensorsFile.split_tensor_to_state_dict(total_tensor, safetensors_file)

    def load_pt(self, file: FILE_PATH, map_location: str = "cpu") -> Dict[str, torch.Tensor]:
        """Read the whole .pt file into memory (decrypting if configured) and deserialize."""
        file_size = sfcs_get_file_size(file)
        file_bytes = self.load_to_bytes_array(file, offset=0, count=file_size).tobytes()
        return torch.load(BytesIO(file_bytes), map_location=map_location)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import base64
import os
import threading
import urllib.parse
from datetime import datetime, timezone
from time import sleep
from typing import Optional, Tuple
import numpy as np
import requests_unixsocket
from loguru import logger
class DataPipeClient:
    """HTTP-over-unix-socket client for the on-host datapipe service, which
    hands out encryption key material and SFCS STS credentials.

    When the socket does not exist the client is constructed in a disabled
    state and the getters log a warning and return None.
    """

    DATAPIPE_SOCKET_PATH = os.getenv('DATAPIPE_SOCKET_PATH', '/finetune/data/datapipe.sock')
    ENCRYPT_HEADER = {'X-Datapipe-Task-Type': 'encrypt-key'}
    SFCS_STS_HEADER = {'X-Datapipe-Task-Type': 'sfcs-sts'}

    def __init__(self, retry: int = 3, interval: float = 0.5) -> None:
        """
        Args:
            retry: extra attempts after the first before giving up.
            interval: seconds to sleep between attempts.
        """
        if os.path.exists(self.DATAPIPE_SOCKET_PATH):
            self.url = 'http+unix://' + urllib.parse.quote(self.DATAPIPE_SOCKET_PATH, safe='')
            self.session = requests_unixsocket.Session()
            self.retry = retry
            self.interval = interval
        else:
            # No socket: leave the client disabled.
            self.url = None
            self.session = None

    def _get_with_retry(self, headers, parse):
        """GET self.url with *headers*, retrying on errors.

        *parse* is applied to the decoded JSON inside the retry loop, so a
        malformed response (e.g. missing key) also triggers a retry.
        Returns the parsed value, or None when disabled or exhausted.
        """
        if not self.session:
            logger.warning('Datapipe client initialization failed')
            return None
        attempt = 0
        while True:
            try:
                response = self.session.get(self.url, headers=headers)
                if response.status_code == 200:
                    return parse(response.json())
            except Exception as e:
                logger.warning(e)
            if attempt > self.retry:
                break
            sleep(self.interval)
            attempt += 1
        return None

    def get_data_key_iv(self) -> Tuple[Optional[str], Optional[str]]:
        """Fetch the (base64) data key and IV; (None, None) on failure."""
        result = self._get_with_retry(self.ENCRYPT_HEADER, lambda res: (res['Key'], res['IV']))
        return (None, None) if result is None else result

    def get_sfcs_ak_sk_st(self) -> Optional[dict]:
        """Fetch the SFCS AK/SK/STS payload as a dict; None on failure."""
        return self._get_with_retry(self.SFCS_STS_HEADER, lambda res: res)
class CipherInfo:
    """Holds the AES-128 key/IV used to (de)cipher checkpoint data.

    When *use_cipher* is True the key material is resolved in order:
      1. the datapipe unix-socket service,
      2. the VETUROIO_KEY / VETUROIO_IV environment variables;
    otherwise (or when both sources fail) the instance falls back to
    ``use_cipher = False`` with an all-zero key/IV.
    """

    # NOTE(review): the 'VETUROIO' (sic) spelling is historical and matched
    # by existing callers/tests — keep it for backward compatibility.
    ENV_KEY = 'VETUROIO_KEY'
    ENV_IV = 'VETUROIO_IV'

    def __init__(self, use_cipher: bool) -> None:
        if use_cipher:
            # 1) first try to get key and iv from the datapipe service.
            client = DataPipeClient()
            if client.session:
                try:
                    key_b64, iv_b64 = client.get_data_key_iv()
                    self.key, self.iv = self.convert_key_iv(key_b64, iv_b64)
                    self.use_cipher = True
                    logger.info('get cipher info from datapipe socket')
                    return
                except Exception as e:
                    logger.warning(e)
            # 2) then try to get key and iv from the environment.
            env_key = os.getenv(self.ENV_KEY)
            env_iv = os.getenv(self.ENV_IV)
            if env_key and env_iv:
                try:
                    self.key, self.iv = self.convert_key_iv(env_key, env_iv)
                    self.use_cipher = True
                    logger.info('get cipher info from env')
                    return
                except Exception as e:
                    logger.warning(e)
            # Warn only when ciphering was actually requested but no usable
            # key material could be found.
            logger.warning('fail to get key and iv, fallback to no cipher')
        self.use_cipher = False
        self.key = np.frombuffer(b'\x00' * 16, dtype=np.byte)
        self.iv = np.frombuffer(b'\x00' * 16, dtype=np.byte)

    @staticmethod
    def convert_key_iv(key_b64: str, iv_b64: str) -> Tuple[np.ndarray, np.ndarray]:
        """Decode base64 key/IV and validate that both are exactly 16 bytes.

        Raises:
            ValueError: on a wrong length (also what b64decode raises for
                invalid base64) — still caught by callers' ``except Exception``.
        """
        key_b = base64.b64decode(key_b64, validate=True)
        iv_b = base64.b64decode(iv_b64, validate=True)
        if len(key_b) != 16 or len(iv_b) != 16:
            raise ValueError('length of key or iv is not 16')
        key = np.frombuffer(key_b, dtype=np.byte)
        iv = np.frombuffer(iv_b, dtype=np.byte)
        return key, iv
This diff is collapsed.
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_
#define _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_

#include <stdint.h> /* for uint64_t, etc. */

#ifdef __cplusplus
extern "C"
{
#endif

    /**
     * Some utility decls used in libcfs.
     */
    typedef int32_t tSize;   /// size of data for read/write io ops
    typedef int64_t tOffset; /// offset within the file

    /* Opaque handles: the concrete structs live inside libcfs. */
    struct CfsFileSystemInternalWrapper;
    typedef struct CfsFileSystemInternalWrapper *cfsFS;

    struct CfsFileInternalWrapper;
    typedef struct CfsFileInternalWrapper *cfsFile;

    /* Status codes returned by the async entry points below (0 = success,
     * negative values are error conditions). */
    typedef enum cfsStatus
    {
        STATUS_OK = 0,
        STATUS_MISSING_BLOCK = -1002,
        STATUS_TIMEOUT = -1003,
        STATUS_INVALID_RANGE = -1004,
        STATUS_CONNECTION_CLOSED = -1005,
        STATUS_WRITE_FAILED = -1006,
        STATUS_IO_BUSY = -1007,
        STATUS_INVALID_PARAMETER = -1098,
        STATUS_UNSUPPORTED_OP = -1099,
        STATUS_UNKNOWN_ERR = -1100,
    } cfsStatus;

    /* Completion callbacks invoked when an async operation finishes. */
    typedef void (*cfsWriteCallback)(cfsStatus status, void *args);
    typedef void (*cfsReadCallback)(cfsStatus status, int32_t readLength, char *buffer, void *args);

    /* Per-operation context: carries the callbacks, the data buffer and an
     * opaque user argument passed back to the callback. */
    typedef struct cfsAsyncContext
    {
        cfsReadCallback readCallback;
        cfsWriteCallback writeCallback;
        char *buffer;
        void *args;
    } cfsAsyncContext;

    /**
     * cfsAsyncPRead - Async positional read of data from an open file.
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param offset Position from which to read.
     * @param length The length of the buffer.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncPRead(cfsFS fs, cfsFile file, tSize length, tOffset offset, cfsAsyncContext *context);

    /**
     * cfsAsyncWrite - Write data to the internal buffer of outputstream,
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param buffer The buffer to copy write bytes into.
     * @param length The length of the buffer.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncWrite(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context);

    /**
     * cfsAsyncFlush - Wait for data is acked by remote dn.
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncFlush(cfsFS fs, cfsFile file, cfsAsyncContext *context);

    /**
     * cfsAsyncWriteAndFlush - Write data to remote datanode and wait for ack.
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param buffer The buffer to copy write bytes into.
     * @param length The length of the buffer.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncWriteAndFlush(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context);

#ifdef __cplusplus
}
#endif

#endif /* _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_ */
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef COMMON_H
#define COMMON_H
#include <torch/torch.h>
#include <torch/extension.h>
#include <cuda_runtime.h>
#include <fcntl.h>
#include <unistd.h>
#include <thread>
#include <stdexcept>
#include <sys/mman.h>
#include <sys/stat.h>
#include "cfs.h"
#include "logging.h"
#include "sfcs.h"
// Niceness adjustment for I/O worker threads; negative value raises their
// scheduling priority (requires sufficient privileges) — presumably applied
// in the thread setup code, confirm against the .cpp.
#define THREAD_NICE_ADJ -10
// Alignment, in bytes, for I/O buffers; 4096 matches the common page /
// direct-I/O block size.
#define BUF_ALIGN_SIZE 4096
// NOTE(review): a using-directive in a shared header injects namespace std
// into every translation unit that includes it — consider removing.
using namespace std;
#endif
\ No newline at end of file
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AES_CPU_CTR_H
#define AES_CPU_CTR_H
#include <stdio.h>
// Maximum number of bytes fed to a single EVP *_update call; defined in the
// implementation file.
extern const size_t EVP_UPDATE_MAX;
// AES-CTR block size in bytes; defined in the implementation file.
extern const size_t CTR_BLOCK_SIZE;
// Advance a big-endian CTR counter in place. Presumably increments the
// n-byte counter `counter` by `c` blocks (OpenSSL ctr128_inc style) —
// TODO confirm against the implementation.
void ctr128_inc_by(unsigned char *counter, size_t n, size_t c);
// Forward declarations of OpenSSL EVP types so this header does not need
// to include <openssl/evp.h>.
typedef struct evp_cipher_ctx_st EVP_CIPHER_CTX;
typedef struct evp_cipher_st EVP_CIPHER;
// Stateful AES-CTR encrypter built on the OpenSSL EVP interface.
// `global_offset` is presumably the absolute byte offset of the first
// plaintext byte within the stream, used to pre-advance the counter so
// encryption can start mid-stream — confirm against the implementation.
class CtrEncrypter
{
private:
// OpenSSL cipher context; created in the ctor, released in the dtor.
EVP_CIPHER_CTX *ctx = NULL;
// OpenSSL cipher handle; released in the dtor.
EVP_CIPHER *cipher = NULL;
public:
CtrEncrypter(const unsigned char *key, const unsigned char *iv, size_t global_offset);
~CtrEncrypter();
// Encrypt pt_size bytes from pt into ct; returns an int status
// (EVP-style — semantics defined by the implementation).
int encrypt_update(unsigned char *pt, size_t pt_size, unsigned char *ct);
};
// Stateful AES-CTR decrypter, mirror image of CtrEncrypter.
// `global_offset` is presumably the absolute byte offset of the first
// ciphertext byte within the stream — confirm against the implementation.
class CtrDecrypter
{
private:
// OpenSSL cipher context; created in the ctor, released in the dtor.
EVP_CIPHER_CTX *ctx = NULL;
// OpenSSL cipher handle; released in the dtor.
EVP_CIPHER *cipher = NULL;
public:
CtrDecrypter(const unsigned char *key, const unsigned char *iv, size_t global_offset);
~CtrDecrypter();
// Decrypt ct_size bytes from ct into pt; returns an int status
// (EVP-style — semantics defined by the implementation).
int decrypt_update(unsigned char *ct, size_t ct_size, unsigned char *pt);
};
#endif
#ifndef AES_GPU_CTR_H
#define AES_GPU_CTR_H
#include <stdio.h>
// GPU AES-CTR entry points. Both encrypt and decrypt require the length of
// ct and pt to be a multiple of 16 (the AES block size). Unlike the CPU
// classes these are stateless one-shot calls taking key and IV directly.
int ctr_encrypt_gpu(const unsigned char *key, const unsigned char *iv, unsigned char *pt, size_t pt_size,
                    unsigned char *ct);
int ctr_decrypt_gpu(const unsigned char *key, const unsigned char *iv, unsigned char *ct, size_t ct_size,
                    unsigned char *pt);
#endif
\ No newline at end of file
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef IO_HELPER_H
#define IO_HELPER_H
#include "load_utils.h"
// Helper that loads file contents into a torch::Tensor, optionally through
// a reusable host-side buffer (pinned memory when use_pinmem is set).
// The buffer persists across calls until free_buffer()/destruction.
class IOHelper
{
private:
// Reusable host staging buffer; NULL until init_buffer allocates it.
char *pin_mem = NULL;
// Whether pin_mem was allocated as CUDA pinned memory — affects how it is
// freed (presumably cudaFreeHost vs free; confirm in the .cpp).
bool use_pinmem_ = false;
// Current capacity of pin_mem in bytes.
size_t buffer_size_ = 0;
public:
~IOHelper();
// Read `file_path` starting at `offset` into `res_tensor`, using
// `num_thread` reader threads. `sample_tensor` presumably carries the
// expected dtype/layout, and device_id selects the target GPU (CPU if
// negative?) — confirm against the implementation. Cipher parameters are
// only used when use_cipher is true.
void load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, torch::Tensor sample_tensor,
                         int64_t offset, int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk,
                         bool use_direct_io, bool use_cipher, pybind11::array_t<char> key_arr,
                         pybind11::array_t<char> iv_arr);
// Ensure the staging buffer can hold file_size bytes for the given I/O mode.
void init_buffer(string file_path, int64_t file_size, bool use_pinmem, bool use_sfcs_sdk);
// Release the staging buffer (also safe to rely on the destructor).
void free_buffer();
};
#endif
\ No newline at end of file
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LOAD_UTILS_H
#define LOAD_UTILS_H
#include "common.h"
// Read `total_size` bytes of `file_path` starting at byte `global_offset`
// into host buffer `addr`, fanning the work out over `num_thread` threads.
// `dev_mem` is presumably a device-memory destination used when loading
// straight to GPU — confirm against the implementation. Decrypts on the fly
// when cipher_info.use_cipher is set.
void read_file(string file_path, char *addr, char *dev_mem, int num_thread, size_t total_size, size_t global_offset,
               bool use_sfcs_sdk, bool use_direct_io, CipherInfo cipher_info);
// Return the size of file_name in bytes; queries through the SFCS SDK
// instead of the local filesystem when use_sfcs_sdk is true.
size_t get_file_size(const char *file_name, bool use_sfcs_sdk);
#endif
\ No newline at end of file
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LOGGER_H
#define LOGGER_H
#include <iostream>
using namespace std;
// All log macros write to stdout.
#define PR std::cout
#define ENDL std::endl
// Call-site prefix: "[function at file:line] ".
#define FILE_INFO "[" << __FUNCTION__ << " at " << __FILE__ << ":" << __LINE__ << "] "
// Classic argument-counting trick: ARG_COUNT(...) expands to the number of
// variadic arguments (supported range 1..9). The leading 0 keeps the list
// non-empty for a single argument.
#define ARG_COUNT_PRIVATE(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N
#define ARG_COUNT(...) ARG_COUNT_PRIVATE(0, __VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
// Token-paste a macro name with the argument count, e.g. pr + 3 -> pr3.
#define FUN_COUNT_GLUE(M, count) M##count
#define FUN_JOIN_COUNT(M, count) FUN_COUNT_GLUE(M, count)
#define FUN_JOIN_ARGS(x, y) x y
// Dispatch: CallSomeOne(pr, a, b) expands to pr2(a, b), etc.
#define CallSomeOne(fn, ...) FUN_JOIN_ARGS(FUN_JOIN_COUNT(fn, ARG_COUNT(__VA_ARGS__)), (__VA_ARGS__))
// paramN formatters: the first argument is streamed as-is (the message);
// each following argument is rendered as `, name:value` using the
// stringized expression as the name. NOTE: only up to 5 arguments are
// supported even though ARG_COUNT goes to 9.
#define param1(a) a
#define param2(a, b) a << ", " #b ":" << b
#define param3(a, b, c) a << ", " #b ":" << b << ", " #c ":" << c
#define param4(a, b, c, d) a << ", " #b ":" << b << ", " #c ":" << c << ", " #d ":" << d
#define param5(a, b, c, d, e) a << ", " #b ":" << b << ", " #c ":" << c << ", " #d ":" << d << ", " #e ":" << e
#define pr1(...) param1(__VA_ARGS__)
#define pr2(...) param2(__VA_ARGS__)
#define pr3(...) param3(__VA_ARGS__)
#define pr4(...) param4(__VA_ARGS__)
#define pr5(...) param5(__VA_ARGS__)
// Public logging entry points: logInfo("msg", x, y) prints
// "VETURBOIO_CPP_INFO [func at file:line] msg, x:<x>, y:<y>".
#define logDebug(...) PR << "VETURBOIO_CPP_DEBUG " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#define logInfo(...) PR << "VETURBOIO_CPP_INFO " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#define logWarn(...) PR << "VETURBOIO_CPP_WARN " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#define logError(...) PR << "VETURBOIO_CPP_ERROR " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#endif // LOGGER_H
\ No newline at end of file
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SFCS_H
#define SFCS_H
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include "common.h"
#include "cfs.h"
#include "logging.h"
// Default name-node and user identity used when connecting to SFCS/CFS —
// presumably consumed by the SFCSFile constructors; confirm whether they
// can be overridden (e.g. via environment) in the implementation.
#define SFCS_NAME_NODE "default"
#define SFCS_USER_NAME "demo-user"
// NOTE(review): using-directive in a shared header leaks namespace std
// into every includer — consider removing.
using namespace std;
// Bundle of AES-CTR cipher parameters threaded through the I/O paths.
// Default-constructed instances have use_cipher == false and act as
// "no encryption".
class CipherInfo
{
public:
// Whether reads/writes should be de/encrypted.
bool use_cipher = false;
// Raw key bytes; presumably points into the pybind11 arrays passed to the
// ctor — lifetime tied to those arrays, confirm in the implementation.
unsigned char *key = NULL;
// Raw IV bytes; same lifetime caveat as `key`.
unsigned char *iv = NULL;
CipherInfo(bool use_cipher, pybind11::array_t<char> key_arr, pybind11::array_t<char> iv_arr);
CipherInfo(){};
};
// RAII wrapper around one file on the SFCS (CloudFS) filesystem: the
// constructors open the filesystem handle and the destructor releases it.
// Offers parallel reads into host or device memory plus array-based
// read/write helpers for the Python binding.
class SFCSFile
{
public:
// Underlying CloudFS filesystem handle (see cfs.h).
cfsFS fs;
// Path of the wrapped file within the filesystem.
std::string file_path;
// cipher related — optional AES-CTR parameters applied during I/O.
CipherInfo cipher_info;
SFCSFile(std::string file_path);
SFCSFile(std::string file_path, bool use_cipher, pybind11::array_t<char> key_arr, pybind11::array_t<char> iv_arr);
SFCSFile(std::string file_path, CipherInfo cipher_info);
~SFCSFile();
// Size of the file in bytes.
size_t get_file_size();
// Read total_size bytes starting at global_offset using num_thread
// threads; addr is the host buffer, dev_mem presumably a device
// destination — confirm in the implementation. Returns bytes read.
size_t read_file_parallel(char *addr, char *dev_mem, int num_thread, size_t total_size, size_t global_offset);
// Read `length` bytes at `offset` into a pybind11 char array; returns
// bytes read.
size_t read_file_to_array(pybind11::array_t<char> arr, size_t length, size_t offset, int num_thread);
// Write `length` bytes from a pybind11 char array; returns bytes written.
size_t write_file_from_array(pybind11::array_t<char> arr, size_t length);
// Delete the file from the filesystem.
void delete_file();
private:
// Single-threaded positional read into addr; returns bytes read.
size_t read_file(char *addr, size_t length, size_t offset);
// Worker body for read_file_parallel: thread thread_id reads its
// block_size slice of the total_size range starting at global_offset.
void read_file_thread(int thread_id, char *addr, char *dev_mem, size_t block_size, size_t total_size,
                      size_t global_offset);
// Single-shot write of `length` bytes from addr; returns bytes written.
size_t write_file(char *addr, size_t length);
};
#endif
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment