Commit 615e9cbf authored by huteng.ht
Browse files

init commit for opensource


Signed-off-by: huteng.ht <huteng.ht@bytedance.com>
parents
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import os
import tempfile
import unittest
from copy import deepcopy
from unittest import TestCase
import torch
from safetensors import safe_open
import veturboio
class TestSave(TestCase):
    """Round-trip tests for ``veturboio.save_file`` and ``veturboio.save_pt`` on local files."""

    @classmethod
    def setUpClass(cls):
        # Reference state dict shared by every test in the class.
        cls.tensors_0 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.randn(2000, 10),
        }
        # All files are written below one temporary directory that is
        # removed again in tearDownClass.
        cls.tempdir = tempfile.TemporaryDirectory()
        cls.filepath_0 = os.path.join(cls.tempdir.name, "model_0.safetensors")
        cls.filepath_1 = os.path.join(cls.tempdir.name, "model_0.pt")

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    def test_save_file(self):
        """A saved safetensors file holds exactly the reference tensors."""
        veturboio.save_file(self.tensors_0, self.filepath_0)
        with safe_open(self.filepath_0, framework="pt", device="cpu") as f:
            # Compare the key sets first: looping only over the keys found in
            # the file would pass vacuously when tensors are missing.
            self.assertEqual(set(f.keys()), set(self.tensors_0))
            for key, expected in self.tensors_0.items():
                self.assertTrue(torch.allclose(expected, f.get_tensor(key)))

    def test_save_pt(self):
        """A saved PyTorch file can be read back with ``torch.load``."""
        veturboio.save_pt(self.tensors_0, self.filepath_1)
        loaded_tensors = torch.load(self.filepath_1)
        for key, expected in self.tensors_0.items():
            self.assertTrue(torch.allclose(expected, loaded_tensors[key]))
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import base64
import os
import tempfile
import unittest
from copy import deepcopy
from unittest import TestCase
import numpy as np
import torch
import veturboio
import veturboio.ops.sfcs_utils as sfcs_utils
def init_sfcs_env():
    """Prepare the process environment for the SFCS tests.

    Removes a ``libcfs.xml`` left in the current working directory, exports
    the ``SFCS_*`` variables (credentials are copied from ``CI_SFCS_AK`` and
    ``CI_SFCS_SK``) and then calls ``sfcs_utils.init_sfcs_conf()``.

    Raises:
        KeyError: if ``CI_SFCS_AK`` or ``CI_SFCS_SK`` is not set.
    """
    stale_conf = f"{os.getcwd()}/libcfs.xml"
    if os.path.exists(stale_conf):
        os.remove(stale_conf)

    env = os.environ
    env.update(SFCS_FSNAME='byted-cpu-sfcs', SFCS_REGION='cn-beijing')
    # Credentials come from the CI environment; a missing variable fails here.
    env['SFCS_ACCESS_KEY'] = env['CI_SFCS_AK']
    env['SFCS_SECRET_KEY'] = env['CI_SFCS_SK']
    env.update(
        SFCS_AUTHENTICATION_SERVICE_NAME='cfs',
        SFCS_NS_ID='18014398509481988',
        SFCS_UFS_PATH='tos://yinzq-bucket/',
        SFCS_MULTI_NIC_WHITELIST='eth0',
        SFCS_NETWORK_SEGMENT='172.31.128.0/17',
        SFCS_NAMENODE_ENDPOINT_ADDRESS='100.67.19.231',
        SFCS_LOG_SEVERITY='ERROR',
    )
    sfcs_utils.init_sfcs_conf()
class TestSFCS(TestCase):
    """Exercises the raw ``sfcs_utils`` file primitives: write, size, read, delete."""

    @classmethod
    def setUpClass(cls):
        init_sfcs_env()

    def _run_pipeline(self):
        """Write a 1 MiB buffer, read it back and compare, then delete the file."""
        remote_path = "/data.bin"
        nbytes = 1024 * 1024

        # Remove the file first, then write the buffer.
        sfcs_utils.sfcs_delete_file(remote_path)

        # The buffer content is whatever np.empty hands back; only equality
        # between what was written and what is read matters.
        written = np.empty([nbytes], dtype=np.byte)
        self.assertEqual(sfcs_utils.sfcs_write_file(remote_path, written, nbytes), nbytes)
        self.assertEqual(sfcs_utils.sfcs_get_file_size(remote_path), nbytes)

        read_back = np.empty([nbytes], dtype=np.byte)
        self.assertEqual(sfcs_utils.sfcs_read_file(remote_path, read_back, nbytes, 0), nbytes)
        self.assertTrue(np.array_equal(written, read_back))

        sfcs_utils.sfcs_delete_file(remote_path)

    def test_pipeline(self):
        self._run_pipeline()
class TestSFCSLoad(TestCase):
    """Save/load round trips through SFCS, with and without the cipher enabled."""

    @classmethod
    def setUpClass(cls):
        init_sfcs_env()
        # Base64-encoded 16-byte key and IV, exported for the use_cipher=True runs.
        os.environ['VETUROIO_KEY'] = base64.b64encode(b'abcdefgh12345678').decode('ascii')
        os.environ['VETUROIO_IV'] = base64.b64encode(b'1234567887654321').decode('ascii')
        cls.filepath_0 = "sfcs://model.safetensors"
        cls.filepath_1 = "sfcs://model.pt"
        # Scheme-less spelling of filepath_0; it is loaded while
        # VETURBOIO_USE_SFCS_SDK is set to '1' (see _run_pipeline).
        cls.filepath_2 = "/model.safetensors"
        cls.tensors_0 = {
            "weight1": torch.ones(50, 50),
            "weight2": torch.zeros(50, 50),
        }

        class MockModel(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.linear1 = torch.nn.Linear(50, 50)
                self.linear2 = torch.nn.Linear(50, 50)

        cls.model = MockModel()
        if torch.cuda.is_available():
            cls.cuda_tensors_0 = {key: value.cuda() for key, value in cls.tensors_0.items()}
            cls.cuda_model = MockModel().cuda()

    @classmethod
    def tearDownClass(cls):
        # Drop the leading "sfcs:/" so the remaining text starts with "/".
        sfcs_utils.sfcs_delete_file(cls.filepath_0[6:])
        sfcs_utils.sfcs_delete_file(cls.filepath_1[6:])

    def _assert_tensors_match(self, expected, loaded):
        """Assert that every tensor in ``expected`` is close to its counterpart in ``loaded``."""
        for key, value in expected.items():
            self.assertTrue(torch.allclose(value, loaded[key]))

    def _run_pipeline(self, tensors, model, map_location, use_cipher):
        """Run the save_file / save_model / save_pt round trips for one configuration.

        Args:
            tensors: state dict saved with ``veturboio.save_file``.
            model: module saved with ``veturboio.save_model``.
            map_location: device string handed to ``veturboio.load``.
            use_cipher: forwarded to every save and load call.
        """
        veturboio.save_file(tensors, self.filepath_0, use_cipher=use_cipher)
        loaded_tensors = veturboio.load(self.filepath_0, map_location=map_location, use_cipher=use_cipher)
        self._assert_tensors_match(tensors, loaded_tensors)

        veturboio.save_model(model, self.filepath_0, use_cipher=use_cipher)
        loaded_tensors = veturboio.load(self.filepath_0, map_location=map_location, use_cipher=use_cipher)
        state_dict = model.state_dict()
        self._assert_tensors_match(state_dict, loaded_tensors)

        veturboio.save_pt(state_dict, self.filepath_1, use_cipher=use_cipher)
        loaded_tensors = veturboio.load(self.filepath_1, map_location=map_location, use_cipher=use_cipher)
        self._assert_tensors_match(state_dict, loaded_tensors)

        # Load the file written by save_model again through its scheme-less
        # path. The variable is cleared in ``finally`` so that a failing load
        # cannot leak it into later tests.
        os.environ['VETURBOIO_USE_SFCS_SDK'] = '1'
        try:
            loaded_tensors = veturboio.load(self.filepath_2, map_location=map_location, use_cipher=use_cipher)
        finally:
            os.environ.pop('VETURBOIO_USE_SFCS_SDK', None)
        self._assert_tensors_match(state_dict, loaded_tensors)

    def test_pipeline_cpu(self):
        self._run_pipeline(self.tensors_0, self.model, "cpu", use_cipher=False)
        self._run_pipeline(self.tensors_0, self.model, "cpu", use_cipher=True)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_pipeline_cuda(self):
        self._run_pipeline(self.cuda_tensors_0, self.cuda_model, "cuda:0", use_cipher=False)
        self._run_pipeline(self.cuda_tensors_0, self.cuda_model, "cuda:0", use_cipher=True)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import os
import tempfile
from unittest import TestCase
import torch
import veturboio
class TestSharedTensorLoad(TestCase):
    """Round-trip tests for a model in which two attributes share one layer."""

    @classmethod
    def setUpClass(cls):
        class MockModel(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.linear1 = torch.nn.Linear(10, 20)
                self.linear2 = torch.nn.Linear(20, 10)
                # linear3 is the same module object as linear2, so the state
                # dict lists the same tensors under two name prefixes.
                self.linear3 = self.linear2

        cls.model = MockModel()

    def _check_roundtrip(self, save):
        """Call ``save(path)`` in a fresh temp dir, reload and compare with the model."""
        with tempfile.TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "model.safetensors")
            save(target)
            restored = veturboio.load(target, map_location="cpu")
            for name, expected in self.model.state_dict().items():
                self.assertTrue(torch.allclose(expected, restored[name]))

    def test_pipeline(self):
        self._check_roundtrip(lambda path: veturboio.save_model(self.model, path))

    def test_save_file(self):
        self._check_roundtrip(
            lambda path: veturboio.save_file(self.model.state_dict(), path, force_save_shared_tensor=True)
        )
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from veturboio.io import load, save_file, save_model, save_pt
from veturboio.ops.load_utils import init_io_helper
__all__ = ["load", "save_file", "save_model", "init_io_helper", "save_pt"]
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import argparse
import torch
from veturboio import save_file
# Command-line interface: both the input checkpoint and the output path are mandatory.
parser = argparse.ArgumentParser()
for long_flag, short_flag in (("--input", "-i"), ("--output", "-o")):
    parser.add_argument(long_flag, short_flag, type=str, required=True)

if __name__ == "__main__":
    args = parser.parse_args()
    print(f"convert {args.input} to {args.output}")

    # The text after the last "." must be exactly "safetensors".
    if args.output.rsplit(".", 1)[-1] != "safetensors":
        raise ValueError("output file should be safetensors file")

    # NOTE: torch.load unpickles the input file; only convert trusted checkpoints.
    checkpoint = torch.load(args.input)
    save_file(checkpoint, args.output, force_save_shared_tensor=True)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import os
from typing import Dict, Optional
import torch
from safetensors.torch import _remove_duplicate_names
from safetensors.torch import save_file as safetenors_save_file
from safetensors.torch import save_model as safetensors_save_model
from veturboio.loader import FasterPosixLoader, PosixLoader, SfcsClientLoader
from veturboio.ops.load_utils import IOHelper
from veturboio.safetensors import SafetensorsFile
from veturboio.saver import PosixSaver, SfcsClientSaver
from veturboio.types import FILE_PATH
def is_sfcs_path(file: FILE_PATH):
    """Decide whether ``file`` should be handled by the SFCS SDK.

    Args:
        file (FILE_PATH): file path, optionally prefixed with ``sfcs://``.

    Returns:
        A ``(use_sfcs_sdk, path)`` tuple. For an ``sfcs://`` path the prefix
        is removed up to, but not including, its second slash, so the
        returned path starts with ``/``. Any other path is returned unchanged
        and is routed to SFCS only when the environment variable
        ``VETURBOIO_USE_SFCS_SDK`` equals ``"1"``.
    """
    scheme = "sfcs://"
    scheme_len = len(scheme)
    # A bare "sfcs://" with nothing after it is not treated as an SFCS path.
    if len(file) > scheme_len and file[:scheme_len] == scheme:
        return True, file[scheme_len - 1 :]

    forced_by_env = os.environ.get("VETURBOIO_USE_SFCS_SDK", "0") == "1"
    if forced_by_env:
        return True, file
    return False, file
def load(
    file: FILE_PATH,
    map_location: Optional[str] = "cpu",
    enable_fast_mode: Optional[bool] = True,
    num_thread: Optional[int] = 32,
    helper: Optional[IOHelper] = None,
    use_pinmem: Optional[bool] = False,
    use_direct_io: Optional[bool] = False,
    use_cipher: Optional[bool] = False,
) -> Dict:
    """Load a state dict from a checkpoint file (safetensors or pytorch format).

    Safetensors files are read by veturboio itself, which accelerates loading.

    Args:
        file (FILE_PATH): file path; an ``sfcs://`` prefix selects the SFCS client.
        map_location (str, optional): map location. Defaults to "cpu".
        enable_fast_mode (bool, optional): enable fast mode. Defaults to True.
        num_thread (int, optional): number of threads. Defaults to 32.
        helper (IOHelper, optional): IO helper to reuse; a new one is created
            when omitted and ``IOHelper`` is available. Defaults to None.
        use_pinmem (bool, optional): use pin memory. Defaults to False.
        use_direct_io (bool, optional): open file in direct io mode. Defaults to False.
        use_cipher (bool, optional): decrypt file when use sfcs sdk. Defaults to False.

    Returns:
        state_dict (Dict): state dict

    Examples:
        ```
        import veturboio
        state_dict = veturboio.load("model.safetensors")
        ```
    """
    if IOHelper is None:
        # Without an IOHelper class only the plain posix loader can be used.
        enable_fast_mode = False
    elif helper is None:
        helper = IOHelper()

    use_sfcs_sdk, file = is_sfcs_path(file)

    # Options shared by both accelerated loaders.
    fast_options = dict(num_thread=num_thread, use_pinmem=use_pinmem, use_direct_io=use_direct_io)

    # Equality (not truthiness) is intentional: only a value equal to False
    # selects the plain loader, so None still takes the fast path.
    if enable_fast_mode == False:  # noqa: E712
        loader = PosixLoader()
    elif use_sfcs_sdk:
        loader = SfcsClientLoader(helper, use_cipher=use_cipher, **fast_options)
    else:
        loader = FasterPosixLoader(helper, **fast_options)

    return SafetensorsFile(file, loader).load(map_location=map_location)
def save_file(
    state_dict: Dict[str, torch.Tensor],
    file: FILE_PATH,
    force_contiguous: bool = True,
    force_save_shared_tensor: bool = False,
    metadata: Optional[Dict[str, str]] = None,
    use_cipher: Optional[bool] = False,
) -> None:
    """Save state dict object to safetensors file.

    The caller's ``state_dict`` and ``metadata`` are never modified; when
    shared tensors have to be dropped, that happens on private copies.

    Args:
        state_dict (Dict): state dict
        file (FILE_PATH): file path
        force_contiguous (bool, optional): make every tensor contiguous before
            saving. Only applied when ``force_save_shared_tensor`` is True.
            Defaults to True.
        force_save_shared_tensor (bool, optional): save even when tensors share
            memory, by keeping one name per shared group and recording the
            dropped names in the metadata. Defaults to False.
        metadata (Dict[str, str], optional): metadata. Defaults to None.
        use_cipher (bool, optional): encrypt file when use sfcs sdk. Defaults to False.

    Raises:
        ValueError: propagated unchanged from the underlying saver.

    Examples:
        ```
        import torch
        import veturboio
        state_dict = {"weight": torch.randn(10, 10)}
        veturboio.save_file(state_dict, "model.safetensors")
        ```
    """
    use_sfcs_sdk, file = is_sfcs_path(file)
    saver = SfcsClientSaver(use_cipher=use_cipher) if use_sfcs_sdk else PosixSaver()

    # TODO: there are some bugs while state_dict is loaded from veturboio
    if not force_save_shared_tensor:
        saver.save_file(state_dict, file, metadata=metadata)
        return

    # Map every duplicate name to the name that is kept for its shared group.
    to_removes = _remove_duplicate_names(state_dict)
    aliases = {
        to_remove: kept_name for kept_name, to_remove_group in to_removes.items() for to_remove in to_remove_group
    }
    if aliases:
        # Work on copies so the caller's dicts keep all of their entries.
        metadata = {} if metadata is None else dict(metadata)
        for to_remove, kept_name in aliases.items():
            # Do not override user data
            metadata.setdefault(to_remove, kept_name)
        state_dict = {k: v for k, v in state_dict.items() if k not in aliases}

    if force_contiguous:
        state_dict = {k: v.contiguous() for k, v in state_dict.items()}

    return saver.save_file(state_dict, file, metadata=metadata)
def save_model(model: torch.nn.Module, file: FILE_PATH, use_cipher: Optional[bool] = False) -> None:
    """Save model state dict to safetensors file.

    Args:
        model (torch.nn.Module): model
        file (FILE_PATH): file path; an ``sfcs://`` prefix selects the SFCS client.
        use_cipher (bool, optional): passed to the SFCS saver; it has no
            effect for local files. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio
        model = torch.nn.Linear(10, 10)
        veturboio.save_model(model, "model.safetensors")
        ```
    """
    via_sfcs, file = is_sfcs_path(file)
    backend = SfcsClientSaver(use_cipher=use_cipher) if via_sfcs else PosixSaver()
    return backend.save_model(model, file)
def save_pt(state_dict: Dict[str, torch.Tensor], file: FILE_PATH, use_cipher: Optional[bool] = False) -> None:
    """Save state dict object to pytorch file.

    Args:
        state_dict (Dict): state dict
        file (FILE_PATH): file path; an ``sfcs://`` prefix selects the SFCS client.
        use_cipher (bool, optional): encrypt file when use sfcs sdk. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio
        state_dict = {"weight": torch.randn(10, 10)}
        veturboio.save_pt(state_dict, "model.pt")
        ```
    """
    via_sfcs, file = is_sfcs_path(file)
    backend = SfcsClientSaver(use_cipher=use_cipher) if via_sfcs else PosixSaver()
    return backend.save_pt(state_dict, file)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from veturboio.loader.base_loader import BaseLoader, PosixLoader
from veturboio.loader.faster_posix_loader import FasterPosixLoader
from veturboio.loader.sfcs_client_loader import SfcsClientLoader
__all__ = ["BaseLoader", "PosixLoader", "FasterPosixLoader", "SfcsClientLoader"]
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from typing import Any, Dict
import numpy as np
import torch
from numpy import ndarray
# from veturboio.safetensors import SafetensorsFile
from veturboio.types import FILE_PATH
# NOTE(review): 123 is the ASCII code of "{"; presumably the byte that opens
# the safetensors JSON header. Confirm against the code that reads it.
SAFETENSORS_FILE_MAGIC_NUM = 123
# Alignment granularity, in bytes, used by BaseLoader.init_aligned_tensor
# when it places a CPU buffer.
BUF_ALIGN_SIZE = 4096
class BaseLoader:
    """Common interface of all loaders plus a shared buffer-allocation helper."""

    def __init__(self, method: str) -> None:
        # Short label naming the loader implementation (e.g. "posix", "client").
        self.method = method

    def load_to_bytes_array(self, file: FILE_PATH, offset: int, count: int) -> ndarray:
        """Read ``count`` bytes of ``file`` starting at ``offset``; implemented by subclasses."""
        raise NotImplementedError

    def load_safetensors(self, safetensors_file: Any, map_location: str = "cpu") -> Dict[str, torch.Tensor]:
        """Load every tensor of a safetensors file; implemented by subclasses."""
        raise NotImplementedError

    def init_aligned_tensor(self, device, device_id: int, file_size, base_offset: int) -> torch.Tensor:
        """Allocate the uint8 buffer that receives the tensor section of a file.

        Args:
            device: torch device used when the buffer is allocated on a device.
            device_id (int): ``-1`` selects a CPU buffer, any other value a
                buffer on ``device``.
            file_size: total size of the file in bytes.
            base_offset (int): file offset at which the tensor data starts.

        Returns:
            A 1-D uint8 tensor of ``file_size - base_offset`` elements.

        Raises:
            RuntimeError: propagated unchanged from ``torch.empty`` so that
                subclasses such as ``torch.cuda.OutOfMemoryError`` stay
                catchable by type.
        """
        payload_size = file_size - base_offset
        if device_id != -1:
            return torch.empty(payload_size, dtype=torch.uint8, device=device)

        # Over-allocate by one alignment unit, then pick the start so that the
        # buffer address and ``base_offset`` are congruent modulo
        # BUF_ALIGN_SIZE (presumably required for direct I/O; confirm).
        array = np.empty(payload_size + BUF_ALIGN_SIZE, dtype=np.uint8)
        align = (base_offset - array.ctypes.data) % BUF_ALIGN_SIZE
        sub_array = array[align : align + payload_size]
        return torch.from_numpy(sub_array)
class PosixLoader(BaseLoader):
    """Loader that reads local files with numpy and torch only."""

    def __init__(self) -> None:
        super().__init__(method="posix")

    def load_to_bytes_array(self, file: FILE_PATH, offset: int, count: int) -> ndarray:
        """Return ``count`` bytes of ``file`` starting at ``offset`` as a byte array."""
        return np.fromfile(file, dtype=np.byte, offset=offset, count=count)

    def load_safetensors(self, safetensors_file: Any, map_location: str = "cpu") -> Dict[str, torch.Tensor]:
        """Memory-map each tensor of ``safetensors_file`` and return them by name.

        For a CUDA ``map_location`` every tensor is pinned and copied to the
        device with ``non_blocking=True``; otherwise the memory-mapped tensor
        itself is returned.
        """
        tensors = {}
        data_start = safetensors_file.tensor_offset
        target = torch.device(map_location)
        to_cuda = target.type == "cuda"

        for meta in safetensors_file.meta.values():
            begin = meta.data_offsets[0]
            end = meta.data_offsets[1]
            raw = np.memmap(
                safetensors_file.file,
                dtype=np.byte,
                mode="r",
                offset=data_start + begin,
                shape=end - begin,
            )
            tensor = torch.frombuffer(raw, dtype=meta.dtype).view(meta.shape)
            if to_cuda:
                tensor = tensor.pin_memory().to(device=target, non_blocking=True)
            tensors[meta.name] = tensor

        return tensors

    def load_pt(self, file: FILE_PATH, map_location: str = "cpu") -> Dict[str, torch.Tensor]:
        """Load a PyTorch checkpoint with ``torch.load``."""
        return torch.load(file, map_location=map_location)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import os
from typing import Dict
import torch
from veturboio.ops.load_utils import IOHelper, load_file_to_tensor
from veturboio.safetensors import SafetensorsFile
from veturboio.types import FILE_PATH
from .base_loader import PosixLoader
class FasterPosixLoader(PosixLoader):
    """Posix loader that reads the whole tensor section through ``load_file_to_tensor``."""

    def __init__(
        self,
        helper: IOHelper,
        num_thread: int = 32,
        use_pinmem: bool = False,
        use_direct_io: bool = False,
    ) -> None:
        super().__init__()
        self.helper = helper
        self.num_thread = num_thread
        self.use_pinmem = use_pinmem
        self.use_direct_io = use_direct_io

    def load_safetensors(
        self, safetensors_file: SafetensorsFile, map_location: str = "cpu"
    ) -> Dict[str, torch.Tensor]:
        """Read the tensor section into one buffer and split it into a state dict."""
        total_size = os.path.getsize(safetensors_file.file)
        data_start = safetensors_file.tensor_offset
        target = torch.device(map_location)

        # -1 means CPU; a CUDA device without an explicit index resolves to
        # the current CUDA device.
        if target.type != "cuda":
            device_id = -1
        elif target.index is None:
            device_id = torch.cuda.current_device()
        else:
            device_id = target.index

        buffer = self.init_aligned_tensor(target, device_id, total_size, data_start)
        load_file_to_tensor(
            file_path=safetensors_file.file,
            total_tensor=buffer,
            sample_tensor=torch.ones([], dtype=torch.uint8),
            offset=data_start,
            helper=self.helper,
            device_id=device_id,
            num_thread=self.num_thread,
            use_pinmem=self.use_pinmem,
            use_sfcs_sdk=False,
            use_direct_io=self.use_direct_io,
        )
        return SafetensorsFile.split_tensor_to_state_dict(buffer, safetensors_file)

    def load_pt(self, file: FILE_PATH, map_location: str = "cpu") -> Dict[str, torch.Tensor]:
        """Load a PyTorch checkpoint with ``torch.load``."""
        return torch.load(file, map_location=map_location)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import os
from io import BytesIO
from typing import Dict
import numpy as np
import torch
from numpy import ndarray
from veturboio.loader.base_loader import BaseLoader
from veturboio.ops.cipher import CipherInfo
from veturboio.ops.load_utils import IOHelper, load_file_to_tensor
from veturboio.ops.sfcs_utils import init_sfcs_conf, sfcs_get_file_size, sfcs_read_file
from veturboio.safetensors import SafetensorsFile
from veturboio.types import FILE_PATH
class SfcsClientLoader(BaseLoader):
    """Loader that reads files through the SFCS client, optionally with a cipher."""

    def __init__(
        self,
        helper: IOHelper,
        num_thread: int = 32,
        use_pinmem: bool = False,
        use_direct_io: bool = False,
        use_cipher: bool = False,
    ) -> None:
        super().__init__(method="client")
        self.helper = helper
        self.num_thread = num_thread
        self.use_pinmem = use_pinmem
        self.use_direct_io = use_direct_io

        # The cipher is requested either by the argument or by setting
        # VETURBOIO_USE_CIPHER=1 in the environment.
        cipher_from_env = os.environ.get("VETURBOIO_USE_CIPHER", "0") == "1"
        self.cipher_info = CipherInfo(use_cipher or cipher_from_env)

        init_sfcs_conf()

    def load_to_bytes_array(self, file: FILE_PATH, offset: int, count: int) -> ndarray:
        """Read ``count`` bytes of ``file`` starting at ``offset`` into a new byte array."""
        out = np.empty([count], dtype=np.byte)
        sfcs_read_file(
            file, out, length=count, offset=offset, num_thread=self.num_thread, cipher_info=self.cipher_info
        )
        return out

    def load_safetensors(
        self, safetensors_file: SafetensorsFile, map_location: str = "cpu"
    ) -> Dict[str, torch.Tensor]:
        """Read the tensor section into one buffer and split it into a state dict."""
        total_size = sfcs_get_file_size(safetensors_file.file)
        data_start = safetensors_file.tensor_offset
        target = torch.device(map_location)

        # -1 means CPU; a CUDA device without an explicit index resolves to
        # the current CUDA device.
        if target.type != "cuda":
            device_id = -1
        elif target.index is None:
            device_id = torch.cuda.current_device()
        else:
            device_id = target.index

        buffer = self.init_aligned_tensor(target, device_id, total_size, data_start)
        load_file_to_tensor(
            file_path=safetensors_file.file,
            total_tensor=buffer,
            sample_tensor=torch.ones([], dtype=torch.uint8),
            offset=data_start,
            helper=self.helper,
            device_id=device_id,
            num_thread=self.num_thread,
            use_pinmem=self.use_pinmem,
            use_sfcs_sdk=True,
            use_direct_io=self.use_direct_io,
            cipher_info=self.cipher_info,
        )
        return SafetensorsFile.split_tensor_to_state_dict(buffer, safetensors_file)

    def load_pt(self, file: FILE_PATH, map_location: str = "cpu") -> Dict[str, torch.Tensor]:
        """Fetch the whole file into memory and hand it to ``torch.load``."""
        total_size = sfcs_get_file_size(file)
        payload = self.load_to_bytes_array(file, offset=0, count=total_size).tobytes()
        return torch.load(BytesIO(payload), map_location=map_location)
'''
Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import base64
import os
import threading
import urllib.parse
from datetime import datetime, timezone
from time import sleep
from typing import Optional, Tuple
import numpy as np
import requests_unixsocket
from loguru import logger
class DataPipeClient:
    """Small HTTP-over-unix-socket client for the datapipe service.

    The socket path comes from the ``DATAPIPE_SOCKET_PATH`` environment
    variable. When the socket does not exist, ``session`` and ``url`` are
    ``None`` and every request method returns its "nothing" value.
    """

    DATAPIPE_SOCKET_PATH = os.getenv('DATAPIPE_SOCKET_PATH', '/finetune/data/datapipe.sock')
    ENCRYPT_HEADER = {'X-Datapipe-Task-Type': 'encrypt-key'}
    SFCS_STS_HEADER = {'X-Datapipe-Task-Type': 'sfcs-sts'}

    def __init__(self, retry: int = 3, interval: float = 0.5) -> None:
        """
        Args:
            retry (int): number of retries after the first failed request.
            interval (float): seconds to sleep between two attempts.
        """
        # Set unconditionally so the attributes exist even without a socket.
        self.retry = retry
        self.interval = interval
        if os.path.exists(self.DATAPIPE_SOCKET_PATH):
            self.url = 'http+unix://' + urllib.parse.quote(self.DATAPIPE_SOCKET_PATH, safe='')
            self.session = requests_unixsocket.Session()
        else:
            self.url = None
            self.session = None

    def _request_json(self, headers: dict, transform=None):
        """GET ``self.url`` with ``headers`` and return the decoded JSON body.

        At most ``retry + 1`` attempts are made (always at least one), with
        ``interval`` seconds of sleep between them. ``transform`` is applied
        to the payload inside the retry loop, so a malformed payload is
        logged and retried like any other failure.

        Returns:
            The (transformed) payload, or ``None`` when there is no session
            or every attempt failed.
        """
        if not self.session:
            logger.warning('Datapipe client initialization failed')
            return None

        attempts = max(self.retry, 0) + 1
        for attempt in range(attempts):
            try:
                response = self.session.get(self.url, headers=headers)
                if response.status_code == 200:
                    payload = response.json()
                    return payload if transform is None else transform(payload)
            except Exception as e:
                # Best effort: log and try again until attempts run out.
                logger.warning(e)
            if attempt + 1 < attempts:
                sleep(self.interval)
        return None

    def get_data_key_iv(self) -> Tuple[Optional[str], Optional[str]]:
        """Return the ``(Key, IV)`` pair from datapipe, or ``(None, None)`` on failure."""
        pair = self._request_json(self.ENCRYPT_HEADER, lambda res: (res['Key'], res['IV']))
        return (None, None) if pair is None else pair

    def get_sfcs_ak_sk_st(self) -> Optional[dict]:
        """Return the JSON body of the ``sfcs-sts`` request, or ``None`` on failure."""
        return self._request_json(self.SFCS_STS_HEADER)
class CipherInfo:
    """Holds the key and IV used for the cipher, plus whether the cipher is active.

    Attributes set by ``__init__``:
        use_cipher: True only when a valid key/IV pair was obtained.
        key, iv: 16-element ``np.byte`` arrays; all zeros when the cipher is off.
    """

    # Environment variables that carry the base64-encoded key and IV.
    ENV_KEY = 'VETUROIO_KEY'
    ENV_IV = 'VETUROIO_IV'

    def __init__(self, use_cipher: bool) -> None:
        """Resolve key and IV, trying the datapipe socket first and the environment second.

        Any failure is logged and the next source is tried; when no source
        yields a valid pair the instance falls back to ``use_cipher = False``.
        """
        if use_cipher:
            # first try to get key and iv from datapipe
            client = DataPipeClient()
            if client.session:
                try:
                    key_b64, iv_b64 = client.get_data_key_iv()
                    self.key, self.iv = self.convert_key_iv(key_b64, iv_b64)
                    self.use_cipher = True
                    logger.info('get cipher info from datapipe socket')
                    return
                except Exception as e:
                    # Deliberate best effort: fall through to the environment.
                    logger.warning(e)

            # then try to get key and iv from env
            env_key = os.getenv(self.ENV_KEY)
            env_iv = os.getenv(self.ENV_IV)
            if env_key and env_iv:
                try:
                    self.key, self.iv = self.convert_key_iv(env_key, env_iv)
                    self.use_cipher = True
                    logger.info('get cipher info from env')
                    return
                except Exception as e:
                    logger.warning(e)

            logger.warning('fail to get key and iv, fallback to no cipher')

        self.use_cipher = False
        self.key = np.frombuffer(b'\x00' * 16, dtype=np.byte)
        self.iv = np.frombuffer(b'\x00' * 16, dtype=np.byte)

    @staticmethod
    def convert_key_iv(key_b64: str, iv_b64: str) -> Tuple[np.ndarray, np.ndarray]:
        """Decode a base64 key/IV pair into two 16-element ``np.byte`` arrays.

        Raises:
            ValueError: if either value is not valid base64 or does not decode
                to exactly 16 bytes.
        """
        key_b = base64.b64decode(key_b64, validate=True)
        iv_b = base64.b64decode(iv_b64, validate=True)
        if len(key_b) != 16 or len(iv_b) != 16:
            # ValueError matches what b64decode raises for malformed input and
            # is still caught by callers that catch Exception.
            raise ValueError('length of key or iv is not 16')
        return np.frombuffer(key_b, dtype=np.byte), np.frombuffer(iv_b, dtype=np.byte)
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _CLOUDFS_LIBCFS3_CLIENT_CFS_H_
#define _CLOUDFS_LIBCFS3_CLIENT_CFS_H_
#include <errno.h> /* for EINTERNAL, etc. */
#include <fcntl.h> /* for O_RDONLY, O_WRONLY */
#include <stdint.h> /* for uint64_t, etc. */
#include <time.h> /* for time_t */
#include <stdbool.h>
#ifndef O_RDONLY
#define O_RDONLY 0x01
#endif
#ifndef O_WRONLY
#define O_WRONLY 0x02
#endif
#ifndef O_ASYNC
#define O_ASYNC 0x04
#endif
#ifndef O_LOCAL
#define O_LOCAL 0x01
#endif
#ifndef EINTERNAL
#define EINTERNAL 255
#endif
/** All APIs set errno to meaningful values */
#ifdef __cplusplus
extern "C"
{
#endif
/**
* Some utility decls used in libcfs.
*/
typedef int32_t tSize; /// size of data for read/write io ops
typedef time_t tTime; /// time type in seconds
typedef int64_t tLong; /// 64-bit signed value; NOTE(review): the previous comment repeated tSize's text, confirm the intended use
typedef int64_t tOffset; /// offset within the file
typedef uint16_t tPort; /// port
/**
 * Kind of a file system object. Each enumerator's value is the character
 * code of the letter shown ('F' or 'D').
 */
typedef enum tCfsObjectKind
{
kCfsObjectKindFile = 'F', /// regular file
kCfsObjectKindDirectory = 'D', /// directory
} tCfsObjectKind;
/**
 * Status of a file system object. Values 1-3 name file states and values 4-6
 * name directory states.
 * NOTE(review): the exact meaning of each state (local / to be persisted /
 * persisted / incomplete / synced) is not defined in this header; confirm
 * against the library documentation.
 */
typedef enum tCfsObjectAccStatus
{
kUnknown = 0,
kFileLocal = 1,
kFileToBePersisted = 2,
kFilePersisted = 3,
kDirLocal = 4,
kDirIncomplete = 5,
kDirSynced = 6,
} tCfsObjectAccStatus;
/**
 * Opaque handles: the wrapper structs are only forward-declared here, so
 * callers work with pointers to them and never see their members.
 */
struct CfsFileSystemInternalWrapper;
typedef struct CfsFileSystemInternalWrapper *cfsFS; /// file system handle
struct CfsFileInternalWrapper;
typedef struct CfsFileInternalWrapper *cfsFile; /// file handle
struct cfsBuilder; /// builder passed to cfsBuilderConnect
/**
* cfsGetLastError - Return error information of last failed operation.
*
* @return A non-NULL pointer to a constant string describing the last error.
* Caller can only read this message and keep it unchanged. No need to free it.
* If last operation finished successfully, the returned message is undefined.
*/
const char *cfsGetLastError();
/**
* cfsFileIsOpenForRead - Determine if a file is open for read.
*
* @param file The CFS file
* @return 1 if the file is open for read; 0 otherwise
*/
int cfsFileIsOpenForRead(cfsFile file);
/**
* cfsFileIsOpenForWrite - Determine if a file is open for write.
*
* @param file The CFS file
* @return 1 if the file is open for write; 0 otherwise
*/
int cfsFileIsOpenForWrite(cfsFile file);
/**
* cfsConnectAsUser - Connect to a cfs file system as a specific user
*
* Connect to the cfs.
*
* @param nn The NameNode. See cfsBuilderSetNameNode for details.
* @param port The port on which the server is listening.
* @param user The user name (this is the hadoop domain user). Passing NULL is equivalent to cfsConnect(nn, port).
* @return Returns a handle to the filesystem or NULL on error.
* @deprecated Use cfsBuilderConnect instead.
*/
cfsFS cfsConnectAsUser(const char *nn, tPort port, const char *user);
/**
* cfsConnect - Connect to a cfs file system.
*
* @param nn The NameNode. See cfsBuilderSetNameNode for details.
* @param port The port on which the server is listening.
* @return Returns a handle to the filesystem or NULL on error.
* @deprecated Use cfsBuilderConnect instead.
*/
cfsFS cfsConnect(const char *nn, tPort port);
/**
* cfsConnectAsUserNewInstance - Forces a new instance to be created
*
* @param nn The NameNode. See cfsBuilderSetNameNode for details.
* @param port The port on which the server is listening.
* @param user The user name to use when connecting
* @return Returns a handle to the filesystem or NULL on error.
* @deprecated Use cfsBuilderConnect instead.
*/
cfsFS cfsConnectAsUserNewInstance(const char *nn, tPort port, const char *user);
/**
* cfsConnectNewInstance - Forces a new instance to be created
*
* @param nn The NameNode. See cfsBuilderSetNameNode for details.
* @param port The port on which the server is listening.
* @return Returns a handle to the filesystem or NULL on error.
* @deprecated Use cfsBuilderConnect instead.
*/
cfsFS cfsConnectNewInstance(const char *nn, tPort port);
/**
* cfsBuilderConnect - Connect to CFS using the parameters defined by the builder.
*
* @param bld The CFS builder
* @param effective_user The user name.
* @return Returns a handle to the filesystem, or NULL on error.
*/
cfsFS cfsBuilderConnect(struct cfsBuilder *bld, const char *effective_user);
/**
* cfsNewBuilder - Create an CFS builder.
*
* @return The CFS builder, or NULL on error.
*/
struct cfsBuilder *cfsNewBuilder(void);
/**
* cfsBuilderSetForceNewInstance - Do nothing, we always create a new instance
*
* @param bld The CFS builder
*/
void cfsBuilderSetForceNewInstance(struct cfsBuilder *bld);
/**
* cfsBuilderSetNameNode - Set the CFS NameNode to connect to.
*
* @param bld The CFS builder
* @param nn The NameNode to use.
*
* If the string given is 'default', the default NameNode
* configuration will be used (from the XML configuration files)
*
* If NULL is given, a LocalFileSystem will be created.
*
* If the string starts with a protocol type such as file:// or
* cfs://, this protocol type will be used. If not, the
* cfs:// protocol type will be used.
*
* You may specify a NameNode port in the usual way by
* passing a string of the format cfs://<hostname>:<port>.
* Alternately, you may set the port with
* cfsBuilderSetNameNodePort. However, you must not pass the
* port in two different ways.
*/
void cfsBuilderSetNameNode(struct cfsBuilder *bld, const char *nn);
/**
* cfsBuilderSetNameNodePort - Set the port of the CFS NameNode to connect to.
*
* @param bld The CFS builder
* @param port The port.
*/
void cfsBuilderSetNameNodePort(struct cfsBuilder *bld, tPort port);
/**
* cfsBuilderSetUserName - Set the username to use when connecting to the CFS cluster.
*
* @param bld The CFS builder
* @param userName The user name. The string will be shallow-copied.
*/
void cfsBuilderSetUserName(struct cfsBuilder *bld, const char *userName);
/**
* cfsBuilderSetKerbTicketCachePath - Set the path to the Kerberos ticket
* cache to use when connecting to
* the CFS cluster.
*
* @param bld The CFS builder
* @param kerbTicketCachePath The Kerberos ticket cache path. The string
* will be shallow-copied.
*/
void cfsBuilderSetKerbTicketCachePath(struct cfsBuilder *bld, const char *kerbTicketCachePath);
/**
* cfsBuilderSetToken - Set the token used to authenticate
*
* @param bld The CFS builder
* @param token The token used to authenticate
*/
void cfsBuilderSetToken(struct cfsBuilder *bld, const char *token);
/**
* cfsSetCustomToken - Set the custom GDPR token used to authenticate
*
* @param fs The configured filesystem handle.
* @param token The custom GDPR token
*/
int cfsSetCustomToken(cfsFS fs, const char *token);
/**
* cfsFreeBuilder - Free an CFS builder.
*
* It is normally not necessary to call this function since
* cfsBuilderConnect frees the builder.
*
* @param bld The CFS builder
*/
void cfsFreeBuilder(struct cfsBuilder *bld);
/**
* cfsBuilderConfSetStr - Set a configuration string for an CfsBuilder.
*
* @param bld The CFS builder
* @param key The key to set.
* @param val The value, or NULL to set no value.
* This will be shallow-copied. You are responsible for
* ensuring that it remains valid until the builder is
* freed.
*
* @return 0 on success; nonzero error code otherwise.
*/
int cfsBuilderConfSetStr(struct cfsBuilder *bld, const char *key, const char *val);
/**
* cfsConfGetStr - Get a configuration string.
*
* @param key The key to find
* @param val (out param) The value. This will be set to NULL if the
* key isn't found. You must free this string with
* cfsConfStrFree.
*
* @return 0 on success; nonzero error code otherwise.
* Failure to find the key is not an error.
*/
int cfsConfGetStr(const char *key, char **val);
/**
* cfsConfGetInt - Get a configuration integer.
*
* @param key The key to find
* @param val (out param) The value. This will NOT be changed if the
* key isn't found.
*
* @return 0 on success; nonzero error code otherwise.
* Failure to find the key is not an error.
*/
int cfsConfGetInt(const char *key, int32_t *val);
/**
* cfsConfStrFree - Free a configuration string found with cfsConfGetStr.
*
* @param val A configuration string obtained from cfsConfGetStr
*/
void cfsConfStrFree(char *val);
/**
* cfsDisconnect - Disconnect from the cfs file system.
*
* Disconnect from cfs.
*
* @param fs The configured filesystem handle.
* @return Returns 0 on success, -1 on error.
* Even if there is an error, the resources associated with the
* cfsFS will be freed.
* @deprecated Use cfsBuilderConnect instead.
*/
int cfsDisconnect(cfsFS fs);
/**
* cfsOpenFile - Open a cfs file in given mode.
*
* @param fs The configured filesystem handle.
* @param path The full path to the file.
* @param flags an | of bits/fcntl.h file flags - supported flags are O_RDONLY, O_WRONLY (meaning create or
* overwrite, i.e., implies O_TRUNC), O_WRONLY|O_APPEND and O_SYNC. Other flags are generally ignored, except
* (O_RDWR || (O_EXCL & O_CREAT)), which returns NULL and sets errno to ENOTSUP.
* @param bufferSize Size of buffer for read/write - pass 0 if you want
* to use the default configured values.
* @param replication Block replication - pass 0 if you want to use
* the default configured values.
* @param blocksize Size of block - pass 0 if you want to use the
* default configured values.
* @return Returns the handle to the open file or NULL on error.
*/
cfsFile cfsOpenFile(cfsFS fs, const char *path, int flags, int bufferSize, short replication, tOffset blocksize);
/**
* cfsOpenFileV2 - Open a cfs file for Read (with ByteCool support).
*
* @param fs The configured filesystem handle.
* @param path The full path to the file.
* @param bufferSize Size of buffer for read/write - pass 0 if you want
* to use the default configured values.
* @param objectNamePtr object name for ByteCool - pass NULL to disable
* ByteCool support.
* @return Returns the handle to the open file or NULL on error.
*/
cfsFile cfsOpenFileV2(cfsFS fs, const char *path, int bufferSize, char **objectNamePtr);
/**
* cfsAppendFileV2 - Open a cfs file for Append
*
* @param fs The configured filesystem handle.
* @param path The full path to the file.
* @param bufferSize Size of buffer for read/write - pass 0 if you want
* to use the default configured values.
* @return Returns the handle to the open file or NULL on error.
*/
cfsFile cfsAppendFileV2(cfsFS fs, const char *path, int bufferSize);
/**
* cfsOpenFileAcc - Open a cfs file in given mode. For ACC FS only.
*
* @param fs The configured filesystem handle.
* @param path The full path to the file.
* @param flags an | of bits/fcntl.h file flags - supported flags are O_RDONLY, O_WRONLY (meaning create or
* overwrite, i.e., implies O_TRUNC), O_WRONLY|O_APPEND and O_SYNC. Other flags are generally ignored, except
* (O_RDWR || (O_EXCL & O_CREAT)), which returns NULL and sets errno to ENOTSUP.
* @param mode File mode for create.
* @param createParent if the parent does not exist, create it.
* @param isAppendable Specify appendable object for ACC mode.
* @return Returns the handle to the open file or NULL on error.
*/
cfsFile cfsOpenFileAcc(cfsFS fs, const char *path, int flags, mode_t mode, int createParent, int isAppendable);
/**
* cfsCreateFileV2 - Create a cfs file
*
* @param fs The configured filesystem handle.
* @param path The full path to the file.
* @param overwrite if a file with this name already exists, then if 1,
* the file will be overwritten, and if 0 an error will be thrown.
* @param bufferSize Size of buffer for read/write - pass 0 if you want
* to use the default configured values.
* @param replication Block replication - pass 0 if you want to use
* the default configured values.
* @param blockSize Size of block - pass 0 if you want to use the
* default configured values.
* @return Returns the handle to the open file or NULL on error.
*/
cfsFile cfsCreateFileV2(cfsFS fs, const char *path, int overwrite, int bufferSize, short replication,
tSize blockSize);
/**
* cfsCloseFile - Close an open file.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @return Returns 0 on success, -1 on error.
* On error, errno will be set appropriately.
* If the cfs file was valid, the memory associated with it will
* be freed at the end of this call, even if there was an I/O
* error.
*/
int cfsCloseFile(cfsFS fs, cfsFile file);
/**
* cfsExists - Checks if a given path exists on the filesystem.
* Use cfsExistsExtended instead if possible.
*
* @param fs The configured filesystem handle.
* @param path The path to look for
* @return Returns 0 on success, -1 on error.
*/
int cfsExists(cfsFS fs, const char *path);
/**
* cfsExistsExtended - Checks if a given path exists on the filesystem
*
* @param fs The configured filesystem handle.
* @param path The path to look for
* @return Returns 1 on success, 0 if file does not exist and -1 on error.
*/
int cfsExistsExtended(cfsFS fs, const char *path);
/**
* cfsSeek - Seek to given offset in file.
*
* This works only for files opened in read-only mode.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @param desiredPos Offset into the file to seek into.
* @return Returns 0 on success, -1 on error.
*/
int cfsSeek(cfsFS fs, cfsFile file, tOffset desiredPos);
/**
* cfsTell - Get the current offset in the file, in bytes.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @return Current offset, -1 on error.
*/
tOffset cfsTell(cfsFS fs, cfsFile file);
/**
* cfsRead - Read data from an open file.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @param buffer The buffer to copy read bytes into.
* @param length The length of the buffer.
* @return On success, a positive number indicating how many bytes
* were read.
* On end-of-file, 0.
* On error, -1. Errno will be set to the error code.
* Just like the POSIX read function, cfsRead will return -1
* and set errno to EINTR if data is temporarily unavailable,
* but we are not yet at the end of the file.
*/
int64_t cfsRead(cfsFS fs, cfsFile file, void *buffer, uint64_t length);
/**
* cfsPread - Positional read of data from an open file.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @param position Position from which to read
* @param buffer The buffer to copy read bytes into.
* @param length The length of the buffer.
* @return See cfsRead
*/
tSize cfsPread(cfsFS fs, cfsFile file, tOffset position, void *buffer, tSize length);
/**
* cfsWrite - Write data into an open file.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @param buffer The data.
* @param length The number of bytes to write.
* @return Returns the number of bytes written, -1 on error.
*/
int64_t cfsWrite(cfsFS fs, cfsFile file, const void *buffer, uint64_t length);
/**
* cfsFlush - Flush the data.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @return Returns 0 on success, -1 on error.
*/
int cfsFlush(cfsFS fs, cfsFile file);
/**
* cfsHFlush - Flush out the data in client's user buffer. After the
* return of this call, new readers will see the data.
*
* @param fs configured filesystem handle
* @param file file handle
* @return 0 on success, -1 on error and sets errno
*/
int cfsHFlush(cfsFS fs, cfsFile file);
/**
* cfsSync - Flush out and sync the data in client's user buffer. After the
* return of this call, new readers will see the data.
*
* @param fs configured filesystem handle
* @param file file handle
* @return 0 on success, -1 on error and sets errno
*/
int cfsSync(cfsFS fs, cfsFile file);
/**
* cfsHSync - Similar to posix fsync, Flush out the data in client's
* user buffer. all the way to the disk device (but the disk may have
* it in its cache).
*
* @param fs configured filesystem handle
* @param file file handle
* @return 0 on success, -1 on error and sets errno
*/
int cfsHSync(cfsFS fs, cfsFile file);
/**
* cfsHSyncAndUpdateLength - Similar to cfsHSync, but also update file
* length in NN.
*
* @param fs configured filesystem handle
* @param file file handle
* @return 0 on success, -1 on error and sets errno
*/
int cfsHSyncAndUpdateLength(cfsFS fs, cfsFile file);
/**
* cfsIsFileClosed - Check whether a file is closed
*
* @param fs The configured filesystem handle.
* @param path The path to look for
* @return Returns 0 on success, -1 on error and sets errno.
errno = 0 means file not closed.
*/
int cfsIsFileClosed(cfsFS fs, const char *path);
/**
* cfsAvailable - Number of bytes that can be read from this
* input stream without blocking.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @return Returns available bytes; -1 on error.
*/
int cfsAvailable(cfsFS fs, cfsFile file);
/**
* cfsCopy - Copy file from one filesystem to another.
*
* @param srcFS The handle to source filesystem.
* @param src The path of source file.
* @param dstFS The handle to destination filesystem.
* @param dst The path of destination file.
* @return Returns 0 on success, -1 on error.
*/
int cfsCopy(cfsFS srcFS, const char *src, cfsFS dstFS, const char *dst);
/**
* cfsMove - Move file from one filesystem to another.
*
* @param srcFS The handle to source filesystem.
* @param src The path of source file.
* @param dstFS The handle to destination filesystem.
* @param dst The path of destination file.
* @return Returns 0 on success, -1 on error.
*/
int cfsMove(cfsFS srcFS, const char *src, cfsFS dstFS, const char *dst);
/**
* cfsDelete - Delete file.
*
* @param fs The configured filesystem handle.
* @param path The path of the file.
* @param recursive if path is a directory and set to
* non-zero, the directory is deleted else throws an exception. In
* case of a file the recursive argument is irrelevant.
* @return Returns 0 on success, -1 on error.
*/
int cfsDelete(cfsFS fs, const char *path, int recursive);
/**
* cfsRename - Rename file.
*
* @param fs The configured filesystem handle.
* @param oldPath The path of the source file.
* @param newPath The path of the destination file.
* @return Returns 0 on success, -1 on error.
*/
int cfsRename(cfsFS fs, const char *oldPath, const char *newPath);
/**
* cfsRename2 - Rename file. cfsRename2 allows the destination file to be overwritten, while cfsRename does not.
*
* @param fs The configured filesystem handle.
* @param oldPath The path of the source file.
* @param newPath The path of the destination file.
* @return Returns 0 on success, -1 on error.
*/
int cfsRename2(cfsFS fs, const char *oldPath, const char *newPath);
/**
* cfsConcat - Concatenate (move) the blocks in a list of source
* files into a single file deleting the source files. Source
* files must all have the same block size and replication, and all
* but the last source file must be an integer number of full
* blocks long. The source files are deleted on successful
* completion.
*
* @param fs The configured filesystem handle.
* @param trg The path of target (resulting) file
* @param srcs A list of paths to source files
* @return Returns 0 on success, -1 on error.
*/
int cfsConcat(cfsFS fs, const char *trg, const char **srcs);
/**
* cfsGetWorkingDirectory - Get the current working directory for
* the given filesystem.
*
* @param fs The configured filesystem handle.
* @param buffer The user-buffer to copy path of cwd into.
* @param bufferSize The length of user-buffer.
* @return Returns buffer, NULL on error.
*/
char *cfsGetWorkingDirectory(cfsFS fs, char *buffer, size_t bufferSize);
/**
* cfsSetWorkingDirectory - Set the working directory. All relative
* paths will be resolved relative to it.
*
* @param fs The configured filesystem handle.
* @param path The path of the new 'cwd'.
* @return Returns 0 on success, -1 on error.
*/
int cfsSetWorkingDirectory(cfsFS fs, const char *path);
/**
* cfsCreateDirectory - Make the given file and all non-existent
* parents into directories.
*
* @param fs The configured filesystem handle.
* @param path The path of the directory.
* @return Returns 0 on success, -1 on error.
*/
int cfsCreateDirectory(cfsFS fs, const char *path);
/**
* cfsCreateDirectoryEx - Make the given file with extended options
*
* @param fs The configured filesystem handle.
* @param path The path of the directory.
* @param mode The permissions for created file and directories.
* @param createParents Controls whether to create all non-existent parent directories or not
* @return Returns 0 on success, -1 on error.
*/
int cfsCreateDirectoryEx(cfsFS fs, const char *path, short mode, int createParents);
/**
* cfsSetReplication - Set the replication of the specified
* file to the supplied value
*
* @param fs The configured filesystem handle.
* @param path The path of the file.
* @param replication Block replication.
* @return Returns 0 on success, -1 on error.
*/
int cfsSetReplication(cfsFS fs, const char *path, int16_t replication);
/**
* cfsEncryptionZoneInfo - Information about an encryption zone.
*
* Returned by cfsGetEZForPath and cfsListEncryptionZones; release it with
* cfsFreeEncryptionZoneInfo.
*/
typedef struct
{
int mSuite; /* the suite of encryption zone */
int mCryptoProtocolVersion; /* the version of crypto protocol */
int64_t mId; /* the id of encryption zone */
char *mPath; /* the path of encryption zone */
char *mKeyName; /* the key name of encryption zone */
} cfsEncryptionZoneInfo;
/**
* cfsEncryptionFileInfo - Information about an encryption file/directory.
*
* Referenced from cfsFileInfo through the mCfsEncryptionFileInfo field.
*/
typedef struct
{
int mSuite; /* the suite of encryption file/directory */
int mCryptoProtocolVersion; /* the version of crypto protocol */
char *mKey; /* the key of encryption file/directory */
char *mKeyName; /* the key name of encryption file/directory */
char *mIv; /* the iv of encryption file/directory */
char *mEzKeyVersionName; /* the key version name of encryption file/directory */
} cfsEncryptionFileInfo;
/**
* cfsBlockLocation - Location information for one block of a file.
*
* Arrays of this struct are returned by cfsGetFileBlockLocations (release with
* cfsFreeBlockLocations) and are also referenced from cfsFileInfo::mBlocks.
*/
typedef struct cfsBlockLocation
{
int numOfNodes; // Number of Datanodes which keep the block
bool isCached; // Replica be cached on Datanodes
char **hosts; // Datanode hostnames
char **names; // Datanode IP:xferPort for accessing the block
char **topologyPaths; // Full path name in network topology
tLong offset; // Offset of the block in the file
tLong length; // block length, may be 0 for the last block
tSize corrupt; // If the block is corrupt
} cfsBlockLocation;
/**
* cfsFileInfo - Information about a file/directory.
*
* Returned by cfsListDirectory, cfsListPath and cfsGetPathInfo; release it with
* cfsFreeFileInfo.
*/
typedef struct
{
tCfsObjectKind mKind; /* file or directory */
char *mName; /* the name of the file */
tTime mLastMod; /* the last modification time for the file in seconds */
tOffset mSize; /* the size of the file in bytes */
short mReplication; /* the count of replicas */
tOffset mBlockSize; /* the block size for the file */
char *mOwner; /* the owner of the file */
char *mGroup; /* the group associated with the file */
short mPermissions; /* the permissions associated with the file */
tTime mLastAccess; /* the last access time for the file in seconds */
cfsEncryptionFileInfo *mCfsEncryptionFileInfo; /* the encryption info of the file/directory */
cfsBlockLocation *mBlocks; /* LocatedBlock gives information about a block and its location. */
int mNumOfBlocks; /* Number of LocatedBlock, i.e. entries in mBlocks */
tCfsObjectAccStatus mAccStatus; /* File or directory status in ACC mode */
} cfsFileInfo;
/**
* cfsListDirectory - Get list of files/directories for a given
* directory-path. cfsFreeFileInfo should be called to deallocate memory.
*
* @param fs The configured filesystem handle.
* @param path The path of the directory.
* @param numEntries Set to the number of files/directories in path.
* @param startAfter The file/directory path after which listing begins, in dictionary order.
* When listing a directory in partitions, remember to
* pass hasRemaining and startAfter at the same time.
* Pass NULL to list all directory items.
* @param hasRemaining Set to 0 if there is no directory entries remained.
* Set to 1 if there are directory entries remained.
* @return Returns a dynamically-allocated array of cfsFileInfo
* objects; NULL on error. As a special case, when the directory is empty,
* a pointer to an address with no memory is returned.
*/
cfsFileInfo *cfsListDirectory(cfsFS fs, const char *path, int *numEntries, const char *startAfter,
int *hasRemaining);
/**
* cfsListPath - Get list of files/directories with block locations
*
* @param needLocation if the FileStatus should contain block locations.
*/
cfsFileInfo *cfsListPath(cfsFS fs, const char *path, int *numEntries, const char *startAfter, int *hasRemaining,
bool needLocation);
/**
* cfsGetPathInfo - Get information about a path as a (dynamically
* allocated) single cfsFileInfo struct. cfsFreeFileInfo should be
* called when the pointer is no longer needed.
*
* @param fs The configured filesystem handle.
* @param path The path of the file.
* @return Returns a dynamically-allocated cfsFileInfo object;
* NULL on error.
*/
cfsFileInfo *cfsGetPathInfo(cfsFS fs, const char *path);
/**
* cfsFileIsEncrypted: determine if a file is encrypted based on its
* cfsFileInfo.
*
* @param cfsFileInfo The array of dynamically-allocated cfsFileInfo
* objects.
* @return -1 if there was an error (errno will be set), 0 if the file is
* not encrypted, 1 if the file is encrypted.
*/
int cfsFileIsEncrypted(cfsFileInfo *cfsFileInfo);
/**
* cfsFreeFileInfo - Free up the cfsFileInfo array (including fields)
*
* @param infos The array of dynamically-allocated cfsFileInfo
* objects.
* @param numEntries The size of the array.
*/
void cfsFreeFileInfo(cfsFileInfo *infos, int numEntries);
/**
* cfsFreeEncryptionZoneInfo - Free up the cfsEncryptionZoneInfo array (including fields)
*
* @param infos The array of dynamically-allocated cfsEncryptionZoneInfo
* objects.
* @param numEntries The size of the array.
*/
void cfsFreeEncryptionZoneInfo(cfsEncryptionZoneInfo *infos, int numEntries);
/**
* cfsGetHosts - Get hostnames where a particular block (determined by
* pos & blocksize) of a file is stored. The last element in the array
* is NULL. Due to replication, a single block could be present on
* multiple hosts.
*
* @param fs The configured filesystem handle.
* @param path The path of the file.
* @param start The start of the block.
* @param length The length of the block.
* @return Returns a dynamically-allocated 2-d array of blocks-hosts;
* NULL on error.
*/
char ***cfsGetHosts(cfsFS fs, const char *path, tOffset start, tOffset length);
/**
* cfsFreeHosts - Free up the structure returned by cfsGetHosts
*
* @param blockHosts The array of dynamically-allocated blocks-hosts.
*/
void cfsFreeHosts(char ***blockHosts);
/**
* cfsGetDefaultBlockSize - Get the default blocksize.
*
* @param fs The configured filesystem handle.
* @deprecated Use cfsGetDefaultBlockSizeAtPath instead.
*
* @return Returns the default blocksize, or -1 on error.
*/
tOffset cfsGetDefaultBlockSize(cfsFS fs);
/**
* cfsGetCapacity - Return the raw capacity of the filesystem.
*
* @param fs The configured filesystem handle.
* @return Returns the raw-capacity; -1 on error.
*/
tOffset cfsGetCapacity(cfsFS fs);
/**
* cfsGetUsed - Return the total raw size of all files in the filesystem.
*
* @param fs The configured filesystem handle.
* @return Returns the total-size; -1 on error.
*/
tOffset cfsGetUsed(cfsFS fs);
/**
* Change the user and/or group of a file or directory.
*
* @param fs The configured filesystem handle.
* @param path the path to the file or directory
* @param owner User string. Set to NULL for 'no change'
* @param group Group string. Set to NULL for 'no change'
* @return 0 on success else -1
*/
int cfsChown(cfsFS fs, const char *path, const char *owner, const char *group);
/**
* cfsChmod
*
* @param fs The configured filesystem handle.
* @param path the path to the file or directory
* @param mode the bitmask to set it to
* @return 0 on success else -1
*/
int cfsChmod(cfsFS fs, const char *path, short mode);
/**
* cfsUtime
*
* @param fs The configured filesystem handle.
* @param path the path to the file or directory
* @param mtime new modification time or -1 for no change
* @param atime new access time or -1 for no change
* @return 0 on success else -1
*/
int cfsUtime(cfsFS fs, const char *path, tTime mtime, tTime atime);
/**
* cfsTruncate - Truncate the file in the indicated path to the indicated size.
*
* @param fs The configured filesystem handle.
* @param path The path to the file.
* @param pos The position the file will be truncated to.
* @param shouldWait output value, true if the client does not need to wait for block recovery,
* false if the client needs to wait for block recovery.
* @return 0 on success else -1
*/
int cfsTruncate(cfsFS fs, const char *path, tOffset pos, int *shouldWait);
/**
* cfsGetDelegationToken - Get a delegation token from namenode.
* The token should be freed using cfsFreeDelegationToken after canceling the token or token expired.
*
* @param fs The file system
* @param renewer The user who will renew the token
*
* @return Return a delegation token, NULL on error.
*/
char *cfsGetDelegationToken(cfsFS fs, const char *renewer);
/**
* cfsFreeDelegationToken - Free a delegation token.
*
* @param token The token to be freed.
*/
void cfsFreeDelegationToken(char *token);
/**
* cfsRecoverLease - Recover the lease of the file
*
* @param fs The file system
* @param path the path whose lease should be recovered
*
* @return Returns 0 on success, -1 on error.
*/
int cfsRecoverLease(cfsFS fs, const char *path);
/**
* cfsRenewDelegationToken - Renew a delegation token.
*
* @param fs The file system.
* @param token The token to be renewed.
*
* @return the new expiration time
*/
int64_t cfsRenewDelegationToken(cfsFS fs, const char *token);
/**
* cfsCancelDelegationToken - Cancel a delegation token.
*
* @param fs The file system.
* @param token The token to be canceled.
*
* @return return 0 on success, -1 on error.
*/
int cfsCancelDelegationToken(cfsFS fs, const char *token);
/**
* Namenode - Addresses of one namenode.
*
* Arrays of this struct are returned by cfsGetHANamenodes and
* cfsGetHANamenodesWithConfig; release them with cfsFreeNamenodeInformation.
*/
typedef struct Namenode
{
char *rpc_addr; // namenode rpc address and port, such as "host:8020"
char *http_addr; // namenode http address and port, such as "host:50070"
} Namenode;
/**
* cfsGetHANamenodes - If cfs is configured with HA namenode, return all namenode informations as an array.
* Else return NULL.
*
* Using configure file which is given by environment parameter LIBCFS_CONF
* or "cloudfs.xml" in working directory.
*
* @param nameservice cfs name service id.
* @param size output the size of returning array.
*
* @return return an array of all namenode information.
*/
Namenode *cfsGetHANamenodes(const char *nameservice, int *size);
/**
* cfsGetHANamenodesWithConfig - If cfs is configured with HA namenode, return all namenode informations as an
* array. Else return NULL.
*
* @param conf the path of configure file.
* @param nameservice cfs name service id.
* @param size output the size of returning array.
*
* @return return an array of all namenode information.
*/
Namenode *cfsGetHANamenodesWithConfig(const char *conf, const char *nameservice, int *size);
/**
* cfsFreeNamenodeInformation - Free the array returned by cfsGetHANamenodesWithConfig()
*
* @param namenodes array returned by cfsGetHANamenodesWithConfig()
* @param size the size of the array.
*/
void cfsFreeNamenodeInformation(Namenode *namenodes, int size);
/**
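* cfsFileIsEncrypted - determine if a file is encrypted based on its
* cfsFileInfo. (Note: this prototype is also declared earlier in this
* header, after cfsGetPathInfo; the two declarations are identical.)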
*
* @param cfsFileInfo The array of dynamically-allocated cfsFileInfo
* objects.
* @return -1 if there was an error (errno will be set), 0 if the file is
* not encrypted, 1 if the file is encrypted.
*/
int cfsFileIsEncrypted(cfsFileInfo *cfsFileInfo);
/**
* cfsGetFileBlockLocations - Get an array containing hostnames,
* offset and size of portions of the given file.
*
* @param fs The file system
* @param path The path to the file
* @param start The start offset into the given file
* @param length The length for which to get locations for
* @param numOfBlock Output the number of elements in the returned array
*
* @return An array of BlockLocation struct.
*/
cfsBlockLocation *cfsGetFileBlockLocations(cfsFS fs, const char *path, tOffset start, tOffset length,
int *numOfBlock);
/**
* cfsFreeBlockLocations - Free the BlockLocation array returned
* by cfsGetFileBlockLocations
*
* @param locations The array returned by cfsGetFileBlockLocations
* @param numOfBlock The number of elements in the locations array
*/
void cfsFreeBlockLocations(cfsBlockLocation *locations, int numOfBlock);
/**
* cfsContentSummary - Content summary of a path.
*
* Returned by cfsGetContentSummary; release it with cfsFreeContentSummary.
*
* NOTE(review): this header does not document the fields. Going by the names,
* length/spaceConsumed are byte totals, fileCount/directoryCount are entry
* counts, and quota/spaceQuota are the configured limits -- confirm against
* the implementation.
*/
typedef struct
{
tOffset length;
tLong fileCount;
tLong directoryCount;
tOffset quota;
tOffset spaceConsumed;
tOffset spaceQuota;
} cfsContentSummary;
/**
* cfsGetContentSummary - Get the content summary.
*
* @param fs The file system
* @param path The path to the file
* @return The content summary
*/
cfsContentSummary *cfsGetContentSummary(cfsFS fs, const char *path);
/**
* cfsFreeContentSummary - Free the contentSummary returned by cfsGetContentSummary
*
* @param contentSummary The contentSummary returned by cfsGetContentSummary
*/
void cfsFreeContentSummary(cfsContentSummary *contentSummary);
/**
* cfsCreateEncryptionZone - Create encryption zone for the directory with specific key name
*
* @param fs The configured filesystem handle.
* @param path The path of the directory.
* @param keyName The key name of the encryption zone
* @return Returns 0 on success, -1 on error.
*/
int cfsCreateEncryptionZone(cfsFS fs, const char *path, const char *keyName);
/**
* cfsGetEZForPath - Get information about a path as a (dynamically
* allocated) single cfsEncryptionZoneInfo struct. cfsFreeEncryptionZoneInfo should be
* called when the pointer is no longer needed.
*
* @param fs The configured filesystem handle.
* @param path The path of the encryption zone.
* @return Returns a dynamically-allocated cfsEncryptionZoneInfo object;
* NULL on error.
*/
cfsEncryptionZoneInfo *cfsGetEZForPath(cfsFS fs, const char *path);
/**
* cfsListEncryptionZones - Get list of all the encryption zones.
*
* cfsFreeEncryptionZoneInfo should be called to deallocate memory.
*
* @param fs The configured filesystem handle.
* @param numEntries The number of list entries.
* @return Returns a dynamically-allocated array of cfsEncryptionZoneInfo objects;
* NULL on error.
*/
cfsEncryptionZoneInfo *cfsListEncryptionZones(cfsFS fs, int *numEntries);
/**
* cfsGetFileSystemMode - Get CloudFS Filesystem Mode.
*
* @param fs The configured filesystem handle.
* @return Return the FileSystem mode, "HDFS" or "ACC". NULL on error.
*/
const char *cfsGetFileSystemMode(cfsFS fs);
/**
* cfsGetAccFileSystemUfsPrefix - Get CloudFS underlying file system prefix
* or working directory.
*
* @param fs The configured filesystem handle.
* @return Return the underlying file system prefix in ACC mode. NULL on
* error.
*/
const char *cfsGetAccFileSystemUfsPrefix(cfsFS fs);
/**
* cfsSetCredentials - Set CloudFS credentials for IAM. Data is shallow copied:
* the strings passed in must remain available until bld is freed.
*
* @param bld The CFS builder
* @param accessKey The access key ID.
* @param secretKey The secret access key.
* @param securityToken The security token.
* @return Returns 0 on success, -1 on error.
*/
int cfsSetCredentials(struct cfsBuilder *bld, const char *accessKey, const char *secretKey,
const char *securityToken);
/**
* cfsLoad - Copy data from the under storage system into Datanode
* @param fs The configured filesystem handle.
* @param path the path which data will be copied
* @param recursive recursively load in subdirectories
* @param loadMetadata load metadata from UFS to Namenode
* @param loadData load data from UFS to Datanode
* @param replicaNum load replica number
* @param dcName name of the data center the data is loaded for -- NOTE(review):
* the original comment documented a single "dataCenter" parameter; confirm
* the exact meaning of dcName and dcId.
* @param dcId id of the data center (presumably; see note on dcName)
* @return Job status
*/
char *cfsLoad(cfsFS fs, const char *path, bool recursive, bool loadMetadata, bool loadData, int replicaNum,
const char *dcName, int dcId);
/**
* cfsFree - Delete data from Datanode
* @param fs The configured filesystem handle.
* @param path the path which data will be deleted
* @param recursive recursively free in subdirectories
* @param freeMetadata free metadata
* @return Job status
*/
char *cfsFree(cfsFS fs, const char *path, bool recursive, bool freeMetadata);
/**
* tJobType - Type of a job, as reported in cfsJobState.jobType.
*
* NOTE(review): the load/free values presumably correspond to jobs started by
* cfsLoad and cfsFree; confirm against the implementation.
*/
typedef enum tJobType
{
kUnknownJob = 0,
kLoadDataJob = 1,
kLoadMetadataJob = 2,
kFreeJob = 3,
} tJobType;
/**
* tJobStatus - Status of a job, as reported in cfsLookupJobResponse.jobStatus.
*/
typedef enum tJobStatus
{
kAccepted = 0,
kSubmitted = 1,
kRunning = 2,
kFinished = 3,
kCancelled = 4,
kFailed = 5
} tJobStatus;
/**
* cfsJobState - One job state entry, referenced from cfsLookupJobResponse.jobStates.
*
* NOTE(review): this header does not document the fields; the remarks below are
* inferred from the field names and should be confirmed.
*/
typedef struct
{
bool done; /* presumably set once the job has finished */
tJobType jobType; /* type of the job, see tJobType */
uint64_t createTimestamp; /* time unit not specified here */
uint64_t completeTimestamp; /* time unit not specified here */
/* Task counters, presumably broken down by task outcome. */
uint64_t successTasks;
uint64_t failedTasks;
uint64_t canceledTasks;
uint64_t timeoutTasks;
uint64_t throttledTasks;
uint64_t totalTasks;
} cfsJobState;
/**
* cfsLookupJobResponse - Result type returned by cfsLookupJob.
*/
typedef struct
{
cfsJobState *jobStates; /* job state entries; presumably numEntries elements -- confirm */
int numEntries;
tJobStatus jobStatus; /* status of the job, see tJobStatus */
char *msg; /* NOTE(review): content and ownership are not documented in this header */
} cfsLookupJobResponse;
/**
* cfsLookupJob - Lookup job status
* @param fs The configured filesystem handle.
* @param job_id The id of the job to look up.
* @return Job status, returned by value as a cfsLookupJobResponse.
*/
cfsLookupJobResponse cfsLookupJob(cfsFS fs, const char *job_id);
/**
* cfsFreeLookupResp - NOTE(review): undocumented in the original header. Judging
* by its name it presumably releases the memory referenced by a
* cfsLookupJobResponse obtained from cfsLookupJob; confirm against the
* implementation.
*
* @param resp The response to free.
*/
void cfsFreeLookupResp(cfsLookupJobResponse *resp);
/**
* cfsCancelJob - Cancel job
* @param fs The configured filesystem handle.
* @param job_id The id of the job to cancel.
*/
void cfsCancelJob(cfsFS fs, const char *job_id);
#ifdef __cplusplus
}
#endif
#endif /* _CLOUDFS_LIBCFS3_CLIENT_CFS_H_ */
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_
#define _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_
#include <stdint.h> /* for uint64_t, etc. */
#ifdef __cplusplus
extern "C"
{
#endif
/**
* Some utility decls used in libcfs.
*/
typedef int32_t tSize; /// size of data for read/write io ops
typedef int64_t tOffset; /// offset within the file
/* Opaque handles: the wrapper structs are only forward-declared in this header,
* so callers can hold and pass the pointers but not look inside them. */
struct CfsFileSystemInternalWrapper;
typedef struct CfsFileSystemInternalWrapper *cfsFS;
struct CfsFileInternalWrapper;
typedef struct CfsFileInternalWrapper *cfsFile;
/**
* cfsStatus - Status codes of the async API: returned by the cfsAsync*
* functions and passed to the read/write callbacks.
*
* STATUS_OK is 0; all other codes are negative.
*/
typedef enum cfsStatus
{
STATUS_OK = 0,
STATUS_MISSING_BLOCK = -1002,
STATUS_TIMEOUT = -1003,
STATUS_INVALID_RANGE = -1004,
STATUS_CONNECTION_CLOSED = -1005,
STATUS_WRITE_FAILED = -1006,
STATUS_IO_BUSY = -1007,
STATUS_INVALID_PARAMETER = -1098,
STATUS_UNSUPPORTED_OP = -1099,
STATUS_UNKNOWN_ERR = -1100,
} cfsStatus;
/* Callback type stored in cfsAsyncContext.writeCallback: receives a status and
* a user pointer (presumably cfsAsyncContext.args -- confirm). */
typedef void (*cfsWriteCallback)(cfsStatus status, void *args);
/* Callback type stored in cfsAsyncContext.readCallback: like cfsWriteCallback,
* plus the read length and the data buffer. */
typedef void (*cfsReadCallback)(cfsStatus status, int32_t readLength, char *buffer, void *args);
/**
* cfsAsyncContext - Callback context the user passes to the cfsAsync* calls.
*
* NOTE(review): which fields each call uses is not documented in this header.
* Presumably readCallback and buffer serve cfsAsyncPRead, writeCallback serves
* the write/flush calls, and args is handed back to the callback; confirm
* against the implementation.
*/
typedef struct cfsAsyncContext
{
cfsReadCallback readCallback;
cfsWriteCallback writeCallback;
char *buffer;
void *args;
} cfsAsyncContext;
/**
* cfsAsyncPRead - Async positional read of data from an open file.
*
* Note the argument order: length is passed before offset.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @param length The length of the buffer.
* @param offset Position from which to read.
* @param context The callback context passed by user.
* @return Status of Async method.
*/
cfsStatus cfsAsyncPRead(cfsFS fs, cfsFile file, tSize length, tOffset offset, cfsAsyncContext *context);
/**
* cfsAsyncWrite - Write data to the internal buffer of outputstream.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @param buffer The buffer holding the bytes to write.
* @param length The length of the buffer.
* @param context The callback context passed by user.
* @return Status of Async method.
*/
cfsStatus cfsAsyncWrite(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context);
/**
* cfsAsyncFlush - Wait until data is acked by the remote datanode.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @param context The callback context passed by user.
* @return Status of Async method.
*/
cfsStatus cfsAsyncFlush(cfsFS fs, cfsFile file, cfsAsyncContext *context);
/**
* cfsAsyncWriteAndFlush - Write data to remote datanode and wait for ack.
*
* @param fs The configured filesystem handle.
* @param file The file handle.
* @param buffer The buffer holding the bytes to write.
* @param length The length of the buffer.
* @param context The callback context passed by user.
* @return Status of Async method.
*/
cfsStatus cfsAsyncWriteAndFlush(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context);
#ifdef __cplusplus
}
#endif
#endif /* _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_ */
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef COMMON_H
#define COMMON_H
#include <torch/torch.h>
#include <torch/extension.h>
#include <cuda_runtime.h>
#include <fcntl.h>
#include <unistd.h>
#include <thread>
#include <stdexcept>
#include <sys/mman.h>
#include <sys/stat.h>
#include "cfs.h"
#include "logging.h"
#include "sfcs.h"
// Nice-value adjustment for worker threads.
// NOTE(review): the use site is outside this header; confirm how it is applied.
#define THREAD_NICE_ADJ -10
// Buffer alignment in bytes.
// NOTE(review): presumably the alignment required for direct-I/O buffers; confirm.
#define BUF_ALIGN_SIZE 4096
// NOTE(review): a using-directive in a header takes effect in every file that
// includes it; other headers here rely on it by spelling `string` unqualified.
using namespace std;
#endif
\ No newline at end of file
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AES_CPU_CTR_H
#define AES_CPU_CTR_H
#include <stdio.h>
// Constants declared here and defined elsewhere; their values are not visible
// in this header.
extern const size_t EVP_UPDATE_MAX;
extern const size_t CTR_BLOCK_SIZE;
// NOTE(review): undocumented. Judging by the name, this presumably advances the
// CTR counter stored in `counter` (n bytes long) by c; confirm the meaning of
// n and c against the implementation.
void ctr128_inc_by(unsigned char *counter, size_t n, size_t c);
// Forward declarations of the cipher context/cipher types so the classes below
// can hold pointers to them (presumably OpenSSL's EVP types, declared this way
// to avoid including the OpenSSL headers here).
typedef struct evp_cipher_ctx_st EVP_CIPHER_CTX;
typedef struct evp_cipher_st EVP_CIPHER;
// CPU-side CTR-mode encrypter built from a key, an IV and a global offset.
//
// NOTE(review): the class holds raw pointers and declares a destructor but no
// copy constructor/assignment; confirm that instances are never copied.
class CtrEncrypter
{
private:
// Cipher context and cipher handles; NULL until initialised.
EVP_CIPHER_CTX *ctx = NULL;
EVP_CIPHER *cipher = NULL;
public:
// key / iv: raw key and IV bytes (sizes are not specified in this header).
// global_offset: presumably the byte offset within the overall stream at
// which this encrypter starts -- TODO confirm.
CtrEncrypter(const unsigned char *key, const unsigned char *iv, size_t global_offset);
~CtrEncrypter();
// Encrypts pt_size bytes of plaintext `pt` into ciphertext buffer `ct`.
// NOTE(review): the meaning of the int return value is not documented here.
int encrypt_update(unsigned char *pt, size_t pt_size, unsigned char *ct);
};
// CPU-side CTR-mode decrypter built from a key, an IV and a global offset.
//
// NOTE(review): the class holds raw pointers and declares a destructor but no
// copy constructor/assignment; confirm that instances are never copied.
class CtrDecrypter
{
private:
// Cipher context and cipher handles; NULL until initialised.
EVP_CIPHER_CTX *ctx = NULL;
EVP_CIPHER *cipher = NULL;
public:
// key / iv: raw key and IV bytes (sizes are not specified in this header).
// global_offset: presumably the byte offset within the overall stream at
// which this decrypter starts -- TODO confirm.
CtrDecrypter(const unsigned char *key, const unsigned char *iv, size_t global_offset);
~CtrDecrypter();
// Decrypts ct_size bytes of ciphertext `ct` into plaintext buffer `pt`.
// NOTE(review): the meaning of the int return value is not documented here.
int decrypt_update(unsigned char *ct, size_t ct_size, unsigned char *pt);
};
#endif
#ifndef AES_GPU_CTR_H
#define AES_GPU_CTR_H
#include <stdio.h>
// GPU CTR-mode encrypt/decrypt entry points.
// Both encrypt and decrypt require the length of ct and pt to be a multiple of 16.
// NOTE(review): whether pt/ct must be host or device pointers, and the meaning
// of the int return value, are not documented in this header.
int ctr_encrypt_gpu(const unsigned char *key, const unsigned char *iv, unsigned char *pt, size_t pt_size,
unsigned char *ct);
int ctr_decrypt_gpu(const unsigned char *key, const unsigned char *iv, unsigned char *ct, size_t ct_size,
unsigned char *pt);
#endif
\ No newline at end of file
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef IO_HELPER_H
#define IO_HELPER_H
#include "load_utils.h"
// Helper that loads file contents into a torch tensor and manages an optional
// staging buffer.
//
// NOTE(review): only declarations are visible here; the parameter remarks below
// are inferred from names and should be confirmed against the implementation.
class IOHelper
{
private:
// Staging buffer state; presumably pinned host memory when use_pinmem_ is set.
char *pin_mem = NULL;
bool use_pinmem_ = false;
size_t buffer_size_ = 0;
public:
~IOHelper();
// Loads data from file_path into res_tensor.
// offset: presumably the byte offset in the file; device_id: target device;
// num_thread: number of reader threads; use_sfcs_sdk / use_direct_io: select
// the read path; use_cipher, key_arr, iv_arr: optional decryption inputs.
void load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, torch::Tensor sample_tensor,
int64_t offset, int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk,
bool use_direct_io, bool use_cipher, pybind11::array_t<char> key_arr,
pybind11::array_t<char> iv_arr);
// Sets up the internal buffer for a file of the given size (presumably).
void init_buffer(string file_path, int64_t file_size, bool use_pinmem, bool use_sfcs_sdk);
// Releases the internal buffer (presumably the one set up by init_buffer).
void free_buffer();
};
#endif
\ No newline at end of file
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LOAD_UTILS_H
#define LOAD_UTILS_H
#include "common.h"
// Reads total_size bytes of file_path using num_thread threads.
// NOTE(review): inferred from the parameter names -- addr is presumably the host
// buffer, dev_mem the device destination, global_offset the starting offset;
// use_sfcs_sdk / use_direct_io select the read path and cipher_info carries
// optional decryption settings. Confirm against the implementation.
void read_file(string file_path, char *addr, char *dev_mem, int num_thread, size_t total_size, size_t global_offset,
bool use_sfcs_sdk, bool use_direct_io, CipherInfo cipher_info);
// Returns the size of file_name; use_sfcs_sdk presumably selects the SFCS SDK
// instead of the local filesystem for the lookup -- TODO confirm.
size_t get_file_size(const char *file_name, bool use_sfcs_sdk);
#endif
\ No newline at end of file
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LOGGER_H
#define LOGGER_H
#include <iostream>
using namespace std;
// Output stream and line terminator used by all log macros below.
#define PR std::cout
#define ENDL std::endl
// Stream fragment "[<function> at <file>:<line>] " identifying the call site.
#define FILE_INFO "[" << __FUNCTION__ << " at " << __FILE__ << ":" << __LINE__ << "] "
// Argument-counting machinery: ARG_COUNT(...) expands to the number of
// arguments it was given (1..9) by shifting a descending number list so that
// the matching count lands in the N slot of ARG_COUNT_PRIVATE.
#define ARG_COUNT_PRIVATE(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N
#define ARG_COUNT(...) ARG_COUNT_PRIVATE(0, __VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
// Two-step token pasting so that `count` is macro-expanded before being glued
// onto M.
#define FUN_COUNT_GLUE(M, count) M##count
#define FUN_JOIN_COUNT(M, count) FUN_COUNT_GLUE(M, count)
// Places a macro name next to its parenthesised argument list.
#define FUN_JOIN_ARGS(x, y) x y
// CallSomeOne(fn, args...) invokes fn<N>(args...), where N is the argument count.
#define CallSomeOne(fn, ...) FUN_JOIN_ARGS(FUN_JOIN_COUNT(fn, ARG_COUNT(__VA_ARGS__)), (__VA_ARGS__))
// paramN: stream fragment for N arguments. The first argument is streamed as
// is; every further argument is streamed as ", <expression text>:<value>".
#define param1(a) a
#define param2(a, b) a << ", " #b ":" << b
#define param3(a, b, c) a << ", " #b ":" << b << ", " #c ":" << c
#define param4(a, b, c, d) a << ", " #b ":" << b << ", " #c ":" << c << ", " #d ":" << d
#define param5(a, b, c, d, e) a << ", " #b ":" << b << ", " #c ":" << c << ", " #d ":" << d << ", " #e ":" << e
// prN: dispatch targets selected by CallSomeOne(pr, ...); only 1 to 5 arguments
// are supported because no pr6..pr9 exist.
#define pr1(...) param1(__VA_ARGS__)
#define pr2(...) param2(__VA_ARGS__)
#define pr3(...) param3(__VA_ARGS__)
#define pr4(...) param4(__VA_ARGS__)
#define pr5(...) param5(__VA_ARGS__)
// Log macros: write "<LEVEL prefix> [function at file:line] <message>, name:value..."
// to PR followed by ENDL. They take 1 to 5 arguments (message first) and differ
// only in the prefix string; all levels go to the same stream. Each expands to a
// bare stream expression, so the caller supplies the trailing semicolon.
#define logDebug(...) PR << "VETURBOIO_CPP_DEBUG " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#define logInfo(...) PR << "VETURBOIO_CPP_INFO " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#define logWarn(...) PR << "VETURBOIO_CPP_WARN " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#define logError(...) PR << "VETURBOIO_CPP_ERROR " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#endif // LOGGER_H
\ No newline at end of file
/*
* Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SFCS_H
#define SFCS_H
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include "common.h"
#include "cfs.h"
#include "logging.h"
// Name node and user name constants for SFCS.
// NOTE(review): presumably the values used when connecting to the filesystem;
// the use site is outside this header -- confirm.
#define SFCS_NAME_NODE "default"
#define SFCS_USER_NAME "demo-user"
using namespace std;
// Cipher settings passed along with read/write requests: an on/off flag plus
// raw pointers to the key and IV bytes.
//
// NOTE(review): key and iv are raw pointers and no destructor is declared, so
// this class presumably does not own the memory (it likely points into the
// numpy arrays given to the constructor); confirm the lifetime requirements.
class CipherInfo
{
public:
bool use_cipher = false;
unsigned char *key = NULL;
unsigned char *iv = NULL;
// Builds the settings from a flag and numpy byte arrays holding key and IV.
CipherInfo(bool use_cipher, pybind11::array_t<char> key_arr, pybind11::array_t<char> iv_arr);
// Default: cipher disabled, key and iv NULL (from the member initialisers).
CipherInfo(){};
};
// Wrapper around one file on an SFCS filesystem, offering size lookup,
// (parallel) reads, writes and deletion, with optional cipher settings.
//
// NOTE(review): only declarations are visible here; parameter and return
// remarks below are inferred from names and should be confirmed against the
// implementation.
class SFCSFile
{
public:
// Filesystem handle and the path this object operates on.
cfsFS fs;
std::string file_path;
// cipher related
CipherInfo cipher_info;
// Constructors: path only, path plus raw cipher inputs, or path plus a
// ready-made CipherInfo.
SFCSFile(std::string file_path);
SFCSFile(std::string file_path, bool use_cipher, pybind11::array_t<char> key_arr, pybind11::array_t<char> iv_arr);
SFCSFile(std::string file_path, CipherInfo cipher_info);
~SFCSFile();
size_t get_file_size();
// Reads total_size bytes with num_thread threads; addr is presumably the
// host buffer, dev_mem the device destination, global_offset the start offset.
size_t read_file_parallel(char *addr, char *dev_mem, int num_thread, size_t total_size, size_t global_offset);
// Reads `length` bytes starting at `offset` into the numpy array `arr`.
size_t read_file_to_array(pybind11::array_t<char> arr, size_t length, size_t offset, int num_thread);
// Writes `length` bytes taken from the numpy array `arr`.
size_t write_file_from_array(pybind11::array_t<char> arr, size_t length);
void delete_file();
private:
// Single-threaded read/write primitives and the per-thread worker
// (presumably used by the public methods above).
size_t read_file(char *addr, size_t length, size_t offset);
void read_file_thread(int thread_id, char *addr, char *dev_mem, size_t block_size, size_t total_size,
size_t global_offset);
size_t write_file(char *addr, size_t length);
};
#endif
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment