"""Generate random input/output tensor datasets (saved as JSON) for a model
whose tensor names, shapes, and dtypes are described in CSV files.

Uses tf.compat.v1 names but relies on TF2 eager execution for .numpy().
"""
import csv
import json
import os
from copy import deepcopy

import tensorflow.compat.v1 as tf

# Map the dtype names used in the CSV files to TensorFlow dtypes.
DTYPE = {
    'float32': tf.float32,
    'int32': tf.int32,
    'int64': tf.int64,
    'string': tf.string,
}


def create_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)
        print(f"Directory '{path}' created.")
    else:
        print(f"Directory '{path}' already exists.")


def convert_nested_array_dtype(arr):
    """Recursively decode UTF-8 bytes in a (possibly nested) list."""
    converted_arr = []
    for element in arr:
        if isinstance(element, list):
            # If the element is itself a list, recurse into it.
            converted_arr.append(convert_nested_array_dtype(element))
        else:
            # Otherwise decode the bytes object into a Python string.
            converted_arr.append(element.decode('utf-8'))
    return converted_arr


def generate_datas(tensors, batch_size):
    """Build a dict of random tensors matching each (name, shape, dtype) row."""
    graph_datas = {}
    for tensor in tensors:
        _dtype = DTYPE[tensor[-1]]
        # Copy the parsed shape so the CSV rows stay intact across batch sizes.
        shapes = deepcopy(tensor[2])

        # An empty shape column means a scalar; pick a dtype-appropriate value
        # (a bare 3.14 constant would fail for int and string dtypes).
        if shapes == ['']:
            if _dtype == tf.string:
                graph_datas[tensor[1]] = tf.constant("example_string", dtype=_dtype)
            elif _dtype in (tf.int32, tf.int64):
                graph_datas[tensor[1]] = tf.constant(3, dtype=_dtype)
            else:
                graph_datas[tensor[1]] = tf.constant(3.14, dtype=_dtype)
            continue

        # Substitute the dynamic "None" dimension with the requested batch size.
        for i in range(len(shapes)):
            if shapes[i] == "None":
                shapes[i] = batch_size
            if shapes[i] != "":
                shapes[i] = int(shapes[i])
        shapes = tuple(shapes)

        if tensor[-1] in ("int32", "int64"):
            random_tensor = tf.random.uniform(shape=shapes, minval=0, maxval=10, dtype=_dtype)
        elif tensor[-1] == "string":
            # Build a constant string tensor. Use a local name for the leading
            # dimension so the outer batch_size argument is not shadowed.
            rows = shapes[0]
            sequence_length = shapes[1] if len(shapes) > 1 else 1
            random_tensor = tf.constant([["example_string"] * sequence_length] * rows,
                                        dtype=tf.string)
        else:
            random_tensor = tf.random.normal(shape=shapes, mean=0.0, stddev=1.0, dtype=_dtype)
        graph_datas[tensor[1]] = random_tensor
    return graph_datas


def read_csv_data(file_path):
    """Read tensor metadata rows, parsing the bracketed shape column."""
    with open(file_path, 'r') as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        datas = list(reader)
    for data in datas:
        # "[None,128]" -> ['None', '128']; strip spaces so int() and the
        # "None" comparison still work if the CSV has spaces after commas.
        data[2] = [dim.strip() for dim in data[2][1:-1].split(",")]
    return datas


def tensors_to_jsonable(graph_datas):
    """Convert a dict of tensors into JSON-serializable nested lists."""
    result = {}
    for key, value in graph_datas.items():
        if value.dtype == tf.string:
            # String tensors come back as bytes; decode them recursively.
            result[key] = convert_nested_array_dtype(value.numpy().tolist())
        else:
            result[key] = value.numpy().tolist()
    return result


def save_graph_datasets_json(input_tensors, output_tensors, batch_size,
                             input_data_json_path, output_data_json_path):
    input_graph_datas = generate_datas(input_tensors, batch_size)
    output_graph_datas = generate_datas(output_tensors, batch_size)
    feed_dict = tensors_to_jsonable(input_graph_datas)
    output_dict = tensors_to_jsonable(output_graph_datas)
    with open(input_data_json_path, 'w') as f:
        json.dump(feed_dict, f, indent=4)
    with open(output_data_json_path, 'w') as f:
        json.dump(output_dict, f, indent=4)


if __name__ == '__main__':
    model = "model_1"       # name of the model under test
    model_dir = "./models"  # model directory
    dataset_path = os.path.join(model_dir, f'{model}/dataset')
    input_tensors_path = os.path.join(model_dir, f'{model}/input_tensors.csv')
    output_tensors_path = os.path.join(model_dir, f'{model}/output_tensors.csv')

    input_tensors = read_csv_data(input_tensors_path)
    output_tensors = read_csv_data(output_tensors_path)
    create_directory(dataset_path)

    for batch_size in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048]:
        print("batch_size:", batch_size)
        input_data_json_path = os.path.join(dataset_path, f'input_tensor_datas_{batch_size}.json')
        output_data_json_path = os.path.join(dataset_path, f'output_tensor_datas_{batch_size}.json')
        save_graph_datasets_json(input_tensors, output_tensors, batch_size,
                                 input_data_json_path, output_data_json_path)
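
# ---------------------------------------------------------------------------
# Hypothetical sketch of the CSV layout this script assumes (the real
# input_tensors.csv / output_tensors.csv files may differ): a header row
# that read_csv_data() skips, the tensor name in column 1, a bracketed
# shape string in column 2, and the dtype name in the last column, e.g.:
#
#   id,name,shape,dtype
#   0,input_ids,"[None,128]",int64
#   1,input_mask,"[None,128]",int32
#   2,score,"[None]",float32
#
# read_csv_data() would turn the first row into
#   ['0', 'input_ids', ['None', '128'], 'int64']
# and, with batch_size=2, generate_datas() would map it to a (2, 128)
# int64 tensor of random values in [0, 10).
# ---------------------------------------------------------------------------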