Commit 241da631 authored by Rayyyyy
Browse files

fix params bug

parent fe3bae99
...@@ -43,7 +43,7 @@ class InferenceWrapper: ...@@ -43,7 +43,7 @@ class InferenceWrapper:
self.model = llm.from_hf(self.model, self.tokenizer, dtype="float16").cuda() self.model = llm.from_hf(self.model, self.tokenizer, dtype="float16").cuda()
except Exception as e: except Exception as e:
logger.error(str(e)) logger.error(f"fastllm initial failed, {e}")
def chat(self, prompt: str, history=[]): def chat(self, prompt: str, history=[]):
...@@ -58,7 +58,7 @@ class InferenceWrapper: ...@@ -58,7 +58,7 @@ class InferenceWrapper:
history, history,
do_sample=False) do_sample=False)
except Exception as e: except Exception as e:
logger.error(str(e)) logger.error(f"chat inference failed, {e}")
return output_text return output_text
......
...@@ -13,12 +13,12 @@ class ChatAgent: ...@@ -13,12 +13,12 @@ class ChatAgent:
self.reranker_model_path = config['feature_database']['reranker_model_path'] self.reranker_model_path = config['feature_database']['reranker_model_path']
reject_throttle = float(config['feature_database']['reject_throttle']) reject_throttle = float(config['feature_database']['reject_throttle'])
local_llm_path = config['llm']['local_llm_path'] local_llm_path = config['llm']['local_llm_path']
accelerate = config.getboolean('llm', 'accelerate') use_vllm = config.getboolean('llm', 'use_vllm')
self.retriever = CacheRetriever(self.embedding_model_path, self.retriever = CacheRetriever(self.embedding_model_path,
self.reranker_model_path).get(reject_throttle=reject_throttle, self.reranker_model_path).get(reject_throttle=reject_throttle,
work_dir=self.work_dir) work_dir=self.work_dir)
self.llm_server = LLMInference(local_llm_path, tensor_parallel_size, accelerate=accelerate) self.llm_server = LLMInference(local_llm_path, tensor_parallel_size, use_vllm=use_vllm)
def generate_prompt(self, def generate_prompt(self,
history_pair, history_pair,
......
...@@ -116,20 +116,17 @@ def auto_select_dcu(config): ...@@ -116,20 +116,17 @@ def auto_select_dcu(config):
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(description='Start all services.') parser = argparse.ArgumentParser(description='Start all services.')
parser.add_argument('--config_path', parser.add_argument('--config_path',
default='/path/of/config.ini', default='./config.ini',
help='Config directory') help='Config directory')
parser.add_argument('--log_path', parser.add_argument('--log_path',
default='', default='./log/assistant.log',
help='Set log file path') help='Set log file path')
return parser.parse_args() return parser.parse_args()
def main(): def main():
args = parse_args() args = parse_args()
log_path = './log/assistant.log' logger.add(sink=args.log_path, level="DEBUG", rotation="500MB", compression="zip", encoding="utf-8", enqueue=True)
if args.log_path:
log_path = args.log_path
logger.add(sink=log_path, level="DEBUG", rotation="500MB", compression="zip", encoding="utf-8", enqueue=True)
workflow(args) workflow(args)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment