"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "01bfbea15f010a6dfe448f09f9fcb5895d096067"
Unverified commit bf1998f0, authored by jthomson04, committed by GitHub
Browse files

fix: Don't detokenize twice in TRT-LLM examples (#1955)

parent 343a4814
...@@ -126,6 +126,9 @@ async def init(runtime: DistributedRuntime, config: Config): ...@@ -126,6 +126,9 @@ async def init(runtime: DistributedRuntime, config: Config):
default_sampling_params._setup(tokenizer) default_sampling_params._setup(tokenizer)
default_sampling_params.stop = None default_sampling_params.stop = None
# We already detokenize inside HandlerBase. No need to also do it in TRTLLM.
default_sampling_params.detokenize = False
async with get_tensorrtllm_engine(engine_args) as engine: async with get_tensorrtllm_engine(engine_args) as engine:
endpoint = component.endpoint(config.endpoint) endpoint = component.endpoint(config.endpoint)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment