Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
28408cdf
Commit
28408cdf
authored
Dec 06, 2024
by
xuxz
Browse files
Update offline_streaming_inference_chat_demo.py
parent
8465317a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
10 deletions
+8
-10
examples/offline_streaming_inference_chat_demo.py
examples/offline_streaming_inference_chat_demo.py
+8
-10
No files found.
examples/offline_streaming_inference_chat_demo.py
View file @
28408cdf
'''
'''
python offline_streaming_inference_chat_demo.py --model /models/llama2/Llama-2-7b-chat-hf
--template template_llama_chat.jinja
--dtype float16 --enforce-eager -tp 1
python offline_streaming_inference_chat_demo.py --model /models/llama2/Llama-2-7b-chat-hf --dtype float16 --enforce-eager -tp 1
'''
'''
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.engine.async_llm_engine
import
AsyncEngineArgs
,
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncEngineArgs
,
AsyncLLMEngine
...
@@ -36,7 +36,6 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
...
@@ -36,7 +36,6 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
return
super
().
parse_args
(
processed_args
,
namespace
)
return
super
().
parse_args
(
processed_args
,
namespace
)
parser
=
FlexibleArgumentParser
()
parser
=
FlexibleArgumentParser
()
parser
.
add_argument
(
'--template'
,
type
=
str
,
help
=
"Path to template"
)
parser
=
AsyncEngineArgs
.
add_cli_args
(
parser
)
parser
=
AsyncEngineArgs
.
add_cli_args
(
parser
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
...
@@ -47,13 +46,13 @@ args = parser.parse_args()
...
@@ -47,13 +46,13 @@ args = parser.parse_args()
# ]
# ]
tokenizer
=
AutoTokenizer
.
from_pretrained
(
args
.
model
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
args
.
model
)
try
:
#
try:
f
=
open
(
args
.
template
,
'r'
)
#
f = open(args.template,'r')
tokenizer
.
chat_template
=
f
.
read
()
#
tokenizer.chat_template = f.read()
except
Exception
as
e
:
#
except Exception as e:
print
(
'except:'
,
e
)
#
print('except:',e)
finally
:
#
finally:
f
.
close
()
#
f.close()
...
@@ -110,4 +109,3 @@ while True:
...
@@ -110,4 +109,3 @@ while True:
history
.
append
({
"role"
:
"assistant"
,
"content"
:
response
})
history
.
append
({
"role"
:
"assistant"
,
"content"
:
response
})
print
()
print
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment