Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
dc1b6127
Commit dc1b6127, authored Jul 18, 2023 by Yuanchen
Committed by binmakeswell on Jul 26, 2023
Browse files
[NFC] polish applications/Chat/inference/server.py code style (#4274)
Co-authored-by: Yuanchen Xu <yuanchen.xu00@gmail.com>
parent
caa44330
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 4 additions and 2 deletions
+4
-2
applications/Chat/inference/server.py
applications/Chat/inference/server.py
+4
-2
No files found.
applications/Chat/inference/server.py
View file @
dc1b6127
...
@@ -14,7 +14,7 @@ from slowapi.errors import RateLimitExceeded
...
@@ -14,7 +14,7 @@ from slowapi.errors import RateLimitExceeded
from
slowapi.util
import
get_remote_address
from
slowapi.util
import
get_remote_address
from
sse_starlette.sse
import
EventSourceResponse
from
sse_starlette.sse
import
EventSourceResponse
from
transformers
import
AutoTokenizer
,
GenerationConfig
,
LlamaForCausalLM
from
transformers
import
AutoTokenizer
,
GenerationConfig
,
LlamaForCausalLM
from
utils
import
ChatPromptProcessor
,
Dialogue
,
LockedIterator
,
sample_streamingly
,
update_model_kwargs_fn
,
load_json
from
utils
import
ChatPromptProcessor
,
Dialogue
,
LockedIterator
,
load_json
,
sample_streamingly
,
update_model_kwargs_fn
CONTEXT
=
'Below is an instruction that describes a task. Write a response that appropriately completes the request. Do not generate new instructions.'
CONTEXT
=
'Below is an instruction that describes a task. Write a response that appropriately completes the request. Do not generate new instructions.'
MAX_LEN
=
512
MAX_LEN
=
512
...
@@ -145,7 +145,9 @@ if __name__ == '__main__':
...
@@ -145,7 +145,9 @@ if __name__ == '__main__':
help
=
'Group size for GPTQ. This is only useful when quantization mode is 4bit. Default: 128.'
)
help
=
'Group size for GPTQ. This is only useful when quantization mode is 4bit. Default: 128.'
)
parser
.
add_argument
(
'--http_host'
,
default
=
'0.0.0.0'
)
parser
.
add_argument
(
'--http_host'
,
default
=
'0.0.0.0'
)
parser
.
add_argument
(
'--http_port'
,
type
=
int
,
default
=
7070
)
parser
.
add_argument
(
'--http_port'
,
type
=
int
,
default
=
7070
)
parser
.
add_argument
(
'--profanity_file'
,
default
=
None
,
help
=
'Path to profanity words list. It should be a JSON file containing a list of words.'
)
parser
.
add_argument
(
'--profanity_file'
,
default
=
None
,
help
=
'Path to profanity words list. It should be a JSON file containing a list of words.'
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
if
args
.
quant
==
'4bit'
:
if
args
.
quant
==
'4bit'
:
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.