OpenDAS / text-generation-inference / Commits / f092ba9b

Unverified commit f092ba9b, authored Apr 27, 2023 by Ehsan M. Kermani, committed by GitHub on Apr 27, 2023
Parent: b9ae7e5d

feat(server): add watermarking tests (#248)
Showing 10 changed files with 98 additions and 16 deletions (+98 / -16).
| File | Additions | Deletions |
| --- | --- | --- |
| .github/workflows/client-tests.yaml | +1 | -1 |
| .github/workflows/tests.yaml | +1 | -1 |
| Makefile | +7 | -2 |
| README.md | +11 | -6 |
| clients/python/pyproject.toml | +4 | -1 |
| clients/python/tests/test_client.py | +13 | -0 |
| router/client/src/sharded_client.rs | +1 | -1 |
| router/src/health.rs | +6 | -2 |
| server/tests/utils/test_watermark.py | +54 | -0 |
| server/text_generation_server/models/__init__.py | +0 | -2 |
.github/workflows/client-tests.yaml

```diff
@@ -22,4 +22,4 @@ jobs:
       - name: Run tests
         run: |
           pip install pytest pytest-asyncio
-          cd clients/python && pytest tests
+          make python-client-tests
```
.github/workflows/tests.yaml

```diff
@@ -66,7 +66,7 @@ jobs:
       - name: Run server tests
         run: |
           pip install pytest
-          HF_HUB_ENABLE_HF_TRANSFER=1 pytest -sv server/tests
+          make python-server-tests
       - name: Run Rust fmt
         run: |
           cargo fmt --check
```
Makefile

```diff
@@ -21,8 +21,13 @@ router-dev:
 integration-tests: install-router install-launcher
 	cargo test
 
-python-tests:
-	cd server && HF_HUB_ENABLE_HF_TRANSFER=1 pytest tests
+python-server-tests:
+	HF_HUB_ENABLE_HF_TRANSFER=1 pytest server/tests
+
+python-client-tests:
+	pytest clients/python/tests
+
+python-tests: python-server-tests python-client-tests
 
 run-bloom-560m:
 	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --port 8080
```
README.md

````diff
@@ -36,7 +36,7 @@ to power LLMs api-inference widgets.
 - [Quantization](#quantization)
 - [Develop](#develop)
+- [Testing](#testing)
 
 ## Features
 
 - Serve the most popular Large Language Models with a simple launcher
@@ -131,7 +131,7 @@ by setting the address to an OTLP collector with the `--otlp-endpoint` argument.
 ### A note on Shared Memory (shm)
 
-[`NCCL`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/index.html) is a communication framework used by
+[`NCCL`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/index.html) is a communication framework used by
 `PyTorch` to do distributed training/inference. `text-generation-inference` make
 use of `NCCL` to enable Tensor Parallelism to dramatically speed up inference for large language models.
@@ -152,14 +152,14 @@ creating a volume with:
 and mounting it to `/dev/shm`.
 
-Finally, you can also disable SHM sharing by using the `NCCL_SHM_DISABLE=1` environment variable. However, note that
+Finally, you can also disable SHM sharing by using the `NCCL_SHM_DISABLE=1` environment variable. However, note that
 this will impact performance.
 
 ### Local install
 
-You can also opt to install `text-generation-inference` locally.
+You can also opt to install `text-generation-inference` locally.
 
-First [install Rust](https://rustup.rs/) and create a Python virtual environment with at least
+First [install Rust](https://rustup.rs/) and create a Python virtual environment with at least
 Python 3.9, e.g. using `conda`:
 
 ```shell
@@ -181,7 +181,7 @@ sudo unzip -o $PROTOC_ZIP -d /usr/local 'include/*'
 rm -f $PROTOC_ZIP
 ```
 
-On MacOS, using Homebrew:
+On MacOS, using Homebrew:
 
 ```shell
 brew install protobuf
@@ -241,6 +241,11 @@ make router-dev
 ## Testing
 
 ```shell
+# python
+make python-server-tests
+make python-client-tests
+# or both server and client tests
 make python-tests
+# rust cargo tests
 make integration-tests
 ```
````
clients/python/pyproject.toml

```diff
@@ -21,6 +21,9 @@ pytest = "^6.2.5"
 pytest-asyncio = "^0.17.2"
 pytest-cov = "^3.0.0"
 
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+
 [build-system]
-requires = ["poetry-core"]
+requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
```
clients/python/tests/test_client.py

```diff
@@ -87,6 +87,19 @@ async def test_generate_async(flan_t5_xxl_url, hf_headers):
     )
 
 
+@pytest.mark.asyncio
+async def test_generate_async_best_of(flan_t5_xxl_url, hf_headers):
+    client = AsyncClient(flan_t5_xxl_url, hf_headers)
+    response = await client.generate(
+        "test", max_new_tokens=1, best_of=2, do_sample=True
+    )
+
+    assert response.details.seed is not None
+    assert response.details.best_of_sequences is not None
+    assert len(response.details.best_of_sequences) == 1
+    assert response.details.best_of_sequences[0].seed is not None
+
+
 @pytest.mark.asyncio
 async def test_generate_async_not_found(fake_url, hf_headers):
     client = AsyncClient(fake_url, hf_headers)
```
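The added test covers the `best_of` path of the async client: with `best_of=2` the server samples two candidate sequences, returns the better one as the main response, and reports the remaining one under `response.details.best_of_sequences` (hence the length-1 assertion). A hedged sketch of how a caller might use this outside the test suite; the endpoint URL is a placeholder for a running `text-generation-inference` server:

```python
import asyncio

from text_generation import AsyncClient


async def main():
    # Placeholder endpoint; point this at your own server.
    client = AsyncClient("http://localhost:8080")
    response = await client.generate(
        "What is Deep Learning?", max_new_tokens=17, best_of=2, do_sample=True
    )
    # The best-scoring sequence is returned directly...
    print(response.generated_text)
    # ...and the runner-up candidates are exposed for inspection.
    for sequence in response.details.best_of_sequences:
        print(sequence.generated_text)


asyncio.run(main())
```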
router/client/src/sharded_client.rs

```diff
@@ -20,7 +20,7 @@ impl ShardedClient {
     /// the other shards and returns all uris/unix sockets with the `service_discovery` gRPC method.
     async fn from_master_client(mut master_client: Client) -> Result<Self> {
         // Get all uris/unix sockets from the master client
-        let uris = master_client.service_discovery().await.unwrap();
+        let uris = master_client.service_discovery().await?;
         let futures = uris.into_iter().map(Client::connect_uds);
         let clients: Result<Vec<Client>> = join_all(futures).await.into_iter().collect();
         Ok(Self::new(clients?))
```
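This one-line change swaps `.unwrap()` for the `?` operator, so a failed `service_discovery` call surfaces as an `Err` from `from_master_client` instead of panicking the router. A minimal, self-contained sketch of that pattern (the types and function names below are illustrative, not the router's actual API):

```rust
// Illustrative stand-in for a fallible discovery call.
fn service_discovery() -> Result<Vec<String>, String> {
    Err("shard unreachable".to_string())
}

// With `?`, the error is propagated to the caller as a Result...
fn discover_with_question_mark() -> Result<Vec<String>, String> {
    let uris = service_discovery()?;
    Ok(uris)
}

// ...whereas `.unwrap()` would abort the process on the same error.
#[allow(dead_code)]
fn discover_with_unwrap() -> Vec<String> {
    service_discovery().unwrap() // panics: "shard unreachable"
}

fn main() {
    // The caller can now decide how to handle the failure.
    match discover_with_question_mark() {
        Ok(uris) => println!("found {} shard uris", uris.len()),
        Err(e) => eprintln!("service discovery failed: {e}"),
    }
}
```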
router/src/health.rs

```diff
@@ -4,6 +4,10 @@ use text_generation_client::{
     Batch, NextTokenChooserParameters, Request, ShardedClient, StoppingCriteriaParameters,
 };
 
+// Note: Request ids and batch ids cannot collide.
+const LIVENESS_ID: u64 = u64::MAX;
+const BATCH_ID: u64 = u64::MAX;
+
 #[derive(Clone, Debug)]
 pub(crate) struct Health {
     client: ShardedClient,
@@ -27,7 +31,7 @@ impl Health {
         // Dummy batch of 1 token and 1 generated token
         let liveness_request = Request {
-            id: u64::MAX,
+            id: LIVENESS_ID,
             inputs: "liveness".to_string(),
             truncate: 10,
             parameters: Some(NextTokenChooserParameters {
@@ -47,7 +51,7 @@ impl Health {
             }),
         };
         let batch = Batch {
-            id: u64::MAX,
+            id: BATCH_ID,
             requests: vec![liveness_request],
             size: 1,
             max_tokens: 2,
```
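The later hunks replace the two inline `u64::MAX` literals with the named constants introduced above, which also documents why the sentinel value is safe: the health check's dummy request and batch should never share an id with real traffic, and request ids and batch ids live in separate id spaces. A minimal sketch of the sentinel idea, assuming (as is typical, though not shown in this diff) that real ids come from counters that start at zero; the `IdGenerator` type is illustrative and not part of the router:

```rust
// Reserved sentinel id for the health-check's dummy request.
const LIVENESS_ID: u64 = u64::MAX;

// Illustrative counter-based id source; real ids counting up from zero
// never reach u64::MAX in any realistic run, so the sentinel cannot collide.
struct IdGenerator {
    next: u64,
}

impl IdGenerator {
    fn next_id(&mut self) -> u64 {
        let id = self.next;
        self.next += 1;
        id
    }
}

fn main() {
    let mut ids = IdGenerator { next: 0 };
    let first = ids.next_id();
    assert_ne!(first, LIVENESS_ID);
    println!("first real id: {first}, sentinel: {LIVENESS_ID}");
}
```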
server/tests/utils/test_watermark.py (new file)

```python
# test_watermark_logits_processor.py
import os

import numpy as np
import torch
from text_generation_server.utils.watermark import WatermarkLogitsProcessor

GAMMA = os.getenv("WATERMARK_GAMMA", 0.5)
DELTA = os.getenv("WATERMARK_DELTA", 2.0)


def test_seed_rng():
    input_ids = [101, 2036, 3731, 102, 2003, 103]
    processor = WatermarkLogitsProcessor()
    processor._seed_rng(input_ids)
    assert isinstance(processor.rng, torch.Generator)


def test_get_greenlist_ids():
    input_ids = [101, 2036, 3731, 102, 2003, 103]
    processor = WatermarkLogitsProcessor()
    result = processor._get_greenlist_ids(input_ids, 10, torch.device("cpu"))
    assert max(result) <= 10
    assert len(result) == int(10 * 0.5)


def test_calc_greenlist_mask():
    processor = WatermarkLogitsProcessor()
    scores = torch.tensor([[0.5, 0.3, 0.2, 0.8], [0.1, 0.2, 0.7, 0.9]])
    greenlist_token_ids = torch.tensor([2, 3])
    result = processor._calc_greenlist_mask(scores, greenlist_token_ids)
    assert result.tolist() == [[False, False, False, False], [False, False, True, True]]
    assert result.shape == scores.shape


def test_bias_greenlist_logits():
    processor = WatermarkLogitsProcessor()
    scores = torch.tensor([[0.5, 0.3, 0.2, 0.8], [0.1, 0.2, 0.7, 0.9]])
    green_tokens_mask = torch.tensor(
        [[False, False, True, True], [False, False, False, True]]
    )
    greenlist_bias = 2.0
    result = processor._bias_greenlist_logits(scores, green_tokens_mask, greenlist_bias)
    assert np.allclose(result.tolist(), [[0.5, 0.3, 2.2, 2.8], [0.1, 0.2, 0.7, 2.9]])
    assert result.shape == scores.shape


def test_call():
    input_ids = [101, 2036, 3731, 102, 2003, 103]
    processor = WatermarkLogitsProcessor()
    scores = torch.tensor([[0.5, 0.3, 0.2, 0.8], [0.1, 0.2, 0.7, 0.9]])
    result = processor(input_ids, scores)
    assert result.shape == scores.shape
```
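These tests exercise the greenlist-style watermarking used by the server: an RNG seeded from the preceding tokens selects a "green" subset of the vocabulary (a fraction `GAMMA` of the token ids), and a bias `DELTA` is added to the logits of those tokens at the final position. The sketch below illustrates that idea only; it is not the implementation in `text_generation_server.utils.watermark`, and the function name and seeding details are assumptions for demonstration:

```python
import torch


def greenlist_watermark_bias(
    scores: torch.Tensor,   # (seq_len, vocab_size) logits
    last_token_id: int,     # token used to seed the greenlist
    gamma: float = 0.5,     # fraction of the vocabulary that is "green"
    delta: float = 2.0,     # bias added to green-token logits
) -> torch.Tensor:
    vocab_size = scores.shape[-1]
    # Seed a generator from the previous token so the greenlist is reproducible
    # at detection time (the real processor derives its seed differently).
    rng = torch.Generator(device="cpu")
    rng.manual_seed(last_token_id)
    # Take the first gamma * vocab_size ids of a seeded permutation as the greenlist.
    greenlist = torch.randperm(vocab_size, generator=rng)[: int(gamma * vocab_size)]
    # Bias only the last position's logits, mirroring how the test above expects
    # the greenlist mask to touch only the final row of `scores`.
    biased = scores.clone()
    biased[-1, greenlist] += delta
    return biased


# Example: with gamma=0.5 and delta=2.0, two of the four logits in the last row
# receive +2.0 while earlier rows are untouched.
scores = torch.tensor([[0.5, 0.3, 0.2, 0.8], [0.1, 0.2, 0.7, 0.9]])
print(greenlist_watermark_bias(scores, last_token_id=103))
```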
server/text_generation_server/models/__init__.py

```diff
@@ -58,8 +58,6 @@ __all__ = [
     "GalacticaSharded",
     "GPTNeoxSharded",
     "Seq2SeqLM",
-    "Galactica",
-    "GalacticaSharded",
     "SantaCoder",
     "OPT",
     "OPTSharded",
```