Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
text-generation-inference
Commits
d96dcb17
Unverified
Commit
d96dcb17
authored
Dec 06, 2024
by
Nicolas Patry
Committed by
GitHub
Dec 06, 2024
Browse files
Adding A100 compute. (#2806)
parent
5df80590
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
9 deletions
+13
-9
launcher/src/main.rs
launcher/src/main.rs
+13
-9
No files found.
launcher/src/main.rs
View file @
d96dcb17
...
...
@@ -172,7 +172,9 @@ struct RawConfig {
vision_config
:
Option
<
VisionConfig
>
,
is_encoder_decoder
:
Option
<
bool
>
,
#[serde(rename
=
"num_experts_per_tok"
)]
experts
:
Option
<
usize
>
,
num_experts_per_token
:
Option
<
usize
>
,
#[serde(rename
=
"n_shared_experts"
)]
num_shared_experts
:
Option
<
usize
>
,
}
#[derive(Deserialize)]
...
...
@@ -196,7 +198,8 @@ struct Config {
model_type
:
Option
<
String
>
,
vision_config
:
Option
<
VisionConfig
>
,
is_encoder_decoder
:
bool
,
experts
:
Option
<
usize
>
,
num_experts_per_token
:
usize
,
num_shared_experts
:
usize
,
}
impl
Config
{
...
...
@@ -210,11 +213,9 @@ impl Config {
let
num_kv_heads
=
self
.num_kv_heads
?
as
u64
;
let
head_dim
=
self
.head_dim
?
as
u64
;
let
hidden_size
=
self
.hidden_size
?
as
u64
;
let
intermediate_size
=
if
let
Some
(
experts
)
=
self
.experts
{
(
self
.intermediate_size
?
*
experts
)
as
u64
}
else
{
self
.intermediate_size
?
as
u64
};
let
intermediate_size
=
(
self
.intermediate_size
?
*
(
self
.num_experts_per_token
+
self
.num_shared_experts
))
as
u64
;
let
num_layers
=
self
.num_layers
?
as
u64
;
let
q_flops
=
2
*
num_heads
*
head_dim
*
hidden_size
;
...
...
@@ -257,7 +258,8 @@ impl From<RawConfig> for Config {
let
model_type
=
other
.model_type
;
let
vision_config
=
other
.vision_config
;
let
is_encoder_decoder
=
other
.is_encoder_decoder
.unwrap_or
(
false
);
let
experts
=
other
.experts
;
let
num_experts_per_token
=
other
.num_experts_per_token
.unwrap_or
(
1
);
let
num_shared_experts
=
other
.num_shared_experts
.unwrap_or
(
0
);
Config
{
max_position_embeddings
,
quantize
,
...
...
@@ -270,7 +272,8 @@ impl From<RawConfig> for Config {
num_kv_heads
,
intermediate_size
,
num_layers
,
experts
,
num_experts_per_token
,
num_shared_experts
,
}
}
}
...
...
@@ -1547,6 +1550,7 @@ impl ComputeType {
// https://www.techpowerup.com/gpu-specs/docs/nvidia-gh100-architecture.pdf
"nvidia-h100-80gb-hbm3"
=>
Some
(
900
*
10u64
.pow
(
12
)),
// https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf
"nvidia-a100-sxm4-80gb"
=>
Some
(
312
*
10u64
.pow
(
12
)),
"nvidia-a100"
=>
Some
(
312
*
10u64
.pow
(
12
)),
card
=>
{
tracing
::
warn!
(
"Unkown compute for card {card}"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment