tsoc / hg-misc-tools · Commit 1516fed0

Update evo2 entrypoint scripts

Authored Jan 31, 2026 by one
Parent: b52f967e
Showing 3 changed files with 253 additions and 63 deletions (+253 −63):

  .vscode/extensions.json               +6   −0
  evo2/run.sh                           +39  −10
  evo2/test_evo2_generation_batched.py  +208 −53
.vscode/extensions.json (new file, mode 100644)
{
    "recommendations": [
        "ms-python.python",
        "astral-sh.ruff"
    ]
}
\ No newline at end of file
evo2/run.sh
#!/bin/bash
set -e

export MODEL_PATH=/models/arcinstitute/evo2_7b
# export MIOPEN_ENABLE_LOGGING_CMD=1
# export MIOPEN_ENABLE_LOGGING=1
# export MIOPEN_LOG_LEVEL=6
# export ROCBLAS_LAYER=4
export HIP_VISIBLE_DEVICES=1
export MODEL_NAME=evo2_7b
export MODEL_PATH=/models/arcinstitute/evo2_7b/evo2_7b.pt
#export MIOPEN_ENABLE_LOGGING_CMD=1
#export MIOPEN_ENABLE_LOGGING=1
#export MIOPEN_LOG_LEVEL=6
#export ROCBLAS_LAYER=3

EVO_CMD="numactl -m 1 -N 1 \
    python -m evo2.test.test_evo2_generation_batched \
    --model_name ${MODEL_NAME} \
    --local_path ${MODEL_PATH}"

BATCH_SIZE=2
EVO2_CMD="numactl -m 0 -N 0 python -m evo2.test.test_evo2_generation_batched --model_name evo2_7b --local_path ${MODEL_PATH}/evo2_7b.pt --batch_size ${BATCH_SIZE}"
# EVO2_CMD="numactl -m 0 -N 0 python -m evo2.test.test_evo2_generation --model_name evo2_7b --local_path ${MODEL_PATH}/evo2_7b.pt"

run_all_tests() {
    local batch_size=$1
    #${EVO2_CMD}
    echo "================================================"
    echo "Running all tests for batch size ${batch_size}"
    echo "================================================"
    mkdir -p log &> /dev/null

    hipprof --hip-trace -o log/trace-padding-bs${BATCH_SIZE} ${EVO2_CMD}

    echo "==== Normal run ===="
    ${EVO_CMD} --batch_size ${batch_size}

    # echo "==== Torch profiler trace for step 0 ===="
    # ${EVO_CMD} --batch_size ${batch_size} --trace --trace_step 0
    # echo "==== Torch profiler trace for step 1 ===="
    # ${EVO_CMD} --batch_size ${batch_size} --trace --trace_step 1
    # echo "==== Hipprof trace ===="
    # hipprof --hip-trace -o log/trace-bs${batch_size} \
    #     ${EVO_CMD} --batch_size ${batch_size}
    # echo "==== Nsight-systems trace ===="
    # nsys profile --force-overwrite=true \
    #     --stats=true --trace=cuda \
    #     -o log/trace-bs${batch_size} \
    #     ${EVO_CMD} --batch_size ${batch_size}
}

run_all_tests 1
run_all_tests 2
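Note: inside run_all_tests, the hipprof trace uses the global BATCH_SIZE (fixed at 2) for both its output name (log/trace-padding-bs${BATCH_SIZE}) and the traced command ${EVO2_CMD}, so that trace is identical for run_all_tests 1 and run_all_tests 2; only the "Normal run" (${EVO_CMD} --batch_size ${batch_size}) varies with the function argument. Also, if both export MODEL_PATH lines survive in the committed file, the second (which ends in evo2_7b.pt) wins, and ${MODEL_PATH}/evo2_7b.pt inside EVO2_CMD expands to a doubled .../evo2_7b.pt/evo2_7b.pt; EVO2_CMD appears to predate the re-pointed MODEL_PATH.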
evo2/test_evo2_generation_batched.py
@@ -2,14 +2,14 @@ import argparse
 import csv
 from importlib import resources
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Optional

 import numpy as np
 import time
 import torch

 from evo2 import Evo2


 def read_prompts(input_file):
     """Read prompts from input file or built-in test data.
@@ -20,25 +20,27 @@ def read_prompts(input_file)
     # If it's a string that doesn't exist as a file path, assume it's a test data file
     if isinstance(input_file, str) and not Path(input_file).is_file():
         # This is the reliable way to get package data
-        with resources.path('evo2.test.data', input_file) as data_path:
+        with resources.path("evo2.test.data", input_file) as data_path:
             input_file = data_path

     # Your existing code to read the file
     promptseqs = []
-    with open(input_file, encoding='utf-8-sig', newline='') as csvfile:
+    with open(input_file, encoding="utf-8-sig", newline="") as csvfile:
         reader = csv.reader(csvfile)
         next(reader)  # Skip header
         for row in reader:
             promptseqs.append(row[0])

     return promptseqs


 def mid_point_split(*, seq, num_tokens):
     """Split sequence at midpoint for prompt and target."""
-    mid_point = 2*(len(seq)//4)
+    mid_point = 2 * (len(seq) // 4)
     prompt = seq[:mid_point]
-    target = seq[mid_point:mid_point+num_tokens]
+    target = seq[mid_point : mid_point + num_tokens]
     return prompt, target


 def calculate_sequence_identity(seq1: str, seq2: str) -> Optional[float]:
     """Calculate sequence identity between two sequences through direct comparison."""
     if not seq1 or not seq2:
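A toy illustration of the split rule (hypothetical values, not from the test data): the midpoint is rounded down to an even index, so a length-10 sequence yields a 4-character prompt rather than 5.

    # Illustration only: the mid_point_split arithmetic on a tiny sequence.
    seq = "ACGTACGTAC"                       # length 10
    mid_point = 2 * (len(seq) // 4)          # 10 // 4 = 2, so mid_point = 4 (always even)
    prompt = seq[:mid_point]                 # "ACGT"
    target = seq[mid_point : mid_point + 3]  # next 3 tokens: "ACG"
    assert (prompt, target) == ("ACGT", "ACG")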
@@ -48,8 +50,18 @@ def calculate_sequence_identity(seq1: str, seq2: str) -> Optional[float]:
     matches = sum(a == b for a, b in zip(seq1[:min_length], seq2[:min_length]))
     return (matches / min_length) * 100


-def generate_and_score(*, sequences, model, generations_per_prompt=5, n_tokens=500, temperature=1.0, top_k=1, top_p=1.0, batch_size=2):
+def generate_and_score(
+    *,
+    sequences,
+    model,
+    generations_per_prompt=5,
+    n_tokens=500,
+    temperature=1.0,
+    top_k=1,
+    top_p=1.0,
+    batch_size=2,
+):
     """Prompt with first half, generate and score on 2nd half."""
     scores = []
     prompts = []
@@ -62,12 +74,125 @@ def generate_and_score(*, sequences, model, generations_per_prompt=5, n_tokens=5
         targets.extend([target] * generations_per_prompt)

     for i in range(0, len(prompts), batch_size):
-        batch_prompts = prompts[i:i+batch_size]
-        batch_targets = targets[i:i+batch_size]
+        batch_prompts = prompts[i : i + batch_size]
+        batch_targets = targets[i : i + batch_size]
+        with torch.inference_mode():
+            torch.cuda.synchronize()
+            step_time = -time.perf_counter()
+            generated = model.generate(
+                prompt_seqs=batch_prompts,
+                n_tokens=n_tokens,
+                temperature=temperature,
+                top_k=top_k,
+                top_p=top_p,
+            )
+            torch.cuda.synchronize()
+            step_time += time.perf_counter()
+            print(f"[{i}:{min(i + batch_size, len(prompts))}) E2E Time for model.generate: {step_time:.3f}s")
+
+        for j, decoded_seq in enumerate(generated.sequences):
+            score = calculate_sequence_identity(decoded_seq, batch_targets[j])
+            scores.append(score)
+
+    # Reshape scores to group by original sequence
+    reshaped_scores = [
+        scores[i : i + generations_per_prompt]
+        for i in range(0, len(scores), generations_per_prompt)
+    ]
+    return reshaped_scores
+
+
+def custom_trace_handler(dir_name="./log/pt-trace/", sort_by="self_device_time_total", top_n=20):
+    tb_handler = torch.profiler.tensorboard_trace_handler(dir_name=dir_name)
+    field_fallbacks = {
+        "self_device_time_total": "self_cuda_time_total",
+        "device_time_total": "cuda_time_total",
+        "self_cuda_time_total": "self_cpu_time_total",
+    }
+
+    def handler(prof):
+        tb_handler(prof)
+        avgs = prof.key_averages()
+        final_sort_key = sort_by
+        if len(avgs) > 0:
+            sample_event = avgs[0]
+            # fallback
+            if not hasattr(sample_event, final_sort_key):
+                fallback_key = field_fallbacks.get(final_sort_key)
+                if fallback_key and hasattr(sample_event, fallback_key):
+                    print(f"[PROFILER] '{final_sort_key}' not found. Falling back to '{fallback_key}'.")
+                    final_sort_key = fallback_key
+                else:
+                    print(f"[PROFILER] Sort key '{final_sort_key}' invalid. Using default order.")
+                    final_sort_key = None
+        print(avgs.table(sort_by=final_sort_key, row_limit=top_n))
+
+    return handler
+
+
+def generate_and_score_prof(
+    *,
+    sequences,
+    model,
+    generations_per_prompt=5,
+    n_tokens=500,
+    temperature=1.0,
+    top_k=1,
+    top_p=1.0,
+    batch_size=2,
+    trace_step=1,
+):
+    """Prompt with first half, generate and score on 2nd half with torch profiler.
+
+    Profiler is enabled only for iteration i==1 to capture detailed performance data.
+    """
+    scores = []
+    prompts = []
+    targets = []
+
+    # Prepare all prompts and targets
+    for seq in sequences:
+        prompt, target = mid_point_split(seq=seq, num_tokens=n_tokens)
+        prompts.extend([prompt] * generations_per_prompt)
+        targets.extend([target] * generations_per_prompt)
+
+    print("\n[TRACE] Start profiling...")
+    with torch.profiler.profile(
+        activities=[
+            torch.profiler.ProfilerActivity.CPU,
+            torch.profiler.ProfilerActivity.CUDA,
+        ],
+        schedule=torch.profiler.schedule(wait=0, warmup=trace_step, active=1, repeat=1),
+        on_trace_ready=custom_trace_handler(dir_name="./log/pt-trace/"),
+        record_shapes=True,
+        profile_memory=True,
+        with_stack=True,
+        with_flops=True,
+    ) as prof:
+        for i in range(0, len(prompts), batch_size):
+            batch_prompts = prompts[i : i + batch_size]
+            batch_targets = targets[i : i + batch_size]
             with torch.inference_mode():
-            if torch.cuda.is_available():
-                torch.cuda.synchronize()
-            elapsed_time = -time.perf_counter()
+                torch.cuda.synchronize()
+                step_time = -time.perf_counter()
                 generated = model.generate(
                     prompt_seqs=batch_prompts,
                     n_tokens=n_tokens,
@@ -75,20 +200,26 @@ def generate_and_score(*, sequences, model, generations_per_prompt=5, n_tokens=5
                     top_k=top_k,
                     top_p=top_p,
                 )
-            if torch.cuda.is_available():
-                torch.cuda.synchronize()
-            elapsed_time += time.perf_counter()
-            print(f"[{i}:{min(i+batch_size, len(prompts))}) Time for model.generate: {elapsed_time:.3f}s")
+                torch.cuda.synchronize()
+                step_time += time.perf_counter()
+                print(f"[{i}:{min(i + batch_size, len(prompts))}) E2E Time for model.generate: {step_time:.3f}s")

             for j, decoded_seq in enumerate(generated.sequences):
                 score = calculate_sequence_identity(decoded_seq, batch_targets[j])
                 scores.append(score)
+            prof.step()

     # Reshape scores to group by original sequence
-    reshaped_scores = [scores[i:i+generations_per_prompt] for i in range(0, len(scores), generations_per_prompt)]
+    reshaped_scores = [
+        scores[i : i + generations_per_prompt]
+        for i in range(0, len(scores), generations_per_prompt)
+    ]
     return reshaped_scores


 def main():
     """
     Test sequence generation and scoring using the evo2 models
@@ -98,10 +229,30 @@ def main():
     - Evo 2 1B base: 68.0%
     """
     parser = argparse.ArgumentParser(description="Test Evo2 Model Generation")
-    parser.add_argument("--model_name", choices=['evo2_7b', 'evo2_40b', 'evo2_1b_base'], default='evo2_7b', help="Model to test (supports evo2_7b, evo2_40b, evo2_1b_base)")
+    parser.add_argument(
+        "--model_name",
+        choices=["evo2_7b", "evo2_40b", "evo2_1b_base"],
+        default="evo2_7b",
+        help="Model to test (supports evo2_7b, evo2_40b, evo2_1b_base)",
+    )
     parser.add_argument("--local_path", type=str, default=None)
-    parser.add_argument("--batch_size", type=int, default=1, help="Batch size for generation")
+    parser.add_argument("--n_tokens", type=int, default=500, help="Number of tokens to generate")
+    parser.add_argument("--batch_size", type=int, default=1, help="Batch size for generation")
+    parser.add_argument(
+        "--trace",
+        action="store_true",
+        help="Enable torch profiler",
+    )
+    parser.add_argument(
+        "--trace_step",
+        type=int,
+        default=1,
+        help="Attach torch profiler to specific step (default: 1)",
+    )
     args = parser.parse_args()
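Note: the new --trace and --trace_step flags line up with the commented-out invocations in run.sh above (${EVO_CMD} --batch_size ${batch_size} --trace --trace_step 0 and --trace_step 1), so those per-step profiled runs can be re-enabled there without further changes to this script.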
@@ -113,27 +264,34 @@ def main():
     # Test parameters: greedy sampling of 500 tokens
     test_params = {
-        'n_tokens': 500,
-        'temperature': 1.0,
-        'top_k': 1,
-        'top_p': 1.0,
-        'generations_per_prompt': 1,
-        'batch_size': args.batch_size,
+        "n_tokens": args.n_tokens,
+        "temperature": 1.0,
+        "top_k": 1,
+        "top_p": 1.0,
+        "generations_per_prompt": 1,
+        "batch_size": args.batch_size,
     }

     # Read and process sequences
-    sequences = read_prompts('prompts.csv')
+    sequences = read_prompts("prompts.csv")

     # Debugging: replace all prompts with the longest prompt
     if args.batch_size > 1:
         longest_prompt = max(sequences, key=len)
         sequences = [longest_prompt] * len(sequences)
-        print(f"[debug] Using longest prompt len={len(longest_prompt)} for all sequences")
+        print(f"[DEBUG] Using longest prompt len={len(longest_prompt)} for all sequences")

-    scores = generate_and_score(
+    if args.trace:
+        print("[TRACE] Using generate_and_score_prof with torch profiler")
+        scores = generate_and_score_prof(
             sequences=sequences,
             model=model,
-        **test_params
+            trace_step=args.trace_step,
+            **test_params,
         )
+    else:
+        scores = generate_and_score(sequences=sequences, model=model, **test_params)

     # Calculate and validate results
     mean_score = np.mean(scores)
@@ -142,11 +300,7 @@ def main():
     # Validate against expected scores
     eps = 3  # large epsilon for direct comparison, since there are numeric differences by versions
-    expected_scores = {'evo2_40b': 91.15,
-                       'evo2_7b': 89.25,
-                       'evo2_1b_base': 68.0}
+    expected_scores = {"evo2_40b": 91.15, "evo2_7b": 89.25, "evo2_1b_base": 68.0}
     expected_score = expected_scores[args.model_name]

     if abs(mean_score - expected_score) < eps:
@@ -154,5 +308,6 @@ def main():
     else:
         print(f"\nTest Failed: Expected {expected_score}%, got {mean_score}%")

+
 if __name__ == "__main__":
     main()
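For reference, the two pieces of machinery this commit adds, the negated time.perf_counter() accumulator bracketed by torch.cuda.synchronize() calls and the profiler schedule that treats the first trace_step batches as warmup before recording exactly one step, compose as in this minimal standalone sketch. dummy_step is a hypothetical stand-in for model.generate; everything else is the stock torch/time API.

    import time

    import torch


    def dummy_step(n=256):
        # Hypothetical stand-in for model.generate: a little work to profile.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        x = torch.randn(n, n, device=device)
        return x @ x


    activities = [torch.profiler.ProfilerActivity.CPU]
    if torch.cuda.is_available():
        activities.append(torch.profiler.ProfilerActivity.CUDA)

    trace_step = 1  # as with --trace_step: batches before this index are warmup
    with torch.profiler.profile(
        activities=activities,
        # wait=0: no idle steps; warmup=trace_step: run but do not record;
        # active=1, repeat=1: record exactly one step, then fire on_trace_ready.
        schedule=torch.profiler.schedule(wait=0, warmup=trace_step, active=1, repeat=1),
        on_trace_ready=lambda prof: print(prof.key_averages().table(row_limit=10)),
    ) as prof:
        for i in range(trace_step + 1):
            if torch.cuda.is_available():
                torch.cuda.synchronize()  # drain queued GPU work before starting the clock
            step_time = -time.perf_counter()  # negate t0 now so += t1 leaves t1 - t0
            dummy_step()
            if torch.cuda.is_available():
                torch.cuda.synchronize()  # wait for the GPU before stopping the clock
            step_time += time.perf_counter()
            print(f"step {i}: {step_time:.3f}s")
            prof.step()  # advance the profiler schedule once per batch

With the default trace_step=1, batch 0 absorbs one-time startup costs (allocator warm-up, kernel selection), so the single recorded step is closer to steady-state behavior.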