Commit 1516fed0 authored by one

Update evo2 entrypoint scripts

parent b52f967e
{
  "recommendations": [
    "ms-python.python",
    "astral-sh.ruff"
  ]
}
\ No newline at end of file
#!/bin/bash
set -e

# export MIOPEN_ENABLE_LOGGING_CMD=1
# export MIOPEN_ENABLE_LOGGING=1
# export MIOPEN_LOG_LEVEL=6
# export ROCBLAS_LAYER=4
export HIP_VISIBLE_DEVICES=1

export MODEL_NAME=evo2_7b
export MODEL_PATH=/models/arcinstitute/evo2_7b/evo2_7b.pt

EVO_CMD="numactl -m 1 -N 1 \
    python -m evo2.test.test_evo2_generation_batched \
    --model_name ${MODEL_NAME} \
    --local_path ${MODEL_PATH}"

run_all_tests() {
    local batch_size=$1

    echo "================================================"
    echo "Running all tests for batch size ${batch_size}"
    echo "================================================"

    mkdir -p log &> /dev/null

    echo "==== Normal run ===="
    ${EVO_CMD} --batch_size ${batch_size}

    # echo "==== Torch profiler trace for step 0 ===="
    # ${EVO_CMD} --batch_size ${batch_size} --trace --trace_step 0

    # echo "==== Torch profiler trace for step 1 ===="
    # ${EVO_CMD} --batch_size ${batch_size} --trace --trace_step 1

    # echo "==== Hipprof trace ===="
    # hipprof --hip-trace -o log/trace-bs${batch_size} \
    #     ${EVO_CMD} --batch_size ${batch_size}

    # echo "==== Nsight-systems trace ===="
    # nsys profile --force-overwrite=true \
    #     --stats=true --trace=cuda \
    #     -o log/trace-bs${batch_size} \
    #     ${EVO_CMD} --batch_size ${batch_size}
}

run_all_tests 1
run_all_tests 2
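For reference, the same batched generation test can be invoked directly, outside the run_all_tests wrapper. This is a minimal sketch assuming the checkpoint path exported above; the GPU index and NUMA node shown here are illustrative and not part of the commit:

    # Hypothetical one-off run mirroring EVO_CMD above; adjust the GPU index,
    # NUMA node, and checkpoint path for the target machine.
    export HIP_VISIBLE_DEVICES=0
    numactl -m 0 -N 0 \
        python -m evo2.test.test_evo2_generation_batched \
        --model_name evo2_7b \
        --local_path /models/arcinstitute/evo2_7b/evo2_7b.pt \
        --batch_size 1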
@@ -2,17 +2,17 @@ import argparse
import csv
from importlib import resources
from pathlib import Path
from typing import Optional

import numpy as np
import time

import torch

from evo2 import Evo2


def read_prompts(input_file):
    """Read prompts from input file or built-in test data.

    Args:
        input_file: Either a path to a file, or the name of a test data file
            (e.g., 'prompts.csv')
@@ -20,54 +20,66 @@ def read_prompts(input_file):
    # If it's a string that doesn't exist as a file path, assume it's a test data file
    if isinstance(input_file, str) and not Path(input_file).is_file():
        # This is the reliable way to get package data
        with resources.path("evo2.test.data", input_file) as data_path:
            input_file = data_path

    # Your existing code to read the file
    promptseqs = []
    with open(input_file, encoding="utf-8-sig", newline="") as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # Skip header
        for row in reader:
            promptseqs.append(row[0])

    return promptseqs


def mid_point_split(*, seq, num_tokens):
    """Split sequence at midpoint for prompt and target."""
    mid_point = 2 * (len(seq) // 4)
    prompt = seq[:mid_point]
    target = seq[mid_point : mid_point + num_tokens]
    return prompt, target


def calculate_sequence_identity(seq1: str, seq2: str) -> Optional[float]:
    """Calculate sequence identity between two sequences through direct comparison."""
    if not seq1 or not seq2:
        return None

    min_length = min(len(seq1), len(seq2))
    matches = sum(a == b for a, b in zip(seq1[:min_length], seq2[:min_length]))
    return (matches / min_length) * 100


def generate_and_score(
    *,
    sequences,
    model,
    generations_per_prompt=5,
    n_tokens=500,
    temperature=1.0,
    top_k=1,
    top_p=1.0,
    batch_size=2,
):
    """Prompt with first half, generate and score on 2nd half."""
    scores = []
    prompts = []
    targets = []

    # Prepare all prompts and targets
    for seq in sequences:
        prompt, target = mid_point_split(seq=seq, num_tokens=n_tokens)
        prompts.extend([prompt] * generations_per_prompt)
        targets.extend([target] * generations_per_prompt)

    for i in range(0, len(prompts), batch_size):
        batch_prompts = prompts[i : i + batch_size]
        batch_targets = targets[i : i + batch_size]

        with torch.inference_mode():
            torch.cuda.synchronize()
            step_time = -time.perf_counter()
            generated = model.generate(
                prompt_seqs=batch_prompts,
                n_tokens=n_tokens,
@@ -75,84 +87,227 @@ def generate_and_score(*, sequences, model, generations_per_prompt=5, n_tokens=5
                top_k=top_k,
                top_p=top_p,
            )
            torch.cuda.synchronize()
            step_time += time.perf_counter()
            print(
                f"[{i}:{min(i + batch_size, len(prompts))}) E2E Time for model.generate: {step_time:.3f} s"
            )

        for j, decoded_seq in enumerate(generated.sequences):
            score = calculate_sequence_identity(decoded_seq, batch_targets[j])
            scores.append(score)

    # Reshape scores to group by original sequence
    reshaped_scores = [
        scores[i : i + generations_per_prompt]
        for i in range(0, len(scores), generations_per_prompt)
    ]

    return reshaped_scores


def custom_trace_handler(
    dir_name="./log/pt-trace/", sort_by="self_device_time_total", top_n=20
):
    tb_handler = torch.profiler.tensorboard_trace_handler(dir_name=dir_name)

    field_fallbacks = {
        "self_device_time_total": "self_cuda_time_total",
        "device_time_total": "cuda_time_total",
        "self_cuda_time_total": "self_cpu_time_total",
    }

    def handler(prof):
        tb_handler(prof)

        avgs = prof.key_averages()
        final_sort_key = sort_by

        if len(avgs) > 0:
            sample_event = avgs[0]
            # fallback
            if not hasattr(sample_event, final_sort_key):
                fallback_key = field_fallbacks.get(final_sort_key)
                if fallback_key and hasattr(sample_event, fallback_key):
                    print(
                        f"[PROFILER] '{final_sort_key}' not found. Falling back to '{fallback_key}'."
                    )
                    final_sort_key = fallback_key
                else:
                    print(
                        f"[PROFILER] Sort key '{final_sort_key}' invalid. Using default order."
                    )
                    final_sort_key = None

        print(avgs.table(sort_by=final_sort_key, row_limit=top_n))

    return handler


def generate_and_score_prof(
    *,
    sequences,
    model,
    generations_per_prompt=5,
    n_tokens=500,
    temperature=1.0,
    top_k=1,
    top_p=1.0,
    batch_size=2,
    trace_step=1,
):
    """Prompt with first half, generate and score on 2nd half with torch profiler.

    Profiler is enabled only for iteration i==1 to capture detailed performance data.
    """
    scores = []
    prompts = []
    targets = []

    # Prepare all prompts and targets
    for seq in sequences:
        prompt, target = mid_point_split(seq=seq, num_tokens=n_tokens)
        prompts.extend([prompt] * generations_per_prompt)
        targets.extend([target] * generations_per_prompt)

    print("\n[TRACE] Start profiling...")
    with torch.profiler.profile(
        activities=[
            torch.profiler.ProfilerActivity.CPU,
            torch.profiler.ProfilerActivity.CUDA,
        ],
        schedule=torch.profiler.schedule(wait=0, warmup=trace_step, active=1, repeat=1),
        on_trace_ready=custom_trace_handler(dir_name="./log/pt-trace/"),
        record_shapes=True,
        profile_memory=True,
        with_stack=True,
        with_flops=True,
    ) as prof:
        for i in range(0, len(prompts), batch_size):
            batch_prompts = prompts[i : i + batch_size]
            batch_targets = targets[i : i + batch_size]

            with torch.inference_mode():
                torch.cuda.synchronize()
                step_time = -time.perf_counter()
                generated = model.generate(
                    prompt_seqs=batch_prompts,
                    n_tokens=n_tokens,
                    temperature=temperature,
                    top_k=top_k,
                    top_p=top_p,
                )
                torch.cuda.synchronize()
                step_time += time.perf_counter()
                print(
                    f"[{i}:{min(i + batch_size, len(prompts))}) E2E Time for model.generate: {step_time:.3f} s"
                )

            for j, decoded_seq in enumerate(generated.sequences):
                score = calculate_sequence_identity(decoded_seq, batch_targets[j])
                scores.append(score)

            prof.step()

    # Reshape scores to group by original sequence
    reshaped_scores = [
        scores[i : i + generations_per_prompt]
        for i in range(0, len(scores), generations_per_prompt)
    ]

    return reshaped_scores
def main():
    """
    Test sequence generation and scoring using the evo2 models

    Expected results (direct comparison w/o alignment):
    - Evo 2 40B 1m: 91.15%
    - Evo 2 7B 1m: 89.25%
    - Evo 2 1B base: 68.0%
    """
    parser = argparse.ArgumentParser(description="Test Evo2 Model Generation")
    parser.add_argument(
        "--model_name",
        choices=["evo2_7b", "evo2_40b", "evo2_1b_base"],
        default="evo2_7b",
        help="Model to test (supports evo2_7b, evo2_40b, evo2_1b_base)",
    )
    parser.add_argument("--local_path", type=str, default=None)
    parser.add_argument(
        "--n_tokens", type=int, default=500, help="Number of tokens to generate"
    )
    parser.add_argument(
        "--batch_size", type=int, default=1, help="Batch size for generation"
    )
    parser.add_argument(
        "--trace",
        action="store_true",
        help="Enable torch profiler",
    )
    parser.add_argument(
        "--trace_step",
        type=int,
        default=1,
        help="Attach torch profiler to specific step (default: 1)",
    )
    args = parser.parse_args()

    # Set random seeds
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)

    model = Evo2(args.model_name, local_path=args.local_path)

    # Test parameters: greedy sampling of 500 tokens
    test_params = {
        "n_tokens": args.n_tokens,
        "temperature": 1.0,
        "top_k": 1,
        "top_p": 1.0,
        "generations_per_prompt": 1,
        "batch_size": args.batch_size,
    }

    # Read and process sequences
    sequences = read_prompts("prompts.csv")

    # Debugging: replace all prompts with the longest prompt
    if args.batch_size > 1:
        longest_prompt = max(sequences, key=len)
        sequences = [longest_prompt] * len(sequences)
        print(
            f"[DEBUG] Using longest prompt len={len(longest_prompt)} for all sequences"
        )

    if args.trace:
        print("[TRACE] Using generate_and_score_prof with torch profiler")
        scores = generate_and_score_prof(
            sequences=sequences,
            model=model,
            trace_step=args.trace_step,
            **test_params,
        )
    else:
        scores = generate_and_score(sequences=sequences, model=model, **test_params)

    # Calculate and validate results
    mean_score = np.mean(scores)
    print("\nTest Results:")
    print("% Matching Nucleotides:", mean_score)

    # Validate against expected scores
    eps = 3  # large epsilon for direct comparison, since there are numeric differences by versions
    expected_scores = {"evo2_40b": 91.15, "evo2_7b": 89.25, "evo2_1b_base": 68.0}
    expected_score = expected_scores[args.model_name]

    if abs(mean_score - expected_score) < eps:
        print(f"\nTest Passed! Score matches expected {expected_score}%")
    else:
        print(f"\nTest Failed: Expected {expected_score}%, got {mean_score}%")


if __name__ == "__main__":
    main()
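As a usage sketch of the new profiling path (flag names are taken from the argparse definitions above; the checkpoint path is the one used by the entrypoint script and may differ per setup), a single profiled run could look like:

    # Assumed invocation: profile the step selected by --trace_step for batch
    # size 2, printing the profiler table and writing a TensorBoard trace
    # under ./log/pt-trace/.
    python -m evo2.test.test_evo2_generation_batched \
        --model_name evo2_7b \
        --local_path /models/arcinstitute/evo2_7b/evo2_7b.pt \
        --batch_size 2 \
        --n_tokens 500 \
        --trace --trace_step 1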