ModelZoo / qwen2.5-coder_pytorch

Commit 53b3977b, authored Jul 11, 2025 by dongchy920

Initial commit
Showing 20 changed files with 892 additions and 0 deletions (+892, -0)
finetuning/sft/utils/multiple_metrics/eval_sh.py  +26 -0
finetuning/sft/utils/multiple_metrics/eval_swift.py  +32 -0
finetuning/sft/utils/multiple_metrics/eval_ts.py  +40 -0
finetuning/sft/utils/multiple_metrics/evaluation.py  +90 -0
finetuning/sft/utils/multiple_metrics/generic_eval.py  +155 -0
finetuning/sft/utils/multiple_metrics/install.sh  +24 -0
finetuning/sft/utils/multiple_metrics/javatuples-1.2.jar  +0 -0
finetuning/sft/utils/multiple_metrics/libeval.py  +43 -0
finetuning/sft/utils/multiple_metrics/safe_subprocess/.gitkeep  +0 -0
finetuning/sft/utils/multiple_metrics/safe_subprocess/__init__.py  +89 -0
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/.gitkeep  +0 -0
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/block_on_inputs.py  +2 -0
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/close_outputs.py  +7 -0
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/fork_bomb.py  +4 -0
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/fork_once.py  +6 -0
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/sleep_forever.py  +4 -0
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/unbounded_output.py  +4 -0
finetuning/sft/utils/multiple_metrics/safe_subprocess/module_test.py  +102 -0
finetuning/sft/utils/multiple_metrics/single_experiment_pass_k.py  +22 -0
finetuning/sft/utils/training_datasets.py  +242 -0
finetuning/sft/utils/multiple_metrics/eval_sh.py
0 → 100644
from pathlib import Path

from .safe_subprocess import run

LANG_NAME = "bash"
LANG_EXT = ".sh"


def eval_script(path: Path):
    # Capture output - will be generated regardless of success, fail, or syntax error
    p = run(["bash", path])
    if p.timeout:
        status = "Timeout"
    elif p.exit_code == 0:
        status = "OK"
    elif "syntax error" in p.stderr:
        status = "SyntaxError"
    else:
        status = "Exception"
    return {
        "status": status,
        "exit_code": p.exit_code,
        "stdout": p.stdout,
        "stderr": p.stderr,
    }
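A minimal usage sketch for this evaluator (illustrative, not part of the commit; it assumes the multiple_metrics package is importable so the relative import of safe_subprocess resolves, and the temporary-file handling is invented for the example):

# Hypothetical driver: write a tiny bash script and inspect the result dict.
import tempfile
from pathlib import Path

def demo(eval_script):
    with tempfile.NamedTemporaryFile("w", suffix=".sh", delete=False) as f:
        f.write('echo "hello"\n')
        script_path = Path(f.name)
    result = eval_script(script_path)
    # Expected shape: {"status": "OK", "exit_code": 0, "stdout": "hello\n", "stderr": ""}
    print(result["status"], result["exit_code"])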
finetuning/sft/utils/multiple_metrics/eval_swift.py
0 → 100644
import os
import subprocess
from pathlib import Path

from .safe_subprocess import run


def eval_script(path: Path):
    basename = ".".join(str(path).split(".")[:-1])
    r = run(["swiftc", path, "-o", basename], timeout_seconds=45)
    if r.timeout:
        status = "Timeout"
    elif r.exit_code != 0:
        # Well, it's a compile error. May be a type error or
        # something. But, why break the set convention
        status = "SyntaxError"
    else:
        r = run([basename], timeout_seconds=5)
        if r.timeout:
            status = "Timeout"
        elif r.exit_code != 0:
            # Well, it's a panic
            status = "Exception"
        else:
            status = "OK"
        os.remove(basename)
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
finetuning/sft/utils/multiple_metrics/eval_ts.py
0 → 100644
from pathlib import Path
import subprocess

from .safe_subprocess import run


def eval_script(path: Path):
    """
    Requires the TypeScript compiler: npm install -g typescript
    """
    try:
        r = run(["tsc", "--target", "esnext", str(path)], timeout_seconds=15)
    except:
        # tsc is not available: install TypeScript globally, then retry the
        # compile so that `r` is always defined for the checks below.
        subprocess.run("npm install -g typescript", shell=True)
        r = run(["tsc", "--target", "esnext", str(path)], timeout_seconds=15)
    if r.exit_code != 0:
        return {
            "status": "SyntaxError",
            "exit_code": r.exit_code,
            "stdout": r.stdout,
            "stderr": r.stderr,
        }

    r = run(["node", str(path).replace(".ts", ".js")], timeout_seconds=15)
    if r.timeout:
        status = "Timeout"
    elif r.exit_code == 0:
        status = "OK"
    elif "ERR_ASSERTION" in r.stderr:
        status = "AssertionError"
    elif "SyntaxError" in r.stderr:
        status = "SyntaxError"
    elif "ReferenceError" in r.stderr:
        status = "ReferenceError"
    else:
        status = "Exception"
    return {
        "status": status,
        "exit_code": r.exit_code,
        "stdout": r.stdout,
        "stderr": r.stderr,
    }
finetuning/sft/utils/multiple_metrics/evaluation.py
0 → 100644
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from threading import Lock
from typing import Optional

from .containerized_eval import eval_string_script

# Get working directory
WORKING_DIR = Path(__file__).parent.parent

# program: str => Result
CACHE = dict()
CACHE_LOCK = Lock()


def cache_get(program: str) -> Optional[dict]:
    if program in CACHE:
        result = CACHE[program]
        return result
    else:
        return None


def cache_set(program: str, result: dict):
    if program in CACHE:
        print("Setting already-existing cache")
    CACHE[program] = result


def cached_eval_script(problem, index) -> dict:
    # here prompt is already included in completions
    program = problem["completions"][index] + "\n" + problem["tests"]
    CACHE_LOCK.acquire(True)
    cached = cache_get(program)
    if cached is not None:
        CACHE_LOCK.release()
        return cached
    else:
        result_yaml = dict()
        cache_set(program, result_yaml)
        CACHE_LOCK.release()
        result_dict = eval_string_script(problem["language"], program)
        for k in result_dict.keys():
            result_yaml[k] = result_dict[k]
        result_yaml["timestamp"] = int(time.time())
        return result_yaml


def get_test_results_json_path(
    output_dir: str, problem_json_path: str, input_dir: Path
) -> Path:
    suffixes = ".results.json"
    problem_name = problem_json_path[: -len(".json")]
    if input_dir:
        raise ValueError("input dir given")
        return Path(output_dir) / (
            problem_json_path.relative_to(input_dir).parent / (problem_name + suffixes)
        )
    return Path(output_dir) / (problem_name + suffixes)


def evaluate_problem(
    output_dir: str, problem_json_path: str, max_workers: int, input_dir: Path = None
):
    with open(problem_json_path, "r") as f:
        problem = json.load(f)

    test_results_path = get_test_results_json_path(output_dir, problem_json_path, input_dir)
    test_results_path.parent.mkdir(mode=0o755, parents=True, exist_ok=True)

    test_results = problem.copy()
    del test_results["completions"]
    test_results["results"] = []

    num_problems = len(problem["completions"])
    min_problem = len(test_results["results"])

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for j in executor.map(
            lambda index: cached_eval_script(problem, index),
            range(min_problem, num_problems),
        ):
            test_results["results"].append(j)
            test_results["results"][-1].update(problem)

    with open(test_results_path, "w") as f:
        f.write(json.dumps(test_results, indent=2))
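From the keys accessed above ("language", "completions", "tests"), a problem JSON consumed by evaluate_problem presumably looks roughly like the sketch below; the concrete values are invented for illustration and the exact schema may differ.

# Hypothetical problem record for evaluate_problem (field names taken from the
# accesses in cached_eval_script/evaluate_problem; values are illustrative).
example_problem = {
    "language": "sh",
    "completions": [
        'add() { echo $(($1 + $2)); }\n',
    ],
    "tests": '[ "$(add 1 2)" = "3" ] || exit 1\n',
}
# evaluate_problem("out", "example.json", max_workers=4) would read a file with
# this structure and write out/example.results.json with a "results" list added.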
finetuning/sft/utils/multiple_metrics/generic_eval.py
0 → 100644
# This is a helper script for evaluating benchmarks that have been translated to
# different languages.
#
# To use this script, call eval_lang.py.
# The --directory argument is required, and tells the script where the benchmarks are located.
# The --files argument is optional, and takes a list of numbers corresponding to the files to be evaluated.
#
# The script will print the results on each benchmark, and also write to results/lang.csv.
# When the script completes, it will print a summary.
#
# Examples
#
# To run the entire benchmark suite:
# python3 src/eval_php.py --directory datasets/php-keep-code_davinci_001_temp_0.2-0/
#
# To run benchmarks 1, 2, and 3:
# python3 src/eval_php.py --directory datasets/php-keep-code_davinci_001_temp_0.2-0/ --files 1 2 3
import argparse
import sys
from pathlib import Path
from sys import exit as sysexit


def list_files(directory, ext):
    files_unsorted = directory.glob(f"HumanEval_*{ext}")

    # assumption: base filenames are in the format of HumanEval_X_*
    # Where X is a valid number
    def key(s):
        return int(str(s.name).split("_")[1])

    files_sorted = sorted(files_unsorted, key=(lambda s: key(s)))

    # assumption: there may be missing files, but no extra files
    # so we build files_array where the index corresponds to the file's number,
    # and a missing file is represented by None
    size = key(files_sorted[-1]) + 1
    files_array = [None] * size
    for f in files_sorted:
        k = key(f)
        files_array[k] = f

    return files_array


def main(eval_script, language, extension):
    args = argparse.ArgumentParser()
    args.add_argument(
        "--directory", type=str, required=True, help="Directory to read benchmarks from"
    )
    args.add_argument(
        "--files",
        type=int,
        nargs="*",
        default=[],
        help="Specify the benchmarks to evaluate by their number, e.g. --files 0 1 2",
    )
    args = args.parse_args()

    directory = Path(args.directory).resolve()

    files_sorted = list_files(directory, extension)

    # the directory you specified does not contain the right language
    if len(files_sorted) == 0:
        print(f"The specified directory does not contain files of type {extension}")
        sysexit(1)

    files_index = []
    if len(args.files) > 0:
        files_index = args.files
    else:
        files_index = range(len(files_sorted))

    total = 0
    passed = 0
    syntax_error = 0

    results_file = Path(
        Path(__file__).parent, "..", "results", language.lower() + ".csv"
    ).resolve()

    with open(results_file, "w") as f:
        for i in files_index:
            filepath = files_sorted[i]
            if filepath is None:
                print("File {} does not exist!".format(i))
                continue
            res = eval_script(filepath)
            output = f"{language},{filepath.stem},{res['status']}\n"
            f.write(output)
            print(output, end="")
            total += 1
            if res["status"] == "OK":
                passed += 1
            elif res["status"] == "SyntaxError":
                syntax_error += 1
    print(f"Total {total}, Syntax Error {syntax_error}, Passed {passed}")


def main_check_stubs(check_script, language, extension):
    args = argparse.ArgumentParser()
    args.add_argument(
        "--directory", type=str, required=True, help="Directory to read benchmarks from"
    )
    args.add_argument(
        "--files",
        type=int,
        nargs="*",
        default=[],
        help="Specify the benchmarks to evaluate by their number, e.g. --files 0 1 2",
    )
    args = args.parse_args()

    directory = Path(args.directory).resolve()

    files_sorted = list_files(directory, extension)

    # the directory you specified does not contain the right language
    if len(files_sorted) == 0:
        print(f"The specified directory does not contain files of type {extension}")
        sysexit(1)

    files_index = []
    if len(args.files) > 0:
        files_index = args.files
    else:
        files_index = range(len(files_sorted))

    total = 0
    passed = 0

    results_file = Path(
        Path(__file__).parent, "..", "check_results", language.lower() + ".csv"
    ).resolve()

    with open(results_file, "w") as f:
        for i in files_index:
            filepath = files_sorted[i]
            if filepath is None:
                print("File {} does not exist!".format(i))
                continue
            res = check_script(filepath)
            output = f"{language},{filepath.stem},{res['status']}\n"
            f.write(output)
            print(output, end="")
            total += 1
            if res["status"] == "OK":
                passed += 1
    print(f"Total {total}, Passed {passed}")
    if total != passed:
        sys.exit(1)
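Per the header comment, each language is expected to get a thin eval_<lang>.py driver that passes its eval_script into main. A hedged sketch of such a driver, wiring in the bash evaluator from eval_sh.py (the module name and invocation are assumptions, not files in this commit):

# Hypothetical eval_bash-style driver inside the same package; run it with
# `python -m <package>.eval_bash --directory <benchmark dir>` so the relative
# imports resolve.
from .eval_sh import LANG_EXT, LANG_NAME, eval_script
from .generic_eval import main

if __name__ == "__main__":
    # Evaluates HumanEval_*.sh files under --directory and writes results/bash.csv.
    main(eval_script, LANG_NAME, LANG_EXT)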
finetuning/sft/utils/multiple_metrics/install.sh
0 → 100644
sudo apt-get update
#install c-sharp environment
sudo apt-get install mono-runtime mono-complete -y
#install php
sudo apt-get install php -y
#install javascript
#sudo apt-get install nodejs -y
#install typescript
mkdir -p /usr/lib/npm/
tar -xzf zips/node-v16.14.0-linux-x64.tar.gz -C /usr/lib/npm/
export PATH=$PATH:/usr/lib/npm/node-v16.14.0-linux-x64/bin/
npm install -g typescript
#install java
mkdir -p /usr/lib/jvm
sudo tar -xzf zips/openlogic-openjdk-8u412-b08-linux-x64.tar.gz -C /usr/lib/jvm
export JAVA_HOME=/usr/lib/jvm/openlogic-openjdk-8u412-b08-linux-x64
export PATH=$JAVA_HOME/bin:$PATH
#c++
#sudo apt-get install libboost-all-dev -y
cd zips/boost_1_76_0
./bootstrap.sh --prefix=/usr/local
sudo ./b2 install
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
finetuning/sft/utils/multiple_metrics/javatuples-1.2.jar
0 → 100644
File added
finetuning/sft/utils/multiple_metrics/libeval.py
0 → 100644
import os
import signal
import subprocess
from typing import List

from . import generic_eval


def testing_mail(x, y, z):
    generic_eval.gmain(x, y, z)


def run_without_exn(args: List[str]):
    """
    Runs the given program with a five second timeout. Does not throw an exception
    no matter what happens. The output is a dictionary of the format that we expect
    for our evaluation scripts. The "status" field is "OK" when the exit code is
    zero. If that isn't enough, you may want to tweak the status based on the
    captured stderr and stdout.
    """
    p = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, start_new_session=True
    )
    try:
        stdout, stderr = p.communicate(timeout=5)
        exit_code = p.returncode
        status = "OK" if exit_code == 0 else "Exception"
    except subprocess.TimeoutExpired as exc:
        stdout, stderr = p.stdout.read(), p.stderr.read()
        os.killpg(os.getpgid(p.pid), signal.SIGTERM)
        exit_code = -1
        status = "Timeout"
    if stdout is None:
        stdout = b""
    if stderr is None:
        stderr = b""
    return {
        "status": status,
        "exit_code": exit_code,
        "stdout": stdout.decode("utf-8", errors="ignore"),
        "stderr": stderr.decode("utf-8", errors="ignore"),
    }
finetuning/sft/utils/multiple_metrics/safe_subprocess/.gitkeep
0 → 100644
finetuning/sft/utils/multiple_metrics/safe_subprocess/__init__.py
0 → 100644
import fcntl
import os
import signal
import subprocess
import time
from typing import List

MAX_BYTES_PER_READ = 1024
SLEEP_BETWEEN_READS = 0.1


class Result:
    timeout: int
    exit_code: int
    stdout: str
    stderr: str

    def __init__(self, timeout, exit_code, stdout, stderr):
        self.timeout = timeout
        self.exit_code = exit_code
        self.stdout = stdout
        self.stderr = stderr


def set_nonblocking(reader):
    fd = reader.fileno()
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)


def run(
    args: List[str],
    timeout_seconds: int = 15,
    max_output_size: int = 2048,
    env=None,
) -> Result:
    """
    Runs the given program with arguments. After the timeout elapses, kills the process
    and all other processes in the process group. Captures at most max_output_size bytes
    of stdout and stderr each, and discards any output beyond that.
    """
    p = subprocess.Popen(
        args,
        env=env,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        start_new_session=True,
        bufsize=MAX_BYTES_PER_READ,
    )
    set_nonblocking(p.stdout)
    set_nonblocking(p.stderr)

    process_group_id = os.getpgid(p.pid)

    # We sleep for 0.1 seconds in each iteration.
    max_iterations = timeout_seconds * 10
    stdout_saved_bytes = []
    stderr_saved_bytes = []
    stdout_bytes_read = 0
    stderr_bytes_read = 0
    for _ in range(max_iterations):
        this_stdout_read = p.stdout.read(MAX_BYTES_PER_READ)
        this_stderr_read = p.stderr.read(MAX_BYTES_PER_READ)
        # this_stdout_read and this_stderr_read may be None if stdout or stderr
        # are closed. Without these checks, test_close_output fails.
        if this_stdout_read is not None and stdout_bytes_read < max_output_size:
            stdout_saved_bytes.append(this_stdout_read)
            stdout_bytes_read += len(this_stdout_read)
        if this_stderr_read is not None and stderr_bytes_read < max_output_size:
            stderr_saved_bytes.append(this_stderr_read)
            stderr_bytes_read += len(this_stderr_read)
        exit_code = p.poll()
        if exit_code is not None:
            break
        time.sleep(SLEEP_BETWEEN_READS)

    try:
        # Kills the process group. Without this line, test_fork_once fails.
        os.killpg(process_group_id, signal.SIGKILL)
    except ProcessLookupError:
        pass

    timeout = exit_code is None
    exit_code = exit_code if exit_code is not None else -1
    stdout = b"".join(stdout_saved_bytes).decode("utf-8", errors="ignore")
    stderr = b"".join(stderr_saved_bytes).decode("utf-8", errors="ignore")
    return Result(timeout=timeout, exit_code=exit_code, stdout=stdout, stderr=stderr)
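A minimal usage sketch of run (mirroring how module_test.py below calls it; the command and limits here are arbitrary):

# Illustrative call: run a short command with a tight timeout and an output cap.
from safe_subprocess import run

result = run(["python3", "-c", "print('hi')"], timeout_seconds=5, max_output_size=1024)
if result.timeout:
    print("killed after timeout, exit_code =", result.exit_code)  # -1 on timeout
else:
    print(result.exit_code, repr(result.stdout), repr(result.stderr))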
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/.gitkeep
0 → 100644
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/block_on_inputs.py
0 → 100644
while True:
    input()
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/close_outputs.py
0 → 100644
import sys

print("This is the end")
sys.stdout.close()
sys.stderr.close()
while True:
    pass
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/fork_bomb.py
0 → 100644
import os

while True:
    os.fork()
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/fork_once.py
0 → 100644
import os
import time

if os.fork() == 0:
    while True:
        time.sleep(60)
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/sleep_forever.py
0 → 100644
import time

while True:
    time.sleep(60)
finetuning/sft/utils/multiple_metrics/safe_subprocess/evil_programs/unbounded_output.py
0 → 100644
b = True
while True:
    print(b)
    b = not b
finetuning/sft/utils/multiple_metrics/safe_subprocess/module_test.py
0 → 100644
import time
from pathlib import Path

from safe_subprocess import run

ROOT = Path(__file__).resolve().parent / "evil_programs"


def assert_no_running_evil():
    result = run(["pgrep", "-f", ROOT], timeout_seconds=1, max_output_size=1024)
    assert (
        result.exit_code == 1
    ), f"There are still evil processes running: {result.stdout}"
    assert len(result.stderr) == 0
    assert len(result.stdout) == 0


def test_fork_once():
    # The program exits cleanly and immediately. But, it forks a child that runs
    # forever.
    result = run(
        ["python3", ROOT / "fork_once.py"],
        timeout_seconds=2,
        max_output_size=1024,
    )
    assert result.exit_code == 0
    assert result.timeout == False
    assert len(result.stderr) == 0
    assert len(result.stdout) == 0
    assert_no_running_evil()


def test_close_outputs():
    # The program prints to stdout, closes its output, and then runs forever.
    result = run(
        ["python3", ROOT / "close_outputs.py"],
        timeout_seconds=2,
        max_output_size=1024,
    )
    assert result.exit_code == -1
    assert result.timeout == True
    assert len(result.stderr) == 0
    assert result.stdout == "This is the end\n"
    assert_no_running_evil()


def test_unbounded_output():
    result = run(
        ["python3", ROOT / "unbounded_output.py"],
        timeout_seconds=3,
        max_output_size=1024,
    )
    assert result.exit_code == -1
    assert result.timeout == True
    assert len(result.stderr) == 0
    assert len(result.stdout) == 1024
    assert_no_running_evil()


def test_sleep_forever():
    result = run(
        ["python3", ROOT / "sleep_forever.py"],
        timeout_seconds=2,
        max_output_size=1024,
    )
    assert result.exit_code == -1
    assert result.timeout == True
    assert len(result.stderr) == 0
    assert len(result.stdout) == 0
    assert_no_running_evil()


def test_fork_bomb():
    result = run(
        ["python3", ROOT / "fork_bomb.py"],
        timeout_seconds=2,
        max_output_size=1024,
    )
    assert result.exit_code == -1
    assert result.timeout == True
    assert len(result.stderr) == 0
    assert len(result.stdout) == 0
    # Unfortunately, this sleep seems to be necessary. My theories:
    # 1. os.killpg doesn't block until the whole process group is dead.
    # 2. pgrep can produce stale output
    time.sleep(2)
    assert_no_running_evil()


def test_block_on_inputs():
    # We run the subprocess with /dev/null as input. So, any program that tries
    # to read input will error.
    result = run(
        ["python3", ROOT / "block_on_inputs.py"],
        timeout_seconds=2,
        max_output_size=1024,
    )
    assert result.exit_code == 1
    assert result.timeout == False
    assert len(result.stdout) == 0
    assert "EOF when reading a line" in result.stderr
    assert_no_running_evil()
finetuning/sft/utils/multiple_metrics/single_experiment_pass_k.py
0 → 100644
import json

import numpy as np


def estimator(n: int, c: int, k: int) -> float:
    """
    Calculates 1 - comb(n - c, k) / comb(n, k).
    """
    if n - c < k:
        return 1.0
    return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))


def for_file(path):
    with open(path, "r") as f:
        data = json.load(f)
    n = len(data["results"])
    c = len(
        [True for r in data["results"] if r["status"] == "OK" and r["exit_code"] == 0]
    )
    return np.array([estimator(n, c, 1), estimator(n, c, 10), estimator(n, c, 100)])
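estimator is the usual unbiased pass@k formula; a quick worked check with invented counts (n = 20 samples, c = 5 passing):

# pass@1 reduces to c / n; pass@k grows with k and is exactly 1.0 once fewer
# than k samples fail (n - c < k).
print(estimator(20, 5, 1))    # 0.25
print(estimator(20, 5, 10))   # 1 - C(15,10)/C(20,10) ≈ 0.9837
print(estimator(20, 5, 100))  # 1.0, since n - c < k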
finetuning/sft/utils/training_datasets.py
0 → 100644
import json
import torch
import random
import os
from torch.utils.data import IterableDataset, Dataset
from typing import Dict, Optional, Sequence
import transformers
import logging
import numpy as np
import utils

logging.basicConfig(level=logging.DEBUG)


class SupervisedDataset(Dataset):
    """Dataset for supervised fine-tuning."""

    def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, args):
        super(SupervisedDataset, self).__init__()
        logging.warning("Loading data...")
        if data_path.endswith(".npy"):
            self.input_ids = np.load(data_path, allow_pickle=True)
        else:
            self.input_ids = utils.read_jsonl_file(data_path)
        original_data_num = len(self.input_ids)
        logging.info("Completely Loading tokenized sentences...")

        def truncate(sentence):
            return torch.tensor(
                sentence[: args.model_max_length] + [tokenizer.eos_token_id]
                if len(sentence) > args.model_max_length
                else sentence,
                dtype=torch.long,
            )

        if args.truncate_source:
            self.labels = [truncate(example["label"]) for example in self.input_ids]
            self.input_ids = [truncate(example["input_ids"]) for example in self.input_ids]
        else:
            self.labels = [
                torch.tensor(example["label"], dtype=torch.long)
                for example in self.input_ids
                if len(example["input_ids"]) < args.model_max_length
            ]
            self.input_ids = [
                torch.tensor(example["input_ids"], dtype=torch.long)
                for example in self.input_ids
                if len(example["input_ids"]) < args.model_max_length
            ]
        print(f"Samples: {original_data_num} -> {len(self.input_ids)}")

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, i) -> Dict[str, torch.Tensor]:
        return dict(input_ids=self.input_ids[i], labels=self.labels[i])


class MMAPSupervisedDataset(Dataset):
    """Dataset for supervised fine-tuning."""

    def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, args):
        super(Dataset, self).__init__()
        logging.warning("Loading data...")
        input_ids_path = data_path
        labels_path = data_path.replace(".input_ids.mmap", ".labels.mmap")
        lengths_path = data_path.replace(".input_ids.mmap", ".lengths.mmap")
        input_ids_shape_path = input_ids_path + ".shape.json"
        labels_shape_path = labels_path + ".shape.json"
        lengths_shape_path = lengths_path + ".shape.json"
        with open(input_ids_shape_path, 'r') as f:
            input_ids_shape_info = json.load(f)
        with open(labels_shape_path, 'r') as f:
            labels_shape_info = json.load(f)
        with open(lengths_shape_path, 'r') as f:
            lengths_shape_info = json.load(f)
        self.input_ids = np.memmap(
            input_ids_path,
            dtype=np.int32,
            mode='r',
            shape=(input_ids_shape_info['n_samples'], input_ids_shape_info['max_len']),
        )
        self.labels = np.memmap(
            labels_path,
            dtype=np.int32,
            mode='r',
            shape=(labels_shape_info['n_samples'], labels_shape_info['max_len']),
        )
        self.lengths = np.memmap(
            lengths_path,
            dtype=np.int32,
            mode='r',
            shape=(lengths_shape_info['n_samples'], lengths_shape_info['max_len']),
        )
        logging.info(f"Loaded {len(self.input_ids)} samples using mmap")

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, i) -> Dict[str, torch.Tensor]:
        length = int(self.lengths[i])
        input_ids = self.input_ids[i][:length]
        return dict(input_ids=input_ids, labels=self.labels[i])


class BufferedJsonlDataset(IterableDataset):
    def __init__(
        self,
        data_path: str,
        buffer_size: int = 1000,  # buffer size
        seed: Optional[int] = None,
        shuffle: bool = True,
    ):
        super().__init__()
        self.data_path = data_path
        self.buffer_size = buffer_size
        self.shuffle = shuffle
        self.seed = seed
        self.file_size = os.path.getsize(data_path)
        logging.info(f"Reading from {data_path}: {self.file_size} bytes")

    def __iter__(self):
        worker_info = torch.utils.data.get_worker_info()
        if self.seed is not None:
            random_seed = self.seed
            if worker_info is not None:
                random_seed += worker_info.id
            np.random.seed(random_seed)
        start_pos = np.random.randint(0, self.file_size) if self.shuffle else 0
        buffer = []
        with open(self.data_path, 'r', encoding='utf-8') as f:
            while True:
                if not buffer:
                    f.seek(start_pos)
                    partial_line = f.readline()
                    if not partial_line:
                        # If we hit the end of the file, start over from the beginning
                        f.seek(0)
                        partial_line = f.readline()
                    buffer = []
                    for _ in range(self.buffer_size):
                        line = f.readline()
                        if not line:
                            f.seek(0)
                            line = f.readline()
                        try:
                            data = json.loads(line.strip())
                            if "input_ids" in data:
                                buffer.append(data["input_ids"])
                        except json.JSONDecodeError:
                            logging.info("Invalid json line")
                            continue
                    if self.shuffle:
                        random.shuffle(buffer)
                if buffer:
                    yield buffer.pop()
                else:
                    break

    def __len__(self):
        return self.file_size


import json
import mmap
import os
import torch
import random
from torch.utils.data import IterableDataset


class JSONLDataset(IterableDataset):
    def __init__(self, data_path, buffer_size=1000):
        """
        Args:
            data_path: path to the jsonl file
            buffer_size: buffer size
        """
        super().__init__()
        self.data_path = data_path
        self.buffer_size = buffer_size
        self.file_size = os.path.getsize(data_path)

    def get_random_start_pos(self, mm):
        """Pick a random starting position."""
        # Pick a random byte offset in the file
        pos = random.randint(0, self.file_size - 1)
        # Back up to the nearest start of a line
        while pos > 0 and mm[pos - 1] != ord('\n'):
            pos -= 1
        return pos

    def read_lines(self, mm, start_pos):
        """Read data starting from the given position."""
        buffer = []
        current_pos = start_pos
        while len(buffer) < self.buffer_size and current_pos < self.file_size:
            line_start = current_pos
            # Find the end of the line
            while current_pos < self.file_size and mm[current_pos] != ord('\n'):
                current_pos += 1
            if current_pos < self.file_size:
                line = mm[line_start:current_pos].decode('utf-8')
                try:
                    data = json.loads(line)
                    if "input_ids" in data:
                        buffer.append(data["input_ids"])
                except json.JSONDecodeError:
                    pass  # skip invalid JSON lines
                current_pos += 1  # skip the newline character
        return buffer, current_pos

    def __iter__(self):
        worker_info = torch.utils.data.get_worker_info()
        with open(self.data_path, 'rb') as f:
            mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            if worker_info is not None:
                start_pos = self.get_random_start_pos(mm)
            else:
                start_pos = 0
            current_pos = start_pos
            while True:
                buffer, next_pos = self.read_lines(mm, current_pos)
                if not buffer and next_pos >= self.file_size:
                    current_pos = 0
                    continue
                elif not buffer:
                    current_pos = next_pos
                    continue
                random.shuffle(buffer)
                for item in buffer:
                    yield torch.tensor(item)
                current_pos = next_pos

    def __len__(self):
        return int(self.file_size / 100)  # assumes an average of ~100 bytes per line


if __name__ == "__main__":
    from torch.utils.data import DataLoader

    dataset = BufferedJsonlDataset(
        data_path="path/to/your/large.jsonl",
        buffer_size=1000,
        seed=42,
        shuffle=True,
    )
    dataloader = DataLoader(
        dataset,
        batch_size=32,
        num_workers=4,
        pin_memory=True,
    )
    for batch in dataloader:
        pass
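For reference, SupervisedDataset only touches the "input_ids" and "label" fields of each record plus the model_max_length and truncate_source attributes of args; a hedged sketch of one pre-tokenized JSONL record and a minimal args namespace (the masking convention shown is an assumption, not documented in this file):

# Hypothetical pre-tokenized record: one JSON object per line of the .jsonl file.
# "label" is assumed to mirror input_ids with prompt positions masked to -100,
# a common convention that this file does not spell out.
example_line = {"input_ids": [1, 2, 3, 4, 5], "label": [-100, -100, 3, 4, 5]}

# Minimal stand-in for the `args` object read by the dataset constructors above.
from types import SimpleNamespace
args = SimpleNamespace(model_max_length=4096, truncate_source=True)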