Commit ccf4a58a authored by Baber
Browse files

add ruler

parent 527a4352
......@@ -108,21 +108,12 @@ async def get_essays() -> Dict[str, str]:
files_repo = sorted(glob.glob(os.path.join(temp_folder_repo, "*.txt")))
files_html = sorted(glob.glob(os.path.join(temp_folder_html, "*.txt")))
# print(
# f"Downloaded {len(files_repo)} essays from `https://github.com/gkamradt/LLMTest_NeedleInAHaystack/`"
# )
# print(f"Downloaded {len(files_html)} essays from `http://www.paulgraham.com/`")
# Combine all texts
text = ""
for file in files_repo + files_html:
with open(file, "r", encoding="utf-8") as f:
text += f.read()
# Cleanup
# shutil.rmtree(temp_folder_repo)
# shutil.rmtree(temp_folder_html)
return {"text": text}
......
......@@ -108,7 +108,6 @@ def sys_kwext(
incremental=input_max_len // 32,
alpha=alpha,
)
# print("num_example_words:", num_example_words)
# Generate samples
for index in tqdm(
range(num_samples), desc=f"Generating FWE Samples | {max_seq_length}"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment