Commit 40f9dc14 authored by Baber
Browse files

get essays from HF (Hugging Face)

parent 5f2dae76
...@@ -19,6 +19,7 @@ import re ...@@ -19,6 +19,7 @@ import re
import uuid import uuid
from functools import lru_cache, cache from functools import lru_cache, cache
from typing import List, Union, Literal from typing import List, Union, Literal
import datasets
import numpy as np import numpy as np
import wonderwords import wonderwords
...@@ -326,8 +327,8 @@ def get_haystack( ...@@ -326,8 +327,8 @@ def get_haystack(
) -> Union[list[str], str]: ) -> Union[list[str], str]:
NEEDLE = "One of the special magic {type_needle_v} for {key} is: {value}." NEEDLE = "One of the special magic {type_needle_v} for {key} is: {value}."
if type_haystack == "essay": if type_haystack == "essay":
essay = get_all_essays()["text"] essay = datasets.load_dataset("baber/paul_graham_essays", split="train")["text"]
# essay = json.load(open(essay))["text"] essay = " ".join(essay)
haystack = re.sub(r"\s+", " ", essay).split(" ") haystack = re.sub(r"\s+", " ", essay).split(" ")
elif type_haystack == "repeat": elif type_haystack == "repeat":
haystack = "The grass is green. The sky is blue. The sun is yellow. Here we go. There and back again." haystack = "The grass is green. The sky is blue. The sun is yellow. Here we go. There and back again."
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment