# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
importitertools
importrandom
fromfunctoolsimportcache
importdatasets
importwonderwords
fromtqdmimporttqdm
fromtransformersimportAutoTokenizer
fromlm_eval.tasks.ruler.utilsimportSEQ_LENGTHS
# Dedicated random generator seeded with 42, so values drawn through RNG are
# reproducible from run to run and independent of the global `random` state.
RNG = random.Random(42)

# Prompt text for the task. It contains a single `{context}` placeholder
# (presumably filled with the numbered word list via str.format -- confirm
# against the caller) and ends with the start of the answer sentence.
TEMPLATE = "Below is a numbered list of words. In these words, some appear more often than others. Memorize the ones that appear most often.\n{context}\nQuestion: What are the 10 most common words in the above list?\n\nAnswer: The top 10 words that appear most often in the list are:"

# Module-level wonderwords.RandomWord instance.
# NOTE(review): presumably the word source for WORDS below -- confirm.
r = wonderwords.RandomWord()
WORDS=sorted(
list(
...
...
@@ -74,12 +80,12 @@ def generate_input_output(
defsys_word_pair_random(
num_samples:int,
max_seq_length:int,
TOKENIZER=None,
tokenizer=None,
incremental:int=10,
remove_newline_tab=False,
tokens_to_generate=120,
tokens_to_generate=30,
):
assertTOKENIZERisnotNone,"Tokenizer is not provided."
asserttokenizerisnotNone,"Tokenizer is not provided."