Commit c69f6c38 authored by cardy20

conflict solved

parent 18c0fa29
export PYTHONPATH=$PWD
python3 scripts/clean_training_data/generate_13_grams.py \
-dir /fsx/polyglot/massivetext_large_data/ \
-sdir /fsx/lime12/ngram_train2/ -n 13 -buckets 500
export PYTHONPATH=$PWD
python3 scripts/clean_training_data/generate_13_grams.py \
-dir /fsx/kevinai/data/ko/merged_raw/ \
-sdir /fsx/lime12/ngram_merged_raw -n 13 -buckets 500
\ No newline at end of file
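The two runs above differ only in the source corpus (-dir) and the shard output directory (-sdir); -n 13 sets the n-gram order and -buckets 500 the number of hash buckets the n-grams are sharded into. For orientation, here is a minimal sketch of the underlying technique (n-gram extraction plus hash bucketing), not the actual generate_13_grams.py implementation; every name in it is hypothetical.

```python
# Minimal sketch of n-gram extraction with hash bucketing; this is an
# illustration of the technique, not the generate_13_grams.py code.
import hashlib
from collections import defaultdict

def iter_ngrams(tokens, n=13):
    """Yield each successive n-gram of a token list as a single string."""
    for i in range(len(tokens) - n + 1):
        yield " ".join(tokens[i:i + n])

def bucket_id(ngram, num_buckets=500):
    """Map an n-gram to a stable bucket via a hash of its bytes."""
    digest = hashlib.md5(ngram.encode("utf-8")).digest()
    return int.from_bytes(digest[:8], "big") % num_buckets

def shard_ngrams(documents, n=13, num_buckets=500):
    """Group all n-grams from a document stream by bucket id."""
    buckets = defaultdict(list)
    for doc in documents:
        for gram in iter_ngrams(doc.split(), n):
            buckets[bucket_id(gram, num_buckets)].append(gram)
    return buckets
```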
@@ -50,18 +50,24 @@ from . import blimp
from . import asdiv
from . import gsm8k
from . import storycloze
from . import kobest
from . import nsmc
from . import klue
from . import ko_translation
from . import korquad
from . import korunsmile
from . import kohatespeech
from . import kold
from . import toxigen
from . import crowspairs
from . import json
from . import xcopa
from . import bigbench
from . import xstorycloze
from . import xwinograd
from . import pawsx
from . import xnli
from . import mgsm
########################################
# Translation tasks
@@ -113,15 +119,6 @@ TASK_REGISTRY = {
"record": superglue.ReCoRD,
"wic": superglue.WordsInContext,
"wsc": superglue.SGWinogradSchemaChallenge,
# Order by benchmark/genre?
"coqa": coqa.CoQA,
"drop": drop.DROP,
@@ -129,7 +126,6 @@ TASK_REGISTRY = {
"lambada_standard": lambada.LambadaStandard,
"lambada_openai_cloze": lambada_cloze.LambadaOpenAICloze,
"lambada_standard_cloze": lambada_cloze.LambadaStandardCloze,
# multilingual lambada
**lambada_multilingual.construct_tasks(),
"wikitext": wikitext.WikiText,
@@ -235,10 +231,6 @@ TASK_REGISTRY = {
"pile_ubuntu-irc": pile.PileUbuntuIrc,
"pile_wikipedia": pile.PileWikipedia,
"pile_youtubesubtitles": pile.PileYoutubeSubtitles,
# BLiMP
"blimp_adjunct_island": blimp.BlimpAdjunctIsland,
"blimp_anaphor_gender_agreement": blimp.BlimpAnaphorGenderAgreement,
@@ -307,8 +299,6 @@ TASK_REGISTRY = {
"blimp_wh_vs_that_no_gap_long_distance": blimp.BlimpWhVsThatNoGapLongDistance,
"blimp_wh_vs_that_with_gap": blimp.BlimpWhVsThatWithGap,
"blimp_wh_vs_that_with_gap_long_distance": blimp.BlimpWhVsThatWithGapLongDistance,
"toxigen": toxigen.ToxiGen,
"crows_pairs_english": crowspairs.CrowsPairsEnglish,
"crows_pairs_english_race_color": crowspairs.CrowsPairsEnglishRaceColor,
@@ -332,16 +322,16 @@ TASK_REGISTRY = {
"crows_pairs_french_nationality": crowspairs.CrowsPairsFrenchNationality,
"crows_pairs_french_physical_appearance": crowspairs.CrowsPairsFrenchPhysicalAppearance,
"crows_pairs_french_autre": crowspairs.CrowsPairsFrenchAutre,
# Requires manual download of data.
# "storycloze_2016": storycloze.StoryCloze2016,
# "storycloze_2018": storycloze.StoryCloze2018,
# "sat": sat.SATAnalogies,
"kold_level_a": kold.KoldLevelA,
"kold_level_b": kold.KoldLevelB,
"klue_sts": klue.STS,
"klue_ynat": klue.YNAT,
"klue_nli": klue.NLI,
"klue_mrc": klue.MRC,
"nsmc": nsmc.NSMC,
"korquad": korquad.Korquad,
"kobest_boolq": kobest.BoolQ,
@@ -351,19 +341,57 @@ TASK_REGISTRY = {
"kobest_sentineg": kobest.SentiNeg,
"ko_en_translation": ko_translation.KoEnTranslation,
"en_ko_translation": ko_translation.EnKoTranslation,
"korunsmile": korunsmile.KorUnSmile,
"kohatespeech":kohatespeech.HateSpeech,
"kohatespeech_gen_bias":kohatespeech.GenderBias,
"kohatespeech_apeach":kohatespeech.Apeach
=======
"kohatespeech_apeach":kohatespeech.Apeach,
**xcopa.construct_tasks(),
**bigbench.create_all_tasks(),
>>>>>>> 0542d35d5e56768dd9041ef9b88b90256970d843
**xstorycloze.create_all_tasks(),
**xwinograd.create_all_tasks(),
**pawsx.construct_tasks(),
**xnli.construct_tasks(),
**mgsm.construct_tasks(),
}
ALL_TASKS = sorted(list(TASK_REGISTRY))
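The splatted entries (**xcopa.construct_tasks(), **bigbench.create_all_tasks(), and the xstorycloze/xwinograd/pawsx/xnli/mgsm expansions) rely on a factory convention: each module returns a dict mapping task names to task classes, one per language or subtask, which the ** operator merges into TASK_REGISTRY. The exact factories live in the respective modules; a hypothetical sketch of the pattern:

```python
# Hypothetical sketch of the construct_tasks() factory convention used by
# the multilingual modules; class and language names here are illustrative.
_LANGS = ["et", "id", "it", "sw", "ta"]  # a subset, for illustration

class _MultilingualTaskBase:
    LANG = None  # overridden per language

def construct_tasks():
    """Return {task_name: task_class}, ready to splat into TASK_REGISTRY."""
    return {
        f"xcopa_{lang}": type(f"XCopa{lang.capitalize()}",
                              (_MultilingualTaskBase,), {"LANG": lang})
        for lang in _LANGS
    }
```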
_EXAMPLE_JSON_PATH = "split:key:/absolute/path/to/data.json"
def add_json_task(task_name):
"""Add a JSON perplexity task if the given task name matches the
JSON task specification.
See `json.JsonPerplexity`.
"""
if not task_name.startswith("json"):
return
def create_json_task():
splits = task_name.split("=", 1)
if len(splits) != 2 or not splits[1]:
raise ValueError(
"json tasks need a path argument pointing to the local "
"dataset, specified like this: json="
+ _EXAMPLE_JSON_PATH
+ ' (if there are no splits, use "train")'
)
json_path = splits[1]
if json_path == _EXAMPLE_JSON_PATH:
raise ValueError(
"please do not copy the example path directly, but substitute "
"it with a path to your local dataset"
)
return lambda: json.JsonPerplexity(json_path)
TASK_REGISTRY[task_name] = create_json_task()
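Once registered this way, the task name doubles as configuration: everything after the first "=" is interpreted as split:key:path, per _EXAMPLE_JSON_PATH. A hypothetical invocation through get_task (defined just below); the path is a placeholder, not a real dataset:

```python
# Hypothetical usage; the path below is a placeholder, not a real dataset.
# get_task() registers the name on the fly and returns the stored factory,
# which builds a json.JsonPerplexity over the given file when called.
task_factory = get_task("json=train:text:/data/my_corpus.json")
task = task_factory()
```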
def get_task(task_name):
try:
add_json_task(task_name)
return TASK_REGISTRY[task_name]
except KeyError:
print("Available tasks:")
@@ -396,4 +424,4 @@ def get_task_dict(task_name_list: List[Union[str, lm_eval.base.Task]]):
if not isinstance(task_object, str)
}
assert set(task_name_dict.keys()).isdisjoint(set(task_name_from_object_dict.keys()))
return {**task_name_dict, **task_name_from_object_dict}
INFO - 05/29/23 02:24:05 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:24:05 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 02:26:29 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:26:29 - 0:00:00 - Starting at pile document index 106000
INFO - 05/29/23 02:29:19 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:29:19 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 02:31:50 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:31:50 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 02:32:22 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:32:22 - 0:00:00 - ngrams already generated and bucketed, skipping
INFO - 05/29/23 02:34:01 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:34:01 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 02:34:58 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:34:58 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 07:12:33 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 07:12:33 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 07:26:46 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 07:26:46 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 07:30:21 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 07:30:21 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 07:31:54 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 07:31:54 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 13:27:39 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 13:27:39 - 0:00:00 - Starting at pile document index 8432000
INFO - 05/29/23 13:30:28 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 13:30:28 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 14:27:00 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 14:27:00 - 0:00:00 - Starting at pile document index 0
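The alternating "Starting at pile document index N" and "ngrams already generated and bucketed, skipping" lines reflect checkpointed resumption: progress is persisted so an interrupted run restarts from the last saved document index, and an already-completed run exits early. A minimal sketch of that behavior, assuming a simple on-disk checkpoint (the real script's mechanism may differ):

```python
# Minimal sketch of checkpointed resumption, assuming a simple JSON
# checkpoint file and a done-flag; the real script's format may differ.
import json
import os

CHECKPOINT = "ngram_checkpoint.json"
DONE_FLAG = "ngram_done.flag"

def process(doc):
    """Stand-in for per-document n-gram generation and bucketing."""

def run(documents):
    if os.path.exists(DONE_FLAG):
        print("ngrams already generated and bucketed, skipping")
        return
    start = 0
    if os.path.exists(CHECKPOINT):
        with open(CHECKPOINT) as f:
            start = json.load(f)["doc_index"]
    print(f"Starting at pile document index {start}")
    for i, doc in enumerate(documents[start:], start=start):
        process(doc)
        with open(CHECKPOINT, "w") as f:
            json.dump({"doc_index": i + 1}, f)
    open(DONE_FLAG, "w").close()
```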