Commit c69f6c38 authored by cardy20

conflict solved

parent 18c0fa29
# Build 13-gram buckets for the massivetext_large training data.
export PYTHONPATH=$PWD
python3 scripts/clean_training_data/generate_13_grams.py \
    -dir /fsx/polyglot/massivetext_large_data/ \
    -sdir /fsx/lime12/ngram_train2/ -n 13 -buckets 500

# Same run over the merged raw ko data.
export PYTHONPATH=$PWD
python3 scripts/clean_training_data/generate_13_grams.py \
    -dir /fsx/kevinai/data/ko/merged_raw/ \
    -sdir /fsx/lime12/ngram_merged_raw -n 13 -buckets 500
\ No newline at end of file
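For context, generate_13_grams.py is part of the harness's training-data decontamination tooling: it streams every document in -dir, emits each 13-token window, and spreads those 13-grams across the -buckets output files under -sdir. Below is a minimal sketch of that idea, assuming the bucketing is done by a stable hash of the n-gram; the function and file names are hypothetical, not the script's actual implementation:

    import hashlib
    from collections import defaultdict
    from pathlib import Path

    N = 13             # n-gram size, matching -n 13
    NUM_BUCKETS = 500  # matching -buckets 500

    def ngrams(tokens, n=N):
        """Yield every contiguous n-token window of a document."""
        for i in range(len(tokens) - n + 1):
            yield " ".join(tokens[i : i + n])

    def bucket_for(gram, num_buckets=NUM_BUCKETS):
        """Stable hash so a given 13-gram always lands in the same bucket."""
        return int(hashlib.md5(gram.encode("utf-8")).hexdigest(), 16) % num_buckets

    def bucket_document(text):
        """Group a document's 13-grams by bucket id."""
        buckets = defaultdict(list)
        for gram in ngrams(text.lower().split()):
            buckets[bucket_for(gram)].append(gram)
        return buckets

    def flush(buckets, out_dir):
        """Append each bucket's 13-grams to its own file under -sdir."""
        out = Path(out_dir)
        out.mkdir(parents=True, exist_ok=True)
        for bucket_id, grams in buckets.items():
            with open(out / f"ngrams_{bucket_id}.txt", "a", encoding="utf-8") as f:
                f.write("\n".join(grams) + "\n")

    flush(bucket_document("a small example document " * 5), "/tmp/ngram_buckets")

Because the hash is stable, the same 13-gram from the training set and from an evaluation set lands in the same bucket file, so each bucket can later be checked for overlap independently.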
lm_eval/tasks/__init__.py

@@ -50,18 +50,24 @@ from . import blimp
 from . import asdiv
 from . import gsm8k
 from . import storycloze
-<<<<<<< HEAD
 from . import kobest
 from . import nsmc
 from . import klue
 from . import ko_translation
 from . import korquad
+from . import korunsmile
 from . import kohatespeech
-=======
+from . import kold
 from . import toxigen
 from . import crowspairs
+from . import json
+from . import xcopa
 from . import bigbench
->>>>>>> 0542d35d5e56768dd9041ef9b88b90256970d843
+from . import xstorycloze
+from . import xwinograd
+from . import pawsx
+from . import xnli
+from . import mgsm

 ########################################
 # Translation tasks
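The resolution keeps both sides of the conflict: the Korean task modules from HEAD (kobest, nsmc, klue, ko_translation, korquad, korunsmile, kohatespeech, kold) and the multilingual suites from the incoming branch (xcopa, xstorycloze, xwinograd, pawsx, xnli, mgsm). The multilingual modules expose construct_tasks()/create_all_tasks() helpers that return a dict mapping task names to task classes, spliced into TASK_REGISTRY with **. A self-contained sketch of that pattern, with hypothetical names rather than the actual xcopa module:

    # Sketch of the construct_tasks() pattern used by the multilingual suites:
    # one class per language, registered under a "<suite>_<lang>" key.
    LANGS = ["en", "ko", "fr"]  # illustrative; the real suites define their own lists

    class XCopaBase:
        LANG = None

    def _make_lang_task(lang):
        # type() builds a per-language subclass, mirroring how a suite
        # parameterizes one dataset class over many language configs.
        return type(f"XCopa_{lang}", (XCopaBase,), {"LANG": lang})

    def construct_tasks():
        return {f"xcopa_{lang}": _make_lang_task(lang) for lang in LANGS}

    TASK_REGISTRY = {
        "nsmc": "nsmc.NSMC (placeholder)",
        **construct_tasks(),  # expands to xcopa_en, xcopa_ko, xcopa_fr
    }
    print(sorted(TASK_REGISTRY))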
@@ -113,15 +119,6 @@ TASK_REGISTRY = {
     "record": superglue.ReCoRD,
     "wic": superglue.WordsInContext,
     "wsc": superglue.SGWinogradSchemaChallenge,
-<<<<<<< HEAD
-    # Order by benchmark/genre?
-    "coqa": coqa.CoQA,
-    "drop": drop.DROP,
-    "lambada": lambada.LAMBADA,
-    "lambada_cloze": lambada_cloze.LAMBADA_cloze,
-=======
     # Order by benchmark/genre?
     "coqa": coqa.CoQA,
     "drop": drop.DROP,
@@ -129,7 +126,6 @@ TASK_REGISTRY = {
     "lambada_standard": lambada.LambadaStandard,
     "lambada_openai_cloze": lambada_cloze.LambadaOpenAICloze,
     "lambada_standard_cloze": lambada_cloze.LambadaStandardCloze,
->>>>>>> 0542d35d5e56768dd9041ef9b88b90256970d843
     # multilingual lambada
     **lambada_multilingual.construct_tasks(),
     "wikitext": wikitext.WikiText,
@@ -235,10 +231,6 @@ TASK_REGISTRY = {
     "pile_ubuntu-irc": pile.PileUbuntuIrc,
     "pile_wikipedia": pile.PileWikipedia,
     "pile_youtubesubtitles": pile.PileYoutubeSubtitles,
-<<<<<<< HEAD
-=======
->>>>>>> 0542d35d5e56768dd9041ef9b88b90256970d843
     # BLiMP
     "blimp_adjunct_island": blimp.BlimpAdjunctIsland,
     "blimp_anaphor_gender_agreement": blimp.BlimpAnaphorGenderAgreement,
@@ -307,8 +299,6 @@ TASK_REGISTRY = {
     "blimp_wh_vs_that_no_gap_long_distance": blimp.BlimpWhVsThatNoGapLongDistance,
     "blimp_wh_vs_that_with_gap": blimp.BlimpWhVsThatWithGap,
     "blimp_wh_vs_that_with_gap_long_distance": blimp.BlimpWhVsThatWithGapLongDistance,
-<<<<<<< HEAD
-=======
     "toxigen": toxigen.ToxiGen,
     "crows_pairs_english": crowspairs.CrowsPairsEnglish,
     "crows_pairs_english_race_color": crowspairs.CrowsPairsEnglishRaceColor,
@@ -332,16 +322,16 @@ TASK_REGISTRY = {
     "crows_pairs_french_nationality": crowspairs.CrowsPairsFrenchNationality,
     "crows_pairs_french_physical_appearance": crowspairs.CrowsPairsFrenchPhysicalAppearance,
     "crows_pairs_french_autre": crowspairs.CrowsPairsFrenchAutre,
->>>>>>> 0542d35d5e56768dd9041ef9b88b90256970d843
     # Requires manual download of data.
     # "storycloze_2016": storycloze.StoryCloze2016,
     # "storycloze_2018": storycloze.StoryCloze2018,
     # "sat": sat.SATAnalogies,
-<<<<<<< HEAD
+    "kold_level_a": kold.KoldLevelA,
+    "kold_level_b": kold.KoldLevelB,
     "klue_sts": klue.STS,
     "klue_ynat": klue.YNAT,
     "klue_nli": klue.NLI,
+    "klue_mrc": klue.MRC,
     "nsmc": nsmc.NSMC,
     "korquad": korquad.Korquad,
     "kobest_boolq": kobest.BoolQ,
@@ -351,19 +341,57 @@ TASK_REGISTRY = {
     "kobest_sentineg": kobest.SentiNeg,
     "ko_en_translation": ko_translation.KoEnTranslation,
     "en_ko_translation": ko_translation.EnKoTranslation,
+    "korunsmile": korunsmile.KorUnSmile,
     "kohatespeech":kohatespeech.HateSpeech,
     "kohatespeech_gen_bias":kohatespeech.GenderBias,
-    "kohatespeech_apeach":kohatespeech.Apeach
-=======
+    "kohatespeech_apeach":kohatespeech.Apeach,
+    **xcopa.construct_tasks(),
     **bigbench.create_all_tasks(),
->>>>>>> 0542d35d5e56768dd9041ef9b88b90256970d843
+    **xstorycloze.create_all_tasks(),
+    **xwinograd.create_all_tasks(),
+    **pawsx.construct_tasks(),
+    **xnli.construct_tasks(),
+    **mgsm.construct_tasks(),
 }

 ALL_TASKS = sorted(list(TASK_REGISTRY))

+_EXAMPLE_JSON_PATH = "split:key:/absolute/path/to/data.json"
+
+
+def add_json_task(task_name):
+    """Add a JSON perplexity task if the given task name matches the
+    JSON task specification.
+
+    See `json.JsonPerplexity`.
+    """
+    if not task_name.startswith("json"):
+        return
+
+    def create_json_task():
+        splits = task_name.split("=", 1)
+        if len(splits) != 2 or not splits[1]:
+            raise ValueError(
+                "json tasks need a path argument pointing to the local "
+                "dataset, specified like this: json="
+                + _EXAMPLE_JSON_PATH
+                + ' (if there are no splits, use "train")'
+            )
+
+        json_path = splits[1]
+        if json_path == _EXAMPLE_JSON_PATH:
+            raise ValueError(
+                "please do not copy the example path directly, but substitute "
+                "it with a path to your local dataset"
+            )
+        return lambda: json.JsonPerplexity(json_path)
+
+    TASK_REGISTRY[task_name] = create_json_task()
+
+
 def get_task(task_name):
     try:
+        add_json_task(task_name)
         return TASK_REGISTRY[task_name]
     except KeyError:
         print("Available tasks:")
@@ -396,4 +424,4 @@ def get_task_dict(task_name_list: List[Union[str, lm_eval.base.Task]]):
         if not isinstance(task_object, str)
     }
     assert set(task_name_dict.keys()).isdisjoint(set(task_name_from_object_dict.keys()))
-    return {**task_name_dict, **task_name_from_object_dict}
\ No newline at end of file
+    return {**task_name_dict, **task_name_from_object_dict}
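The new add_json_task hook means a task name of the form json=<arg> is registered on the fly just before the registry lookup in get_task, so ad-hoc local datasets can be scored for perplexity without editing the registry. A hedged usage sketch; the path below is a placeholder for a local dataset, which must exist for the load to succeed:

    # Task names take the form "json=<split>:<key>:<path>".
    # If the dataset has no named splits, use "train" as the split.
    from lm_eval import tasks

    name = "json=train:text:/data/my_eval_set.json"
    task_dict = tasks.get_task_dict([name])  # triggers add_json_task(name)
    perplexity_task = task_dict[name]        # a json.JsonPerplexity instance

The log excerpt below appears to be output from the generate_13_grams.py runs listed at the top of the page: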
INFO - 05/29/23 02:24:05 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:24:05 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 02:26:29 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:26:29 - 0:00:00 - Starting at pile document index 106000
INFO - 05/29/23 02:29:19 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:29:19 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 02:31:50 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:31:50 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 02:32:22 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:32:22 - 0:00:00 - ngrams already generated and bucketed, skipping
INFO - 05/29/23 02:34:01 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:34:01 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 02:34:58 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 02:34:58 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 07:12:33 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 07:12:33 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 07:26:46 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 07:26:46 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 07:30:21 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 07:30:21 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 07:31:54 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 07:31:54 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 13:27:39 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 13:27:39 - 0:00:00 - Starting at pile document index 8432000
INFO - 05/29/23 13:30:28 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 13:30:28 - 0:00:00 - Starting at pile document index 0
INFO - 05/29/23 14:27:00 - 0:00:00 - Generating 13-grams and bucketing.
INFO - 05/29/23 14:27:00 - 0:00:00 - Starting at pile document index 0
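The log lines show the script's resume behavior: a fresh run starts at document index 0, an interrupted run picks up from a saved position ("Starting at pile document index 106000"), and a finished run exits early ("ngrams already generated and bucketed, skipping"). A minimal sketch of that checkpointing logic, assuming a simple offset file and a done marker; the file names and process() body are hypothetical:

    import json
    from pathlib import Path

    OUTPUT_DIR = Path("/tmp/ngram_buckets")      # stands in for -sdir
    CHECKPOINT = OUTPUT_DIR / "checkpoint.json"  # hypothetical file names
    DONE_MARKER = OUTPUT_DIR / "done"

    def process(doc):
        """Placeholder for the real work: emit the document's 13-grams into buckets."""

    def run(documents):
        if DONE_MARKER.exists():
            print("ngrams already generated and bucketed, skipping")
            return
        start = json.loads(CHECKPOINT.read_text())["doc_index"] if CHECKPOINT.exists() else 0
        print(f"Starting at pile document index {start}")
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        for i, doc in enumerate(documents[start:], start=start):
            process(doc)
            if i % 1000 == 0:  # persist progress so an interrupted run can resume
                CHECKPOINT.write_text(json.dumps({"doc_index": i}))
        DONE_MARKER.touch()  # later runs skip straight to "already generated"

    run(["doc one", "doc two"])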