Commit 0ae96ff8 authored by Julien Chaumond, committed by GitHub
Browse files

BIG Reorganize examples (#4213)

* Created using Colaboratory

* [examples] reorganize files

* remove run_tpu_glue.py as superseded by TPU support in Trainer

* Bugfix: int, not tuple

* move files around
parent cafa6a9e
...@@ -28,7 +28,7 @@ class DataCollatorIntegrationTest(unittest.TestCase): ...@@ -28,7 +28,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
MODEL_ID = "bert-base-cased-finetuned-mrpc" MODEL_ID = "bert-base-cased-finetuned-mrpc"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
data_args = GlueDataTrainingArguments( data_args = GlueDataTrainingArguments(
task_name="mrpc", data_dir="./examples/tests_samples/MRPC", overwrite_cache=True task_name="mrpc", data_dir="./tests/fixtures/tests_samples/MRPC", overwrite_cache=True
) )
dataset = GlueDataset(data_args, tokenizer=tokenizer, evaluate=True) dataset = GlueDataset(data_args, tokenizer=tokenizer, evaluate=True)
data_collator = DefaultDataCollator() data_collator = DefaultDataCollator()
...@@ -39,7 +39,7 @@ class DataCollatorIntegrationTest(unittest.TestCase): ...@@ -39,7 +39,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
MODEL_ID = "distilroberta-base" MODEL_ID = "distilroberta-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
data_args = GlueDataTrainingArguments( data_args = GlueDataTrainingArguments(
task_name="sts-b", data_dir="./examples/tests_samples/STS-B", overwrite_cache=True task_name="sts-b", data_dir="./tests/fixtures/tests_samples/STS-B", overwrite_cache=True
) )
dataset = GlueDataset(data_args, tokenizer=tokenizer, evaluate=True) dataset = GlueDataset(data_args, tokenizer=tokenizer, evaluate=True)
data_collator = DefaultDataCollator() data_collator = DefaultDataCollator()
...@@ -91,7 +91,7 @@ class TrainerIntegrationTest(unittest.TestCase): ...@@ -91,7 +91,7 @@ class TrainerIntegrationTest(unittest.TestCase):
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID) model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
data_args = GlueDataTrainingArguments( data_args = GlueDataTrainingArguments(
task_name="mrpc", data_dir="./examples/tests_samples/MRPC", overwrite_cache=True task_name="mrpc", data_dir="./tests/fixtures/tests_samples/MRPC", overwrite_cache=True
) )
eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, evaluate=True) eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, evaluate=True)
......
--- ---
- step: - step:
name: Execute python examples/run_glue.py name: Execute python examples/text-classification/run_glue.py
image: pytorch/pytorch:nightly-devel-cuda10.0-cudnn7 image: pytorch/pytorch:nightly-devel-cuda10.0-cudnn7
command: command:
- python /valohai/repository/utils/download_glue_data.py --data_dir=/glue_data - python /valohai/repository/utils/download_glue_data.py --data_dir=/glue_data
- pip install -e . - pip install -e .
- pip install -r examples/requirements.txt - pip install -r examples/requirements.txt
- python examples/run_glue.py --do_train --data_dir=/glue_data/{parameter-value:task_name} {parameters} - python examples/text-classification/run_glue.py --do_train --data_dir=/glue_data/{parameter-value:task_name} {parameters}
parameters: parameters:
- name: model_type - name: model_type
pass-as: --model_type={v} pass-as: --model_type={v}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment