"src/include/blockwise_batched_gemm.hpp" did not exist on "cd29b09a824311bb33fd3f66b4d97a291b5e90e0"
Unverified commit 36485d7a, authored by Leo Gao, committed by GitHub
Browse files

Update README.md

parent 1de3b743
...@@ -12,90 +12,97 @@ The goal of this project is to build a set of tools for evaluating LMs on typica ...@@ -12,90 +12,97 @@ The goal of this project is to build a set of tools for evaluating LMs on typica
### Overview of Tasks ### Overview of Tasks
| Task Name |Train|Val|Test| Metrics | | Task Name |Train|Val|Test| Metrics |
|---------------|-----|---|----|---------------| |------------------------------|-----|---|----|---------------|
|cola |✓ |✓ |✓ |mcc | |cola |✓ |✓ |✓ |mcc |
|mnli |✓ |✓ |✓ |acc | |mnli |✓ |✓ |✓ |acc |
|mnli_mismatched|✓ |✓ |✓ |acc | |mnli_mismatched |✓ |✓ |✓ |acc |
|mrpc |✓ |✓ |✓ |acc, f1 | |mrpc |✓ |✓ |✓ |acc, f1 |
|rte |✓ |✓ |✓ |acc | |rte |✓ |✓ |✓ |acc |
|qnli |✓ |✓ |✓ |acc | |qnli |✓ |✓ |✓ |acc |
|qqp |✓ |✓ |✓ |acc, f1 | |qqp |✓ |✓ |✓ |acc, f1 |
|sst |✓ |✓ |✓ |acc | |sst |✓ |✓ |✓ |acc |
|wnli |✓ |✓ |✓ |acc | |wnli |✓ |✓ |✓ |acc |
|boolq |✓ |✓ |✓ |acc | |boolq |✓ |✓ |✓ |acc |
|cb |✓ |✓ |✓ |acc, f1 | |cb |✓ |✓ |✓ |acc, f1 |
|copa |✓ |✓ |✓ |acc | |copa |✓ |✓ |✓ |acc |
|multirc |✓ |✓ |✓ |acc | |multirc |✓ |✓ |✓ |acc |
|record |✓ |✓ | |f1, em | |record |✓ |✓ | |f1, em |
|wic |✓ |✓ |✓ |acc | |wic |✓ |✓ |✓ |acc |
|wsc |✓ |✓ |✓ |acc | |wsc |✓ |✓ |✓ |acc |
|coqa |✓ |✓ | |f1, em | |coqa |✓ |✓ | |f1, em |
|lambada | |✓ | |ppl, acc | |drop |✓ |✓ | |em, f1 |
|piqa |✓ |✓ | |acc | |lambada | |✓ | |ppl, acc |
|pubmedqa | | |✓ |acc | |piqa |✓ |✓ | |acc |
|sciq |✓ |✓ |✓ |acc | |pubmedqa | | |✓ |acc |
|qa4mre_2011 | | |✓ |acc | |sciq |✓ |✓ |✓ |acc |
|qa4mre_2012 | | |✓ |acc | |qa4mre_2011 | | |✓ |acc |
|qa4mre_2013 | | |✓ |acc | |qa4mre_2012 | | |✓ |acc |
|arc_easy |✓ |✓ |✓ |acc | |qa4mre_2013 | | |✓ |acc |
|arc_challenge |✓ |✓ |✓ |acc | |arc_easy |✓ |✓ |✓ |acc |
|hellaswag |✓ |✓ | |acc | |arc_challenge |✓ |✓ |✓ |acc |
|openbookqa |✓ |✓ |✓ |acc | |hellaswag |✓ |✓ | |acc |
|race |✓ |✓ |✓ |acc | |openbookqa |✓ |✓ |✓ |acc |
|headqa |✓ |✓ |✓ |acc | |race |✓ |✓ |✓ |acc |
|mathqa |✓ |✓ |✓ |acc | |headqa |✓ |✓ |✓ |acc |
|webqs |✓ | |✓ |acc | |mathqa |✓ |✓ |✓ |acc |
|wsc273 | | |✓ |acc | |webqs |✓ | |✓ |acc |
|winogrande |✓ |✓ | |acc | |wsc273 | | |✓ |acc |
|anli_r1 |✓ |✓ |✓ |acc | |winogrande |✓ |✓ | |acc |
|anli_r2 |✓ |✓ |✓ |acc | |anli_r1 |✓ |✓ |✓ |acc |
|anli_r3 |✓ |✓ |✓ |acc | |anli_r2 |✓ |✓ |✓ |acc |
|ethics_cm |✓ |✓ |✓ |acc | |anli_r3 |✓ |✓ |✓ |acc |
|ethics_deontology |✓ |✓ |✓ |acc | |ethics_cm |✓ |✓ |✓ |acc |
|ethics_justice |✓ |✓ |✓ |acc | |ethics_deontology |✓ |✓ |✓ |acc, em |
|ethics_utilitarianism |✓ |✓ |✓ |acc | |ethics_justice |✓ |✓ |✓ |acc, em |
|ethics_virtue |✓ |✓ |✓ |acc | |ethics_utilitarianism_original|✓ |✓ |✓ |acc |
|arithmetic_2da | |✓ | |acc | |ethics_utilitarianism |✓ |✓ |✓ |acc |
|arithmetic_2ds | |✓ | |acc | |ethics_virtue |✓ |✓ |✓ |acc, em |
|arithmetic_3da | |✓ | |acc | |arithmetic_2da | |✓ | |acc |
|arithmetic_3ds | |✓ | |acc | |arithmetic_2ds | |✓ | |acc |
|arithmetic_4da | |✓ | |acc | |arithmetic_3da | |✓ | |acc |
|arithmetic_4ds | |✓ | |acc | |arithmetic_3ds | |✓ | |acc |
|arithmetic_5da | |✓ | |acc | |arithmetic_4da | |✓ | |acc |
|arithmetic_5ds | |✓ | |acc | |arithmetic_4ds | |✓ | |acc |
|arithmetic_2dm | |✓ | |acc | |arithmetic_5da | |✓ | |acc |
|arithmetic_1dc | |✓ | |acc | |arithmetic_5ds | |✓ | |acc |
|wmt14-en-fr | | |✓ |bleu, chrf, ter| |arithmetic_2dm | |✓ | |acc |
|wmt14-fr-en | | |✓ |bleu, chrf, ter| |arithmetic_1dc | |✓ | |acc |
|wmt16-en-ro | | |✓ |bleu, chrf, ter| |wmt14-en-fr | | |✓ |bleu, chrf, ter|
|wmt16-ro-en | | |✓ |bleu, chrf, ter| |wmt14-fr-en | | |✓ |bleu, chrf, ter|
|wmt16-de-en | | |✓ |bleu, chrf, ter| |wmt16-en-ro | | |✓ |bleu, chrf, ter|
|wmt16-en-de | | |✓ |bleu, chrf, ter| |wmt16-ro-en | | |✓ |bleu, chrf, ter|
|wmt20-cs-en | | |✓ |bleu, chrf, ter| |wmt16-de-en | | |✓ |bleu, chrf, ter|
|wmt20-de-en | | |✓ |bleu, chrf, ter| |wmt16-en-de | | |✓ |bleu, chrf, ter|
|wmt20-de-fr | | |✓ |bleu, chrf, ter| |wmt20-cs-en | | |✓ |bleu, chrf, ter|
|wmt20-en-cs | | |✓ |bleu, chrf, ter| |wmt20-de-en | | |✓ |bleu, chrf, ter|
|wmt20-en-de | | |✓ |bleu, chrf, ter| |wmt20-de-fr | | |✓ |bleu, chrf, ter|
|wmt20-en-iu | | |✓ |bleu, chrf, ter| |wmt20-en-cs | | |✓ |bleu, chrf, ter|
|wmt20-en-ja | | |✓ |bleu, chrf, ter| |wmt20-en-de | | |✓ |bleu, chrf, ter|
|wmt20-en-km | | |✓ |bleu, chrf, ter| |wmt20-en-iu | | |✓ |bleu, chrf, ter|
|wmt20-en-pl | | |✓ |bleu, chrf, ter| |wmt20-en-ja | | |✓ |bleu, chrf, ter|
|wmt20-en-ps | | |✓ |bleu, chrf, ter| |wmt20-en-km | | |✓ |bleu, chrf, ter|
|wmt20-en-ru | | |✓ |bleu, chrf, ter| |wmt20-en-pl | | |✓ |bleu, chrf, ter|
|wmt20-en-ta | | |✓ |bleu, chrf, ter| |wmt20-en-ps | | |✓ |bleu, chrf, ter|
|wmt20-en-zh | | |✓ |bleu, chrf, ter| |wmt20-en-ru | | |✓ |bleu, chrf, ter|
|wmt20-fr-de | | |✓ |bleu, chrf, ter| |wmt20-en-ta | | |✓ |bleu, chrf, ter|
|wmt20-iu-en | | |✓ |bleu, chrf, ter| |wmt20-en-zh | | |✓ |bleu, chrf, ter|
|wmt20-ja-en | | |✓ |bleu, chrf, ter| |wmt20-fr-de | | |✓ |bleu, chrf, ter|
|wmt20-km-en | | |✓ |bleu, chrf, ter| |wmt20-iu-en | | |✓ |bleu, chrf, ter|
|wmt20-pl-en | | |✓ |bleu, chrf, ter| |wmt20-ja-en | | |✓ |bleu, chrf, ter|
|wmt20-ps-en | | |✓ |bleu, chrf, ter| |wmt20-km-en | | |✓ |bleu, chrf, ter|
|wmt20-ru-en | | |✓ |bleu, chrf, ter| |wmt20-pl-en | | |✓ |bleu, chrf, ter|
|wmt20-ta-en | | |✓ |bleu, chrf, ter| |wmt20-ps-en | | |✓ |bleu, chrf, ter|
|wmt20-zh-en | | |✓ |bleu, chrf, ter| |wmt20-ru-en | | |✓ |bleu, chrf, ter|
|iwslt17-en-ar | | |✓ |bleu, chrf, ter| |wmt20-ta-en | | |✓ |bleu, chrf, ter|
|iwslt17-ar-en | | |✓ |bleu, chrf, ter| |wmt20-zh-en | | |✓ |bleu, chrf, ter|
|iwslt17-en-ar | | |✓ |bleu, chrf, ter|
|iwslt17-ar-en | | |✓ |bleu, chrf, ter|
|anagrams1 | |✓ | |acc |
|anagrams2 | |✓ | |acc |
|cycle_letters | |✓ | |acc |
|random_insertion | |✓ | |acc |
|reversed_words | |✓ | |acc |
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment