Update some changes according to meta-llama/llama3

06c5529f · Rayyyyy · 0a0618ac · 06c5529f · 06c5529f · 06c5529f
Commit 06c5529f authored May 21, 2024 by Rayyyyy
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 6 deletions

download.sh download.sh +2 -1

eval_details.md eval_details.md +1 -1

llama/test_tokenizer.py llama/test_tokenizer.py +4 -4

No files found.
--- a/download.sh
+++ b/download.sh
@@ -51,7 +51,8 @@ do
    wget --continue ${PRESIGNED_URL/'*'/"${MODEL_PATH}/tokenizer.model"} -O ${TARGET_FOLDER}"/${MODEL_FOLDER_PATH}/tokenizer.model"
    wget --continue ${PRESIGNED_URL/'*'/"${MODEL_PATH}/checklist.chk"} -O ${TARGET_FOLDER}"/${MODEL_FOLDER_PATH}/checklist.chk"
    echo "Checking checksums"
-    if [ "$CPU_ARCH" = "arm64" ]; then
+    CPU_ARCH=$(uname -m)
+    if [[ "$CPU_ARCH" == "arm64" ]]; then
      (cd ${TARGET_FOLDER}"/${MODEL_FOLDER_PATH}" && md5 checklist.chk)
    else
      (cd ${TARGET_FOLDER}"/${MODEL_FOLDER_PATH}" && md5sum -c checklist.chk)

--- a/eval_details.md
+++ b/eval_details.md
@@ -3,7 +3,7 @@ This document contains additional context on the settings and parameters for how
 ### Auto-eval benchmark notes
 #### MMLU
 - We are reporting macro averages for MMLU benchmarks. The micro average numbers for MMLU are: 65.4 and 67.4 for the 8B pre-trained and instruct-aligned models, 78.9 and 82.0 for the 70B pre-trained and instruct-aligned models
- For the instruct-aligned MMLU we ask the model to generate the best choice character
+- The pre-trained models are evaluated in the standard way by calculating the likelihood of each choice character. For the instruct-aligned models, we use a dialogue prompt (*user/assistant*) for the shots and ask the model to generate the best choice character as the answer.
 #### AGI English
 - We use the default few-shot and prompt settings as specified [here](https://github.com/ruixiangcui/AGIEval). The score is averaged over the English subtasks.
 #### CommonSenseQA

--- a/llama/test_tokenizer.py
+++ b/llama/test_tokenizer.py
@@ -46,7 +46,7 @@ class TokenizerTests(TestCase):
            [
                128006,  # <|start_header_id|>
                882,  # "user"
-                128007,  # <|end_of_header|>
+                128007,  # <|end_header_id|>
                271,  # "\n\n"
                2028, 374, 264, 1296, 11914, 13,  # This is a test sentence.
                128009,  # <|eot_id|>
@@ -70,19 +70,19 @@ class TokenizerTests(TestCase):
                128000,  # <|begin_of_text|>
                128006,  # <|start_header_id|>
                9125,     # "system"
-                128007,  # <|end_of_header|>
+                128007,  # <|end_header_id|>
                271,     # "\n\n"
                2028, 374, 264, 1296, 11914, 13,  # "This is a test sentence."
                128009,  # <|eot_id|>
                128006,  # <|start_header_id|>
                882,     # "user"
-                128007,  # <|end_of_header|>
+                128007,  # <|end_header_id|>
                271,     # "\n\n"
                2028, 374, 264, 2077, 13,  # "This is a response.",
                128009,  # <|eot_id|>
                128006,  # <|start_header_id|>
                78191,   # "assistant"
-                128007,  # <|end_of_header|>
+                128007,  # <|end_header_id|>
                271,     # "\n\n"
            ]
        )