{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 128000, "content": "<|begin_of_text|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 128001, "content": "<|end_of_text|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 128002, "content": "<|reserved_special_token_0|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 128003, "content": "<|reserved_special_token_1|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 128004, "content": "<|reserved_special_token_2|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 128005, "content": "<|reserved_special_token_3|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 128006, "content": "<|start_header_id|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 128007, "content": "<|end_header_id|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 128008, "content": "<|reserved_special_token_4|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 128009, "content": "<|eot_id|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 128010, "content": "<|reserved_special_token_5|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Sequence", "pretokenizers": [ { "type": "Split", "pattern": { "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" }, "behavior": "Isolated", "invert": false }, { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": false } ] }, "post_processor": { "type": "Sequence", "processors": [ { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "<|begin_of_text|>", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "<|begin_of_text|>", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "<|begin_of_text|>", "type_id": 1 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "<|begin_of_text|>": { "id": "<|begin_of_text|>", "ids": [ 128000 ], "tokens": [ "<|begin_of_text|>" ] } } } ] }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": true, "vocab": {}, "merges": [] } }