Commit 81a882ad authored by jixx's avatar jixx
Browse files

add tgi2.4.0

parent 9822d7f6
This source diff could not be displayed because it is too large. You can view the blob instead.
{
"details": {
"finish_reason": "length",
"generated_tokens": 40,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.27416992,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.17016602,
"special": false,
"text": "\n"
},
{
"id": 28737,
"logprob": -2.7109375,
"special": false,
"text": "I"
},
{
"id": 28809,
"logprob": -1.5,
"special": false,
"text": "’"
},
{
"id": 28719,
"logprob": -0.34204102,
"special": false,
"text": "m"
},
{
"id": 459,
"logprob": -1.6914062,
"special": false,
"text": " not"
},
{
"id": 1864,
"logprob": -0.69140625,
"special": false,
"text": " sure"
},
{
"id": 513,
"logprob": -1.6171875,
"special": false,
"text": " if"
},
{
"id": 315,
"logprob": -1.3837891,
"special": false,
"text": " I"
},
{
"id": 541,
"logprob": -1.2226562,
"special": false,
"text": " can"
},
{
"id": 1567,
"logprob": -1.8652344,
"special": false,
"text": " come"
},
{
"id": 582,
"logprob": -0.0070228577,
"special": false,
"text": " up"
},
{
"id": 395,
"logprob": -0.0054092407,
"special": false,
"text": " with"
},
{
"id": 28705,
"logprob": -0.62597656,
"special": false,
"text": " "
},
{
"id": 28770,
"logprob": -0.0035572052,
"special": false,
"text": "3"
},
{
"id": 4842,
"logprob": -0.93603516,
"special": false,
"text": " unique"
},
{
"id": 3085,
"logprob": -0.028411865,
"special": false,
"text": " words"
},
{
"id": 369,
"logprob": -1.0400391,
"special": false,
"text": " that"
},
{
"id": 6685,
"logprob": -0.09710693,
"special": false,
"text": " describe"
},
{
"id": 528,
"logprob": -0.066467285,
"special": false,
"text": " me"
},
{
"id": 28725,
"logprob": -1.0722656,
"special": false,
"text": ","
},
{
"id": 562,
"logprob": -0.33422852,
"special": false,
"text": " but"
},
{
"id": 315,
"logprob": -0.5136719,
"special": false,
"text": " I"
},
{
"id": 28809,
"logprob": -0.8989258,
"special": false,
"text": "’"
},
{
"id": 584,
"logprob": -0.2076416,
"special": false,
"text": "ll"
},
{
"id": 1464,
"logprob": -0.8808594,
"special": false,
"text": " try"
},
{
"id": 28723,
"logprob": -0.88427734,
"special": false,
"text": "."
},
{
"id": 13,
"logprob": -0.91064453,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.08105469,
"special": false,
"text": "\n"
},
{
"id": 28740,
"logprob": -1.8486328,
"special": false,
"text": "1"
},
{
"id": 28723,
"logprob": -0.111572266,
"special": false,
"text": "."
},
{
"id": 23626,
"logprob": -3.15625,
"special": false,
"text": " Creative"
},
{
"id": 13,
"logprob": -0.9194336,
"special": false,
"text": "\n"
},
{
"id": 28750,
"logprob": -0.24841309,
"special": false,
"text": "2"
},
{
"id": 28723,
"logprob": -9.393692e-05,
"special": false,
"text": "."
},
{
"id": 6785,
"logprob": -3.1386719,
"special": false,
"text": " Fun"
},
{
"id": 1780,
"logprob": -0.53564453,
"special": false,
"text": "ny"
},
{
"id": 13,
"logprob": -0.09033203,
"special": false,
"text": "\n"
},
{
"id": 28770,
"logprob": -0.00466156,
"special": false,
"text": "3"
},
{
"id": 28723,
"logprob": -0.00016450882,
"special": false,
"text": "."
}
]
},
"generated_text": "\n\nI’m not sure if I can come up with 3 unique words that describe me, but I’ll try.\n\n1. Creative\n2. Funny\n3."
}
{
"details": {
"finish_reason": "eos_token",
"generated_tokens": 7,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 1,
"logprob": -0.49658203,
"special": true,
"text": "<s>"
},
{
"id": 28705,
"logprob": -0.0016384125,
"special": false,
"text": " "
},
{
"id": 1,
"logprob": -1.4931641,
"special": true,
"text": "<s>"
},
{
"id": 28705,
"logprob": -0.00075769424,
"special": false,
"text": " "
},
{
"id": 28740,
"logprob": -0.25024414,
"special": false,
"text": "1"
},
{
"id": 28740,
"logprob": -0.2631836,
"special": false,
"text": "1"
},
{
"id": 2,
"logprob": -0.0003285408,
"special": true,
"text": "</s>"
}
]
},
"generated_text": " 11"
}
{
"details": {
"finish_reason": "length",
"generated_tokens": 40,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.0488281,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.0800781,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -2.1152344,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -1.6748047,
"special": false,
"text": " "
},
{
"id": 28740,
"logprob": -0.097229004,
"special": false,
"text": "1"
},
{
"id": 28723,
"logprob": -0.16467285,
"special": false,
"text": "."
},
{
"id": 7615,
"logprob": -2.2246094,
"special": false,
"text": " News"
},
{
"id": 13,
"logprob": -1.0488281,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.69189453,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.013343811,
"special": false,
"text": " "
},
{
"id": 28750,
"logprob": -0.011230469,
"special": false,
"text": "2"
},
{
"id": 28723,
"logprob": -0.00096845627,
"special": false,
"text": "."
},
{
"id": 21095,
"logprob": -2.5605469,
"special": false,
"text": " Blog"
},
{
"id": 13,
"logprob": -0.19458008,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.031280518,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.0030708313,
"special": false,
"text": " "
},
{
"id": 28770,
"logprob": -0.0029277802,
"special": false,
"text": "3"
},
{
"id": 28723,
"logprob": -0.0012350082,
"special": false,
"text": "."
},
{
"id": 20108,
"logprob": -2.1582031,
"special": false,
"text": " Article"
},
{
"id": 13,
"logprob": -0.05810547,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.35083008,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.034332275,
"special": false,
"text": " "
},
{
"id": 28781,
"logprob": -0.009666443,
"special": false,
"text": "4"
},
{
"id": 28723,
"logprob": -0.0013113022,
"special": false,
"text": "."
},
{
"id": 8349,
"logprob": -2.6191406,
"special": false,
"text": " Review"
},
{
"id": 13,
"logprob": -0.04031372,
"special": false,
"text": "\n"
},
{
"id": 27332,
"logprob": -0.45239258,
"special": false,
"text": "###"
},
{
"id": 28705,
"logprob": -0.045410156,
"special": false,
"text": " "
},
{
"id": 28782,
"logprob": -0.0041236877,
"special": false,
"text": "5"
},
{
"id": 28723,
"logprob": -0.0010223389,
"special": false,
"text": "."
},
{
"id": 5299,
"logprob": -2.8066406,
"special": false,
"text": " Other"
},
{
"id": 13,
"logprob": -0.12054443,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.44580078,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.4921875,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.3574219,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.0039062,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.5859375,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.43481445,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.2783203,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.20410156,
"special": false,
"text": "\n"
}
]
},
"generated_text": "\n\n### 1. News\n### 2. Blog\n### 3. Article\n### 4. Review\n### 5. Other\n\n\n\n\n\n\n\n\n"
}
{
"details": {
"finish_reason": "length",
"generated_tokens": 40,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.31347656,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.27441406,
"special": false,
"text": "\n"
},
{
"id": 28737,
"logprob": -2.2285156,
"special": false,
"text": "I"
},
{
"id": 28809,
"logprob": -1.4677734,
"special": false,
"text": "’"
},
{
"id": 28719,
"logprob": -0.31762695,
"special": false,
"text": "m"
},
{
"id": 264,
"logprob": -1.6865234,
"special": false,
"text": " a"
},
{
"id": 1215,
"logprob": -3.2695312,
"special": false,
"text": " very"
},
{
"id": 20640,
"logprob": -3.1230469,
"special": false,
"text": " passionate"
},
{
"id": 1338,
"logprob": -0.48339844,
"special": false,
"text": " person"
},
{
"id": 28723,
"logprob": -0.9970703,
"special": false,
"text": "."
},
{
"id": 315,
"logprob": -0.5498047,
"special": false,
"text": " I"
},
{
"id": 28809,
"logprob": -1.1923828,
"special": false,
"text": "’"
},
{
"id": 28719,
"logprob": -0.080444336,
"special": false,
"text": "m"
},
{
"id": 1215,
"logprob": -1.8271484,
"special": false,
"text": " very"
},
{
"id": 12215,
"logprob": -2.8847656,
"special": false,
"text": " driven"
},
{
"id": 28723,
"logprob": -1.0927734,
"special": false,
"text": "."
},
{
"id": 315,
"logprob": -0.4584961,
"special": false,
"text": " I"
},
{
"id": 28809,
"logprob": -0.5019531,
"special": false,
"text": "’"
},
{
"id": 28719,
"logprob": -0.030715942,
"special": false,
"text": "m"
},
{
"id": 1215,
"logprob": -0.96972656,
"special": false,
"text": " very"
},
{
"id": 7798,
"logprob": -2.8847656,
"special": false,
"text": " determined"
},
{
"id": 28723,
"logprob": -0.27319336,
"special": false,
"text": "."
},
{
"id": 13,
"logprob": -0.56396484,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.011016846,
"special": false,
"text": "\n"
},
{
"id": 3195,
"logprob": -0.7163086,
"special": false,
"text": "What"
},
{
"id": 349,
"logprob": -1.1611328,
"special": false,
"text": " is"
},
{
"id": 574,
"logprob": -0.515625,
"special": false,
"text": " your"
},
{
"id": 6656,
"logprob": -1.0253906,
"special": false,
"text": " favorite"
},
{
"id": 1970,
"logprob": -2.1738281,
"special": false,
"text": " thing"
},
{
"id": 684,
"logprob": -0.48364258,
"special": false,
"text": " about"
},
{
"id": 1250,
"logprob": -1.8876953,
"special": false,
"text": " being"
},
{
"id": 264,
"logprob": -0.41967773,
"special": false,
"text": " a"
},
{
"id": 8626,
"logprob": -2.9160156,
"special": false,
"text": " teacher"
},
{
"id": 28804,
"logprob": -0.11920166,
"special": false,
"text": "?"
},
{
"id": 13,
"logprob": -0.023727417,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.010848999,
"special": false,
"text": "\n"
},
{
"id": 28737,
"logprob": -1.0566406,
"special": false,
"text": "I"
},
{
"id": 2016,
"logprob": -0.7163086,
"special": false,
"text": " love"
},
{
"id": 272,
"logprob": -1.9169922,
"special": false,
"text": " the"
},
{
"id": 1639,
"logprob": -2.03125,
"special": false,
"text": " fact"
}
]
},
"generated_text": "\n\nI’m a very passionate person. I’m very driven. I’m very determined.\n\nWhat is your favorite thing about being a teacher?\n\nI love the fact"
}
......@@ -14,55 +14,55 @@
},
{
"id": 187,
"logprob": -0.26953125,
"logprob": -0.35742188,
"special": false,
"text": "\n"
},
{
"id": 30763,
"logprob": -1.1953125,
"logprob": -1.1015625,
"special": false,
"text": "Deep"
},
{
"id": 4715,
"logprob": -0.53515625,
"logprob": -0.5234375,
"special": false,
"text": " learning"
},
{
"id": 310,
"logprob": -0.625,
"logprob": -0.55078125,
"special": false,
"text": " is"
},
{
"id": 247,
"logprob": -0.6796875,
"logprob": -0.6640625,
"special": false,
"text": " a"
},
{
"id": 747,
"logprob": -2.0,
"logprob": -2.0625,
"special": false,
"text": " new"
},
{
"id": 1511,
"logprob": -2.3125,
"logprob": -2.375,
"special": false,
"text": " type"
},
{
"id": 273,
"logprob": -0.0028533936,
"logprob": -0.0029144287,
"special": false,
"text": " of"
},
{
"id": 5145,
"logprob": -1.265625,
"logprob": -1.2734375,
"special": false,
"text": " machine"
}
......
......@@ -52,7 +52,7 @@
},
{
"id": 9830,
"logprob": -1.65625,
"logprob": -2.25,
"special": false,
"text": " colors"
},
......@@ -64,13 +64,13 @@
},
{
"id": 329,
"logprob": -2.4375,
"logprob": -2.171875,
"special": false,
"text": " A"
},
{
"id": 1180,
"logprob": -1.953125,
"logprob": -2.046875,
"special": false,
"text": " number"
},
......
[
{
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "In a bustling city, a chicken named Cluck",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1727773835,
"id": "",
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
"object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native",
"usage": {
"completion_tokens": 10,
"prompt_tokens": 50,
"total_tokens": 60
}
},
{
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "In a world where even chickens could dream big,",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1727773835,
"id": "",
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
"object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native",
"usage": {
"completion_tokens": 10,
"prompt_tokens": 50,
"total_tokens": 60
}
},
{
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "In a world where even chickens could dream big,",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1727773835,
"id": "",
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
"object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native",
"usage": {
"completion_tokens": 10,
"prompt_tokens": 50,
"total_tokens": 60
}
},
{
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "In a world where even chickens could dream big,",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1727773835,
"id": "",
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
"object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native",
"usage": {
"completion_tokens": 10,
"prompt_tokens": 50,
"total_tokens": 60
}
}
]
{
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "In a bustling city, a chicken named Cluck",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1727556016,
"id": "",
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
"object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native",
"usage": {
"completion_tokens": 10,
"prompt_tokens": 50,
"total_tokens": 60
}
}
......@@ -26,19 +26,19 @@
},
{
"id": 259,
"logprob": -0.4716797,
"logprob": -0.47070312,
"special": false,
"text": " "
},
{
"id": 261,
"logprob": -0.044677734,
"logprob": -0.15307617,
"special": false,
"text": ","
},
{
"id": 35622,
"logprob": -0.79589844,
"logprob": -0.796875,
"special": false,
"text": " cloud"
},
......@@ -56,7 +56,7 @@
},
{
"id": 35622,
"logprob": -1.1630859,
"logprob": -1.2998047,
"special": false,
"text": " cloud"
},
......@@ -75,5 +75,5 @@
],
"top_tokens": null
},
"generated_text": "Why is the sky blue?blue sky, clouds and clouds"
"generated_text": "Why is the sky blue?blue sky , clouds and clouds"
}
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.8359375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6171875,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3417969,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8730469,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2626953,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.7060547,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4482422,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.15246582,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.796875,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22766113,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007045746,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021759033,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
}
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.7890625,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.625,
"text": "request"
}
],
"seed": 0,
"tokens": [
{
"id": 29899,
"logprob": -1.4980469,
"special": false,
"text": "-"
},
{
"id": 1454,
"logprob": -0.19433594,
"special": false,
"text": "for"
},
{
"id": 29899,
"logprob": 0.0,
"special": false,
"text": "-"
},
{
"id": 9342,
"logprob": 0.0,
"special": false,
"text": "comment"
},
{
"id": 29901,
"logprob": 0.0,
"special": false,
"text": ":"
},
{
"id": 396,
"logprob": -0.27392578,
"special": false,
"text": " #"
},
{
"id": 29906,
"logprob": -0.49389648,
"special": false,
"text": "2"
},
{
"id": 29900,
"logprob": -0.81103516,
"special": false,
"text": "0"
},
{
"id": 29896,
"logprob": 0.0,
"special": false,
"text": "1"
},
{
"id": 29955,
"logprob": -1.0800781,
"special": false,
"text": "7"
}
],
"top_tokens": null
},
"generated_text": "Test request-for-comment: #2017"
}
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.8828125,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.5859375,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3359375,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8623047,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2451172,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.6923828,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4492188,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.15197754,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.8022461,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22583008,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007095337,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021652222,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.796875,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.625,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3476562,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8789062,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2734375,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.703125,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4677734,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.15454102,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.7973633,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.23278809,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.006980896,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.022033691,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.9296875,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.5703125,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3203125,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8486328,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2480469,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.7060547,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4511719,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.1529541,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.81396484,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22180176,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007133484,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021835327,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6171875,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3261719,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8691406,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2597656,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.7070312,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4550781,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.1538086,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.79345703,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22924805,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.0070266724,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021942139,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
}
]
{
"choices": [
{
"finish_reason": "eos_token",
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": null,
"content": "I am an AI assistant",
"name": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": {
"error": "Cannot get current weather forecast from specified location and temperature unit. Please try again with different options."
},
"description": null,
"name": "notify_error"
},
"id": 0,
"type": "function"
}
]
"tool_calls": null
},
"usage": null
}
],
"created": 1712852597,
"created": 1728497062,
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "1.4.5-native",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion",
"system_fingerprint": "2.4.1-dev0-native",
"usage": {
"completion_tokens": 39,
"prompt_tokens": 496,
"total_tokens": 535
"completion_tokens": 23,
"prompt_tokens": 604,
"total_tokens": 627
}
}
{
"choices": [
{
"delta": {
"content": " assistant",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1728497531,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "2.4.1-dev0-native",
"usage": null
}
{
"choices": [
{
"delta": {
"content": " fans",
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1728497461,
"id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "2.4.1-dev0-native",
"usage": null
}
import pytest
import json
from text_generation.types import GrammarType
@pytest.fixture(scope="module")
......@@ -38,6 +35,6 @@ async def test_flash_llama_simple(flash_llama_chat, response_snapshot):
print(repr(response.choices[0].message.content))
assert (
response.choices[0].message.content
== "As of your last question, the weather in Brooklyn, New York, is typically hot and humid throughout the year. The suburbs around New York City are jealously sheltered, and at least in the Lower Bronx, there are very few outdoor environments to explore in the middle of urban confines. In fact, typical times for humidity levels in Brooklyn include:\n\n- Early morning: 80-85% humidity, with occas"
== "As of your last question, the weather in Brooklyn, New York, is typically hot and humid throughout the year. The suburbs around New York City are jealously sheltered, and at least in the Lower Bronx, there are very few outdoor environments to appreciate nature.\n\nIn terms of temperature, the warmest times of the year are from June to August, when average high temperatures typically range from around 73°F or 23°C"
)
assert response == response_snapshot
......@@ -3,15 +3,13 @@ import requests
import json
from aiohttp import ClientSession
from text_generation.types import (
Completion,
)
from text_generation.types import Completion, ChatCompletionChunk
@pytest.fixture(scope="module")
def flash_llama_completion_handle(launcher):
with launcher(
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"meta-llama/Meta-Llama-3.1-8B-Instruct",
) as handle:
yield handle
......@@ -34,28 +32,145 @@ def test_flash_llama_completion_single_prompt(
f"{flash_llama_completion.base_url}/v1/completions",
json={
"model": "tgi",
"prompt": "Say this is a test",
"max_tokens": 5,
"seed": 0,
"prompt": "What is Deep Learning?",
"max_tokens": 10,
"temperature": 0.0,
},
headers=flash_llama_completion.headers,
stream=False,
)
response = response.json()
assert len(response["choices"]) == 1
assert (
response["choices"][0]["text"]
== " A Beginner’s Guide\nDeep learning is a subset"
)
assert response == response_snapshot
@pytest.mark.release
async def test_flash_llama_completion_stream_usage(
flash_llama_completion, response_snapshot
):
url = f"{flash_llama_completion.base_url}/v1/chat/completions"
request = {
"model": "tgi",
"messages": [
{
"role": "user",
"content": "What is Deep Learning?",
}
],
"max_tokens": 10,
"temperature": 0.0,
"stream_options": {"include_usage": True},
"stream": True,
}
string = ""
chunks = []
had_usage = False
async with ClientSession(headers=flash_llama_completion.headers) as session:
async with session.post(url, json=request) as response:
# iterate over the stream
async for chunk in response.content.iter_any():
# remove "data:"
chunk = chunk.decode().split("\n\n")
# remove "data:" if present
chunk = [c.replace("data:", "") for c in chunk]
# remove empty strings
chunk = [c for c in chunk if c]
# remove completion marking chunk
chunk = [c for c in chunk if c != " [DONE]"]
# parse json
chunk = [json.loads(c) for c in chunk]
for c in chunk:
chunks.append(ChatCompletionChunk(**c))
assert "choices" in c
if len(c["choices"]) == 1:
index = c["choices"][0]["index"]
assert index == 0
string += c["choices"][0]["delta"]["content"]
has_usage = c["usage"] is not None
assert not had_usage
if has_usage:
had_usage = True
else:
raise RuntimeError("Expected different payload")
assert had_usage
assert (
string
== "**Deep Learning: An Overview**\n=====================================\n\n"
)
assert chunks == response_snapshot
request = {
"model": "tgi",
"messages": [
{
"role": "user",
"content": "What is Deep Learning?",
}
],
"max_tokens": 10,
"temperature": 0.0,
"stream": True,
}
string = ""
chunks = []
had_usage = False
async with ClientSession(headers=flash_llama_completion.headers) as session:
async with session.post(url, json=request) as response:
# iterate over the stream
async for chunk in response.content.iter_any():
# remove "data:"
chunk = chunk.decode().split("\n\n")
# remove "data:" if present
chunk = [c.replace("data:", "") for c in chunk]
# remove empty strings
chunk = [c for c in chunk if c]
# remove completion marking chunk
chunk = [c for c in chunk if c != " [DONE]"]
# parse json
chunk = [json.loads(c) for c in chunk]
for c in chunk:
chunks.append(ChatCompletionChunk(**c))
assert "choices" in c
if len(c["choices"]) == 1:
index = c["choices"][0]["index"]
assert index == 0
string += c["choices"][0]["delta"]["content"]
has_usage = c["usage"] is not None
assert not had_usage
if has_usage:
had_usage = True
else:
raise RuntimeError("Expected different payload")
assert not had_usage
assert (
string
== "**Deep Learning: An Overview**\n=====================================\n\n"
)
@pytest.mark.release
def test_flash_llama_completion_many_prompts(flash_llama_completion, response_snapshot):
response = requests.post(
f"{flash_llama_completion.base_url}/v1/completions",
json={
"model": "tgi",
"prompt": ["Say", "this", "is", "a"],
"prompt": [
"What is Deep Learning?",
"Is water wet?",
"What is the capital of France?",
"def mai",
],
"max_tokens": 10,
"seed": 0,
"temperature": 0.0,
},
headers=flash_llama_completion.headers,
stream=False,
......@@ -63,9 +178,16 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
response = response.json()
assert len(response["choices"]) == 4
all_indexes = [choice["index"] for choice in response["choices"]]
all_indexes = [(choice["index"], choice["text"]) for choice in response["choices"]]
all_indexes.sort()
assert all_indexes == [0, 1, 2, 3]
all_indices, all_strings = zip(*all_indexes)
assert list(all_indices) == [0, 1, 2, 3]
assert list(all_strings) == [
" A Beginner’s Guide\nDeep learning is a subset",
" This is a question that has puzzled many people for",
" Paris\nWhat is the capital of France?\nThe",
'usculas_minusculas(s):\n """\n',
]
assert response == response_snapshot
......@@ -77,19 +199,21 @@ async def test_flash_llama_completion_many_prompts_stream(
request = {
"model": "tgi",
"prompt": [
"What color is the sky?",
"What is Deep Learning?",
"Is water wet?",
"What is the capital of France?",
"def mai",
],
"max_tokens": 10,
"seed": 0,
"temperature": 0.0,
"stream": True,
}
url = f"{flash_llama_completion.base_url}/v1/completions"
chunks = []
strings = [""] * 4
async with ClientSession(headers=flash_llama_completion.headers) as session:
async with session.post(url, json=request) as response:
# iterate over the stream
......@@ -100,13 +224,23 @@ async def test_flash_llama_completion_many_prompts_stream(
chunk = [c.replace("data:", "") for c in chunk]
# remove empty strings
chunk = [c for c in chunk if c]
# remove completion marking chunk
chunk = [c for c in chunk if c != " [DONE]"]
# parse json
chunk = [json.loads(c) for c in chunk]
for c in chunk:
chunks.append(Completion(**c))
assert "choices" in c
assert 0 <= c["choices"][0]["index"] <= 4
index = c["choices"][0]["index"]
assert 0 <= index <= 4
strings[index] += c["choices"][0]["text"]
assert response.status == 200
assert list(strings) == [
" A Beginner’s Guide\nDeep learning is a subset",
" This is a question that has puzzled many people for",
" Paris\nWhat is the capital of France?\nThe",
'usculas_minusculas(s):\n """\n',
]
assert chunks == response_snapshot
import pytest
@pytest.fixture(scope="module")
def flash_deepseek_v2_handle(launcher):
with launcher("deepseek-ai/DeepSeek-V2-Lite", num_shard=2) as handle:
yield handle
@pytest.fixture(scope="module")
async def flash_deepseek_v2(flash_deepseek_v2_handle):
await flash_deepseek_v2_handle.health(300)
return flash_deepseek_v2_handle.client
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_deepseek_v2(flash_deepseek_v2, response_snapshot):
response = await flash_deepseek_v2.generate(
"Test request", max_new_tokens=10, decoder_input_details=True
)
assert response == response_snapshot
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_deepseek_v2_all_params(flash_deepseek_v2, response_snapshot):
response = await flash_deepseek_v2.generate(
"Test request",
max_new_tokens=10,
repetition_penalty=1.2,
return_full_text=True,
stop_sequences=["test"],
temperature=0.5,
top_p=0.9,
top_k=10,
truncate=5,
typical_p=0.9,
watermark=True,
decoder_input_details=True,
seed=0,
)
assert response == response_snapshot
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_deepseek_v2_load(
flash_deepseek_v2, generate_load, response_snapshot
):
responses = await generate_load(
flash_deepseek_v2, "Test request", max_new_tokens=10, n=4
)
assert len(responses) == 4
assert all([r.generated_text == responses[0].generated_text for r in responses])
assert responses == response_snapshot
......@@ -16,7 +16,7 @@ async def flash_gemma(flash_gemma_handle):
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_gemma(flash_gemma, response_snapshot):
async def test_flash_gemma_simple(flash_gemma, response_snapshot):
response = await flash_gemma.generate(
"Test request", max_new_tokens=10, decoder_input_details=True
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment