feat(integration-tests): improve comparison and health checks (#336)

dbdc587d · OlivierDehaene · GitHub · e71471be · dbdc587d · dbdc587d
Unverified Commit dbdc587d authored May 16, 2023 by OlivierDehaene Committed by GitHub May 16, 2023
19 changed files
--- a/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json
+++ b/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 563,
+        "logprob": null,
+        "text": "def"
+      },
+      {
+        "id": 942,
+        "logprob": -5.1367188,
+        "text": " print"
+      },
+      {
+        "id": 62,
+        "logprob": -0.24450684,
+        "text": "_"
+      },
+      {
+        "id": 7196,
+        "logprob": -6.9609375,
+        "text": "hello"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 1241,
+        "logprob": -0.9863281,
+        "special": false,
+        "text": "():"
+      },
+      {
+        "id": 258,
+        "logprob": -0.21447754,
+        "special": false,
+        "text": "\n   "
+      },
+      {
+        "id": 942,
+        "logprob": -0.43701172,
+        "special": false,
+        "text": " print"
+      },
+      {
+        "id": 372,
+        "logprob": -0.5361328,
+        "special": false,
+        "text": "(\""
+      },
+      {
+        "id": 7371,
+        "logprob": -0.44555664,
+        "special": false,
+        "text": "Hello"
+      },
+      {
+        "id": 9956,
+        "logprob": -1.2412109,
+        "special": false,
+        "text": " World"
+      },
+      {
+        "id": 8657,
+        "logprob": -0.7583008,
+        "special": false,
+        "text": "!\")"
+      },
+      {
+        "id": 185,
+        "logprob": -0.76171875,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 185,
+        "logprob": -0.20837402,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 1018,
+        "logprob": -1.2470703,
+        "special": false,
+        "text": "print"
+      }
+    ]
+  },
+  "generated_text": "():\n    print(\"Hello World!\")\n\nprint"
+}
--- a/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 563,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 942,
+          "logprob": -5.1367188,
+          "text": " print"
+        },
+        {
+          "id": 62,
+          "logprob": -0.24450684,
+          "text": "_"
+        },
+        {
+          "id": 7196,
+          "logprob": -6.9609375,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1241,
+          "logprob": -0.9863281,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 258,
+          "logprob": -0.21362305,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 942,
+          "logprob": -0.44360352,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 372,
+          "logprob": -0.54248047,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 7371,
+          "logprob": -0.44555664,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 9956,
+          "logprob": -1.2441406,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 8657,
+          "logprob": -0.75878906,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 185,
+          "logprob": -0.76171875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 185,
+          "logprob": -0.2084961,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 1018,
+          "logprob": -1.2460938,
+          "special": false,
+          "text": "print"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\nprint"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 563,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 942,
+          "logprob": -5.1367188,
+          "text": " print"
+        },
+        {
+          "id": 62,
+          "logprob": -0.24450684,
+          "text": "_"
+        },
+        {
+          "id": 7196,
+          "logprob": -6.9609375,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1241,
+          "logprob": -0.9863281,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 258,
+          "logprob": -0.21362305,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 942,
+          "logprob": -0.44360352,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 372,
+          "logprob": -0.54248047,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 7371,
+          "logprob": -0.44555664,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 9956,
+          "logprob": -1.2441406,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 8657,
+          "logprob": -0.75878906,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 185,
+          "logprob": -0.76171875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 185,
+          "logprob": -0.2084961,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 1018,
+          "logprob": -1.2460938,
+          "special": false,
+          "text": "print"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\nprint"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 563,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 942,
+          "logprob": -5.1367188,
+          "text": " print"
+        },
+        {
+          "id": 62,
+          "logprob": -0.24450684,
+          "text": "_"
+        },
+        {
+          "id": 7196,
+          "logprob": -6.9609375,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1241,
+          "logprob": -0.9863281,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 258,
+          "logprob": -0.21362305,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 942,
+          "logprob": -0.44360352,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 372,
+          "logprob": -0.54248047,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 7371,
+          "logprob": -0.44555664,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 9956,
+          "logprob": -1.2441406,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 8657,
+          "logprob": -0.75878906,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 185,
+          "logprob": -0.76171875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 185,
+          "logprob": -0.2084961,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 1018,
+          "logprob": -1.2460938,
+          "special": false,
+          "text": "print"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\nprint"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 563,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 942,
+          "logprob": -5.1367188,
+          "text": " print"
+        },
+        {
+          "id": 62,
+          "logprob": -0.24450684,
+          "text": "_"
+        },
+        {
+          "id": 7196,
+          "logprob": -6.9609375,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1241,
+          "logprob": -0.9863281,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 258,
+          "logprob": -0.21362305,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 942,
+          "logprob": -0.44360352,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 372,
+          "logprob": -0.54248047,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 7371,
+          "logprob": -0.44555664,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 9956,
+          "logprob": -1.2441406,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 8657,
+          "logprob": -0.75878906,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 185,
+          "logprob": -0.76171875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 185,
+          "logprob": -0.2084961,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 1018,
+          "logprob": -1.2460938,
+          "special": false,
+          "text": "print"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\nprint"
+  }
+]
--- a/integration-tests/models/__snapshots__/test_flash_starcoder.ambr
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder.ambr
-# serializer version: 1
-# name: test_flash_starcoder
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 589,
-          'text': 'def',
-        }),
-        dict({
-          'id': 1459,
-          'text': ' print',
-        }),
-        dict({
-          'id': 81,
-          'text': '_',
-        }),
-        dict({
-          'id': 7656,
-          'text': 'hello',
-        }),
-      ]),
-      'seed': None,
-      'tokens': list([
-        dict({
-          'id': 2262,
-          'special': False,
-          'text': '():',
-        }),
-        dict({
-          'id': 284,
-          'special': False,
-          'text': '''
-            
-               
-          ''',
-        }),
-        dict({
-          'id': 1459,
-          'special': False,
-          'text': ' print',
-        }),
-        dict({
-          'id': 440,
-          'special': False,
-          'text': '("',
-        }),
-        dict({
-          'id': 8279,
-          'special': False,
-          'text': 'Hello',
-        }),
-        dict({
-          'id': 10896,
-          'special': False,
-          'text': ' World',
-        }),
-        dict({
-          'id': 657,
-          'special': False,
-          'text': '")',
-        }),
-        dict({
-          'id': 203,
-          'special': False,
-          'text': '''
-            
-  
-          ''',
-        }),
-        dict({
-          'id': 203,
-          'special': False,
-          'text': '''
-            
-  
-          ''',
-        }),
-        dict({
-          'id': 589,
-          'special': False,
-          'text': 'def',
-        }),
-      ]),
-    }),
-    'generated_text': '''
-      ():
-          print("Hello World")
-      
-      def
-    ''',
-  })
-# ---
-# name: test_flash_starcoder_default_params
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-      'generated_tokens': 12,
-      'prefill': list([
-        dict({
-          'id': 589,
-          'text': 'def',
-        }),
-        dict({
-          'id': 1459,
-          'text': ' print',
-        }),
-        dict({
-          'id': 81,
-          'text': '_',
-        }),
-        dict({
-          'id': 7656,
-          'text': 'hello',
-        }),
-      ]),
-      'seed': 0,
-      'tokens': list([
-        dict({
-          'id': 2262,
-          'special': False,
-          'text': '():',
-        }),
-        dict({
-          'id': 284,
-          'special': False,
-          'text': '''
-            
-               
-          ''',
-        }),
-        dict({
-          'id': 5741,
-          'special': False,
-          'text': ' logging',
-        }),
-        dict({
-          'id': 32,
-          'special': False,
-          'text': '.',
-        }),
-        dict({
-          'id': 1338,
-          'special': False,
-          'text': 'info',
-        }),
-        dict({
-          'id': 463,
-          'special': False,
-          'text': "('",
-        }),
-        dict({
-          'id': 8279,
-          'special': False,
-          'text': 'Hello',
-        }),
-        dict({
-          'id': 30,
-          'special': False,
-          'text': ',',
-        }),
-        dict({
-          'id': 10896,
-          'special': False,
-          'text': ' World',
-        }),
-        dict({
-          'id': 683,
-          'special': False,
-          'text': "')",
-        }),
-        dict({
-          'id': 203,
-          'special': False,
-          'text': '''
-            
-  
-          ''',
-        }),
-        dict({
-          'id': 0,
-          'special': True,
-          'text': '<|endoftext|>',
-        }),
-      ]),
-    }),
-    'generated_text': '''
-      ():
-          logging.info('Hello, World')
-      <|endoftext|>
-    ''',
-  })
-# ---
-# name: test_flash_starcoder_load
-  list([
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 589,
-            'text': 'def',
-          }),
-          dict({
-            'id': 1459,
-            'text': ' print',
-          }),
-          dict({
-            'id': 81,
-            'text': '_',
-          }),
-          dict({
-            'id': 7656,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 2262,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 284,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 1459,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 440,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 8279,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 10896,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 657,
-            'special': False,
-            'text': '")',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 589,
-            'special': False,
-            'text': 'def',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World")
-        
-        def
-      ''',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 589,
-            'text': 'def',
-          }),
-          dict({
-            'id': 1459,
-            'text': ' print',
-          }),
-          dict({
-            'id': 81,
-            'text': '_',
-          }),
-          dict({
-            'id': 7656,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 2262,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 284,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 1459,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 440,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 8279,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 10896,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 657,
-            'special': False,
-            'text': '")',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 589,
-            'special': False,
-            'text': 'def',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World")
-        
-        def
-      ''',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 589,
-            'text': 'def',
-          }),
-          dict({
-            'id': 1459,
-            'text': ' print',
-          }),
-          dict({
-            'id': 81,
-            'text': '_',
-          }),
-          dict({
-            'id': 7656,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 2262,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 284,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 1459,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 440,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 8279,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 10896,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 657,
-            'special': False,
-            'text': '")',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 589,
-            'special': False,
-            'text': 'def',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World")
-        
-        def
-      ''',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.Length: 'length'>,
-        'generated_tokens': 10,
-        'prefill': list([
-          dict({
-            'id': 589,
-            'text': 'def',
-          }),
-          dict({
-            'id': 1459,
-            'text': ' print',
-          }),
-          dict({
-            'id': 81,
-            'text': '_',
-          }),
-          dict({
-            'id': 7656,
-            'text': 'hello',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 2262,
-            'special': False,
-            'text': '():',
-          }),
-          dict({
-            'id': 284,
-            'special': False,
-            'text': '''
-              
-                 
-            ''',
-          }),
-          dict({
-            'id': 1459,
-            'special': False,
-            'text': ' print',
-          }),
-          dict({
-            'id': 440,
-            'special': False,
-            'text': '("',
-          }),
-          dict({
-            'id': 8279,
-            'special': False,
-            'text': 'Hello',
-          }),
-          dict({
-            'id': 10896,
-            'special': False,
-            'text': ' World',
-          }),
-          dict({
-            'id': 657,
-            'special': False,
-            'text': '")',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 203,
-            'special': False,
-            'text': '''
-              
-  
-            ''',
-          }),
-          dict({
-            'id': 589,
-            'special': False,
-            'text': 'def',
-          }),
-        ]),
-      }),
-      'generated_text': '''
-        ():
-            print("Hello World")
-        
-        def
-      ''',
-    }),
-  ])
-# ---
--- a/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 589,
+        "logprob": null,
+        "text": "def"
+      },
+      {
+        "id": 1459,
+        "logprob": -5.6289062,
+        "text": " print"
+      },
+      {
+        "id": 81,
+        "logprob": -1.6005859,
+        "text": "_"
+      },
+      {
+        "id": 7656,
+        "logprob": -5.9921875,
+        "text": "hello"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 2262,
+        "logprob": -0.7705078,
+        "special": false,
+        "text": "():"
+      },
+      {
+        "id": 284,
+        "logprob": -0.2590332,
+        "special": false,
+        "text": "\n   "
+      },
+      {
+        "id": 1459,
+        "logprob": -0.39379883,
+        "special": false,
+        "text": " print"
+      },
+      {
+        "id": 440,
+        "logprob": -0.61376953,
+        "special": false,
+        "text": "(\""
+      },
+      {
+        "id": 8279,
+        "logprob": -0.47338867,
+        "special": false,
+        "text": "Hello"
+      },
+      {
+        "id": 10896,
+        "logprob": -1.5068359,
+        "special": false,
+        "text": " World"
+      },
+      {
+        "id": 657,
+        "logprob": -0.80810547,
+        "special": false,
+        "text": "\")"
+      },
+      {
+        "id": 203,
+        "logprob": -0.7397461,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 203,
+        "logprob": -0.35229492,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 589,
+        "logprob": -1.0371094,
+        "special": false,
+        "text": "def"
+      }
+    ]
+  },
+  "generated_text": "():\n    print(\"Hello World\")\n\ndef"
+}
--- a/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "eos_token",
+    "generated_tokens": 12,
+    "prefill": [
+      {
+        "id": 589,
+        "logprob": null,
+        "text": "def"
+      },
+      {
+        "id": 1459,
+        "logprob": -5.6289062,
+        "text": " print"
+      },
+      {
+        "id": 81,
+        "logprob": -1.6005859,
+        "text": "_"
+      },
+      {
+        "id": 7656,
+        "logprob": -5.9921875,
+        "text": "hello"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 2262,
+        "logprob": -0.7451172,
+        "special": false,
+        "text": "():"
+      },
+      {
+        "id": 284,
+        "logprob": -0.21325684,
+        "special": false,
+        "text": "\n   "
+      },
+      {
+        "id": 5741,
+        "logprob": -5.734375,
+        "special": false,
+        "text": " logging"
+      },
+      {
+        "id": 32,
+        "logprob": 0.0,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 1338,
+        "logprob": -0.3232422,
+        "special": false,
+        "text": "info"
+      },
+      {
+        "id": 463,
+        "logprob": -1.0380859,
+        "special": false,
+        "text": "('"
+      },
+      {
+        "id": 8279,
+        "logprob": -0.8378906,
+        "special": false,
+        "text": "Hello"
+      },
+      {
+        "id": 30,
+        "logprob": -1.9501953,
+        "special": false,
+        "text": ","
+      },
+      {
+        "id": 10896,
+        "logprob": -1.3476562,
+        "special": false,
+        "text": " World"
+      },
+      {
+        "id": 683,
+        "logprob": -1.796875,
+        "special": false,
+        "text": "')"
+      },
+      {
+        "id": 203,
+        "logprob": -0.9873047,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 0,
+        "logprob": -0.7495117,
+        "special": true,
+        "text": "<|endoftext|>"
+      }
+    ]
+  },
+  "generated_text": "():\n    logging.info('Hello, World')\n<|endoftext|>"
+}
--- a/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 589,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1459,
+          "logprob": -5.6289062,
+          "text": " print"
+        },
+        {
+          "id": 81,
+          "logprob": -1.6005859,
+          "text": "_"
+        },
+        {
+          "id": 7656,
+          "logprob": -5.9921875,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2262,
+          "logprob": -0.7705078,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 284,
+          "logprob": -0.2602539,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1459,
+          "logprob": -0.39282227,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 440,
+          "logprob": -0.6113281,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8279,
+          "logprob": -0.4765625,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10896,
+          "logprob": -1.5068359,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 657,
+          "logprob": -0.8154297,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 203,
+          "logprob": -0.7319336,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 203,
+          "logprob": -0.35229492,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 589,
+          "logprob": -1.0380859,
+          "special": false,
+          "text": "def"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World\")\n\ndef"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 589,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1459,
+          "logprob": -5.6289062,
+          "text": " print"
+        },
+        {
+          "id": 81,
+          "logprob": -1.6005859,
+          "text": "_"
+        },
+        {
+          "id": 7656,
+          "logprob": -5.9921875,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2262,
+          "logprob": -0.7705078,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 284,
+          "logprob": -0.2602539,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1459,
+          "logprob": -0.39282227,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 440,
+          "logprob": -0.6113281,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8279,
+          "logprob": -0.4765625,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10896,
+          "logprob": -1.5068359,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 657,
+          "logprob": -0.8154297,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 203,
+          "logprob": -0.7319336,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 203,
+          "logprob": -0.35229492,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 589,
+          "logprob": -1.0380859,
+          "special": false,
+          "text": "def"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World\")\n\ndef"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 589,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1459,
+          "logprob": -5.6289062,
+          "text": " print"
+        },
+        {
+          "id": 81,
+          "logprob": -1.6005859,
+          "text": "_"
+        },
+        {
+          "id": 7656,
+          "logprob": -5.9921875,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2262,
+          "logprob": -0.7705078,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 284,
+          "logprob": -0.2602539,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1459,
+          "logprob": -0.39282227,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 440,
+          "logprob": -0.6113281,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8279,
+          "logprob": -0.4765625,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10896,
+          "logprob": -1.5068359,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 657,
+          "logprob": -0.8154297,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 203,
+          "logprob": -0.7319336,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 203,
+          "logprob": -0.35229492,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 589,
+          "logprob": -1.0380859,
+          "special": false,
+          "text": "def"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World\")\n\ndef"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 589,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1459,
+          "logprob": -5.6289062,
+          "text": " print"
+        },
+        {
+          "id": 81,
+          "logprob": -1.6005859,
+          "text": "_"
+        },
+        {
+          "id": 7656,
+          "logprob": -5.9921875,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2262,
+          "logprob": -0.7705078,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 284,
+          "logprob": -0.2602539,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1459,
+          "logprob": -0.39282227,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 440,
+          "logprob": -0.6113281,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8279,
+          "logprob": -0.4765625,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10896,
+          "logprob": -1.5068359,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 657,
+          "logprob": -0.8154297,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 203,
+          "logprob": -0.7319336,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 203,
+          "logprob": -0.35229492,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 589,
+          "logprob": -1.0380859,
+          "special": false,
+          "text": "def"
+        }
+      ]
+    },
+    "generated_text": "():\n    print(\"Hello World\")\n\ndef"
+  }
+]
--- a/integration-tests/models/__snapshots__/test_mt0_base.ambr
+++ b/integration-tests/models/__snapshots__/test_mt0_base.ambr
-# serializer version: 1
-# name: test_mt0_base
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-      'generated_tokens': 5,
-      'prefill': list([
-        dict({
-          'id': 0,
-          'text': '<pad>',
-        }),
-      ]),
-      'seed': 0,
-      'tokens': list([
-        dict({
-          'id': 926,
-          'special': False,
-          'text': 'To',
-        }),
-        dict({
-          'id': 18295,
-          'special': False,
-          'text': ' sell',
-        }),
-        dict({
-          'id': 7868,
-          'special': False,
-          'text': ' things',
-        }),
-        dict({
-          'id': 260,
-          'special': False,
-          'text': '.',
-        }),
-        dict({
-          'id': 1,
-          'special': True,
-          'text': '</s>',
-        }),
-      ]),
-    }),
-    'generated_text': 'To sell things.',
-  })
-# ---
-# name: test_mt0_base_all_params
-  dict({
-    'details': dict({
-      'best_of_sequences': None,
-      'finish_reason': <FinishReason.Length: 'length'>,
-      'generated_tokens': 10,
-      'prefill': list([
-        dict({
-          'id': 0,
-          'text': '<pad>',
-        }),
-      ]),
-      'seed': 0,
-      'tokens': list([
-        dict({
-          'id': 16017,
-          'special': False,
-          'text': 'blue',
-        }),
-        dict({
-          'id': 20495,
-          'special': False,
-          'text': ' sky',
-        }),
-        dict({
-          'id': 259,
-          'special': False,
-          'text': ' ',
-        }),
-        dict({
-          'id': 15484,
-          'special': False,
-          'text': 'appear',
-        }),
-        dict({
-          'id': 345,
-          'special': False,
-          'text': 'ed',
-        }),
-        dict({
-          'id': 288,
-          'special': False,
-          'text': ' to',
-        }),
-        dict({
-          'id': 35622,
-          'special': False,
-          'text': ' cloud',
-        }),
-        dict({
-          'id': 263,
-          'special': False,
-          'text': 's',
-        }),
-        dict({
-          'id': 14701,
-          'special': False,
-          'text': ' above',
-        }),
-        dict({
-          'id': 751,
-          'special': False,
-          'text': ' all',
-        }),
-      ]),
-    }),
-    'generated_text': 'Why is the sky blue?blue sky appeared to clouds above all',
-  })
-# ---
-# name: test_mt0_base_load
-  list([
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-        'generated_tokens': 6,
-        'prefill': list([
-          dict({
-            'id': 0,
-            'text': '<pad>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 259,
-            'special': False,
-            'text': '',
-          }),
-          dict({
-            'id': 39261,
-            'special': False,
-            'text': 'Because',
-          }),
-          dict({
-            'id': 609,
-            'special': False,
-            'text': ' it',
-          }),
-          dict({
-            'id': 339,
-            'special': False,
-            'text': ' is',
-          }),
-          dict({
-            'id': 16017,
-            'special': False,
-            'text': ' blue',
-          }),
-          dict({
-            'id': 1,
-            'special': True,
-            'text': '</s>',
-          }),
-        ]),
-      }),
-      'generated_text': 'Because it is blue',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-        'generated_tokens': 6,
-        'prefill': list([
-          dict({
-            'id': 0,
-            'text': '<pad>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 259,
-            'special': False,
-            'text': '',
-          }),
-          dict({
-            'id': 39261,
-            'special': False,
-            'text': 'Because',
-          }),
-          dict({
-            'id': 609,
-            'special': False,
-            'text': ' it',
-          }),
-          dict({
-            'id': 339,
-            'special': False,
-            'text': ' is',
-          }),
-          dict({
-            'id': 16017,
-            'special': False,
-            'text': ' blue',
-          }),
-          dict({
-            'id': 1,
-            'special': True,
-            'text': '</s>',
-          }),
-        ]),
-      }),
-      'generated_text': 'Because it is blue',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-        'generated_tokens': 6,
-        'prefill': list([
-          dict({
-            'id': 0,
-            'text': '<pad>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 259,
-            'special': False,
-            'text': '',
-          }),
-          dict({
-            'id': 39261,
-            'special': False,
-            'text': 'Because',
-          }),
-          dict({
-            'id': 609,
-            'special': False,
-            'text': ' it',
-          }),
-          dict({
-            'id': 339,
-            'special': False,
-            'text': ' is',
-          }),
-          dict({
-            'id': 16017,
-            'special': False,
-            'text': ' blue',
-          }),
-          dict({
-            'id': 1,
-            'special': True,
-            'text': '</s>',
-          }),
-        ]),
-      }),
-      'generated_text': 'Because it is blue',
-    }),
-    dict({
-      'details': dict({
-        'best_of_sequences': None,
-        'finish_reason': <FinishReason.EndOfSequenceToken: 'eos_token'>,
-        'generated_tokens': 6,
-        'prefill': list([
-          dict({
-            'id': 0,
-            'text': '<pad>',
-          }),
-        ]),
-        'seed': None,
-        'tokens': list([
-          dict({
-            'id': 259,
-            'special': False,
-            'text': '',
-          }),
-          dict({
-            'id': 39261,
-            'special': False,
-            'text': 'Because',
-          }),
-          dict({
-            'id': 609,
-            'special': False,
-            'text': ' it',
-          }),
-          dict({
-            'id': 339,
-            'special': False,
-            'text': ' is',
-          }),
-          dict({
-            'id': 16017,
-            'special': False,
-            'text': ' blue',
-          }),
-          dict({
-            'id': 1,
-            'special': True,
-            'text': '</s>',
-          }),
-        ]),
-      }),
-      'generated_text': 'Because it is blue',
-    }),
-  ])
-# ---
--- a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json
+++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "eos_token",
+    "generated_tokens": 5,
+    "prefill": [
+      {
+        "id": 0,
+        "logprob": null,
+        "text": "<pad>"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 926,
+        "logprob": -4.3554688,
+        "special": false,
+        "text": "To"
+      },
+      {
+        "id": 18295,
+        "logprob": -7.7734375,
+        "special": false,
+        "text": " sell"
+      },
+      {
+        "id": 7868,
+        "logprob": -3.9257812,
+        "special": false,
+        "text": " things"
+      },
+      {
+        "id": 260,
+        "logprob": -2.4179688,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 1,
+        "logprob": 0.0,
+        "special": true,
+        "text": "</s>"
+      }
+    ]
+  },
+  "generated_text": "To sell things."
+}
--- a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json
+++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 0,
+        "logprob": null,
+        "text": "<pad>"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 16017,
+        "logprob": -1.3505859,
+        "special": false,
+        "text": "blue"
+      },
+      {
+        "id": 20495,
+        "logprob": -0.50439453,
+        "special": false,
+        "text": " sky"
+      },
+      {
+        "id": 259,
+        "logprob": -1.2011719,
+        "special": false,
+        "text": " "
+      },
+      {
+        "id": 15484,
+        "logprob": -2.8378906,
+        "special": false,
+        "text": "appear"
+      },
+      {
+        "id": 345,
+        "logprob": -0.87597656,
+        "special": false,
+        "text": "ed"
+      },
+      {
+        "id": 288,
+        "logprob": -1.8447266,
+        "special": false,
+        "text": " to"
+      },
+      {
+        "id": 35622,
+        "logprob": -7.1445312,
+        "special": false,
+        "text": " cloud"
+      },
+      {
+        "id": 263,
+        "logprob": -1.2929688,
+        "special": false,
+        "text": "s"
+      },
+      {
+        "id": 14701,
+        "logprob": -3.0761719,
+        "special": false,
+        "text": " above"
+      },
+      {
+        "id": 751,
+        "logprob": -4.4375,
+        "special": false,
+        "text": " all"
+      }
+    ]
+  },
+  "generated_text": "Why is the sky blue?blue sky appeared to clouds above all"
+}
--- a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json
+++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "eos_token",
+      "generated_tokens": 6,
+      "prefill": [
+        {
+          "id": 0,
+          "logprob": null,
+          "text": "<pad>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 259,
+          "logprob": -1.3789062,
+          "special": false,
+          "text": ""
+        },
+        {
+          "id": 39261,
+          "logprob": -0.36279297,
+          "special": false,
+          "text": "Because"
+        },
+        {
+          "id": 609,
+          "logprob": -1.0966797,
+          "special": false,
+          "text": " it"
+        },
+        {
+          "id": 339,
+          "logprob": -0.8276367,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 16017,
+          "logprob": -1.6845703,
+          "special": false,
+          "text": " blue"
+        },
+        {
+          "id": 1,
+          "logprob": -0.72753906,
+          "special": true,
+          "text": "</s>"
+        }
+      ]
+    },
+    "generated_text": "Because it is blue"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "eos_token",
+      "generated_tokens": 6,
+      "prefill": [
+        {
+          "id": 0,
+          "logprob": null,
+          "text": "<pad>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 259,
+          "logprob": -1.3798828,
+          "special": false,
+          "text": ""
+        },
+        {
+          "id": 39261,
+          "logprob": -0.36328125,
+          "special": false,
+          "text": "Because"
+        },
+        {
+          "id": 609,
+          "logprob": -1.0947266,
+          "special": false,
+          "text": " it"
+        },
+        {
+          "id": 339,
+          "logprob": -0.8286133,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 16017,
+          "logprob": -1.6826172,
+          "special": false,
+          "text": " blue"
+        },
+        {
+          "id": 1,
+          "logprob": -0.7290039,
+          "special": true,
+          "text": "</s>"
+        }
+      ]
+    },
+    "generated_text": "Because it is blue"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "eos_token",
+      "generated_tokens": 6,
+      "prefill": [
+        {
+          "id": 0,
+          "logprob": null,
+          "text": "<pad>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 259,
+          "logprob": -1.3789062,
+          "special": false,
+          "text": ""
+        },
+        {
+          "id": 39261,
+          "logprob": -0.36279297,
+          "special": false,
+          "text": "Because"
+        },
+        {
+          "id": 609,
+          "logprob": -1.0966797,
+          "special": false,
+          "text": " it"
+        },
+        {
+          "id": 339,
+          "logprob": -0.8276367,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 16017,
+          "logprob": -1.6845703,
+          "special": false,
+          "text": " blue"
+        },
+        {
+          "id": 1,
+          "logprob": -0.72753906,
+          "special": true,
+          "text": "</s>"
+        }
+      ]
+    },
+    "generated_text": "Because it is blue"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "eos_token",
+      "generated_tokens": 6,
+      "prefill": [
+        {
+          "id": 0,
+          "logprob": null,
+          "text": "<pad>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 259,
+          "logprob": -1.3789062,
+          "special": false,
+          "text": ""
+        },
+        {
+          "id": 39261,
+          "logprob": -0.36279297,
+          "special": false,
+          "text": "Because"
+        },
+        {
+          "id": 609,
+          "logprob": -1.0966797,
+          "special": false,
+          "text": " it"
+        },
+        {
+          "id": 339,
+          "logprob": -0.8276367,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 16017,
+          "logprob": -1.6845703,
+          "special": false,
+          "text": " blue"
+        },
+        {
+          "id": 1,
+          "logprob": -0.72753906,
+          "special": true,
+          "text": "</s>"
+        }
+      ]
+    },
+    "generated_text": "Because it is blue"
+  }
+]
--- a/integration-tests/models/test_bloom_560m.py
+++ b/integration-tests/models/test_bloom_560m.py
 import pytest

-from utils import health_check
+
+@pytest.fixture(scope="module")
+def bloom_560_handle(launcher):
+    with launcher("bigscience/bloom-560m") as handle:
+        yield handle


 @pytest.fixture(scope="module")
-def bloom_560(launcher):
-    with launcher("bigscience/bloom-560m") as client:
-        yield client
+async def bloom_560(bloom_560_handle):
+    await bloom_560_handle.health(60)
+    return bloom_560_handle.client


 @pytest.mark.asyncio
-async def test_bloom_560m(bloom_560, snapshot_test):
-    await health_check(bloom_560, 60)
-
+async def test_bloom_560m(bloom_560, response_snapshot):
    response = await bloom_560.generate(
        "Pour déguster un ortolan, il faut tout d'abord",
        max_new_tokens=10,
@@ -21,13 +23,11 @@ async def test_bloom_560m(bloom_560, snapshot_test):
    )

    assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
-async def test_bloom_560m_all_params(bloom_560, snapshot_test):
-    await health_check(bloom_560, 60)
-
+async def test_bloom_560m_all_params(bloom_560, response_snapshot):
    response = await bloom_560.generate(
        "Pour déguster un ortolan, il faut tout d'abord",
        max_new_tokens=10,
@@ -44,13 +44,11 @@ async def test_bloom_560m_all_params(bloom_560, snapshot_test):
    )

    assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
-async def test_bloom_560m_load(bloom_560, generate_load, snapshot_test):
-    await health_check(bloom_560, 60)
-
+async def test_bloom_560m_load(bloom_560, generate_load, response_snapshot):
    responses = await generate_load(
        bloom_560,
        "Pour déguster un ortolan, il faut tout d'abord",
@@ -59,5 +57,6 @@ async def test_bloom_560m_load(bloom_560, generate_load, snapshot_test):
    )

    assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])

-    assert snapshot_test(responses)
+    assert responses == response_snapshot
--- a/integration-tests/models/test_bloom_560m_sharded.py
+++ b/integration-tests/models/test_bloom_560m_sharded.py
 import pytest

-from utils import health_check
+
+@pytest.fixture(scope="module")
+def bloom_560m_sharded_handle(launcher):
+    with launcher("bigscience/bloom-560m", num_shard=2) as handle:
+        yield handle


 @pytest.fixture(scope="module")
-def bloom_560m_sharded(launcher):
-    with launcher("bigscience/bloom-560m", num_shard=2) as client:
-        yield client
+async def bloom_560m_sharded(bloom_560m_sharded_handle):
+    await bloom_560m_sharded_handle.health(60)
+    return bloom_560m_sharded_handle.client


 @pytest.mark.asyncio
-async def test_bloom_560m_sharded(bloom_560m_sharded, snapshot_test):
-    await health_check(bloom_560m_sharded, 60)
-
+async def test_bloom_560m_sharded(bloom_560m_sharded, response_snapshot):
    response = await bloom_560m_sharded.generate(
        "Pour déguster un ortolan, il faut tout d'abord",
        max_new_tokens=10,
@@ -21,15 +23,13 @@ async def test_bloom_560m_sharded(bloom_560m_sharded, snapshot_test):
    )

    assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
 async def test_bloom_560m_sharded_load(
-    bloom_560m_sharded, generate_load, snapshot_test
+    bloom_560m_sharded, generate_load, response_snapshot
 ):
-    await health_check(bloom_560m_sharded, 60)
-
    responses = await generate_load(
        bloom_560m_sharded,
        "Pour déguster un ortolan, il faut tout d'abord",
@@ -38,5 +38,6 @@ async def test_bloom_560m_sharded_load(
    )

    assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])

-    assert snapshot_test(responses)
+    assert responses == response_snapshot
--- a/integration-tests/models/test_flash_llama.py
+++ b/integration-tests/models/test_flash_llama.py
 import pytest

-from utils import health_check
+
+@pytest.fixture(scope="module")
+def flash_llama_handle(launcher):
+    with launcher("huggingface/llama-7b", num_shard=2) as handle:
+        yield handle


 @pytest.fixture(scope="module")
-def flash_llama(launcher):
-    with launcher("huggingface/llama-7b", num_shard=2) as client:
-        yield client
+async def flash_llama(flash_llama_handle):
+    await flash_llama_handle.health(120)
+    return flash_llama_handle.client


 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_llama(flash_llama, snapshot_test):
-    await health_check(flash_llama, 120)
-
+async def test_flash_llama(flash_llama, response_snapshot):
    response = await flash_llama.generate("Test request", max_new_tokens=10)

    assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_llama_all_params(flash_llama, snapshot_test):
-    await health_check(flash_llama, 120)
-
+async def test_flash_llama_all_params(flash_llama, response_snapshot):
    response = await flash_llama.generate(
        "Test request",
        max_new_tokens=10,
@@ -41,16 +41,15 @@ async def test_flash_llama_all_params(flash_llama, snapshot_test):
    )

    assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_llama_load(flash_llama, generate_load, snapshot_test):
-    await health_check(flash_llama, 120)
-
+async def test_flash_llama_load(flash_llama, generate_load, response_snapshot):
    responses = await generate_load(flash_llama, "Test request", max_new_tokens=10, n=4)

    assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])

-    assert snapshot_test(responses)
+    assert responses == response_snapshot
--- a/integration-tests/models/test_flash_neox.py
+++ b/integration-tests/models/test_flash_neox.py
 import pytest

-from utils import health_check
+
+@pytest.fixture(scope="module")
+def flash_neox_handle(launcher):
+    with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as handle:
+        yield handle


 @pytest.fixture(scope="module")
-def flash_neox(launcher):
-    with launcher("OpenAssistant/oasst-sft-1-pythia-12b", num_shard=2) as client:
-        yield client
+async def flash_neox(flash_neox_handle):
+    await flash_neox_handle.health(240)
+    return flash_neox_handle.client


 @pytest.mark.asyncio
-async def test_flash_neox(flash_neox, snapshot_test):
-    await health_check(flash_neox, 240)
-
+async def test_flash_neox(flash_neox, response_snapshot):
    response = await flash_neox.generate(
        "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>",
        max_new_tokens=10,
    )

    assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
-async def test_flash_neox_load(flash_neox, generate_load, snapshot_test):
-    await health_check(flash_neox, 240)
-
+async def test_flash_neox_load(flash_neox, generate_load, response_snapshot):
    responses = await generate_load(
        flash_neox,
        "<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>",
@@ -34,5 +34,6 @@ async def test_flash_neox_load(flash_neox, generate_load, snapshot_test):
    )

    assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])

-    assert snapshot_test(responses)
+    assert responses == response_snapshot
--- a/integration-tests/models/test_flash_santacoder.py
+++ b/integration-tests/models/test_flash_santacoder.py
 import pytest

-from utils import health_check
+
+@pytest.fixture(scope="module")
+def flash_santacoder_handle(launcher):
+    with launcher("bigcode/santacoder") as handle:
+        yield handle


 @pytest.fixture(scope="module")
-def flash_santacoder(launcher):
-    with launcher("bigcode/santacoder") as client:
-        yield client
+async def flash_santacoder(flash_santacoder_handle):
+    await flash_santacoder_handle.health(240)
+    return flash_santacoder_handle.client


 @pytest.mark.asyncio
-async def test_flash_santacoder(flash_santacoder, snapshot_test):
-    await health_check(flash_santacoder, 60)
-
+async def test_flash_santacoder(flash_santacoder, response_snapshot):
    response = await flash_santacoder.generate("def print_hello", max_new_tokens=10)

    assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
-async def test_flash_santacoder_load(flash_santacoder, generate_load, snapshot_test):
-    await health_check(flash_santacoder, 60)
-
+async def test_flash_santacoder_load(
+    flash_santacoder, generate_load, response_snapshot
+):
    responses = await generate_load(
        flash_santacoder, "def print_hello", max_new_tokens=10, n=4
    )

    assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])

-    assert snapshot_test(responses)
+    assert responses == response_snapshot
--- a/integration-tests/models/test_flash_starcoder.py
+++ b/integration-tests/models/test_flash_starcoder.py
 import pytest

-from utils import health_check
+
+@pytest.fixture(scope="module")
+def flash_starcoder_handle(launcher):
+    with launcher("bigcode/starcoder", num_shard=2) as handle:
+        yield handle


 @pytest.fixture(scope="module")
-def flash_starcoder(launcher):
-    with launcher("bigcode/starcoder", num_shard=2) as client:
-        yield client
+async def flash_starcoder(flash_starcoder_handle):
+    await flash_starcoder_handle.health(240)
+    return flash_starcoder_handle.client


 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_starcoder(flash_starcoder, snapshot_test):
-    await health_check(flash_starcoder, 240)
-
+async def test_flash_starcoder(flash_starcoder, response_snapshot):
    response = await flash_starcoder.generate("def print_hello", max_new_tokens=10)

    assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_starcoder_default_params(flash_starcoder, snapshot_test):
-    await health_check(flash_starcoder, 240)
-
+async def test_flash_starcoder_default_params(flash_starcoder, response_snapshot):
    response = await flash_starcoder.generate(
        "def print_hello", max_new_tokens=60, temperature=0.2, top_p=0.95, seed=0
    )

    assert response.details.generated_tokens == 12
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_starcoder_load(flash_starcoder, generate_load, snapshot_test):
-    await health_check(flash_starcoder, 240)
-
+async def test_flash_starcoder_load(flash_starcoder, generate_load, response_snapshot):
    responses = await generate_load(
        flash_starcoder, "def print_hello", max_new_tokens=10, n=4
    )

    assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])

-    assert snapshot_test(responses)
+    assert responses == response_snapshot
--- a/integration-tests/models/test_mt0_base.py
+++ b/integration-tests/models/test_mt0_base.py
 import pytest

-from utils import health_check
+
+@pytest.fixture(scope="module")
+def mt0_base_handle(launcher):
+    with launcher("bigscience/mt0-base") as handle:
+        yield handle


 @pytest.fixture(scope="module")
-def mt0_base(launcher):
-    with launcher("bigscience/mt0-base") as client:
-        yield client
+async def mt0_base(mt0_base_handle):
+    await mt0_base_handle.health(60)
+    return mt0_base_handle.client


 @pytest.mark.asyncio
-async def test_mt0_base(mt0_base, snapshot_test):
-    await health_check(mt0_base, 60)
-
+async def test_mt0_base(mt0_base, response_snapshot):
    response = await mt0_base.generate(
        "Why is the sky blue?",
        max_new_tokens=10,
@@ -21,13 +23,11 @@ async def test_mt0_base(mt0_base, snapshot_test):
    )

    assert response.details.generated_tokens == 5
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
-async def test_mt0_base_all_params(mt0_base, snapshot_test):
-    await health_check(mt0_base, 60)
-
+async def test_mt0_base_all_params(mt0_base, response_snapshot):
    response = await mt0_base.generate(
        "Why is the sky blue?",
        max_new_tokens=10,
@@ -44,13 +44,11 @@ async def test_mt0_base_all_params(mt0_base, snapshot_test):
    )

    assert response.details.generated_tokens == 10
-    assert snapshot_test(response)
+    assert response == response_snapshot


 @pytest.mark.asyncio
-async def test_mt0_base_load(mt0_base, generate_load, snapshot_test):
-    await health_check(mt0_base, 60)
-
+async def test_mt0_base_load(mt0_base, generate_load, response_snapshot):
    responses = await generate_load(
        mt0_base,
        "Why is the sky blue?",
@@ -59,5 +57,6 @@ async def test_mt0_base_load(mt0_base, generate_load, snapshot_test):
    )

    assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])

-    assert snapshot_test(responses)
+    assert responses == response_snapshot
--- a/integration-tests/models/utils.py
+++ b/integration-tests/models/utils.py
-import time
-
-from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
-from text_generation import AsyncClient
-
-
-async def health_check(client: AsyncClient, timeout: int = 60):
-    assert timeout > 0
-    for _ in range(timeout):
-        try:
-            await client.generate("test")
-            return
-        except (ClientConnectorError, ClientOSError, ServerDisconnectedError) as e:
-            time.sleep(1)
-    raise RuntimeError("Health check failed")
--- a/integration-tests/requirements.txt
+++ b/integration-tests/requirements.txt
 syrupy
-text-generation==0.5.1
+text-generation==0.5.2
 pytest
 pytest-asyncio==0.17.2
 docker
\ No newline at end of file