"vscode:/vscode.git/clone" did not exist on "09c2eb85ddd3b2585979f4cd9cc97168d86718b6"
Commit 66b809cc authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.7.2' into v0.7.2-dev

parents 37b63c24 0408efc6
# SPDX-License-Identifier: Apache-2.0
"""Benchmark the latency of processing a single batch of requests.""" """Benchmark the latency of processing a single batch of requests."""
import argparse import argparse
import dataclasses import dataclasses
......
# SPDX-License-Identifier: Apache-2.0
""" """
Offline benchmark to test the long document QA throughput. Offline benchmark to test the long document QA throughput.
......
# SPDX-License-Identifier: Apache-2.0
""" """
Benchmark the efficiency of prefix caching. Benchmark the efficiency of prefix caching.
......
# SPDX-License-Identifier: Apache-2.0
"""Benchmark offline prioritization.""" """Benchmark offline prioritization."""
import argparse import argparse
import dataclasses import dataclasses
......
# SPDX-License-Identifier: Apache-2.0
r"""Benchmark online serving throughput. r"""Benchmark online serving throughput.
On the server side, run one of the following commands: On the server side, run one of the following commands:
......
# SPDX-License-Identifier: Apache-2.0
r"""Benchmark online serving throughput with guided decoding. r"""Benchmark online serving throughput with guided decoding.
On the server side, run one of the following commands: On the server side, run one of the following commands:
......
# SPDX-License-Identifier: Apache-2.0
"""Benchmark offline inference throughput.""" """Benchmark offline inference throughput."""
import argparse import argparse
import dataclasses import dataclasses
......
# SPDX-License-Identifier: Apache-2.0
import argparse import argparse
import copy import copy
import itertools import itertools
......
# SPDX-License-Identifier: Apache-2.0
# Cutlass bench utils # Cutlass bench utils
from typing import Iterable, Tuple from typing import Iterable, Tuple
......
# SPDX-License-Identifier: Apache-2.0
import argparse import argparse
import copy import copy
import itertools import itertools
......
# SPDX-License-Identifier: Apache-2.0
# Weight Shapes are in the format # Weight Shapes are in the format
# ([K, N], TP_SPLIT_DIM) # ([K, N], TP_SPLIT_DIM)
# Example: # Example:
......
# SPDX-License-Identifier: Apache-2.0
import os import os
import aiohttp import aiohttp
......
# SPDX-License-Identifier: Apache-2.0
import asyncio import asyncio
import itertools import itertools
......
# SPDX-License-Identifier: Apache-2.0
import json import json
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
......
# SPDX-License-Identifier: Apache-2.0
import pickle as pkl import pickle as pkl
import time import time
from dataclasses import dataclass from dataclasses import dataclass
......
# SPDX-License-Identifier: Apache-2.0
import os import os
import sys import sys
from typing import Optional from typing import Optional
......
# SPDX-License-Identifier: Apache-2.0
import time import time
import torch import torch
......
# SPDX-License-Identifier: Apache-2.0
import argparse import argparse
import copy import copy
import json import json
......
# SPDX-License-Identifier: Apache-2.0
import argparse import argparse
import copy import copy
import itertools import itertools
......
# SPDX-License-Identifier: Apache-2.0
from typing import List from typing import List
import torch import torch
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment