Commit 66b809cc authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.7.2' into v0.7.2-dev

parents 37b63c24 0408efc6
# SPDX-License-Identifier: Apache-2.0
"""Benchmark the latency of processing a single batch of requests."""
import argparse
import dataclasses
......
# SPDX-License-Identifier: Apache-2.0
"""
Offline benchmark to test the long document QA throughput.
......
# SPDX-License-Identifier: Apache-2.0
"""
Benchmark the efficiency of prefix caching.
......
# SPDX-License-Identifier: Apache-2.0
"""Benchmark offline prioritization."""
import argparse
import dataclasses
......
# SPDX-License-Identifier: Apache-2.0
r"""Benchmark online serving throughput.
On the server side, run one of the following commands:
......
# SPDX-License-Identifier: Apache-2.0
r"""Benchmark online serving throughput with guided decoding.
On the server side, run one of the following commands:
......
# SPDX-License-Identifier: Apache-2.0
"""Benchmark offline inference throughput."""
import argparse
import dataclasses
......
# SPDX-License-Identifier: Apache-2.0
import argparse
import copy
import itertools
......
# SPDX-License-Identifier: Apache-2.0
# Cutlass bench utils
from typing import Iterable, Tuple
......
# SPDX-License-Identifier: Apache-2.0
import argparse
import copy
import itertools
......
# SPDX-License-Identifier: Apache-2.0
# Weight Shapes are in the format
# ([K, N], TP_SPLIT_DIM)
# Example:
......
# SPDX-License-Identifier: Apache-2.0
import os
import aiohttp
......
# SPDX-License-Identifier: Apache-2.0
import asyncio
import itertools
......
# SPDX-License-Identifier: Apache-2.0
import json
import matplotlib.pyplot as plt
......
# SPDX-License-Identifier: Apache-2.0
import pickle as pkl
import time
from dataclasses import dataclass
......
# SPDX-License-Identifier: Apache-2.0
import os
import sys
from typing import Optional
......
# SPDX-License-Identifier: Apache-2.0
import time
import torch
......
# SPDX-License-Identifier: Apache-2.0
import argparse
import copy
import json
......
# SPDX-License-Identifier: Apache-2.0
import argparse
import copy
import itertools
......
# SPDX-License-Identifier: Apache-2.0
from typing import List
import torch
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment