LICENSE
MANIFEST.in
README.md
setup.py
examples/__init__.py
lmdeploy/__init__.py
lmdeploy/api.py
lmdeploy/model.py
lmdeploy/tokenizer.py
lmdeploy/utils.py
lmdeploy/version.py
lmdeploy.egg-info/PKG-INFO
lmdeploy.egg-info/SOURCES.txt
lmdeploy.egg-info/dependency_links.txt
lmdeploy.egg-info/entry_points.txt
lmdeploy.egg-info/requires.txt
lmdeploy.egg-info/top_level.txt
lmdeploy/cli/__init__.py
lmdeploy/cli/chat.py
lmdeploy/cli/cli.py
lmdeploy/cli/lite.py
lmdeploy/cli/serve.py
lmdeploy/lite/__init__.py
lmdeploy/lite/defaults.py
lmdeploy/lite/apis/__init__.py
lmdeploy/lite/apis/auto_awq.py
lmdeploy/lite/apis/calibrate.py
lmdeploy/lite/apis/get_small_sharded_hf.py
lmdeploy/lite/apis/kv_qparams.py
lmdeploy/lite/quantization/__init__.py
lmdeploy/lite/quantization/awq.py
lmdeploy/lite/quantization/calibration.py
lmdeploy/lite/quantization/activation/__init__.py
lmdeploy/lite/quantization/activation/observer.py
lmdeploy/lite/quantization/weight/__init__.py
lmdeploy/lite/quantization/weight/quantizer.py
lmdeploy/lite/utils/__init__.py
lmdeploy/lite/utils/batch_split.py
lmdeploy/lite/utils/cal_qparams.py
lmdeploy/lite/utils/calib_dataloader.py
lmdeploy/lite/utils/collect.py
lmdeploy/lite/utils/export_turbomind.py
lmdeploy/lite/utils/global_avail.py
lmdeploy/lite/utils/load.py
lmdeploy/lite/utils/memory_efficient.py
lmdeploy/pytorch/__init__.py
lmdeploy/pytorch/accel.py
lmdeploy/pytorch/chat.py
lmdeploy/pytorch/decode.py
lmdeploy/pytorch/dist.py
lmdeploy/pytorch/model.py
lmdeploy/pytorch/session.py
lmdeploy/pytorch/utils.py
lmdeploy/pytorch/adapters/__init__.py
lmdeploy/pytorch/adapters/base.py
lmdeploy/pytorch/adapters/internlm.py
lmdeploy/pytorch/adapters/llama2.py
lmdeploy/pytorch/modules/__init__.py
lmdeploy/pytorch/modules/linear.py
lmdeploy/serve/__init__.py
lmdeploy/serve/async_engine.py
lmdeploy/serve/client.py
lmdeploy/serve/gradio/__init__.py
lmdeploy/serve/gradio/api_server_backend.py
lmdeploy/serve/gradio/app.py
lmdeploy/serve/gradio/constants.py
lmdeploy/serve/gradio/triton_server_backend.py
lmdeploy/serve/gradio/turbomind_coupled.py
lmdeploy/serve/openai/__init__.py
lmdeploy/serve/openai/api_client.py
lmdeploy/serve/openai/api_server.py
lmdeploy/serve/openai/protocol.py
lmdeploy/serve/turbomind/__init__.py
lmdeploy/serve/turbomind/chatbot.py
lmdeploy/serve/turbomind/service_docker_up.sh
lmdeploy/serve/turbomind/utils.py
lmdeploy/serve/turbomind/triton_models/interactive/config.pbtxt
lmdeploy/serve/turbomind/triton_models/interactive/1/placeholder
lmdeploy/serve/turbomind/triton_models/postprocessing/config.pbtxt
lmdeploy/serve/turbomind/triton_models/postprocessing/1/model.py
lmdeploy/serve/turbomind/triton_models/preprocessing/config.pbtxt
lmdeploy/serve/turbomind/triton_models/preprocessing/1/model.py
lmdeploy/serve/turbomind/triton_models/tokenizer/placeholder
lmdeploy/serve/turbomind/triton_models/weights/config.ini
lmdeploy/turbomind/__init__.py
lmdeploy/turbomind/chat.py
lmdeploy/turbomind/decode.py
lmdeploy/turbomind/generate_gemm_config.py
lmdeploy/turbomind/turbomind.py
lmdeploy/turbomind/utils.py
lmdeploy/turbomind/deploy/__init__.py
lmdeploy/turbomind/deploy/converter.py
lmdeploy/turbomind/deploy/source_model/__init__.py
lmdeploy/turbomind/deploy/source_model/baichuan.py
lmdeploy/turbomind/deploy/source_model/baichuan_awq.py
lmdeploy/turbomind/deploy/source_model/base.py
lmdeploy/turbomind/deploy/source_model/llama.py
lmdeploy/turbomind/deploy/source_model/llama_awq.py
lmdeploy/turbomind/deploy/source_model/meta_llama.py
lmdeploy/turbomind/deploy/source_model/qwen.py
lmdeploy/turbomind/deploy/source_model/qwen_awq.py
lmdeploy/turbomind/deploy/target_model/__init__.py
lmdeploy/turbomind/deploy/target_model/base.py
lmdeploy/turbomind/deploy/target_model/fp.py
lmdeploy/turbomind/deploy/target_model/w4.py