Commit 0e0218ff (unverified), authored by Graham King, committed by GitHub
Browse files

chore: Remove model_config from LocalModel (#3558)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent 5c69c119
......@@ -2101,18 +2101,6 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "dynamo-engine-llamacpp"
version = "0.5.1"
dependencies = [
"async-stream",
"dynamo-llm",
"dynamo-runtime",
"llama-cpp-2",
"tokio",
"tracing",
]
[[package]]
name = "dynamo-engine-mistralrs"
version = "0.5.1"
......
......@@ -25,7 +25,7 @@ Example:
See `docs/guides/dynamo_run.md` in the repo for full details.
"#;
const USAGE: &str = "USAGE: dynamo-run in=[http|grpc|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]";
const USAGE: &str = "USAGE: dynamo-run in=[http|grpc|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]";
fn main() -> anyhow::Result<()> {
// Set log level based on verbosity flag
......
......@@ -62,10 +62,6 @@ def parse_args():
# model_name: Option<String>
parser.add_argument("--model-name", type=str, help="Name of the model to load.")
# model_config: Option<PathBuf>
parser.add_argument(
"--model-config", type=Path, help="Path to the model configuration file."
)
# context_length: Option<u32>
parser.add_argument(
"--context-length", type=int, help="Maximum context length for the model (u32)."
......@@ -139,8 +135,6 @@ async def run():
flags = args["flags"]
if flags.model_name is not None:
entrypoint_kwargs["model_name"] = flags.model_name
if flags.model_config is not None:
entrypoint_kwargs["model_config"] = flags.model_config
if flags.context_length is not None:
entrypoint_kwargs["context_length"] = flags.context_length
if flags.template_file is not None:
......
......@@ -107,7 +107,6 @@ pub(crate) struct EntrypointArgs {
engine_type: EngineType,
model_path: Option<PathBuf>,
model_name: Option<String>,
model_config: Option<PathBuf>,
endpoint_id: Option<EndpointId>,
context_length: Option<u32>,
template_file: Option<PathBuf>,
......@@ -125,12 +124,11 @@ pub(crate) struct EntrypointArgs {
impl EntrypointArgs {
#[allow(clippy::too_many_arguments)]
#[new]
#[pyo3(signature = (engine_type, model_path=None, model_name=None, model_config=None, endpoint_id=None, context_length=None, template_file=None, router_config=None, kv_cache_block_size=None, http_host=None, http_port=None, tls_cert_path=None, tls_key_path=None, extra_engine_args=None, namespace=None))]
#[pyo3(signature = (engine_type, model_path=None, model_name=None, endpoint_id=None, context_length=None, template_file=None, router_config=None, kv_cache_block_size=None, http_host=None, http_port=None, tls_cert_path=None, tls_key_path=None, extra_engine_args=None, namespace=None))]
pub fn new(
engine_type: EngineType,
model_path: Option<PathBuf>,
model_name: Option<String>, // e.g. "dyn://namespace.component.endpoint"
model_config: Option<PathBuf>,
endpoint_id: Option<String>,
context_length: Option<u32>,
template_file: Option<PathBuf>,
......@@ -155,7 +153,6 @@ impl EntrypointArgs {
engine_type,
model_path,
model_name,
model_config,
endpoint_id: endpoint_id_obj,
context_length,
template_file,
......@@ -188,7 +185,6 @@ pub fn make_engine<'p>(
builder
.model_path(args.model_path.clone())
.model_name(args.model_name.clone())
.model_config(args.model_config.clone())
.endpoint_id(args.endpoint_id.clone())
.context_length(args.context_length)
.request_template(args.template_file.clone())
......
......@@ -42,7 +42,6 @@ pub const DEFAULT_HTTP_PORT: u16 = 8080;
pub struct LocalModelBuilder {
model_path: Option<PathBuf>,
model_name: Option<String>,
model_config: Option<PathBuf>,
endpoint_id: Option<EndpointId>,
context_length: Option<u32>,
template_file: Option<PathBuf>,
......@@ -71,7 +70,6 @@ impl Default for LocalModelBuilder {
tls_key_path: Default::default(),
model_path: Default::default(),
model_name: Default::default(),
model_config: Default::default(),
endpoint_id: Default::default(),
context_length: Default::default(),
template_file: Default::default(),
......@@ -98,11 +96,6 @@ impl LocalModelBuilder {
self
}
pub fn model_config(&mut self, model_config: Option<PathBuf>) -> &mut Self {
self.model_config = model_config;
self
}
pub fn endpoint_id(&mut self, endpoint_id: Option<EndpointId>) -> &mut Self {
self.endpoint_id = endpoint_id;
self
......@@ -246,13 +239,9 @@ impl LocalModelBuilder {
} else {
fs::canonicalize(relative_path)?
};
// --model-config takes precedence over --model-path
let model_config_path = self.model_config.as_ref().unwrap_or(&full_path);
let mut card = ModelDeploymentCard::load_from_disk(
model_config_path,
self.custom_template_path.as_deref(),
)?;
let mut card =
ModelDeploymentCard::load_from_disk(&full_path, self.custom_template_path.as_deref())?;
// Usually we infer from the path, self.model_name is user override
let model_name = self.model_name.take().unwrap_or_else(|| {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment