Unverified commit 0e0218ff, authored by Graham King, committed by GitHub
Browse files

chore: Remove model_config from LocalModel (#3558)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent 5c69c119
...@@ -2101,18 +2101,6 @@ dependencies = [ ...@@ -2101,18 +2101,6 @@ dependencies = [
"syn 2.0.106", "syn 2.0.106",
] ]
[[package]]
name = "dynamo-engine-llamacpp"
version = "0.5.1"
dependencies = [
"async-stream",
"dynamo-llm",
"dynamo-runtime",
"llama-cpp-2",
"tokio",
"tracing",
]
[[package]] [[package]]
name = "dynamo-engine-mistralrs" name = "dynamo-engine-mistralrs"
version = "0.5.1" version = "0.5.1"
......
...@@ -25,7 +25,7 @@ Example: ...@@ -25,7 +25,7 @@ Example:
See `docs/guides/dynamo_run.md` in the repo for full details. See `docs/guides/dynamo_run.md` in the repo for full details.
"#; "#;
const USAGE: &str = "USAGE: dynamo-run in=[http|grpc|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]"; const USAGE: &str = "USAGE: dynamo-run in=[http|grpc|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]";
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
// Set log level based on verbosity flag // Set log level based on verbosity flag
......
...@@ -62,10 +62,6 @@ def parse_args(): ...@@ -62,10 +62,6 @@ def parse_args():
# model_name: Option<String> # model_name: Option<String>
parser.add_argument("--model-name", type=str, help="Name of the model to load.") parser.add_argument("--model-name", type=str, help="Name of the model to load.")
# model_config: Option<PathBuf>
parser.add_argument(
"--model-config", type=Path, help="Path to the model configuration file."
)
# context_length: Option<u32> # context_length: Option<u32>
parser.add_argument( parser.add_argument(
"--context-length", type=int, help="Maximum context length for the model (u32)." "--context-length", type=int, help="Maximum context length for the model (u32)."
...@@ -139,8 +135,6 @@ async def run(): ...@@ -139,8 +135,6 @@ async def run():
flags = args["flags"] flags = args["flags"]
if flags.model_name is not None: if flags.model_name is not None:
entrypoint_kwargs["model_name"] = flags.model_name entrypoint_kwargs["model_name"] = flags.model_name
if flags.model_config is not None:
entrypoint_kwargs["model_config"] = flags.model_config
if flags.context_length is not None: if flags.context_length is not None:
entrypoint_kwargs["context_length"] = flags.context_length entrypoint_kwargs["context_length"] = flags.context_length
if flags.template_file is not None: if flags.template_file is not None:
......
...@@ -107,7 +107,6 @@ pub(crate) struct EntrypointArgs { ...@@ -107,7 +107,6 @@ pub(crate) struct EntrypointArgs {
engine_type: EngineType, engine_type: EngineType,
model_path: Option<PathBuf>, model_path: Option<PathBuf>,
model_name: Option<String>, model_name: Option<String>,
model_config: Option<PathBuf>,
endpoint_id: Option<EndpointId>, endpoint_id: Option<EndpointId>,
context_length: Option<u32>, context_length: Option<u32>,
template_file: Option<PathBuf>, template_file: Option<PathBuf>,
...@@ -125,12 +124,11 @@ pub(crate) struct EntrypointArgs { ...@@ -125,12 +124,11 @@ pub(crate) struct EntrypointArgs {
impl EntrypointArgs { impl EntrypointArgs {
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
#[new] #[new]
#[pyo3(signature = (engine_type, model_path=None, model_name=None, model_config=None, endpoint_id=None, context_length=None, template_file=None, router_config=None, kv_cache_block_size=None, http_host=None, http_port=None, tls_cert_path=None, tls_key_path=None, extra_engine_args=None, namespace=None))] #[pyo3(signature = (engine_type, model_path=None, model_name=None, endpoint_id=None, context_length=None, template_file=None, router_config=None, kv_cache_block_size=None, http_host=None, http_port=None, tls_cert_path=None, tls_key_path=None, extra_engine_args=None, namespace=None))]
pub fn new( pub fn new(
engine_type: EngineType, engine_type: EngineType,
model_path: Option<PathBuf>, model_path: Option<PathBuf>,
model_name: Option<String>, // e.g. "dyn://namespace.component.endpoint" model_name: Option<String>, // e.g. "dyn://namespace.component.endpoint"
model_config: Option<PathBuf>,
endpoint_id: Option<String>, endpoint_id: Option<String>,
context_length: Option<u32>, context_length: Option<u32>,
template_file: Option<PathBuf>, template_file: Option<PathBuf>,
...@@ -155,7 +153,6 @@ impl EntrypointArgs { ...@@ -155,7 +153,6 @@ impl EntrypointArgs {
engine_type, engine_type,
model_path, model_path,
model_name, model_name,
model_config,
endpoint_id: endpoint_id_obj, endpoint_id: endpoint_id_obj,
context_length, context_length,
template_file, template_file,
...@@ -188,7 +185,6 @@ pub fn make_engine<'p>( ...@@ -188,7 +185,6 @@ pub fn make_engine<'p>(
builder builder
.model_path(args.model_path.clone()) .model_path(args.model_path.clone())
.model_name(args.model_name.clone()) .model_name(args.model_name.clone())
.model_config(args.model_config.clone())
.endpoint_id(args.endpoint_id.clone()) .endpoint_id(args.endpoint_id.clone())
.context_length(args.context_length) .context_length(args.context_length)
.request_template(args.template_file.clone()) .request_template(args.template_file.clone())
......
...@@ -42,7 +42,6 @@ pub const DEFAULT_HTTP_PORT: u16 = 8080; ...@@ -42,7 +42,6 @@ pub const DEFAULT_HTTP_PORT: u16 = 8080;
pub struct LocalModelBuilder { pub struct LocalModelBuilder {
model_path: Option<PathBuf>, model_path: Option<PathBuf>,
model_name: Option<String>, model_name: Option<String>,
model_config: Option<PathBuf>,
endpoint_id: Option<EndpointId>, endpoint_id: Option<EndpointId>,
context_length: Option<u32>, context_length: Option<u32>,
template_file: Option<PathBuf>, template_file: Option<PathBuf>,
...@@ -71,7 +70,6 @@ impl Default for LocalModelBuilder { ...@@ -71,7 +70,6 @@ impl Default for LocalModelBuilder {
tls_key_path: Default::default(), tls_key_path: Default::default(),
model_path: Default::default(), model_path: Default::default(),
model_name: Default::default(), model_name: Default::default(),
model_config: Default::default(),
endpoint_id: Default::default(), endpoint_id: Default::default(),
context_length: Default::default(), context_length: Default::default(),
template_file: Default::default(), template_file: Default::default(),
...@@ -98,11 +96,6 @@ impl LocalModelBuilder { ...@@ -98,11 +96,6 @@ impl LocalModelBuilder {
self self
} }
pub fn model_config(&mut self, model_config: Option<PathBuf>) -> &mut Self {
self.model_config = model_config;
self
}
pub fn endpoint_id(&mut self, endpoint_id: Option<EndpointId>) -> &mut Self { pub fn endpoint_id(&mut self, endpoint_id: Option<EndpointId>) -> &mut Self {
self.endpoint_id = endpoint_id; self.endpoint_id = endpoint_id;
self self
...@@ -246,13 +239,9 @@ impl LocalModelBuilder { ...@@ -246,13 +239,9 @@ impl LocalModelBuilder {
} else { } else {
fs::canonicalize(relative_path)? fs::canonicalize(relative_path)?
}; };
// --model-config takes precedence over --model-path
let model_config_path = self.model_config.as_ref().unwrap_or(&full_path);
let mut card = ModelDeploymentCard::load_from_disk( let mut card =
model_config_path, ModelDeploymentCard::load_from_disk(&full_path, self.custom_template_path.as_deref())?;
self.custom_template_path.as_deref(),
)?;
// Usually we infer from the path, self.model_name is user override // Usually we infer from the path, self.model_name is user override
let model_name = self.model_name.take().unwrap_or_else(|| { let model_name = self.model_name.take().unwrap_or_else(|| {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment