# TODO: `gen` is a synchronous generator being iterated inside an async
# method, so every iteration step blocks the event loop. Move the iteration
# to a worker thread.
forresingen:
logging.debug(f"res: {res}")
yieldres
defcmd_line_args():
parser=argparse.ArgumentParser(
description="llama.cpp server integrated with Dynamo LLM."
)
parser.add_argument(
"--model-path",
type=str,
required=True,
help="Path to a local GGUF file.",
)
parser.add_argument(
"--endpoint",
type=str,
default=DEFAULT_ENDPOINT,
help=f"Dynamo endpoint string in 'dyn://namespace.component.endpoint' format. Default: {DEFAULT_ENDPOINT}",
)
parser.add_argument(
"--model-name",
type=str,
default="",
help="Name to serve the model under. Defaults to deriving it from model path.",
)
parser.add_argument(
"--context-length",
type=int,
default=None,
help="Max model context length. Defaults to models max, usually model_max_length from tokenizer_config.json. Reducing this reduces VRAM requirements.",
)
args=parser.parse_args()
config=Config()
config.model_path=args.model_path
ifargs.model_name:
config.model_name=args.model_name
else:
# This becomes an `Option` on the Rust side
config.model_name=None
endpoint_str=args.endpoint.replace("dyn://","",1)
endpoint_parts=endpoint_str.split(".")
iflen(endpoint_parts)!=3:
logging.error(
f"Invalid endpoint format: '{args.endpoint}'. Expected 'dyn://namespace.component.endpoint' or 'namespace.component.endpoint'."