Commit bef0b359 authored by Lianmin Zheng

Fix llava & Fix multiprocessing

parent c6576e82
@@ -7,10 +7,8 @@ def image_qa(s, image_path, question):
     s += sgl.assistant(sgl.gen("answer"))
 
-# runtime = sgl.Runtime(model_path="liuhaotian/llava-v1.5-7b",
-#                       tokenizer_path="llava-hf/llava-1.5-7b-hf")
-runtime = sgl.Runtime(model_path="llava-internal/llava-v1.6-7b-hd-224px_3x2-preview-20230103",
-                      tokenizer_path="llava-internal/llava-v1.6-7b-hd-224px_3x2-preview-20230103-tokenizer")
+runtime = sgl.Runtime(model_path="liuhaotian/llava-v1.5-7b",
+                      tokenizer_path="llava-hf/llava-1.5-7b-hf")
 sgl.set_default_backend(runtime)
...
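This hunk points the quick-start example back at the public llava-v1.5 checkpoint and drops the internal llava-v1.6 preview paths. For context, here is a minimal sketch of the whole example after this change; the `import sglang as sgl` alias, the decorator, and the image path/question are reconstructed from the visible fragment and common sglang convention, not shown in this diff:

```python
import sglang as sgl

@sgl.function
def image_qa(s, image_path, question):
    s += sgl.user(sgl.image(image_path) + question)
    s += sgl.assistant(sgl.gen("answer"))

runtime = sgl.Runtime(model_path="liuhaotian/llava-v1.5-7b",
                      tokenizer_path="llava-hf/llava-1.5-7b-hf")
sgl.set_default_backend(runtime)

# Placeholder inputs; any local image path works here.
state = image_qa.run(image_path="example.jpg", question="What is in this image?")
print(state["answer"])

runtime.shutdown()
```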
 import asyncio
 import concurrent.futures
 import dataclasses
+import multiprocessing as mp
 import os
 from typing import List
@@ -101,7 +102,9 @@ class TokenizerManager:
             self.tokenizer = self.processor.tokenizer
             os.environ["TOKENIZERS_PARALLELISM"] = "false"
             self.executor = concurrent.futures.ProcessPoolExecutor(
-                initializer=init_global_processor, initargs=(server_args,)
+                initializer=init_global_processor,
+                mp_context=mp.get_context("fork"),
+                initargs=(server_args,),
             )
         else:
             self.tokenizer = get_tokenizer(
...
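The tokenizer fix pins the worker start method on the executor itself via `mp_context`, instead of relying on the process-wide default (which the removed `Runtime` code below used to force to "spawn"). A runnable sketch of the pattern under that assumption, with a toy dict standing in for the real HuggingFace processor that `init_global_processor` loads, and a hypothetical `encode` task:

```python
import concurrent.futures
import multiprocessing as mp

_global_processor = None  # one copy per worker process

def init_global_processor(server_args):
    # Runs once in each worker when it starts, not once per task.
    global _global_processor
    _global_processor = {"model": server_args["model_path"]}  # toy stand-in

def encode(text):
    # Tasks reuse the per-worker processor without re-loading it.
    return text, _global_processor["model"]

if __name__ == "__main__":
    server_args = {"model_path": "liuhaotian/llava-v1.5-7b"}
    executor = concurrent.futures.ProcessPoolExecutor(
        initializer=init_global_processor,
        # Start method chosen locally for this pool only;
        # note "fork" is unavailable on Windows.
        mp_context=mp.get_context("fork"),
        initargs=(server_args,),
    )
    print(executor.submit(encode, "hello").result())
    executor.shutdown()
```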
@@ -34,9 +34,10 @@ class LlavaLlamaForCausalLM(nn.Module):
         self.config.text_config.hidden_size = config.hidden_size
         self.multi_modal_projector = LlavaMultiModalProjector(config)
         self.language_model = LlamaForCausalLM(config, linear_method)
-        if "unpad" in getattr(config, "mm_patch_merge_type"):
+        if "unpad" in getattr(config, "mm_patch_merge_type", ""):
             self.language_model.model.image_newline = nn.Parameter(
-                torch.empty(config.text_config.hidden_size, dtype=torch.float16))
+                torch.empty(config.text_config.hidden_size, dtype=torch.float16)
+            )
 
     def pad_input_ids(self, input_ids, pad_value, pt_shape=None, image_size=None):
         new_image_feature_len = self.image_feature_len
...
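The small change to `getattr` is the actual llava fix: the two-argument form `getattr(config, "mm_patch_merge_type")` raises `AttributeError` on configs that lack the field (as llava-v1.5 configs do), while the three-argument form falls back to `""`, so the `in` check simply evaluates to `False`. A minimal illustration with a hypothetical config class:

```python
class FakeConfig:  # stand-in for a llava-v1.5 config without mm_patch_merge_type
    pass

cfg = FakeConfig()
# getattr(cfg, "mm_patch_merge_type")        # would raise AttributeError
print("unpad" in getattr(cfg, "mm_patch_merge_type", ""))  # prints False
```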
@@ -425,8 +425,6 @@ class Runtime:
             random_seed=random_seed,
             log_level=log_level,
         )
-        import torch
-        torch.multiprocessing.set_start_method("spawn", force=True)
 
         self.url = self.server_args.url()
         self.generate_url = (
...
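Removing `torch.multiprocessing.set_start_method("spawn", force=True)` is the other half of the multiprocessing fix: that call mutates the process-wide default for every pool created afterwards, including the fork-based `ProcessPoolExecutor` in `TokenizerManager` above. Passing an explicit context keeps the choice local to one pool. A small sketch of the difference (illustrative only, not sglang code):

```python
import multiprocessing as mp

# Global: changes the default for every later pool in this process,
# which is what the removed Runtime code did.
# mp.set_start_method("spawn", force=True)

# Local: only pools built from this context use "fork"; the rest of
# the process keeps its default start method. ("fork" is POSIX-only.)
if __name__ == "__main__":
    ctx = mp.get_context("fork")
    with ctx.Pool(processes=2) as pool:
        print(pool.map(abs, [-1, -2, 3]))  # prints [1, 2, 3]
```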