Commit 6f5d76dc authored by 王敏
Browse files

暂时去掉profilling标志位,避免影响其他模型

parent a0d02d42
...@@ -2051,8 +2051,6 @@ class GPUModelRunner(LoRAModelRunnerMixin): ...@@ -2051,8 +2051,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
input_ids = None input_ids = None
inputs_embeds = self.inputs_embeds[:num_tokens] inputs_embeds = self.inputs_embeds[:num_tokens]
else: else:
#self.input_ids[:num_tokens] = torch.randint(0, 120000, (num_tokens,), dtype=torch.int32)
#self.input_ids[:num_tokens] = torch.arange(num_tokens, dtype=torch.int32, device=self.input_ids.device)
input_ids = self.input_ids[:num_tokens] input_ids = self.input_ids[:num_tokens]
inputs_embeds = None inputs_embeds = None
if self.uses_mrope: if self.uses_mrope:
...@@ -2226,8 +2224,8 @@ class GPUModelRunner(LoRAModelRunnerMixin): ...@@ -2226,8 +2224,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
def profile_run(self) -> None: def profile_run(self) -> None:
# set profiling flag to avoid torch compile # set profiling flag to avoid torch compile
set_profilling(True) #set_profilling(True)
self._sync_device() #self._sync_device()
# Profile with multimodal encoder & encoder cache. # Profile with multimodal encoder & encoder cache.
# TODO: handle encoder-decoder models once we support them. # TODO: handle encoder-decoder models once we support them.
...@@ -2312,7 +2310,7 @@ class GPUModelRunner(LoRAModelRunnerMixin): ...@@ -2312,7 +2310,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
del hidden_states, output del hidden_states, output
self.encoder_cache.clear() self.encoder_cache.clear()
gc.collect() gc.collect()
set_profilling(False) #set_profilling(False)
def capture_model(self) -> None: def capture_model(self) -> None:
if not self.use_cuda_graph: if not self.use_cuda_graph:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment