'Please carefully check the video and then answer the following question with details:'
)
eliflistinstr(['Video-MME'],dataset):
prompt=prompt.replace('\nAnswer:','')
prompt+="\nAnswer with the option's letter from the given choices directly."
eliflistinstr(['MVBench'],dataset):
prompt=prompt.replace('Best option:(','')
system_prompt='Carefully watch the video and pay attention to the cause and sequence of events, the detail and movement of objects, and the action and pose of persons. Based on your observations, select the best option that accurately addresses the question.\n'# noqa: E501
prompt=prompt.replace(system_prompt,'')
returnprompt
defadjust_kwargs(self,dataset):
kwargs=cp.deepcopy(self.kwargs)
kwargs["temperature"]=0.0
kwargs["do_sample"]=False
ifDATASET_MODALITY(dataset)=="VIDEO":
kwargs["max_image_size"]=490
else:
kwargs["max_image_size"]=980
kwargs["split_image"]=False
iflistinstr(['MMMU','MMStar','Math'],dataset):
# These datasets may lead the model to exhibit CoT-like (chain-of-thought) behaviour.
logging.warning("Remove L157-L158 in https://github.com/NVlabs/EAGLE/blob/fef95f103b5e9899acbbe2c237e5b99147ab7e8e/eagle/model/builder.py to make it work properly.")# noqa: E501