"git@developer.sourcefind.cn:OpenDAS/colossalai.git" did not exist on "dd2c28a32352de45675ab13e72049a7f2a57e364"
Unverified commit 3ff60d13, authored by Yuanchen, committed by GitHub
Browse files

Fix ColossalEval (#5186)


Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com>
parent 79718fae
...@@ -58,12 +58,12 @@ class DatasetEvaluator(object): ...@@ -58,12 +58,12 @@ class DatasetEvaluator(object):
[sample["output"] for sample in self.data[category]["data"]] [sample["output"] for sample in self.data[category]["data"]]
flag = False flag = False
softmaxs = [] logits = []
for i, sample in enumerate(self.data[category]["data"]): for i, sample in enumerate(self.data[category]["data"]):
if np.any(np.isnan(np.array(list(sample["softmax_over_choices"].values())))): if np.any(np.isnan(np.array(list(sample["logits_over_choices"].values())))):
if not flag: if not flag:
print( print(
f"NaN in the softmax, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}." f"NaN in the logits, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}."
) )
flag = True flag = True
score = 0 score = 0
...@@ -79,13 +79,13 @@ class DatasetEvaluator(object): ...@@ -79,13 +79,13 @@ class DatasetEvaluator(object):
score, score,
metric_helper.accuracy_by_options(sample["input"], sample["output"], ref), metric_helper.accuracy_by_options(sample["input"], sample["output"], ref),
) )
softmaxs.append(references[i] if score == 1 else -1) logits.append(references[i] if score == 1 else -1)
else: else:
softmaxs.append(np.argmax(np.array(list(sample["softmax_over_choices"].values())))) logits.append(np.argmax(np.array(list(sample["logits_over_choices"].values()))))
references = np.array(references) references = np.array(references)
softmaxs = np.array(softmaxs) logits = np.array(logits)
scores = np.sum(references == softmaxs) / len(self.data[category]["data"]) * 100 scores = np.sum(references == logits) / len(self.data[category]["data"]) * 100
self.evaluation_results[metric][category] = (scores, len(self.data[category]["data"])) self.evaluation_results[metric][category] = (scores, len(self.data[category]["data"]))
self.evaluation_results[metric]["ALL"] += scores * weight self.evaluation_results[metric]["ALL"] += scores * weight
...@@ -105,12 +105,12 @@ class DatasetEvaluator(object): ...@@ -105,12 +105,12 @@ class DatasetEvaluator(object):
predictions = [sample["output"] for sample in self.data[category]["data"]] predictions = [sample["output"] for sample in self.data[category]["data"]]
flag = False flag = False
softmaxs = [] logits = []
for i, sample in enumerate(self.data[category]["data"]): for i, sample in enumerate(self.data[category]["data"]):
if np.any(np.isnan(np.array(list(sample["softmax_over_choices"].values())))): if np.any(np.isnan(np.array(list(sample["logits_over_choices"].values())))):
if not flag: if not flag:
print( print(
f"NaN in the softmax, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}." f"NaN in the logits, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}."
) )
flag = True flag = True
score = 0 score = 0
...@@ -121,16 +121,14 @@ class DatasetEvaluator(object): ...@@ -121,16 +121,14 @@ class DatasetEvaluator(object):
sample["output"], ref, all_classes=self.data[category]["inference_kwargs"]["all_classes"] sample["output"], ref, all_classes=self.data[category]["inference_kwargs"]["all_classes"]
), ),
) )
softmaxs.append(references[i] if score == 1 else -1) logits.append(references[i] if score == 1 else -1)
else: else:
softmaxs.append(np.argmax(np.array(list(sample["softmax_over_choices"].values())))) logits.append(np.argmax(np.array(list(sample["logits_over_choices"].values()))))
metric_method = eval("metric_helper." + metric) metric_method = eval("metric_helper." + metric)
total_score = 0.0 total_score = 0.0
for prediction, reference, references_label, softmax in zip( for prediction, reference, references_label, softmax in zip(predictions, references, references_labels, logits):
predictions, references, references_labels, softmaxs
):
score = 0.0 score = 0.0
for ref in reference: for ref in reference:
......
...@@ -116,10 +116,10 @@ class HuggingFaceModel(BaseModel): ...@@ -116,10 +116,10 @@ class HuggingFaceModel(BaseModel):
shard_config: Shard config for tensor parallel. shard_config: Shard config for tensor parallel.
""" """
model_kwargs.setdefault("torch_dtype", torch.float16)
if "torch_dtype" in model_kwargs: if "torch_dtype" in model_kwargs:
model_kwargs["torch_dtype"] = eval(model_kwargs["torch_dtype"]) model_kwargs["torch_dtype"] = eval(model_kwargs["torch_dtype"])
else:
model_kwargs.setdefault("torch_dtype", torch.float16)
if "config" in model_kwargs: if "config" in model_kwargs:
model_kwargs["config"] = AutoConfig.from_pretrained(model_kwargs["config"]) model_kwargs["config"] = AutoConfig.from_pretrained(model_kwargs["config"])
...@@ -586,11 +586,10 @@ class HuggingFaceCausalLM(HuggingFaceModel): ...@@ -586,11 +586,10 @@ class HuggingFaceCausalLM(HuggingFaceModel):
shard_config: Shard config for tensor parallel. shard_config: Shard config for tensor parallel.
""" """
model_kwargs.setdefault("torch_dtype", torch.float16)
if "torch_dtype" in model_kwargs: if "torch_dtype" in model_kwargs:
model_kwargs["torch_dtype"] = eval(model_kwargs["torch_dtype"]) model_kwargs["torch_dtype"] = eval(model_kwargs["torch_dtype"])
else:
model_kwargs.setdefault("torch_dtype", torch.float16)
if "config" in model_kwargs: if "config" in model_kwargs:
model_kwargs["config"] = AutoConfig.from_pretrained(model_kwargs["config"]) model_kwargs["config"] = AutoConfig.from_pretrained(model_kwargs["config"])
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment