Commit 0a33885a authored by Hsien-Chin Lin

clean the print format

parent bb9921c4
@@ -175,27 +175,27 @@ class Evaluator:
         nlg_eval["dialog"] = self._transform_result()
         # if golden_action:
-        print("Calculate BLEU")
+        # print("Calculate BLEU")
         bleu_metric = load_metric("sacrebleu")
         labels = [[utt] for utt in self.r["golden_utts"]]
         bleu_score = bleu_metric.compute(predictions=self.r["gen_utts"],
                                          references=labels,
                                          force=True)
-        print("bleu_metric", bleu_score)
         nlg_eval["metrics"]["bleu"] = bleu_score
         # else:
-        print("Calculate SER")
         missing, hallucinate, total, hallucination_dialogs, missing_dialogs = fine_SER(
             self.r["gen_acts"], self.r["gen_utts"])
-        print("{} Missing acts: {}, Total acts: {}, Hallucinations {}, SER {}".format(
-            "EmoUSNLG", missing, total, hallucinate, missing/total))
-        print(nlg_eval["metrics"])
+        # print("{} Missing acts: {}, Total acts: {}, Hallucinations {}, SER {}".format(
+        #     "EmoUSNLG", missing, total, hallucinate, missing/total))
         nlg_eval["metrics"]["SER"] = missing/total
         # TODO emotion metric
+        print("=== Natural language generation ===")
+        print("Sacre-BLEU", nlg_eval["metrics"]["bleu"]["score"])
+        print("SER", nlg_eval["metrics"]["SER"])
         dir_name = self.model_checkpoint
         json.dump(nlg_eval,
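
For context, the summary printed in this hunk can be reproduced standalone. A minimal sketch, assuming Hugging Face datasets' load_metric (as used in this file) and a fine_SER helper with the return signature seen above; the function name and wiring are hypothetical:

# Hypothetical standalone helper mirroring the metric computation above.
from datasets import load_metric

def nlg_metrics(gen_utts, golden_utts, gen_acts, fine_SER):
    bleu_metric = load_metric("sacrebleu")
    # sacrebleu expects one list of references per prediction
    references = [[utt] for utt in golden_utts]
    bleu = bleu_metric.compute(predictions=gen_utts, references=references)
    # SER is the share of acts the generated utterance failed to realise
    missing, hallucinate, total, _, _ = fine_SER(gen_acts, gen_utts)
    return {"bleu": bleu["score"], "SER": missing / total}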
@@ -213,7 +213,6 @@ class Evaluator:
         return acts
 
     def evaluation(self, generated_file, golden_emotion=False, golden_action=False):
-        # TODO add emotion
         gen_file = json.load(open(generated_file))
         self.read_generated_result(generated_file)
@@ -239,26 +238,29 @@ class Evaluator:
                 golden_emotions.append(dialog["golden_emotion"])
 
         dialog_result = gen_file['dialog']
 
-        scores = {"complete": {"precision": [], "recall": [], "f1": [], "turn_acc": []},
-                  "intent_domain": {"precision": [], "recall": [], "f1": [], "turn_acc": []}}
+        scores = {"full action": {"precision": [], "recall": [], "f1": [], "turn_acc": []},
+                  "intent-domain": {"precision": [], "recall": [], "f1": [], "turn_acc": []}}
 
         # full action
         for gen_act, golden_act in zip(gen_acts, golden_acts):
             s = f1_measure(preds=gen_act, labels=golden_act)
-            for metric in scores["complete"]:
-                scores["complete"][metric].append(s[metric])
+            for metric in scores["full action"]:
+                scores["full action"][metric].append(s[metric])
             s = f1_measure(preds=self._intent_domain(gen_act),
                            labels=self._intent_domain(golden_act))
-            for metric in scores["intent_domain"]:
-                scores["intent_domain"][metric].append(s[metric])
+            for metric in scores["intent-domain"]:
+                scores["intent-domain"][metric].append(s[metric])
 
         result = {}
         result["emotion_weight"] = self.emotion_weight
+        print("=== Semantic evaluation ===")
         for metric_type, score in scores.items():
             result[metric_type] = {}
+            print(f"> {metric_type}")
             for m, s in score.items():
                 result[metric_type][m] = sum(s)/len(s)
-                print(f"{metric_type}-{m}: {result[metric_type][m]}")
+                print(f"{m}: {result[metric_type][m]}")
+            print("")
 
         if not golden_emotion:
             emo_score = emotion_score(
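
For reference, f1_measure above scores one turn by comparing the predicted act list against the gold act list, and the intent-domain variant first projects each act down to its (intent, domain) pair. A minimal sketch of such a scorer; this is a hypothetical reimplementation, not necessarily the repo's exact helper:

def f1_measure(preds, labels):
    # true positives: predicted acts that also appear in the gold acts
    tp = sum(1 for act in preds if act in labels)
    precision = tp / len(preds) if preds else 0.0
    recall = tp / len(labels) if labels else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    # turn_acc: the whole turn counts as correct only on an exact match
    return {"precision": precision, "recall": recall, "f1": f1,
            "turn_acc": float(sorted(preds) == sorted(labels))}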
@@ -307,7 +309,7 @@ def emotion_score(golden_emotions, gen_emotions, dirname=".", time="", no_neutral
               "Apologetic", "Abusive", "Excited", "Satisfied"]
     if no_neutral:
         labels = labels[1:]
-    print(labels)
     macro_f1 = metrics.f1_score(golden_emotions, gen_emotions, average="macro")
     sep_f1 = metrics.f1_score(
         golden_emotions, gen_emotions, average=None, labels=labels)
@@ -319,14 +321,18 @@ def emotion_score(golden_emotions, gen_emotions, dirname=".", time="", no_neutral
     plt.savefig(os.path.join(dirname, f"{time}-emotion.png"))
 
     r = {"macro_f1": float(macro_f1), "sep_f1": list(
         sep_f1), "cm": [list(c) for c in list(cm)]}
-    print(r)
+    print("=== emotion score ===")
+    print("emotions:", labels)
+    print("macro_f1:", r["macro_f1"])
+    print("sep_f1:")
+    for i, l in enumerate(labels):
+        print(f"{l}: {r['sep_f1'][i]}")
 
     return r
 
 def sentiment_score(golden_sentiment, gen_sentiment, dirname=".", time=""):
     labels = ["Neutral", "Negative", "Positive"]
-    print(labels)
     macro_f1 = metrics.f1_score(
         golden_sentiment, gen_sentiment, average="macro")
     sep_f1 = metrics.f1_score(
@@ -339,7 +345,12 @@ def sentiment_score(golden_sentiment, gen_sentiment, dirname=".", time=""):
     plt.savefig(os.path.join(dirname, f"{time}-sentiment.png"))
 
     r = {"macro_f1": float(macro_f1), "sep_f1": list(
         sep_f1), "cm": [list(c) for c in list(cm)]}
-    print(r)
+    print("=== sentiment score ===")
+    print("sentiments:", labels)
+    print("macro_f1:", r["macro_f1"])
+    print("sep_f1:")
+    for i, l in enumerate(labels):
+        print(f"{l}: {r['sep_f1'][i]}")
 
     return r
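
Both scorers lean on scikit-learn's metrics module. A compact usage sketch with hypothetical predictions; the confusion-matrix call is an assumption inferred from the cm variable used above:

from sklearn import metrics

golden = ["Neutral", "Positive", "Negative", "Neutral"]  # hypothetical gold labels
gen = ["Neutral", "Neutral", "Negative", "Neutral"]      # hypothetical predictions
labels = ["Neutral", "Negative", "Positive"]

macro_f1 = metrics.f1_score(golden, gen, average="macro")            # one summary score
sep_f1 = metrics.f1_score(golden, gen, average=None, labels=labels)  # one score per label
cm = metrics.confusion_matrix(golden, gen, labels=labels)            # rows: gold, columns: predicted

for label, score in zip(labels, sep_f1):
    print(f"{label}: {score}")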
@@ -143,14 +143,6 @@ class DataBuilder(GenTUSDataBuilder):
         return json.dumps(out_str)
 
-"""
-TODO
-1. add sentiment in the output str
-2. check exciting/fearful in user goal (domain)
-3. add impolite (user info?)
-"""
 
 if __name__ == "__main__":
     args = arg_parser()