From 0a33885af132a28c07bc15830d3b02102422470c Mon Sep 17 00:00:00 2001
From: Hsien-Chin Lin <linh@hhu.de>
Date: Thu, 13 Apr 2023 18:23:16 +0200
Subject: [PATCH] clean the print format

---
 convlab/policy/emoUS/evaluate.py         | 49 +++++++++++++++---------
 convlab/policy/emoUS/unify/build_data.py |  8 ----
 2 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/convlab/policy/emoUS/evaluate.py b/convlab/policy/emoUS/evaluate.py
index cff2780b..99da1c24 100644
--- a/convlab/policy/emoUS/evaluate.py
+++ b/convlab/policy/emoUS/evaluate.py
@@ -175,27 +175,27 @@ class Evaluator:
         nlg_eval["dialog"] = self._transform_result()
 
         # if golden_action:
-        print("Calculate BLEU")
+        # print("Calculate BLEU")
         bleu_metric = load_metric("sacrebleu")
         labels = [[utt] for utt in self.r["golden_utts"]]
         bleu_score = bleu_metric.compute(predictions=self.r["gen_utts"],
                                          references=labels,
                                          force=True)
-        print("bleu_metric", bleu_score)
+
         nlg_eval["metrics"]["bleu"] = bleu_score
 
         # else:
-        print("Calculate SER")
         missing, hallucinate, total, hallucination_dialogs, missing_dialogs = fine_SER(
             self.r["gen_acts"], self.r["gen_utts"])
 
-        print("{} Missing acts: {}, Total acts: {}, Hallucinations {}, SER {}".format(
-            "EmoUSNLG", missing, total, hallucinate, missing/total))
-        print(nlg_eval["metrics"])
+        # print("{} Missing acts: {}, Total acts: {}, Hallucinations {}, SER {}".format(
+        #     "EmoUSNLG", missing, total, hallucinate, missing/total))
 
         nlg_eval["metrics"]["SER"] = missing/total
 
-        # TODO emotion metric
+        print("=== Natural language generation ===")
+        print("Sacre-BLEU", nlg_eval["metrics"]["bleu"]["score"])
+        print("SER", nlg_eval["metrics"]["SER"])
 
         dir_name = self.model_checkpoint
         json.dump(nlg_eval,
@@ -213,7 +213,6 @@ class Evaluator:
         return acts
 
     def evaluation(self, generated_file, golden_emotion=False, golden_action=False):
-        # TODO add emotion
         gen_file = json.load(open(generated_file))
         self.read_generated_result(generated_file)
 
@@ -239,26 +238,29 @@ class Evaluator:
                 golden_emotions.append(dialog["golden_emotion"])
 
         dialog_result = gen_file['dialog']
 
-        scores = {"complete": {"precision": [], "recall": [], "f1": [], "turn_acc": []},
-                  "intent_domain": {"precision": [], "recall": [], "f1": [], "turn_acc": []}}
+        scores = {"full action": {"precision": [], "recall": [], "f1": [], "turn_acc": []},
+                  "intent-domain": {"precision": [], "recall": [], "f1": [], "turn_acc": []}}
         # full action
         for gen_act, golden_act in zip(gen_acts, golden_acts):
             s = f1_measure(preds=gen_act, labels=golden_act)
-            for metric in scores["complete"]:
-                scores["complete"][metric].append(s[metric])
+            for metric in scores["full action"]:
+                scores["full action"][metric].append(s[metric])
             s = f1_measure(preds=self._intent_domain(gen_act),
                            labels=self._intent_domain(golden_act))
-            for metric in scores["intent_domain"]:
-                scores["intent_domain"][metric].append(s[metric])
+            for metric in scores["intent-domain"]:
+                scores["intent-domain"][metric].append(s[metric])
 
         result = {}
         result["emotion_weight"] = self.emotion_weight
+        print("=== Semantic evaluation ===")
         for metric_type, score in scores.items():
             result[metric_type] = {}
+            print(f"> {metric_type}")
             for m, s in score.items():
                 result[metric_type][m] = sum(s)/len(s)
-                print(f"{metric_type}-{m}: {result[metric_type][m]}")
+                print(f"{m}: {result[metric_type][m]}")
+            print("")
 
         if not golden_emotion:
             emo_score = emotion_score(
@@ -307,7 +309,7 @@ def emotion_score(golden_emotions, gen_emotions, dirname=".", time="", no_neutra
               "Apologetic", "Abusive", "Excited", "Satisfied"]
     if no_neutral:
         labels = labels[1:]
-    print(labels)
+
     macro_f1 = metrics.f1_score(golden_emotions, gen_emotions, average="macro")
     sep_f1 = metrics.f1_score(
         golden_emotions, gen_emotions, average=None, labels=labels)
@@ -319,14 +321,18 @@ def emotion_score(golden_emotions, gen_emotions, dirname=".", time="", no_neutra
     plt.savefig(os.path.join(dirname, f"{time}-emotion.png"))
     r = {"macro_f1": float(macro_f1), "sep_f1": list(
         sep_f1), "cm": [list(c) for c in list(cm)]}
-    print(r)
+    print("=== emotion score ===")
+    print("emotions:", labels)
+    print("macro_f1:", r["macro_f1"])
+    print("sep_f1:")
+    for i, l in enumerate(labels):
+        print(f"{l}: {r['sep_f1'][i]}")
 
     return r
 
 
 def sentiment_score(golden_sentiment, gen_sentiment, dirname=".", time=""):
     labels = ["Neutral", "Negative", "Positive"]
-    print(labels)
     macro_f1 = metrics.f1_score(
         golden_sentiment, gen_sentiment, average="macro")
     sep_f1 = metrics.f1_score(
@@ -339,7 +345,12 @@ def sentiment_score(golden_sentiment, gen_sentiment, dirname=".", time=""):
     plt.savefig(os.path.join(dirname, f"{time}-sentiment.png"))
 
     r = {"macro_f1": float(macro_f1), "sep_f1": list(
         sep_f1), "cm": [list(c) for c in list(cm)]}
-    print(r)
+    print("=== sentiment score ===")
+    print("sentiments:", labels)
+    print("macro_f1:", r["macro_f1"])
+    print("sep_f1:")
+    for i, l in enumerate(labels):
+        print(f"{l}: {r['sep_f1'][i]}")
 
     return r

diff --git a/convlab/policy/emoUS/unify/build_data.py b/convlab/policy/emoUS/unify/build_data.py
index 8e84cf7d..d1e2af8f 100644
--- a/convlab/policy/emoUS/unify/build_data.py
+++ b/convlab/policy/emoUS/unify/build_data.py
@@ -143,14 +143,6 @@ class DataBuilder(GenTUSDataBuilder):
 
         return json.dumps(out_str)
 
 
-"""
-TODO
-1. add sentiment in the output str
-2. check exciting/fearful in user goal (domain)
-3. add impolite (user info?)
-"""
-
-
 if __name__ == "__main__":
     args = arg_parser()
-- 
GitLab
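
Note (not part of the patch): a minimal, self-contained sketch of the per-label F1 reporting pattern that the reworked emotion_score()/sentiment_score() prints follow, assuming scikit-learn is installed. The label list and the gold/generated sequences below are made-up placeholders for illustration, not ConvLab data.

    # Illustrative sketch only: mirrors the "macro_f1 / sep_f1 per label" print style.
    from sklearn import metrics

    labels = ["Neutral", "Negative", "Positive"]                      # hypothetical label set
    golden = ["Neutral", "Positive", "Negative", "Neutral", "Positive"]  # hypothetical gold labels
    gen = ["Neutral", "Positive", "Negative", "Positive", "Positive"]    # hypothetical predictions

    macro_f1 = metrics.f1_score(golden, gen, average="macro")
    # average=None returns one F1 score per entry of `labels`, in that order.
    sep_f1 = metrics.f1_score(golden, gen, average=None, labels=labels)

    print("macro_f1:", float(macro_f1))
    print("sep_f1:")
    for label, score in zip(labels, sep_f1):
        print(f"{label}: {score}")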