From 0a33885af132a28c07bc15830d3b02102422470c Mon Sep 17 00:00:00 2001
From: Hsien-Chin Lin <linh@hhu.de>
Date: Thu, 13 Apr 2023 18:23:16 +0200
Subject: [PATCH] clean the print format

---
 convlab/policy/emoUS/evaluate.py         | 49 +++++++++++++++---------
 convlab/policy/emoUS/unify/build_data.py |  8 ----
 2 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/convlab/policy/emoUS/evaluate.py b/convlab/policy/emoUS/evaluate.py
index cff2780b..99da1c24 100644
--- a/convlab/policy/emoUS/evaluate.py
+++ b/convlab/policy/emoUS/evaluate.py
@@ -175,27 +175,27 @@ class Evaluator:
         nlg_eval["dialog"] = self._transform_result()
 
         # if golden_action:
-        print("Calculate BLEU")
+        # print("Calculate BLEU")
         bleu_metric = load_metric("sacrebleu")
         labels = [[utt] for utt in self.r["golden_utts"]]
         bleu_score = bleu_metric.compute(predictions=self.r["gen_utts"],
                                          references=labels,
                                          force=True)
-        print("bleu_metric", bleu_score)
+
         nlg_eval["metrics"]["bleu"] = bleu_score
 
         # else:
-        print("Calculate SER")
         missing, hallucinate, total, hallucination_dialogs, missing_dialogs = fine_SER(
             self.r["gen_acts"], self.r["gen_utts"])
 
-        print("{} Missing acts: {}, Total acts: {}, Hallucinations {}, SER {}".format(
-            "EmoUSNLG", missing, total, hallucinate, missing/total))
-        print(nlg_eval["metrics"])
+        # print("{} Missing acts: {}, Total acts: {}, Hallucinations {}, SER {}".format(
+        #     "EmoUSNLG", missing, total, hallucinate, missing/total))
 
         nlg_eval["metrics"]["SER"] = missing/total
 
-        # TODO emotion metric
+        print("=== Natural language generation ===")
+        print("Sacre-BLEU", nlg_eval["metrics"]["bleu"]["score"])
+        print("SER", nlg_eval["metrics"]["SER"])
 
         dir_name = self.model_checkpoint
         json.dump(nlg_eval,
@@ -213,7 +213,6 @@ class Evaluator:
         return acts
 
     def evaluation(self, generated_file, golden_emotion=False, golden_action=False):
-        # TODO add emotion
         gen_file = json.load(open(generated_file))
         self.read_generated_result(generated_file)
 
@@ -239,26 +238,29 @@ class Evaluator:
                 golden_emotions.append(dialog["golden_emotion"])
 
         dialog_result = gen_file['dialog']
 
-        scores = {"complete": {"precision": [], "recall": [], "f1": [], "turn_acc": []},
-                  "intent_domain": {"precision": [], "recall": [], "f1": [], "turn_acc": []}}
+        scores = {"full action": {"precision": [], "recall": [], "f1": [], "turn_acc": []},
+                  "intent-domain": {"precision": [], "recall": [], "f1": [], "turn_acc": []}}
         # full action
         for gen_act, golden_act in zip(gen_acts, golden_acts):
             s = f1_measure(preds=gen_act, labels=golden_act)
-            for metric in scores["complete"]:
-                scores["complete"][metric].append(s[metric])
+            for metric in scores["full action"]:
+                scores["full action"][metric].append(s[metric])
             s = f1_measure(preds=self._intent_domain(gen_act),
                            labels=self._intent_domain(golden_act))
-            for metric in scores["intent_domain"]:
-                scores["intent_domain"][metric].append(s[metric])
+            for metric in scores["intent-domain"]:
+                scores["intent-domain"][metric].append(s[metric])
 
         result = {}
         result["emotion_weight"] = self.emotion_weight
+        print("=== Semantic evaluation ===")
         for metric_type, score in scores.items():
             result[metric_type] = {}
+            print(f"> {metric_type}")
             for m, s in score.items():
                 result[metric_type][m] = sum(s)/len(s)
-                print(f"{metric_type}-{m}: {result[metric_type][m]}")
+                print(f"{m}: {result[metric_type][m]}")
+            print("")
 
         if not golden_emotion:
             emo_score = emotion_score(
@@ -307,7 +309,7 @@ def emotion_score(golden_emotions, gen_emotions, dirname=".", time="", no_neutra
               "Apologetic", "Abusive", "Excited", "Satisfied"]
     if no_neutral:
         labels = labels[1:]
-    print(labels)
+
     macro_f1 = metrics.f1_score(golden_emotions, gen_emotions, average="macro")
     sep_f1 = metrics.f1_score(
         golden_emotions, gen_emotions, average=None, labels=labels)
@@ -319,14 +321,18 @@ def emotion_score(golden_emotions, gen_emotions, dirname=".", time="", no_neutra
     plt.savefig(os.path.join(dirname, f"{time}-emotion.png"))
     r = {"macro_f1": float(macro_f1), "sep_f1": list(
         sep_f1), "cm": [list(c) for c in list(cm)]}
-    print(r)
+    print("=== emotion score ===")
+    print("emotions:", labels)
+    print("macro_f1:", r["macro_f1"])
+    print("sep_f1:")
+    for i, l in enumerate(labels):
+        print(f"{l}: {r['sep_f1'][i]}")
 
     return r
 
 
 def sentiment_score(golden_sentiment, gen_sentiment, dirname=".", time=""):
     labels = ["Neutral", "Negative", "Positive"]
-    print(labels)
     macro_f1 = metrics.f1_score(
         golden_sentiment, gen_sentiment, average="macro")
     sep_f1 = metrics.f1_score(
@@ -339,7 +345,12 @@ def sentiment_score(golden_sentiment, gen_sentiment, dirname=".", time=""):
     plt.savefig(os.path.join(dirname, f"{time}-sentiment.png"))
 
     r = {"macro_f1": float(macro_f1), "sep_f1": list(
         sep_f1), "cm": [list(c) for c in list(cm)]}
-    print(r)
+    print("=== sentiment score ===")
+    print("sentiments:", labels)
+    print("macro_f1:", r["macro_f1"])
+    print("sep_f1:")
+    for i, l in enumerate(labels):
+        print(f"{l}: {r['sep_f1'][i]}")
 
     return r

diff --git a/convlab/policy/emoUS/unify/build_data.py b/convlab/policy/emoUS/unify/build_data.py
index 8e84cf7d..d1e2af8f 100644
--- a/convlab/policy/emoUS/unify/build_data.py
+++ b/convlab/policy/emoUS/unify/build_data.py
@@ -143,14 +143,6 @@ class DataBuilder(GenTUSDataBuilder):
 
         return json.dumps(out_str)
 
 
-"""
-TODO
-1. add sentiment in the output str
-2. check exciting/fearful in user goal (domain)
-3. add impolite (user info?)
-"""
-
-
 if __name__ == "__main__":
     args = arg_parser()
-- 
GitLab
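
Note (not part of the patch): a minimal, self-contained sketch of the per-label F1 reporting pattern that the reworked emotion_score()/sentiment_score() prints follow, assuming scikit-learn is installed. The label list and the gold/generated sequences below are made-up placeholders for illustration, not ConvLab data.

    # Illustrative sketch only: mirrors the "macro_f1 / sep_f1 per label" print style.
    from sklearn import metrics

    labels = ["Neutral", "Negative", "Positive"]                      # hypothetical label set
    golden = ["Neutral", "Positive", "Negative", "Neutral", "Positive"]  # hypothetical gold labels
    gen = ["Neutral", "Positive", "Negative", "Positive", "Positive"]    # hypothetical predictions

    macro_f1 = metrics.f1_score(golden, gen, average="macro")
    # average=None returns one F1 score per entry of `labels`, in that order.
    sep_f1 = metrics.f1_score(golden, gen, average=None, labels=labels)

    print("macro_f1:", float(macro_f1))
    print("sep_f1:")
    for label, score in zip(labels, sep_f1):
        print(f"{label}: {score}")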