add sentiment

f92cc630 · Hsien-Chin Lin · 41362a56 · f92cc630 · f92cc630 · f92cc630
Commit f92cc630 authored Jan 11, 2023 by Hsien-Chin Lin
--- a/convlab/policy/emoTUS/emoTUS.py
+++ b/convlab/policy/emoTUS/emoTUS.py
@@ -45,6 +45,8 @@ class UserActionPolicy(GenTUSUserActionPolicy):
                history = self.usr_acts[-1]
            else:
                history = self.usr_acts[-1*self.max_history:]
+
+        # TODO: add user info to the inputs (e.g. whether the user is impolite)?
        inputs = json.dumps({"system": sys_act,
                             "goal": self.goal.get_goal_list(),
                             "history": history,

--- a/convlab/policy/emoTUS/evaluate.py
+++ b/convlab/policy/emoTUS/evaluate.py
@@ -231,7 +231,9 @@ class Evaluator:
        for metric in scores:
            result[metric] = sum(scores[metric])/len(scores[metric])
            print(f"{metric}: {result[metric]}")
-        emo_score = emotion_score(golden_emotions, gen_emotions, self.model_checkpoint, time=self.time)
+        # TODO: no_neutral=True drops "Neutral" from the per-label F1 and the confusion matrix
+        emo_score = emotion_score(golden_emotions, gen_emotions, self.model_checkpoint, 
+        time=self.time, no_neutral=True)
        # for metric in emo_score:
        #     result[metric] = emo_score[metric]
        #     print(f"{metric}: {result[metric]}")
@@ -243,14 +245,16 @@ class Evaluator:
            self.model_checkpoint, f"{self.time}-{self.dataset}-{basename}.json"), 'w'))


-def emotion_score(golden_emotions, gen_emotions, dirname=".", time=""):
+def emotion_score(golden_emotions, gen_emotions, dirname=".", time="", no_neutral=False):
    labels = ["Neutral", "Fearful", "Dissatisfied",
              "Apologetic", "Abusive", "Excited", "Satisfied"]
+    if no_neutral:
+        labels = labels[1:]
    print(labels)
    macro_f1 = metrics.f1_score(golden_emotions, gen_emotions, average="macro")
    sep_f1 = metrics.f1_score(
        golden_emotions, gen_emotions, average=None, labels=labels)
-    cm = metrics.confusion_matrix(golden_emotions, gen_emotions, labels=labels)
+    cm = metrics.confusion_matrix(golden_emotions, gen_emotions, normalize="true", labels=labels)
    disp = metrics.ConfusionMatrixDisplay(
        confusion_matrix=cm, display_labels=labels)
    disp.plot()

--- a/convlab/policy/emoTUS/sent2emo.json
+++ b/convlab/policy/emoTUS/sent2emo.json
+{
+    "Neutral": [
+        "Neutral"
+    ],
+    "Negative": [
+        "Fearful",
+        "Dissatisfied",
+        "Apologetic",
+        "Abusive"
+    ],
+    "Positive": [
+        "Excited",
+        "Satisfied"
+    ]
+}
\ No newline at end of file
--- a/convlab/policy/emoTUS/sentiment.json
+++ b/convlab/policy/emoTUS/sentiment.json
+{
+    "Neutral": 0,
+    "Negative": 1,
+    "Positive": 2
+}
\ No newline at end of file
--- a/convlab/policy/emoTUS/unify/build_data.py
+++ b/convlab/policy/emoTUS/unify/build_data.py
@@ -5,20 +5,20 @@ from argparse import ArgumentParser

 from tqdm import tqdm

-from convlab.policy.genTUS.unify.Goal import Goal, transform_data_act
+from convlab.policy.emoTUS.unify.Goal import Goal, emotion_info
+from convlab.policy.genTUS.unify.build_data import \
+    DataBuilder as GenTUSDataBuilder
+from convlab.policy.genTUS.unify.Goal import transform_data_act
 from convlab.policy.tus.unify.util import create_goal, load_experiment_dataset
-from convlab.policy.genTUS.unify.build_data import DataBuilder as GenTUSDataBuilder
-

 sys.path.append(os.path.dirname(os.path.dirname(
    os.path.dirname(os.path.abspath(__file__)))))

-# TODO add emotion
-

 def arg_parser():
    parser = ArgumentParser()
    parser.add_argument("--dataset", type=str, default="emowoz")
+    parser.add_argument("--use-sentiment", action="store_true")
    parser.add_argument("--dial-ids-order", type=int, default=0)
    parser.add_argument("--split2ratio", type=float, default=1)
    parser.add_argument("--random-order", action="store_true")
@@ -30,12 +30,21 @@ def arg_parser():


 class DataBuilder(GenTUSDataBuilder):
-    def __init__(self, dataset='emowoz'):
+    def __init__(self, dataset='emowoz', use_sentiment=False):
        super().__init__(dataset)
+        self.use_sentiment = use_sentiment
+
        self.emotion = {}
        for emotion, index in json.load(open("convlab/policy/emoTUS/emotion.json")).items():
            self.emotion[int(index)] = emotion

+        if use_sentiment:
+            self.sentiment = {}
+            for sentiment, index in json.load(open("convlab/policy/emoTUS/sentiment.json")).items():
+                self.sentiment[int(index)] = sentiment
+            self.sent2emo = json.load(
+                open("convlab/policy/emoTUS/sent2emo.json"))
+
    def _one_dialog(self, dialog, add_history=True, random_order=False, no_status=False):
        example = []
        history = []
@@ -44,6 +53,13 @@ class DataBuilder(GenTUSDataBuilder):
        if not data_goal:
            return example
        user_goal = Goal(goal=data_goal)
+        user_info = None
+        if self.use_sentiment:
+            user_info = emotion_info(dialog)
+            # if user_info["user"] == "Impolite":
+            #     print(user_info)
+            # if "event" in user_info:
+            #     print(user_info)

        for turn_id in range(0, len(dialog["turns"]), 2):
            sys_act = self._get_sys_act(dialog, turn_id)
@@ -62,7 +78,15 @@ class DataBuilder(GenTUSDataBuilder):
                dialog["turns"][turn_id]["emotion"][-1]["emotion"]]

            in_str = self._dump_in_str(
-                sys_act, usr_goal_str, history, turn_id, add_history)
+                sys_act, usr_goal_str, history, turn_id, add_history, user_info)
+
+            if self.use_sentiment:
+                usr_sentiment = self.sentiment[
+                    dialog["turns"][turn_id]["emotion"][-1]["sentiment"]]
+                out_str = self._dump_out_str(
+                    usr_act, dialog["turns"][turn_id]["utterance"], usr_emotion, usr_sentiment)
+
+            else:
                out_str = self._dump_out_str(
                    usr_act, dialog["turns"][turn_id]["utterance"], usr_emotion)

@@ -72,11 +96,46 @@ class DataBuilder(GenTUSDataBuilder):

        return example

-    def _dump_out_str(self, usr_act, text, usr_emotion):
-        out_str = {"emotion": usr_emotion, "action": usr_act, "text": text}
+    def _dump_in_str(self, sys_act, usr_goal_str, history, turn_id, add_history, user_info=None):
+        in_str = {}
+        in_str["system"] = self._modify_act(sys_act)
+        in_str["goal"] = usr_goal_str
+        if add_history:
+            h = []
+            if history:
+                h = history[-3:]
+            in_str["history"] = h
+            in_str["turn"] = str(int(turn_id/2))
+
+        if self.use_sentiment:
+            for info in ["event", "user"]:
+                if info not in user_info:
+                    continue
+                in_str[info] = user_info[info]
+
+        return json.dumps(in_str)
+
+    def _dump_out_str(self, usr_act, text, usr_emotion, usr_sentiment=None):
+        if self.use_sentiment:
+            out_str = {"sentiment": usr_sentiment,
+                       "action": usr_act,
+                       "emotion": usr_emotion,
+                       "text": text}
+        else:
+            out_str = {"emotion": usr_emotion,
+                       "action": usr_act,
+                       "text": text}
        return json.dumps(out_str)


+"""
+TODO
+1. add the sentiment to the output string
+2. check for exciting/fearful in the user goal (domain)
+3. add impolite (as user info?)
+"""
+
+
 if __name__ == "__main__":
    args = arg_parser()

@@ -91,7 +150,9 @@ if __name__ == "__main__":
        data_name=args.dataset,
        dial_ids_order=args.dial_ids_order,
        split2ratio=args.split2ratio)
-    data_builder = DataBuilder(dataset=args.dataset)
+    data_builder = DataBuilder(
+        dataset=args.dataset,
+        use_sentiment=args.use_sentiment)
    data = data_builder.setup_data(
        raw_data=dataset,
        random_order=args.random_order,

--- a/convlab/policy/emoTUS/unify/knowledge_graph.py
+++ b/convlab/policy/emoTUS/unify/knowledge_graph.py
@@ -13,15 +13,37 @@ DATASET = "unify"


 class KnowledgeGraph(GenTUSKnowledgeGraph):
-    def __init__(self, tokenizer: BartTokenizer, ontology_file=None, dataset="emowoz"):
+    def __init__(self, tokenizer: BartTokenizer, ontology_file=None, dataset="emowoz", use_sentiment=False):
        super().__init__(tokenizer, ontology_file, dataset="multiwoz")
-        data_emotion = json.load(open("convlab/policy/emoTUS/emotion.json"))
+        self.use_sentiment = use_sentiment
+
+        if use_sentiment:
+            data_sentiment = json.load(
+                open("convlab/policy/emoTUS/sentiment.json"))
+            self.kg_map = {"sentiment": tokenMap(tokenizer=self.tokenizer)}
+            self.sentiment = [""]*len(data_sentiment)
+            for sentiment, index in data_sentiment.items():
+                self.sentiment[index] = sentiment
+            for sentiment in self.sentiment:
+                self.kg_map["sentiment"].add_token(sentiment, sentiment)
+                self.kg_map[sentiment] = tokenMap(tokenizer=self.tokenizer)
+            self.sent2emo = json.load(
+                open("convlab/policy/emoTUS/sent2emo.json"))
+            for sent in self.sent2emo:
+                for emo in self.sent2emo[sent]:
+                    self.kg_map[sent].add_token(emo, emo)
+
+        else:
+            data_emotion = json.load(
+                open("convlab/policy/emoTUS/emotion.json"))
            self.emotion = [""]*len(data_emotion)
            for emotion, index in data_emotion.items():
                self.emotion[index] = emotion
-
            self.kg_map = {"emotion": tokenMap(tokenizer=self.tokenizer)}
-        self.prior = {"Neutral": 1,
+            for emotion in self.emotion:
+                self.kg_map["emotion"].add_token(emotion, emotion)
+
+        self.emotion_weight = {"Neutral": 1,
                               "Fearful": 1,
                               "Dissatisfied": 1,
                               "Apologetic": 1,
@@ -29,17 +51,25 @@ class KnowledgeGraph(GenTUSKnowledgeGraph):
                               "Excited": 1,
                               "Satisfied": 1}

-        for emotion in self.emotion:
-            self.kg_map["emotion"].add_token(emotion, emotion)
-
-    def get_emotion(self, outputs, mode="max", emotion_mode="normal"):
+    def get_sentiment(self, outputs, mode="max"):
+        score = self._get_max_score(
+            outputs, self.sentiment, "sentiment")
+        s = self._select(score, mode)
+        return score[s]

+    def get_emotion(self, outputs, mode="max", emotion_mode="normal", sentiment=None):
+        if self.use_sentiment:
+            if not sentiment:
+                print("You are in 'use_sentiment' mode. Please provide sentiment")
+            score = self._get_max_score(
+                outputs, self.sent2emo[sentiment], "sentiment")
+        else:
            if emotion_mode == "normal":
                score = self._get_max_score(
-            outputs, self.emotion, "emotion", weight=self.prior)
+                    outputs, self.emotion, "emotion", weight=self.emotion_weight)
            elif emotion_mode == "no_neutral":
                score = self._get_max_score(
-            outputs, self.emotion[1:], "emotion", weight=self.prior)
+                    outputs, self.emotion[1:], "emotion", weight=self.emotion_weight)
            else:
                print(f"unknown emotion mode: {emotion_mode}")
        s = self._select(score, mode)

--- a/convlab/policy/genTUS/unify/Goal.py
+++ b/convlab/policy/genTUS/unify/Goal.py
@@ -40,7 +40,7 @@ class Goal:
               json.dumps(self.domain_goals, indent=4) + \
               '\n-----Goal-----'

-    def _init_goal_from_data(self, goal=None, goal_generator=None):
+    def _old_goal(self, goal=None, goal_generator=None):
        if not goal and goal_generator:
            goal = ABUS_Goal(goal_generator)
            self.raw_goal = goal.domain_goals
@@ -56,6 +56,10 @@ class Goal:

        # else:
        #     print("unknow goal")
+        return goal
+
+    def _init_goal_from_data(self, goal=None, goal_generator=None):
+        goal = self._old_goal(goal, goal_generator)

        # be careful of this order
        for domain, intent, slot, value in goal: