Skip to content
Snippets Groups Projects
Commit 64e8a6a8 authored by Christian's avatar Christian
Browse files

the last system act is now updated in environment to be consistent with...

the last system act is now updated in environment to be consistent with pipeline agent and not let the policy change it, which is more error-prone
parent b5b66cfa
Branches change_system_act_in_env
No related tags found
Loading
...@@ -28,6 +28,8 @@ class Environment(): ...@@ -28,6 +28,8 @@ class Environment():
return self.sys_dst.state return self.sys_dst.state
def step(self, action): def step(self, action):
# save last system action
self.sys_dst.state['system_action'] = action
if not self.use_semantic_acts: if not self.use_semantic_acts:
model_response = self.sys_nlg.generate( model_response = self.sys_nlg.generate(
action) if self.sys_nlg else action action) if self.sys_nlg else action
...@@ -49,7 +51,6 @@ class Environment(): ...@@ -49,7 +51,6 @@ class Environment():
self.sys_dst.state['user_action'] = dialog_act self.sys_dst.state['user_action'] = dialog_act
state = self.sys_dst.update(dialog_act) state = self.sys_dst.update(dialog_act)
state = deepcopy(state) state = deepcopy(state)
dialog_act = self.sys_dst.state['user_action']
state['history'].append(["sys", model_response]) state['history'].append(["sys", model_response])
state['history'].append(["usr", observation]) state['history'].append(["usr", observation])
......
...@@ -76,7 +76,6 @@ class GDPL(Policy): ...@@ -76,7 +76,6 @@ class GDPL(Policy):
# print('True :') # print('True :')
# print(a) # print(a)
action = self.vector.action_devectorize(a.detach().numpy()) action = self.vector.action_devectorize(a.detach().numpy())
state['system_action'] = action
self.info_dict["action_used"] = action self.info_dict["action_used"] = action
# for key in state.keys(): # for key in state.keys():
# print("Key : {} , Value : {}".format(key,state[key])) # print("Key : {} , Value : {}".format(key,state[key]))
......
...@@ -74,7 +74,6 @@ class PG(Policy): ...@@ -74,7 +74,6 @@ class PG(Policy):
# print('True :') # print('True :')
# print(a) # print(a)
action = self.vector.action_devectorize(a.detach().numpy()) action = self.vector.action_devectorize(a.detach().numpy())
state['system_action'] = action
self.info_dict["action_used"] = action self.info_dict["action_used"] = action
# for key in state.keys(): # for key in state.keys():
# print("Key : {} , Value : {}".format(key,state[key])) # print("Key : {} , Value : {}".format(key,state[key]))
......
...@@ -85,7 +85,6 @@ class PPO(Policy): ...@@ -85,7 +85,6 @@ class PPO(Policy):
# print('True :') # print('True :')
# print(a) # print(a)
action = self.vector.action_devectorize(a.detach().numpy()) action = self.vector.action_devectorize(a.detach().numpy())
state['system_action'] = action
self.info_dict["action_used"] = action self.info_dict["action_used"] = action
# for key in state.keys(): # for key in state.keys():
# print("Key : {} , Value : {}".format(key,state[key])) # print("Key : {} , Value : {}".format(key,state[key]))
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment