diff --git a/README.md b/README.md
index 252a121c39c87e00598ae7fbff313c22af7b6f6e..88c2888e95540ea34aec2f7455404018ad5a9990 100755
--- a/README.md
+++ b/README.md
@@ -105,23 +105,22 @@ Performance (the first row is the default config for each module. Empty entries
 
 | NLU         | DST       | Policy         | NLG         | Complete rate | Success rate | Book rate | Inform P/R/F1 | Turn(succ/all) |
 | ----------- | --------- | -------------- | ----------- | ------------- | ------------ | --------- | --------- | -------------- |
-| **BERTNLU** | RuleDST   | RulePolicy     | TemplateNLG | 92.1          | 85.5         | 91.5      | 79.8/92.8/83.8 | 12.7/13.8      |
-| **MILU**    | RuleDST | RulePolicy | TemplateNLG | 89.9          | 83.1         | 90.9      | 78.3/91.7/82.5 | 12.1/13.9      |
-| **SVMNLU**  | RuleDST | RulePolicy | TemplateNLG | 84.2          | 70.4         | 76.1      | 79.1/88.8/81.5 | 14.8/17.7      |
-| BERTNLU | RuleDST | RulePolicy | **SCLSTM**  | 40.1       | 41.0  | 51.5    | 68.5/56.5/59.1 |      11.6/29.2      |
-| BERTNLU     | RuleDST | **MLEPolicy**  | TemplateNLG | 52.6              | 48.4         | 35.5      |  66.3/72.7/66.0 | 12.5/26.3      |
-| BERTNLU | RuleDST | **PGPolicy**   | TemplateNLG | 42.9              | 43.3         | 31.0      |  61.9/66.8/60.4 | 14.7/29.1      |
-| BERTNLU | RuleDST | **PPOPolicy**  | TemplateNLG | 69.7              | 56.6         | 56.6      |  64.8/79.0/68.1 | 12.9/22.1      |
-| BERTNLU | RuleDST | **GDPLPolicy** | TemplateNLG | 57.9              | 49.5         | 33.5      |  67.0/76.4/68.2 | 11.5/24.3      |
-| None        | **MDBT**  | RulePolicy | TemplateNLG |     27.7      |       21.2     |   45.4    |  52.2/41.0/42.4 |   11.8/32.1       |
-| None        | **TRADE** | RulePolicy | TemplateNLG |      29.9      |    25.3       |     36.9     | 49.3/48.1/44.4 |     12.7/24.7     |
-| None        | **SUMBT** | RulePolicy | TemplateNLG |       34.7    |    33.8     |    57.8   |  52.3/50.6/47.3   | 12.1/26.6         |
-| BERTNLU | RuleDST | **MDRG**       | None        | 27.0 | 25.2 | 49.0 | 46.6/43.1/42.0 | 13.6/33.6 |
-| BERTNLU | RuleDST | **HDSA**       | None        | 35.6 | 27.5 | 5.4 | 47.8/57.2/48.8 | 13.0/31.5 |
-| BERTNLU | RuleDST | **LaRL**       | None        | 40.6 | 34.0 | 45.6 | 47.8/54.1/47.6 | 15.0/28.6 |
-| None | **SUMBT** | **LaRL** | None |    39.4|   33.1| 39.5  | 48.5/56.0/48.8| 15.5/28.7|
-| None | None | **Sequicity*** | None | 21.7 | 14.0 | 4.9 | 36.3/35.1/32.0 | 18.2/35.2 |
-| None | None | **DAMD***      | None | 38.5 | 33.6 | 50.9 | 62.1/60.7/57.4 | 10.4/28.2 |
+| **BERTNLU** | RuleDST   | RulePolicy     | TemplateNLG |   90.5       |     81.3    |   91.1 | 79.7/92.6/83.5 | 11.6/12.3      |
+| **MILU**    | RuleDST | RulePolicy | TemplateNLG |    93.3       |   81.8      |   93.0    | 80.4/94.7/84.8 | 11.3/12.1      |
+| **SVMNLU**  | RuleDST | RulePolicy | TemplateNLG |     57.5      |   38.5    |  57.1   | 62.9/67.7/62.6 |  11.0/13.4    |
+| BERTNLU | RuleDST | RulePolicy | **SCLSTM**  |   48.5    | 40.2 | 56.9   | 62.3/62.5/58.7 |  11.9/27.1         |
+| BERTNLU     | RuleDST | **MLEPolicy**  | TemplateNLG |     42.7          |    35.9      |  17.6   | 62.8/69.8/62.9  |  12.1/24.1    |
+| BERTNLU | RuleDST | **PGPolicy**   | TemplateNLG |     37.4         |    31.7     |   17.4  |  57.4/63.7/56.9  |   11.0/25.3    |
+| BERTNLU | RuleDST | **PPOPolicy**  | TemplateNLG |     61.1         |    44.0    |   44.6    | 63.9/76.8/67.2  |  12.5/20.8   |
+| BERTNLU | RuleDST | **GDPLPolicy** | TemplateNLG |     49.4         |     38.4    |  20.1     |  64.5/73.8/65.6 |  11.5/21.3    |
+| None        | **MDBT**  | RulePolicy | TemplateNLG |    12.4       |      7.8     |   30.4  | 32.6/21.5/23.9 |     7.9/25.1    |
+| None        | **TRADE** | RulePolicy | TemplateNLG |    32.4      |    20.1     |    34.7      |  46.9/48.5/44.0 |  11.4/23.9      |
+| None        | **SUMBT** | RulePolicy | TemplateNLG |   34.5       |   29.4     |   62.4    |  54.1/50.3/48.3  |   11.0/28.1     |
+| BERTNLU | RuleDST | **MDRG**       | None        | 21.6 | 17.8 | 31.2 | 39.9/36.3/34.8 | 15.6/30.5|
+| BERTNLU | RuleDST | **LaRL**       | None        | 34.8 | 27.0 | 29.6 | 49.1/53.6/47.8 |13.2/24.4|
+| None | **SUMBT** | **LaRL** | None |  32.9 | 23.7  |  25.9 | 48.6/52.0/46.7 | 12.5/24.3|
+| None | None | **Sequicity*** | None | 23.1| 9.8 | 4.1 | 33.0/32.7/29.9 | 12.2/32.6 |
+| None | None | **DAMD***      | None | 39.5| 34.3 | 51.4 | 60.4/59.8/56.3 | 15.8/29.8 |
 
 *: end-to-end models used as sys_agent directly.
 
diff --git a/convlab2/dialog_agent/agent.py b/convlab2/dialog_agent/agent.py
index adb9d33ea575b091ff7f94a4a4460dc346e68a34..b4083037bcf5ecea4e5504c5f28b73cde6c05a32 100755
--- a/convlab2/dialog_agent/agent.py
+++ b/convlab2/dialog_agent/agent.py
@@ -91,6 +91,26 @@ class PipelineAgent(Agent):
         self.init_session()
         self.history = []
 
+    def state_replace(self, agent_state):
+        """
+        Replace the agent's internal state with deep copies of agent_state['history'] and agent_state['dst_state'].
+        This example implementation covers the scenario where the agent state only depends on self.history and self.dst.state.
+        """
+        self.history = deepcopy(agent_state['history'])
+        self.dst.state = deepcopy(agent_state['dst_state'])
+
+    def state_return(self):
+        """
+        Return the agent's internal state as a dict holding deep copies under the keys 'history' and 'dst_state'.
+        This example implementation covers the scenario where the agent state only depends on self.history and self.dst.state.
+        """
+        agent_state = {}
+        agent_state['history'] = deepcopy(self.history)
+        agent_state['dst_state'] = deepcopy(self.dst.state)
+
+        return agent_state
+
+
     def response(self, observation):
         """Generate agent response using the agent modules."""
         # Note: If you modify the logic of this function, please ensure that it is consistent with deploy.server.ServerCtrl._turn()
diff --git a/convlab2/human_eval/run.py b/convlab2/human_eval/run.py
index c556cc90be67c813e6fe42c8e3201dfddb3021d1..d5945347169ae0b8dcfe9ded7e007f9de4930c75 100755
--- a/convlab2/human_eval/run.py
+++ b/convlab2/human_eval/run.py
@@ -23,7 +23,7 @@ MASTER_QUALIF_SDBOX = {
 LOCALE_QUALIF_SDBOX = {
     'QualificationTypeId': '00000000000000000071',
     "Comparator": "In",
-    'LocaleValues': [{'Country': "HK"}, {'Country': "US"}, {'Country': "CN"}]
+    'LocaleValues': [{'Country': "US"}, {'Country': 'HK'}, {'Country': 'IN'} ]
 }
 
 
diff --git a/convlab2/human_eval/run_agent.py b/convlab2/human_eval/run_agent.py
index 0ae7aae890db103ce618d4c2ee2cf060237840a6..f848674299af9eb6771e273f434d80183cb1def8 100755
--- a/convlab2/human_eval/run_agent.py
+++ b/convlab2/human_eval/run_agent.py
@@ -11,6 +11,7 @@ from threading import Thread
 # Agent
 from convlab2.dialog_agent import PipelineAgent, BiSession
 from convlab2.nlu.milu.multiwoz import MILU
+from convlab2.nlu.jointBERT.multiwoz import BERTNLU
 from convlab2.dst.rule.multiwoz import RuleDST
 from convlab2.policy.rule.multiwoz import RulePolicy
 from convlab2.nlg.template.multiwoz import TemplateNLG
@@ -24,6 +25,7 @@ rgo_queue = PriorityQueue(maxsize=0)
 
 app = Flask(__name__)
 
+# sys_nlu = BERTNLU()
 sys_nlu = MILU()
 sys_dst = RuleDST()
 sys_policy = RulePolicy(character='sys')
@@ -53,19 +55,21 @@ def process():
 def generate_response(in_queue, out_queue):
     while True:
         # pop input
-        last_action = 'null'
+        # last_action = 'null'
         in_request = in_queue.get()
         obs = in_request['input']
         if in_request['agent_state'] == {}:
             agent.init_session()
         else:
-            encoded_state, dst_state, last_action = in_request['agent_state']
-            agent.dst.state = copy.deepcopy(dst_state)
+            # encoded_state, dst_state, last_action = in_request['agent_state']
+            # agent.dst.state = copy.deepcopy(dst_state)
+            agent.state_replace(in_request['agent_state'])
         try:
             action = agent.response(obs)
             print(f'obs:{obs}; action:{action}')
-            dst_state = copy.deepcopy(agent.dst.state)
-            encoded_state = None
+            # dst_state = copy.deepcopy(agent.dst.state)
+            # encoded_state = None
+            in_request['agent_state'] = agent.state_return()
         except Exception as e:
             print('agent error', e)
 
@@ -78,8 +82,9 @@ def generate_response(in_queue, out_queue):
             print('Response generation error', e)
             response = 'What did you say?'
 
-        last_action = action
-        out_queue.put({'response': response, 'agent_state': (encoded_state, dst_state, last_action)})
+        # last_action = action
+        # out_queue.put({'response': response, 'agent_state': (encoded_state, dst_state, last_action)})
+        out_queue.put({'response': response, 'agent_state': in_request['agent_state']})
         in_queue.task_done()
         out_queue.join()
 
diff --git a/convlab2/human_eval/task_config.py b/convlab2/human_eval/task_config.py
index aee64f51743e4fab3b53789a71af83ca92bc0b5f..82f226ce6081a142a15cb1d272c025d5eaa1085d 100755
--- a/convlab2/human_eval/task_config.py
+++ b/convlab2/human_eval/task_config.py
@@ -7,18 +7,18 @@ task_config = {}
 On the Amazon Mechanical Turk web site, the HIT title appears in search results,
 and everywhere the HIT is mentioned.
 """
-task_config['hit_title'] = 'Chat and evaluate bot!'
+task_config['hit_title'] = 'Chat and evaluate bot with a specified goal'
 
 """A description includes detailed information about the kind of task the HIT contains.
 On the Amazon Mechanical Turk web site, the HIT description appears in the expanded
 view of search results, and in the HIT and assignment screens.
 """
-task_config['hit_description'] = 'You will chat to a tour information bot and then evaluate that bot.'
+task_config['hit_description'] = 'You will chat to a tour information bot and then evaluate that bot, type "success" or "fail" once finished'
 
 """One or more words or phrases that describe the HIT, separated by commas.
 On MTurk website, these words are used in searches to find HITs.
 """
-task_config['hit_keywords'] = 'chat,dialog'
+task_config['hit_keywords'] = 'chat,dialog, dialogue, evaluation'
 
 """A detailed task description that will be shown on the HIT task preview page
 and on the left side of the chat page. Supports HTML formatting.
@@ -27,26 +27,56 @@ task_config['task_description'] = \
     """
     (You can keep accepting new HITs after you finish your current one, so keep working on it if you like the task!)
     <br>
-    
-    <span id="user-goal" style="font-size: 16px;"> 
-    </span>
-    
-    <br><br>
-    Chat with the bot naturally and stick to your own goal but <b>do not trivially copy the goal descriptions into the message.</b>
     <br>
-    Once the conversation is done, you will be asked to rate the bot on metrics like <b>goal accomplishment, language understanding, and response naturalness</b>.
+    Chat with the bot naturally and stick to your own goal but do not trivially copy the goal descriptions into the message.
+    <br>
+    <br>
+    You can start the conversation with sentences like <b>'I am looking for / I need to/ etc.'</b> and terminate the dialog session by typing <b> 'success' or 'fail'</b>. Once the conversation is done, you will be asked to rate the bot on metrics like goal accomplishment, language understanding, and response naturalness.
     <br>
-    There is a <b>2 min</b> time limit for each turn.
+    There is a <b>3 min</b> time limit for each turn.
     <br>
     <br>
+    In this task you will chat with an information desk clerk bot to plan your tour according to a given goal.
+    <span id="user-goal" style="font-size: 16px;">
+    </span>
+
+    <br>
+    For example, your given goal and expected conversation could be: <br><br>
+    <table border="1" cellpadding="10">
+    <tr><th>Your goal</th><th>Expected conversation</th></tr>
+    <tr><td>
+    <ul>
+    <li>You are looking for a <b>place to stay</b>. The hotel should be in the <b>cheap</b> price range and should be in the type of <b>hotel</b></li>
+    <li>The hotel should include <b>free parking</b> and should include <b>free wifi</b></li>
+    <li>Once you find the hotel, you want to book it for <b>6</b> people and <b>3</b> nights starting from <b>tuesday</b></li>
+    <li>If the booking fails how about <b>2</b> nights</li>
+    <li>Make sure you get the <b>reference number</b></li>
+    </ul>
+    </td>
+    <td>
+    <b>You: </b>I am looking for a place to stay that has cheap price range it should be in a type of hotel<br>
+    <b>Info desk: </b>Okay, do you have a specific area you want to stay in?<br>
+    <b>You: </b>no, i just need to make sure it's cheap. oh, and i need parking<br>
+    <b>Info desk: </b>I found 1 cheap hotel for you that includes parking. Do you like me to book it?<br>
+    <b>You: </b>Yes, please. 6 people 3 nights starting on tuesday.<br>
+    <b>Info desk: </b>I am sorry but I wasn't able to book that for you for Tuesday. Is there another day you would like to stay or perhaps a shorter stay?<br>
+    <b>You: </b>how about only 2 nights.<br>
+    <b>Info desk: </b>Booking was successful.\nReference number is : 7GAWK763. Anything else I can do for you?<br>
+    <b>You: </b>No, that will be all. Good bye.<br>
+    <b>Info desk: </b>Thank you for using our services.<br>
+    <b>You: </b>Success<br>
+    </td>
+    </table>
+
+
     - Do not reference the task or MTurk itself during the conversation.
     <br>
     <b><span style="color:red">- No racism, sexism or otherwise offensive comments, or the submission will be rejected and we will report to Amazon.</b></span>
     <br>
-    <br>
-    
+
+
     <script type="text/javascript">
-    
+
     function handle_new_message(new_message_id, message) {
       var agent_id = message.id;
       var message_text = message.text
diff --git a/convlab2/human_eval/worlds.py b/convlab2/human_eval/worlds.py
index 7aaac4ce7e6c27dc550c7a44f116a996868e40d5..2158aa8f3ce286bdab0b11eb66cc790f3f11c850 100755
--- a/convlab2/human_eval/worlds.py
+++ b/convlab2/human_eval/worlds.py
@@ -202,10 +202,10 @@ class MultiWozEvalWorld(MTurkTaskWorld):
     def __init__(self, opt, agent,
                  num_extra_trial=2,
                  max_turn=50,
-                 max_resp_time=300,
+                 max_resp_time=180,
                  model_agent_opt=None,
                  world_tag='',
-                 agent_timeout_shutdown=300):
+                 agent_timeout_shutdown=180):
         self.opt = opt
         self.agent = agent
         self.turn_idx = 1
@@ -422,7 +422,6 @@ class MultiWozEvalWorld(MTurkTaskWorld):
                                 if 'text' in acts[idx] and \
                                         acts[idx]['text'] != '':
                                     self.final_goal[domain]['reqt'][slot] = acts[idx]['text']
-                    # print(self.final_goal)
 
                 # Language Understanding Check
                 control_msg['text'] = UNDERSTANDING_MSG
diff --git a/convlab2/nlg/template/multiwoz/nlg.py b/convlab2/nlg/template/multiwoz/nlg.py
index 1329275cebd7c28a51f59b405cff281c5b1f1eeb..9cb0f26d64c35361da541280984ba5169742e338 100755
--- a/convlab2/nlg/template/multiwoz/nlg.py
+++ b/convlab2/nlg/template/multiwoz/nlg.py
@@ -200,7 +200,8 @@ class TemplateNLG(NLG):
                 sentences += sentence
             else:
                 for slot, value in slot_value_pairs:
-                    value_lower = value.lower()
+                    if isinstance(value, str):
+                        value_lower = value.lower()
                     if value in ["do nt care", "do n't care", "dontcare"]:
                         sentence = 'I don\'t care about the {} of the {}'.format(slot, dialog_act.split('-')[0])
                     elif self.is_user and dialog_act.split('-')[1] == 'inform' and slot == 'choice' and value_lower == 'any':
diff --git a/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py b/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py
index 3e717ffe733992869187580274166e0cd71e56ae..a13e5e8157a2fd0fc4a4ba0c1dc9f97b5d6f9993 100755
--- a/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py
+++ b/convlab2/policy/rule/multiwoz/policy_agenda_multiwoz.py
@@ -106,16 +106,20 @@ class UserPolicyAgendaMultiWoz(Policy):
             self.agenda.close_session()
         else:
             sys_action = self._transform_sysact_in(sys_action)
+            # print('sys action before update agenda', sys_action)
             self.agenda.update(sys_action, self.goal)
             if self.goal.task_complete():
                 self.agenda.close_session()
 
-        # A -> A' + user_action
-        # action = self.agenda.get_action(random.randint(2, self.max_initiative))
-        action = self.agenda.get_action(self.max_initiative)
+        action = {}
+        while len(action) == 0:
+            # A -> A' + user_action
+            # action = self.agenda.get_action(random.randint(2, self.max_initiative))
+            action = self.agenda.get_action(self.max_initiative)
 
-        # transform to DA
-        action = self._transform_usract_out(action)
+            # transform to DA
+            action = self._transform_usract_out(action)
+            # print(action)
 
         tuples = []
         for domain_intent, svs in action.items():
@@ -169,6 +173,8 @@ class UserPolicyAgendaMultiWoz(Policy):
                             new_action[new_act].append(['NotBook', 'none'])
                         elif slot is not None:
                             new_action[new_act].append([slot, pairs[1]])
+                    if len(new_action[new_act]) == 0:
+                        new_action.pop(new_act)
                     # new_action[new_act] = [[REF_USR_DA_M[dom.capitalize()].get(pairs[0], pairs[0]), pairs[1]] for pairs in action[act]]
                 else:
                     new_action[act] = action[act]
@@ -848,7 +854,6 @@ class Agenda(object):
         diaacts = []
         slots = []
         values = []
-
         p_diaact, p_slot = self.__check_next_diaact_slot()
         if p_diaact.split('-')[1] == 'inform' and p_slot in BOOK_SLOT:
             for _ in range(10 if self.__cur_push_num == 0 else self.__cur_push_num):
@@ -914,23 +919,23 @@ if __name__ == '__main__':
     from convlab2.dst.rule.multiwoz.dst import RuleDST
     from convlab2.nlu.jointBERT.multiwoz.nlu import BERTNLU
 
-    seed = 50
+    seed = 41
     np.random.seed(seed)
     random.seed(seed)
     torch.manual_seed(seed)
+    #
+    # sys_nlu = BERTNLU()
+    # sys_dst = RuleDST()
+    # sys_policy = RulePolicy()
+    # sys_nlg = TemplateNLG(is_user=False)
+    # sys_agent = PipelineAgent(sys_nlu, sys_dst, sys_policy, sys_nlg, name='sys')
 
-    sys_nlu = BERTNLU()
-    sys_dst = RuleDST()
-    sys_policy = RulePolicy()
-    sys_nlg = TemplateNLG(is_user=False)
-    sys_agent = PipelineAgent(sys_nlu, sys_dst, sys_policy, sys_nlg, name='sys')
-
-    user_nlu = BERTNLU(mode='sys', config_file='multiwoz_sys_context.json',
-                       model_file='https://convlab.blob.core.windows.net/convlab-2/bert_multiwoz_sys_context.zip')
-    user_dst = None
-    user_policy = RulePolicy(character='usr')
-    user_nlg = TemplateNLG(is_user=True)
-    user_agent = PipelineAgent(user_nlu, user_dst, user_policy, user_nlg, name='user')
+    # user_nlu = BERTNLU(mode='sys', config_file='multiwoz_sys_context.json',
+    #                    model_file='https://convlab.blob.core.windows.net/convlab-2/bert_multiwoz_sys_context.zip')
+    # user_dst = None
+    # user_policy = RulePolicy(character='usr')
+    # user_nlg = TemplateNLG(is_user=True)
+    # user_agent = PipelineAgent(user_nlu, user_dst, user_policy, user_nlg, name='user')
 
     # evaluator = MultiWozEvaluator()
     # sess = BiSession(sys_agent=sys_agent, user_agent=user_agent, kb_query=None, evaluator=evaluator)
@@ -938,14 +943,14 @@ if __name__ == '__main__':
 
 
 
-    # user_policy = UserPolicyAgendaMultiWoz()
+    user_policy = UserPolicyAgendaMultiWoz()
     #
-    # sys_policy = RuleBasedMultiwozBot()
+    sys_policy = RulePolicy(character='sys')
     #
-    # user_nlg = TemplateNLG(is_user=True, mode='manual')
-    # sys_nlg = TemplateNLG(is_user=False, mode='manual')
+    user_nlg = TemplateNLG(is_user=True, mode='manual')
+    sys_nlg = TemplateNLG(is_user=False, mode='manual')
     #
-    # dst = RuleDST()
+    dst = RuleDST()
     #
     # user_nlu = BERTNLU(mode='sys', config_file='multiwoz_sys_context.json',
     #                    model_file='https://convlab.blob.core.windows.net/convlab-2/bert_multiwoz_sys_context.zip')
@@ -956,12 +961,24 @@ if __name__ == '__main__':
     #     if 'restaurant' in goal['domain_ordering'] and 'hotel' in goal['domain_ordering']:
     #         break
     # # pprint(goal)
-    user_goal = {'domain_ordering': ('restaurant', 'hotel', 'taxi'),
-                 'hotel': {'book': {'day': 'sunday', 'people': '6', 'stay': '4'},
-                           'info': {'internet': 'no',
-                                    'parking': 'no',
-                                    'pricerange': 'moderate',
-                                    'area': 'centre'}},
+    user_goal = {'domain_ordering': ('hotel', 'attraction'),
+                 'train': {
+                     'info': {'arriveBy': '16:00',
+                              'day': 'monday',
+                              'departure': 'cambridge',
+                              'destination': 'stansted airport'},
+                     'book': {'people': 2}, 'booked': '?'
+                 },
+                 'attraction': {
+                     'info': {'type': 'museum'},
+                     'reqt': ['phone']
+                 },
+                 'hotel': {
+                           'info': {'internet': 'yes',
+                                    'parking': 'yes',
+                                    'stars': '4',
+                                    'type': 'hotel'},
+                           'reqt': ['postcode']},
                  'restaurant': {'info': {'area': 'centre',
                                          'food': 'portuguese',
                                          'pricerange': 'cheap'},
@@ -986,7 +1003,7 @@ if __name__ == '__main__':
     user_policy.init_session(ini_goal=goal)
     print('init goal:')
     # pprint(user_policy.get_goal())
-    pprint(user_agent.policy.get_goal())
+    # pprint(user_agent.policy.get_goal())
     # pprint(sess.evaluator.goal)
     # print('-' * 50)
     # for i in range(20):
@@ -1005,60 +1022,66 @@ if __name__ == '__main__':
     # print('=' * 100)
 
     history = []
-    user_utt = user_agent.response('')
-    print(user_utt)
-    user_utt = 'I need a restaurant . It just needs to be expensive . I am also in the market for a new restaurant . Is there something in the centre of town ? Do you have portuguese food ?'
-    # history.append(['user', user_utt])
-    sys_agent.dst.state['belief_state']['restaurant']['semi']['food'] = 'portuguese'
-    sys_utt = sys_agent.response(user_utt)
-    pprint(sys_agent.dst.state)
-    print(sys_utt)
-    sys_utt = "I have n't found any in the centre. I am unable to find any portuguese restaurants in town ."
-    # history.append(['user', user_utt])
-
-    user_utt = user_agent.response(sys_utt)
-    print(user_utt)
-    user_utt = "It just needs to be cheap ."
-    sys_utt = sys_agent.response(user_utt)
-    print(sys_utt)
-    sys_utt = "It is in the centre area . They serve portuguese . Would you like to try nandos city centre ? They are in the cheap price range . I will book it for you and get a reference number ?"
-
-    user_utt = user_agent.response(sys_utt)
-    print(user_utt)
-    sys_utt = sys_agent.response(user_utt)
-    print(sys_utt)
-
-    user_utt = user_agent.response(sys_utt)
-    print(user_utt)
-    sys_utt = sys_agent.response(user_utt)
-    print(sys_utt)
-
-    user_utt = user_agent.response(sys_utt)
-    print(user_utt)
-    sys_utt = sys_agent.response(user_utt)
-    print(sys_utt)
-
+    # user_utt = user_agent.response('')
+    # print(user_utt)
+    # user_utt = 'I need a restaurant . It just needs to be expensive . I am also in the market for a new restaurant . Is there something in the centre of town ? Do you have portuguese food ?'
+    # # history.append(['user', user_utt])
+    # sys_agent.dst.state['belief_state']['restaurant']['semi']['food'] = 'portuguese'
+    # sys_utt = sys_agent.response(user_utt)
+    # pprint(sys_agent.dst.state)
+    # print(sys_utt)
+    # sys_utt = "I have n't found any in the centre. I am unable to find any portuguese restaurants in town ."
+    # # history.append(['user', user_utt])
     #
-    # print(user_policy.agenda)
-    # user_act = user_policy.predict([])
-    # print(user_act)
-    # user_utt = user_nlg.generate(user_act)
+    # user_utt = user_agent.response(sys_utt)
     # print(user_utt)
-    # state = dst.state
-    # state['user_action'] = user_act
-    # dst.update(user_act)
-    # # pprint(state)
-    # sys_act = sys_policy.predict(state)
-    # sys_utt = sys_nlg.generate(sys_act)
-    # # sys_act.append(["Request", "Restaurant", "Price", "?"])
-    # # sys_act = [['Request', 'Hotel', 'Area', '?'], ['Request', 'Hotel', 'Stars', '?']]
-    # print(sys_act)
+    # user_utt = "It just needs to be cheap ."
+    # sys_utt = sys_agent.response(user_utt)
     # print(sys_utt)
+    # sys_utt = "It is in the centre area . They serve portuguese . Would you like to try nandos city centre ? They are in the cheap price range . I will book it for you and get a reference number ?"
     #
-    # user_act = user_policy.predict(sys_act)
-    # print(user_act)
-    # user_utt = user_nlg.generate(user_act)
+    # user_utt = user_agent.response(sys_utt)
+    # print(user_utt)
+    # sys_utt = sys_agent.response(user_utt)
+    # print(sys_utt)
+    #
+    # user_utt = user_agent.response(sys_utt)
+    # print(user_utt)
+    # sys_utt = sys_agent.response(user_utt)
+    # print(sys_utt)
+    #
+    # user_utt = user_agent.response(sys_utt)
     # print(user_utt)
+    # sys_utt = sys_agent.response(user_utt)
+    # print(sys_utt)
+
+    #
+    print(user_policy.agenda)
+    user_act = user_policy.predict([])
+    print(user_act)
+    user_utt = user_nlg.generate(user_act)
+    print(user_utt)
+    history.append(['user', user_utt])
+    state = dst.state
+    state['user_action'] = user_act
+    dst.update(user_act)
+    # pprint(state)
+    sys_act = sys_policy.predict(state)
+    sys_utt = sys_nlg.generate(sys_act)
+    # sys_act.append(["Request", "Restaurant", "Price", "?"])
+    # sys_act = [['Request', 'Hotel', 'Area', '?'], ['Request', 'Hotel', 'Stars', '?']]
+    sys_act = [['Inform', 'Hotel', 'Post', 'pe296fl']]
+    print(sys_act)
+    history.append(['sys', user_utt])
+
+    # sys_utt = sys_agent.response(user_utt)
+    # print(sys_utt)
+    #
+    user_act = user_policy.predict(sys_act)
+    print(user_act)
+    user_utt = user_nlg.generate(user_act)
+    print(user_utt)
+    history.append(['user', user_utt])
     # state = dst.state
     # state['user_action'] = user_act
     # dst.update(user_act)
@@ -1066,24 +1089,34 @@ if __name__ == '__main__':
     # sys_act = sys_policy.predict(state)
     # # sys_act = [['Inform', 'Hotel', 'Choice', '3']]
     # print(sys_act)
+    sys_act = [
+        ['Inform', 'Hotel', 'Post', 'pe296fl']
+    ]
+    print(sys_act)
+    # sys_utt = sys_agent.response(user_utt)
+    # print(sys_utt)
+    # sys_utt = 'The arrive time is 15:08 . The train will be departing from cambridge . The booking is for arriving in stansted airport . TR6936 will be your perfect fit . How about 14:40 will that work for you ?'
+    # history.append(['sys', user_utt])
     #
     #
-    # user_act = user_policy.predict(sys_act)
-    # print(user_act)
-    # user_utt = user_nlg.generate(user_act)
-    # print(user_utt)
+    # sys_act = user_nlu.predict(sys_utt, history)
+    # print(sys_act)
+    user_act = user_policy.predict(sys_act)
+    print(user_act)
+    user_utt = user_nlg.generate(user_act)
+    print(user_utt)
     # state = dst.state
     # state['user_action'] = user_act
     # dst.update(user_act)
     # # pprint(state)
     # sys_act = sys_policy.predict(state)
-    # # sys_act = [["Book", "Booking", "Ref", "7GAWK763"]]
-    # print(sys_act)
+    sys_act = [['Request', 'Hotel', 'Price', '?'], ['Request', 'Attraction', 'Price', '?']]
+    print(sys_act)
     # #
-    # user_act = user_policy.predict(sys_act)
-    # print(user_act)
-    # user_utt = user_nlg.generate(user_act)
-    # print(user_utt)
+    user_act = user_policy.predict(sys_act)
+    print(user_act)
+    user_utt = user_nlg.generate(user_act)
+    print(user_utt)
     # state = dst.state
     # state['user_action'] = user_act
     # dst.update(user_act)