diff --git a/data/unified_datasets/dailydialog/README.md b/data/unified_datasets/dailydialog/README.md
index 69e627cf2f5fbd7c4f5b10b3450a659afd6822d5..43cbe9b0a64869bc0abed6527d7a2b35a76827b2 100644
--- a/data/unified_datasets/dailydialog/README.md
+++ b/data/unified_datasets/dailydialog/README.md
@@ -18,6 +18,7 @@ DailyDialog is a high-quality multi-turn dialog dataset. It is intriguing in sev
   - Retain emotion annotation in the `emotion` field of each turn.
   - Use nltk to remove space before punctuation: `utt = ' '.join([detokenizer.detokenize(word_tokenize(s)) for s in sent_tokenize(utt)])`.
   - Replace `" ’ "` with `"'"`: `utt = utt.replace(' ’ ', "'")`.
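+  - Add a space after a full stop that is directly followed by a word character, except when the following text starts with `com` (e.g. `.com`): `utt = re.sub('\.(?!com)(\w)', lambda x: '. '+x.group(1), utt)`.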
 - **Annotations:**
   - intent, emotion
 
@@ -33,10 +34,10 @@ English
 
 | split      |   dialogues |   utterances |   avg_utt |   avg_tokens |   avg_domains | cat slot match(state)   | cat slot match(goal)   | cat slot match(dialogue act)   | non-cat slot span(dialogue act)   |
 |------------|-------------|--------------|-----------|--------------|---------------|-------------------------|------------------------|--------------------------------|-----------------------------------|
-| train      |       11118 |        87170 |      7.84 |        11.18 |             1 | -                       | -                      | -                              | -                                 |
-| validation |        1000 |         8069 |      8.07 |        11.14 |             1 | -                       | -                      | -                              | -                                 |
-| test       |        1000 |         7740 |      7.74 |        11.33 |             1 | -                       | -                      | -                              | -                                 |
-| all        |       13118 |       102979 |      7.85 |        11.19 |             1 | -                       | -                      | -                              | -                                 |
+| train      |       11118 |        87170 |      7.84 |        11.22 |             1 | -                       | -                      | -                              | -                                 |
+| validation |        1000 |         8069 |      8.07 |        11.16 |             1 | -                       | -                      | -                              | -                                 |
+| test       |        1000 |         7740 |      7.74 |        11.36 |             1 | -                       | -                      | -                              | -                                 |
+| all        |       13118 |       102979 |      7.85 |        11.22 |             1 | -                       | -                      | -                              | -                                 |
 
 10 domains: ['Ordinary Life', 'School Life', 'Culture & Education', 'Attitude & Emotion', 'Relationship', 'Tourism', 'Health', 'Work', 'Politics', 'Finance']
 - **cat slot match**: how many values of categorical slots are in the possible values of ontology in percentage.
diff --git a/data/unified_datasets/dailydialog/data.zip b/data/unified_datasets/dailydialog/data.zip
index e8f1805a465c0609980e109bf1c2a6b0491ba81f..cb2af33908afc7f9954beb32cb2114605970027c 100644
Binary files a/data/unified_datasets/dailydialog/data.zip and b/data/unified_datasets/dailydialog/data.zip differ
diff --git a/data/unified_datasets/dailydialog/dummy_data.json b/data/unified_datasets/dailydialog/dummy_data.json
index 5da0cbbac9fed4f0efbcb48c88b1647241efa506..404646bbfa4fa01c85832b988a0ed04e39f9a144 100644
--- a/data/unified_datasets/dailydialog/dummy_data.json
+++ b/data/unified_datasets/dailydialog/dummy_data.json
@@ -78,7 +78,7 @@
       },
       {
         "speaker": "user",
-        "utterance": "I guess you are right.But what shall we do? I don't feel like sitting at home.",
+        "utterance": "I guess you are right. But what shall we do? I don't feel like sitting at home.",
         "utt_idx": 4,
         "dialogue_acts": {
           "binary": [
@@ -112,7 +112,7 @@
       },
       {
         "speaker": "user",
-        "utterance": "That's a good idea. I hear Mary and Sally often go there to play pingpong.Perhaps we can make a foursome with them.",
+        "utterance": "That's a good idea. I hear Mary and Sally often go there to play pingpong. Perhaps we can make a foursome with them.",
         "utt_idx": 6,
         "dialogue_acts": {
           "binary": [
@@ -129,7 +129,7 @@
       },
       {
         "speaker": "system",
-        "utterance": "Sounds great to me! If they are willing, we could ask them to go dancing with us.That is excellent exercise and fun, too.",
+        "utterance": "Sounds great to me! If they are willing, we could ask them to go dancing with us. That is excellent exercise and fun, too.",
         "utt_idx": 7,
         "dialogue_acts": {
           "binary": [
@@ -146,7 +146,7 @@
       },
       {
         "speaker": "user",
-        "utterance": "Good.Let' s go now.",
+        "utterance": "Good. Let' s go now.",
         "utt_idx": 8,
         "dialogue_acts": {
           "binary": [
@@ -434,7 +434,7 @@
       },
       {
         "speaker": "user",
-        "utterance": "Don't worry.He is an acrobat 。",
+        "utterance": "Don't worry. He is an acrobat 。",
         "utt_idx": 2,
         "dialogue_acts": {
           "binary": [
@@ -677,7 +677,7 @@
       },
       {
         "speaker": "user",
-        "utterance": "What are you talking about? Let me see that...What are horoscopes?",
+        "utterance": "What are you talking about? Let me see that... What are horoscopes?",
         "utt_idx": 2,
         "dialogue_acts": {
           "binary": [
diff --git a/data/unified_datasets/dailydialog/preprocess.py b/data/unified_datasets/dailydialog/preprocess.py
index de108a747bf7c4acc06a93c75f2bb8c8df5ba924..caea8a543743212913fb86ee83cee69820cd88cb 100644
--- a/data/unified_datasets/dailydialog/preprocess.py
+++ b/data/unified_datasets/dailydialog/preprocess.py
@@ -7,6 +7,7 @@ from collections import Counter
 from pprint import pprint
 from nltk.tokenize import sent_tokenize, word_tokenize
 from nltk.tokenize.treebank import TreebankWordDetokenizer
+import re
 
 topic_map = {
     1: "Ordinary Life", 
@@ -110,8 +111,12 @@ def preprocess():
                     speaker = 'user' if len(dialogue['turns']) % 2 == 0 else 'system'
                     intent = act_map[int(act)]
                     emotion = emotion_map[int(emotion)]
+                    # re-tokenize
                     utt = ' '.join([detokenizer.detokenize(word_tokenize(s)) for s in sent_tokenize(utt)])
+                    # replace with common apostrophe
                     utt = utt.replace(' ’ ', "'")
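+                    # add a space after a full stop directly followed by a word character, unless the following text starts with "com" (keeps ".com" intact)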
+                    utt = re.sub('\.(?!com)(\w)', lambda x: '. '+x.group(1), utt)
 
                     dialogue['turns'].append({
                         'speaker': speaker,