diff --git a/data/unified_datasets/reddit/preprocess.py b/data/unified_datasets/reddit/preprocess.py
index 4aa1f03fe584590645bf2b0d7b9548581baedff4..35824e8a023b7bbaead5593487c6cf427df43a48 100644
--- a/data/unified_datasets/reddit/preprocess.py
+++ b/data/unified_datasets/reddit/preprocess.py
@@ -40,6 +40,8 @@ def preprocess():
             if len(utterance) > 256:
                 # remove dialogs that contain too long utterances
                 return None
+            if 'http://' in utterance or 'https://' in utterance:
+                return None
             speaker = 'system' if i % 2 == 1 else 'user'
             turn = {
                 'speaker': speaker,