From 28342786268f2d4af5a54cc64c7b3ccb8e442d22 Mon Sep 17 00:00:00 2001
From: feger <marc.feger@hhu.de>
Date: Sun, 14 Apr 2019 16:46:28 +0200
Subject: [PATCH] Add A2.2 and add more doc-strings

---
 Aufgabe2/FileReader.py          |  10 +++
 Aufgabe2/FileWriter.py          |  17 ++++++
 Aufgabe2/files/output.documents |   1 +
 Aufgabe2/files/output.query     |   1 +
 Aufgabe2/program.py             |  39 +++++++++---
 Aufgabe2/rocchio.py             | 105 ++++++++++++++++++++++++++++++++
 6 files changed, 163 insertions(+), 10 deletions(-)
 create mode 100644 Aufgabe2/FileWriter.py
 create mode 100644 Aufgabe2/files/output.documents
 create mode 100644 Aufgabe2/files/output.query
 create mode 100644 Aufgabe2/rocchio.py

diff --git a/Aufgabe2/FileReader.py b/Aufgabe2/FileReader.py
index 3c9c27b..9a970af 100644
--- a/Aufgabe2/FileReader.py
+++ b/Aufgabe2/FileReader.py
@@ -1,3 +1,4 @@
+import ast
 from typing import List, Dict
 
 
@@ -24,3 +25,12 @@ class FileReader(object):
                     "text": line[1]
                 })
             return file_as_list
+
+    def read_document_and_eval_type_of_content(self):
+        """
+        Reads the file and evaluates its content as a Python literal (via ast.literal_eval).
+
+        :return: The parsed content, e.g. a list of dicts or a single dict.
+        """
+        with open(self.filename) as f:
+            return ast.literal_eval(f.read())
diff --git a/Aufgabe2/FileWriter.py b/Aufgabe2/FileWriter.py
new file mode 100644
index 0000000..d1e979d
--- /dev/null
+++ b/Aufgabe2/FileWriter.py
@@ -0,0 +1,17 @@
+import json
+
+
+class FileWriter(object):
+
+    def __init__(self, filename: str, documents):
+        self.filename: str = filename
+        self.documents = documents
+
+    def write_to_file(self):
+        """
+        Serializes the stored documents as JSON and writes them to the file.
+
+        :return: None
+        """
+        with open(self.filename, 'w') as file:
+            json.dump(self.documents, file)
diff --git a/Aufgabe2/files/output.documents b/Aufgabe2/files/output.documents
new file mode 100644
index 0000000..843185f
--- /dev/null
+++ b/Aufgabe2/files/output.documents
@@ -0,0 +1 @@
+[{"filename": "1", "text": "drei Raben auf einem Baum", "filtered_text": ["Baum", "Raben", "drei"], "filtered_stemmed_text": ["baum", "rab", "drei"], "counted_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 1, "beim": 0, "drei": 1, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 1, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "relative_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0.3333333333333333, "beim": 0, "drei": 0.3333333333333333, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0.3333333333333333, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "document_vector": {"angegriff": 0, "apfelkuch": 0, "baum": 0.3656366710026855, "beim": 0, "drei": 0.4659800028906792, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0.4659800028906792, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "cosine": 0.0}, {"filename": "2", "text": "Tiger in Indien", "filtered_text": ["Indien", "Tiger"], "filtered_stemmed_text": ["indi", "tig"], "counted_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 1, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 1, "wildbahn": 0, "wood": 0}, "relative_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0.5, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 
0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.5, "wildbahn": 0, "wood": 0}, "document_vector": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0.6989700043360189, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.309894379144197, "wildbahn": 0, "wood": 0}, "cosine": 0.101}, {"filename": "3", "text": "Sonnenblummenfelder im Sommer", "filtered_text": ["Sommer", "Sonnenblummenfelder"], "filtered_stemmed_text": ["somm", "sonnenblummenfeld"], "counted_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 1, "sonnenblummenfeld": 1, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "relative_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0.5, "sonnenblummenfeld": 0.5, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "document_vector": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0.6989700043360189, "sonnenblummenfeld": 0.6989700043360189, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "cosine": 0.0}, {"filename": "4", "text": "s\u00fc\u00dfe kleine Tiger spielen an Baum", "filtered_text": ["Baum", "Tiger", "kleine", "spielen", "s\u00fc\u00dfe"], "filtered_stemmed_text": ["baum", "tig", "klein", "spiel", "suss"], 
"counted_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 1, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 1, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 1, "suss": 1, "tig": 1, "wildbahn": 0, "wood": 0}, "relative_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0.2, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0.2, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0.2, "suss": 0.2, "tig": 0.2, "wildbahn": 0, "wood": 0}, "document_vector": {"angegriff": 0, "apfelkuch": 0, "baum": 0.2193820026016113, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0.27958800173440757, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0.27958800173440757, "suss": 0.27958800173440757, "tig": 0.1239577516576788, "wildbahn": 0, "wood": 0}, "cosine": 0.056}, {"filename": "5", "text": "Mann von Tigern angegriffen", "filtered_text": ["Mann", "Tigern", "angegriffen"], "filtered_stemmed_text": ["mann", "tig", "angegriff"], "counted_filtered_stemmed_text": {"angegriff": 1, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 1, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 1, "wildbahn": 0, "wood": 0}, "relative_filtered_stemmed_text": {"angegriff": 0.3333333333333333, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0.3333333333333333, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, 
"suss": 0, "tig": 0.3333333333333333, "wildbahn": 0, "wood": 0}, "document_vector": {"angegriff": 0.4659800028906792, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0.4659800028906792, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.20659625276279797, "wildbahn": 0, "wood": 0}, "cosine": 0.074}, {"filename": "6", "text": "Tiger II Panzer", "filtered_text": ["II", "Panzer", "Tiger"], "filtered_stemmed_text": ["ii", "panz", "tig"], "counted_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 1, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 1, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 1, "wildbahn": 0, "wood": 0}, "relative_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0.3333333333333333, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0.3333333333333333, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.3333333333333333, "wildbahn": 0, "wood": 0}, "document_vector": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0.4659800028906792, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0.4659800028906792, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.20659625276279797, "wildbahn": 0, "wood": 0}, "cosine": 0.074}, {"filename": "7", "text": "Apfelkuchen mit Sahne", "filtered_text": ["Apfelkuchen", "Sahne"], "filtered_stemmed_text": ["apfelkuch", "sahn"], "counted_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 1, "baum": 0, "beim": 0, "drei": 
0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 1, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "relative_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0.5, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0.5, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "document_vector": {"angegriff": 0, "apfelkuch": 0.6989700043360189, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0.6989700043360189, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "cosine": 0.0}, {"filename": "8", "text": "der gestiefelte Kater Kost\u00fcm", "filtered_text": ["Kater", "Kost\u00fcm", "gestiefelte"], "filtered_stemmed_text": ["kat", "kostum", "gestiefelt"], "counted_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 1, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 1, "klein": 0, "kostum": 1, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "relative_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0, "gestiefelt": 0.3333333333333333, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0.3333333333333333, "klein": 0, "kostum": 0.3333333333333333, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "document_vector": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, 
"drei": 0, "freier": 0, "gestiefelt": 0.4659800028906792, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0.4659800028906792, "klein": 0, "kostum": 0.4659800028906792, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0, "wildbahn": 0, "wood": 0}, "cosine": 0.0}, {"filename": "9", "text": "Tiger Woods beim Golfen in Kalifornien", "filtered_text": ["Golfen", "Kalifornien", "Tiger", "Woods", "beim"], "filtered_stemmed_text": ["golf", "kaliforni", "tig", "wood", "beim"], "counted_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 1, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 1, "ii": 0, "indi": 0, "kaliforni": 1, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 1, "wildbahn": 0, "wood": 1}, "relative_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0.2, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0.2, "ii": 0, "indi": 0, "kaliforni": 0.2, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.2, "wildbahn": 0, "wood": 0.2}, "document_vector": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0.27958800173440757, "drei": 0, "freier": 0, "gestiefelt": 0, "golf": 0.27958800173440757, "ii": 0, "indi": 0, "kaliforni": 0.27958800173440757, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.1239577516576788, "wildbahn": 0, "wood": 0.27958800173440757}, "cosine": 0.327}, {"filename": "10", "text": "Tiger in freier Wildbahn", "filtered_text": ["Tiger", "Wildbahn", "freier"], "filtered_stemmed_text": ["tig", "wildbahn", "freier"], "counted_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 1, "gestiefelt": 0, "golf": 0, "ii": 0, 
"indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 1, "wildbahn": 1, "wood": 0}, "relative_filtered_stemmed_text": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0.3333333333333333, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.3333333333333333, "wildbahn": 0.3333333333333333, "wood": 0}, "document_vector": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0.4659800028906792, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.20659625276279797, "wildbahn": 0.4659800028906792, "wood": 0}, "cosine": 0.829}]
\ No newline at end of file
diff --git a/Aufgabe2/files/output.query b/Aufgabe2/files/output.query
new file mode 100644
index 0000000..00a3f07
--- /dev/null
+++ b/Aufgabe2/files/output.query
@@ -0,0 +1 @@
+{"text": "Tiger Woods in freier Wildbahn", "filtered_query": ["Tiger", "Wildbahn", "Woods", "freier"], "filtered_stemmed_query": ["tig", "wildbahn", "wood", "freier"], "counted_filtered_stemmed_query": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 1, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 1, "wildbahn": 1, "wood": 1}, "relative_filtered_stemmed_query": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0.25, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.25, "wildbahn": 0.25, "wood": 0.25}, "document_vector": {"angegriff": 0, "apfelkuch": 0, "baum": 0, "beim": 0, "drei": 0, "freier": 0.34948500216800943, "gestiefelt": 0, "golf": 0, "ii": 0, "indi": 0, "kaliforni": 0, "kat": 0, "klein": 0, "kostum": 0, "mann": 0, "panz": 0, "rab": 0, "sahn": 0, "somm": 0, "sonnenblummenfeld": 0, "spiel": 0, "suss": 0, "tig": 0.1549471895720985, "wildbahn": 0.34948500216800943, "wood": 0.34948500216800943}}
\ No newline at end of file
diff --git a/Aufgabe2/program.py b/Aufgabe2/program.py
index cfe71a9..34d48a6 100644
--- a/Aufgabe2/program.py
+++ b/Aufgabe2/program.py
@@ -9,6 +9,7 @@ import numpy as np
 from nltk.corpus import stopwords
 
 from FileReader import FileReader
+from FileWriter import FileWriter
 
 EXAMPLE = False
 LANGUAGE = 'german'
@@ -80,7 +81,8 @@ def add_document_vectors(target: List[Dict], vocabulary: List, key: str, example
                     relative_counted_document_vector[term] = counted_term.get(term)
                 else:
                     # Maybe the relative count of all terms per document and not all terms in the vocabulary ?
-                    relative_counted_document_vector[term] = counted_term.get(term) / len(vocabulary)
+                    # Normalized by the length of element[key]; the alternative would be len(vocabulary).
+                    relative_counted_document_vector[term] = counted_term.get(term) / len(element.get(key))
         element['counted_' + key]: Dict = counted_document_vector
         element['relative_' + key]: Dict = relative_counted_document_vector
 
@@ -152,32 +154,46 @@ def cosine(v1: List, v2: List):
     return counter / divider
 
 
-def add_cosine_similarity(documents: List[Dict], query: Dict) -> None:
+def add_cosine_similarity(documents: List[Dict], query: Dict, tag='') -> None:
     """
     Adds the cosine-similarity between a document and the query.
 
+    :param tag: Optional suffix: reads the query's 'document_vector_<tag>' and stores the result as 'cosine_<tag>'.
     :param documents: A list of documents.
     :param query: The query to be looked for.
     :return: None
     """
-    v2: List = [value for _, value in query.get('document_vector').items()]
+
+    if tag != '':
+        tag = '_' + tag
+
+    v2: List = [value for _, value in query.get('document_vector' + tag).items()]
     for document in documents:
         v1: List = [value for _, value in document.get('document_vector').items()]
-        document['cosine']: Double = round(cosine(v1, v2), 3)
+        document['cosine' + tag]: Double = round(cosine(v1, v2), 3)
 
 
-def print_results(documents: List[Dict]) -> None:
+def print_results(documents: List[Dict], query: Dict, tag='') -> None:
     """
     Pretty print the documents.
 
+    :param query: The search query.
+    :param tag: Optional suffix: if set, the documents are ranked by 'cosine_<tag>' instead of 'cosine'.
     :param documents: List of documents
     :return: None
     """
+
+    if tag != '':
+        tag = '_' + tag
+
     print('###############################################################')
-    documents: List[Dict] = sorted(documents, key=lambda k: k['cosine'], reverse=True)
+    documents: List[Dict] = sorted(documents, key=lambda k: k['cosine' + tag], reverse=True)
     top5_documents: List[Dict] = documents[:5]
+    print('Query: ' + query.get('text'))
     for document in top5_documents:
-        print('Document: ' + document.get('filename') + ' has a cosine similarity of: ' + str(document.get('cosine')))
+        print('Document: ' + document.get('filename') + ', Text: ' + document.get(
+            'text') + ', has a cosine similarity of: ' + str(
+            round(document.get('cosine' + tag), 3)))
     print('###############################################################')
 
 
@@ -190,10 +206,10 @@ if __name__ == '__main__':
     
     To run the example of A1:
         -> Change example to True
-        run $ python program.py meta_a1.txt "Tasse Kanne"
+        run $ python program.py ./files/meta_a1.txt "Tasse Kanne"
     Else
         -> Make sure example is False.
-        run $ python program.py meta.txt "<query>"
+        run $ python program.py ./files/meta.txt "<query>"
     
     :arg 1: File which contains all documents (Two columns separated with \t and ends with \n)
     :arg 2: Query to be looked for (must be surrounded with "")
@@ -224,4 +240,7 @@ if __name__ == '__main__':
     # print('All Documents: ' + json.dumps(all_documents, indent=3))
     # print('Search Query: ' + json.dumps(search_query, indent=3))
     #
-    print_results(all_documents)
+    print_results(all_documents, search_query)
+
+    FileWriter('./files/output.documents', all_documents).write_to_file()
+    FileWriter('./files/output.query', search_query).write_to_file()
diff --git a/Aufgabe2/rocchio.py b/Aufgabe2/rocchio.py
new file mode 100644
index 0000000..7f8e7e9
--- /dev/null
+++ b/Aufgabe2/rocchio.py
@@ -0,0 +1,105 @@
+from tokenize import Double
+from typing import List, Dict, Tuple
+
+import numpy as np
+
+from FileReader import FileReader
+from program import print_results, add_cosine_similarity
+
+
+def assemble_dr_dnr(feedback: List, documents: List[Dict]) -> Tuple[List, List]:
+    """
+    This function assembles the sets Dr (relevant) and Dnr (non-relevant) for Rocchio.
+    It is assumed that all elements of Dr are taken from the top-5 results and that Dnr contains all other documents.
+    Therefore it is assumed that: Dr U Dnr = D (all documents)
+
+    :param feedback: List of relevant document names (as ints)
+    :param documents: List of all documents in the system
+    :return: Dr, Dnr (lists of document names as ints)
+    """
+
+    unique_documents = list(set([int(document.get('filename')) for document in documents]))
+    dr = list(set([int(d) for d in unique_documents if d in feedback]))
+    dnr = list(set([int(d) for d in unique_documents if d not in dr]))
+
+    return dr, dnr
+
+
+def dict_to_list(target: Dict) -> List:
+    """
+    Turns a dict into a list of its values.
+
+    :param target: Dict to be turned into a list.
+    :return: A list containing all values of a dict
+    """
+    return [value for _, value in target.items()]
+
+
+def q_m(query: Dict, documents: List[Dict], feedback: List, alpha: Double, beta: Double, gamma: Double):
+    """
+    Calculates the new query vector as defined by Rocchio.
+
+    :param query: The search query
+    :param documents: The list of all documents
+    :param feedback: The list with the relevant document names
+    :param alpha: Value for alpha
+    :param beta: Value for beta
+    :param gamma: Value for gamma
+    :return: None; the new query vector is stored in query['document_vector_new']
+    """
+    relevant_documents, not_relevant_documents = assemble_dr_dnr(feedback, documents)
+
+    addend_1: List = list(np.multiply(alpha, dict_to_list(query.get('document_vector'))))
+
+    addend_2: List = list(np.zeros(len(query.get('document_vector'))))
+    if len(relevant_documents) != 0:
+        for document in documents:
+            if int(document.get('filename')) in relevant_documents:
+                addend_2 = list(np.add(addend_2, dict_to_list(document.get('document_vector'))))
+
+        addend_2 = list(np.multiply((beta / len(relevant_documents)), addend_2))
+
+    addend_3: List = list(np.zeros(len(query.get('document_vector'))))
+    if len(not_relevant_documents) != 0:
+        for document in documents:
+            if int(document.get('filename')) in not_relevant_documents:
+                addend_3 = list(np.add(addend_3, dict_to_list(document.get('document_vector'))))
+        addend_3 = list(np.multiply((gamma / len(not_relevant_documents)), addend_3))
+
+    qm_tmp = list(np.add(addend_1, addend_2))
+    qm_tmp = list(np.subtract(qm_tmp, addend_3))
+
+    qm = dict(list(zip(query.get('document_vector').keys(), qm_tmp)))
+
+    query['document_vector_new'] = qm
+
+
+if __name__ == '__main__':
+    """
+    This program does a one-step Rocchio improvement for the search query.
+    The values for the search query and the documents are read from the corresponding
+    files in ./files. Then the user is asked for relevance feedback.
+    Note: Enter only valid document ids that are also in the top 5.
+    
+    Before running this code make sure you have generated all files in /files.
+    To do so run e.g:
+        $ python program.py ./files/meta.txt "Tiger Woods in freier Wildbahn"
+    To run the code use:
+        $ python rocchio.py
+    """
+    all_documents = FileReader('./files/output.documents').read_document_and_eval_type_of_content()
+    search_query = FileReader('./files/output.query').read_document_and_eval_type_of_content()
+
+    print('Top-5 results:')
+    print_results(all_documents, search_query)
+    print('Attention: Input must be int! And please use only documents in the Top-5!')
+
+    input_of_relevant_documents = [int(x) for x in
+                                   input("Please enter the names of the documents you find relevant: ").split()]
+
+    q_m(search_query, all_documents, input_of_relevant_documents, 1, 0.8, 0.1)
+
+    add_cosine_similarity(all_documents, search_query, 'new')
+
+    print('Top-5 results (new):')
+    print_results(all_documents, search_query, 'new')
-- 
GitLab