diff --git a/Bilder/battle.png b/Bilder/battle.png
new file mode 100644
index 0000000000000000000000000000000000000000..b52050afa73f7fd97fdfcee5712a92e79c7b46f9
Binary files /dev/null and b/Bilder/battle.png differ
diff --git a/baselines.tex b/baselines.tex
index 172884b20f0275cbe534ef0354d100f2c85a11fc..f28562d38f9583338b04cae3e2c8c99e579c90bd 100644
--- a/baselines.tex
+++ b/baselines.tex
@@ -25,6 +25,16 @@ As the \textit{Netflix-Prize} has shown, \textit{research} and \textit{validatio
 Before actually conducting the experiment, the authors took a closer look at the given baselines. In the process, they noticed some \textit{systematic overlaps}, which can be seen in the \textit{table} below.
 \input{overlaps}
 
+These three overlaps show that the models are fundamentally similar and that the main differences arise from different setups and learning procedures.
+Thus, before conducting the actual experiment, the authors examined the two learning methods \textit{stochastic gradient descent} and \textit{Bayesian learning} in combination with \textit{biased matrix-factorization}. For $b_u = b_i = 0$ this is equivalent to \textit{regularized matrix-factorization (RSVD)}, and for $\alpha = \beta = 1$ \textit{weighted regularized matrix-factorization (WR)} is likewise equivalent to \textit{RSVD}. Hence, the only differences are explained by the different configurations of the methods.
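+
+To make these equivalences concrete, the following is a minimal sketch of the \textit{biased matrix-factorization} objective. The notation is assumed to follow \textit{section} \ref{sec:mf}, with $\mathcal{K}$ denoting the set of observed \textit{ratings}; the exact form used by the authors may differ:
+\[
+\min_{p,\, q,\, b} \sum_{(u,i) \in \mathcal{K}} \left( r_{ui} - b_u - b_i - p_u^\top q_i \right)^2 + \lambda \left( \| p_u \|^2 + \| q_i \|^2 + b_u^2 + b_i^2 \right)
+\]
+Setting $b_u = b_i = 0$ removes the \textit{bias} terms and leaves exactly the \textit{RSVD} objective.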
+To prepare the two learning procedures, their parameters were initialized with a \textit{Gaussian distribution} $\mathcal{N}(\mu, 0.1^2)$. The \textit{standard deviation} of 0.1 is the default suggested by the \textit{factorization-machine library libFM}, and \citet{Rendle13} achieved good results on the \textit{Netflix-Prize dataset} with this value. Nothing is said about the parameter $\mu$; however, it can be assumed to lie around the \textit{global average} of the \textit{ratings}, since the \textit{initialization} should already \textit{generate} plausible \textit{ratings}.
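+
+A minimal sketch of the presumed element-wise \textit{initialization} (hypothetical, since only the distribution $\mathcal{N}(\mu, 0.1^2)$ itself is reported):
+\[
+p_{uk} \sim \mathcal{N}(\mu,\, 0.1^2), \qquad q_{ik} \sim \mathcal{N}(\mu,\, 0.1^2), \qquad k = 1, \dots, f
+\]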
+
+For both approaches the number of \textit{sampling steps} was then set to \textit{128}. Since \textit{SGD} has the two additional \textit{hyperparameters} $\lambda$ and $\gamma$, these had to be determined as well. Overall, the \textit{MovieLens10M dataset} was evaluated by \textit{10-fold cross-validation} over \textit{random, global, non-overlapping 90:10 splits}: in each fold, \textit{90\%} of the data was used for \textit{training} and the remaining \textit{10\%} for \textit{evaluation}. To determine the \textit{hyperparameters}, \textit{95\%} of each fold's \textit{training data} was used for \textit{training} and the remaining \textit{5\%} as a \textit{holdout} for \textit{evaluation}. The \textit{hyperparameter search} was performed as described in \textit{section} \ref{sec:sgd} using the \textit{grid} $(\lambda \in \{0.02, 0.03, 0.04, 0.05\}, \gamma \in \{0.001, 0.003\})$, which was inspired by findings during the \textit{Netflix-Prize} \citep{Kor08, Paterek07}. This search yielded $\lambda=0.04$ and $\gamma=0.003$ (see the selection criterion sketched below). Afterwards, both \textit{learning methods} and their settings were compared by plotting the \textit{RMSE} against the \textit{embedding dimension} $f$ of $p_u, q_i \in \mathbb{R}^f$. \textit{Figure} \ref{fig:battle} shows the corresponding results.
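+
+Expressed as a formula, the \textit{grid search} selects the pair that minimizes the \textit{RMSE} on the \textit{5\% holdout}. The notation $\mathrm{RMSE}_{5\%}$ is introduced here purely for illustration and is not taken from the authors:
+\[
+(\lambda^{*}, \gamma^{*}) = \arg\min_{\lambda \in \{0.02,\, 0.03,\, 0.04,\, 0.05\},\ \gamma \in \{0.001,\, 0.003\}} \mathrm{RMSE}_{5\%}(\lambda, \gamma)
+\]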
+\input{battle}
+
+As a \textit{first intermediate result} of the preparation, it can be stated that both \textit{SGD} and the \textit{Gibbs sampler} achieve better \textit{RMSE} values with increasing \textit{embedding dimension}.
+
+In addition, it can be stated that learning with the \textit{Bayesian approach} performs better than learning with \textit{SGD}. Even if more carefully tuned setups could change the results, it is still surprising that \textit{SGD} is worse than the \textit{Bayesian approach}, although the \textit{exact opposite} was reported for \textit{MovieLens10M}. For example, \textit{figure} \ref{fig:reported_results} shows that the \textit{Bayesian approach BPMF} achieved an \textit{RMSE} of \textit{0.8187} while the \textit{SGD approach Biased MF} performed better with \textit{0.803}. That the \textit{Bayesian approach} outperforms \textit{SGD} has already been reported and validated by \citet{Rendle13} and \citet{Rus08} for the \textit{Netflix-Prize dataset}. Looking more closely at \textit{figures} \ref{fig:reported_results} and \ref{fig:battle}, the \textit{Bayesian approach} scores better than the reported \textit{BPMF} and \textit{Biased MF} for every \textit{embedding dimension}. Moreover, it even beats all reported baselines and new methods. Building on this, the authors examined the methods and baselines in more detail.
 \subsubsection{Experiment Implementation}
 \subsection{Observations}
 \subsubsection{Stronger Baselines}
diff --git a/battle.tex b/battle.tex
new file mode 100644
index 0000000000000000000000000000000000000000..6826865056f3626b99abb6e19c6fa81d0e67fdfc
--- /dev/null
+++ b/battle.tex
@@ -0,0 +1,7 @@
+\begin{figure}[!ht]
+  \centering
+    \includegraphics[scale=0.37]{Bilder/battle.png}
+  \caption{Comparison of \textit{matrix-factorization} learned by \textit{Gibbs sampling (Bayesian learning)} and \textit{stochastic gradient descent (SGD)} for \textit{embedding dimensions} from \textit{16} to \textit{512}.
+}
+\label{fig:battle}
+\end{figure}
diff --git a/overlaps.tex b/overlaps.tex
index d01f8366364d39e47f292545e5a3cd3800bd5d11..0a6629819d5609a91bb5e648f1cd8143db7679db 100644
--- a/overlaps.tex
+++ b/overlaps.tex
@@ -8,8 +8,8 @@
 \hline
 \textbf{Methods}                 & \textbf{Overlaps}                                                                             \\ \hline
 \textit{Biased MF}, \textit{RSVD}         & Same method with the only difference being a different setup of the hyperparameters. \\ \hline
-\textit{ALS-WR}, \textit{Biased MF}, \textit{RSVD} & Same models learned through different approaches.                                    \\ \hline
-\textit{BPMF}, \textit{RSVD}, \textit{ALS-WR}      & Same models learned through different approaches.                                    \\ \hline
+\textit{ALS-WR}, \textit{Biased MF}, \textit{RSVD} & Same models, learned with different approaches (\textit{SGD} and \textit{ALS}). \\ \hline
+\textit{BPMF}, \textit{RSVD}, \textit{ALS-WR}      & Completely different learning approaches but fundamentally the same model. \\ \hline
 \end{tabular}%
 }
 \caption{\textit{Systematic consistency} of the \textit{baselines} used on \textit{MovieLens10M}.}
diff --git a/recommender.tex b/recommender.tex
index 2273182b13d3eb7c03219eaa678c91d0470a95d0..acf9de4f83e330bccccb0cd0dc5e4cd7fe3c9182 100644
--- a/recommender.tex
+++ b/recommender.tex
@@ -53,6 +53,7 @@ As a second possibility, \textit{implicit influence} can be included. This can i
 An important point that does not emerge from the above points is the question of how the individual components $p_u, q_i, b_u, b_i$ are constructed. In the following, the three most common methods are presented.
 
 \subsubsection{Stochastic Gradient Descent}
+\label{sec:sgd}
 The best known and most common method when it comes to \textit{machine learning} is \textit{stochastic gradient descent (SGD)}. The goal of \textit{SGD} is to \textit{minimize} the \textit{error} of a given \textit{objective function}. Thus, the estimators mentioned in section \ref{sec:mf} can be used as \textit{objective functions}. In the field of \textit{recommender systems}, \citet{Funk06} presented a \textit{modified} variant of \textit{SGD} in the context of the \textit{Netflix Challenge}. \textit{SGD} can be applied to \textit{regularized matrix-factorization} with \textit{bias} as well as without \textit{bias}. This method can be described by the following pseudocode:
 \begin{algorithm}\label{alg:sgd}
 	\caption{SGD of Funk}
diff --git a/references.bib b/references.bib
index 2b1796ba26a70a3885a1dbe3557330bc77ec9b52..c66f3e4bc777019c86313766a815656940b5372c 100644
--- a/references.bib
+++ b/references.bib
@@ -157,4 +157,22 @@ title = {The BellKor solution to the Netflix Grand Prize}
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {Datasets, MovieLens, ratings, recommendations},
-} 
\ No newline at end of file
+}
+@article{Rendle13,
+  author  = {Steffen Rendle},
+  title   = {Scaling factorization machines to relational data},
+  journal = {Proceedings of the VLDB Endowment},
+  volume  = {6},
+  pages   = {337--348},
+  year    = {2013},
+  doi     = {10.14778/2535573.2488340}
+}
+@inproceedings{Paterek07,
+  author    = {Arkadiusz Paterek},
+  title     = {Improving regularized singular value decomposition for collaborative filtering},
+  booktitle = {Proceedings of KDD Cup and Workshop},
+  year      = {2007}
+}
\ No newline at end of file
diff --git a/submission.pdf b/submission.pdf
index 420cf58a78e7e6c39ba0575c616806e1ebb50149..2315d4853adef794b13235d3219823368f9e7a1a 100644
Binary files a/submission.pdf and b/submission.pdf differ