diff --git a/A1/README.md b/A1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..805fbf00d4b84b7c344c042e6c60537bf88ce305 --- /dev/null +++ b/A1/README.md @@ -0,0 +1,19 @@ +# A1 + + +## a) + + +## b) +Es wurden die initialen Centroiden P7, P8, P9 gewählt. +Zu sehen ist, dass ein anderes Cluster entsteht. +Die Cluster aus A1a) machen jedoch mehr Sinn, da dort tatsächlich drei Cluster zu sehen sind. +Bei dem unten stehenden Bild sind die zwei unteren Punktewolken disjunkt, aber dennoch in einem Cluster. + + +## c) +Ja, es kommt auf die Reihenfolge an. +Spiegelt man die Reihenfolge der Daten P1, ..., P10 zu P10, ..., P1, und lässt man die Center P3, P4 und P8 gleich, so sieht man, dass die +Cluster-Label anders sind. +Die Reihenfolge, in der die Daten mit den Centroiden verglichen werden, wird durch das Spiegeln der Daten auch gespiegelt. + \ No newline at end of file diff --git a/A1/images/a.jpg b/A1/images/a.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fcf1fa4bdf212d5db50eb96d3b03963f4f3f6a42 Binary files /dev/null and b/A1/images/a.jpg differ diff --git a/A1/images/b.jpg b/A1/images/b.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8518de009ad796166a6e7b560d686f194cd70f4d Binary files /dev/null and b/A1/images/b.jpg differ diff --git a/A1/images/c.jpg b/A1/images/c.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2e40170f516157168702f48e4d15580ee4f1b34d Binary files /dev/null and b/A1/images/c.jpg differ diff --git a/A1/kmeans.py b/A1/kmeans.py new file mode 100644 index 0000000000000000000000000000000000000000..c4d0f98bd9ede9ccbb34fe74b8426a89974020a1 --- /dev/null +++ b/A1/kmeans.py @@ -0,0 +1,78 @@ +from typing import List + +import matplotlib.pyplot as plt +import numpy as np + + +def euclidean(vector1: List, vector2: List) -> float: + """ + This method calculates the euclidean distance. 
def pairwise_arg_min(X: List, Y: List) -> np.ndarray:
    """
    Return, for every point in X, the index of its nearest centroid in Y.

    Distances are Euclidean. When several centroids are equally close the
    lowest index wins (``np.argmin`` semantics).

    :param X: Data points, one feature vector (or scalar) per entry
    :param Y: Centroids, one feature vector (or scalar) per entry
    :return: Integer array of length ``len(X)`` holding, for each point, the
             index into Y of the closest centroid.
    :raises ValueError: If Y contains no centroid.
    """
    points = np.asarray(X, dtype=float)
    centroids = np.asarray(Y, dtype=float)

    if points.shape[0] == 0:
        # Nothing to assign; avoid reshaping a zero-sized array below.
        return np.empty(0, dtype=np.intp)
    if centroids.shape[0] == 0:
        raise ValueError("Y must contain at least one centroid")

    # Normalise to 2-D so scalar features (1-D input) are handled as well.
    points = points.reshape(points.shape[0], -1)
    centroids = centroids.reshape(centroids.shape[0], -1)

    # Broadcast to an (n_points, n_centroids, n_features) difference tensor so
    # every distance is computed in a single vectorised call.
    differences = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    distances = np.linalg.norm(differences, axis=-1)
    return np.argmin(distances, axis=1)


def find_clusters_with_fix_init_centers(X, n_clusters, centers, max_iter=None):
    """
    Run k-means starting from the given initial centers.

    Points are repeatedly assigned to their nearest center and every center is
    moved to the mean of its assigned points until the centers stop changing.

    :param X: Data to be clustered, one point per row (any array-like)
    :param n_clusters: Amount of clusters; must match the number of centers
    :param centers: Pre-defined initial centers (any array-like)
    :param max_iter: Optional upper bound on the number of iterations;
                     ``None`` (default) iterates until convergence.
    :return: Tuple ``(centers, labels)``; ``centers`` is always a float
             ndarray and ``labels`` is the assignment of each point to the
             returned centers.
    :raises ValueError: If the number of initial centers differs from
                        ``n_clusters``.
    """
    data = np.asarray(X, dtype=float)
    # Convert up front so an ndarray is returned even when the very first
    # iteration already converges (callers slice the result with [:, 0]).
    centers = np.asarray(centers, dtype=float)
    if centers.shape[0] != n_clusters:
        raise ValueError(
            "expected {} initial centers, got {}".format(n_clusters, centers.shape[0])
        )

    iteration = 0
    while True:
        labels = pairwise_arg_min(data, centers)

        # Start from the previous centers so that a cluster which lost all of
        # its points keeps its position. Taking the mean of an empty slice
        # would give NaN, and NaN never compares equal, so the loop would
        # never terminate.
        new_centers = centers.copy()
        for cluster in range(n_clusters):
            members = data[labels == cluster]
            if len(members):
                new_centers[cluster] = members.mean(axis=0)

        iteration += 1
        converged = np.array_equal(centers, new_centers)
        out_of_budget = max_iter is not None and iteration >= max_iter
        if converged or out_of_budget:
            # Keep `centers` (not `new_centers`) so the returned labels are
            # exactly the assignment to the returned centers.
            break
        centers = new_centers

    return centers, labels
def find_clusters(X, n_clusters, rseed=2):
    """
    Cluster X with k-means, starting from randomly picked data points.

    :param X: Data to be clustered; indexed by row and by boolean mask
    :param n_clusters: Amount of clusters
    :param rseed: Seed for the ``np.random.RandomState`` that picks the
                  initial centers
    :return: Tuple ``(centers, labels)`` from the final iteration.
    """
    # Initial centers: the first n_clusters rows of a seeded row permutation.
    random_state = np.random.RandomState(rseed)
    shuffled_rows = random_state.permutation(X.shape[0])
    centers = X[shuffled_rows[:n_clusters]]

    converged = False
    while not converged:
        # Assignment step: index of the closest center for every point.
        labels = pairwise_distances_argmin(X, centers)

        # Update step: each center becomes the mean of its assigned points.
        # NOTE(review): a cluster without points yields a NaN mean here, and
        # NaN never compares equal, so the loop would not terminate.
        cluster_means = []
        for cluster in range(n_clusters):
            cluster_means.append(X[labels == cluster].mean(0))
        new_centers = np.array(cluster_means)

        # Stop once the update no longer moves any center.
        converged = bool(np.all(centers == new_centers))
        if not converged:
            centers = new_centers

    return centers, labels