Commit b6e044be authored by Alexander Lercher

Visualized clustering results based on small dataset

parent d032fb36
# clustering of generated nodes
import sys
import os
# Add the current working directory ('./') to the module search path.
# Presumably this lets the project-local `processing` package be imported
# when the script is started from the repository root -- TODO confirm.
modules_path = './'
if os.path.exists(modules_path):
    # Insert at index 1 so the first sys.path entry keeps its precedence.
    sys.path.insert(1, modules_path)
import matplotlib.pyplot as plt
import sklearn.datasets
import numpy as np
from processing.clustering.clusterer import Clusterer
# parameters for data generation (forwarded to sklearn.datasets.make_blobs)
N_SAMPLES = 1000  # passed as n_samples: number of generated points
N_FEATURES = 2  # passed as n_features; the plots use the first two columns
N_CENTERS = 3  # passed as centers; also sizes the palette in show_generated_data
STD_DEVIATION = 1.0  # passed as cluster_std
def show_generated_data(ax, nodes, labels):
    """Scatter-plot the generated points on ``ax``, colored by their label.

    ``nodes`` is indexed as ``nodes[:, 0]`` / ``nodes[:, 1]`` (a 2-D array
    whose first two columns are plotted). ``labels`` holds one integer per
    point and is used as an index into a rainbow palette with ``N_CENTERS``
    entries.
    """
    # One palette entry per generated center, evenly spaced over the colormap.
    palette = plt.cm.rainbow(np.linspace(0, 1, N_CENTERS))
    point_colors = [palette[center_id] for center_id in labels]

    ax.set_title('Generated Dataset')
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')

    xs = nodes[:, 0]
    ys = nodes[:, 1]
    ax.scatter(xs, ys, c=point_colors)
def show_clustering_result(ax, min_pts, clusters: dict):
    """Scatter-plot a clustering result on ``ax``, one color per cluster.

    ``clusters`` maps a cluster label to an iterable of nodes. Each node is
    read as a mapping with the keys ``'1'`` and ``'2'`` (plot coordinates)
    and ``'cluster_label'`` (index into the palette). ``min_pts`` is only
    used for the plot title.

    If the label ``-1`` is present it is treated as noise and drawn in black.
    NOTE(review): the palette is indexed directly by ``cluster_label``, so
    this assumes the non-noise labels are 0..k-1 -- confirm against Clusterer.
    """
    # Flatten the per-cluster node collections into one list.
    points = []
    for members in clusters.values():
        points.extend(members)

    has_noise = -1 in clusters
    n_rainbow = len(clusters) - 1 if has_noise else len(clusters)
    palette = plt.cm.rainbow(np.linspace(0, 1, n_rainbow))
    if has_noise:
        # Black goes last so that indexing with label -1 selects it.
        palette = np.append(palette, [[0, 0, 0, 1]], axis=0)

    point_colors = [palette[point['cluster_label']] for point in points]

    ax.set_title(f'Clustering Result with MinPts={min_pts}')
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')

    xs = [point['1'] for point in points]
    ys = [point['2'] for point in points]
    ax.scatter(xs, ys, c=point_colors)
def run_clustering(min_points, dataset):
    """Cluster ``dataset`` on the features ``'1'`` and ``'2'``.

    Builds a ``Clusterer`` configured with ``min_points`` and returns whatever
    its ``cluster_dataset`` call yields. show_clustering_result consumes that
    value as a dict of cluster label -> nodes (presumably the Clusterer's
    return format -- TODO confirm).
    """
    feature_keys = ['1', '2']
    clusterer = Clusterer(min_points=min_points)
    result = clusterer.cluster_dataset(dataset=dataset, features=feature_keys)
    return result
if __name__ == '__main__':
    # 2x2 grid: generated data first, then one panel per MinPts setting.
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
    fig.tight_layout(pad=3.0)

    nodes, labels = sklearn.datasets.make_blobs(
        n_samples=N_SAMPLES,
        n_features=N_FEATURES,
        centers=N_CENTERS,
        cluster_std=STD_DEVIATION,
    )
    # nodes = np.multiply(nodes, .1)

    show_generated_data(ax1, nodes, labels)

    for axis, min_pts in ((ax2, 5), (ax3, 10), (ax4, 15)):
        # Build fresh node dicts for every run. Presumably the clusterer
        # annotates them in place (e.g. with 'cluster_label') -- TODO confirm.
        dataset = [{'1': n[0], '2': n[1]} for n in nodes]
        clusters = run_clustering(min_pts, dataset)
        show_clustering_result(axis, min_pts, clusters)

    plt.show()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment