Commit a04e6d34 authored by Alexander Lercher

Executed time measurements for clustering

parent cb80a8d9
...@@ -5,6 +5,7 @@ if os.path.exists(modules_path): ...@@ -5,6 +5,7 @@ if os.path.exists(modules_path):
sys.path.insert(1, modules_path) sys.path.insert(1, modules_path)
import json import json
import timeit
from db.entities import * from db.entities import *
from typing import List, Dict, Tuple from typing import List, Dict, Tuple
from db.repository import Repository, AgiRepository from db.repository import Repository, AgiRepository
...@@ -28,19 +29,31 @@ def run_generic_clustering(): ...@@ -28,19 +29,31 @@ def run_generic_clustering():
print("skipping") print("skipping")
continue continue
clusters = run_clustering_for_layer(layer)
Ns = [1000, 5000, 10000, 20000, 50000]
for N in Ns:
clusters = run_clustering_for_layer(layer, N)
# cluster_set = ClusterSet(layer.layer_name, clusters) # cluster_set = ClusterSet(layer.layer_name, clusters)
store_generic_clusters(clusters) # store_generic_clusters(clusters)
def run_clustering_for_layer(layer: Layer) -> List[Cluster]: def run_clustering_for_layer(layer: Layer, N) -> List[Cluster]:
nodes = repo.get_layer_nodes(layer.layer_name) NR_EXEC = 5
clusterer = Clusterer() nodes = repo.get_layer_nodes(layer.layer_name)[:N]
res = clusterer.cluster_dataset(
nodes, sum = 0
layer.properties for i in range(NR_EXEC):
) start = timeit.default_timer()
clusterer = Clusterer()
res = clusterer.cluster_dataset(
nodes,
layer.properties
)
end = timeit.default_timer()
sum += end-start
print(f"{layer.layer_name} needed {sum/NR_EXEC} seconds with {min(N, len(nodes))} elements.")
return [Cluster(layer.layer_name, key, value) for key, value in res.items()] return [Cluster(layer.layer_name, key, value) for key, value in res.items()]
...@@ -110,6 +123,7 @@ def store_clusters(type: str, clusters: List): ...@@ -110,6 +123,7 @@ def store_clusters(type: str, clusters: List):
if __name__ == "__main__": if __name__ == "__main__":
# print(len(repo.get_layer_nodes('Reputation_Layer')))
run_generic_clustering() run_generic_clustering()
# TODO cleanup # TODO cleanup
......
import matplotlib.pyplot as plt
# Clustering runtime measurements.
# Each row is [number of nodes, Finished Time, Destination, Price]; the three
# timing columns are plotted against the node count below.
# NOTE(review): values look like averaged wall-clock seconds per layer copied
# from the timing run's console output -- confirm before reusing them.
times = [
    [1000, 0.9823, 1.0420, 0.9656],
    [5000, 7.8716, 8.8916, 8.2609],
    [10000, 24.7394, 29.0521, 24.3734],
    [20000, 86.0519, 104.0453, 85.4891],
    [50000, 489.4964, 574.7641, 468.8706],
]

# Transpose the row-oriented table into one list per column.
n, finished, dest, price = map(list, zip(*times))

fig, ax = plt.subplots()
ax.set(xlabel='Number of Nodes', ylabel='Time in Seconds')

# Plot order determines the colour assigned to each series, so it is kept
# as Destination, Finished Time, Price.
ax.plot(n, dest, label='Destination')
ax.plot(n, finished, label='Finished Time')
ax.plot(n, price, label='Price')
ax.legend()

plt.show()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment