Commit d032fb36 authored by Alexander Lercher

Executed time measurements for stage extraction

parent a04e6d34
...@@ -5,6 +5,7 @@ if os.path.exists(modules_path): ...@@ -5,6 +5,7 @@ if os.path.exists(modules_path):
sys.path.insert(1, modules_path) sys.path.insert(1, modules_path)
import json import json
import timeit
from datetime import datetime, date from datetime import datetime, date
from db.repository import Repository from db.repository import Repository
from db.entities.timeslice import TimeSlice from db.entities.timeslice import TimeSlice
...@@ -53,7 +54,7 @@ def split_clusterset_by_time(layer_name: str, clusters: List[Cluster]) -> Dict[T ...@@ -53,7 +54,7 @@ def split_clusterset_by_time(layer_name: str, clusters: List[Cluster]) -> Dict[T
if __name__ == "__main__": if __name__ == "__main__":
repo = Repository() repo = Repository()
repo.remove_all_time_slices() # repo.remove_all_time_slices()
layers = repo.get_layers() layers = repo.get_layers()
for layer in layers: for layer in layers:
...@@ -65,7 +66,34 @@ if __name__ == "__main__": ...@@ -65,7 +66,34 @@ if __name__ == "__main__":
if clusters_for_layer is None or len(clusters_for_layer) == 0: if clusters_for_layer is None or len(clusters_for_layer) == 0:
clusters_for_layer = [Cluster(layer_name, -1, repo.get_layer_nodes(layer_name))] clusters_for_layer = [Cluster(layer_name, -1, repo.get_layer_nodes(layer_name))]
time_slices = split_clusterset_by_time(layer_name, clusters_for_layer) Ns = [1000, 5000, 10000, 20000, 50000]
Ns.reverse()
for k,v in time_slices.items(): NR_EXEC = 5
repo.add_time_slice(v) for N in Ns:
cur_N = 0
cluster_subset = []
for cluster in clusters_for_layer:
if cur_N + len(cluster.nodes) <= N:
# the whole cluster still fits within the remaining budget of N nodes, so add it unchanged
cluster_subset.append(cluster)
cur_N += len(cluster.nodes)
else:
# only part of this cluster fits: truncate its node list in place to the remaining budget, add it, and stop
cluster.nodes = cluster.nodes[:N-cur_N]
cluster_subset.append(cluster)
cur_N += len(cluster.nodes)
break
clusters_for_layer = cluster_subset
sum = 0
for i in range(NR_EXEC):
start = timeit.default_timer()
time_slices = split_clusterset_by_time(layer_name, clusters_for_layer)
end = timeit.default_timer()
sum += end-start
print(f"{layer.layer_name} needed {sum/NR_EXEC} seconds with {N}, {cur_N} elements.")
# for k,v in time_slices.items():
# repo.add_time_slice(v)
...@@ -7,6 +7,14 @@ times = [[1000,0.9823,1.0420,0.9656], ...@@ -7,6 +7,14 @@ times = [[1000,0.9823,1.0420,0.9656],
[20000,86.0519,104.0453,85.4891], [20000,86.0519,104.0453,85.4891],
[50000,489.4964,574.7641,468.8706]] [50000,489.4964,574.7641,468.8706]]
# slicing
times2 = [[1000, 0.010159840000000031, 0.008385740000001363, 0.008584839999997484],
[5000, 0.044350359999999256, 0.04146890000000099, 0.04291390000000206],
[10000, 0.07776566000000074, 0.07954154000000102, 0.07955803999999489],
[20000, 0.15964476000000047, 0.16679267999999894, 0.15759418000000097],
[50000, 0.4081138799999998, 0.4278634399999987, 0.41363941999999554]]
n = [t[0] for t in times] n = [t[0] for t in times]
finished = [t[1] for t in times] finished = [t[1] for t in times]
dest = [t[2] for t in times] dest = [t[2] for t in times]
...@@ -15,6 +23,7 @@ price = [t[3] for t in times] ...@@ -15,6 +23,7 @@ price = [t[3] for t in times]
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.set_title('Execution Time for Clustering')
ax.set_xlabel('Number of Nodes') ax.set_xlabel('Number of Nodes')
ax.set_ylabel('Time in Seconds') ax.set_ylabel('Time in Seconds')
ax.plot(n, dest, label='Destination') ax.plot(n, dest, label='Destination')
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment