Executed time measurements for stage extraction

d032fb36 · Alexander Lercher · a04e6d34 · d032fb36 · d032fb36
Commit d032fb36 authored May 12, 2020 by Alexander Lercher
Show whitespace changes
Inline Side-by-side

Showing with 42 additions and 5 deletions

run_time_slicing.py .../community-detection-microservice/app/run_time_slicing.py +33 -5

vis_execution_time.py ...tion-microservice/app/visualization/vis_execution_time.py +9 -0

No files found.
--- a/src/data-hub/community-detection-microservice/app/run_time_slicing.py
+++ b/src/data-hub/community-detection-microservice/app/run_time_slicing.py
@@ -5,6 +5,7 @@ if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)

 import json
+import timeit
 from datetime import datetime, date
 from db.repository import Repository
 from db.entities.timeslice import TimeSlice
@@ -53,7 +54,7 @@ def split_clusterset_by_time(layer_name: str, clusters: List[Cluster]) -> Dict[T
 if __name__ == "__main__":
    repo = Repository()

-    repo.remove_all_time_slices()
+    # repo.remove_all_time_slices()

    layers = repo.get_layers()
    for layer in layers:
@@ -65,7 +66,34 @@ if __name__ == "__main__":
        if clusters_for_layer is None or len(clusters_for_layer) == 0:
            clusters_for_layer = [Cluster(layer_name, -1, repo.get_layer_nodes(layer_name))]

+        Ns = [1000, 5000, 10000, 20000, 50000]
+        Ns.reverse()
+        NR_EXEC = 5
+        for N in Ns:
+            cur_N = 0
+            cluster_subset = []
+            for cluster in clusters_for_layer:
+                if cur_N + len(cluster.nodes) <= N:
+                    # the whole cluster still fits within the remaining budget of N nodes
+                    cluster_subset.append(cluster)
+                    cur_N += len(cluster.nodes)
+                else:
+                    # cluster would exceed N: keep only its first N - cur_N nodes, then stop
+                    cluster.nodes = cluster.nodes[:N-cur_N]
+                    cluster_subset.append(cluster)
+                    cur_N += len(cluster.nodes)
+                    break
+            clusters_for_layer = cluster_subset
+
+            sum = 0
+            for i in range(NR_EXEC):    
+                start = timeit.default_timer()
                time_slices = split_clusterset_by_time(layer_name, clusters_for_layer)
+                end = timeit.default_timer()
+                sum += end-start

-        for k,v in time_slices.items():
-            repo.add_time_slice(v)
+            print(f"{layer.layer_name} needed {sum/NR_EXEC} seconds with {N}, {cur_N} elements.")
+            
+
+        # for k,v in time_slices.items():
+        #     repo.add_time_slice(v)
--- a/src/data-hub/community-detection-microservice/app/visualization/vis_execution_time.py
+++ b/src/data-hub/community-detection-microservice/app/visualization/vis_execution_time.py
@@ -7,6 +7,14 @@ times = [[1000,0.9823,1.0420,0.9656],
 [20000,86.0519,104.0453,85.4891],
 [50000,489.4964,574.7641,468.8706]]

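+# slicing: rows are [N, time in seconds, ...]; presumably one averaged timing per layer as printed by run_time_slicing.py (verify)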
+times2 = [[1000, 0.010159840000000031,   0.008385740000001363,   0.008584839999997484],
+[5000,  0.044350359999999256,   0.04146890000000099,    0.04291390000000206],
+[10000, 0.07776566000000074,    0.07954154000000102,    0.07955803999999489],
+[20000, 0.15964476000000047,    0.16679267999999894,    0.15759418000000097],
+[50000, 0.4081138799999998,     0.4278634399999987,     0.41363941999999554]]
+
+
 n = [t[0] for t in times]
 finished = [t[1] for t in times]
 dest = [t[2] for t in times]
@@ -15,6 +23,7 @@ price = [t[3] for t in times]


 fig, ax = plt.subplots()
+ax.set_title('Execution Time for Clustering')
 ax.set_xlabel('Number of Nodes')
 ax.set_ylabel('Time in Seconds')
 ax.plot(n, dest, label='Destination')