Slicing for all clusters using start- and end-timestamp

c66bd0dd · Alexander Lercher · 9885bbf7 · c66bd0dd
Commit c66bd0dd authored Apr 22, 2020 by Alexander Lercher
Hide whitespace changes
Inline Side-by-side

Showing with 26 additions and 22 deletions

run_time_slicing.py .../community-detection-microservice/app/run_time_slicing.py +26 -22

No files found.
--- a/src/data-hub/community-detection-microservice/app/run_time_slicing.py
+++ b/src/data-hub/community-detection-microservice/app/run_time_slicing.py
@@ -11,47 +11,51 @@ from db.entities.timeslice import TimeSlice
 from db.entities import ClusterSet
 from typing import Tuple, Dict, Any
+TimeSliceKey = Tuple[int, int]
-def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
+def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
    '''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.'''
-    timestamp = datetime.fromtimestamp(float(timestamp[0:10]))
+    time = datetime.utcfromtimestamp(float(timestamp[0:10]))
-    (y, w, _) = timestamp.isocalendar()
+    (y, w, _) = time.isocalendar()
    return (y, w)
-def split_clustersets_by_time(clustersets) -> Dict[Any, TimeSlice]:
+def split_clusterset_by_time(clusterset) -> Dict[TimeSliceKey, TimeSlice]:
    '''
-    Partitions all nodes of each clusterset into idividual time slices based on their timestamp. The information about the cluster is kept.
+    Distributes all nodes of a single clusterset into individual time slices based on their timestamps.
+    Each node is added to the slice of its 'Starting_time' and the slice of its 'Finished_time' (only once if both map to the same slice).
+    Information about clusters and the nodes in the clusters will not be changed.
-    :params clustersets: The clustersets whichs nodes are split
+    :params clusterset: The clusterset whose nodes are split
    :returns: A dict of time slices where the key is the time info and value is the information about the time slice
    '''
-    cnt = 0
    time_slices: Dict[Any, TimeSlice] = {} 
-    for clusterset in clustersets:
+    for cluster_no in clusterset.clusters:
-        for cluster_no in clusterset.clusters:
+        for node in cluster_no.nodes:
-            for node in cluster_no.nodes:
-                # assign the nodes to time slices and recreate the clusters there
+            time_keys = {
-                # TODO use start and end time for assignment
+                convert_to_time_slice_key(str(node['Finished_time'])), 
-                time_key = convert_to_time_slice_key(str(node['Finished_time']))
+                convert_to_time_slice_key(str(node['Starting_time']))
+            }
+            for time_key in time_keys:
                if time_key not in time_slices:
                    time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)
                time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
    return time_slices
 if __name__ == "__main__":
    repo = Repository()
-    clustersets = [repo.get_clusterset('Destination_Layer')]
+    repo.remove_all_time_slices()
-    time_slices = split_clustersets_by_time(clustersets)
-    # sort chronologically
+    clustersets = repo.get_clustersets()
-    keys = list(time_slices.keys())
+    for clusterset in clustersets:
-    keys.sort()
+        time_slices = split_clusterset_by_time(clusterset)
-    repo.remove_all_time_slices()
+        for time_slice in time_slices.values():
-    for k,v in time_slices.items():
+            repo.add_time_slice(time_slice)
-        repo.add_time_slice(v)