Extracted visualization from slicing

9885bbf7 · Alexander Lercher · e7061d7f · 9885bbf7 · 9885bbf7 · 9885bbf7
Commit 9885bbf7 authored 4 years ago by Alexander Lercher
3 changed files
--- a/src/data-hub/community-detection-microservice/app/db/entities/timeslice.py
+++ b/src/data-hub/community-detection-microservice/app/db/entities/timeslice.py
@@ -16,19 +16,22 @@ class TimeSlice:
                 time_slice_dict: Dict = None, from_db = False):
        self.time = str(time)
        self.layer_name = layer_name
-        self.clusters: Dict[int, List[Node]] = {}
+        self.clusters: Dict[str, List[Node]] = {}

        if time_slice_dict is not None:
            self.from_serializable_dict(time_slice_dict, from_db)

-    def add_node_to_cluster(self, cluster_label: int, node):
+    def add_node_to_cluster(self, cluster_label: str, node):
+        # only string keys can be stored in json
+        cluster_label = str(cluster_label)
+
        if cluster_label not in self.clusters:
            self.clusters[cluster_label] = []

        node = self._get_unique_id(node)
        self.clusters[cluster_label].append(node)

-    def get_nodes_for_cluster(self, cluster_label: int):
+    def get_nodes_for_cluster(self, cluster_label: str):
        if cluster_label in self.clusters:
            return self.clusters[cluster_label]
        else:

--- a/src/data-hub/community-detection-microservice/app/run_time_slicing.py
+++ b/src/data-hub/community-detection-microservice/app/run_time_slicing.py
@@ -6,96 +6,52 @@ if os.path.exists(modules_path):

 import json
 from datetime import datetime, date
-import matplotlib.pyplot as plt
 from db.repository import Repository
 from db.entities.timeslice import TimeSlice
 from db.entities import ClusterSet
-from typing import Tuple, Dict
-
-repo = Repository()
+from typing import Tuple, Dict, Any


 def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
-    '''Returns the tuple (year, week_of_year) from a timestamp.'''
+    '''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.'''
    timestamp = datetime.fromtimestamp(float(timestamp[0:10]))
    (y, w, _) = timestamp.isocalendar()
    return (y, w)


-def get_clusterset():
-    clusterset = repo.get_clusterset('Destination_Layer')
-    # with open('clustering_results/optics/clusterset_Destination_Layer.txt') as file:
-    #     clusterset = ClusterSet(cluster_set_dict=json.loads(file.read()))
-    return clusterset
-
-
-def plt_show_circles(keys, time_slices, cluster_no):
-    for k in keys:
-        slice_ = time_slices[k]
-
-        if cluster_no in slice_.nodes:
-            nodes = slice_.nodes[cluster_no] 
-        else:
-            nodes = []    
-            
-        # print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")
-
-        plt.title(str(k))
-
-        plt.scatter([n['Longitude_Destination'] for n in nodes],
-                    [n['Latitude_Destination'] for n in nodes],
-                    s=[len(nodes)*100]*len(nodes))
-
-        plt.pause(0.5)
-
-
-def plt_show_bars(keys, time_slices, cluster_no):
-    x_axis_label_stepsize = 10
-
-    nodes_per_slice_for_single_cluster = \
-            [len(time_slices[k].get_nodes_for_cluster(cluster_no))
-            for k 
-            in keys]
+def split_clustersets_by_time(clustersets) -> Dict[Any, TimeSlice]:
+    '''
+    Partitions all nodes of each clusterset into individual time slices based on their timestamp. The information about the cluster is kept.

-    fig, ax = plt.subplots()
-    ax.bar(x=range(len(keys)),
-        height=nodes_per_slice_for_single_cluster)
-
-    ax.set_ylabel('Size')
-    ax.set_title(f'Cluster-{cluster_no} size over time')
-    ax.set_xticks(range(len(keys))[::x_axis_label_stepsize])
-    ax.set_xticklabels(keys[::x_axis_label_stepsize])
-
-    plt.show()
-
-
-
-clusterset = get_clusterset()
-
-
-cnt = 0
-time_slices = {} 
-# for clusterset in clustersets:
-for cluster_no in clusterset.clusters:
+    :param clustersets: The clustersets whose nodes are split
+    :returns: A dict of time slices where the key is the time info and value is the information about the time slice
+    '''
+    cnt = 0
+    time_slices: Dict[Any, TimeSlice] = {} 
+    for clusterset in clustersets:
+        for cluster_no in clusterset.clusters:
            for node in cluster_no.nodes:
                # assign the nodes to time slices and recreate the clusters there
+                # TODO use start and end time for assignment
                time_key = convert_to_time_slice_key(str(node['Finished_time']))
                
                if time_key not in time_slices:
                    time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)

                time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
+    return time_slices
        

-# sort chronologically
-keys = list(time_slices.keys())
-keys.sort()
+if __name__ == "__main__":
+    repo = Repository()

+    clustersets = [repo.get_clusterset('Destination_Layer')]
+    time_slices = split_clustersets_by_time(clustersets)

-repo.remove_all_time_slices()
-for k,v in time_slices.items():
-    repo.add_time_slice(v)
+    # sort chronologically
+    keys = list(time_slices.keys())
+    keys.sort()

-
-print(len(time_slices))
-plt_show_bars(keys, time_slices, cluster_no = 0)
+    repo.remove_all_time_slices()
+    for k,v in time_slices.items():
+        repo.add_time_slice(v)
--- a/src/data-hub/community-detection-microservice/app/visualization/visualize_time_slices.py
+++ b/src/data-hub/community-detection-microservice/app/visualization/visualize_time_slices.py
+import sys
+import os
+for path in ['../', './', '../../../modules/']:
+    if os.path.exists(path):
+        sys.path.insert(1, path)
+
+import matplotlib.pyplot as plt
+from db.repository import Repository
+from db.entities import TimeSlice
+from typing import List
+
+
+def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
+    cluster_no = str(cluster_no)
+
+    for slice_ in time_slices:
+
+        nodes = slice_.get_nodes_for_cluster(cluster_no)
+            
+        # print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")
+
+        plt.title(str(slice_.time))
+
+        plt.scatter([n['Longitude_Destination'] if 'Longitude_Destination' in n else 0
+                        for n in nodes],
+                    [n['Latitude_Destination'] if 'Latitude_Destination' in n else 0
+                        for n in nodes],
+                    s=[len(nodes)*100]*len(nodes))
+
+        plt.pause(0.5)
+
+
+def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
+    cluster_no = str(cluster_no)
+    
+    labels = [ts.time for ts in time_slices]  
+    x_axis_label_stepsize = 10  
+
+    nodes_per_slice_for_single_cluster = \
+            [len(time_slice.get_nodes_for_cluster(cluster_no))
+            for time_slice
+            in time_slices]
+
+    fig, ax = plt.subplots()
+    ax.bar(x=range(len(labels)),
+        height=nodes_per_slice_for_single_cluster)
+
+    ax.set_ylabel('Size')
+    ax.set_title(f'Cluster-{cluster_no} size over time')
+    ax.set_xticks(range(len(labels))[::x_axis_label_stepsize])
+    ax.set_xticklabels(labels[::x_axis_label_stepsize])
+
+    plt.show()
+
+
+if __name__ == "__main__":
+    repo = Repository()
+    time_slices = repo.get_time_slices_by_name("Destination_Layer")
+
+    # chronological order
+    time_slices.sort(key=lambda ts: eval(ts.time))
+
+    print(len(time_slices))
+    plt_show_bars(time_slices, cluster_no = 0)
\ No newline at end of file