Commit 4b461053 authored by Alexander Lercher

New time slicing considering empty cluster subsets

Empty cluster subsets are still stored as an empty list, i.e. size=0
parent b0e3117f
......@@ -23,12 +23,20 @@ class TimeSlice:
if time_slice_dict is not None:
self.from_serializable_dict(time_slice_dict, from_db)
def init_all_clusters(self, cluster_labels: List[str]):
    '''Initializes internal clusters for all labels with an empty list.

    Every label is converted with str() before it is used as a key, so
    non-string labels are accepted as well. A label that already exists in
    self.clusters is reset to a new empty list.
    '''
    for cluster_label in cluster_labels:
        # only string keys can be stored in json
        cluster_label = str(cluster_label)
        self.clusters[cluster_label] = []
def add_node_to_cluster(self, cluster_label: str, node):
    '''Appends a node to the cluster stored under the given label.

    The label is converted with str() before the lookup. The cluster must
    already exist in self.clusters; a missing label is never created here.

    Raises:
        KeyError: if the label is not present in self.clusters, e.g. because
            init_all_clusters was not called with it beforehand.
    '''
    # only string keys can be stored in json
    cluster_label = str(cluster_label)

    if cluster_label not in self.clusters:
        # fail before touching self.clusters, so a rejected label leaves no empty entry behind
        raise KeyError(f"self::init_all_clusters must be used to add all global cluster labels beforehand (got {cluster_label})")

    # node = self._get_unique_id(node)
    self.clusters[cluster_label].append(node)
......
......@@ -38,14 +38,16 @@ def split_clusterset_by_time(layer: Layer, clusters: List[Cluster]) -> Dict[Time
'''
time_slices: Dict[Any, TimeSlice] = {}
for cluster_no in clusters:
if cluster_no.cluster_label == -1:
all_cluster_labels = [cluster.cluster_label for cluster in clusters if cluster.cluster_label != -1]
for cluster in clusters:
if cluster.cluster_label == -1:
print("Noise cluster was ignored.")
continue
for node in cluster_no.nodes:
for node in cluster.nodes:
# retrieve times the node is located in based on the defined time properties in the schema
# retrieve times where the node is located in as (year, week), based on the defined timestamp fields in the schema
time_keys = set()
for time_property in TIME_PROPERTY_NAMES:
if time_property in node:
......@@ -54,8 +56,9 @@ def split_clusterset_by_time(layer: Layer, clusters: List[Cluster]) -> Dict[Time
for time_key in time_keys:
if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key, layer.use_case, layer.use_case_table, layer.layer_name)
time_slices[time_key].init_all_clusters(all_cluster_labels)
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
time_slices[time_key].add_node_to_cluster(cluster.cluster_label, node)
return time_slices
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment