Commit 9885bbf7 authored by Alexander Lercher's avatar Alexander Lercher

Extracted visualization from slicing

parent e7061d7f
...@@ -16,19 +16,22 @@ class TimeSlice: ...@@ -16,19 +16,22 @@ class TimeSlice:
time_slice_dict: Dict = None, from_db = False): time_slice_dict: Dict = None, from_db = False):
self.time = str(time) self.time = str(time)
self.layer_name = layer_name self.layer_name = layer_name
self.clusters: Dict[int, List[Node]] = {} self.clusters: Dict[str, List[Node]] = {}
if time_slice_dict is not None: if time_slice_dict is not None:
self.from_serializable_dict(time_slice_dict, from_db) self.from_serializable_dict(time_slice_dict, from_db)
def add_node_to_cluster(self, cluster_label: int, node): def add_node_to_cluster(self, cluster_label: str, node):
# only string keys can be stored in json
cluster_label = str(cluster_label)
if cluster_label not in self.clusters: if cluster_label not in self.clusters:
self.clusters[cluster_label] = [] self.clusters[cluster_label] = []
node = self._get_unique_id(node) node = self._get_unique_id(node)
self.clusters[cluster_label].append(node) self.clusters[cluster_label].append(node)
def get_nodes_for_cluster(self, cluster_label: int): def get_nodes_for_cluster(self, cluster_label: str):
if cluster_label in self.clusters: if cluster_label in self.clusters:
return self.clusters[cluster_label] return self.clusters[cluster_label]
else: else:
......
...@@ -6,96 +6,52 @@ if os.path.exists(modules_path): ...@@ -6,96 +6,52 @@ if os.path.exists(modules_path):
import json import json
from datetime import datetime, date from datetime import datetime, date
import matplotlib.pyplot as plt
from db.repository import Repository from db.repository import Repository
from db.entities.timeslice import TimeSlice from db.entities.timeslice import TimeSlice
from db.entities import ClusterSet from db.entities import ClusterSet
from typing import Tuple, Dict from typing import Tuple, Dict, Any
repo = Repository()
def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]: def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
'''Returns the tuple (year, week_of_year) from a timestamp.''' '''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.'''
timestamp = datetime.fromtimestamp(float(timestamp[0:10])) timestamp = datetime.fromtimestamp(float(timestamp[0:10]))
(y, w, _) = timestamp.isocalendar() (y, w, _) = timestamp.isocalendar()
return (y, w) return (y, w)
def get_clusterset(): def split_clustersets_by_time(clustersets) -> Dict[Any, TimeSlice]:
clusterset = repo.get_clusterset('Destination_Layer') '''
# with open('clustering_results/optics/clusterset_Destination_Layer.txt') as file: Partitions all nodes of each clusterset into individual time slices based on their timestamp. The information about the cluster is kept.
# clusterset = ClusterSet(cluster_set_dict=json.loads(file.read()))
return clusterset :params clustersets: The clustersets whose nodes are split
:returns: A dict of time slices where the key is the time info and value is the information about the time slice
'''
def plt_show_circles(keys, time_slices, cluster_no): cnt = 0
for k in keys: time_slices: Dict[Any, TimeSlice] = {}
slice_ = time_slices[k] for clusterset in clustersets:
for cluster_no in clusterset.clusters:
if cluster_no in slice_.nodes: for node in cluster_no.nodes:
nodes = slice_.nodes[cluster_no] # assign the nodes to time slices and recreate the clusters there
else: # TODO use start and end time for assignment
nodes = [] time_key = convert_to_time_slice_key(str(node['Finished_time']))
# print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}") if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)
plt.title(str(k))
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
plt.scatter([n['Longitude_Destination'] for n in nodes], return time_slices
[n['Latitude_Destination'] for n in nodes],
s=[len(nodes)*100]*len(nodes))
plt.pause(0.5)
def plt_show_bars(keys, time_slices, cluster_no):
x_axis_label_stepsize = 10
nodes_per_slice_for_single_cluster = \
[len(time_slices[k].get_nodes_for_cluster(cluster_no))
for k
in keys]
fig, ax = plt.subplots()
ax.bar(x=range(len(keys)),
height=nodes_per_slice_for_single_cluster)
ax.set_ylabel('Size')
ax.set_title(f'Cluster-{cluster_no} size over time')
ax.set_xticks(range(len(keys))[::x_axis_label_stepsize])
ax.set_xticklabels(keys[::x_axis_label_stepsize])
plt.show()
clusterset = get_clusterset()
cnt = 0
time_slices = {}
# for clusterset in clustersets:
for cluster_no in clusterset.clusters:
for node in cluster_no.nodes:
# assign the nodes to time slices and recreate the clusters there
time_key = convert_to_time_slice_key(str(node['Finished_time']))
if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
# sort chronologically
keys = list(time_slices.keys())
keys.sort()
if __name__ == "__main__":
repo = Repository()
repo.remove_all_time_slices() clustersets = [repo.get_clusterset('Destination_Layer')]
for k,v in time_slices.items(): time_slices = split_clustersets_by_time(clustersets)
repo.add_time_slice(v)
# sort chronologically
keys = list(time_slices.keys())
keys.sort()
print(len(time_slices)) repo.remove_all_time_slices()
plt_show_bars(keys, time_slices, cluster_no = 0) for k,v in time_slices.items():
repo.add_time_slice(v)
import sys
import os
for path in ['../', './', '../../../modules/']:
if os.path.exists(path):
sys.path.insert(1, path)
import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities import TimeSlice
from typing import List
def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
    '''
    Draws one scatter plot per time slice for the nodes of a single cluster and pauses briefly after each one.

    :param time_slices: The time slices to draw, in the given order
    :param cluster_no: The label of the cluster to draw; it is converted to str before the lookup
    '''
    def coordinate(node, key):
        # nodes that do not carry the coordinate are drawn at 0
        return node[key] if key in node else 0

    label = str(cluster_no)

    for time_slice in time_slices:
        members = time_slice.get_nodes_for_cluster(label)
        count = len(members)

        plt.title(str(time_slice.time))

        longitudes = [coordinate(member, 'Longitude_Destination') for member in members]
        latitudes = [coordinate(member, 'Latitude_Destination') for member in members]

        # every marker of a slice gets the same area, which grows with the number of nodes
        plt.scatter(longitudes, latitudes, s=[count * 100] * count)
        plt.pause(0.5)
def plt_show_bars(time_slices: List[TimeSlice], cluster_no, x_axis_label_stepsize: int = 10):
    '''
    Shows a bar chart with the number of nodes of a single cluster in each time slice.

    :param time_slices: The time slices, in the order they should appear on the x axis
    :param cluster_no: The label of the cluster to show; it is converted to str before the lookup
    :param x_axis_label_stepsize: Only every n-th bar gets a tick label on the x axis (default 10)
    :raises ValueError: If x_axis_label_stepsize is smaller than 1
    '''
    if x_axis_label_stepsize < 1:
        raise ValueError('x_axis_label_stepsize must be >= 1')

    cluster_no = str(cluster_no)

    labels = [time_slice.time for time_slice in time_slices]
    positions = list(range(len(labels)))
    nodes_per_slice_for_single_cluster = [
        len(time_slice.get_nodes_for_cluster(cluster_no))
        for time_slice in time_slices
    ]

    _fig, ax = plt.subplots()
    ax.bar(x=positions, height=nodes_per_slice_for_single_cluster)
    ax.set_ylabel('Size')
    ax.set_title(f'Cluster-{cluster_no} size over time')

    # label only every n-th bar; positions and labels are sliced identically so they stay aligned
    ax.set_xticks(positions[::x_axis_label_stepsize])
    ax.set_xticklabels(labels[::x_axis_label_stepsize])

    plt.show()
if __name__ == "__main__":
    # only this script entry point needs ast, so it is imported locally
    import ast

    repo = Repository()

    time_slices = repo.get_time_slices_by_name("Destination_Layer")

    # chronological order: ts.time holds the string form of the slice key,
    # so it is parsed as a Python literal instead of eval'ing text that was loaded from the database
    time_slices.sort(key=lambda ts: ast.literal_eval(ts.time))

    print(len(time_slices))

    plt_show_bars(time_slices, cluster_no = 0)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment