Commit 9885bbf7 authored by Alexander Lercher

Extracted visualization from slicing

parent e7061d7f
......@@ -16,19 +16,22 @@ class TimeSlice:
time_slice_dict: Dict = None, from_db = False):
self.time = str(time)
self.layer_name = layer_name
self.clusters: Dict[int, List[Node]] = {}
self.clusters: Dict[str, List[Node]] = {}
if time_slice_dict is not None:
self.from_serializable_dict(time_slice_dict, from_db)
def add_node_to_cluster(self, cluster_label: int, node):
def add_node_to_cluster(self, cluster_label: str, node):
# only string keys can be stored in json
cluster_label = str(cluster_label)
if cluster_label not in self.clusters:
self.clusters[cluster_label] = []
node = self._get_unique_id(node)
self.clusters[cluster_label].append(node)
def get_nodes_for_cluster(self, cluster_label: int):
def get_nodes_for_cluster(self, cluster_label: str):
if cluster_label in self.clusters:
return self.clusters[cluster_label]
else:
......
......@@ -6,96 +6,52 @@ if os.path.exists(modules_path):
import json
from datetime import datetime, date
import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities.timeslice import TimeSlice
from db.entities import ClusterSet
from typing import Tuple, Dict
repo = Repository()
from typing import Tuple, Dict, Any
def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
    '''
    Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.

    Only the first 10 characters of the timestamp are interpreted (as seconds since the epoch),
    so anything after them, e.g. sub-second digits, is ignored.
    The conversion uses datetime.fromtimestamp() without a time zone, i.e. local time.

    :param timestamp: The timestamp as string, starting with the epoch seconds
    :returns: The ISO calendar year and ISO week number of the timestamp
    '''
    # keep the parameter untouched and bind the parsed value to its own name
    moment = datetime.fromtimestamp(float(timestamp[0:10]))
    year, week, _ = moment.isocalendar()
    return (year, week)
def get_clusterset():
clusterset = repo.get_clusterset('Destination_Layer')
# with open('clustering_results/optics/clusterset_Destination_Layer.txt') as file:
# clusterset = ClusterSet(cluster_set_dict=json.loads(file.read()))
return clusterset
def plt_show_circles(keys, time_slices, cluster_no):
for k in keys:
slice_ = time_slices[k]
if cluster_no in slice_.nodes:
nodes = slice_.nodes[cluster_no]
else:
nodes = []
# print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")
plt.title(str(k))
plt.scatter([n['Longitude_Destination'] for n in nodes],
[n['Latitude_Destination'] for n in nodes],
s=[len(nodes)*100]*len(nodes))
plt.pause(0.5)
def plt_show_bars(keys, time_slices, cluster_no):
x_axis_label_stepsize = 10
nodes_per_slice_for_single_cluster = \
[len(time_slices[k].get_nodes_for_cluster(cluster_no))
for k
in keys]
fig, ax = plt.subplots()
ax.bar(x=range(len(keys)),
height=nodes_per_slice_for_single_cluster)
ax.set_ylabel('Size')
ax.set_title(f'Cluster-{cluster_no} size over time')
ax.set_xticks(range(len(keys))[::x_axis_label_stepsize])
ax.set_xticklabels(keys[::x_axis_label_stepsize])
plt.show()
clusterset = get_clusterset()
cnt = 0
time_slices = {}
# for clusterset in clustersets:
for cluster_no in clusterset.clusters:
for node in cluster_no.nodes:
# assign the nodes to time slices and recreate the clusters there
time_key = convert_to_time_slice_key(str(node['Finished_time']))
def split_clustersets_by_time(clustersets) -> Dict[Any, TimeSlice]:
    '''
    Partitions all nodes of each clusterset into individual time slices based on their timestamp.
    The information about the cluster is kept.

    :param clustersets: The clustersets whose nodes are split
    :returns: A dict of time slices where the key is the time info and the value is the information about the time slice
    '''
    time_slices: Dict[Any, TimeSlice] = {}

    for clusterset in clustersets:
        for cluster in clusterset.clusters:
            for node in cluster.nodes:
                # assign the nodes to time slices and recreate the clusters there
                # TODO use start and end time for assignment
                time_key = convert_to_time_slice_key(str(node['Finished_time']))

                # create the slice lazily the first time its key shows up
                if time_key not in time_slices:
                    time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)

                time_slices[time_key].add_node_to_cluster(cluster.cluster_label, node)

    return time_slices
if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
# sort chronologically
keys = list(time_slices.keys())
keys.sort()
if __name__ == "__main__":
repo = Repository()
repo.remove_all_time_slices()
for k,v in time_slices.items():
repo.add_time_slice(v)
clustersets = [repo.get_clusterset('Destination_Layer')]
time_slices = split_clustersets_by_time(clustersets)
# sort chronologically
keys = list(time_slices.keys())
keys.sort()
print(len(time_slices))
plt_show_bars(keys, time_slices, cluster_no = 0)
repo.remove_all_time_slices()
for k,v in time_slices.items():
repo.add_time_slice(v)
import sys
import os
for path in ['../', './', '../../../modules/']:
if os.path.exists(path):
sys.path.insert(1, path)
import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities import TimeSlice
from typing import List
def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
    '''
    Draws the nodes of a single cluster as a scatter plot, one time slice after the other.

    For every time slice the plot title is set to the slice's time and the nodes are plotted
    by their 'Longitude_Destination' (x) and 'Latitude_Destination' (y) values.
    A coordinate missing from a node is plotted as 0.
    All markers of a slice share the same size, which grows with the number of nodes in the slice.

    :param time_slices: The time slices to draw, in drawing order
    :param cluster_no: The label of the cluster to draw; converted to str for the lookup
    '''
    # cluster labels are stored as strings in the time slices
    label = str(cluster_no)

    for current_slice in time_slices:
        members = current_slice.get_nodes_for_cluster(label)

        longitudes = [
            member['Longitude_Destination'] if 'Longitude_Destination' in member else 0
            for member in members
        ]
        latitudes = [
            member['Latitude_Destination'] if 'Latitude_Destination' in member else 0
            for member in members
        ]
        # one identical size entry per plotted node
        marker_sizes = [len(members) * 100] * len(members)

        plt.title(str(current_slice.time))
        plt.scatter(longitudes, latitudes, s=marker_sizes)
        # short pause so the individual slices are visible one after the other
        plt.pause(0.5)
def plt_show_bars(time_slices: List[TimeSlice], cluster_no, x_axis_label_stepsize: int = 10):
    '''
    Shows a bar chart with the size of a single cluster in each of the given time slices.

    :param time_slices: The time slices, in the order they should appear on the x-axis
    :param cluster_no: The label of the cluster to show; converted to str for the lookup
    :param x_axis_label_stepsize: Only every n-th bar gets a tick with the time of its slice as label
    '''
    # cluster labels are stored as strings in the time slices
    cluster_no = str(cluster_no)

    labels = [ts.time for ts in time_slices]
    nodes_per_slice_for_single_cluster = [
        len(ts.get_nodes_for_cluster(cluster_no)) for ts in time_slices
    ]
    positions = range(len(labels))

    _, ax = plt.subplots()
    ax.bar(x=positions, height=nodes_per_slice_for_single_cluster)
    ax.set_ylabel('Size')
    ax.set_title(f'Cluster-{cluster_no} size over time')

    # slice positions and labels with the same step so ticks and labels stay aligned
    ax.set_xticks(positions[::x_axis_label_stepsize])
    ax.set_xticklabels(labels[::x_axis_label_stepsize])

    plt.show()
if __name__ == "__main__":
    # local import: only this script entry point needs it
    import ast

    repo = Repository()
    time_slices = repo.get_time_slices_by_name("Destination_Layer")

    # chronological order
    # TimeSlice stores its time as str(time); presumably this is the stringified
    # (year, week) key from the slicing step - TODO confirm for slices loaded from the db.
    # literal_eval only parses Python literals, so unlike eval() it cannot execute
    # arbitrary code coming from the database.
    time_slices.sort(key=lambda ts: ast.literal_eval(ts.time))

    print(len(time_slices))
    plt_show_bars(time_slices, cluster_no = 0)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment