Commit bd4aa55b authored by Alexander Lercher's avatar Alexander Lercher

Merge branch 'feature/network-stages' into develop

parents 0e20ca32 c66bd0dd
......@@ -228,14 +228,14 @@ paths:
items:
type: string
/clustersets/{name}:
/clustersets/{layername}:
get:
operationId: "routes.clustersets.get_by_name"
tags:
- "Clusters"
summary: "Get clusterset for layer-name"
parameters:
- name: "name"
- name: "layername"
in: "path"
description: "Name of the layer to return the clusterset for"
required: true
......@@ -262,6 +262,41 @@ paths:
schema:
$ref: "#/definitions/UserClusterGraphCollection"
# Time slices
/timeslices:
get:
operationId: "routes.timeslices.get"
tags:
- "Time Slices"
summary: "Get all time slices based on individual layers containing clusters with nodes for that time"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeSliceCollection"
/timeslices/{layername}:
get:
operationId: "routes.timeslices.get_by_name"
tags:
- "Time Slices"
summary: "Get all time slices for one layer"
parameters:
- name: "layername"
in: "path"
description: "Name of the layer to return the time slices for"
required: true
type: "string"
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeSliceCollection"
404:
description: "No time slices found for layername"
# Function Calls
/rfc/run:
post:
......@@ -422,4 +457,31 @@ definitions:
ClusterSetCollection:
type: array
items:
$ref: "#/definitions/ClusterSet"
\ No newline at end of file
$ref: "#/definitions/ClusterSet"
# A time slice of a single layer: for one time key, the nodes (reduced to their
# UniqueID) grouped by the label of the cluster they belong to.
TimeSlice:
  type: object
  properties:
    time:
      # The entity stores and serializes the time key in its string form
      # (see the example), so it is a string rather than an object.
      type: string
      example: "(2020, 52)"
    layer_name:
      type: string
    clusters:
      # Keys are the cluster labels as strings.
      type: object
      additionalProperties:
        type: array
        items:
          type: object
          properties:
            UniqueID:
              type: string
      example:
        "0":
          - UniqueID: abc
          - UniqueID: def
TimeSliceCollection:
type: array
items:
$ref: "#/definitions/TimeSlice"
\ No newline at end of file
......@@ -4,3 +4,4 @@ from db.entities.cluster import Cluster, LocationCluster, TimeCluster
from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer
from db.entities.timeslice import TimeSlice
\ No newline at end of file
import json
from typing import List, Dict, TypeVar, Any
from typing import List, Dict, NewType, Any
from datetime import date, datetime
Node = TypeVar('Node')
Node = NewType('Node', dict)
class TimeSlice:
def __init__(self, time, nodes = None,
cluster_set_dict: Dict = None, from_db = False):
self.time = time
self.nodes: Dict[int, List[Node]] = {}
# if cluster_set_dict is not None:
# self.from_serializable_dict(cluster_set_dict, from_db)
def add_node_to_cluster(self, cluster_label, node):
if cluster_label not in self.nodes:
self.nodes[cluster_label] = []
self.nodes[cluster_label].append(node)
# todo
# def to_serializable_dict(self, for_db=False) -> Dict:
# serialized_dict_clusters = [cluster.to_serializable_dict(for_db)
# for cluster in self.clusters]
# return {
# "layer_name": self.layer_name,
# "clusters": json.dumps(serialized_dict_clusters) if for_db else serialized_dict_clusters
# }
# def from_serializable_dict(self, cluster_set_dict: Dict, from_db=False):
# self.layer_name = cluster_set_dict["layer_name"]
# serialized_dict_clusters = json.loads(cluster_set_dict["clusters"]) \
# if from_db else cluster_set_dict["clusters"]
# self.clusters = [Cluster(cluster_dict=cluster_dict, from_db=from_db)
# for cluster_dict in serialized_dict_clusters]
'''
A time slice for a single layer containing all nodes for that time.
:param time: The tag indicating the time
:param layer_name: The name of the layer the nodes belong to
'''
def __init__(self, time: Any, layer_name: str,
             time_slice_dict: Dict = None, from_db = False):
    '''
    Creates a time slice, optionally restoring its state from a serialized dict.

    :param time: The tag indicating the time, stored as its string representation
    :param layer_name: The name of the layer the nodes belong to
    :param time_slice_dict: If not None it is passed to from_serializable_dict() after the defaults were set
    :param from_db: Forwarded to from_serializable_dict()
    '''
    self.time = str(time)
    self.layer_name = layer_name
    # cluster label -> nodes of that cluster
    self.clusters: Dict[str, List[Node]] = {}

    if time_slice_dict is None:
        return

    self.from_serializable_dict(time_slice_dict, from_db)
def add_node_to_cluster(self, cluster_label: str, node):
    '''
    Appends the node, reduced by _get_unique_id(), to the cluster with the given label.

    :param cluster_label: The label of the cluster, converted to str because only string keys can be stored in json
    :param node: The node to add
    '''
    key = str(cluster_label)
    # creates the cluster's node list on first use
    members = self.clusters.setdefault(key, [])
    members.append(self._get_unique_id(node))
def get_nodes_for_cluster(self, cluster_label: str):
    '''
    Returns the nodes stored for the given cluster label.

    :param cluster_label: The label of the cluster, used as-is for the lookup
    :returns: The list of nodes of the cluster, or a new empty list if the label is unknown
    '''
    return self.clusters.get(cluster_label, [])
def _get_unique_id(self, node : Dict) -> Dict:
    '''
    Returns a new dict with the unique id only.

    :param node: The node to reduce
    :returns: A dict containing only the 'UniqueID' entry of the node, or None if the node has no such key
    '''
    uid_key = 'UniqueID'

    if uid_key not in node:
        # NOTE(review): callers get None here despite the Dict return annotation;
        # confirm whether nodes without a unique id should be skipped or rejected instead.
        return None

    return {uid_key: node[uid_key]}
def to_serializable_dict(self, for_db=False) -> Dict:
    '''
    Returns the time slice as a dict with the keys 'time', 'layer_name' and 'clusters'.

    :param for_db: If true the clusters are stored as a json string instead of a dict
    '''
    clusters = self.clusters
    if for_db:
        clusters = json.dumps(clusters)

    return {
        "time": self.time,
        'layer_name': self.layer_name,
        "clusters": clusters,
    }
def from_serializable_dict(self, dict: Dict, from_db=False):
    '''
    Overwrites time, layer_name and clusters with the values from the serialized dict.

    :param dict: The serialized time slice with the keys 'time', 'layer_name' and 'clusters'
    :param from_db: If true the clusters entry is parsed as a json string, otherwise it is used as-is
    '''
    # note: the parameter shadows the builtin dict inside this method
    self.time = dict["time"]
    self.layer_name = dict['layer_name']

    clusters = dict['clusters']
    if from_db:
        clusters = json.loads(clusters)
    self.clusters = clusters
def __repr__(self):
return self.__str__()
# return {'time': self.time, "#nodes": len(self.nodes)}
# json.dumps(self.to_serializable_dict())
return json.dumps(self.to_serializable_dict())
def __str__(self):
return f"TimeSlice({self.time}, {[len(v) for k, v in self.nodes.items()]})"
return f"TimeSlice({self.__repr__()})"
......@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
self._user_cluster_graph_collection = 'user_cluster_graph'
self._layer_collection = 'layer'
self._clusterset_collection = 'cluster_set'
self._time_slice_collection = 'time_slice'
self.agi_repo = AgiRepository()
......@@ -113,3 +114,21 @@ class Repository(MongoRepositoryBase):
else:
return None
#endregion
#region TimeSlice
def add_time_slice(self, timeslice: TimeSlice):
    '''Inserts the time slice, serialized for the db, into the time slice collection.'''
    serialized = timeslice.to_serializable_dict(for_db=True)
    super().insert_entry(self._time_slice_collection, serialized)
def get_time_slices(self) -> List[TimeSlice]:
    '''Returns all time slices of the time slice collection.'''
    time_slices = []
    for entry in super().get_entries(self._time_slice_collection):
        # time and layer name are restored from the entry itself
        time_slices.append(TimeSlice(None, None, time_slice_dict=entry, from_db=True))
    return time_slices
def get_time_slices_by_name(self, layer_name) -> List[TimeSlice]:
    '''
    Returns all time slices with the given layer_name.

    :param layer_name: The value the 'layer_name' field of the entries is selected by
    '''
    selection = {'layer_name': layer_name}
    time_slices = []
    for entry in super().get_entries(self._time_slice_collection, selection=selection):
        # time and layer name are restored from the entry itself
        time_slices.append(TimeSlice(None, None, time_slice_dict=entry, from_db=True))
    return time_slices
def remove_all_time_slices(self):
    '''Removes all time slices by dropping the whole time slice collection.'''
    collection_name = self._time_slice_collection
    super().drop_collection(collection_name)
#endregion
\ No newline at end of file
......@@ -10,8 +10,8 @@ def get():
def get_names():
return repo.get_clusterset_names()
def get_by_name(name):
res = repo.get_clusterset(name)
def get_by_name(layername):
res = repo.get_clusterset(layername)
if res is not None:
return res.to_serializable_dict()
else:
......
from flask import request, Response
from db.repository import Repository
from db.entities import TimeSlice
repo = Repository()
def get():
    '''Returns all time slices from the repository as serializable dicts.'''
    time_slices = repo.get_time_slices()
    return [time_slice.to_serializable_dict() for time_slice in time_slices]
def get_by_name(layername):
    '''
    Returns all time slices of one layer as serializable dicts.

    :param layername: The name of the layer to return the time slices for
    :returns: The list of serialized time slices, or an empty response with status 404 if there are none
    '''
    res = repo.get_time_slices_by_name(layername)

    # Covers None as well as an empty list. The former debug print(len(res))
    # ran before this check and would have raised for None.
    if not res:
        return Response(status=404)

    return [e.to_serializable_dict() for e in res]
......@@ -6,132 +6,56 @@ if os.path.exists(modules_path):
import json
from datetime import datetime, date
import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities.timeslice import TimeSlice
from db.entities import ClusterSet
from typing import Tuple
from typing import Tuple, Dict, Any
# repo = Repository()
TimeSliceKey = Tuple[int, int]
def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
'''Returns the tuple (year, week_of_year) from a timestamp.'''
timestamp = datetime.fromtimestamp(float(timestamp[0:10]))
(y, w, _) = timestamp.isocalendar()
def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
'''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.'''
time = datetime.utcfromtimestamp(float(timestamp[0:10]))
(y, w, _) = time.isocalendar()
return (y, w)
def get_clusterset():
# clusterset = repo.get_clusterset('Destination_Layer')
with open('clustering_results/optics/clusterset_Destination_Layer.txt') as file:
clusterset = ClusterSet(cluster_set_dict=json.loads(file.read()))
return clusterset
clusterset = ClusterSet(cluster_set_dict={
"clusters": [{
"cluster_label": 0,
"nodes": [{
"Finished_time": 1579143634812589,
"Latitude_Destination": -5.95081,
"Longitude_Destination": 37.415281,
"TravelID": "5e57ec9159bc0668543f1568",
"TravelPrice": 19,
"UniqueID": "2696718d7a33ab3dbf28e9c88411afcfe9a933a45e57ec9159bc0668543f1568",
"UserID": "2696718d7a33ab3dbf28e9c88411afcfe9a933a4",
"cluster_label": 0
}, {
"Finished_time": 1582709512112368,
"Latitude_Destination": -5.95081,
"Longitude_Destination": 37.415281,
"TravelID": "5e57ec9159bc0668543f15cf",
"TravelPrice": 16,
"UniqueID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c5e57ec9159bc0668543f15cf",
"UserID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c",
"cluster_label": 0
}, {
"Finished_time": 1582709512112367,
"Latitude_Destination": -5.95081,
"Longitude_Destination": 37.415281,
"TravelID": "5e57ec9159bc0668543f15cf",
"TravelPrice": 16,
"UniqueID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c5e57ec9159bc0668543f15cd",
"UserID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c",
"cluster_label": 0
}]
}],
"layer_name": "Destination_Layer"
})
return clusterset
def plt_show_circles(keys, time_slices, cluster_no):
for k in keys:
slice_ = time_slices[k]
if cluster_no in slice_.nodes:
nodes = slice_.nodes[cluster_no]
else:
nodes = []
# print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")
plt.title(str(k))
plt.scatter([n['Longitude_Destination'] for n in nodes],
[n['Latitude_Destination'] for n in nodes],
s=[len(nodes)*100]*len(nodes))
plt.pause(0.5)
def split_clusterset_by_time(clustersets) -> Dict[TimeSliceKey, TimeSlice]:
'''
Distributes all nodes of a single clusterset into individual time slices based on their timestamps.
If a node spans over multiple slices it will be added to all of them.
Information about clusters and the nodes in the clusters will not be changed.
:param clustersets: The clusterset whose nodes are split
:returns: A dict of time slices where the key is the time info and value is the information about the time slice
'''
def plt_show_bars(keys, time_slices, cluster_no):
x_axis_label_stepsize = 10
time_slices: Dict[Any, TimeSlice] = {}
for cluster_no in clusterset.clusters:
for node in cluster_no.nodes:
nodes_per_slice_for_single_cluster = \
[len(time_slices[k].nodes[cluster_no])
if cluster_no in time_slices[k].nodes
else 0
for k
in keys]
time_keys = {
convert_to_time_slice_key(str(node['Finished_time'])),
convert_to_time_slice_key(str(node['Starting_time']))
}
fig, ax = plt.subplots()
ax.bar(x=range(len(keys)),
height=nodes_per_slice_for_single_cluster)
for time_key in time_keys:
if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)
ax.set_ylabel('Size')
ax.set_title(f'Cluster-{cluster_no} size over time')
ax.set_xticks(range(len(keys))[::x_axis_label_stepsize])
ax.set_xticklabels(keys[::x_axis_label_stepsize])
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
plt.show()
clusterset = get_clusterset()
# print(clusterset.layer_name)
cnt = 0
time_slices = {}
# for clusterset in clustersets:
for cluster_no in clusterset.clusters:
for node in cluster_no.nodes:
# assign the nodes to time slices and recreate the clusters there
time_key = convert_to_time_slice_key(str(node['Finished_time']))
return time_slices
if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key)
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
# sort chronologically
keys = list(time_slices.keys())
keys.sort()
if __name__ == "__main__":
repo = Repository()
plt_show_bars(keys, time_slices, cluster_no = 20)
repo.remove_all_time_slices()
clustersets = repo.get_clustersets()
for clusterset in clustersets:
time_slices = split_clusterset_by_time(clusterset)
for k,v in time_slices.items():
repo.add_time_slice(v)
import sys
import os
for path in ['../', './', '../../../modules/']:
if os.path.exists(path):
sys.path.insert(1, path)
import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities import TimeSlice
from typing import List
def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
    '''
    Shows one scatter plot per time slice for the nodes of a single cluster,
    pausing for 0.5 between the slices.

    :param time_slices: The time slices to plot, in the order they should be shown
    :param cluster_no: The label of the cluster to plot, converted to str for the lookup
    '''
    cluster_no = str(cluster_no)

    for slice_ in time_slices:
        nodes = slice_.get_nodes_for_cluster(cluster_no)
        node_count = len(nodes)

        plt.title(str(slice_.time))

        # nodes without a coordinate are drawn at 0
        longitudes = [n['Longitude_Destination'] if 'Longitude_Destination' in n else 0
                      for n in nodes]
        latitudes = [n['Latitude_Destination'] if 'Latitude_Destination' in n else 0
                     for n in nodes]
        # every marker gets the same size, which grows with the cluster size
        marker_sizes = [node_count*100]*node_count

        plt.scatter(longitudes, latitudes, s=marker_sizes)
        plt.pause(0.5)
def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
    '''
    Shows a bar chart with the number of nodes of a single cluster for each time slice.

    :param time_slices: The time slices to plot, one bar per slice in the given order
    :param cluster_no: The label of the cluster to plot, converted to str for the lookup
    '''
    cluster_no = str(cluster_no)
    # only every n-th bar gets a tick and label on the x axis
    x_axis_label_stepsize = 10

    labels = [ts.time for ts in time_slices]
    cluster_sizes = [len(ts.get_nodes_for_cluster(cluster_no)) for ts in time_slices]
    positions = range(len(labels))

    fig, ax = plt.subplots()
    ax.bar(x=positions, height=cluster_sizes)

    ax.set_ylabel('Size')
    ax.set_title(f'Cluster-{cluster_no} size over time')
    ax.set_xticks(positions[::x_axis_label_stepsize])
    ax.set_xticklabels(labels[::x_axis_label_stepsize])

    plt.show()
if __name__ == "__main__":
    # local import: only needed to parse the time keys below
    import ast

    repo = Repository()

    time_slices = repo.get_time_slices_by_name("Destination_Layer")

    # Chronological order. ts.time holds the string form of the time key, e.g. "(2020, 52)".
    # This previously used eval(); literal_eval only accepts Python literals, so a
    # manipulated database entry cannot execute arbitrary code here.
    time_slices.sort(key=lambda ts: ast.literal_eval(ts.time))

    print(len(time_slices))

    plt_show_bars(time_slices, cluster_no = 0)
\ No newline at end of file
......@@ -12,6 +12,9 @@ class MongoRepositoryBase:
self._mongo_client = MongoClient(f"mongodb://{username}:{password}@{hostname}:{port}/")
self._database = self._mongo_client[database_name]
def drop_collection(self, collection_name):
    '''
    Drops the collection with the given name from the database.

    :param collection_name: The name of the collection to drop
    '''
    collection = self._database[collection_name]
    collection.drop()
def insert_entry(self, collection_name, content: dict):
collection = self._database[collection_name]
collection.insert_one(content)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment