Commit bd4aa55b authored by Alexander Lercher's avatar Alexander Lercher

Merge branch 'feature/network-stages' into develop

parents 0e20ca32 c66bd0dd
...@@ -228,14 +228,14 @@ paths: ...@@ -228,14 +228,14 @@ paths:
items: items:
type: string type: string
/clustersets/{name}: /clustersets/{layername}:
get: get:
operationId: "routes.clustersets.get_by_name" operationId: "routes.clustersets.get_by_name"
tags: tags:
- "Clusters" - "Clusters"
summary: "Get clusterset for layer-name" summary: "Get clusterset for layer-name"
parameters: parameters:
- name: "name" - name: "layername"
in: "path" in: "path"
description: "Name of the layer to return the clusterset for" description: "Name of the layer to return the clusterset for"
required: true required: true
...@@ -262,6 +262,41 @@ paths: ...@@ -262,6 +262,41 @@ paths:
schema: schema:
$ref: "#/definitions/UserClusterGraphCollection" $ref: "#/definitions/UserClusterGraphCollection"
# Time slices
/timeslices:
# Returns the time slices of all layers; handled by routes.timeslices.get.
get:
operationId: "routes.timeslices.get"
tags:
- "Time Slices"
summary: "Get all time slices based on individual layers containing clusters with nodes for that time"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeSliceCollection"
/timeslices/{layername}:
# Returns the time slices of a single layer; handled by routes.timeslices.get_by_name.
get:
operationId: "routes.timeslices.get_by_name"
tags:
- "Time Slices"
summary: "Get all time slices for one layer"
parameters:
# the path parameter name must match the argument name of the handler function
- name: "layername"
in: "path"
description: "Name of the layer to return the time slices for"
required: true
type: "string"
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeSliceCollection"
# the handler answers with an empty 404 response when the layer has no time slices
404:
description: "No time slices found for layername"
# Function Calls # Function Calls
/rfc/run: /rfc/run:
post: post:
...@@ -422,4 +457,31 @@ definitions: ...@@ -422,4 +457,31 @@ definitions:
ClusterSetCollection: ClusterSetCollection:
type: array type: array
items: items:
$ref: "#/definitions/ClusterSet" $ref: "#/definitions/ClusterSet"
\ No newline at end of file
TimeSlice:
  # A time slice of one layer: its clusters and the nodes they contain for that time.
  type: object
  properties:
    time:
      # the time key is serialized in its string form, so it is a string and not an object
      type: string
      example: "(2020, 52)"
    layer_name:
      type: string
    clusters:
      # maps each cluster label to the list of its nodes, reduced to their UniqueID
      type: object
      additionalProperties:
        type: array
        items:
          type: object
          properties:
            UniqueID:
              type: string
      example:
        "0":
          - UniqueID: abc
          - UniqueID: def
TimeSliceCollection:
# List of time slices, used as the response schema of the /timeslices endpoints.
type: array
items:
$ref: "#/definitions/TimeSlice"
\ No newline at end of file
...@@ -4,3 +4,4 @@ from db.entities.cluster import Cluster, LocationCluster, TimeCluster ...@@ -4,3 +4,4 @@ from db.entities.cluster import Cluster, LocationCluster, TimeCluster
from db.entities.clusterset import ClusterSet from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer from db.entities.layer import Layer
from db.entities.timeslice import TimeSlice
\ No newline at end of file
import json import json
from typing import List, Dict, TypeVar, Any from typing import List, Dict, NewType, Any
from datetime import date, datetime from datetime import date, datetime
Node = TypeVar('Node') Node = NewType('Node', dict)
class TimeSlice: class TimeSlice:
'''
def __init__(self, time, nodes = None, A time slice for a single layer containing all nodes for that time.
cluster_set_dict: Dict = None, from_db = False):
self.time = time :param time: The tag indicating the time
self.nodes: Dict[int, List[Node]] = {} :param layer_name: The name of the layer the nodes belong to
'''
# if cluster_set_dict is not None:
# self.from_serializable_dict(cluster_set_dict, from_db) def __init__(self, time: Any, layer_name: str,
time_slice_dict: Dict = None, from_db = False):
def add_node_to_cluster(self, cluster_label, node): self.time = str(time)
if cluster_label not in self.nodes: self.layer_name = layer_name
self.nodes[cluster_label] = [] self.clusters: Dict[str, List[Node]] = {}
self.nodes[cluster_label].append(node) if time_slice_dict is not None:
self.from_serializable_dict(time_slice_dict, from_db)
# todo
def add_node_to_cluster(self, cluster_label: str, node):
# def to_serializable_dict(self, for_db=False) -> Dict: # only string keys can be stored in json
# serialized_dict_clusters = [cluster.to_serializable_dict(for_db) cluster_label = str(cluster_label)
# for cluster in self.clusters]
# return { if cluster_label not in self.clusters:
# "layer_name": self.layer_name, self.clusters[cluster_label] = []
# "clusters": json.dumps(serialized_dict_clusters) if for_db else serialized_dict_clusters
# } node = self._get_unique_id(node)
self.clusters[cluster_label].append(node)
# def from_serializable_dict(self, cluster_set_dict: Dict, from_db=False):
# self.layer_name = cluster_set_dict["layer_name"] def get_nodes_for_cluster(self, cluster_label: str):
if cluster_label in self.clusters:
# serialized_dict_clusters = json.loads(cluster_set_dict["clusters"]) \ return self.clusters[cluster_label]
# if from_db else cluster_set_dict["clusters"] else:
# self.clusters = [Cluster(cluster_dict=cluster_dict, from_db=from_db) return []
# for cluster_dict in serialized_dict_clusters]
def _get_unique_id(self, node : Dict) -> Dict:
'''Returns a new dict with the unique id only.'''
uid_key = 'UniqueID'
if uid_key in node:
return {uid_key: node[uid_key]}
def to_serializable_dict(self, for_db=False) -> Dict:
return {
"time": self.time,
'layer_name': self.layer_name,
"clusters": json.dumps(self.clusters) if for_db else self.clusters
}
def from_serializable_dict(self, dict: Dict, from_db=False):
self.time = dict["time"]
self.layer_name = dict['layer_name']
self.clusters = json.loads(dict['clusters']) if from_db else dict['clusters']
def __repr__(self): def __repr__(self):
return self.__str__() return json.dumps(self.to_serializable_dict())
# return {'time': self.time, "#nodes": len(self.nodes)}
# json.dumps(self.to_serializable_dict())
def __str__(self): def __str__(self):
return f"TimeSlice({self.time}, {[len(v) for k, v in self.nodes.items()]})" return f"TimeSlice({self.__repr__()})"
...@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase): ...@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
self._user_cluster_graph_collection = 'user_cluster_graph' self._user_cluster_graph_collection = 'user_cluster_graph'
self._layer_collection = 'layer' self._layer_collection = 'layer'
self._clusterset_collection = 'cluster_set' self._clusterset_collection = 'cluster_set'
self._time_slice_collection = 'time_slice'
self.agi_repo = AgiRepository() self.agi_repo = AgiRepository()
...@@ -113,3 +114,21 @@ class Repository(MongoRepositoryBase): ...@@ -113,3 +114,21 @@ class Repository(MongoRepositoryBase):
else: else:
return None return None
#endregion #endregion
#region TimeSlice
def add_time_slice(self, timeslice: TimeSlice):
    '''
    Stores a single time slice in the time slice collection.

    :param timeslice: The time slice to store
    '''
    # the db representation is requested explicitly via for_db=True
    db_entry = timeslice.to_serializable_dict(for_db=True)
    super().insert_entry(self._time_slice_collection, db_entry)
def get_time_slices(self) -> List[TimeSlice]:
    '''Loads every entry of the time slice collection and returns them as TimeSlice objects.'''
    time_slices = []
    for db_entry in super().get_entries(self._time_slice_collection):
        # time and layer name are passed as None and restored from the stored dict
        time_slices.append(TimeSlice(None, None, time_slice_dict=db_entry, from_db=True))
    return time_slices
def get_time_slices_by_name(self, layer_name) -> List[TimeSlice]:
    '''
    Loads the time slices of a single layer and returns them as TimeSlice objects.

    :param layer_name: The name of the layer the time slices must belong to
    '''
    layer_filter = {'layer_name': layer_name}
    time_slices = []
    for db_entry in super().get_entries(self._time_slice_collection, selection=layer_filter):
        # time and layer name are passed as None and restored from the stored dict
        time_slices.append(TimeSlice(None, None, time_slice_dict=db_entry, from_db=True))
    return time_slices
def remove_all_time_slices(self):
    '''Removes all stored time slices by dropping the time slice collection.'''
    collection_name = self._time_slice_collection
    super().drop_collection(collection_name)
#endregion
\ No newline at end of file
...@@ -10,8 +10,8 @@ def get(): ...@@ -10,8 +10,8 @@ def get():
def get_names(): def get_names():
return repo.get_clusterset_names() return repo.get_clusterset_names()
def get_by_name(name): def get_by_name(layername):
res = repo.get_clusterset(name) res = repo.get_clusterset(layername)
if res is not None: if res is not None:
return res.to_serializable_dict() return res.to_serializable_dict()
else: else:
......
from flask import request, Response
from db.repository import Repository
from db.entities import TimeSlice
repo = Repository()
def get():
    '''Returns all stored time slices as a list of serializable dicts.'''
    time_slices = repo.get_time_slices()
    return [time_slice.to_serializable_dict() for time_slice in time_slices]
def get_by_name(layername):
    '''
    Returns all time slices of one layer as a list of serializable dicts.

    :param layername: The name of the layer to return the time slices for
    :returns: The serialized time slices, or an empty 404 response if there are none for the layer
    '''
    time_slices = repo.get_time_slices_by_name(layername)

    # covers both a missing (None) and an empty result
    if not time_slices:
        return Response(status=404)

    return [time_slice.to_serializable_dict() for time_slice in time_slices]
...@@ -6,132 +6,56 @@ if os.path.exists(modules_path): ...@@ -6,132 +6,56 @@ if os.path.exists(modules_path):
import json import json
from datetime import datetime, date from datetime import datetime, date
import matplotlib.pyplot as plt
from db.repository import Repository from db.repository import Repository
from db.entities.timeslice import TimeSlice from db.entities.timeslice import TimeSlice
from db.entities import ClusterSet from db.entities import ClusterSet
from typing import Tuple from typing import Tuple, Dict, Any
# repo = Repository() TimeSliceKey = Tuple[int, int]
def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]: '''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.'''
'''Returns the tuple (year, week_of_year) from a timestamp.''' time = datetime.utcfromtimestamp(float(timestamp[0:10]))
timestamp = datetime.fromtimestamp(float(timestamp[0:10])) (y, w, _) = time.isocalendar()
(y, w, _) = timestamp.isocalendar()
return (y, w) return (y, w)
def get_clusterset(): def split_clusterset_by_time(clustersets) -> Dict[TimeSliceKey, TimeSlice]:
# clusterset = repo.get_clusterset('Destination_Layer') '''
with open('clustering_results/optics/clusterset_Destination_Layer.txt') as file: Distributes all nodes of a single clusterset into individual time slices based on their timestamps.
clusterset = ClusterSet(cluster_set_dict=json.loads(file.read())) If a node spans over multiple slices it will be added to all of them.
return clusterset Information about clusters and the nodes in the clusters will not be changed.
clusterset = ClusterSet(cluster_set_dict={
"clusters": [{
"cluster_label": 0,
"nodes": [{
"Finished_time": 1579143634812589,
"Latitude_Destination": -5.95081,
"Longitude_Destination": 37.415281,
"TravelID": "5e57ec9159bc0668543f1568",
"TravelPrice": 19,
"UniqueID": "2696718d7a33ab3dbf28e9c88411afcfe9a933a45e57ec9159bc0668543f1568",
"UserID": "2696718d7a33ab3dbf28e9c88411afcfe9a933a4",
"cluster_label": 0
}, {
"Finished_time": 1582709512112368,
"Latitude_Destination": -5.95081,
"Longitude_Destination": 37.415281,
"TravelID": "5e57ec9159bc0668543f15cf",
"TravelPrice": 16,
"UniqueID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c5e57ec9159bc0668543f15cf",
"UserID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c",
"cluster_label": 0
}, {
"Finished_time": 1582709512112367,
"Latitude_Destination": -5.95081,
"Longitude_Destination": 37.415281,
"TravelID": "5e57ec9159bc0668543f15cf",
"TravelPrice": 16,
"UniqueID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c5e57ec9159bc0668543f15cd",
"UserID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c",
"cluster_label": 0
}]
}],
"layer_name": "Destination_Layer"
})
return clusterset
def plt_show_circles(keys, time_slices, cluster_no):
for k in keys:
slice_ = time_slices[k]
if cluster_no in slice_.nodes:
nodes = slice_.nodes[cluster_no]
else:
nodes = []
# print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")
plt.title(str(k))
plt.scatter([n['Longitude_Destination'] for n in nodes],
[n['Latitude_Destination'] for n in nodes],
s=[len(nodes)*100]*len(nodes))
plt.pause(0.5)
:param clustersets: The clusterset whose nodes are split
:returns: A dict of time slices where the key is the time info and value is the information about the time slice
'''
def plt_show_bars(keys, time_slices, cluster_no): time_slices: Dict[Any, TimeSlice] = {}
x_axis_label_stepsize = 10 for cluster_no in clusterset.clusters:
for node in cluster_no.nodes:
nodes_per_slice_for_single_cluster = \ time_keys = {
[len(time_slices[k].nodes[cluster_no]) convert_to_time_slice_key(str(node['Finished_time'])),
if cluster_no in time_slices[k].nodes convert_to_time_slice_key(str(node['Starting_time']))
else 0 }
for k
in keys]
fig, ax = plt.subplots() for time_key in time_keys:
ax.bar(x=range(len(keys)), if time_key not in time_slices:
height=nodes_per_slice_for_single_cluster) time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)
ax.set_ylabel('Size') time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
ax.set_title(f'Cluster-{cluster_no} size over time')
ax.set_xticks(range(len(keys))[::x_axis_label_stepsize])
ax.set_xticklabels(keys[::x_axis_label_stepsize])
plt.show() return time_slices
clusterset = get_clusterset()
# print(clusterset.layer_name)
cnt = 0
time_slices = {}
# for clusterset in clustersets:
for cluster_no in clusterset.clusters:
for node in cluster_no.nodes:
# assign the nodes to time slices and recreate the clusters there
time_key = convert_to_time_slice_key(str(node['Finished_time']))
if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key)
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
# sort chronologically
keys = list(time_slices.keys())
keys.sort()
if __name__ == "__main__":
repo = Repository()
plt_show_bars(keys, time_slices, cluster_no = 20) repo.remove_all_time_slices()
clustersets = repo.get_clustersets()
for clusterset in clustersets:
time_slices = split_clusterset_by_time(clusterset)
for k,v in time_slices.items():
repo.add_time_slice(v)
import sys
import os
for path in ['../', './', '../../../modules/']:
if os.path.exists(path):
sys.path.insert(1, path)
import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities import TimeSlice
from typing import List
def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
    '''
    Shows the nodes of a single cluster as a scatter plot, one time slice after another.

    :param time_slices: The time slices to plot in the given order
    :param cluster_no: The label of the cluster to plot
    '''
    # the label is converted to a string before it is used for the lookup
    cluster_label = str(cluster_no)

    for time_slice in time_slices:
        cluster_nodes = time_slice.get_nodes_for_cluster(cluster_label)

        # nodes without coordinates are drawn at 0
        longitudes = [node.get('Longitude_Destination', 0) for node in cluster_nodes]
        latitudes = [node.get('Latitude_Destination', 0) for node in cluster_nodes]
        # every marker is scaled by the number of nodes in the cluster
        marker_sizes = [len(cluster_nodes) * 100] * len(cluster_nodes)

        plt.title(str(time_slice.time))
        plt.scatter(longitudes, latitudes, s=marker_sizes)
        plt.pause(0.5)
def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
    '''
    Shows a bar chart with the number of nodes of a single cluster for each time slice.

    :param time_slices: The time slices to plot in the given order
    :param cluster_no: The label of the cluster to plot
    '''
    # the label is converted to a string before it is used for the lookup
    cluster_label = str(cluster_no)
    tick_step = 10  # only every 10th bar gets an x-axis label

    time_labels = []
    cluster_sizes = []
    for time_slice in time_slices:
        time_labels.append(time_slice.time)
        cluster_sizes.append(len(time_slice.get_nodes_for_cluster(cluster_label)))

    bar_positions = range(len(time_labels))

    _, ax = plt.subplots()
    ax.bar(x=bar_positions, height=cluster_sizes)

    ax.set_ylabel('Size')
    ax.set_title(f'Cluster-{cluster_label} size over time')
    ax.set_xticks(bar_positions[::tick_step])
    ax.set_xticklabels(time_labels[::tick_step])

    plt.show()
if __name__ == "__main__":
    import ast

    repo = Repository()

    time_slices = repo.get_time_slices_by_name("Destination_Layer")

    # chronological order: the time is stored as the string of a (year, week) tuple,
    # literal_eval parses it back without executing code coming from the database
    time_slices.sort(key=lambda ts: ast.literal_eval(ts.time))

    print(len(time_slices))

    plt_show_bars(time_slices, cluster_no = 0)
\ No newline at end of file
...@@ -12,6 +12,9 @@ class MongoRepositoryBase: ...@@ -12,6 +12,9 @@ class MongoRepositoryBase:
self._mongo_client = MongoClient(f"mongodb://{username}:{password}@{hostname}:{port}/") self._mongo_client = MongoClient(f"mongodb://{username}:{password}@{hostname}:{port}/")
self._database = self._mongo_client[database_name] self._database = self._mongo_client[database_name]
def drop_collection(self, collection_name):
    '''
    Drops the collection with the given name from the database.

    :param collection_name: The name of the collection to drop
    '''
    collection = self._database[collection_name]
    collection.drop()
def insert_entry(self, collection_name, content: dict): def insert_entry(self, collection_name, content: dict):
collection = self._database[collection_name] collection = self._database[collection_name]
collection.insert_one(content) collection.insert_one(content)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment