Commit e7061d7f authored by Alexander Lercher's avatar Alexander Lercher

Improved data schema for time slices, storing and loading from mongodb

parent 0e20ca32
......@@ -228,14 +228,14 @@ paths:
items:
type: string
/clustersets/{name}:
/clustersets/{layername}:
get:
operationId: "routes.clustersets.get_by_name"
tags:
- "Clusters"
summary: "Get clusterset for layer-name"
parameters:
- name: "name"
- name: "layername"
in: "path"
description: "Name of the layer to return the clusterset for"
required: true
......@@ -262,6 +262,41 @@ paths:
schema:
$ref: "#/definitions/UserClusterGraphCollection"
# Time slices
/timeslices:
get:
operationId: "routes.timeslices.get"
tags:
- "Time Slices"
summary: "Get all time slices based on individual layers containing clusters with nodes for that time"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeSliceCollection"
/timeslices/{layername}:
get:
operationId: "routes.timeslices.get_by_name"
tags:
- "Time Slices"
summary: "Get all time slices for one layer"
parameters:
- name: "layername"
in: "path"
description: "Name of the layer to return the time slices for"
required: true
type: "string"
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeSliceCollection"
404:
description: "No time slices found for layername"
# Function Calls
/rfc/run:
post:
......@@ -423,3 +458,30 @@ definitions:
type: array
items:
$ref: "#/definitions/ClusterSet"
# A time slice of one layer: the clusters (by label) with the nodes seen at that time.
TimeSlice:
  type: object
  properties:
    time:
      # The entity stores the time tag as a string (str(time)), not as an object.
      type: string
      example: "(2020, 52)"
    layer_name:
      type: string
    clusters:
      # Maps a cluster label to the list of nodes, each reduced to its UniqueID.
      type: object
      additionalProperties:
        type: array
        items:
          type: object
          properties:
            UniqueID:
              type: string
      example:
        "0":
          - UniqueID: abc
          - UniqueID: def
TimeSliceCollection:
type: array
items:
$ref: "#/definitions/TimeSlice"
\ No newline at end of file
......@@ -4,3 +4,4 @@ from db.entities.cluster import Cluster, LocationCluster, TimeCluster
from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer
from db.entities.timeslice import TimeSlice
\ No newline at end of file
import json
from typing import List, Dict, TypeVar, Any
from typing import List, Dict, NewType, Any
from datetime import date, datetime
Node = TypeVar('Node')
Node = NewType('Node', dict)
class TimeSlice:
    '''
    A time slice for a single layer containing all nodes for that time.

    :param time: The tag indicating the time, stored as its string form
    :param layer_name: The name of the layer the nodes belong to
    :param time_slice_dict: Optional serialized time slice (as produced by
        to_serializable_dict); if given, its values replace time, layer_name
        and clusters
    :param from_db: True if time_slice_dict has the database format, where
        the clusters are stored as a JSON string
    '''

    def __init__(self, time: Any, layer_name: str,
                 time_slice_dict: Dict = None, from_db=False):
        self.time = str(time)
        self.layer_name = layer_name
        # cluster label -> nodes of that cluster, each reduced to its unique id
        self.clusters: Dict[int, List[Node]] = {}

        if time_slice_dict is not None:
            self.from_serializable_dict(time_slice_dict, from_db)

    def add_node_to_cluster(self, cluster_label: int, node):
        '''Appends the node (reduced to its unique id) to the cluster with the given label.'''
        self.clusters.setdefault(cluster_label, []).append(self._get_unique_id(node))

    def get_nodes_for_cluster(self, cluster_label: int):
        '''Returns the nodes stored for the cluster label, or an empty list if there are none.'''
        return self.clusters.get(cluster_label, [])

    def _get_unique_id(self, node: Dict) -> Dict:
        '''
        Returns a new dict with the unique id only.

        A node without a 'UniqueID' yields an empty dict instead of None, so
        the cluster always contains dicts and still counts the node.
        '''
        uid_key = 'UniqueID'
        if uid_key in node:
            return {uid_key: node[uid_key]}
        return {}

    @staticmethod
    def _to_cluster_label(key):
        '''Converts a JSON object key back to an int label; other keys are kept unchanged.'''
        try:
            return int(key)
        except (TypeError, ValueError):
            return key

    def to_serializable_dict(self, for_db=False) -> Dict:
        '''
        Returns the time slice as dict.

        :param for_db: If True the clusters are dumped to a JSON string
        '''
        return {
            "time": self.time,
            'layer_name': self.layer_name,
            "clusters": json.dumps(self.clusters) if for_db else self.clusters
        }

    def from_serializable_dict(self, dict: Dict, from_db=False):
        '''
        Overwrites time, layer_name and clusters with the values from the dict.

        :param dict: The serialized time slice (the name shadows the builtin
            but is kept so existing keyword callers keep working)
        :param from_db: If True the clusters are parsed from a JSON string
        '''
        self.time = dict["time"]
        self.layer_name = dict['layer_name']

        if from_db:
            # JSON object keys are always strings, so the int labels written by
            # to_serializable_dict(for_db=True) have to be restored here;
            # otherwise lookups with int labels miss every cluster.
            stored_clusters = json.loads(dict['clusters'])
            self.clusters = {self._to_cluster_label(label): nodes
                             for label, nodes in stored_clusters.items()}
        else:
            self.clusters = dict['clusters']

    def __repr__(self):
        return json.dumps(self.to_serializable_dict())

    def __str__(self):
        return f"TimeSlice({self.__repr__()})"
......@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
self._user_cluster_graph_collection = 'user_cluster_graph'
self._layer_collection = 'layer'
self._clusterset_collection = 'cluster_set'
self._time_slice_collection = 'time_slice'
self.agi_repo = AgiRepository()
......@@ -113,3 +114,21 @@ class Repository(MongoRepositoryBase):
else:
return None
#endregion
#region TimeSlice
def add_time_slice(self, timeslice: TimeSlice):
    '''Inserts the time slice, serialized in its database format, into the time slice collection.'''
    db_entry = timeslice.to_serializable_dict(for_db=True)
    super().insert_entry(self._time_slice_collection, db_entry)
def get_time_slices(self) -> List[TimeSlice]:
    '''Returns all time slices.'''
    time_slices = []
    for entry in super().get_entries(self._time_slice_collection):
        # time and layer name are taken from the stored entry itself
        time_slices.append(TimeSlice(None, None, time_slice_dict=entry, from_db=True))
    return time_slices
def get_time_slices_by_name(self, layer_name) -> List[TimeSlice]:
    '''Returns all time slices with the given layer_name.'''
    layer_filter = {'layer_name': layer_name}
    time_slices = []
    for entry in super().get_entries(self._time_slice_collection, selection=layer_filter):
        # time and layer name are taken from the stored entry itself
        time_slices.append(TimeSlice(None, None, time_slice_dict=entry, from_db=True))
    return time_slices
def remove_all_time_slices(self):
    '''Drops the whole time slice collection.'''
    collection_name = self._time_slice_collection
    super().drop_collection(collection_name)
#endregion
\ No newline at end of file
......@@ -10,8 +10,8 @@ def get():
def get_names():
    '''Returns the clusterset names as reported by the repository.'''
    names = repo.get_clusterset_names()
    return names
def get_by_name(name):
res = repo.get_clusterset(name)
def get_by_name(layername):
res = repo.get_clusterset(layername)
if res is not None:
return res.to_serializable_dict()
else:
......
from flask import request, Response
from db.repository import Repository
from db.entities import TimeSlice
repo = Repository()
def get():
    '''Returns every time slice from the repository in its serializable dict form.'''
    serialized = []
    for time_slice in repo.get_time_slices():
        serialized.append(time_slice.to_serializable_dict())
    return serialized
def get_by_name(layername):
    '''
    Returns the time slices of one layer in their serializable dict form.

    :param layername: Name of the layer to return the time slices for
    :returns: A list of serialized time slices, or an empty 404 response if
        the repository has none for that layer
    '''
    res = repo.get_time_slices_by_name(layername)

    # None and an empty list both mean "nothing found". The guard has to come
    # before anything that touches res (the old debug print called len() first).
    if not res:
        return Response(status=404)

    return [e.to_serializable_dict() for e in res]
......@@ -10,9 +10,9 @@ import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities.timeslice import TimeSlice
from db.entities import ClusterSet
from typing import Tuple
from typing import Tuple, Dict
# repo = Repository()
repo = Repository()
def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
......@@ -23,46 +23,9 @@ def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
def get_clusterset():
    '''
    Loads the clusterset for the layer 'Destination_Layer' from the repository.

    :returns: Whatever repo.get_clusterset returns for that layer
    '''
    # The hard-coded sample clusterset and the file based loading
    # (clustering_results/optics/clusterset_Destination_Layer.txt) were only
    # development fixtures; the repository is the single source now.
    clusterset = repo.get_clusterset('Destination_Layer')
    return clusterset
......@@ -90,9 +53,7 @@ def plt_show_bars(keys, time_slices, cluster_no):
x_axis_label_stepsize = 10
nodes_per_slice_for_single_cluster = \
[len(time_slices[k].nodes[cluster_no])
if cluster_no in time_slices[k].nodes
else 0
[len(time_slices[k].get_nodes_for_cluster(cluster_no))
for k
in keys]
......@@ -110,7 +71,7 @@ def plt_show_bars(keys, time_slices, cluster_no):
clusterset = get_clusterset()
# print(clusterset.layer_name)
cnt = 0
time_slices = {}
......@@ -121,7 +82,7 @@ for cluster_no in clusterset.clusters:
time_key = convert_to_time_slice_key(str(node['Finished_time']))
if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key)
time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
......@@ -131,7 +92,10 @@ keys = list(time_slices.keys())
keys.sort()
plt_show_bars(keys, time_slices, cluster_no = 20)
repo.remove_all_time_slices()
for k,v in time_slices.items():
repo.add_time_slice(v)
print(len(time_slices))
plt_show_bars(keys, time_slices, cluster_no = 0)
......@@ -12,6 +12,9 @@ class MongoRepositoryBase:
self._mongo_client = MongoClient(f"mongodb://{username}:{password}@{hostname}:{port}/")
self._database = self._mongo_client[database_name]
def drop_collection(self, collection_name):
    '''Drops the collection with the given name from the database.'''
    collection = self._database[collection_name]
    collection.drop()
def insert_entry(self, collection_name, content: dict):
collection = self._database[collection_name]
collection.insert_one(content)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment