Commit e7061d7f authored by Alexander Lercher

Improved data schema for time slices, storing and loading from mongodb

parent 0e20ca32
...@@ -228,14 +228,14 @@ paths: ...@@ -228,14 +228,14 @@ paths:
items: items:
type: string type: string
/clustersets/{name}: /clustersets/{layername}:
get: get:
operationId: "routes.clustersets.get_by_name" operationId: "routes.clustersets.get_by_name"
tags: tags:
- "Clusters" - "Clusters"
summary: "Get clusterset for layer-name" summary: "Get clusterset for layer-name"
parameters: parameters:
- name: "name" - name: "layername"
in: "path" in: "path"
description: "Name of the layer to return the clusterset for" description: "Name of the layer to return the clusterset for"
required: true required: true
...@@ -262,6 +262,41 @@ paths: ...@@ -262,6 +262,41 @@ paths:
schema: schema:
$ref: "#/definitions/UserClusterGraphCollection" $ref: "#/definitions/UserClusterGraphCollection"
# Time slices
# Returns every stored time slice; handled by routes.timeslices.get.
/timeslices:
get:
operationId: "routes.timeslices.get"
tags:
- "Time Slices"
summary: "Get all time slices based on individual layers containing clusters with nodes for that time"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeSliceCollection"
# Returns only the time slices of the layer named in the path; handled by routes.timeslices.get_by_name.
/timeslices/{layername}:
get:
operationId: "routes.timeslices.get_by_name"
tags:
- "Time Slices"
summary: "Get all time slices for one layer"
parameters:
- name: "layername"
in: "path"
description: "Name of the layer to return the time slices for"
required: true
type: "string"
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeSliceCollection"
# Returned without a body when no time slice is stored for the layer.
404:
description: "No time slices found for layername"
# Function Calls # Function Calls
/rfc/run: /rfc/run:
post: post:
...@@ -422,4 +457,31 @@ definitions: ...@@ -422,4 +457,31 @@ definitions:
ClusterSetCollection: ClusterSetCollection:
type: array type: array
items: items:
$ref: "#/definitions/ClusterSet" $ref: "#/definitions/ClusterSet"
\ No newline at end of file
# A time slice for a single layer: all nodes of that layer for one time key, grouped by cluster.
TimeSlice:
  type: object
  properties:
    time:
      # The entity stores the time key as its string form, so this is a string and not an object.
      type: string
      example: "(2020, 52)"
    layer_name:
      type: string
    clusters:
      # Maps a cluster label to the nodes of that cluster; each node is reduced to its UniqueID.
      type: object
      additionalProperties:
        type: array
        items:
          type: object
          properties:
            UniqueID:
              type: string
      example:
        "0":
          - UniqueID: abc
          - UniqueID: def
# List of time slices as returned by the /timeslices endpoints.
TimeSliceCollection:
  type: array
  items:
    $ref: "#/definitions/TimeSlice"
\ No newline at end of file
...@@ -4,3 +4,4 @@ from db.entities.cluster import Cluster, LocationCluster, TimeCluster ...@@ -4,3 +4,4 @@ from db.entities.cluster import Cluster, LocationCluster, TimeCluster
from db.entities.clusterset import ClusterSet from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer from db.entities.layer import Layer
from db.entities.timeslice import TimeSlice
\ No newline at end of file
import json import json
from typing import List, Dict, TypeVar, Any from typing import List, Dict, NewType, Any
from datetime import date, datetime from datetime import date, datetime
Node = TypeVar('Node') Node = NewType('Node', dict)
class TimeSlice: class TimeSlice:
'''
def __init__(self, time, nodes = None, A time slice for a single layer containing all nodes for that time.
cluster_set_dict: Dict = None, from_db = False):
self.time = time :param time: The tag indicating the time
self.nodes: Dict[int, List[Node]] = {} :param layer_name: The name of the layer the nodes belong to
'''
# if cluster_set_dict is not None:
# self.from_serializable_dict(cluster_set_dict, from_db) def __init__(self, time: Any, layer_name: str,
time_slice_dict: Dict = None, from_db = False):
def add_node_to_cluster(self, cluster_label, node): self.time = str(time)
if cluster_label not in self.nodes: self.layer_name = layer_name
self.nodes[cluster_label] = [] self.clusters: Dict[int, List[Node]] = {}
self.nodes[cluster_label].append(node) if time_slice_dict is not None:
self.from_serializable_dict(time_slice_dict, from_db)
# todo
def add_node_to_cluster(self, cluster_label: int, node):
# def to_serializable_dict(self, for_db=False) -> Dict: if cluster_label not in self.clusters:
# serialized_dict_clusters = [cluster.to_serializable_dict(for_db) self.clusters[cluster_label] = []
# for cluster in self.clusters]
# return { node = self._get_unique_id(node)
# "layer_name": self.layer_name, self.clusters[cluster_label].append(node)
# "clusters": json.dumps(serialized_dict_clusters) if for_db else serialized_dict_clusters
# } def get_nodes_for_cluster(self, cluster_label: int):
if cluster_label in self.clusters:
# def from_serializable_dict(self, cluster_set_dict: Dict, from_db=False): return self.clusters[cluster_label]
# self.layer_name = cluster_set_dict["layer_name"] else:
return []
# serialized_dict_clusters = json.loads(cluster_set_dict["clusters"]) \
# if from_db else cluster_set_dict["clusters"] def _get_unique_id(self, node : Dict) -> Dict:
# self.clusters = [Cluster(cluster_dict=cluster_dict, from_db=from_db) '''Returns a new dict with the unique id only.'''
# for cluster_dict in serialized_dict_clusters] uid_key = 'UniqueID'
if uid_key in node:
return {uid_key: node[uid_key]}
def to_serializable_dict(self, for_db=False) -> Dict:
return {
"time": self.time,
'layer_name': self.layer_name,
"clusters": json.dumps(self.clusters) if for_db else self.clusters
}
def from_serializable_dict(self, dict: Dict, from_db=False):
self.time = dict["time"]
self.layer_name = dict['layer_name']
self.clusters = json.loads(dict['clusters']) if from_db else dict['clusters']
def __repr__(self): def __repr__(self):
return self.__str__() return json.dumps(self.to_serializable_dict())
# return {'time': self.time, "#nodes": len(self.nodes)}
# json.dumps(self.to_serializable_dict())
def __str__(self): def __str__(self):
return f"TimeSlice({self.time}, {[len(v) for k, v in self.nodes.items()]})" return f"TimeSlice({self.__repr__()})"
...@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase): ...@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
self._user_cluster_graph_collection = 'user_cluster_graph' self._user_cluster_graph_collection = 'user_cluster_graph'
self._layer_collection = 'layer' self._layer_collection = 'layer'
self._clusterset_collection = 'cluster_set' self._clusterset_collection = 'cluster_set'
self._time_slice_collection = 'time_slice'
self.agi_repo = AgiRepository() self.agi_repo = AgiRepository()
...@@ -113,3 +114,21 @@ class Repository(MongoRepositoryBase): ...@@ -113,3 +114,21 @@ class Repository(MongoRepositoryBase):
else: else:
return None return None
#endregion #endregion
#region TimeSlice
def add_time_slice(self, timeslice: TimeSlice):
    '''Inserts the given time slice, serialized for the db, into the time slice collection.'''
    super().insert_entry(
        self._time_slice_collection,
        timeslice.to_serializable_dict(for_db=True),
    )
def get_time_slices(self) -> List[TimeSlice]:
    '''Loads every entry of the time slice collection as a TimeSlice.'''
    time_slices = []
    for entry in super().get_entries(self._time_slice_collection):
        # time and layer_name are taken from the entry itself
        time_slices.append(TimeSlice(None, None, time_slice_dict=entry, from_db=True))
    return time_slices
def get_time_slices_by_name(self, layer_name) -> List[TimeSlice]:
    '''Loads the entries of the time slice collection matching the given layer_name as TimeSlices.'''
    time_slices = []
    for entry in super().get_entries(self._time_slice_collection, selection={'layer_name': layer_name}):
        # time and layer_name are taken from the entry itself
        time_slices.append(TimeSlice(None, None, time_slice_dict=entry, from_db=True))
    return time_slices
def remove_all_time_slices(self):
    '''Drops the whole time slice collection.'''
    collection_name = self._time_slice_collection
    super().drop_collection(collection_name)
#endregion
\ No newline at end of file
...@@ -10,8 +10,8 @@ def get(): ...@@ -10,8 +10,8 @@ def get():
def get_names(): def get_names():
return repo.get_clusterset_names() return repo.get_clusterset_names()
def get_by_name(name): def get_by_name(layername):
res = repo.get_clusterset(name) res = repo.get_clusterset(layername)
if res is not None: if res is not None:
return res.to_serializable_dict() return res.to_serializable_dict()
else: else:
......
from flask import request, Response
from db.repository import Repository
from db.entities import TimeSlice
repo = Repository()
def get():
    '''Returns all time slices from the repository as serializable dicts.'''
    serialized = []
    for time_slice in repo.get_time_slices():
        serialized.append(time_slice.to_serializable_dict())
    return serialized
def get_by_name(layername):
    '''
    Returns all time slices of one layer as serializable dicts.

    :param layername: The name of the layer to return the time slices for
    :returns: A list of dicts, or an empty 404 response if no time slice exists for the layer
    '''
    time_slices = repo.get_time_slices_by_name(layername)

    # covers both None and an empty list
    if not time_slices:
        return Response(status=404)

    return [e.to_serializable_dict() for e in time_slices]
...@@ -10,9 +10,9 @@ import matplotlib.pyplot as plt ...@@ -10,9 +10,9 @@ import matplotlib.pyplot as plt
from db.repository import Repository from db.repository import Repository
from db.entities.timeslice import TimeSlice from db.entities.timeslice import TimeSlice
from db.entities import ClusterSet from db.entities import ClusterSet
from typing import Tuple from typing import Tuple, Dict
# repo = Repository() repo = Repository()
def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]: def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
...@@ -23,46 +23,9 @@ def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]: ...@@ -23,46 +23,9 @@ def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]:
def get_clusterset(): def get_clusterset():
# clusterset = repo.get_clusterset('Destination_Layer') clusterset = repo.get_clusterset('Destination_Layer')
with open('clustering_results/optics/clusterset_Destination_Layer.txt') as file: # with open('clustering_results/optics/clusterset_Destination_Layer.txt') as file:
clusterset = ClusterSet(cluster_set_dict=json.loads(file.read())) # clusterset = ClusterSet(cluster_set_dict=json.loads(file.read()))
return clusterset
clusterset = ClusterSet(cluster_set_dict={
"clusters": [{
"cluster_label": 0,
"nodes": [{
"Finished_time": 1579143634812589,
"Latitude_Destination": -5.95081,
"Longitude_Destination": 37.415281,
"TravelID": "5e57ec9159bc0668543f1568",
"TravelPrice": 19,
"UniqueID": "2696718d7a33ab3dbf28e9c88411afcfe9a933a45e57ec9159bc0668543f1568",
"UserID": "2696718d7a33ab3dbf28e9c88411afcfe9a933a4",
"cluster_label": 0
}, {
"Finished_time": 1582709512112368,
"Latitude_Destination": -5.95081,
"Longitude_Destination": 37.415281,
"TravelID": "5e57ec9159bc0668543f15cf",
"TravelPrice": 16,
"UniqueID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c5e57ec9159bc0668543f15cf",
"UserID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c",
"cluster_label": 0
}, {
"Finished_time": 1582709512112367,
"Latitude_Destination": -5.95081,
"Longitude_Destination": 37.415281,
"TravelID": "5e57ec9159bc0668543f15cf",
"TravelPrice": 16,
"UniqueID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c5e57ec9159bc0668543f15cd",
"UserID": "98dcb2717ddae152d5b359c6ea97e4fe34a29d4c",
"cluster_label": 0
}]
}],
"layer_name": "Destination_Layer"
})
return clusterset return clusterset
...@@ -90,9 +53,7 @@ def plt_show_bars(keys, time_slices, cluster_no): ...@@ -90,9 +53,7 @@ def plt_show_bars(keys, time_slices, cluster_no):
x_axis_label_stepsize = 10 x_axis_label_stepsize = 10
nodes_per_slice_for_single_cluster = \ nodes_per_slice_for_single_cluster = \
[len(time_slices[k].nodes[cluster_no]) [len(time_slices[k].get_nodes_for_cluster(cluster_no))
if cluster_no in time_slices[k].nodes
else 0
for k for k
in keys] in keys]
...@@ -110,7 +71,7 @@ def plt_show_bars(keys, time_slices, cluster_no): ...@@ -110,7 +71,7 @@ def plt_show_bars(keys, time_slices, cluster_no):
clusterset = get_clusterset() clusterset = get_clusterset()
# print(clusterset.layer_name)
cnt = 0 cnt = 0
time_slices = {} time_slices = {}
...@@ -121,7 +82,7 @@ for cluster_no in clusterset.clusters: ...@@ -121,7 +82,7 @@ for cluster_no in clusterset.clusters:
time_key = convert_to_time_slice_key(str(node['Finished_time'])) time_key = convert_to_time_slice_key(str(node['Finished_time']))
if time_key not in time_slices: if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key) time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node) time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
...@@ -131,7 +92,10 @@ keys = list(time_slices.keys()) ...@@ -131,7 +92,10 @@ keys = list(time_slices.keys())
keys.sort() keys.sort()
repo.remove_all_time_slices()
plt_show_bars(keys, time_slices, cluster_no = 20) for k,v in time_slices.items():
repo.add_time_slice(v)
print(len(time_slices))
plt_show_bars(keys, time_slices, cluster_no = 0)
...@@ -12,6 +12,9 @@ class MongoRepositoryBase: ...@@ -12,6 +12,9 @@ class MongoRepositoryBase:
self._mongo_client = MongoClient(f"mongodb://{username}:{password}@{hostname}:{port}/") self._mongo_client = MongoClient(f"mongodb://{username}:{password}@{hostname}:{port}/")
self._database = self._mongo_client[database_name] self._database = self._mongo_client[database_name]
def drop_collection(self, collection_name):
self._database[collection_name].drop()
def insert_entry(self, collection_name, content: dict): def insert_entry(self, collection_name, content: dict):
collection = self._database[collection_name] collection = self._database[collection_name]
collection.insert_one(content) collection.insert_one(content)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment