Commit 174165d4 authored by Alexander Lercher

[RoleStage] Working time slicing and Swagger docu #19

parent 687e2fc1
......@@ -262,10 +262,14 @@ definitions:
properties:
use_case:
type: string
use_case_table:
type: string
layer_name:
type: string
cluster_label:
type: number
label:
type: string
nodes:
type: array
items:
......@@ -285,8 +289,14 @@ definitions:
type: array
items:
type: string
total_properties:
type: array
items:
type: string
use_case:
type: string
use_case_table:
type: string
LayerCollection:
type: array
......@@ -297,14 +307,16 @@ definitions:
type: object
example:
"UniqueID": "4437d98b4516e899fb7d93cef0bea6111574473703f0aab9d8c2f02aaa673f5c"
"use_case": "string"
"layer_name": "some_layer_name"
"Finished_time": 1576631193265951
"Latitude_Destination": -5.973257
"Longitude_Destination": 37.416316
"TravelID": "5e57ec9159bc0668543f156a"
"TravelPrice": 15
"UserID": "a95075f5042b1b27060080156d87fe34ec7e712c"
"use_case": "debug"
"use_case_table": "debug-table1"
"layer_name": "some_layer"
"some_app_key": "some_app_value"
# "Finished_time": 1576631193265951
# "Latitude_Destination": -5.973257
# "Longitude_Destination": 37.416316
# "TravelID": "5e57ec9159bc0668543f156a"
# "TravelPrice": 15
# "UserID": "a95075f5042b1b27060080156d87fe34ec7e712c"
NodeCollection:
type: array
......@@ -319,6 +331,8 @@ definitions:
example: "(2020, 52)"
use_case:
type: string
use_case_table:
type: string
layer_name:
type: string
clusters:
......
......@@ -58,7 +58,7 @@ class TimeSlice:
def from_serializable_dict(self, dict: Dict, from_db=False):
self.time = dict["time"]
self.use_case = dict["use_case"]
self.use_case_tables = dict["use_case_tables"]
self.use_case_table = dict["use_case_table"]
self.layer_name = dict['layer_name']
self.clusters = json.loads(dict['clusters']) if from_db else dict['clusters']
......
......@@ -17,7 +17,7 @@ class Repository(MongoRepositoryBase):
def __init__(self):
super().__init__(netconst.ROLESTAGE_DISCOVERY_DB_HOSTNAME,
netconst.ROLESTAGE_DISCOVERY_DB_PORT,
'roleStageDb-testing')
'roleStageDb')
self._layer_collection = 'layers'
self._layer_nodes_collection = 'layer_nodes'
......
......@@ -42,7 +42,7 @@ def run_clustering_for_layer(layer: Layer) -> List[Cluster]:
)
return [Cluster(layer.use_case, layer.use_case_table, layer.layer_name,
cluster_label=key, nodes=cluster_result.nodes, label=cluster_result.label)
cluster_label=key, nodes=cluster_result.nodes, label=cluster_result.label if key != -1 else 'noise')
for key, cluster_result in res.items()]
......
......@@ -12,9 +12,13 @@ from typing import Tuple, Dict, Any, List
TimeSliceKey = Tuple[int, int]
# TODO extract information about time features (maybe from table mapping)
TIME_PROPERTY_NAMES = ['Timestamp']
def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
'''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.'''
time = datetime.utcfromtimestamp(float(timestamp[0:10]))
# time = datetime.utcfromtimestamp(float(timestamp[0:10]))
time = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
(y, w, _) = time.isocalendar()
return (y, w)
......@@ -28,7 +32,6 @@ def split_clusterset_by_time(layer: Layer, clusters: List[Cluster]) -> Dict[Time
:params clusters: The clusters whose nodes are split
:returns: A dict of time slices where the key is the time info and value is the information about the time slice
'''
time_property_names = ['Finished_time', 'Starting_time']
time_slices: Dict[Any, TimeSlice] = {}
for cluster_no in clusters:
......@@ -36,13 +39,13 @@ def split_clusterset_by_time(layer: Layer, clusters: List[Cluster]) -> Dict[Time
# retrieve times the node is located in based on the defined time properties in the schema
time_keys = set()
for time_property in time_property_names:
for time_property in TIME_PROPERTY_NAMES:
if time_property in node:
time_keys.add(convert_to_time_slice_key(str(node[time_property])))
for time_key in time_keys:
if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key, layer.use_case, layer.layer_name)
time_slices[time_key] = TimeSlice(time_key, layer.use_case, layer.use_case_table, layer.layer_name)
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
......@@ -58,12 +61,17 @@ if __name__ == "__main__":
for layer in layers:
layer_name = layer.layer_name
use_case = layer.use_case
print(f"Working on {use_case}, {layer_name}.")
use_case_table = layer.use_case_table
print(f"Working on {use_case}//{use_case_table}//{layer_name}.")
clusters_for_layer = repo.get_clusters_for_layer(use_case, layer_name)
clusters_for_layer = repo.get_clusters_for_layer(use_case, use_case_table, layer_name)
# if no clusters were generated use one large cluster instead of skipping the layer
if clusters_for_layer is None or len(clusters_for_layer) == 0:
clusters_for_layer = [Cluster(use_case, layer_name, -1, repo.get_layer_nodes(layer_name))]
nodes = repo.get_layer_nodes(use_case, use_case_table, layer_name)
if nodes is None or len(nodes) == 0:
print("Skipping, because there are no clusters and no nodes for the layer.")
continue
clusters_for_layer = [Cluster(use_case, use_case_table, layer_name, -1, nodes, 'noise')]
time_slices = split_clusterset_by_time(layer, clusters_for_layer)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment