Commit 4d887b0a authored by Alexander Lercher

Extracted methods for db access from time slicing

parent 688215a0
...@@ -73,7 +73,7 @@ class Repository(MongoRepositoryBase): ...@@ -73,7 +73,7 @@ class Repository(MongoRepositoryBase):
def add_layer_nodes(self, nodes:List[dict]):
    '''Inserts all given node dicts into the layer-nodes collection.'''
    target_collection = self._layer_nodes_collection
    super().insert_many(target_collection, nodes)
def get_layer_nodes(self, use_case: str, use_case_table: str, layer_name: str) -> List[dict]:
    '''Returns the nodes stored for one layer, selected by use-case, use-case table and layer name, as a list.'''
    # All three identifiers must match for an entry to be selected.
    selection = {
        'use_case': use_case,
        'use_case_table': use_case_table,
        'layer_name': layer_name,
    }
    # The projection is handed to get_entries unchanged; presumably it hides the
    # MongoDB-internal _id field from the result - confirm in MongoRepositoryBase.
    entries = super().get_entries(
        self._layer_nodes_collection,
        selection=selection,
        projection={'_id': 0},
    )
    # Materialize whatever iterable get_entries returned into a plain list.
    return list(entries)
......
...@@ -15,6 +15,9 @@ TimeSliceKey = Tuple[int, int] ...@@ -15,6 +15,9 @@ TimeSliceKey = Tuple[int, int]
# TODO extract information about time features (maybe from table mapping) # TODO extract information about time features (maybe from table mapping)
TIME_PROPERTY_NAMES = ['Timestamp'] TIME_PROPERTY_NAMES = ['Timestamp']
repo = Repository()
def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey: def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
'''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.''' '''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.'''
# time = datetime.utcfromtimestamp(float(timestamp[0:10])) # time = datetime.utcfromtimestamp(float(timestamp[0:10]))
...@@ -52,22 +55,37 @@ def split_clusterset_by_time(layer: Layer, clusters: List[Cluster]) -> Dict[Time ...@@ -52,22 +55,37 @@ def split_clusterset_by_time(layer: Layer, clusters: List[Cluster]) -> Dict[Time
return time_slices return time_slices
def get_layers():
    '''Returns whatever the module-level repository yields for get_layers().'''
    layers = repo.get_layers()
    return layers
def get_clusters_for_layer(use_case, use_case_table, layer_name):
    '''Returns the repository's clusters for the layer identified by use-case, use-case table and layer name.'''
    clusters = repo.get_clusters_for_layer(use_case, use_case_table, layer_name)
    return clusters
def get_layer_nodes(use_case, use_case_table, layer_name):
    '''Returns the repository's nodes for the layer identified by use-case, use-case table and layer name.'''
    nodes = repo.get_layer_nodes(use_case, use_case_table, layer_name)
    return nodes
def add_time_slice(timeslice):
    '''Hands the given time slice to the module-level repository for storage.'''
    repo.add_time_slice(timeslice)
def run_time_slicing(selected_use_cases: List[str] = None, selected_use_case_tables: List[str] = None, selected_layer_names: List[str] = None):
layers = get_layers()
for layer in layers: for layer in layers:
layer_name = layer.layer_name layer_name = layer.layer_name
use_case = layer.use_case use_case = layer.use_case
use_case_table = layer.use_case_table use_case_table = layer.use_case_table
# skip layers not in the params
if selected_use_cases is not None and use_case not in selected_use_cases \
or selected_use_case_tables is not None and use_case_table not in selected_use_case_tables \
or selected_layer_names is not None and layer_name not in selected_layer_names:
continue
print(f"Working on {use_case}//{use_case_table}//{layer_name}.") print(f"Working on {use_case}//{use_case_table}//{layer_name}.")
clusters_for_layer = repo.get_clusters_for_layer(use_case, use_case_table, layer_name) clusters_for_layer = get_clusters_for_layer(use_case, use_case_table, layer_name)
# if no clusters were generated use one large cluster instead of skipping the layer # if no clusters were generated use one large cluster instead of skipping the layer
if clusters_for_layer is None or len(clusters_for_layer) == 0: if clusters_for_layer is None or len(clusters_for_layer) == 0:
nodes = repo.get_layer_nodes(use_case, use_case_table, layer_name) nodes = get_layer_nodes(use_case, use_case_table, layer_name)
if nodes is None or len(nodes) == 0: if nodes is None or len(nodes) == 0:
print("Skipping, because there are no clusters and no nodes for the layer.") print("Skipping, because there are no clusters and no nodes for the layer.")
continue continue
...@@ -76,4 +94,9 @@ if __name__ == "__main__": ...@@ -76,4 +94,9 @@ if __name__ == "__main__":
time_slices = split_clusterset_by_time(layer, clusters_for_layer) time_slices = split_clusterset_by_time(layer, clusters_for_layer)
for k,v in time_slices.items(): for k,v in time_slices.items():
repo.add_time_slice(v) add_time_slice(v)
if __name__ == "__main__":
    # Remove the previously stored time slices before regenerating them.
    repo.remove_all_time_slices()
    # Call without arguments so all three filters stay None (= no filtering).
    # Passing empty lists instead makes every `x not in []` check in
    # run_time_slicing true, which skips every layer and leaves the store empty
    # right after it was wiped above.
    run_time_slicing()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment