Time slicing for all use-cases

Delete old slices then split communities loaded from db

Time slicing for all use-cases
Delete old slices then split communities loaded from db
ab96c298 · Alexander Lercher · 1ba72d41 · ab96c298 · ab96c298 · ab96c298
Commit ab96c298 authored Aug 02, 2021 by Alexander Lercher
8 changed files
--- a/documentation/schema_information.md
+++ b/documentation/schema_information.md
@@ -119,4 +119,41 @@ __Example:__ The node for the initial trace of the pizzashop and the layer ```na
            "fullName": "name+description",
            "firstTopping": "toppings[0]//name"
        }
    }
\ No newline at end of file
+# Clusters 
+## Time Slices
+Time slices are clusters split into time windows. Currently SMART creates one time slice per week for each cluster.
+Currently, the timestamp information is _not_ integrated into the table schema mapping and _not_ converted into a UNIX timestamp during upload.
+The following fields are considered timestamps during the partitioning:
+```yaml
+vialog-enum:
+    video: created
+    change: /
+car-sharing-official:
+    car: /
+    hash: /
+    media: /
+    offer: available
+    publication: date
+    travel: startDate
+    travelCancelledBy: moment
+    travelFinishedBy: moment
+    travelStartedBy: moment
+    travelSuggestedEndPlaces: /
+    travelUsers: /
+    user: /
+    offerEndPlaces: /
+smart-energy:
+    smart-energy: Timestamp
+crowd-journalism-enum:
+    video: creationTimestamp
+    tag: /
+    classification: lastUpdate
+    event: /
+    purchase: timestamp
+```
--- a/src/data-hub/role-stage-discovery-microservice/app/db/repository.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/db/repository.py
@@ -111,6 +111,9 @@ class Repository(MongoRepositoryBase):
        entries = super().get_entries(self._time_slice_collection, selection={'use_case': use_case, 'use_case_table': use_case_table, 'layer_name': layer_name})
        return [TimeSlice(time_slice_dict=e, from_db=True) for e in entries]
+    def delete_time_slices(self, use_case: str):
+        super().delete_many(self._time_slice_collection, selection={'use_case': use_case})
    def remove_all_time_slices(self):
        super().drop_collection(self._time_slice_collection)

--- a/src/data-hub/role-stage-discovery-microservice/app/run_time_slicing.py
+++ b/src/data-hub/role-stage-discovery-microservice/app/run_time_slicing.py
@@ -13,16 +13,61 @@ from typing import Tuple, Dict, Any, List
 TimeSliceKey = Tuple[int, int]
 # TODO extract information about time features (maybe from table mapping)
-TIME_PROPERTY_NAMES = ['timestamp']
+TIME_PROPERTY_NAMES = [
+    # vialog-enum
+    'created',
+    # car-sharing-official
+    'available',
+    'date',
+    'startDate',
+    'moment',
+    # smart-energy
+    'Timestamp',
+    # crowd-journalism-enum
+    'creationTimestamp',
+    'lastUpdate',
+    'timestamp',
+    # community-prediction-youtube-n
+    'timestamp',
+    # community-prediction-taxi
+    'timestamp',
+    ]
 repo = Repository()
+def try_convert_string_to_time(timestamp: str) -> datetime:
+    '''
+    Tries to convert a timestamp string by applying a few common conversion methods, 
+    e.g. unix timestamp, human readable, ISO 8601
+    '''
+    try:
+        return datetime.utcfromtimestamp(float(timestamp))
+    except ValueError:
+        pass
+    try:
+        return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
+    except ValueError:
+        pass
+    try:
+        return datetime.strptime(timestamp, '%d-%m-%Y %H:%M:%S')
+    except ValueError:
+        pass
+    try:
+        return datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%SZ")
+    except ValueError:
+        pass
+    raise ValueError(f"No conversion available for time data: {timestamp}")
 def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
    '''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.'''
-    # time = datetime.utcfromtimestamp(float(timestamp[0:10]))
+    time = try_convert_string_to_time(timestamp)    
-    # time = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
+    # print(time)
-    time = datetime.utcfromtimestamp(float(timestamp))
    (y, w, _) = time.isocalendar()
    return (y, w)
@@ -42,7 +87,7 @@ def split_clusterset_by_time(layer: Layer, clusters: List[Cluster]) -> Dict[Time
    for cluster in clusters:
        if cluster.cluster_label == -1:
-            print("Noise cluster was ignored.")
+            # print("Noise cluster was ignored.")
            continue
        for node in cluster.nodes:
@@ -67,38 +112,21 @@ def get_layers() -> List[Layer]:
    return repo.get_layers()
 def get_clusters_for_layer(use_case, use_case_table, layer_name)-> List[Cluster]:
-    # return repo.get_clusters_for_layer(use_case, use_case_table, layer_name)
+    return repo.get_clusters_for_layer(use_case, use_case_table, layer_name)
-    json_path = f'_predictions/clusters/{layer_name}.json'
-    if os.path.exists(json_path):
-        with open(json_path, 'r') as file:
-            return [Cluster(cluster_dict=e, from_db=False) for e in json.loads(file.read())]
-    return []
 def get_layer_nodes(use_case, use_case_table, layer_name)-> List[dict]:
-    # return repo.get_layer_nodes(use_case, use_case_table, layer_name)
+    return repo.get_layer_nodes(use_case, use_case_table, layer_name)
-    return []
 def add_time_slice(timeslice):
    try:
        repo.add_time_slice(timeslice)  
-        pass
    except:
        print(f"Error while storing time slice in db for {timeslice.layer_name}")
-    # try:
-    #     json_path = f'_predictions/timeslices/{timeslice.layer_name}/{timeslice.time}.json'.replace(', ', '_').replace('(', '').replace(')', '')
-    #     if not os.path.exists(os.path.dirname(json_path)):
-    #         os.makedirs(os.path.dirname(json_path))
-    #     with open(json_path, 'w') as file:
-    #         file.write(json.dumps(timeslice.to_serializable_dict(for_db=False)))
-    # except Exception as e:
-    #     print(f"Error while writing json for {timeslice.layer_name}: {e}")
 def run_time_slicing(selected_use_cases: List[str] = None, selected_use_case_tables: List[str] = None, selected_layer_names: List[str] = None):
    layers = get_layers()
    for layer in layers:
        layer_name = layer.layer_name
        use_case = layer.use_case
@@ -128,5 +156,6 @@ def run_time_slicing(selected_use_cases: List[str] = None, selected_use_case_tab
 if __name__ == "__main__":
-    # repo.remove_all_time_slices()
+    use_case = 'community-prediction-youtube-n'
-    run_time_slicing(selected_use_cases=['community-prediction-youtube-n'])
+    repo.delete_time_slices(use_case)
\ No newline at end of file
+    run_time_slicing(selected_use_cases=[use_case]) 
\ No newline at end of file
--- a/src/modules/database/MongoRepositoryBase.py
+++ b/src/modules/database/MongoRepositoryBase.py
@@ -27,6 +27,10 @@ class MongoRepositoryBase:
        collection = self._database[collection_name]
        return collection.find(selection, projection)
+    def delete_many(self, collection_name, selection: dict = {'__confirm__': '__false__'}):
+        collection = self._database[collection_name]
+        collection.delete_many(selection)
    def close_connection(self):
        self._mongo_client.close()
        self._mongo_client = None

--- a/src/modules/security/token_manager.py
+++ b/src/modules/security/token_manager.py
@@ -20,13 +20,13 @@ class TokenManager:
    def __init__(self):
        self._token = None
-    def getToken(self) -> str:
+    def getToken(self, admin=False) -> str:
        if self._token == None:
            credentials_path = get_resources_path()
            print("Looking for credentials at ... "+str(credentials_path))
-            with open(f'{credentials_path}/regular_user_credentials.json') as file_handler:
+            with open(f"{credentials_path}/{'admin' if admin else 'regular'}_user_credentials.json") as file_handler:
                credentials = json.loads(file_handler.read())
            url = f'https://{network_constants.REST_GATEWAY_HOSTNAME}:{network_constants.REST_GATEWAY_REST_PORT}/api/tokens'

--- a/src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/requestPost.py
+++ b/src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/requestPost.py
@@ -6,7 +6,7 @@ from typing import List
 def postTableToSwagger(use_case:str, table:dict ):
-    jwt = TokenManager.getInstance().getToken()
+    jwt = TokenManager.getInstance().getToken(admin=True)
    url = f"https://articonf1.itec.aau.at:30420/api/use-cases/{use_case}/tables"
@@ -23,7 +23,7 @@ def postTableToSwagger(use_case:str, table:dict ):
 def postLayersToSwagger(use_case:str, layers: List[dict]):
-    jwt = TokenManager.getInstance().getToken()
+    jwt = TokenManager.getInstance().getToken(admin=True)
    for layer in layers:
        url = f"https://articonf1.itec.aau.at:30420/api/layers"

--- a/src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/vialog/add_vialog_schema.py
+++ b/src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/vialog/add_vialog_schema.py
-import sys
-import os
-from pathlib import Path
-from typing import Dict, Any
-import requests
-modules_paths = ['.', '../../../modules/']
-for modules_path in modules_paths:
-    if os.path.exists(modules_path):
-        sys.path.insert(1, modules_path)
-from _add_use_case_scripts.vialog.tables import add_user, add_video, add_change
-import network_constants as nc
-from security.token_manager import TokenManager
-def add_use_case(use_case: str):
-    #use_case = "vialog"
-    jwt = TokenManager.getInstance().getToken()
-    url = f"https://articonf1.itec.aau.at:30420/api/use-cases"
-    response = requests.post(
-            url,
-            verify=False, 
-            proxies = { "http":None, "https":None },
-            headers = { "Authorization": f"Bearer {jwt}"},
-            json = {"name": use_case}
-        )
-    print(url+": "+str(response.content))     
-if __name__ == "__main__":
-    use_case = "vialog"
-    # disable ssl warnings :) 
-    requests.packages.urllib3.disable_warnings() 
-    add_use_case(use_case)
-    add_user.main(use_case)
-    add_video.main(use_case)
-    add_change.main(use_case)
\ No newline at end of file
--- a/src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/vialog/add_vialog_schema_new_enum.py
+++ b/src/participation-hub/business-logic-microservice/app/_add_use_case_scripts/vialog/add_vialog_schema_new_enum.py
@@ -17,7 +17,7 @@ from security.token_manager import TokenManager
 def add_use_case(use_case: str):
    #use_case = "vialog"
-    jwt = TokenManager.getInstance().getToken()
+    jwt = TokenManager.getInstance().getToken(admin=True)
    url = f"https://articonf1.itec.aau.at:30420/api/use-cases"
    response = requests.post(
            url,
@@ -30,7 +30,7 @@ def add_use_case(use_case: str):
    print(url+": "+str(response.content))     
 if __name__ == "__main__":
-    use_case = "vialog-new-enum"
+    use_case = "vialog-enum"
    # disable ssl warnings :) 
    requests.packages.urllib3.disable_warnings()