Commit ab96c298 authored by Alexander Lercher's avatar Alexander Lercher

Time slicing for all use-cases

Delete old slices then split communities loaded from db
parent 1ba72d41
...@@ -119,4 +119,41 @@ __Example:__ The node for the initial trace of the pizzashop and the layer ```na ...@@ -119,4 +119,41 @@ __Example:__ The node for the initial trace of the pizzashop and the layer ```na
"fullName": "name+description", "fullName": "name+description",
"firstTopping": "toppings[0]//name" "firstTopping": "toppings[0]//name"
} }
} }
\ No newline at end of file
# Clusters
## Time Slices
Time slices are clusters split into time windows. Currently SMART creates one time slice per week for each cluster.
Currently, the timestamp information is _not_ integrated into the table schema mapping and _not_ converted into a UNIX timestamp during upload.
The following fields are considered timestamps during the partitioning:
```yaml
vialog-enum:
video: created
change: /
car-sharing-official:
car: /
hash: /
media: /
offer: available
publication: date
travel: startDate
travelCancelledBy: moment
travelFinishedBy: moment
travelStartedBy: moment
travelSuggestedEndPlaces: /
travelUsers: /
user: /
offerEndPlaces: /
smart-energy:
smart-energy: Timestamp
crowd-journalism-enum:
video: creationTimestamp
tag: /
classification: lastUpdate
event: /
purchase: timestamp
```
...@@ -111,6 +111,9 @@ class Repository(MongoRepositoryBase): ...@@ -111,6 +111,9 @@ class Repository(MongoRepositoryBase):
entries = super().get_entries(self._time_slice_collection, selection={'use_case': use_case, 'use_case_table': use_case_table, 'layer_name': layer_name}) entries = super().get_entries(self._time_slice_collection, selection={'use_case': use_case, 'use_case_table': use_case_table, 'layer_name': layer_name})
return [TimeSlice(time_slice_dict=e, from_db=True) for e in entries] return [TimeSlice(time_slice_dict=e, from_db=True) for e in entries]
def delete_time_slices(self, use_case: str):
super().delete_many(self._time_slice_collection, selection={'use_case': use_case})
def remove_all_time_slices(self): def remove_all_time_slices(self):
super().drop_collection(self._time_slice_collection) super().drop_collection(self._time_slice_collection)
......
...@@ -13,16 +13,61 @@ from typing import Tuple, Dict, Any, List ...@@ -13,16 +13,61 @@ from typing import Tuple, Dict, Any, List
TimeSliceKey = Tuple[int, int] TimeSliceKey = Tuple[int, int]
# TODO extract information about time features (maybe from table mapping) # TODO extract information about time features (maybe from table mapping)
TIME_PROPERTY_NAMES = ['timestamp'] TIME_PROPERTY_NAMES = [
# vialog-enum
'created',
# car-sharing-official
'available',
'date',
'startDate',
'moment',
# smart-energy
'Timestamp',
# crowd-journalism-enum
'creationTimestamp',
'lastUpdate',
'timestamp',
# community-prediction-youtube-n
'timestamp',
# community-prediction-taxi
'timestamp',
]
repo = Repository() repo = Repository()
def try_convert_string_to_time(timestamp: str) -> datetime:
'''
Tries to convert a timestamp string by applying a few common conversion methods,
e.g. unix timestamp, human readable, ISO 8601
'''
try:
return datetime.utcfromtimestamp(float(timestamp))
except ValueError:
pass
try:
return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
except ValueError:
pass
try:
return datetime.strptime(timestamp, '%d-%m-%Y %H:%M:%S')
except ValueError:
pass
try:
return datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%SZ")
except ValueError:
pass
raise ValueError(f"No conversion available for time data: {timestamp}")
def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey: def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
'''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.''' '''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.'''
# time = datetime.utcfromtimestamp(float(timestamp[0:10])) time = try_convert_string_to_time(timestamp)
# time = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S') # print(time)
time = datetime.utcfromtimestamp(float(timestamp))
(y, w, _) = time.isocalendar() (y, w, _) = time.isocalendar()
return (y, w) return (y, w)
...@@ -42,7 +87,7 @@ def split_clusterset_by_time(layer: Layer, clusters: List[Cluster]) -> Dict[Time ...@@ -42,7 +87,7 @@ def split_clusterset_by_time(layer: Layer, clusters: List[Cluster]) -> Dict[Time
for cluster in clusters: for cluster in clusters:
if cluster.cluster_label == -1: if cluster.cluster_label == -1:
print("Noise cluster was ignored.") # print("Noise cluster was ignored.")
continue continue
for node in cluster.nodes: for node in cluster.nodes:
...@@ -67,38 +112,21 @@ def get_layers() -> List[Layer]: ...@@ -67,38 +112,21 @@ def get_layers() -> List[Layer]:
return repo.get_layers() return repo.get_layers()
def get_clusters_for_layer(use_case, use_case_table, layer_name)-> List[Cluster]: def get_clusters_for_layer(use_case, use_case_table, layer_name)-> List[Cluster]:
# return repo.get_clusters_for_layer(use_case, use_case_table, layer_name) return repo.get_clusters_for_layer(use_case, use_case_table, layer_name)
json_path = f'_predictions/clusters/{layer_name}.json'
if os.path.exists(json_path):
with open(json_path, 'r') as file:
return [Cluster(cluster_dict=e, from_db=False) for e in json.loads(file.read())]
return []
def get_layer_nodes(use_case, use_case_table, layer_name)-> List[dict]: def get_layer_nodes(use_case, use_case_table, layer_name)-> List[dict]:
# return repo.get_layer_nodes(use_case, use_case_table, layer_name) return repo.get_layer_nodes(use_case, use_case_table, layer_name)
return []
def add_time_slice(timeslice): def add_time_slice(timeslice):
try: try:
repo.add_time_slice(timeslice) repo.add_time_slice(timeslice)
pass
except: except:
print(f"Error while storing time slice in db for {timeslice.layer_name}") print(f"Error while storing time slice in db for {timeslice.layer_name}")
# try:
# json_path = f'_predictions/timeslices/{timeslice.layer_name}/{timeslice.time}.json'.replace(', ', '_').replace('(', '').replace(')', '')
# if not os.path.exists(os.path.dirname(json_path)):
# os.makedirs(os.path.dirname(json_path))
# with open(json_path, 'w') as file:
# file.write(json.dumps(timeslice.to_serializable_dict(for_db=False)))
# except Exception as e:
# print(f"Error while writing json for {timeslice.layer_name}: {e}")
def run_time_slicing(selected_use_cases: List[str] = None, selected_use_case_tables: List[str] = None, selected_layer_names: List[str] = None): def run_time_slicing(selected_use_cases: List[str] = None, selected_use_case_tables: List[str] = None, selected_layer_names: List[str] = None):
layers = get_layers() layers = get_layers()
for layer in layers: for layer in layers:
layer_name = layer.layer_name layer_name = layer.layer_name
use_case = layer.use_case use_case = layer.use_case
...@@ -128,5 +156,6 @@ def run_time_slicing(selected_use_cases: List[str] = None, selected_use_case_tab ...@@ -128,5 +156,6 @@ def run_time_slicing(selected_use_cases: List[str] = None, selected_use_case_tab
if __name__ == "__main__": if __name__ == "__main__":
# repo.remove_all_time_slices() use_case = 'community-prediction-youtube-n'
run_time_slicing(selected_use_cases=['community-prediction-youtube-n']) repo.delete_time_slices(use_case)
\ No newline at end of file run_time_slicing(selected_use_cases=[use_case])
\ No newline at end of file
...@@ -27,6 +27,10 @@ class MongoRepositoryBase: ...@@ -27,6 +27,10 @@ class MongoRepositoryBase:
collection = self._database[collection_name] collection = self._database[collection_name]
return collection.find(selection, projection) return collection.find(selection, projection)
def delete_many(self, collection_name, selection: dict = {'__confirm__': '__false__'}):
collection = self._database[collection_name]
collection.delete_many(selection)
def close_connection(self): def close_connection(self):
self._mongo_client.close() self._mongo_client.close()
self._mongo_client = None self._mongo_client = None
......
...@@ -20,13 +20,13 @@ class TokenManager: ...@@ -20,13 +20,13 @@ class TokenManager:
def __init__(self): def __init__(self):
self._token = None self._token = None
def getToken(self) -> str: def getToken(self, admin=False) -> str:
if self._token == None: if self._token == None:
credentials_path = get_resources_path() credentials_path = get_resources_path()
print("Looking for credentials at ... "+str(credentials_path)) print("Looking for credentials at ... "+str(credentials_path))
with open(f'{credentials_path}/regular_user_credentials.json') as file_handler: with open(f"{credentials_path}/{'admin' if admin else 'regular'}_user_credentials.json") as file_handler:
credentials = json.loads(file_handler.read()) credentials = json.loads(file_handler.read())
url = f'https://{network_constants.REST_GATEWAY_HOSTNAME}:{network_constants.REST_GATEWAY_REST_PORT}/api/tokens' url = f'https://{network_constants.REST_GATEWAY_HOSTNAME}:{network_constants.REST_GATEWAY_REST_PORT}/api/tokens'
......
...@@ -6,7 +6,7 @@ from typing import List ...@@ -6,7 +6,7 @@ from typing import List
def postTableToSwagger(use_case:str, table:dict ): def postTableToSwagger(use_case:str, table:dict ):
jwt = TokenManager.getInstance().getToken() jwt = TokenManager.getInstance().getToken(admin=True)
url = f"https://articonf1.itec.aau.at:30420/api/use-cases/{use_case}/tables" url = f"https://articonf1.itec.aau.at:30420/api/use-cases/{use_case}/tables"
...@@ -23,7 +23,7 @@ def postTableToSwagger(use_case:str, table:dict ): ...@@ -23,7 +23,7 @@ def postTableToSwagger(use_case:str, table:dict ):
def postLayersToSwagger(use_case:str, layers: List[dict]): def postLayersToSwagger(use_case:str, layers: List[dict]):
jwt = TokenManager.getInstance().getToken() jwt = TokenManager.getInstance().getToken(admin=True)
for layer in layers: for layer in layers:
url = f"https://articonf1.itec.aau.at:30420/api/layers" url = f"https://articonf1.itec.aau.at:30420/api/layers"
......
import sys
import os
from pathlib import Path
from typing import Dict, Any
import requests
modules_paths = ['.', '../../../modules/']
for modules_path in modules_paths:
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
from _add_use_case_scripts.vialog.tables import add_user, add_video, add_change
import network_constants as nc
from security.token_manager import TokenManager
def add_use_case(use_case: str):
#use_case = "vialog"
jwt = TokenManager.getInstance().getToken()
url = f"https://articonf1.itec.aau.at:30420/api/use-cases"
response = requests.post(
url,
verify=False,
proxies = { "http":None, "https":None },
headers = { "Authorization": f"Bearer {jwt}"},
json = {"name": use_case}
)
print(url+": "+str(response.content))
if __name__ == "__main__":
use_case = "vialog"
# disable ssl warnings :)
requests.packages.urllib3.disable_warnings()
add_use_case(use_case)
add_user.main(use_case)
add_video.main(use_case)
add_change.main(use_case)
\ No newline at end of file
...@@ -17,7 +17,7 @@ from security.token_manager import TokenManager ...@@ -17,7 +17,7 @@ from security.token_manager import TokenManager
def add_use_case(use_case: str): def add_use_case(use_case: str):
#use_case = "vialog" #use_case = "vialog"
jwt = TokenManager.getInstance().getToken() jwt = TokenManager.getInstance().getToken(admin=True)
url = f"https://articonf1.itec.aau.at:30420/api/use-cases" url = f"https://articonf1.itec.aau.at:30420/api/use-cases"
response = requests.post( response = requests.post(
url, url,
...@@ -30,7 +30,7 @@ def add_use_case(use_case: str): ...@@ -30,7 +30,7 @@ def add_use_case(use_case: str):
print(url+": "+str(response.content)) print(url+": "+str(response.content))
if __name__ == "__main__": if __name__ == "__main__":
use_case = "vialog-new-enum" use_case = "vialog-enum"
# disable ssl warnings :) # disable ssl warnings :)
requests.packages.urllib3.disable_warnings() requests.packages.urllib3.disable_warnings()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment