Commit 358803e3 authored by Alexander Lercher

Run prediction for use-cases

parent f7617d57
Returns the computed similarity. Two clusters belonging to the SAME layer will be compared.
Intermediary data-structure used only by the function which computes the similarity. Clusters are connected only to other clusters belonging to a DIFFERENT layer.
```GET https://articonf1.itec.aau.at:30103/api/use_cases/{use_case}/tables/{table}/connectedClusters``` returns all connected clusters for the given use-case and table.
# Proactive Community Detection Microservice
https://articonf1.itec.aau.at:30105/api/ui/
This microservice contains predictions of the cluster sizes from the clusters in [role stage discovery microservice](https://articonf1.itec.aau.at:30103/api/ui/#!/Clusters/routes_clustersets_get_by_name) for the week following the latest data in SMART.
Example: Layer $L$ contains 3 clusters with sizes 3, 0, 7 in the most recent week $t$. SMART predicts the sizes in the following week $t+1$ as 5, 0, 6 based on each cluster's structural changes over the last $N=3$ weeks, i.e. $t,\ t-1,\ t-2$.
```GET https://articonf1.itec.aau.at:30105/api/use-cases/{use_case}/tables/{table}/layers/{layer_name}/predictions```
contains the size predictions for all clusters of a layer derived as described above.
\ No newline at end of file
# contains raw data for machine learning
data/
# backup data for machine learning debugging
data_bak/
\ No newline at end of file
...@@ -46,6 +46,10 @@ def run_prediction(use_case: str): ...@@ -46,6 +46,10 @@ def run_prediction(use_case: str):
with open(path_in, 'r') as file: with open(path_in, 'r') as file:
data = [Cluster.create_from_dict(cl_d) for cl_d in json.loads(file.read())] data = [Cluster.create_from_dict(cl_d) for cl_d in json.loads(file.read())]
if len(data) == 0:
print(f"No data for predicting {use_case}//{table}//{layer_name}.")
continue
data.sort(key=lambda cl: (eval(cl.cluster_id), eval(cl.time_window_id))) data.sort(key=lambda cl: (eval(cl.cluster_id), eval(cl.time_window_id)))
##################### #####################
cluster_map: Dict['cluster_id', 'time_windows'] = {} cluster_map: Dict['cluster_id', 'time_windows'] = {}
......
...@@ -49,6 +49,8 @@ def print_regression_report(clf, test_X, test_Y, title): ...@@ -49,6 +49,8 @@ def print_regression_report(clf, test_X, test_Y, title):
:param title: title for the report :param title: title for the report
""" """
pred_Y = clf.predict(test_X) pred_Y = clf.predict(test_X)
pred_Y = np.rint(pred_Y) # round to full numbers
print(f"### {title} ###\nR2-score={sklearn.metrics.r2_score(y_true=test_Y, y_pred=pred_Y)}, " \ print(f"### {title} ###\nR2-score={sklearn.metrics.r2_score(y_true=test_Y, y_pred=pred_Y)}, " \
f"MSE={sklearn.metrics.mean_squared_error(y_true=test_Y, y_pred=pred_Y)}, " \ f"MSE={sklearn.metrics.mean_squared_error(y_true=test_Y, y_pred=pred_Y)}, " \
f"sanity={sklearn.metrics.mean_squared_error(y_true=test_Y, y_pred=[0]*len(pred_Y))}") f"sanity={sklearn.metrics.mean_squared_error(y_true=test_Y, y_pred=[0]*len(pred_Y))}")
......
...@@ -44,6 +44,10 @@ def run_training(use_case): ...@@ -44,6 +44,10 @@ def run_training(use_case):
reference_layer_name = layerpair.reference_layer reference_layer_name = layerpair.reference_layer
df: DataFrame = pd.read_csv(f'data/{use_case}/{table}/ml_input/cross_context/{layer_name}_{reference_layer_name}.csv', index_col=0) df: DataFrame = pd.read_csv(f'data/{use_case}/{table}/ml_input/cross_context/{layer_name}_{reference_layer_name}.csv', index_col=0)
if df.empty:
print(f"No data for training {use_case}//{table}//{layer_name} on {reference_layer_name}.")
continue
####################### #######################
training, testing = split_data(df, shuffle=False) training, testing = split_data(df, shuffle=False)
##################### #####################
......
...@@ -43,6 +43,10 @@ def run_training(use_case): ...@@ -43,6 +43,10 @@ def run_training(use_case):
layer_name = layer.layer_name layer_name = layer.layer_name
df: DataFrame = pd.read_csv(f'data/{use_case}/{table}/ml_input/single_context/{layer_name}.csv', index_col=0) df: DataFrame = pd.read_csv(f'data/{use_case}/{table}/ml_input/single_context/{layer_name}.csv', index_col=0)
if df.empty:
print(f"No data for training {use_case}//{table}//{layer_name}.")
continue
####################### #######################
training, testing = split_data(df, shuffle=False) training, testing = split_data(df, shuffle=False)
##################### #####################
......
...@@ -4,6 +4,7 @@ modules_path = '../../../modules/' ...@@ -4,6 +4,7 @@ modules_path = '../../../modules/'
if os.path.exists(modules_path): if os.path.exists(modules_path):
sys.path.insert(1, modules_path) sys.path.insert(1, modules_path)
import shutil
from typing import List from typing import List
from db.repository import Repository from db.repository import Repository
...@@ -32,14 +33,23 @@ def _run_prediction(use_cases: List[str] = None): ...@@ -32,14 +33,23 @@ def _run_prediction(use_cases: List[str] = None):
for use_case in use_cases: for use_case in use_cases:
repo.delete_prediction_results(use_case) repo.delete_prediction_results(use_case)
run_single_prediction(use_case) run_single_prediction(use_case)
run_cross_prediction(use_case) # 20210803 dont execute cross-context for use-cases
# run_cross_prediction(use_case)
def _run_cleanup(use_cases: List[str] = None):
'''Deletes all files in data/ for the use-cases'''
for use_case in use_cases:
path_ = f'data/{use_case}/'
if os.path.exists(path_):
shutil.rmtree(path_)
if __name__ == '__main__': if __name__ == '__main__':
use_cases = ['vialog-enum', 'car-sharing-official', 'smart-energy', 'crowd-journalism-enum'] use_cases = ['vialog-enum', 'car-sharing-official', 'smart-energy', 'crowd-journalism-enum']
use_cases = ['community-prediction-youtube-n', 'community-prediction-taxi'] # use_cases = ['community-prediction-youtube-n', 'community-prediction-taxi']
_run_data_preparation(use_cases) _run_data_preparation(use_cases)
_run_training(use_cases) _run_training(use_cases)
_run_prediction(use_cases) _run_prediction(use_cases)
# TODO file cleanup _run_cleanup(use_cases)
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment