Commit 358803e3 authored by Alexander Lercher

Run prediction for use-cases

parent f7617d57
......@@ -93,3 +93,13 @@ Returns the computed similarity. Two clusters belonging to the SAME layer will b
Intermediary data-structure used only by the function which computes the similarity. Clusters are connected only to other clusters belonging to a DIFFERENT layer.
```GET https://articonf1.itec.aau.at:30103/api/use_cases/{use_case}/tables/{table}/connectedClusters``` returns all connected clusters for the given use-case and table.
# Proactive Community Detection Microservice
https://articonf1.itec.aau.at:30105/api/ui/
This microservice provides predictions of the cluster sizes for the clusters in the [role stage discovery microservice](https://articonf1.itec.aau.at:30103/api/ui/#!/Clusters/routes_clustersets_get_by_name), covering the week following the latest data in SMART.
Example: Layer $L$ contains 3 clusters with sizes 3, 0, 7 in the most recent week $t$. SMART predicts the sizes in the following week $t+1$ as 5, 0, 6 based on each cluster's structural changes over the last $N=3$ weeks, i.e. $t,\ t-1,\ t-2$.
```GET https://articonf1.itec.aau.at:30105/api/use-cases/{use_case}/tables/{table}/layers/{layer_name}/predictions```
returns the size predictions for all clusters of a layer, derived as described above.
\ No newline at end of file
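As a usage illustration for the new endpoint above: a minimal Python sketch, assuming the service returns a JSON list of per-cluster predictions. The use-case, table and layer names below are hypothetical placeholders and depend on the deployed SMART configuration.

```python
import requests

# Hypothetical identifiers for illustration only.
base_url = "https://articonf1.itec.aau.at:30105/api"
use_case = "community-prediction-taxi"
table = "community-prediction-taxi"
layer_name = "ExampleLayer"

# Fetch the size predictions for all clusters of the layer.
resp = requests.get(
    f"{base_url}/use-cases/{use_case}/tables/{table}/layers/{layer_name}/predictions"
)
resp.raise_for_status()
for prediction in resp.json():
    print(prediction)
```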
# contains raw data for machine learning
data/
# backup data for machine learning debugging
data_bak/
\ No newline at end of file
......@@ -46,6 +46,10 @@ def run_prediction(use_case: str):
    with open(path_in, 'r') as file:
        data = [Cluster.create_from_dict(cl_d) for cl_d in json.loads(file.read())]
    if len(data) == 0:
        print(f"No data for predicting {use_case}//{table}//{layer_name}.")
        continue
    data.sort(key=lambda cl: (eval(cl.cluster_id), eval(cl.time_window_id)))
    #####################
    cluster_map: Dict['cluster_id', 'time_windows'] = {}
......
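The sorted clusters are then collected per cluster id. The grouping code itself lies outside the visible hunk; the following is only a plausible sketch of how such a map could be filled, assuming each `Cluster` exposes the `cluster_id` and `time_window_id` attributes used in the sort above.

```python
from typing import Dict, List

# Hypothetical grouping step: collect all time-window snapshots per cluster id,
# in the chronological order produced by the sort above.
cluster_map: Dict[str, List['Cluster']] = {}
for cluster in data:
    cluster_map.setdefault(cluster.cluster_id, []).append(cluster)
```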
......@@ -48,7 +48,9 @@ def print_regression_report(clf, test_X, test_Y, title):
:param test_Y: true prediction values
:param title: title for the report
"""
    pred_Y = clf.predict(test_X)
    pred_Y = np.rint(pred_Y)  # round predictions to whole numbers, since cluster sizes are integer counts
    # "sanity" is the MSE of an all-zero baseline, i.e. always predicting an empty cluster.
    print(f"### {title} ###\nR2-score={sklearn.metrics.r2_score(y_true=test_Y, y_pred=pred_Y)}, " \
          f"MSE={sklearn.metrics.mean_squared_error(y_true=test_Y, y_pred=pred_Y)}, " \
          f"sanity={sklearn.metrics.mean_squared_error(y_true=test_Y, y_pred=[0]*len(pred_Y))}")
......
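For context, a minimal usage sketch of this report helper, assuming `print_regression_report` and its module imports (`numpy as np`, `sklearn`) are available as above. The regressor and data here are purely illustrative, not the pipeline's actual features.

```python
import numpy as np
from sklearn.linear_model import LinearRegression

# Illustrative data only: 20 samples with 3 features and integer targets.
rng = np.random.default_rng(42)
X = rng.normal(size=(20, 3))
y = rng.integers(0, 10, size=20)

# Train on the first 15 rows, report on the remaining 5.
clf = LinearRegression().fit(X[:15], y[:15])
print_regression_report(clf, X[15:], y[15:], title="illustrative linear model")
```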
......@@ -44,6 +44,10 @@ def run_training(use_case):
    reference_layer_name = layerpair.reference_layer
    df: DataFrame = pd.read_csv(f'data/{use_case}/{table}/ml_input/cross_context/{layer_name}_{reference_layer_name}.csv', index_col=0)
    if df.empty:
        print(f"No data for training {use_case}//{table}//{layer_name} on {reference_layer_name}.")
        continue
    #######################
    training, testing = split_data(df, shuffle=False)
    #####################
......
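`split_data` itself is not part of this diff; the following is only a plausible sketch of a chronological train/test split matching the `shuffle=False` calls above. The 80/20 ratio is an assumption, not taken from the repository.

```python
from typing import Tuple
from pandas import DataFrame

def split_data(df: DataFrame, test_fraction: float = 0.2, shuffle: bool = False) -> Tuple[DataFrame, DataFrame]:
    """Split a feature frame into training and testing parts.

    With shuffle=False the rows keep their original (chronological) order,
    so the most recent time windows end up in the test set.
    """
    if shuffle:
        df = df.sample(frac=1)  # random reordering before the split
    split_at = int(len(df) * (1 - test_fraction))
    return df.iloc[:split_at], df.iloc[split_at:]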
......@@ -43,6 +43,10 @@ def run_training(use_case):
    layer_name = layer.layer_name
    df: DataFrame = pd.read_csv(f'data/{use_case}/{table}/ml_input/single_context/{layer_name}.csv', index_col=0)
    if df.empty:
        print(f"No data for training {use_case}//{table}//{layer_name}.")
        continue
    #######################
    training, testing = split_data(df, shuffle=False)
    #####################
......
......@@ -4,6 +4,7 @@ modules_path = '../../../modules/'
if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)
import shutil
from typing import List
from db.repository import Repository
......@@ -32,14 +33,23 @@ def _run_prediction(use_cases: List[str] = None):
    for use_case in use_cases:
        repo.delete_prediction_results(use_case)
        run_single_prediction(use_case)
        run_cross_prediction(use_case)
        # 20210803: don't execute cross-context prediction for use-cases
        # run_cross_prediction(use_case)
def _run_cleanup(use_cases: List[str] = None):
    '''Deletes all files in data/ for the use-cases'''
    for use_case in use_cases:
        path_ = f'data/{use_case}/'
        if os.path.exists(path_):
            shutil.rmtree(path_)
if __name__ == '__main__':
    use_cases = ['vialog-enum', 'car-sharing-official', 'smart-energy', 'crowd-journalism-enum']
    use_cases = ['community-prediction-youtube-n', 'community-prediction-taxi']
    # use_cases = ['community-prediction-youtube-n', 'community-prediction-taxi']
    _run_data_preparation(use_cases)
    _run_training(use_cases)
    _run_prediction(use_cases)
    # TODO file cleanup
\ No newline at end of file
    _run_cleanup(use_cases)
\ No newline at end of file