Run prediction for use-cases

358803e3 · Alexander Lercher · f7617d57 · 358803e3 · 358803e3 · 358803e3
Commit 358803e3 authored Aug 03, 2021 by Alexander Lercher
7 changed files
--- a/documentation/external_access.md
+++ b/documentation/external_access.md
@@ -93,3 +93,13 @@ Returns the computed similarity. Two clusters belonging to the SAME layer will b
 Intermediary data-structure used only by the function which computes the similarity. Clusters are connected only to other clusters belonging to a DIFFERENT layer.
 ```GET https://articonf1.itec.aau.at:30103/api/use_cases/{use_case}/tables/{table}/connectedClusters``` returns all connected clusters for the given use-case and table.
+# Proactive Community Detection Microservice
+https://articonf1.itec.aau.at:30105/api/ui/
+This microservice contains predictions of the cluster sizes from the clusters in [role stage discovery microservice](https://articonf1.itec.aau.at:30103/api/ui/#!/Clusters/routes_clustersets_get_by_name) for the week following the latest data in SMART.
+Example: Layer $L$ contains 3 clusters with sizes 3, 0, 7 in the most recent week $t$. SMART predicts the sizes in the following week $t+1$ as 5, 0, 6 based on each cluster's structural changes over the last $N=3$ weeks, i.e. $t,\ t-1,\ t-2$.
+```GET https://articonf1.itec.aau.at:30105/api/use-cases/{use_case}/tables/{table}/layers/{layer_name}/predictions```
+contains the size predictions for all clusters of a layer derived as described above.
\ No newline at end of file
--- a/src/data-hub/proactive-community-detection-microservice/app/.gitignore
+++ b/src/data-hub/proactive-community-detection-microservice/app/.gitignore
 # contains raw data for machine learning
 data/
+# backup data for machine learning debugging
+data_bak/ 
\ No newline at end of file
--- a/src/data-hub/proactive-community-detection-microservice/app/processing/ml/predict_single_context.py
+++ b/src/data-hub/proactive-community-detection-microservice/app/processing/ml/predict_single_context.py
@@ -46,6 +46,10 @@ def run_prediction(use_case: str):
        with open(path_in, 'r') as file:
            data = [Cluster.create_from_dict(cl_d) for cl_d in json.loads(file.read())]
+        if len(data) == 0:
+            print(f"No data for predicting {use_case}//{table}//{layer_name}.")
+            continue
        data.sort(key=lambda cl: (eval(cl.cluster_id), eval(cl.time_window_id)))
        #####################
        cluster_map: Dict['cluster_id', 'time_windows'] = {}

--- a/src/data-hub/proactive-community-detection-microservice/app/processing/ml/train_base.py
+++ b/src/data-hub/proactive-community-detection-microservice/app/processing/ml/train_base.py
@@ -48,7 +48,9 @@ def print_regression_report(clf, test_X, test_Y, title):
    :param test_Y: true prediction values 
    :param title: title for the report
    """
-    pred_Y = clf.predict(test_X)        
+    pred_Y = clf.predict(test_X)
+    pred_Y = np.rint(pred_Y) # round to full numbers
    print(f"### {title} ###\nR2-score={sklearn.metrics.r2_score(y_true=test_Y, y_pred=pred_Y)}, " \
          f"MSE={sklearn.metrics.mean_squared_error(y_true=test_Y, y_pred=pred_Y)}, " \
          f"sanity={sklearn.metrics.mean_squared_error(y_true=test_Y, y_pred=[0]*len(pred_Y))}")

--- a/src/data-hub/proactive-community-detection-microservice/app/processing/ml/train_cross_context.py
+++ b/src/data-hub/proactive-community-detection-microservice/app/processing/ml/train_cross_context.py
@@ -44,6 +44,10 @@ def run_training(use_case):
        reference_layer_name = layerpair.reference_layer
        df: DataFrame = pd.read_csv(f'data/{use_case}/{table}/ml_input/cross_context/{layer_name}_{reference_layer_name}.csv', index_col=0)
+        if df.empty:
+            print(f"No data for training {use_case}//{table}//{layer_name} on {reference_layer_name}.")
+            continue            
        #######################
        training, testing = split_data(df, shuffle=False)
        #####################

--- a/src/data-hub/proactive-community-detection-microservice/app/processing/ml/train_single_context.py
+++ b/src/data-hub/proactive-community-detection-microservice/app/processing/ml/train_single_context.py
@@ -43,6 +43,10 @@ def run_training(use_case):
        layer_name = layer.layer_name
        df: DataFrame = pd.read_csv(f'data/{use_case}/{table}/ml_input/single_context/{layer_name}.csv', index_col=0)
+        if df.empty:
+            print(f"No data for training {use_case}//{table}//{layer_name}.")
+            continue
        #######################
        training, testing = split_data(df, shuffle=False)
        #####################

--- a/src/data-hub/proactive-community-detection-microservice/app/run_training.py
+++ b/src/data-hub/proactive-community-detection-microservice/app/run_training.py
@@ -4,6 +4,7 @@ modules_path = '../../../modules/'
 if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)
+import shutil
 from typing import List
 from db.repository import Repository
@@ -32,14 +33,23 @@ def _run_prediction(use_cases: List[str] = None):
    for use_case in use_cases:
        repo.delete_prediction_results(use_case)
        run_single_prediction(use_case)
-        run_cross_prediction(use_case)
+        # 20210803 dont execute cross-context for use-cases
+        # run_cross_prediction(use_case)
+def _run_cleanup(use_cases: List[str] = None):
+    '''Deletes all files in data/ for the use-cases'''
+    for use_case in use_cases:
+        path_ = f'data/{use_case}/'
+        if os.path.exists(path_):
+            shutil.rmtree(path_)
 if __name__ == '__main__':
    use_cases = ['vialog-enum', 'car-sharing-official', 'smart-energy', 'crowd-journalism-enum']
-    use_cases = ['community-prediction-youtube-n', 'community-prediction-taxi']
+    # use_cases = ['community-prediction-youtube-n', 'community-prediction-taxi']
    _run_data_preparation(use_cases)
    _run_training(use_cases)
    _run_prediction(use_cases)
-    # TODO file cleanup
+    _run_cleanup(use_cases)
\ No newline at end of file