Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
d94b70d7
Commit
d94b70d7
authored
Jul 26, 2021
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Single context prediction
parent
c1dc19d0
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
817 additions
and
6 deletions
+817
-6
routes.yml
...e-community-detection-microservice/app/configs/routes.yml
+55
-1
__init__.py
...e-community-detection-microservice/app/db/dao/__init__.py
+1
-0
prediction_result.py
...ty-detection-microservice/app/db/dao/prediction_result.py
+22
-0
repository.py
...ive-community-detection-microservice/app/db/repository.py
+10
-1
predict.ipynb
...active-community-detection-microservice/app/predict.ipynb
+605
-0
__init__.py
...mmunity-detection-microservice/app/processing/__init__.py
+0
-1
predict_single_context.py
...-microservice/app/processing/ml/predict_single_context.py
+86
-0
predictions.py
...ommunity-detection-microservice/app/routes/predictions.py
+12
-0
run_dataprep.py
...tive-community-detection-microservice/app/run_dataprep.py
+3
-1
run_layerpair_upload.py
...munity-detection-microservice/app/run_layerpair_upload.py
+2
-0
run_node_fetching.py
...community-detection-microservice/app/run_node_fetching.py
+2
-0
run_prediction.py
...ve-community-detection-microservice/app/run_prediction.py
+16
-0
run_training.py
...tive-community-detection-microservice/app/run_training.py
+3
-2
No files found.
src/data-hub/proactive-community-detection-microservice/app/configs/routes.yml
View file @
d94b70d7
...
@@ -15,3 +15,57 @@ paths:
...
@@ -15,3 +15,57 @@ paths:
responses
:
responses
:
'
200'
:
'
200'
:
description
:
"
Successful
echo
of
request
data"
description
:
"
Successful
echo
of
request
data"
/use-cases/{use_case}/tables/{table}/layers/{layer_name}/predictions
:
get
:
operationId
:
"
routes.predictions.get"
security
:
-
JwtRegular
:
[]
tags
:
-
"
Predictions"
summary
:
"
Get
predictions"
parameters
:
-
name
:
"
use_case"
in
:
"
path"
description
:
"
Name
of
the
use-case"
required
:
true
type
:
"
string"
-
name
:
"
table"
in
:
"
path"
description
:
"
Name
of
the
table"
required
:
true
type
:
"
string"
-
name
:
"
layer_name"
in
:
"
path"
description
:
"
Name
of
the
layer"
required
:
true
type
:
"
string"
responses
:
'
200'
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/Prediction"
'
404'
:
description
:
"
Predictions
not
found"
definitions
:
Prediction
:
type
:
object
properties
:
use_case
:
type
:
string
table
:
type
:
string
method
:
type
:
string
layer
:
type
:
string
reference_layer
:
type
:
string
cluster_label
:
type
:
string
time_window
:
type
:
string
prediction
:
type
:
integer
src/data-hub/proactive-community-detection-microservice/app/db/dao/__init__.py
View file @
d94b70d7
...
@@ -2,3 +2,4 @@ from db.dao.cluster import Cluster as ClusterDao
...
@@ -2,3 +2,4 @@ from db.dao.cluster import Cluster as ClusterDao
from
db.dao.layer
import
Layer
as
LayerDao
from
db.dao.layer
import
Layer
as
LayerDao
from
db.dao.timeslice
import
TimeSlice
as
TimeSliceDao
from
db.dao.timeslice
import
TimeSlice
as
TimeSliceDao
from
db.dao.layer_pair
import
LayerPair
as
LayerPairDao
from
db.dao.layer_pair
import
LayerPair
as
LayerPairDao
from
db.dao.prediction_result
import
PredictionResult
src/data-hub/proactive-community-detection-microservice/app/db/dao/prediction_result.py
0 → 100644
View file @
d94b70d7
from
typing
import
List
,
Dict
class PredictionResult:
    """DAO holding one community-evolution prediction for a single cluster.

    Carries the full identifying context (use case, table, method, layer,
    optional reference layer, cluster id, target time window) together with
    the predicted evolution label.
    """

    def __init__(self, use_case: str, table: str, method: str, layer: str,
                 reference_layer: str, cluster_id: str, time_window: str,
                 prediction: int):
        self.use_case = use_case
        self.table = table
        self.method = method
        self.layer = layer
        self.reference_layer = reference_layer
        self.cluster_id = cluster_id
        self.time_window = time_window
        self.prediction = prediction

    @staticmethod
    def create_from_dict(dict_) -> 'PredictionResult':
        """Rehydrate a PredictionResult from a raw document (e.g. a mongo entry).

        Builds an empty shell instance and overwrites its attributes wholesale
        with the dictionary's keys/values.
        """
        instance = PredictionResult(*([None] * 8))
        instance.__dict__.update(dict_)
        return instance
src/data-hub/proactive-community-detection-microservice/app/db/repository.py
View file @
d94b70d7
...
@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
...
@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
self
.
_layer_pair_collection
=
'layer_pairs'
self
.
_layer_pair_collection
=
'layer_pairs'
self
.
_clusters_collection
=
'clusters'
self
.
_clusters_collection
=
'clusters'
self
.
_time_slice_collection
=
'time_slices'
self
.
_time_slice_collection
=
'time_slices'
self
.
_prediction_result_collection
=
'prediction_results'
def
DROP
(
self
,
confirm
:
bool
=
False
):
def
DROP
(
self
,
confirm
:
bool
=
False
):
...
@@ -120,6 +121,14 @@ class Repository(MongoRepositoryBase):
...
@@ -120,6 +121,14 @@ class Repository(MongoRepositoryBase):
def
get_layer_pairs
(
self
,
use_case
:
str
)
->
List
[
LayerPairDao
]:
def
get_layer_pairs
(
self
,
use_case
:
str
)
->
List
[
LayerPairDao
]:
entries
=
super
()
.
get_entries
(
self
.
_layer_pair_collection
,
selection
=
{
'use_case'
:
use_case
})
entries
=
super
()
.
get_entries
(
self
.
_layer_pair_collection
,
selection
=
{
'use_case'
:
use_case
})
return
[
LayerPairDao
.
create_from_dict
(
e
)
for
e
in
entries
]
return
[
LayerPairDao
.
create_from_dict
(
e
)
for
e
in
entries
]
#endregion
#endregion
#region PredictionResult
def
add_prediction_result
(
self
,
prediction_result
:
PredictionResult
):
super
()
.
insert_entry
(
self
.
_prediction_result_collection
,
prediction_result
.
__dict__
)
def
get_prediction_results
(
self
,
use_case
:
str
)
->
List
[
PredictionResult
]:
entries
=
super
()
.
get_entries
(
self
.
_prediction_result_collection
,
selection
=
{
'use_case'
:
use_case
},
projection
=
{
'_id'
:
0
})
return
[
PredictionResult
.
create_from_dict
(
e
)
for
e
in
entries
]
#endregion
src/data-hub/proactive-community-detection-microservice/app/predict.ipynb
0 → 100644
View file @
d94b70d7
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"use_case = 'community-prediction-youtube-n'\r\n",
"layer_name = 'LikesLayer'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\r\n",
"from pandas import DataFrame\r\n",
"\r\n",
"df: DataFrame = pd.read_csv(f'data/{use_case}/ml_input/single_context/{layer_name}.csv', index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cluster_size</th>\n",
" <th>cluster_variance</th>\n",
" <th>cluster_density</th>\n",
" <th>cluster_import1</th>\n",
" <th>cluster_import2</th>\n",
" <th>cluster_area</th>\n",
" <th>cluster_center_distance</th>\n",
" <th>time_f1</th>\n",
" <th>time_f2</th>\n",
" <th>cluster_size.1</th>\n",
" <th>...</th>\n",
" <th>cluster_size.2</th>\n",
" <th>cluster_variance.2</th>\n",
" <th>cluster_density.2</th>\n",
" <th>cluster_import1.2</th>\n",
" <th>cluster_import2.2</th>\n",
" <th>cluster_area.2</th>\n",
" <th>cluster_center_distance.2</th>\n",
" <th>time_f1.2</th>\n",
" <th>time_f2.2</th>\n",
" <th>evolution_label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>565819</th>\n",
" <td>4.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000336</td>\n",
" <td>0.000168</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.992709</td>\n",
" <td>0.120537</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.992709</td>\n",
" <td>-0.120537</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565820</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.935016</td>\n",
" <td>-0.354605</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.822984</td>\n",
" <td>-0.568065</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565821</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.970942</td>\n",
" <td>-0.239316</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.885456</td>\n",
" <td>-0.464723</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565822</th>\n",
" <td>4.0</td>\n",
" <td>1.089725</td>\n",
" <td>0.75</td>\n",
" <td>0.000334</td>\n",
" <td>0.000166</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>0.885456</td>\n",
" <td>-0.464723</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.748511</td>\n",
" <td>-0.663123</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565823</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.748511</td>\n",
" <td>-0.663123</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.663123</td>\n",
" <td>-0.748511</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" cluster_size cluster_variance cluster_density cluster_import1 \\\n",
"565819 4.0 0.000000 0.00 0.000336 \n",
"565820 0.0 0.000000 0.00 0.000000 \n",
"565821 0.0 0.000000 0.00 0.000000 \n",
"565822 4.0 1.089725 0.75 0.000334 \n",
"565823 0.0 0.000000 0.00 0.000000 \n",
"\n",
" cluster_import2 cluster_area cluster_center_distance time_f1 \\\n",
"565819 0.000168 0.0 0.0 0.992709 \n",
"565820 0.000000 0.0 0.0 0.935016 \n",
"565821 0.000000 0.0 0.0 0.970942 \n",
"565822 0.000166 3.0 6.0 0.885456 \n",
"565823 0.000000 0.0 0.0 0.748511 \n",
"\n",
" time_f2 cluster_size.1 ... cluster_size.2 cluster_variance.2 \\\n",
"565819 0.120537 1.0 ... 0.0 0.0 \n",
"565820 -0.354605 1.0 ... 0.0 0.0 \n",
"565821 -0.239316 0.0 ... 0.0 0.0 \n",
"565822 -0.464723 1.0 ... 0.0 0.0 \n",
"565823 -0.663123 1.0 ... 0.0 0.0 \n",
"\n",
" cluster_density.2 cluster_import1.2 cluster_import2.2 \\\n",
"565819 0.0 0.0 0.0 \n",
"565820 0.0 0.0 0.0 \n",
"565821 0.0 0.0 0.0 \n",
"565822 0.0 0.0 0.0 \n",
"565823 0.0 0.0 0.0 \n",
"\n",
" cluster_area.2 cluster_center_distance.2 time_f1.2 time_f2.2 \\\n",
"565819 0.0 0.0 0.992709 -0.120537 \n",
"565820 0.0 0.0 0.822984 -0.568065 \n",
"565821 0.0 0.0 0.885456 -0.464723 \n",
"565822 0.0 0.0 0.748511 -0.663123 \n",
"565823 0.0 0.0 0.663123 -0.748511 \n",
"\n",
" evolution_label \n",
"565819 -1.0 \n",
"565820 4.0 \n",
"565821 -1.0 \n",
"565822 -1.0 \n",
"565823 -1.0 \n",
"\n",
"[5 rows x 28 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import json\r\n",
"from entities import Cluster\r\n",
"import collections\r\n",
"import numpy as np\r\n",
"from typing import Iterable, Tuple"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"N=3"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"path_in = f\"data/{use_case}/cluster_metrics/{layer_name}.json\"\r\n",
"with open(path_in, 'r') as file:\r\n",
" data = [Cluster.create_from_dict(cl_d) for cl_d in json.loads(file.read())]\r\n",
"\r\n",
"data.sort(key=lambda cl: (eval(cl.cluster_id), eval(cl.time_window_id)))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'time_window_id': '(2018, 24)', 'cluster_id': '20207', 'size': 0, 'std_dev': 0, 'scarcity': 0, 'importance1': 0, 'importance2': 0, 'range_': 0.0, 'center': [0, 0], 'global_center_distance': 0}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[-1]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"cluster_map = {}\r\n",
"\r\n",
"# for cluster in {c.cluster_id for c in data}:\r\n",
"# data_map[cluster] = [c for c in data if c.cluster_id == cluster]\r\n",
"\r\n",
"for cluster in data:\r\n",
" id_ = cluster.cluster_id\r\n",
"\r\n",
" if id_ not in cluster_map:\r\n",
" cluster_map[id_] = []\r\n",
"\r\n",
" cluster_map[id_].append(cluster)\r\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"{c.cluster_id for c in data} == cluster_map.keys()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20208"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(cluster_map.keys())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\r\n",
"\r\n",
"def get_cyclic_time_feature(time: int, max_time_value: int = 52) -> Tuple[float, float]:\r\n",
" return (np.sin(2*np.pi*time/max_time_value),\r\n",
" np.cos(2*np.pi*time/max_time_value))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from typing import Tuple\r\n",
"\r\n",
"def get_metrics(cur_cluster: Cluster) -> Tuple:\r\n",
" return (cur_cluster.size, cur_cluster.std_dev, cur_cluster.scarcity, cur_cluster.importance1, cur_cluster.importance2, cur_cluster.range_, cur_cluster.global_center_distance, get_cyclic_time_feature(cur_cluster.get_time_info()))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import pickle \r\n",
"\r\n",
"method = 'single_context'\r\n",
"\r\n",
"with open(f'data/{use_case}/ml_output/{method}/{layer_name}.model', 'rb') as file:\r\n",
" svc = pickle.load(file)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def flatten_metrics_datapoint(datapoint: list) -> Tuple['X', np.array]:\r\n",
" '''\r\n",
" Flattens a single metrics data point in the form:\r\n",
" [(cluster_size, cluster_variance, cluster_density, cluster_import1, cluster_import2, cluster_range, cluster_center, (time_f1, time_f2))^N, evolution_label]\r\n",
" to:\r\n",
" (X, y: np.array)\r\n",
" '''\r\n",
" flat_list = []\r\n",
" for entry in datapoint: # for all x\r\n",
" flat_list.extend(entry[:-1]) # add all number features except the time tuple\r\n",
" flat_list.extend(entry[-1]) # add time tuple\r\n",
"\r\n",
" # flat_list.append(datapoint[-1]) # y\r\n",
" return np.asarray(flat_list)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"def increase_time_window(time_window_id: str):\r\n",
" tuple_ = eval(time_window_id)\r\n",
" \r\n",
" if tuple_[1] == 52:\r\n",
" # 1st week next year\r\n",
" return (tuple_[0]+1 , 1)\r\n",
" else:\r\n",
" # next week\r\n",
" return str((tuple_[0], tuple_[1]+1))\r\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"from entities import PredictionResult\r\n",
"\r\n",
"prediction_results = []\r\n",
"\r\n",
"for cluster_id, time_windows in cluster_map.items():\r\n",
" v = [get_metrics(c) for c in time_windows[-N:]] # metrics for last N time windows\r\n",
" v_flattened = flatten_metrics_datapoint(v)\r\n",
" v_flattened = v_flattened.reshape(1, v_flattened.shape[0]) # reshape for ML with only 1 pred value\r\n",
" res = PredictionResult(use_case, use_case, method, layer_name, None, cluster_id, increase_time_window(time_windows[-1].time_window_id), svc.predict(v_flattened)[0])\r\n",
" prediction_results.append(res)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '0',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '1',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '2',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 3.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '3',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '4',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '5',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '6',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '7',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 3.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '8',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '9',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0}]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[r.__dict__ for r in prediction_results[:10]]"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0\n"
]
}
],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "6f758d9e9b2866087a1d464f700475727f47c3870deef6e7815ca445f120e6ad"
},
"kernelspec": {
"display_name": "Python 3.7.6 64-bit ('venv': venv)",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
\ No newline at end of file
src/data-hub/proactive-community-detection-microservice/app/processing/__init__.py
View file @
d94b70d7
from
processing.ClusterMetricsCalculator
import
ClusterMetricsCalculator
,
ClusterMetricsCalculator1D
,
ClusterMetricsCalculator2D
,
ClusterMetricsCalculatorFactory
from
processing.ClusterMetricsCalculator
import
ClusterMetricsCalculator
,
ClusterMetricsCalculator1D
,
ClusterMetricsCalculator2D
,
ClusterMetricsCalculatorFactory
from
processing.DataSampler
import
DataSampler
from
processing.DataSampler
import
DataSampler
from
processing.fetching
import
fetching
\ No newline at end of file
src/data-hub/proactive-community-detection-microservice/app/processing/ml/predict_single_context.py
0 → 100644
View file @
d94b70d7
from
processing.data_prep.metrics_base
import
get_cyclic_time_feature
N
=
3
# Currently N is fixed to 3
method
=
'single_context'
####################
import
pandas
as
pd
from
pandas
import
DataFrame
#####################
import
json
from
entities
import
Cluster
import
collections
import
numpy
as
np
from
typing
import
Iterable
,
Tuple
######################
from
typing
import
Dict
from
typing
import
Tuple
def get_metrics(cur_cluster: Cluster) -> Tuple:
    """Collects the feature tuple for one cluster time window.

    Returns the seven scalar cluster metrics followed by the cyclic
    (sin, cos) time feature tuple as the last element.
    """
    scalar_features = (
        cur_cluster.size,
        cur_cluster.std_dev,
        cur_cluster.scarcity,
        cur_cluster.importance1,
        cur_cluster.importance2,
        cur_cluster.range_,
        cur_cluster.global_center_distance,
    )
    return scalar_features + (get_cyclic_time_feature(cur_cluster.get_time_info()),)
####################
import
pickle
#####################
def flatten_metrics_datapoint(datapoint: list) -> np.ndarray:
    """
    Flattens a single metrics data point of the form:
    [(cluster_size, cluster_variance, cluster_density, cluster_import1,
      cluster_import2, cluster_range, cluster_center, (time_f1, time_f2))^N]
    into a flat 1-d feature vector X.

    :param datapoint: list of N metric tuples; each tuple's last element is
        the (time_f1, time_f2) pair, all other elements are plain numbers
    :returns: 1-d np.ndarray containing all scalar features followed by the
        unpacked time pair, per time window
    """
    # Fix: the return annotation previously claimed Tuple['X', np.array],
    # but the function returns a single flat np.ndarray (no label).
    flat_list = []
    for entry in datapoint:  # one entry per time window
        flat_list.extend(entry[:-1])  # all scalar features except the time tuple
        flat_list.extend(entry[-1])   # unpack the cyclic (sin, cos) time tuple
    return np.asarray(flat_list)
######################
def increase_time_window(time_window_id: str) -> str:
    """
    Returns the id of the time window directly following *time_window_id*.

    :param time_window_id: stringified (year, week) tuple, e.g. "(2018, 52)"
    :returns: stringified (year, week) tuple of the next week
    """
    # NOTE(review): eval on a stored id — acceptable only because the ids are
    # produced internally; never call this on untrusted input.
    year, week = eval(time_window_id)

    if week == 52:
        # 1st week of the next year.
        # Bug fix: this branch previously returned a raw tuple while the other
        # branch returned a string, yielding inconsistent time_window values
        # at the year rollover.
        # NOTE(review): assumes 52-week years; ISO years can have 53 weeks — confirm upstream.
        return str((year + 1, 1))
    else:
        # next week within the same year
        return str((year, week + 1))
#########################
from
db.repository
import
Repository
from
db.dao
import
PredictionResult
repo
=
Repository
()
def run_prediction(use_case: str):
    """
    Predicts the next evolution label for every cluster of every layer of
    *use_case* and stores each result in the repository.

    Per layer: loads the cluster metrics time series from disk, groups the
    time windows per cluster, loads the trained single-context model, builds
    a flat feature vector from each cluster's last N time windows, and
    persists the model's prediction as a PredictionResult.
    """
    for layer in repo.get_layers_for_use_case(use_case):
        layer_name = layer.layer_name

        # Fix: a DataFrame was previously read from
        # data/{use_case}/ml_input/single_context/{layer_name}.csv here but
        # never used — the dead read was removed.

        # Load the per-cluster metrics time series for this layer.
        path_in = f"data/{use_case}/cluster_metrics/{layer_name}.json"
        with open(path_in, 'r') as file:
            data = [Cluster.create_from_dict(cl_d) for cl_d in json.loads(file.read())]

        # Sort by (cluster id, time window id) so each cluster's windows are
        # chronological after grouping.
        # NOTE(review): eval on stored ids — safe only because ids are produced internally.
        data.sort(key=lambda cl: (eval(cl.cluster_id), eval(cl.time_window_id)))

        # Group the time windows per cluster id; insertion order keeps each
        # cluster's list sorted by time window.
        cluster_map: Dict[str, list] = {}
        for cluster in data:
            cluster_map.setdefault(cluster.cluster_id, []).append(cluster)

        # Load the trained model for this layer.
        with open(f'data/{use_case}/ml_output/{method}/{layer_name}.model', 'rb') as file:
            svc = pickle.load(file)

        for cluster_id, time_windows in cluster_map.items():
            v = [get_metrics(c) for c in time_windows[-N:]]  # metrics for last N time windows
            v_flattened = flatten_metrics_datapoint(v)
            v_flattened = v_flattened.reshape(1, v_flattened.shape[0])  # single-sample batch for the classifier

            res = PredictionResult(use_case, use_case, method, layer_name, None,
                                   cluster_id,
                                   increase_time_window(time_windows[-1].time_window_id),
                                   svc.predict(v_flattened)[0])
            repo.add_prediction_result(res)
src/data-hub/proactive-community-detection-microservice/app/routes/predictions.py
0 → 100644
View file @
d94b70d7
from
flask
import
request
,
Response
from
db.repository
import
Repository
from
db.dao
import
PredictionResult
repo
=
Repository
()
def get(use_case, table, layer_name):
    """
    HTTP handler for GET /use-cases/{use_case}/tables/{table}/layers/{layer_name}/predictions.

    Returns the stored prediction results of *use_case* restricted to the
    requested table and layer, or a 404 response when none exist.
    """
    res = repo.get_prediction_results(use_case)
    if res is None or len(res) == 0:
        return Response(status=404)

    # Bug fix: the table and layer_name path parameters were previously
    # ignored, so this per-layer endpoint returned predictions of every
    # layer of the use-case.
    filtered = [p for p in res if p.table == table and p.layer == layer_name]
    if not filtered:
        return Response(status=404)
    return [p.__dict__ for p in filtered]
src/data-hub/proactive-community-detection-microservice/app/run_dataprep.py
View file @
d94b70d7
...
@@ -7,4 +7,6 @@ if os.path.exists(modules_path):
...
@@ -7,4 +7,6 @@ if os.path.exists(modules_path):
from
processing.data_prep.main
import
run
from
processing.data_prep.main
import
run
run
(
use_case
=
'community-prediction-youtube-n'
)
if
__name__
==
'__main__'
:
\ No newline at end of file
'''Creates data/raw files'''
run
(
use_case
=
'community-prediction-youtube-n'
)
\ No newline at end of file
src/data-hub/proactive-community-detection-microservice/app/run_layerpair_upload.py
View file @
d94b70d7
...
@@ -68,6 +68,8 @@ def upload_layerpair(layerpair:LayerPairDao):
...
@@ -68,6 +68,8 @@ def upload_layerpair(layerpair:LayerPairDao):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
'''Uploads the cross-context dependencies for all use-cases.'''
assert
False
,
'replace with true to upload now'
assert
False
,
'replace with true to upload now'
for
lp
in
get_youtube_dependencies
():
for
lp
in
get_youtube_dependencies
():
...
...
src/data-hub/proactive-community-detection-microservice/app/run_node_fetching.py
View file @
d94b70d7
...
@@ -11,4 +11,6 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
...
@@ -11,4 +11,6 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from
processing.fetching
import
fetching
from
processing.fetching
import
fetching
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
'''Fetches all required data from business-logic and role-stage-discovery.'''
fetching
.
fetch
(
selected_use_cases
=
[
'community-prediction-youtube-n'
],
selected_use_case_tables
=
None
)
fetching
.
fetch
(
selected_use_cases
=
[
'community-prediction-youtube-n'
],
selected_use_case_tables
=
None
)
\ No newline at end of file
src/data-hub/proactive-community-detection-microservice/app/run_prediction.py
0 → 100644
View file @
d94b70d7
import os
import sys

# Make the shared modules folder importable before the processing import below.
modules_path = '../../../modules/'
if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)

from processing.ml.predict_single_context import run_prediction as run_single_prediction
# from processing.ml.predict_cross_context import run_prediction as run_cross_prediction


def _main():
    '''Executes the predictions.'''
    use_case = 'community-prediction-youtube-n'

    run_single_prediction(use_case)
    # run_cross_prediction(use_case)


if __name__ == '__main__':
    _main()
src/data-hub/proactive-community-detection-microservice/app/run_training.py
View file @
d94b70d7
...
@@ -5,10 +5,11 @@ if os.path.exists(modules_path):
...
@@ -5,10 +5,11 @@ if os.path.exists(modules_path):
sys
.
path
.
insert
(
1
,
modules_path
)
sys
.
path
.
insert
(
1
,
modules_path
)
from
processing.ml.train_single_context
import
run_single_training
from
processing.ml.train_single_context
import
run_
training
as
run_
single_training
from
processing.ml.train_cross_context
import
run_cross_training
from
processing.ml.train_cross_context
import
run_
training
as
run_
cross_training
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
'''Executes the training.'''
use_case
=
'community-prediction-youtube-n'
use_case
=
'community-prediction-youtube-n'
run_single_training
(
use_case
)
run_single_training
(
use_case
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment