Commit d94b70d7 authored by Alexander Lercher's avatar Alexander Lercher

Single context prediction

parent c1dc19d0
......@@ -14,4 +14,58 @@ paths:
type: object
responses:
'200':
description: "Successful echo of request data"
\ No newline at end of file
description: "Successful echo of request data"
/use-cases/{use_case}/tables/{table}/layers/{layer_name}/predictions:
get:
operationId: "routes.predictions.get"
security:
- JwtRegular: []
tags:
- "Predictions"
summary: "Get predictions"
parameters:
- name: "use_case"
in: "path"
description: "Name of the use-case"
required: true
type: "string"
- name: "table"
in: "path"
description: "Name of the table"
required: true
type: "string"
- name: "layer_name"
in: "path"
description: "Name of the layer"
required: true
type: "string"
responses:
'200':
description: "Successful operation"
schema:
$ref: "#/definitions/Prediction"
'404':
description: "Predictions not found"
definitions:
Prediction:
type: object
properties:
use_case:
type: string
table:
type: string
method:
type: string
layer:
type: string
reference_layer:
type: string
        cluster_id:
type: string
time_window:
type: string
prediction:
type: integer
......@@ -2,3 +2,4 @@ from db.dao.cluster import Cluster as ClusterDao
from db.dao.layer import Layer as LayerDao
from db.dao.timeslice import TimeSlice as TimeSliceDao
from db.dao.layer_pair import LayerPair as LayerPairDao
from db.dao.prediction_result import PredictionResult
from typing import List, Dict
class PredictionResult:
    """Container for one cluster's predicted evolution label in a future time window."""

    def __init__(self, use_case: str, table: str, method: str,
                 layer: str, reference_layer: str, cluster_id: str,
                 time_window: str, prediction: int):
        # All constructor arguments are stored as public attributes so the
        # object can be serialized straight from __dict__.
        self.use_case = use_case
        self.table = table
        self.method = method
        self.layer = layer
        self.reference_layer = reference_layer
        self.cluster_id = cluster_id
        self.time_window = time_window
        self.prediction = prediction

    @staticmethod
    def create_from_dict(dict_) -> 'PredictionResult':
        """Rebuild a PredictionResult from a plain dict (e.g. a MongoDB document)."""
        instance = PredictionResult(*[None] * 8)
        instance.__dict__.update(dict_)
        return instance
......@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
self._layer_pair_collection = 'layer_pairs'
self._clusters_collection = 'clusters'
self._time_slice_collection = 'time_slices'
self._prediction_result_collection = 'prediction_results'
def DROP(self, confirm:bool=False):
......@@ -120,6 +121,14 @@ class Repository(MongoRepositoryBase):
def get_layer_pairs(self, use_case: str) -> List[LayerPairDao]:
    """Loads all layer-pair documents stored for the given use case."""
    documents = super().get_entries(self._layer_pair_collection, selection={'use_case': use_case})
    return list(map(LayerPairDao.create_from_dict, documents))
#endregion
#region PredictionResult
def add_prediction_result(self, prediction_result: PredictionResult):
    """Persists a single PredictionResult in the prediction_results collection."""
    document = prediction_result.__dict__
    super().insert_entry(self._prediction_result_collection, document)
def get_prediction_results(self, use_case: str) -> List[PredictionResult]:
    """Fetches all stored prediction results for a use case, dropping the Mongo _id field."""
    documents = super().get_entries(
        self._prediction_result_collection,
        selection={'use_case': use_case},
        projection={'_id': 0})
    return list(map(PredictionResult.create_from_dict, documents))
#endregion
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"use_case = 'community-prediction-youtube-n'\r\n",
"layer_name = 'LikesLayer'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\r\n",
"from pandas import DataFrame\r\n",
"\r\n",
"df: DataFrame = pd.read_csv(f'data/{use_case}/ml_input/single_context/{layer_name}.csv', index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cluster_size</th>\n",
" <th>cluster_variance</th>\n",
" <th>cluster_density</th>\n",
" <th>cluster_import1</th>\n",
" <th>cluster_import2</th>\n",
" <th>cluster_area</th>\n",
" <th>cluster_center_distance</th>\n",
" <th>time_f1</th>\n",
" <th>time_f2</th>\n",
" <th>cluster_size.1</th>\n",
" <th>...</th>\n",
" <th>cluster_size.2</th>\n",
" <th>cluster_variance.2</th>\n",
" <th>cluster_density.2</th>\n",
" <th>cluster_import1.2</th>\n",
" <th>cluster_import2.2</th>\n",
" <th>cluster_area.2</th>\n",
" <th>cluster_center_distance.2</th>\n",
" <th>time_f1.2</th>\n",
" <th>time_f2.2</th>\n",
" <th>evolution_label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>565819</th>\n",
" <td>4.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000336</td>\n",
" <td>0.000168</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.992709</td>\n",
" <td>0.120537</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.992709</td>\n",
" <td>-0.120537</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565820</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.935016</td>\n",
" <td>-0.354605</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.822984</td>\n",
" <td>-0.568065</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565821</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.970942</td>\n",
" <td>-0.239316</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.885456</td>\n",
" <td>-0.464723</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565822</th>\n",
" <td>4.0</td>\n",
" <td>1.089725</td>\n",
" <td>0.75</td>\n",
" <td>0.000334</td>\n",
" <td>0.000166</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>0.885456</td>\n",
" <td>-0.464723</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.748511</td>\n",
" <td>-0.663123</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565823</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.748511</td>\n",
" <td>-0.663123</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.663123</td>\n",
" <td>-0.748511</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" cluster_size cluster_variance cluster_density cluster_import1 \\\n",
"565819 4.0 0.000000 0.00 0.000336 \n",
"565820 0.0 0.000000 0.00 0.000000 \n",
"565821 0.0 0.000000 0.00 0.000000 \n",
"565822 4.0 1.089725 0.75 0.000334 \n",
"565823 0.0 0.000000 0.00 0.000000 \n",
"\n",
" cluster_import2 cluster_area cluster_center_distance time_f1 \\\n",
"565819 0.000168 0.0 0.0 0.992709 \n",
"565820 0.000000 0.0 0.0 0.935016 \n",
"565821 0.000000 0.0 0.0 0.970942 \n",
"565822 0.000166 3.0 6.0 0.885456 \n",
"565823 0.000000 0.0 0.0 0.748511 \n",
"\n",
" time_f2 cluster_size.1 ... cluster_size.2 cluster_variance.2 \\\n",
"565819 0.120537 1.0 ... 0.0 0.0 \n",
"565820 -0.354605 1.0 ... 0.0 0.0 \n",
"565821 -0.239316 0.0 ... 0.0 0.0 \n",
"565822 -0.464723 1.0 ... 0.0 0.0 \n",
"565823 -0.663123 1.0 ... 0.0 0.0 \n",
"\n",
" cluster_density.2 cluster_import1.2 cluster_import2.2 \\\n",
"565819 0.0 0.0 0.0 \n",
"565820 0.0 0.0 0.0 \n",
"565821 0.0 0.0 0.0 \n",
"565822 0.0 0.0 0.0 \n",
"565823 0.0 0.0 0.0 \n",
"\n",
" cluster_area.2 cluster_center_distance.2 time_f1.2 time_f2.2 \\\n",
"565819 0.0 0.0 0.992709 -0.120537 \n",
"565820 0.0 0.0 0.822984 -0.568065 \n",
"565821 0.0 0.0 0.885456 -0.464723 \n",
"565822 0.0 0.0 0.748511 -0.663123 \n",
"565823 0.0 0.0 0.663123 -0.748511 \n",
"\n",
" evolution_label \n",
"565819 -1.0 \n",
"565820 4.0 \n",
"565821 -1.0 \n",
"565822 -1.0 \n",
"565823 -1.0 \n",
"\n",
"[5 rows x 28 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import json\r\n",
"from entities import Cluster\r\n",
"import collections\r\n",
"import numpy as np\r\n",
"from typing import Iterable, Tuple"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"N=3"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"path_in = f\"data/{use_case}/cluster_metrics/{layer_name}.json\"\r\n",
"with open(path_in, 'r') as file:\r\n",
" data = [Cluster.create_from_dict(cl_d) for cl_d in json.loads(file.read())]\r\n",
"\r\n",
"data.sort(key=lambda cl: (eval(cl.cluster_id), eval(cl.time_window_id)))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'time_window_id': '(2018, 24)', 'cluster_id': '20207', 'size': 0, 'std_dev': 0, 'scarcity': 0, 'importance1': 0, 'importance2': 0, 'range_': 0.0, 'center': [0, 0], 'global_center_distance': 0}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[-1]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"cluster_map = {}\r\n",
"\r\n",
"# for cluster in {c.cluster_id for c in data}:\r\n",
"# data_map[cluster] = [c for c in data if c.cluster_id == cluster]\r\n",
"\r\n",
"for cluster in data:\r\n",
" id_ = cluster.cluster_id\r\n",
"\r\n",
" if id_ not in cluster_map:\r\n",
" cluster_map[id_] = []\r\n",
"\r\n",
" cluster_map[id_].append(cluster)\r\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"{c.cluster_id for c in data} == cluster_map.keys()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20208"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(cluster_map.keys())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\r\n",
"\r\n",
"def get_cyclic_time_feature(time: int, max_time_value: int = 52) -> Tuple[float, float]:\r\n",
" return (np.sin(2*np.pi*time/max_time_value),\r\n",
" np.cos(2*np.pi*time/max_time_value))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from typing import Tuple\r\n",
"\r\n",
"def get_metrics(cur_cluster: Cluster) -> Tuple:\r\n",
" return (cur_cluster.size, cur_cluster.std_dev, cur_cluster.scarcity, cur_cluster.importance1, cur_cluster.importance2, cur_cluster.range_, cur_cluster.global_center_distance, get_cyclic_time_feature(cur_cluster.get_time_info()))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import pickle \r\n",
"\r\n",
"method = 'single_context'\r\n",
"\r\n",
"with open(f'data/{use_case}/ml_output/{method}/{layer_name}.model', 'rb') as file:\r\n",
" svc = pickle.load(file)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def flatten_metrics_datapoint(datapoint: list) -> Tuple['X', np.array]:\r\n",
" '''\r\n",
" Flattens a single metrics data point in the form:\r\n",
" [(cluster_size, cluster_variance, cluster_density, cluster_import1, cluster_import2, cluster_range, cluster_center, (time_f1, time_f2))^N, evolution_label]\r\n",
" to:\r\n",
" (X, y: np.array)\r\n",
" '''\r\n",
" flat_list = []\r\n",
" for entry in datapoint: # for all x\r\n",
" flat_list.extend(entry[:-1]) # add all number features except the time tuple\r\n",
" flat_list.extend(entry[-1]) # add time tuple\r\n",
"\r\n",
" # flat_list.append(datapoint[-1]) # y\r\n",
" return np.asarray(flat_list)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"def increase_time_window(time_window_id: str):\r\n",
" tuple_ = eval(time_window_id)\r\n",
" \r\n",
" if tuple_[1] == 52:\r\n",
" # 1st week next year\r\n",
" return (tuple_[0]+1 , 1)\r\n",
" else:\r\n",
" # next week\r\n",
" return str((tuple_[0], tuple_[1]+1))\r\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"from entities import PredictionResult\r\n",
"\r\n",
"prediction_results = []\r\n",
"\r\n",
"for cluster_id, time_windows in cluster_map.items():\r\n",
" v = [get_metrics(c) for c in time_windows[-N:]] # metrics for last N time windows\r\n",
" v_flattened = flatten_metrics_datapoint(v)\r\n",
" v_flattened = v_flattened.reshape(1, v_flattened.shape[0]) # reshape for ML with only 1 pred value\r\n",
" res = PredictionResult(use_case, use_case, method, layer_name, None, cluster_id, increase_time_window(time_windows[-1].time_window_id), svc.predict(v_flattened)[0])\r\n",
" prediction_results.append(res)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '0',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '1',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '2',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 3.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '3',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '4',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '5',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '6',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '7',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 3.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '8',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '9',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0}]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[r.__dict__ for r in prediction_results[:10]]"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0\n"
]
}
],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "6f758d9e9b2866087a1d464f700475727f47c3870deef6e7815ca445f120e6ad"
},
"kernelspec": {
"display_name": "Python 3.7.6 64-bit ('venv': venv)",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
\ No newline at end of file
from processing.ClusterMetricsCalculator import ClusterMetricsCalculator, ClusterMetricsCalculator1D, ClusterMetricsCalculator2D, ClusterMetricsCalculatorFactory
from processing.DataSampler import DataSampler
from processing.fetching import fetching
\ No newline at end of file
from processing.data_prep.metrics_base import get_cyclic_time_feature
N = 3 # Currently N is fixed to 3
method = 'single_context'
####################
import pandas as pd
from pandas import DataFrame
#####################
import json
from entities import Cluster
import collections
import numpy as np
from typing import Iterable, Tuple
######################
from typing import Dict
from typing import Tuple
def get_metrics(cur_cluster: Cluster) -> Tuple:
    """Collects the ML feature tuple for one cluster snapshot.

    The last element is the cyclic (sin, cos) time feature tuple derived
    from the cluster's time information.
    """
    time_feature = get_cyclic_time_feature(cur_cluster.get_time_info())
    return (cur_cluster.size, cur_cluster.std_dev, cur_cluster.scarcity,
            cur_cluster.importance1, cur_cluster.importance2,
            cur_cluster.range_, cur_cluster.global_center_distance,
            time_feature)
####################
import pickle
#####################
def flatten_metrics_datapoint(datapoint: list) -> np.ndarray:
    '''
    Flattens a single metrics data point in the form:
    [(cluster_size, cluster_variance, cluster_density, cluster_import1, cluster_import2, cluster_range, cluster_center, (time_f1, time_f2))^N]
    to a flat 1-D feature vector X.

    Each entry's scalar features are appended first, then the two cyclic
    time features from the trailing tuple are unpacked and appended.

    Note: the return annotation previously claimed Tuple['X', np.array],
    but the function returns a single 1-D ndarray; the annotation is fixed.
    '''
    flat_list = []
    for entry in datapoint:  # one entry per time window
        flat_list.extend(entry[:-1])  # all scalar features except the time tuple
        flat_list.extend(entry[-1])   # unpack the (time_f1, time_f2) tuple

    return np.asarray(flat_list)
######################
def increase_time_window(time_window_id: str) -> str:
    '''
    Returns the id of the time window immediately following time_window_id.

    :param time_window_id: a "(year, week)" string, e.g. "(2018, 24)"
    :returns: the next window as a "(year, week)" string; week 52 rolls
              over to week 1 of the following year.
    '''
    # literal_eval only parses Python literals — safer than eval on a
    # string that originates from stored data.
    from ast import literal_eval
    year, week = literal_eval(time_window_id)

    if week == 52:
        # 1st week of next year.
        # Bug fix: this branch previously returned a tuple while the other
        # branch returned a str, giving callers an inconsistent id type.
        return str((year + 1, 1))
    else:
        # next week of the same year
        return str((year, week + 1))
#########################
from db.repository import Repository
from db.dao import PredictionResult
repo = Repository()
def run_prediction(use_case: str):
    '''
    Runs the single-context prediction for every layer of the given use case
    and stores one PredictionResult per cluster in the repository.

    For each layer: loads the per-cluster metrics JSON, groups the cluster
    snapshots by cluster id, loads the pre-trained model from disk, predicts
    the evolution label for the next time window from the last N snapshots,
    and persists the result.
    '''
    for layer in repo.get_layers_for_use_case(use_case):
        layer_name = layer.layer_name
        ################
        # NOTE(review): df is read here but never used below — presumably a
        # leftover from the notebook this script was exported from; confirm
        # whether the CSV read (and its FileNotFoundError side effect) is needed.
        df: DataFrame = pd.read_csv(f'data/{use_case}/ml_input/single_context/{layer_name}.csv', index_col=0)
        #################
        path_in = f"data/{use_case}/cluster_metrics/{layer_name}.json"
        with open(path_in, 'r') as file:
            data = [Cluster.create_from_dict(cl_d) for cl_d in json.loads(file.read())]
        # sort by (cluster id, time window) so each cluster's snapshots are in
        # chronological order before grouping
        # NOTE(review): eval on stored id strings — ast.literal_eval would be safer
        data.sort(key=lambda cl: (eval(cl.cluster_id), eval(cl.time_window_id)))
        #####################
        # group snapshots by cluster id, preserving the chronological order
        cluster_map: Dict['cluster_id', 'time_windows'] = {}
        for cluster in data:
            id_ = cluster.cluster_id
            if id_ not in cluster_map:
                cluster_map[id_] = []
            cluster_map[id_].append(cluster)
        ####################
        # load the pre-trained classifier for this layer
        with open(f'data/{use_case}/ml_output/{method}/{layer_name}.model', 'rb') as file:
            svc = pickle.load(file)
        #####################
        for cluster_id, time_windows in cluster_map.items():
            v = [get_metrics(c) for c in time_windows[-N:]] # metrics for last N time windows
            v_flattened = flatten_metrics_datapoint(v)
            v_flattened = v_flattened.reshape(1, v_flattened.shape[0]) # reshape for ML with only 1 pred value
            # reference_layer is None for single-context predictions
            res = PredictionResult(use_case, use_case, method, layer_name, None, cluster_id, increase_time_window(time_windows[-1].time_window_id), svc.predict(v_flattened)[0])
            repo.add_prediction_result(res)
#####################
from flask import request, Response
from db.repository import Repository
from db.dao import PredictionResult
repo = Repository()
def get(use_case, table, layer_name):
    """REST handler: returns all stored prediction results for the use case,
    or a 404 response when none exist."""
    results = repo.get_prediction_results(use_case)
    if results:
        return [entry.__dict__ for entry in results]
    # covers both None and an empty result list
    return Response(status=404)
......@@ -7,4 +7,6 @@ if os.path.exists(modules_path):
from processing.data_prep.main import run
run(use_case='community-prediction-youtube-n')
\ No newline at end of file
# Entry point: prepares the data/raw input files for the fixed YouTube use case.
if __name__ == '__main__':
    '''Creates data/raw files'''
    run(use_case='community-prediction-youtube-n')
\ No newline at end of file
......@@ -68,6 +68,8 @@ def upload_layerpair(layerpair:LayerPairDao):
if __name__ == '__main__':
'''Uploads the cross-context dependencies for all use-cases.'''
assert False, 'replace with true to upload now'
for lp in get_youtube_dependencies():
......
......@@ -11,4 +11,6 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from processing.fetching import fetching
if __name__ == "__main__":
'''Fetches all required data from business-logic and role-stage-discovery.'''
fetching.fetch(selected_use_cases=['community-prediction-youtube-n'], selected_use_case_tables=None)
\ No newline at end of file
import sys
import os
# Make the shared processing modules importable when this script is run
# from its own folder (three levels below the repo's modules dir).
modules_path = '../../../modules/'
if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)

from processing.ml.predict_single_context import run_prediction as run_single_prediction
# from processing.ml.predict_cross_context import run_prediction as run_cross_prediction

# Entry point: runs the single-context predictions for the fixed YouTube
# use case; the cross-context variant is not wired up yet.
if __name__ == '__main__':
    '''Executes the predictions.'''
    use_case='community-prediction-youtube-n'
    run_single_prediction(use_case)
    # run_cross_prediction(use_case)
\ No newline at end of file
......@@ -5,10 +5,11 @@ if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
from processing.ml.train_single_context import run_single_training
from processing.ml.train_cross_context import run_cross_training
from processing.ml.train_single_context import run_training as run_single_training
from processing.ml.train_cross_context import run_training as run_cross_training
if __name__ == '__main__':
'''Executes the training.'''
use_case='community-prediction-youtube-n'
run_single_training(use_case)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment