Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
d94b70d7
Commit
d94b70d7
authored
Jul 26, 2021
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Single context prediction
parent
c1dc19d0
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
817 additions
and
6 deletions
+817
-6
routes.yml
...e-community-detection-microservice/app/configs/routes.yml
+55
-1
__init__.py
...e-community-detection-microservice/app/db/dao/__init__.py
+1
-0
prediction_result.py
...ty-detection-microservice/app/db/dao/prediction_result.py
+22
-0
repository.py
...ive-community-detection-microservice/app/db/repository.py
+10
-1
predict.ipynb
...active-community-detection-microservice/app/predict.ipynb
+605
-0
__init__.py
...mmunity-detection-microservice/app/processing/__init__.py
+0
-1
predict_single_context.py
...-microservice/app/processing/ml/predict_single_context.py
+86
-0
predictions.py
...ommunity-detection-microservice/app/routes/predictions.py
+12
-0
run_dataprep.py
...tive-community-detection-microservice/app/run_dataprep.py
+3
-1
run_layerpair_upload.py
...munity-detection-microservice/app/run_layerpair_upload.py
+2
-0
run_node_fetching.py
...community-detection-microservice/app/run_node_fetching.py
+2
-0
run_prediction.py
...ve-community-detection-microservice/app/run_prediction.py
+16
-0
run_training.py
...tive-community-detection-microservice/app/run_training.py
+3
-2
No files found.
src/data-hub/proactive-community-detection-microservice/app/configs/routes.yml
View file @
d94b70d7
...
@@ -15,3 +15,57 @@ paths:
...
@@ -15,3 +15,57 @@ paths:
responses
:
responses
:
'
200'
:
'
200'
:
description
:
"
Successful
echo
of
request
data"
description
:
"
Successful
echo
of
request
data"
/use-cases/{use_case}/tables/{table}/layers/{layer_name}/predictions
:
get
:
operationId
:
"
routes.predictions.get"
security
:
-
JwtRegular
:
[]
tags
:
-
"
Predictions"
summary
:
"
Get
predictions"
parameters
:
-
name
:
"
use_case"
in
:
"
path"
description
:
"
Name
of
the
use-case"
required
:
true
type
:
"
string"
-
name
:
"
table"
in
:
"
path"
description
:
"
Name
of
the
table"
required
:
true
type
:
"
string"
-
name
:
"
layer_name"
in
:
"
path"
description
:
"
Name
of
the
layer"
required
:
true
type
:
"
string"
responses
:
'
200'
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/Prediction"
'
404'
:
description
:
"
Predictions
not
found"
definitions
:
Prediction
:
type
:
object
properties
:
use_case
:
type
:
string
table
:
type
:
string
method
:
type
:
string
layer
:
type
:
string
reference_layer
:
type
:
string
cluster_label
:
type
:
string
time_window
:
type
:
string
prediction
:
type
:
integer
src/data-hub/proactive-community-detection-microservice/app/db/dao/__init__.py
View file @
d94b70d7
...
@@ -2,3 +2,4 @@ from db.dao.cluster import Cluster as ClusterDao
...
@@ -2,3 +2,4 @@ from db.dao.cluster import Cluster as ClusterDao
from
db.dao.layer
import
Layer
as
LayerDao
from
db.dao.layer
import
Layer
as
LayerDao
from
db.dao.timeslice
import
TimeSlice
as
TimeSliceDao
from
db.dao.timeslice
import
TimeSlice
as
TimeSliceDao
from
db.dao.layer_pair
import
LayerPair
as
LayerPairDao
from
db.dao.layer_pair
import
LayerPair
as
LayerPairDao
from
db.dao.prediction_result
import
PredictionResult
src/data-hub/proactive-community-detection-microservice/app/db/dao/prediction_result.py
0 → 100644
View file @
d94b70d7
from
typing
import
List
,
Dict
class PredictionResult:
    """DAO holding one community-evolution prediction for a single cluster.

    Carries the full identifying context (use case, table, method, layer,
    optional reference layer, cluster id, target time window) together with
    the predicted evolution label.
    """

    def __init__(self, use_case: str, table: str, method: str, layer: str,
                 reference_layer: str, cluster_id: str, time_window: str,
                 prediction: int):
        self.use_case = use_case
        self.table = table
        self.method = method
        self.layer = layer
        self.reference_layer = reference_layer
        self.cluster_id = cluster_id
        self.time_window = time_window
        self.prediction = prediction

    @staticmethod
    def create_from_dict(dict_) -> 'PredictionResult':
        """Rehydrate a PredictionResult from a raw document (e.g. a mongo entry).

        Builds an empty shell instance and overwrites its attributes wholesale
        with the dictionary's keys/values.
        """
        instance = PredictionResult(*([None] * 8))
        instance.__dict__.update(dict_)
        return instance
src/data-hub/proactive-community-detection-microservice/app/db/repository.py
View file @
d94b70d7
...
@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
...
@@ -23,6 +23,7 @@ class Repository(MongoRepositoryBase):
self
.
_layer_pair_collection
=
'layer_pairs'
self
.
_layer_pair_collection
=
'layer_pairs'
self
.
_clusters_collection
=
'clusters'
self
.
_clusters_collection
=
'clusters'
self
.
_time_slice_collection
=
'time_slices'
self
.
_time_slice_collection
=
'time_slices'
self
.
_prediction_result_collection
=
'prediction_results'
def
DROP
(
self
,
confirm
:
bool
=
False
):
def
DROP
(
self
,
confirm
:
bool
=
False
):
...
@@ -120,6 +121,14 @@ class Repository(MongoRepositoryBase):
...
@@ -120,6 +121,14 @@ class Repository(MongoRepositoryBase):
def
get_layer_pairs
(
self
,
use_case
:
str
)
->
List
[
LayerPairDao
]:
def
get_layer_pairs
(
self
,
use_case
:
str
)
->
List
[
LayerPairDao
]:
entries
=
super
()
.
get_entries
(
self
.
_layer_pair_collection
,
selection
=
{
'use_case'
:
use_case
})
entries
=
super
()
.
get_entries
(
self
.
_layer_pair_collection
,
selection
=
{
'use_case'
:
use_case
})
return
[
LayerPairDao
.
create_from_dict
(
e
)
for
e
in
entries
]
return
[
LayerPairDao
.
create_from_dict
(
e
)
for
e
in
entries
]
#endregion
#endregion
#region PredictionResult
def
add_prediction_result
(
self
,
prediction_result
:
PredictionResult
):
super
()
.
insert_entry
(
self
.
_prediction_result_collection
,
prediction_result
.
__dict__
)
def
get_prediction_results
(
self
,
use_case
:
str
)
->
List
[
PredictionResult
]:
entries
=
super
()
.
get_entries
(
self
.
_prediction_result_collection
,
selection
=
{
'use_case'
:
use_case
},
projection
=
{
'_id'
:
0
})
return
[
PredictionResult
.
create_from_dict
(
e
)
for
e
in
entries
]
#endregion
src/data-hub/proactive-community-detection-microservice/app/predict.ipynb
0 → 100644
View file @
d94b70d7
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"use_case = 'community-prediction-youtube-n'\r\n",
"layer_name = 'LikesLayer'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\r\n",
"from pandas import DataFrame\r\n",
"\r\n",
"df: DataFrame = pd.read_csv(f'data/{use_case}/ml_input/single_context/{layer_name}.csv', index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cluster_size</th>\n",
" <th>cluster_variance</th>\n",
" <th>cluster_density</th>\n",
" <th>cluster_import1</th>\n",
" <th>cluster_import2</th>\n",
" <th>cluster_area</th>\n",
" <th>cluster_center_distance</th>\n",
" <th>time_f1</th>\n",
" <th>time_f2</th>\n",
" <th>cluster_size.1</th>\n",
" <th>...</th>\n",
" <th>cluster_size.2</th>\n",
" <th>cluster_variance.2</th>\n",
" <th>cluster_density.2</th>\n",
" <th>cluster_import1.2</th>\n",
" <th>cluster_import2.2</th>\n",
" <th>cluster_area.2</th>\n",
" <th>cluster_center_distance.2</th>\n",
" <th>time_f1.2</th>\n",
" <th>time_f2.2</th>\n",
" <th>evolution_label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>565819</th>\n",
" <td>4.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000336</td>\n",
" <td>0.000168</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.992709</td>\n",
" <td>0.120537</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.992709</td>\n",
" <td>-0.120537</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565820</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.935016</td>\n",
" <td>-0.354605</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.822984</td>\n",
" <td>-0.568065</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565821</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.970942</td>\n",
" <td>-0.239316</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.885456</td>\n",
" <td>-0.464723</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565822</th>\n",
" <td>4.0</td>\n",
" <td>1.089725</td>\n",
" <td>0.75</td>\n",
" <td>0.000334</td>\n",
" <td>0.000166</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>0.885456</td>\n",
" <td>-0.464723</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.748511</td>\n",
" <td>-0.663123</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565823</th>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.748511</td>\n",
" <td>-0.663123</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.663123</td>\n",
" <td>-0.748511</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" cluster_size cluster_variance cluster_density cluster_import1 \\\n",
"565819 4.0 0.000000 0.00 0.000336 \n",
"565820 0.0 0.000000 0.00 0.000000 \n",
"565821 0.0 0.000000 0.00 0.000000 \n",
"565822 4.0 1.089725 0.75 0.000334 \n",
"565823 0.0 0.000000 0.00 0.000000 \n",
"\n",
" cluster_import2 cluster_area cluster_center_distance time_f1 \\\n",
"565819 0.000168 0.0 0.0 0.992709 \n",
"565820 0.000000 0.0 0.0 0.935016 \n",
"565821 0.000000 0.0 0.0 0.970942 \n",
"565822 0.000166 3.0 6.0 0.885456 \n",
"565823 0.000000 0.0 0.0 0.748511 \n",
"\n",
" time_f2 cluster_size.1 ... cluster_size.2 cluster_variance.2 \\\n",
"565819 0.120537 1.0 ... 0.0 0.0 \n",
"565820 -0.354605 1.0 ... 0.0 0.0 \n",
"565821 -0.239316 0.0 ... 0.0 0.0 \n",
"565822 -0.464723 1.0 ... 0.0 0.0 \n",
"565823 -0.663123 1.0 ... 0.0 0.0 \n",
"\n",
" cluster_density.2 cluster_import1.2 cluster_import2.2 \\\n",
"565819 0.0 0.0 0.0 \n",
"565820 0.0 0.0 0.0 \n",
"565821 0.0 0.0 0.0 \n",
"565822 0.0 0.0 0.0 \n",
"565823 0.0 0.0 0.0 \n",
"\n",
" cluster_area.2 cluster_center_distance.2 time_f1.2 time_f2.2 \\\n",
"565819 0.0 0.0 0.992709 -0.120537 \n",
"565820 0.0 0.0 0.822984 -0.568065 \n",
"565821 0.0 0.0 0.885456 -0.464723 \n",
"565822 0.0 0.0 0.748511 -0.663123 \n",
"565823 0.0 0.0 0.663123 -0.748511 \n",
"\n",
" evolution_label \n",
"565819 -1.0 \n",
"565820 4.0 \n",
"565821 -1.0 \n",
"565822 -1.0 \n",
"565823 -1.0 \n",
"\n",
"[5 rows x 28 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import json\r\n",
"from entities import Cluster\r\n",
"import collections\r\n",
"import numpy as np\r\n",
"from typing import Iterable, Tuple"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"N=3"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"path_in = f\"data/{use_case}/cluster_metrics/{layer_name}.json\"\r\n",
"with open(path_in, 'r') as file:\r\n",
" data = [Cluster.create_from_dict(cl_d) for cl_d in json.loads(file.read())]\r\n",
"\r\n",
"data.sort(key=lambda cl: (eval(cl.cluster_id), eval(cl.time_window_id)))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'time_window_id': '(2018, 24)', 'cluster_id': '20207', 'size': 0, 'std_dev': 0, 'scarcity': 0, 'importance1': 0, 'importance2': 0, 'range_': 0.0, 'center': [0, 0], 'global_center_distance': 0}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[-1]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"cluster_map = {}\r\n",
"\r\n",
"# for cluster in {c.cluster_id for c in data}:\r\n",
"# data_map[cluster] = [c for c in data if c.cluster_id == cluster]\r\n",
"\r\n",
"for cluster in data:\r\n",
" id_ = cluster.cluster_id\r\n",
"\r\n",
" if id_ not in cluster_map:\r\n",
" cluster_map[id_] = []\r\n",
"\r\n",
" cluster_map[id_].append(cluster)\r\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"{c.cluster_id for c in data} == cluster_map.keys()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20208"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(cluster_map.keys())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\r\n",
"\r\n",
"def get_cyclic_time_feature(time: int, max_time_value: int = 52) -> Tuple[float, float]:\r\n",
" return (np.sin(2*np.pi*time/max_time_value),\r\n",
" np.cos(2*np.pi*time/max_time_value))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from typing import Tuple\r\n",
"\r\n",
"def get_metrics(cur_cluster: Cluster) -> Tuple:\r\n",
" return (cur_cluster.size, cur_cluster.std_dev, cur_cluster.scarcity, cur_cluster.importance1, cur_cluster.importance2, cur_cluster.range_, cur_cluster.global_center_distance, get_cyclic_time_feature(cur_cluster.get_time_info()))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import pickle \r\n",
"\r\n",
"method = 'single_context'\r\n",
"\r\n",
"with open(f'data/{use_case}/ml_output/{method}/{layer_name}.model', 'rb') as file:\r\n",
" svc = pickle.load(file)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def flatten_metrics_datapoint(datapoint: list) -> Tuple['X', np.array]:\r\n",
" '''\r\n",
" Flattens a single metrics data point in the form:\r\n",
" [(cluster_size, cluster_variance, cluster_density, cluster_import1, cluster_import2, cluster_range, cluster_center, (time_f1, time_f2))^N, evolution_label]\r\n",
" to:\r\n",
" (X, y: np.array)\r\n",
" '''\r\n",
" flat_list = []\r\n",
" for entry in datapoint: # for all x\r\n",
" flat_list.extend(entry[:-1]) # add all number features except the time tuple\r\n",
" flat_list.extend(entry[-1]) # add time tuple\r\n",
"\r\n",
" # flat_list.append(datapoint[-1]) # y\r\n",
" return np.asarray(flat_list)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"def increase_time_window(time_window_id: str):\r\n",
" tuple_ = eval(time_window_id)\r\n",
" \r\n",
" if tuple_[1] == 52:\r\n",
" # 1st week next year\r\n",
" return (tuple_[0]+1 , 1)\r\n",
" else:\r\n",
" # next week\r\n",
" return str((tuple_[0], tuple_[1]+1))\r\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"from entities import PredictionResult\r\n",
"\r\n",
"prediction_results = []\r\n",
"\r\n",
"for cluster_id, time_windows in cluster_map.items():\r\n",
" v = [get_metrics(c) for c in time_windows[-N:]] # metrics for last N time windows\r\n",
" v_flattened = flatten_metrics_datapoint(v)\r\n",
" v_flattened = v_flattened.reshape(1, v_flattened.shape[0]) # reshape for ML with only 1 pred value\r\n",
" res = PredictionResult(use_case, use_case, method, layer_name, None, cluster_id, increase_time_window(time_windows[-1].time_window_id), svc.predict(v_flattened)[0])\r\n",
" prediction_results.append(res)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '0',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '1',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '2',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 3.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '3',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '4',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '5',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '6',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '7',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 3.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '8',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0},\n",
" {'use_case': 'community-prediction-youtube-n',\n",
" 'table': 'community-prediction-youtube-n',\n",
" 'method': 'single_context',\n",
" 'layer': 'LikesLayer',\n",
" 'reference_layer': None,\n",
" 'cluster_id': '9',\n",
" 'time_window': '(2018, 25)',\n",
" 'prediction': 2.0}]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[r.__dict__ for r in prediction_results[:10]]"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0\n"
]
}
],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "6f758d9e9b2866087a1d464f700475727f47c3870deef6e7815ca445f120e6ad"
},
"kernelspec": {
"display_name": "Python 3.7.6 64-bit ('venv': venv)",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
\ No newline at end of file
src/data-hub/proactive-community-detection-microservice/app/processing/__init__.py
View file @
d94b70d7
from
processing.ClusterMetricsCalculator
import
ClusterMetricsCalculator
,
ClusterMetricsCalculator1D
,
ClusterMetricsCalculator2D
,
ClusterMetricsCalculatorFactory
from
processing.ClusterMetricsCalculator
import
ClusterMetricsCalculator
,
ClusterMetricsCalculator1D
,
ClusterMetricsCalculator2D
,
ClusterMetricsCalculatorFactory
from
processing.DataSampler
import
DataSampler
from
processing.DataSampler
import
DataSampler
from
processing.fetching
import
fetching
\ No newline at end of file
src/data-hub/proactive-community-detection-microservice/app/processing/ml/predict_single_context.py
0 → 100644
View file @
d94b70d7
from
processing.data_prep.metrics_base
import
get_cyclic_time_feature
N
=
3
# Currently N is fixed to 3
method
=
'single_context'
####################
import
pandas
as
pd
from
pandas
import
DataFrame
#####################
import
json
from
entities
import
Cluster
import
collections
import
numpy
as
np
from
typing
import
Iterable
,
Tuple
######################
from
typing
import
Dict
from
typing
import
Tuple
def get_metrics(cur_cluster: Cluster) -> Tuple:
    """Collects the feature tuple for one cluster time window.

    Returns the seven scalar cluster metrics followed by the cyclic
    (sin, cos) time feature tuple as the last element.
    """
    scalar_features = (
        cur_cluster.size,
        cur_cluster.std_dev,
        cur_cluster.scarcity,
        cur_cluster.importance1,
        cur_cluster.importance2,
        cur_cluster.range_,
        cur_cluster.global_center_distance,
    )
    return scalar_features + (get_cyclic_time_feature(cur_cluster.get_time_info()),)
####################
import
pickle
#####################
def flatten_metrics_datapoint(datapoint: list) -> np.ndarray:
    """
    Flattens a single metrics data point of the form:
    [(cluster_size, cluster_variance, cluster_density, cluster_import1,
      cluster_import2, cluster_range, cluster_center, (time_f1, time_f2))^N]
    into a flat 1-d feature vector X.

    :param datapoint: list of N metric tuples; each tuple's last element is
        the (time_f1, time_f2) pair, all other elements are plain numbers
    :returns: 1-d np.ndarray containing all scalar features followed by the
        unpacked time pair, per time window
    """
    # Fix: the return annotation previously claimed Tuple['X', np.array],
    # but the function returns a single flat np.ndarray (no label).
    flat_list = []
    for entry in datapoint:  # one entry per time window
        flat_list.extend(entry[:-1])  # all scalar features except the time tuple
        flat_list.extend(entry[-1])   # unpack the cyclic (sin, cos) time tuple
    return np.asarray(flat_list)
######################
def increase_time_window(time_window_id: str) -> str:
    """
    Returns the id of the time window directly following *time_window_id*.

    :param time_window_id: stringified (year, week) tuple, e.g. "(2018, 52)"
    :returns: stringified (year, week) tuple of the next week
    """
    # NOTE(review): eval on a stored id — acceptable only because the ids are
    # produced internally; never call this on untrusted input.
    year, week = eval(time_window_id)

    if week == 52:
        # 1st week of the next year.
        # Bug fix: this branch previously returned a raw tuple while the other
        # branch returned a string, yielding inconsistent time_window values
        # at the year rollover.
        # NOTE(review): assumes 52-week years; ISO years can have 53 weeks — confirm upstream.
        return str((year + 1, 1))
    else:
        # next week within the same year
        return str((year, week + 1))
#########################
from
db.repository
import
Repository
from
db.dao
import
PredictionResult
repo
=
Repository
()
def run_prediction(use_case: str):
    """
    Predicts the next evolution label for every cluster of every layer of
    *use_case* and stores each result in the repository.

    Per layer: loads the cluster metrics time series from disk, groups the
    time windows per cluster, loads the trained single-context model, builds
    a flat feature vector from each cluster's last N time windows, and
    persists the model's prediction as a PredictionResult.
    """
    for layer in repo.get_layers_for_use_case(use_case):
        layer_name = layer.layer_name

        # Fix: a DataFrame was previously read from
        # data/{use_case}/ml_input/single_context/{layer_name}.csv here but
        # never used — the dead read was removed.

        # Load the per-cluster metrics time series for this layer.
        path_in = f"data/{use_case}/cluster_metrics/{layer_name}.json"
        with open(path_in, 'r') as file:
            data = [Cluster.create_from_dict(cl_d) for cl_d in json.loads(file.read())]

        # Sort by (cluster id, time window id) so each cluster's windows are
        # chronological after grouping.
        # NOTE(review): eval on stored ids — safe only because ids are produced internally.
        data.sort(key=lambda cl: (eval(cl.cluster_id), eval(cl.time_window_id)))

        # Group the time windows per cluster id; insertion order keeps each
        # cluster's list sorted by time window.
        cluster_map: Dict[str, list] = {}
        for cluster in data:
            cluster_map.setdefault(cluster.cluster_id, []).append(cluster)

        # Load the trained model for this layer.
        with open(f'data/{use_case}/ml_output/{method}/{layer_name}.model', 'rb') as file:
            svc = pickle.load(file)

        for cluster_id, time_windows in cluster_map.items():
            v = [get_metrics(c) for c in time_windows[-N:]]  # metrics for last N time windows
            v_flattened = flatten_metrics_datapoint(v)
            v_flattened = v_flattened.reshape(1, v_flattened.shape[0])  # single-sample batch for the classifier

            res = PredictionResult(use_case, use_case, method, layer_name, None,
                                   cluster_id,
                                   increase_time_window(time_windows[-1].time_window_id),
                                   svc.predict(v_flattened)[0])
            repo.add_prediction_result(res)
src/data-hub/proactive-community-detection-microservice/app/routes/predictions.py
0 → 100644
View file @
d94b70d7
from
flask
import
request
,
Response
from
db.repository
import
Repository
from
db.dao
import
PredictionResult
repo
=
Repository
()
def get(use_case, table, layer_name):
    """
    HTTP handler for GET /use-cases/{use_case}/tables/{table}/layers/{layer_name}/predictions.

    Returns the stored prediction results of *use_case* restricted to the
    requested table and layer, or a 404 response when none exist.
    """
    res = repo.get_prediction_results(use_case)
    if res is None or len(res) == 0:
        return Response(status=404)

    # Bug fix: the table and layer_name path parameters were previously
    # ignored, so this per-layer endpoint returned predictions of every
    # layer of the use-case.
    filtered = [p for p in res if p.table == table and p.layer == layer_name]
    if not filtered:
        return Response(status=404)
    return [p.__dict__ for p in filtered]
src/data-hub/proactive-community-detection-microservice/app/run_dataprep.py
View file @
d94b70d7
...
@@ -7,4 +7,6 @@ if os.path.exists(modules_path):
...
@@ -7,4 +7,6 @@ if os.path.exists(modules_path):
from
processing.data_prep.main
import
run
from
processing.data_prep.main
import
run
run
(
use_case
=
'community-prediction-youtube-n'
)
if
__name__
==
'__main__'
:
\ No newline at end of file
'''Creates data/raw files'''
run
(
use_case
=
'community-prediction-youtube-n'
)
\ No newline at end of file
src/data-hub/proactive-community-detection-microservice/app/run_layerpair_upload.py
View file @
d94b70d7
...
@@ -68,6 +68,8 @@ def upload_layerpair(layerpair:LayerPairDao):
...
@@ -68,6 +68,8 @@ def upload_layerpair(layerpair:LayerPairDao):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
'''Uploads the cross-context dependencies for all use-cases.'''
assert
False
,
'replace with true to upload now'
assert
False
,
'replace with true to upload now'
for
lp
in
get_youtube_dependencies
():
for
lp
in
get_youtube_dependencies
():
...
...
src/data-hub/proactive-community-detection-microservice/app/run_node_fetching.py
View file @
d94b70d7
...
@@ -11,4 +11,6 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
...
@@ -11,4 +11,6 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from
processing.fetching
import
fetching
from
processing.fetching
import
fetching
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
'''Fetches all required data from business-logic and role-stage-discovery.'''
fetching
.
fetch
(
selected_use_cases
=
[
'community-prediction-youtube-n'
],
selected_use_case_tables
=
None
)
fetching
.
fetch
(
selected_use_cases
=
[
'community-prediction-youtube-n'
],
selected_use_case_tables
=
None
)
\ No newline at end of file
src/data-hub/proactive-community-detection-microservice/app/run_prediction.py
0 → 100644
View file @
d94b70d7
import os
import sys

# Make the shared modules folder importable before the processing import below.
modules_path = '../../../modules/'
if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)

from processing.ml.predict_single_context import run_prediction as run_single_prediction
# from processing.ml.predict_cross_context import run_prediction as run_cross_prediction


def _main():
    '''Executes the predictions.'''
    use_case = 'community-prediction-youtube-n'

    run_single_prediction(use_case)
    # run_cross_prediction(use_case)


if __name__ == '__main__':
    _main()
src/data-hub/proactive-community-detection-microservice/app/run_training.py
View file @
d94b70d7
...
@@ -5,10 +5,11 @@ if os.path.exists(modules_path):
...
@@ -5,10 +5,11 @@ if os.path.exists(modules_path):
sys
.
path
.
insert
(
1
,
modules_path
)
sys
.
path
.
insert
(
1
,
modules_path
)
from
processing.ml.train_single_context
import
run_single_training
from
processing.ml.train_single_context
import
run_
training
as
run_
single_training
from
processing.ml.train_cross_context
import
run_cross_training
from
processing.ml.train_cross_context
import
run_
training
as
run_
cross_training
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
'''Executes the training.'''
use_case
=
'community-prediction-youtube-n'
use_case
=
'community-prediction-youtube-n'
run_single_training
(
use_case
)
run_single_training
(
use_case
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment