Commit ce3886b2 authored by Alexander Lercher

Major code cleanup

Removed all old and unused source code, including code from earlier approaches
parent a58cc362
layers:
  user:
    properties:
  starting-point:
    properties:
      - Latitude_StartingPoint
      - Longitude_StartingPoint
\ No newline at end of file
@@ -29,59 +29,7 @@ paths:
        200:
          description: "Successful echo of request data"
  # Locations
  # TODO remove
  /locations:
    post:
      operationId: "routes.location.post"
      tags:
      - "Locations"
      summary: "Add new location data"
      parameters:
      - in: body
        name: "Location"
        description: "The location data to be added"
        required: true
        schema:
          $ref: "#/definitions/Location"
      responses:
        201:
          description: "Successful operation"
        400:
          description: "Invalid input"
    get:
      operationId: "routes.location.get"
      tags:
      - "Locations"
      summary: "Get location data"
      parameters: []
      responses:
        200:
          description: "Successful operation"
          schema:
            $ref: "#/definitions/LocationCollection"
  /location-collections:
    post:
      operationId: "routes.location.post_many"
      tags:
      - "Locations"
      summary: "Add new location data collection"
      parameters:
      - in: body
        name: "Locations"
        description: "The location data collection to be added"
        required: true
        schema:
          $ref: "#/definitions/LocationCollection"
      responses:
        201:
          description: "Successful operation"
        400:
          description: "Invalid input"
  #region Layers
  /layers:
    post:
      operationId: "routes.layers.post"
@@ -176,7 +124,7 @@ paths:
  /layers/{name}/clusters:
    get:
      operationId: "routes.clustersets.get_by_name2"
      operationId: "routes.clustersets.get_by_name"
      tags:
      - "Layers"
      summary: "Get all clusters for the layer"
@@ -196,7 +144,7 @@ paths:
  /layers/{name}/timeslices:
    get:
      operationId: "routes.timeslices.get_by_name2"
      operationId: "routes.timeslices.get_by_name"
      tags:
      - "Layers"
      summary: "Get all timeslices for the layer"
@@ -213,162 +161,10 @@ paths:
          schema:
            $ref: "#/definitions/TimeSliceCollection"
        404:
          description: "Layer not found"
  #endregion
  # Clusters
  # TODO remove partially
  /location-clusters:
    get:
      operationId: "routes.cluster.get_locations"
      tags:
      - "Clusters"
      summary: "Get user communities clustered by location"
      parameters: []
      responses:
        200:
          description: "Successful operation"
          schema:
            $ref: "#/definitions/LocationClusterCollection"
  # /clusters/cluster.png:
  #   get:
  #     operationId: "routes.cluster.get_image"
  #     tags:
  #     - "Clusters"
  #     summary: "Get user communities per date per hour as image"
  #     parameters: []
  #     produces:
  #     - "image/png"
  #     responses:
  #       200:
  #         description: "Successful operation"
  /time-clusters:
    get:
      operationId: "routes.cluster.get_times"
      tags:
      - "Clusters"
      summary: "Get user communities clustered by time per hour"
      parameters: []
      responses:
        200:
          description: "Successful operation"
          schema:
            $ref: "#/definitions/TimeClusterCollection"
  # /agi/clusters/cluster.png:
  #   get:
  #     operationId: "routes.agi_cluster.get_image"
  #     tags:
  #     - "Clusters"
  #     summary: "Get user communities per date per hour from agi data as image"
  #     parameters: []
  #     produces:
  #     - "image/png"
  #     responses:
  #       200:
  #         description: "Successful operation"
  # TODO remove
  /clustersets:
    get:
      operationId: "routes.clustersets.get"
      tags:
      - "Clusters"
      summary: "Get clustersets for all layers"
      parameters: []
      responses:
        200:
          description: "Successful operation"
          schema:
            $ref: "#/definitions/ClusterSetCollection"
  /clustersets/names:
    get:
      operationId: "routes.clustersets.get_names"
      tags:
      - "Clusters"
      summary: "Get clusterset names for all layers"
      parameters: []
      responses:
        200:
          description: "Successful operation"
          schema:
            type: array
            items:
              type: string
  /clustersets/{layername}:
    get:
      operationId: "routes.clustersets.get_by_name"
      tags:
      - "Clusters"
      summary: "Get clusterset for layer-name"
      parameters:
      - name: "layername"
        in: "path"
        description: "Name of the layer to return the clusterset for"
        required: true
        type: "string"
      responses:
        200:
          description: "Successful operation"
          schema:
            $ref: "#/definitions/ClusterSet"
        404:
          description: "Clusterset not found"
  # TODO remove
  /user-cluster-graphs:
    get:
      operationId: "routes.user_cluster.get"
      tags:
      - "User Graphs"
      summary: "Get user graphs per layer per cluster"
      parameters: []
      responses:
        200:
          description: "Successful operation"
          schema:
            $ref: "#/definitions/UserClusterGraphCollection"
  # Time slices
  /timeslices:
    get:
      operationId: "routes.timeslices.get"
      tags:
      - "Time Slices"
      summary: "Get all time slices based on individual layers containing clusters with nodes for that time"
      parameters: []
      responses:
        200:
          description: "Successful operation"
          schema:
            $ref: "#/definitions/TimeSliceCollection"
  #endregion
  /timeslices/{layername}:
    get:
      operationId: "routes.timeslices.get_by_name"
      tags:
      - "Time Slices"
      summary: "Get all time slices for one layer"
      parameters:
      - name: "layername"
        in: "path"
        description: "Name of the layer to return the time slices for"
        required: true
        type: "string"
      responses:
        200:
          description: "Successful operation"
          schema:
            $ref: "#/definitions/TimeSliceCollection"
        404:
          description: "No time slices found for layername"
  # Function Calls
  #region Function Calls
  /rfc/run:
    post:
      operationId: "routes.functions.run_agi_clustering_and_graph_creation"
@@ -380,29 +176,10 @@ paths:
        204:
          description: "Successful operation"
  #endregion

definitions:
  Location:
    type: "object"
    properties:
      id:
        type: string
      user:
        type: "string"
      latitude:
        type: "number"
        format: float
      longitude:
        type: "number"
        format: float
      timestamp:
        type: "number"
  LocationCollection:
    type: array
    items:
      $ref: "#/definitions/Location"

definitions:
  Cluster:
    type: object
    properties:
@@ -415,82 +192,16 @@ definitions:
        items:
          $ref: "#/definitions/Node"
  ClusterCollection:
    type: array
    items:
      $ref: "#/definitions/Cluster"
  LocationCluster:
    type: object
    properties:
      id:
        type: string
      cluster_label:
        type: number
      nodes:
        type: array
        items:
          $ref: "#/definitions/Location"
  LocationClusterCollection:
    type: array
    items:
      $ref: "#/definitions/LocationCluster"
  TimeCluster:
    type: object
    properties:
      id:
        type: string
      date:
        type: string
      hour:
        type: number
      cluster_label:
        type: number
      nodes:
        type: array
        items:
          $ref: "#/definitions/Location"
  TimeClusterCollection:
    type: array
    items:
      $ref: "#/definitions/TimeCluster"
  UserClusterGraph:
    type: object
    properties:
      nodes:
        type: array
        items:
          type: string
      edges:
        type: array
        items:
          type: array
          items:
            type: string
          example:
          - user1
          - user2
          - weight
  UserClusterGraphCollection:
    type: array
    items:
      $ref: "#/definitions/UserClusterGraph"
  Layer-UpperCase:
    type: object
    properties:
      LayerName:
        type: string
      # Nodes:
      #   type: array
      #   items:
      #     type: object
      Properties:
        type: array
        items:
@@ -501,10 +212,6 @@ definitions:
    properties:
      layer_name:
        type: string
      # nodes:
      #   type: array
      #   items:
      #     type: object
      properties:
        type: array
        items:
@@ -531,21 +238,6 @@
        items:
          $ref: "#/definitions/Node"
  ClusterSet:
    type: object
    properties:
      layer_name:
        type: string
      clusters:
        type: array
        items:
          $ref: "#/definitions/Cluster"
  ClusterSetCollection:
    type: array
    items:
      $ref: "#/definitions/ClusterSet"
  TimeSlice:
    type: object
    properties:
......
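For reference, a hedged sketch of calling the two endpoints whose operationIds were renamed above. The base URL and `/api` prefix are assumptions; the layer name is taken from the example script later in this commit:

```python
# Hypothetical client calls for the renamed endpoints; the base URL is an assumption.
import requests

BASE = 'http://role-stage-discovery:80/api'  # host from network_constants, '/api' prefix assumed

clusters = requests.get(f'{BASE}/layers/Destination_Layer/clusters').json()
timeslices = requests.get(f'{BASE}/layers/Destination_Layer/timeslices').json()
print(len(clusters), len(timeslices))
```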
import sys
import os
modules_paths = ['../../../modules/']
for modules_path in modules_paths:
    if os.path.exists(modules_path):
        sys.path.insert(1, modules_path)

from typing import List, Tuple, Any
from networkx import Graph

from db.entities import LocationCluster, UserClusterGraph, Cluster
from db.repository import Repository
from processing.user_graph_generator import UserGraphGenerator

repo = Repository()

def get_edges_with_weights(g: Graph) -> List[Tuple[Any, Any, int]]:
    res = []
    for e in g.edges:
        res.append((*e, g.edges[e]['weight']))
    return res

def create_graphs_for_location_clusters():
    graphs_for_clusters = []
    ug = UserGraphGenerator()
    clusters: List[LocationCluster] = repo.get_location_clusters()
    for cluster in clusters:
        user_ids = [n['user'] for n in cluster.nodes]
        graph: Graph = ug.create_graph_from_nodes(user_ids)
        vertices = list(graph.nodes)
        edges = get_edges_with_weights(graph)
        cluster_graph = UserClusterGraph(vertices, edges)
        graphs_for_clusters.append(cluster_graph)
    store_graphs(graphs_for_clusters)

def store_graphs(graphs: List):
    for g in graphs:
        repo.add_user_cluster_graph(g)

if __name__ == "__main__":
    create_graphs_for_location_clusters()
import json
from typing import List, Dict
import hashlib

class AgiRepository:

    def getLocations(self) -> List[Dict]:
        locations = []
        travels = self.readDataFromFile()
        # only take travels that were actually started
        travels = [t for t in travels if t['status'] >= 2]
        for travel in travels:
            num_complete_travels = min(len(travel['startedBy']), len(travel['users']))
            for i in range(num_complete_travels):
                cur_location = travel['startedBy'][i]
                cur_user = travel['users'][i]
                locations.append(
                    self.location(f'{travel["id"]}-{cur_location["moment"]}',
                                  cur_location['coordinate']['latitude'],
                                  cur_location['coordinate']['longitude'],
                                  cur_location['moment'],
                                  # TODO the user who started the travel is not available in the dataset - currently using the travel's user list instead
                                  hashlib.sha1(cur_user['userId'].encode()).hexdigest()  # hashed to avoid exposing the generated username
                                  ))
        return locations

    def getLocationsBasedOnNewDataSchema(self):
        '''Creates the new generic data schema in use since 24.03.2020.'''
        data = {
            'layer_name': 'Destination',
            'nodes': self.getLocations(),
            'properties': ['latitude', 'longitude']
        }
        return data

    def getTimesBasedOnNewDataSchema(self):
        '''Creates the new generic data schema in use since 24.03.2020.'''
        data = {
            'layer_name': 'Starting_Time',
            'nodes': self.getLocations(),
            'properties': ['timestamp']
        }
        return data

    def readDataFromFile(self) -> List[Dict]:
        with open('./db/agi/travels.json', 'r') as f_travels:
            travels = json.loads(f_travels.read())
        return travels

    def location(self, id_, lat, long_, timestamp, username) -> dict:
        return {
            'id': id_,
            'latitude': lat,
            'longitude': long_,
            'timestamp': timestamp,
            'user': username
        }
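To make the adapter's output shape concrete, a minimal usage sketch (assumes ./db/agi/travels.json is present; the printed values follow the schema built above):

```python
# Hypothetical usage of AgiRepository; the import path mirrors this repo's layout.
from db.agi.agi_repository import AgiRepository

agi_repo = AgiRepository()
layer_data = agi_repo.getLocationsBasedOnNewDataSchema()
print(layer_data['layer_name'])        # 'Destination'
print(layer_data['properties'])        # ['latitude', 'longitude']
print(sorted(layer_data['nodes'][0]))  # ['id', 'latitude', 'longitude', 'timestamp', 'user']
```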
(Source diff not shown: file too large to display.)
from db.entities.location import Location
from db.entities.popular_location import PopularLocation
from db.entities.cluster import Cluster, LocationCluster, TimeCluster
from db.entities.cluster import Cluster
from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer
......
@@ -39,67 +39,3 @@ class Cluster:
    def __str__(self):
        return f"Cluster({self.__repr__()})"

class LocationCluster(Cluster):
    def __init__(self, cluster_label: int = None, nodes: List = None,
                 location_dict: Dict = None, from_db=False):
        super().__init__(cluster_label, nodes)
        self.id = f'{self.cluster_label}'
        if location_dict is not None:
            self.from_serializable_dict(location_dict, from_db)

    def to_serializable_dict(self, for_db=False) -> Dict:
        return {
            "id": self.id,
            "cluster_label": self.cluster_label,
            "nodes": json.dumps(self.nodes) if for_db else self.nodes
        }

    def from_serializable_dict(self, location_dict: Dict, from_db=False):
        self.id = location_dict["id"]
        self.cluster_label = location_dict["cluster_label"]
        self.nodes = json.loads(location_dict["nodes"]) \
            if from_db else location_dict["nodes"]

    def __repr__(self):
        return json.dumps(self.to_serializable_dict())

    def __str__(self):
        return f"LocationCluster({self.__repr__()})"

class TimeCluster(Cluster):
    def __init__(self, date: date = None, hour: int = None, cluster_label: int = None, nodes: List = None,
                 time_dict: Dict = None, from_db=False):
        super().__init__(cluster_label, nodes)
        self.date = date
        self.hour = hour
        self.id = f'{self.date}-{self.hour}-{self.cluster_label}'
        if time_dict is not None:
            self.from_serializable_dict(time_dict, from_db)

    def to_serializable_dict(self, for_db=False) -> Dict:
        return {
            "id": self.id,
            "date": str(self.date),
            "hour": self.hour,
            "cluster_label": self.cluster_label,
            "nodes": json.dumps(self.nodes) if for_db else self.nodes
        }

    def from_serializable_dict(self, time_dict: Dict, from_db=False):
        self.id = time_dict["id"]
        self.date = datetime.strptime(time_dict["date"], '%Y-%m-%d').date()
        self.hour = time_dict["hour"]
        self.cluster_label = time_dict["cluster_label"]
        self.nodes = json.loads(time_dict["nodes"]) \
            if from_db else time_dict["nodes"]

    def __repr__(self):
        return json.dumps(self.to_serializable_dict(True))

    def __str__(self):
        return f"TimeCluster({self.__repr__()})"
@@ -3,8 +3,6 @@ import network_constants as netconst
from database.MongoRepositoryBase import MongoRepositoryBase
import json
from db.agi.agi_repository import AgiRepository
from db.entities import *
from typing import List
@@ -13,61 +11,14 @@ class Repository(MongoRepositoryBase):
    '''This is a repository for MongoDb.'''

    def __init__(self):
        super().__init__(netconst.COMMUNITY_DETECTION_DB_HOSTNAME,
                         netconst.COMMUNITY_DETECTION_DB_PORT,
                         'communityDetectionDb')
        self._location_collection = 'location'
        self._location_cluster_collection = 'location_cluster'
        self._time_cluster_collection = 'time_cluster'
        self._user_cluster_graph_collection = 'user_cluster_graph'
        self._layer_collection = 'layer-new'
        self._layer_nodes_collection = 'layer_nodes-new'
        self._clusterset_collection = 'cluster_set-new'
        self._time_slice_collection = 'time_slice-new'
        self.agi_repo = AgiRepository()

    #region Location
    def add_location(self, location: Location):
        super().insert_entry(self._location_collection, location.to_serializable_dict())

    def get_locations(self) -> List[Location]:
        locations = super().get_entries(self._location_collection)
        return [Location(l) for l in locations]

    def get_agi_locations(self) -> List[Location]:
        agi_locations = self.agi_repo.getLocations()
        return [Location(agi_loc) for agi_loc in agi_locations]
    #endregion

    #region Specific Clusters
    def add_location_cluster(self, cluster: LocationCluster):
        super().insert_entry(self._location_cluster_collection,
                             cluster.to_serializable_dict(for_db=True))
        super().__init__(netconst.ROLESTAGE_DISCOVERY_DB_HOSTNAME,
                         netconst.ROLESTAGE_DISCOVERY_DB_PORT,
                         'roleStageDb')

    def get_location_clusters(self) -> List[LocationCluster]:
        clusters = super().get_entries(self._location_cluster_collection)
        return [LocationCluster(location_dict=c, from_db=True) for c in clusters]

    def add_time_cluster(self, cluster: TimeCluster):
        super().insert_entry(self._time_cluster_collection,
                             cluster.to_serializable_dict(for_db=True))

    def get_time_clusters(self) -> List[TimeCluster]:
        clusters = super().get_entries(self._time_cluster_collection)
        return [TimeCluster(time_dict=c, from_db=True) for c in clusters]
    #endregion

    #region Cluster Graph
    def add_user_cluster_graph(self, user_graph: UserClusterGraph):
        super().insert_entry(self._user_cluster_graph_collection,
                             user_graph.to_serializable_dict(for_db=True))

    def get_user_cluster_graphs(self) -> List[UserClusterGraph]:
        user_graphs = super().get_entries(self._user_cluster_graph_collection)
        return [UserClusterGraph(dict_=u, from_db=True) for u in user_graphs]
    #endregion

        self._layer_collection = 'layers'
        self._layer_nodes_collection = 'layer_nodes'
        self._clusters_collection = 'clusters'
        self._time_slice_collection = 'time_slices'

    #region Layers
    def add_layer(self, layer: Layer):
@@ -77,10 +28,6 @@ class Repository(MongoRepositoryBase):
        entries = super().get_entries(self._layer_collection)
        return [Layer(e) for e in entries]

    def get_layer_names(self) -> List[str]:
        entries = super().get_entries(self._layer_collection, projection={'layer_name': 1})
        return [e['layer_name'] for e in entries]

    def get_layer(self, layer_name) -> Layer:
        entries = super().get_entries(self._layer_collection, selection={'layer_name': layer_name})
        entries = [Layer(e) for e in entries]
@@ -103,38 +50,13 @@ class Repository(MongoRepositoryBase):
    #endregion

    #region ClusterSet
    # TODO cleanup
    def add_clusterset(self, cluster_set: ClusterSet):
        super().insert_entry(self._clusterset_collection, cluster_set.to_serializable_dict())

    def get_clustersets(self) -> List[ClusterSet]:
        '''Returns all clustersets.'''
        entries = super().get_entries(self._clusterset_collection)
        return [ClusterSet(cluster_set_dict=e) for e in entries]

    def get_clusterset_names(self) -> List[str]:
        '''Returns the names of all clustersets.'''
        entries = super().get_entries(self._clusterset_collection, projection={'layer_name': 1})
        return [e['layer_name'] for e in entries]

    def get_clusterset(self, layer_name) -> ClusterSet:
        '''Returns the clusterset with the given name, or None if it does not exist.'''
        entries = super().get_entries(self._clusterset_collection, selection={'layer_name': layer_name})
        entries = [ClusterSet(cluster_set_dict=e) for e in entries]
        if entries is not None and len(entries) > 0:
            return entries[0]
        else:
            return None
    #region Clusters
    def add_clusters(self, clusters: List[Cluster]):
        cluster_dicts = [c.to_serializable_dict(for_db=True) for c in clusters]
        super().insert_many(self._clusterset_collection, cluster_dicts)
        super().insert_many(self._clusters_collection, cluster_dicts)

    def get_clusters_for_layer(self, layer_name: str) -> List[Cluster]:
        entries = super().get_entries(self._clusterset_collection, selection={'layer_name': layer_name}, projection={'_id': 0})
        entries = super().get_entries(self._clusters_collection, selection={'layer_name': layer_name}, projection={'_id': 0})
        return [Cluster(cluster_dict=e, from_db=True) for e in entries]
    #endregion
@@ -155,4 +77,5 @@ class Repository(MongoRepositoryBase):
    def remove_all_time_slices(self):
        super().drop_collection(self._time_slice_collection)
    #endregion
\ No newline at end of file
import sys
import os
modules_path = '../../../modules/'
if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)

from db.repository import Repository

def insert_locations():
    repo = Repository()
    locs = repo.get_agi_locations()
    for l in locs:
        repo.add_location(l)

if __name__ == "__main__":
    insert_locations()
@@ -3,7 +3,6 @@ import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import OPTICS
from typing import List, Dict, Any, TypeVar
from deprecated import deprecated
T = TypeVar('T')
ClusterGroup = Dict[Any, List[Dict]]
@@ -16,63 +15,20 @@ class Clusterer:
    :param epsilon: Eps used in OPTICS
    :param min_points: MinPts used in OPTICS
    '''
    def __init__(self, epsilon=11, min_points=5):
        self.epsilon = epsilon
    def __init__(self, min_points=5):
        self.min_points = min_points

    def draw_locations(self, locations: List, labels: List = None) -> plt.Figure:
        if locations is None or len(locations) == 0:
            return self._draw_locations()

        if labels is None or len(locations) != len(labels):
            labels = self.create_labels(locations)

        return self._draw_locations(
            locations=self.extract_location_data(locations),
            partition_info=labels
        )

    def _draw_locations(self, locations: np.ndarray = None, centroids: np.ndarray = None, partition_info: List = None) -> plt.Figure:
        fig = plt.Figure()
        axis = fig.add_subplot(1, 1, 1)

        if locations is not None:
            colors = plt.cm.rainbow(np.linspace(0, 1, len(locations)))
            if partition_info is not None:
                distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(partition_info))))
                colors = [distinct_colors[pi] for pi in partition_info]

            # draw locations with random colors
            axis.scatter(locations[:, 0],
                         locations[:, 1],
                         c=colors)

        if centroids is not None:
            # draw black centroids
            axis.scatter(centroids[:, 0], centroids[:, 1], c='k', marker='x', s=80)

        return fig

    def create_labels(self, features: np.ndarray) -> List[int]:
        '''Creates labels for the items based on DBSCAN.'''
        '''Creates labels for the items based on OPTICS.'''
        if features is None or len(features) == 0:
            return features  # trash in, trash out

        dbsc = OPTICS(min_samples=self.min_points)  # DBSCAN(eps = self.epsilon, min_samples = self.min_points)
        dbsc = OPTICS(min_samples=self.min_points)
        dbsc = dbsc.fit(features)
        labels = dbsc.labels_

        return labels.tolist()

    @deprecated(reason="Use generic version instead")
    def extract_location_features(self, locations: List[dict]) -> np.ndarray:
        return np.asarray([(float(l['latitude']), float(l['longitude'])) for l in locations])

    @deprecated(reason="Use generic version instead")
    def extract_time_features(self, times: List[Dict]) -> np.ndarray:
        return np.asarray([[float(t['timestamp'])] for t in times])

    def _extract_features(self, dataset: List[Dict], features: List[str]) -> np.ndarray:
        '''Extracts the feature values from the dataset into a np array with the same order as the original dataset.'''
        extracted_features = []
@@ -104,30 +60,6 @@ class Clusterer:
        return clusters

    @deprecated(reason="Use generic version instead")
    def cluster_locations(self, locations: List[Dict]) -> ClusterGroup:
        '''Returns a dictionary with identified clusters and their locations copied from the input.'''
        if locations is None or len(locations) == 0:
            # raise Exception("locations has to contain something")
            return {}

        features = self.extract_location_features(locations)
        labels = self.create_labels(features)
        self.label_dataset(locations, labels)
        return self.group_by_clusters(locations, labels)

    @deprecated(reason="Use generic version instead")
    def cluster_times(self, times: List[Dict]) -> ClusterGroup:
        '''Returns a dictionary with identified clusters and their times copied from the input.'''
        features = self.extract_time_features(times)
        labels = self.create_labels(features)
        self.label_dataset(times, labels)
        return self.group_by_clusters(times, labels)

    def cluster_dataset(self, dataset: List[Dict], features: List[str]) -> ClusterGroup:
        '''
        Returns the identified clusters containing a subset of nodes from the dataset.
......
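A minimal sketch of the generic clustering path that replaces the deprecated location/time variants (the node dicts and coordinates are made up; cluster keys follow OPTICS labels, with -1 for noise):

```python
# Hypothetical input for Clusterer.cluster_dataset; coordinates are invented.
from processing.clustering.clusterer import Clusterer

nodes = [
    {'user': 'u1', 'latitude': 48.20, 'longitude': 16.37},
    {'user': 'u2', 'latitude': 48.21, 'longitude': 16.38},
    {'user': 'u3', 'latitude': 40.71, 'longitude': -74.01},
]

clusterer = Clusterer(min_points=2)
# Returns a ClusterGroup, i.e. Dict[label, List[node dict]].
clusters = clusterer.cluster_dataset(nodes, features=['latitude', 'longitude'])
for label, members in clusters.items():
    print(label, [n['user'] for n in members])
```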
import yaml
from typing import Generator
from pathlib import Path

### init logging ###
import logging
LOG_FORMAT = (
    '%(levelname) -5s %(asctime)s %(name)s:%(funcName) -35s %(lineno) -5d: %(message)s')
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
LOGGER = logging.getLogger(__name__)

PROJECT_ROOT = Path(__file__).parent.parent.parent

class ClusteringConfig:
    '''Contains the configuration for the clustering algorithm defined in configs/clustering.yaml.'''
    config_path = f'{PROJECT_ROOT}/configs/clustering.yaml'
    config: dict = None

    def __init__(self):
        self.config = self._load_config()

    def _load_config(self) -> dict:
        '''Loads the whole configuration from file.'''
        config = None
        with open(self.config_path, 'r') as stream:
            try:
                config = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                LOGGER.error(exc)
                config = {}
        return config

    def get_config(self):
        return self.config

    def get_layer_configs(self) -> Generator[dict, None, None]:
        """
        Returns a generator for the individual layer configs.
        Layer configs are dicts including a layer-name.
        """
        for key, layer in self.config['layers'].items():
            layer['layer-name'] = key
            yield layer
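A short sketch of how this class consumes the clustering.yaml shown at the top of this commit (the import path is an assumption):

```python
# Hypothetical usage of ClusteringConfig with the layers defined in configs/clustering.yaml.
from processing.clustering.clustering_config import ClusteringConfig  # module path assumed

config = ClusteringConfig()
for layer_config in config.get_layer_configs():
    # e.g. 'starting-point' with ['Latitude_StartingPoint', 'Longitude_StartingPoint']
    print(layer_config['layer-name'], layer_config.get('properties'))
```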
import itertools
from typing import List, Dict, Tuple, Any
from networkx import Graph

class UserGraphGenerator:
    def __init__(self):
        pass

    def count_edges(self, nodes: List) -> Dict[Tuple, int]:
        edge_counts = {}
        coms = itertools.combinations(nodes, 2)
        for first, second in coms:
            if first == second:  # don't process reflexive connections
                continue
            if (first, second) in edge_counts:
                edge_counts[first, second] += 1
            else:
                edge_counts[first, second] = 1
        return edge_counts

    def create_edges_with_weights(self, edge_counts: Dict[Tuple[Any, Any], int]) -> List[Tuple[Any, Any, Dict]]:
        edges = []
        for (key1, key2), value in edge_counts.items():
            edge = (key1, key2, {'weight': value})
            edges.append(edge)
        return edges

    def create_fully_connected_edges_for_nodes(self, nodes: List) -> List[Tuple[Any, Any, Dict]]:
        return self.create_edges_with_weights(self.count_edges(nodes))

    def create_graph_from_nodes(self, nodes: List) -> Graph:
        '''Creates a networkx.Graph with distinct nodes and weighted edges between these nodes.'''
        g = Graph()
        g.add_nodes_from(nodes)
        g.add_edges_from(self.create_fully_connected_edges_for_nodes(nodes))
        return g
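A small usage sketch with made-up user ids, showing how repeated nodes become edge weights:

```python
# Hypothetical example: repeated users in the node list turn into weighted edges.
from processing.user_graph_generator import UserGraphGenerator

ug = UserGraphGenerator()
g = ug.create_graph_from_nodes(['u1', 'u2', 'u1', 'u3'])
print(sorted(g.nodes))           # distinct nodes: ['u1', 'u2', 'u3']
print(list(g.edges(data=True)))  # each edge carries a 'weight' co-occurrence count
```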
import io

from flask import request, Response

from db.repository import Repository
from processing.clustering.clusterer import Clusterer

from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

repo = Repository()
clusterer = Clusterer()

def get_locations():
    clusters = repo.get_location_clusters()
    return [c.to_serializable_dict() for c in clusters]

def get_times():
    clusters = repo.get_time_clusters()
    return [c.to_serializable_dict() for c in clusters]

def get_image_1():
    return Response(status=501)
    # TODO draft below is unreachable until implemented
    locations = repo.getLocations()
    fig = clusterer.draw_locations(locations)

    output = io.BytesIO()
    FigureCanvas(fig).print_png(output)
    return Response(output.getvalue(), mimetype="image/png")

def get_image_2():
    return Response(status=501)
    # TODO draft below is unreachable until implemented
    locations = repo.getLocations()
    fig = clusterer.draw_locations(locations)

    output = io.BytesIO()
    FigureCanvas(fig).print_png(output)
    return Response(output.getvalue(), mimetype="image/png")
\ No newline at end of file
@@ -4,23 +4,9 @@ from db.entities import ClusterSet

repo = Repository()

def get():
    return [c.to_serializable_dict() for c in repo.get_clustersets()]

def get_names():
    return repo.get_clusterset_names()

def get_by_name2(name):
def get_by_name(name):
    res = repo.get_clusters_for_layer(name)
    if res is None or len(res) == 0:
        return Response(status=404)
    else:
        return [c.to_serializable_dict() for c in res]

def get_by_name(name):
    res = repo.get_clusterset(name)
    if res is not None:
        return res.to_serializable_dict()
    else:
        return Response(status=404)
\ No newline at end of file
import insert_agi_locations
import run_clustering
import create_user_graphs

def run_agi_clustering_and_graph_creation():
    insert_agi_locations.insert_locations()
    run_clustering.run_location_clustering()
    run_clustering.run_time_clustering()
    create_user_graphs.create_graphs_for_location_clusters()
    pass
@@ -15,7 +15,6 @@ def post():
def _insert_layer(layer_data: dict):
    '''Converts object keys from the external source and inserts the layer into the database.'''
    layer_data['layer_name'] = layer_data.pop('LayerName')
    # layer_data['nodes'] = layer_data.pop('Nodes')
    layer_data['properties'] = layer_data.pop('Properties')
    repo.add_layer(Layer(layer_data))
......
from flask import request, Response

from db.repository import Repository
from db.entities import Location

repo = Repository()

def post():
    body = request.json
    _insert_location(body)
    return Response(status=201)

def post_many():
    body = request.json
    for location in body:
        _insert_location(location)
    return Response(status=201)

def get():
    return [l.to_serializable_dict() for l in repo.get_locations()]

def _insert_location(location_data: dict):
    repo.add_location(Location(location_data))
@@ -4,23 +4,8 @@ from db.entities import TimeSlice

repo = Repository()

def get():
    return [e.to_serializable_dict() for e in repo.get_time_slices()]

def get_by_name(layername):
    res = repo.get_time_slices_by_name(layername)
    # print(len(res))
    if res is not None and len(res) != 0:
        return [e.to_serializable_dict() for e in res]
    else:
        return Response(status=404)

def get_by_name2(name):
def get_by_name(name):
    res = repo.get_time_slices_by_name(name)
    # print(len(res))
    if res is not None and len(res) != 0:
        return [e.to_serializable_dict() for e in res]
......
from flask import request, Response

from db.repository import Repository

repo = Repository()

def get():
    data = repo.get_user_cluster_graphs()
    return [d.to_serializable_dict() for d in data]
@@ -5,16 +5,13 @@ if os.path.exists(modules_path):
    sys.path.insert(1, modules_path)

import json
from db.entities import *
from db.entities import Layer, Cluster
from typing import List, Dict, Tuple
from db.repository import Repository, AgiRepository
from db.repository import Repository
from processing.clustering.clusterer import Clusterer

DEBUG = False

repo = Repository()
test_repo = AgiRepository()

def run_generic_clustering():
@@ -29,7 +26,6 @@ def run_generic_clustering():
            continue

        clusters = run_clustering_for_layer(layer)
        # cluster_set = ClusterSet(layer.layer_name, clusters)
        store_generic_clusters(clusters)
@@ -44,74 +40,10 @@ def run_clustering_for_layer(layer: Layer) -> List[Cluster]:
    return [Cluster(layer.layer_name, key, value) for key, value in res.items()]

def store_generic_clusters(clusters: List[Cluster]):
    repo.add_clusters(clusters)
    # with open(f'clusterset_{cluster_set.layer_name}.txt', 'w') as file:
    #     file.write(json.dumps(cluster_set.to_serializable_dict()))

def run_location_clustering():
    user_clusterer = Clusterer()

    all_location_traces = repo.get_locations()
    cluster_result = user_clusterer.cluster_locations(
        [l.to_serializable_dict() for l in all_location_traces])
    clusters = [LocationCluster(key, value)
                for key, value in cluster_result.items()]
    store_clusters('locations', clusters)

def run_time_clustering():
    clusters: List[TimeCluster] = []
    user_clusterer = Clusterer(epsilon=600)  # clustered within 10 minutes

    all_location_traces = repo.get_locations()
    # for each date in timestamp list
    dates = {trace.timestamp.date() for trace in all_location_traces}
    for cur_date in dates:
        traces_for_cur_date = [
            trace for trace in all_location_traces if trace.timestamp.date() == cur_date]

        # for each hour of that day
        for cur_hour in list(range(24)):
            traces_for_time_slice = [
                trace for trace in traces_for_cur_date if trace.timestamp.hour == cur_hour]

            if len(traces_for_time_slice) == 0:
                continue

            # clustering per hour
            cluster_result = user_clusterer.cluster_times(
                [t.to_serializable_dict() for t in traces_for_time_slice])
            cur_clusters = [TimeCluster(cur_date, cur_hour, key, value)
                            for key, value in cluster_result.items()]
            clusters.extend(cur_clusters)

    store_clusters('times', clusters)

def store_clusters(type: str, clusters: List):
    if DEBUG:
        print(clusters)
        return

    if type == 'locations':
        for c in clusters:
            repo.add_location_cluster(c)
    if type == 'times':
        for c in clusters:
            repo.add_time_cluster(c)

if __name__ == "__main__":
    run_generic_clustering()
    # TODO cleanup
    # run_location_clustering()
    # run_time_clustering()
import sys
import os
for path in ['../', './', '../../../modules/']:
    if os.path.exists(path):
        sys.path.insert(1, path)

import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities import TimeSlice
from typing import List

def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
    cluster_no = str(cluster_no)
    for slice_ in time_slices:
        nodes = slice_.get_nodes_for_cluster(cluster_no)
        # print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")

        plt.title(str(slice_.time))
        plt.scatter([n['Longitude_Destination'] if 'Longitude_Destination' in n else 0
                     for n in nodes],
                    [n['Latitude_Destination'] if 'Latitude_Destination' in n else 0
                     for n in nodes],
                    s=[len(nodes)*100]*len(nodes))
        plt.pause(0.5)

def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
    cluster_no = str(cluster_no)
    labels = [ts.time for ts in time_slices]
    x_axis_label_stepsize = 10

    nodes_per_slice_for_single_cluster = \
        [len(time_slice.get_nodes_for_cluster(cluster_no))
         for time_slice
         in time_slices]

    fig, ax = plt.subplots()
    ax.bar(x=range(len(labels)),
           height=nodes_per_slice_for_single_cluster)

    ax.set_ylabel('Size')
    ax.set_title(f'Cluster-{cluster_no} size over time')
    ax.set_xticks(range(len(labels))[::x_axis_label_stepsize])
    ax.set_xticklabels(labels[::x_axis_label_stepsize])

    plt.show()

if __name__ == "__main__":
    repo = Repository()
    time_slices = repo.get_time_slices_by_name("Destination_Layer")
    # chronological order
    time_slices.sort(key=lambda ts: eval(ts.time))
    print(len(time_slices))
    plt_show_bars(time_slices, cluster_no=0)
\ No newline at end of file
@@ -16,8 +16,8 @@ SEMANTIC_LINKING_REST_PORT = 80
SEMANTIC_LINKING_DB_HOSTNAME = f'{SEMANTIC_LINKING_HOSTNAME}-db'
SEMANTIC_LINKING_DB_PORT = 27017
## Community Detection
COMMUNITY_DETECTION_HOSTNAME = 'role-stage-discovery'
COMMUNITY_DETECTION_REST_PORT = 80
COMMUNITY_DETECTION_DB_HOSTNAME = f'{COMMUNITY_DETECTION_HOSTNAME}-db'
COMMUNITY_DETECTION_DB_PORT = 27017
\ No newline at end of file
## Role Stage Discovery
ROLESTAGE_DISCOVERY_HOSTNAME = 'role-stage-discovery'
ROLESTAGE_DISCOVERY_REST_PORT = 80
ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'{ROLESTAGE_DISCOVERY_HOSTNAME}-db'
ROLESTAGE_DISCOVERY_DB_PORT = 27017
\ No newline at end of file