Commit ce3886b2 authored by Alexander Lercher

Major code cleanup

Removed all old and unused source code, including code left over from earlier approaches
parent a58cc362
layers:
user:
properties:
starting-point:
properties:
- Latitude_StartingPoint
- Longitude_StartingPoint
\ No newline at end of file
...@@ -29,59 +29,7 @@ paths:
200:
description: "Successful echo of request data"
# Locations
# TODO remove
/locations:
post:
operationId: "routes.location.post"
tags:
- "Locations"
summary: "Add new location data"
parameters:
- in: body
name: "Location"
description: "The location data to be added"
required: true
schema:
$ref: "#/definitions/Location"
responses:
201:
description: "Successful operation"
400:
description: "Invalid input"
get:
operationId: "routes.location.get"
tags:
- "Locations"
summary: "Get location data"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/LocationCollection"
/location-collections:
post:
operationId: "routes.location.post_many"
tags:
- "Locations"
summary: "Add new location data collection"
parameters:
- in: body
name: "Locations"
description: "The location data collection to be added"
required: true
schema:
$ref: "#/definitions/LocationCollection"
responses:
201:
description: "Successful operation"
400:
description: "Invalid input"
#region Layers
/layers:
post:
operationId: "routes.layers.post"
...@@ -176,7 +124,7 @@ paths:
/layers/{name}/clusters:
get:
operationId: "routes.clustersets.get_by_name2"
operationId: "routes.clustersets.get_by_name"
tags:
- "Layers"
summary: "Get all clusters for the layer"
...@@ -196,7 +144,7 @@ paths:
/layers/{name}/timeslices:
get:
operationId: "routes.timeslices.get_by_name2"
operationId: "routes.timeslices.get_by_name"
tags:
- "Layers"
summary: "Get all timeslices for the layer"
...@@ -213,162 +161,10 @@ paths:
$ref: "#/definitions/TimeSliceCollection"
404:
description: "Layer not found"
#endregion
# Clusters
# TODO remove partially
/location-clusters:
get:
operationId: "routes.cluster.get_locations"
tags:
- "Clusters"
summary: "Get user communities clustered by location"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/LocationClusterCollection"
# /clusters/cluster.png:
# get:
# operationId: "routes.cluster.get_image"
# tags:
# - "Clusters"
# summary: "Get user communities per date per hour as image"
# parameters: []
# produces:
# - "image/png"
# responses:
# 200:
# description: "Successful operation"
/time-clusters:
get:
operationId: "routes.cluster.get_times"
tags:
- "Clusters"
summary: "Get user communities clustered by time per hour"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeClusterCollection"
# /agi/clusters/cluster.png:
# get:
# operationId: "routes.agi_cluster.get_image"
# tags:
# - "Clusters"
# summary: "Get user communities per date per hour from agi data as image"
# parameters: []
# produces:
# - "image/png"
# responses:
# 200:
# description: "Successful operation"
# TODO remove
/clustersets:
get:
operationId: "routes.clustersets.get"
tags:
- "Clusters"
summary: "Get clustersets for all layers"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterSetCollection"
/clustersets/names:
get:
operationId: "routes.clustersets.get_names"
tags:
- "Clusters"
summary: "Get clusterset names for all layers"
parameters: []
responses:
200:
description: "Successful operation"
schema:
type: array
items:
type: string
/clustersets/{layername}:
get:
operationId: "routes.clustersets.get_by_name"
tags:
- "Clusters"
summary: "Get clusterset for layer-name"
parameters:
- name: "layername"
in: "path"
description: "Name of the layer to return the clusterset for"
required: true
type: "string"
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterSet"
404:
description: "Clusterset not found"
# TODO remove
/user-cluster-graphs:
get:
operationId: "routes.user_cluster.get"
tags:
- "User Graphs"
summary: "Get user graphs per layer per cluster"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/UserClusterGraphCollection"
# Time slices
#endregion
/timeslices:
get:
operationId: "routes.timeslices.get"
tags:
- "Time Slices"
summary: "Get all time slices based on individual layers containing clusters with nodes for that time"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeSliceCollection"
/timeslices/{layername}:
get:
operationId: "routes.timeslices.get_by_name"
tags:
- "Time Slices"
summary: "Get all time slices for one layer"
parameters:
- name: "layername"
in: "path"
description: "Name of the layer to return the time slices for"
required: true
type: "string"
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/TimeSliceCollection"
404:
description: "No time slices found for layername"
# Function Calls
#region Function Calls
/rfc/run:
post:
operationId: "routes.functions.run_agi_clustering_and_graph_creation"
...@@ -379,30 +175,11 @@ paths:
responses:
204:
description: "Successful operation"
#endregion
definitions:
Location:
type: "object"
properties:
id:
type: string
user:
type: "string"
latitude:
type: "number"
format: float
longitude:
type: "number"
format: float
timestamp:
type: "number"
LocationCollection:
type: array
items:
$ref: "#/definitions/Location"
Cluster:
type: object
properties:
...@@ -414,83 +191,17 @@ definitions:
type: array
items:
$ref: "#/definitions/Node"
ClusterCollection:
type: array
items:
$ref: "#/definitions/Cluster"
LocationCluster:
type: object
properties:
id:
type: string
cluster_label:
type: number
nodes:
type: array
items:
$ref: "#/definitions/Location"
LocationClusterCollection:
type: array
items:
$ref: "#/definitions/LocationCluster"
TimeCluster:
type: object
properties:
id:
type: string
date:
type: string
hour:
type: number
cluster_label:
type: number
nodes:
type: array
items:
$ref: "#/definitions/Location"
TimeClusterCollection:
type: array
items:
$ref: "#/definitions/TimeCluster"
UserClusterGraph:
type: object
properties:
nodes:
type: array
items:
type: string
edges:
type: array
items:
type: array
items:
type: string
example:
- user1
- user2
- weight
UserClusterGraphCollection:
type: array
items:
$ref: "#/definitions/UserClusterGraph"
Layer-UpperCase:
type: object
properties:
LayerName:
type: string
# Nodes:
# type: array
# items:
# type: object
Properties:
type: array
items:
...@@ -501,10 +212,6 @@ definitions:
properties:
layer_name:
type: string
# nodes:
# type: array
# items:
# type: object
properties:
type: array
items:
...@@ -531,21 +238,6 @@ definitions:
items:
$ref: "#/definitions/Node"
ClusterSet:
type: object
properties:
layer_name:
type: string
clusters:
type: array
items:
$ref: "#/definitions/Cluster"
ClusterSetCollection:
type: array
items:
$ref: "#/definitions/ClusterSet"
TimeSlice:
type: object
properties:
...
import sys
import os
modules_paths = ['../../../modules/']
for modules_path in modules_paths:
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
from typing import List, Tuple, Any
from networkx import Graph
from db.entities import LocationCluster, UserClusterGraph, Cluster
from db.repository import Repository
from processing.user_graph_generator import UserGraphGenerator
repo = Repository()
def get_edges_with_weights(g: Graph) -> List[Tuple[Any, Any, int]]:
res = []
for e in g.edges:
res.append((*e, g.edges[e]['weight']))
return res
def create_graphs_for_location_clusters():
graphs_for_clusters = []
ug = UserGraphGenerator()
clusters: List[LocationCluster] = repo.get_location_clusters()
for cluster in clusters:
user_ids = [n['user'] for n in cluster.nodes]
graph: Graph = ug.create_graph_from_nodes(user_ids)
vertices = list(graph.nodes)
edges = get_edges_with_weights(graph)
cluster_graph = UserClusterGraph(vertices, edges)
graphs_for_clusters.append(cluster_graph)
store_graphs(graphs_for_clusters)
def store_graphs(graphs: List):
for g in graphs:
repo.add_user_cluster_graph(g)
if __name__ == "__main__":
create_graphs_for_location_clusters()
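
A quick sketch of what get_edges_with_weights returns (standalone illustration with made-up users, not part of the script above):

# Build a tiny weighted graph and read its edges back as (node, node, weight) tuples.
g = Graph()
g.add_edge('user1', 'user2', weight=3)
g.add_edge('user2', 'user3', weight=1)
print(get_edges_with_weights(g))
# [('user1', 'user2', 3), ('user2', 'user3', 1)]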
import json
from typing import List, Dict
import hashlib
class AgiRepository:
def getLocations(self) -> List[Dict]:
locations = []
travels = self.readDataFromFile()
# only take started travels
travels = [t for t in travels if t['status'] >= 2]
for travel in travels:
num_complete_travels = min(len(travel['startedBy']), len(travel['users']))
for i in range(num_complete_travels):
cur_location = travel['startedBy'][i]
cur_user = travel['users'][i]
locations.append(
self.location(f'{travel["id"]}-{cur_location["moment"]}',
cur_location['coordinate']['latitude'],
cur_location['coordinate']['longitude'],
cur_location['moment'],
# todo user in travel startedBy not available from dataset - currently using user list
hashlib.sha1(cur_user['userId'].encode()).hexdigest() # not showing generated username
))
return locations
def getLocationsBasedOnNewDataSchema(self):
'''Creates the new data generic schema to be used beginning on 24.03.2020'''
data = {
'layer_name': 'Destination',
'nodes': self.getLocations(),
'properties': ['latitude', 'longitude']
}
return data
def getTimesBasedOnNewDataSchema(self):
'''Creates the new data generic schema to be used beginning on 24.03.2020'''
data = {
'layer_name': 'Starting_Time',
'nodes': self.getLocations(),
'properties': ['timestamp']
}
return data
def readDataFromFile(self) -> List[Dict]:
with open('./db/agi/travels.json', 'r') as f_travels:
travels = json.loads(f_travels.read())
return travels
def location(self, id_, lat, long_, timestamp, username) -> dict:
return {
"id": id_,
'latitude': lat,
'longitude': long_,
"timestamp": timestamp,
"user": username
}
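
Illustrative use of the repository above; it requires ./db/agi/travels.json to exist, and the values in the comment are made up — only the shape follows from the code:

repo = AgiRepository()
data = repo.getLocationsBasedOnNewDataSchema()
# data looks like:
# {'layer_name': 'Destination',
#  'nodes': [{'id': '<travelId>-<moment>', 'latitude': 48.30, 'longitude': 14.28,
#             'timestamp': 1584976800, 'user': '<sha1 hash of userId>'}, ...],
#  'properties': ['latitude', 'longitude']}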
from db.entities.location import Location
from db.entities.popular_location import PopularLocation
from db.entities.cluster import Cluster, LocationCluster, TimeCluster
from db.entities.cluster import Cluster
from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer
...
...@@ -39,67 +39,3 @@ class Cluster:
def __str__(self):
return f"Cluster({self.__repr__()})"
class LocationCluster(Cluster):
def __init__(self, cluster_label: int = None, nodes: List = None,
location_dict: Dict = None, from_db=False):
super().__init__(cluster_label, nodes)
self.id = f'{self.cluster_label}'
if location_dict is not None:
self.from_serializable_dict(location_dict, from_db)
def to_serializable_dict(self, for_db=False) -> Dict:
return {
"id": self.id,
"cluster_label": self.cluster_label,
"nodes": json.dumps(self.nodes) if for_db else self.nodes
}
def from_serializable_dict(self, location_dict: Dict, from_db=False):
self.id = location_dict["id"]
self.cluster_label = location_dict["cluster_label"]
self.nodes = json.loads(location_dict["nodes"]) \
if from_db else location_dict["nodes"]
def __repr__(self):
return json.dumps(self.to_serializable_dict())
def __str__(self):
return f"LocationCluster({self.__repr__()})"
class TimeCluster(Cluster):
def __init__(self, date: date = None, hour: int = None, cluster_label: int = None, nodes: List = None,
time_dict: Dict = None, from_db=False):
super().__init__(cluster_label, nodes)
self.date = date
self.hour = hour
self.id = f'{self.date}-{self.hour}-{self.cluster_label}'
if time_dict is not None:
self.from_serializable_dict(time_dict, from_db)
def to_serializable_dict(self, for_db=False) -> Dict:
return {
"id": self.id,
"date": str(self.date),
"hour": self.hour,
"cluster_label": self.cluster_label,
"nodes": json.dumps(self.nodes) if for_db else self.nodes
}
def from_serializable_dict(self, time_dict: Dict, from_db=False):
self.id = time_dict["id"]
self.date = datetime.strptime(time_dict["date"], '%Y-%m-%d').date()
self.hour = time_dict["hour"]
self.cluster_label = time_dict["cluster_label"]
self.nodes = json.loads(time_dict["nodes"]) \
if from_db else time_dict["nodes"]
def __repr__(self):
return json.dumps(self.to_serializable_dict(True))
def __str__(self):
return f"TimeCluster({self.__repr__()})"
...@@ -3,8 +3,6 @@ import network_constants as netconst
from database.MongoRepositoryBase import MongoRepositoryBase
import json
from db.agi.agi_repository import AgiRepository
from db.entities import *
from typing import List
...@@ -13,61 +11,14 @@ class Repository(MongoRepositoryBase):
'''This is a repository for MongoDb.'''
def __init__(self):
super().__init__(netconst.COMMUNITY_DETECTION_DB_HOSTNAME,
netconst.COMMUNITY_DETECTION_DB_PORT,
'communityDetectionDb')
super().__init__(netconst.ROLESTAGE_DISCOVERY_DB_HOSTNAME,
netconst.ROLESTAGE_DISCOVERY_DB_PORT,
'roleStageDb')
self._location_collection = 'location'
self._location_cluster_collection = 'location_cluster'
self._time_cluster_collection = 'time_cluster'
self._user_cluster_graph_collection = 'user_cluster_graph'
self._layer_collection = 'layer-new'
self._layer_nodes_collection = 'layer_nodes-new'
self._clusterset_collection = 'cluster_set-new'
self._time_slice_collection = 'time_slice-new'
self._layer_collection = 'layers'
self._layer_nodes_collection = 'layer_nodes'
self._clusters_collection = 'clusters'
self._time_slice_collection = 'time_slices'
self.agi_repo = AgiRepository()
#region Location
def add_location(self, location: Location):
super().insert_entry(self._location_collection, location.to_serializable_dict())
def get_locations(self) -> List[Location]:
locations = super().get_entries(self._location_collection)
return [Location(l) for l in locations]
def get_agi_locations(self) -> List[Location]:
agi_locations = self.agi_repo.getLocations()
return [Location(agi_loc) for agi_loc in agi_locations]
#endregion
#region Specific Clusters
def add_location_cluster(self, cluster: LocationCluster):
super().insert_entry(self._location_cluster_collection,
cluster.to_serializable_dict(for_db=True))
def get_location_clusters(self) -> List[LocationCluster]:
clusters = super().get_entries(self._location_cluster_collection)
return [LocationCluster(location_dict=c, from_db=True) for c in clusters]
def add_time_cluster(self, cluster: TimeCluster):
super().insert_entry(self._time_cluster_collection,
cluster.to_serializable_dict(for_db=True))
def get_time_clusters(self) -> List[TimeCluster]:
clusters = super().get_entries(self._time_cluster_collection)
return [TimeCluster(time_dict=c, from_db=True) for c in clusters]
#endregion
#region Cluster Graph
def add_user_cluster_graph(self, user_graph: UserClusterGraph):
super().insert_entry(self._user_cluster_graph_collection,
user_graph.to_serializable_dict(for_db=True))
def get_user_cluster_graphs(self) -> List[UserClusterGraph]:
user_graphs = super().get_entries(self._user_cluster_graph_collection)
return [UserClusterGraph(dict_=u, from_db=True) for u in user_graphs]
#endregion
#region Layers
def add_layer(self, layer: Layer):
...@@ -77,10 +28,6 @@ class Repository(MongoRepositoryBase):
entries = super().get_entries(self._layer_collection)
return [Layer(e) for e in entries]
def get_layer_names(self) -> List[str]:
entries = super().get_entries(self._layer_collection, projection={'layer_name': 1})
return [e['layer_name'] for e in entries]
def get_layer(self, layer_name) -> Layer:
entries = super().get_entries(self._layer_collection, selection={'layer_name': layer_name})
entries = [Layer(e) for e in entries]
...@@ -103,38 +50,13 @@ class Repository(MongoRepositoryBase):
#endregion
#region ClusterSet
#region Clusters
# TODO cleanup
def add_clusterset(self, cluster_set: ClusterSet):
super().insert_entry(self._clusterset_collection, cluster_set.to_serializable_dict())
def get_clustersets(self) -> List[ClusterSet]:
'''Returns all clustersets.'''
entries = super().get_entries(self._clusterset_collection)
return [ClusterSet(cluster_set_dict=e) for e in entries]
def get_clusterset_names(self) -> List[str]:
'''Returns the names of all clustersets.'''
entries = super().get_entries(self._clusterset_collection, projection={'layer_name': 1})
return [e['layer_name'] for e in entries]
def get_clusterset(self, layer_name) -> ClusterSet:
'''Returns a single clusterset with the given name or None otherwise.'''
entries = super().get_entries(self._clusterset_collection, selection={'layer_name': layer_name})
entries = [ClusterSet(cluster_set_dict=e) for e in entries]
if entries is not None and len(entries) > 0:
return entries[0]
else:
return None
def add_clusters(self, clusters: List[Cluster]):
cluster_dicts = [c.to_serializable_dict(for_db=True) for c in clusters]
super().insert_many(self._clusterset_collection, cluster_dicts)
super().insert_many(self._clusters_collection, cluster_dicts)
def get_clusters_for_layer(self, layer_name: str) -> List[Cluster]:
entries = super().get_entries(self._clusterset_collection, selection={'layer_name': layer_name}, projection={'_id': 0})
entries = super().get_entries(self._clusters_collection, selection={'layer_name': layer_name}, projection={'_id': 0})
return [Cluster(cluster_dict=e, from_db=True) for e in entries]
#endregion
...@@ -155,4 +77,5 @@ class Repository(MongoRepositoryBase):
def remove_all_time_slices(self):
super().drop_collection(self._time_slice_collection)
#endregion
\ No newline at end of file
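
A usage sketch for the renamed collections; it assumes a reachable MongoDB configured via network_constants, and the layer name is illustrative:

from db.repository import Repository

repo = Repository()
# add_clusters writes to the 'clusters' collection; get_clusters_for_layer reads it back.
clusters = repo.get_clusters_for_layer('Destination')
print(len(clusters))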
import sys
import os
modules_path = '../../../modules/'
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
from db.repository import Repository
def insert_locations():
repo = Repository()
locs = repo.get_agi_locations()
for l in locs:
repo.add_location(l)
if __name__ == "__main__":
insert_locations()
...@@ -3,7 +3,6 @@ import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import OPTICS
from typing import List, Dict, Any, TypeVar
from deprecated import deprecated
T = TypeVar('T')
ClusterGroup = Dict[Any, List[Dict]]
...@@ -16,63 +15,20 @@ class Clusterer:
:param epsilon: Eps used in OPTICS
:param min_points: MinPts used in OPTICS
'''
def __init__(self, epsilon=11, min_points=5):
def __init__(self, min_points=5):
self.epsilon = epsilon
self.min_points = min_points
def draw_locations(self, locations:List, labels:List=None) -> plt.Figure:
if locations is None or len(locations) == 0:
return self._draw_locations()
if labels is None or len(locations) != len(labels):
labels = self.create_labels(locations)
return self._draw_locations(
locations = self.extract_location_data(locations),
partition_info = labels
)
def _draw_locations(self, locations:np.ndarray=None, centroids:np.ndarray=None, partition_info:List=None) -> plt.Figure:
fig = plt.Figure()
axis = fig.add_subplot(1, 1, 1)
if locations is not None:
colors = plt.cm.rainbow(np.linspace(0, 1, len(locations)))
if partition_info is not None:
distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(set(partition_info))))
colors = [distinct_colors[pi] for pi in partition_info]
# draw locations with random colors
axis.scatter(locations[:,0],
locations[:,1],
c=colors)
if centroids is not None:
# draw black centroids
axis.scatter(centroids[:,0], centroids[:,1], c='k', marker='x', s=80)
return fig
def create_labels(self, features:np.ndarray) -> List[int]:
'''Creates labels for the items based on DBSCAN.'''
'''Creates labels for the items based on OPTICS.'''
if features is None or len(features) == 0:
return features # trash in trash out
dbsc = OPTICS(min_samples=self.min_points) # DBSCAN(eps = self.epsilon, min_samples = self.min_points)
dbsc = OPTICS(min_samples=self.min_points)
dbsc = dbsc.fit(features)
labels = dbsc.labels_
return labels.tolist()
@deprecated(reason="Use generic version instead")
def extract_location_features(self, locations: List[dict]) -> np.ndarray:
return np.asarray([(float(l['latitude']), float(l['longitude'])) for l in locations])
@deprecated(reason="Use generic version instead")
def extract_time_features(self, times: List[Dict]) -> np.ndarray:
return np.asarray([[float(t['timestamp'])] for t in times])
def _extract_features(self, dataset: List[Dict], features:List[str]) -> np.ndarray:
'''Extracts the feature values from the dataset into a np array with same order as original dataset.'''
extracted_features = []
...@@ -104,30 +60,6 @@ class Clusterer:
return clusters
@deprecated(reason="Use generic version instead")
def cluster_locations(self, locations:List[Dict]) -> ClusterGroup:
'''Returns a dictionary with identified clusters and their locations copied from the input'''
if locations is None or len(locations) == 0:
# raise Exception("locations has to contain something")
return {}
features = self.extract_location_features(locations)
labels = self.create_labels(features)
self.label_dataset(locations, labels)
return self.group_by_clusters(locations, labels)
@deprecated(reason="Use generic version instead")
def cluster_times(self, times:List[Dict]) -> ClusterGroup:
'''Returns a dictionary with identified clusters and their times copied from the input'''
features = self.extract_time_features(times)
labels = self.create_labels(features)
self.label_dataset(times, labels)
return self.group_by_clusters(times, labels)
def cluster_dataset(self, dataset:List[Dict], features:List[str]) -> ClusterGroup:
'''
Returns the identified clusters containing a subset of nodes from the dataset.
...
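
A usage sketch for the generic clustering API that replaces the deprecated per-type methods; the input data is made up:

clusterer = Clusterer(min_points=2)
dataset = [
    {'user': 'u1', 'timestamp': 100},
    {'user': 'u2', 'timestamp': 101},
    {'user': 'u3', 'timestamp': 5000},
]
# Groups nodes by their OPTICS label; noise points, if any, end up under label -1.
groups = clusterer.cluster_dataset(dataset, features=['timestamp'])
print(groups)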
import yaml
from typing import Generator
from pathlib import Path
### init logging ###
import logging
LOG_FORMAT = (
'%(levelname) -5s %(asctime)s %(name)s:%(funcName) -35s %(lineno) -5d: %(message)s')
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
LOGGER = logging.getLogger(__name__)
PROJECT_ROOT = Path(__file__).parent.parent.parent
class ClusteringConfig:
'''Contains the configuration for the clustering algorithm defined in configs/clustering.yaml.'''
config_path = f'{PROJECT_ROOT}/configs/clustering.yaml'
config: dict = None
def __init__(self):
self.config = self._load_config()
def _load_config(self) -> dict:
'''Loads the whole configuration from file.'''
config = None
with open(self.config_path, 'r') as stream:
try:
config = yaml.safe_load(stream)
except yaml.YAMLError as exc:
LOGGER.error(exc)
config = {}
return config
def get_config(self):
return self.config
def get_layer_configs(self) -> Generator[dict, None, None]:
"""
Returns a generator for the individual layer configs.
Layer configs are dicts including a layer-name.
"""
for key, layer in self.config['layers'].items():
layer['layer-name'] = key
yield layer
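
A usage sketch for ClusteringConfig, assuming the configs/clustering.yaml shown at the top of this commit:

config = ClusteringConfig()
for layer_config in config.get_layer_configs():
    # for the yaml above this prints: user
    print(layer_config['layer-name'])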
import itertools
from typing import List, Dict, Tuple, Any
from networkx import Graph
class UserGraphGenerator:
def __init__(self):
pass
def count_edges(self, nodes: List) -> Dict[Tuple, int]:
edge_counts = {}
coms = itertools.combinations(nodes, 2)
for first, second in coms:
if first == second: # don't process reflexive connections
continue
if (first, second) in edge_counts:
edge_counts[first, second] += 1
else:
edge_counts[first, second] = 1
return edge_counts
def create_edges_with_weights(self, edge_counts: Dict[Tuple[Any, Any], int]) -> List[Tuple[Any, Any, Dict]]:
edges = []
for (key1, key2), value in edge_counts.items():
edge = (key1, key2, {'weight': value})
edges.append(edge)
return edges
def create_fully_connected_edges_for_nodes(self, nodes: List) -> List[Tuple[Any, Any, Dict]]:
return self.create_edges_with_weights(self.count_edges(nodes))
def create_graph_from_nodes(self, nodes: List) -> Graph:
'''Creates a networkx.Graph with distinct nodes and weighted edges between these nodes'''
g = Graph()
g.add_nodes_from(nodes)
g.add_edges_from(self.create_fully_connected_edges_for_nodes(nodes))
return g
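
A small example of how duplicate users in a node list become edge weights (expected output shown in comments):

ug = UserGraphGenerator()
g = ug.create_graph_from_nodes(['u1', 'u1', 'u2'])
print(list(g.nodes))         # ['u1', 'u2'] - nodes are deduplicated
print(g.edges['u1', 'u2'])   # {'weight': 2} - the pair ('u1', 'u2') occurred twice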
import io
from flask import request, Response
from db.repository import Repository
from processing.clustering.clusterer import Clusterer
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
repo = Repository()
clusterer = Clusterer()
def get_locations():
clusters = repo.get_location_clusters()
return [c.to_serializable_dict() for c in clusters]
def get_times():
clusters = repo.get_time_clusters()
return [c.to_serializable_dict() for c in clusters]
def get_image_1():
return Response(status=501)
# todo
locations = repo.getLocations()
fig = clusterer.draw_locations(locations)
output = io.BytesIO()
FigureCanvas(fig).print_png(output)
return Response(output.getvalue(), mimetype="image/png")
def get_image_2():
return Response(status=501)
# todo
locations = repo.getLocations()
fig = clusterer.draw_locations(locations)
output = io.BytesIO()
FigureCanvas(fig).print_png(output)
return Response(output.getvalue(), mimetype="image/png")
\ No newline at end of file
...@@ -4,23 +4,9 @@ from db.entities import ClusterSet
repo = Repository()
def get():
return [c.to_serializable_dict() for c in repo.get_clustersets()]
def get_names():
return repo.get_clusterset_names()
def get_by_name2(name):
def get_by_name(name):
res = repo.get_clusters_for_layer(name)
if res is None or len(res) == 0:
return Response(status=404)
else:
return [c.to_serializable_dict() for c in res]
def get_by_name(name):
res = repo.get_clusterset(name)
if res is not None:
return res.to_serializable_dict()
else:
return Response(status=404)
\ No newline at end of file
import insert_agi_locations
import run_clustering
import create_user_graphs
def run_agi_clustering_and_graph_creation():
insert_agi_locations.insert_locations()
run_clustering.run_location_clustering()
run_clustering.run_time_clustering()
create_user_graphs.create_graphs_for_location_clusters()
pass
...@@ -15,7 +15,6 @@ def post():
def _insert_layer(layer_data: dict):
'''Converts object keys from external source and inserts into database.'''
layer_data['layer_name'] = layer_data.pop('LayerName')
# layer_data['nodes'] = layer_data.pop('Nodes')
layer_data['properties'] = layer_data.pop('Properties')
repo.add_layer(Layer(layer_data))
...
from flask import request, Response
from db.repository import Repository
from db.entities import Location
repo = Repository()
def post():
body = request.json
_insert_location(body)
return Response(status=201)
def post_many():
body = request.json
for location in body:
_insert_location(location)
return Response(status=201)
def get():
return [l.to_serializable_dict() for l in repo.get_locations()]
def _insert_location(location_data: dict):
repo.add_location(Location(location_data))
...@@ -4,23 +4,8 @@ from db.entities import TimeSlice
repo = Repository()
def get():
return [e.to_serializable_dict() for e in repo.get_time_slices()]
def get_by_name(layername):
res = repo.get_time_slices_by_name(layername)
# print(len(res))
if res is not None and len(res) != 0:
return [e.to_serializable_dict() for e in res]
else:
return Response(status=404)
def get_by_name2(name):
def get_by_name(name):
res = repo.get_time_slices_by_name(name)
# print(len(res))
if res is not None and len(res) != 0:
return [e.to_serializable_dict() for e in res]
...
from flask import request, Response
from db.repository import Repository
repo = Repository()
def get():
data = repo.get_user_cluster_graphs()
return [d.to_serializable_dict() for d in data]
...@@ -5,16 +5,13 @@ if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
import json
from db.entities import *
from db.entities import Layer, Cluster
from typing import List, Dict, Tuple
from db.repository import Repository, AgiRepository
from db.repository import Repository
from processing.clustering.clusterer import Clusterer
DEBUG = False
repo = Repository()
test_repo = AgiRepository()
def run_generic_clustering():
...@@ -29,7 +26,6 @@ def run_generic_clustering():
continue
clusters = run_clustering_for_layer(layer)
# cluster_set = ClusterSet(layer.layer_name, clusters)
store_generic_clusters(clusters)
...@@ -44,74 +40,10 @@ def run_clustering_for_layer(layer: Layer) -> List[Cluster]:
return [Cluster(layer.layer_name, key, value) for key, value in res.items()]
def store_generic_clusters(clusters: List[Cluster]):
repo.add_clusters(clusters)
# with open(f'clusterset_{cluster_set.layer_name}.txt', 'w') as file:
# file.write(json.dumps(cluster_set.to_serializable_dict()))
def run_location_clustering():
user_clusterer = Clusterer()
all_location_traces = repo.get_locations()
cluster_result = user_clusterer.cluster_locations(
[l.to_serializable_dict() for l in all_location_traces])
clusters = [LocationCluster(key, value)
for key, value in cluster_result.items()]
store_clusters('locations', clusters)
def run_time_clustering():
clusters: List[TimeCluster] = []
user_clusterer = Clusterer(epsilon=600) # clustered within 10 minutes
all_location_traces = repo.get_locations()
# for each date in timestamp list
dates = {trace.timestamp.date() for trace in all_location_traces}
for cur_date in dates:
traces_for_cur_date = [
trace for trace in all_location_traces if trace.timestamp.date() == cur_date]
# for each hour of that day
for cur_hour in list(range(24)):
traces_for_time_slice = [
trace for trace in traces_for_cur_date if trace.timestamp.hour == cur_hour]
if len(traces_for_time_slice) == 0:
continue
# clustering per hour
cluster_result = user_clusterer.cluster_times(
[t.to_serializable_dict() for t in traces_for_time_slice])
cur_clusters = [TimeCluster(cur_date, cur_hour, key, value)
for key, value in cluster_result.items()]
clusters.extend(cur_clusters)
store_clusters('times', clusters)
def store_clusters(type: str, clusters: List):
if DEBUG:
print(clusters)
return
if type == 'locations':
for c in clusters:
repo.add_location_cluster(c)
if type == 'times':
for c in clusters:
repo.add_time_cluster(c)
if __name__ == "__main__": if __name__ == "__main__":
run_generic_clustering() run_generic_clustering()
# TODO cleanup
# run_location_clustering()
# run_time_clustering()
import sys
import os
for path in ['../', './', '../../../modules/']:
if os.path.exists(path):
sys.path.insert(1, path)
import matplotlib.pyplot as plt
from db.repository import Repository
from db.entities import TimeSlice
from typing import List
def plt_show_circles(time_slices: List[TimeSlice], cluster_no):
cluster_no = str(cluster_no)
for slice_ in time_slices:
nodes = slice_.get_nodes_for_cluster(cluster_no)
# print(f"{slice_.time} number elements for cluster {cluster_no}: {len(nodes)}")
plt.title(str(slice_.time))
plt.scatter([n['Longitude_Destination'] if 'Longitude_Destination' in n else 0
for n in nodes],
[n['Latitude_Destination'] if 'Latitude_Destination' in n else 0
for n in nodes],
s=[len(nodes)*100]*len(nodes))
plt.pause(0.5)
def plt_show_bars(time_slices: List[TimeSlice], cluster_no):
cluster_no = str(cluster_no)
labels = [ts.time for ts in time_slices]
x_axis_label_stepsize = 10
nodes_per_slice_for_single_cluster = \
[len(time_slice.get_nodes_for_cluster(cluster_no))
for time_slice
in time_slices]
fig, ax = plt.subplots()
ax.bar(x=range(len(labels)),
height=nodes_per_slice_for_single_cluster)
ax.set_ylabel('Size')
ax.set_title(f'Cluster-{cluster_no} size over time')
ax.set_xticks(range(len(labels))[::x_axis_label_stepsize])
ax.set_xticklabels(labels[::x_axis_label_stepsize])
plt.show()
if __name__ == "__main__":
repo = Repository()
time_slices = repo.get_time_slices_by_name("Destination_Layer")
# chronological order
time_slices.sort(key=lambda ts: eval(ts.time))
print(len(time_slices))
plt_show_bars(time_slices, cluster_no = 0)
\ No newline at end of file
...@@ -16,8 +16,8 @@ SEMANTIC_LINKING_REST_PORT = 80
SEMANTIC_LINKING_DB_HOSTNAME = f'{SEMANTIC_LINKING_HOSTNAME}-db'
SEMANTIC_LINKING_DB_PORT = 27017
## Community Detection
COMMUNITY_DETECTION_HOSTNAME = 'role-stage-discovery'
COMMUNITY_DETECTION_REST_PORT = 80
COMMUNITY_DETECTION_DB_HOSTNAME = f'{COMMUNITY_DETECTION_HOSTNAME}-db'
COMMUNITY_DETECTION_DB_PORT = 27017
## Role Stage Discovery
ROLESTAGE_DISCOVERY_HOSTNAME = 'role-stage-discovery'
ROLESTAGE_DISCOVERY_REST_PORT = 80
ROLESTAGE_DISCOVERY_DB_HOSTNAME = f'{ROLESTAGE_DISCOVERY_HOSTNAME}-db'
ROLESTAGE_DISCOVERY_DB_PORT = 27017
\ No newline at end of file