Commit 71008e36 authored by Alexander Lercher

Merge branch 'feature/generic-clustering' into develop

parents 6f8b2f9c cc03bae0
layers:
user:
properties:
starting-point:
properties:
- Latitude_StartingPoint
- Longitude_StartingPoint
\ No newline at end of file
......@@ -14,7 +14,7 @@ basePath: "/api"
paths:
/debug:
post:
operationId: "rest.debug.echo"
operationId: "routes.debug.echo"
tags:
- "Echo"
summary: "Echo function for debugging purposes"
......@@ -29,9 +29,11 @@ paths:
200:
description: "Successful echo of request data"
# Locations
# TODO remove
/locations:
post:
operationId: "rest.location.post"
operationId: "routes.location.post"
tags:
- "Locations"
summary: "Add new location data"
......@@ -48,7 +50,7 @@ paths:
400:
description: "Invalid input"
get:
operationId: "rest.location.get"
operationId: "routes.location.get"
tags:
- "Locations"
summary: "Get location data"
......@@ -61,7 +63,7 @@ paths:
/location-collections:
post:
operationId: "rest.location.post_many"
operationId: "routes.location.post_many"
tags:
- "Locations"
summary: "Add new location data collection"
......@@ -78,9 +80,77 @@ paths:
400:
description: "Invalid input"
# Layers
/layers:
post:
operationId: "routes.layers.post"
tags:
- "Layers"
summary: "Add a new layer or overwrite an existing one"
parameters:
- in: body
name: "Layer"
description: "The layer data to be added"
required: true
schema:
$ref: "#/definitions/Layer-UpperCase"
responses:
201:
description: "Successful operation"
400:
description: "Invalid input"
get:
operationId: "routes.layers.get"
tags:
- "Layers"
summary: "Get all layer data"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/LayerCollection"
/layers/names:
get:
operationId: "routes.layers.get_names"
tags:
- "Layers"
summary: "Get all layer names"
parameters: []
responses:
200:
description: "Successful operation"
schema:
type: array
items:
type: string
/layers/{name}:
get:
operationId: "routes.layers.get_by_name"
tags:
- "Layers"
summary: "Get layer data for layer-name"
parameters:
- name: "name"
in: "path"
description: "Name of the layer to return"
required: true
type: "string"
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/Layer"
404:
description: "Layer not found"
# Clusters
# TODO remove partially
/location-clusters:
get:
operationId: "rest.cluster.get_locations"
operationId: "routes.cluster.get_locations"
tags:
- "Clusters"
summary: "Get user communities clustered by location"
......@@ -93,7 +163,7 @@ paths:
# /clusters/cluster.png:
# get:
# operationId: "rest.cluster.get_image"
# operationId: "routes.cluster.get_image"
# tags:
# - "Clusters"
# summary: "Get user communities per date per hour as image"
......@@ -106,7 +176,7 @@ paths:
/time-clusters:
get:
operationId: "rest.cluster.get_times"
operationId: "routes.cluster.get_times"
tags:
- "Clusters"
summary: "Get user communities clustered by time per hour"
......@@ -119,7 +189,7 @@ paths:
# /agi/clusters/cluster.png:
# get:
# operationId: "rest.agi_cluster.get_image"
# operationId: "routes.agi_cluster.get_image"
# tags:
# - "Clusters"
# summary: "Get user communities per date per hour from agi data as image"
......@@ -130,9 +200,58 @@ paths:
# 200:
# description: "Successful operation"
/clustersets:
get:
operationId: "routes.clustersets.get"
tags:
- "Clusters"
summary: "Get clustersets for all layers"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterSetCollection"
/clustersets/names:
get:
operationId: "routes.clustersets.get_names"
tags:
- "Clusters"
summary: "Get clusterset names for all layers"
parameters: []
responses:
200:
description: "Successful operation"
schema:
type: array
items:
type: string
/clustersets/{name}:
get:
operationId: "routes.clustersets.get_by_name"
tags:
- "Clusters"
summary: "Get clusterset for layer-name"
parameters:
- name: "name"
in: "path"
description: "Name of the layer to return the clusterset for"
required: true
type: "string"
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/ClusterSet"
404:
description: "Clusterset not found"
# TODO remove
/user-cluster-graphs:
get:
operationId: "rest.user_cluster.get"
operationId: "routes.user_cluster.get"
tags:
- "User Graphs"
summary: "Get user graphs per layer per cluster"
......@@ -142,10 +261,11 @@ paths:
description: "Successful operation"
schema:
$ref: "#/definitions/UserClusterGraphCollection"
# Function Calls
/rfc/run:
post:
operationId: "rest.functions.run_agi_clustering_and_graph_creation"
operationId: "routes.functions.run_agi_clustering_and_graph_creation"
tags:
- "Remote function calls"
summary: "Insert locations from AGI, create clusters for starting time and location layers, create graphs for the location clusters"
......@@ -154,6 +274,7 @@ paths:
204:
description: "Successful operation"
definitions:
Location:
type: "object"
......@@ -176,6 +297,24 @@ definitions:
items:
$ref: "#/definitions/Location"
Cluster:
type: object
properties:
cluster_label:
type: number
nodes:
type: array
items:
type: object
example:
"Finished_time": 1576631193265951
"Latitude_Destination": -5.973257
"Longitude_Destination": 37.416316
"TravelID": "5e57ec9159bc0668543f156a"
"TravelPrice": 15
"UniqueID": "a95075f5042b1b27060080156d87fe34ec7e712c5e57ec9159bc0668543f156a"
"UserID": "a95075f5042b1b27060080156d87fe34ec7e712c"
LocationCluster:
type: object
properties:
......@@ -235,4 +374,52 @@ definitions:
UserClusterGraphCollection:
type: array
items:
$ref: "#/definitions/UserClusterGraph"
\ No newline at end of file
$ref: "#/definitions/UserClusterGraph"
Layer-UpperCase:
type: object
properties:
LayerName:
type: string
Nodes:
type: array
items:
type: object
Properties:
type: array
items:
type: string
Layer:
type: object
properties:
layer_name:
type: string
nodes:
type: array
items:
type: object
properties:
type: array
items:
type: string
LayerCollection:
type: array
items:
$ref: "#/definitions/Layer"
ClusterSet:
type: object
properties:
layer_name:
type: string
clusters:
type: array
items:
$ref: "#/definitions/Cluster"
ClusterSetCollection:
type: array
items:
$ref: "#/definitions/ClusterSet"
\ No newline at end of file
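As a quick illustration of the new layer endpoints, a client could add a layer and read it back roughly as follows. This is a minimal sketch, not part of the commit: the base URL, port, and the example node fields are assumptions.

# Editor's sketch: exercising POST /layers and GET /layers/{name}.
# Assumes the service is reachable locally under basePath /api.
import requests

BASE = "http://localhost:5000/api"  # assumed host/port

layer = {
    "LayerName": "Destination",                         # external upper-case schema (Layer-UpperCase)
    "Nodes": [{"latitude": 48.2, "longitude": 16.4}],   # arbitrary example node
    "Properties": ["latitude", "longitude"],
}

resp = requests.post(f"{BASE}/layers", json=layer)
print(resp.status_code)  # 201 on success

resp = requests.get(f"{BASE}/layers/Destination")
print(resp.json())  # internal lower-case schema: layer_name, nodes, properties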
......@@ -29,6 +29,24 @@ class AgiRepository:
return locations
def getLocationsBasedOnNewDataSchema(self):
'''Creates the new generic data schema to be used beginning on 24.03.2020'''
data = {
'layer_name': 'Destination',
'nodes': self.getLocations(),
'properties': ['latitude', 'longitude']
}
return data
def getTimesBasedOnNewDataSchema(self):
'''Creates the new generic data schema to be used beginning on 24.03.2020'''
data = {
'layer_name': 'Starting_Time',
'nodes': self.getLocations(),
'properties': ['timestamp']
}
return data
def readDataFromFile(self) -> List[Dict]:
with open('./db/agi/travels.json', 'r') as f_travels:
travels = json.loads(f_travels.read())
......
from db.entities.location import Location
from db.entities.popular_location import PopularLocation
from db.entities.cluster import Cluster, LocationCluster, TimeCluster
from db.entities.user_cluster_graph import UserClusterGraph
\ No newline at end of file
from db.entities.clusterset import ClusterSet
from db.entities.user_cluster_graph import UserClusterGraph
from db.entities.layer import Layer
......@@ -4,10 +4,38 @@ from datetime import date, datetime
class Cluster:
def __init__(self, cluster_label: int = None, nodes: List = None):
'''
A cluster for an arbitrary layer containing some nodes.
:param cluster_label: The label of the cluster unique for the layer
:param nodes: The individual nodes of the cluster
'''
def __init__(self, cluster_label: int = None, nodes: List = None,
cluster_dict: Dict = None, from_db=False):
self.cluster_label = cluster_label
self.nodes = nodes
if cluster_dict is not None:
self.from_serializable_dict(cluster_dict, from_db)
def to_serializable_dict(self, for_db=False) -> Dict:
return {
"cluster_label": self.cluster_label,
"nodes": json.dumps(self.nodes) if for_db else self.nodes
}
def from_serializable_dict(self, cluster_dict: Dict, from_db=False):
self.cluster_label = cluster_dict["cluster_label"]
self.nodes = json.loads(cluster_dict["nodes"]) \
if from_db else cluster_dict["nodes"]
def __repr__(self):
return json.dumps(self.to_serializable_dict())
def __str__(self):
return f"Cluster({self.__repr__()})"
class LocationCluster(Cluster):
def __init__(self, cluster_label: int = None, nodes: List = None,
......@@ -67,7 +95,7 @@ class TimeCluster(Cluster):
if from_db else time_dict["nodes"]
def __repr__(self):
return json.dumps(self.to_serializable_dict())
return json.dumps(self.to_serializable_dict(True))
def __str__(self):
return f"TimeCluster({self.__repr__()})"
import json
from db.entities.cluster import Cluster
from typing import List, Dict
from datetime import date, datetime
class ClusterSet:
'''
A clusterset for an arbitrary layer containing all clusters.
:param layer_name: The name of the layer
:param clusters: The individual clusters
'''
def __init__(self, layer_name: str = None, clusters: List[Cluster] = None,
cluster_set_dict: Dict = None, from_db=False):
self.layer_name = layer_name
self.clusters = clusters
if cluster_set_dict is not None:
self.from_serializable_dict(cluster_set_dict, from_db)
def to_serializable_dict(self, for_db=False) -> Dict:
serialized_dict_clusters = [cluster.to_serializable_dict(for_db)
for cluster in self.clusters]
return {
"layer_name": self.layer_name,
"clusters": json.dumps(serialized_dict_clusters) if for_db else serialized_dict_clusters
}
def from_serializable_dict(self, cluster_set_dict: Dict, from_db=False):
self.layer_name = cluster_set_dict["layer_name"]
serialized_dict_clusters = json.loads(cluster_set_dict["clusters"]) \
if from_db else cluster_set_dict["clusters"]
self.clusters = [Cluster(cluster_dict=cluster_dict, from_db=from_db)
for cluster_dict in serialized_dict_clusters]
def __repr__(self):
return json.dumps(self.to_serializable_dict())
def __str__(self):
return f"ClusterSet({self.__repr__()})"
import json
from datetime import datetime
from typing import Dict
class Layer:
'''
This class represents a single layer of the Multilayer Graph.
:param layer_info: Information as a dictionary used to restore the layer object.
'''
def __init__(self, layer_info: Dict = None, from_db=False):
if layer_info is not None:
self.from_serializable_dict(layer_info, from_db)
def to_serializable_dict(self, for_db=False) -> Dict:
return {
"layer_name": self.layer_name,
"properties": self.properties,
"nodes": json.dumps(self.nodes) if for_db else self.nodes
}
def from_serializable_dict(self, layer_info: Dict, from_db=False):
self.layer_name = layer_info['layer_name']
self.properties = layer_info['properties']
self.nodes = json.loads(layer_info["nodes"]) \
if from_db else layer_info["nodes"]
def __repr__(self):
return json.dumps(self.to_serializable_dict())
def __str__(self):
return f"Layer({self.__repr__()})"
......@@ -5,12 +5,12 @@ import json
from db.agi.agi_repository import AgiRepository
from db.entities import Location, TimeCluster, PopularLocation, LocationCluster, UserClusterGraph
from db.entities import *
from typing import List
class Repository(MongoRepositoryBase):
'''This repository stores and loads locations and clusters with MongoDb.'''
'''This repository stores and loads all entities (locations, clusters, layers, cluster sets, user graphs) with MongoDB.'''
def __init__(self):
super().__init__(netconst.COMMUNITY_DETECTION_DB_HOSTNAME,
......@@ -21,9 +21,12 @@ class Repository(MongoRepositoryBase):
self._location_cluster_collection = 'location_cluster'
self._time_cluster_collection = 'time_cluster'
self._user_cluster_graph_collection = 'user_cluster_graph'
self._layer_collection = 'layer'
self._clusterset_collection = 'cluster_set'
self.agi_repo = AgiRepository()
#region Location
def add_location(self, location: Location):
super().insert_entry(self._location_collection, location.to_serializable_dict())
......@@ -34,7 +37,9 @@ class Repository(MongoRepositoryBase):
def get_agi_locations(self) -> List[Location]:
agi_locations = self.agi_repo.getLocations()
return [Location(agi_loc) for agi_loc in agi_locations]
#endregion
#region Specific Clusters
def add_location_cluster(self, cluster: LocationCluster):
super().insert_entry(self._location_cluster_collection,
cluster.to_serializable_dict(for_db=True))
......@@ -50,7 +55,9 @@ class Repository(MongoRepositoryBase):
def get_time_clusters(self) -> List[TimeCluster]:
clusters = super().get_entries(self._time_cluster_collection)
return [TimeCluster(time_dict=c, from_db=True) for c in clusters]
#endregion
#region Cluster Graph
def add_user_cluster_graph(self, user_graph: UserClusterGraph):
super().insert_entry(self._user_cluster_graph_collection,
user_graph.to_serializable_dict(for_db=True))
......@@ -58,3 +65,48 @@ class Repository(MongoRepositoryBase):
def get_user_cluster_graphs(self) -> List[UserClusterGraph]:
user_graphs = super().get_entries(self._user_cluster_graph_collection)
return [UserClusterGraph(dict_=u, from_db=True) for u in user_graphs]
#endregion
#region Layers
def add_layer(self, layer: Layer):
super().insert_entry(self._layer_collection, layer.to_serializable_dict())
def get_layers(self) -> List[Layer]:
entries = super().get_entries(self._layer_collection)
return [Layer(e) for e in entries]
def get_layer_names(self) -> List[str]:
entries = super().get_entries(self._layer_collection, projection={'layer_name': 1})
return [e['layer_name'] for e in entries]
def get_layer(self, layer_name) -> Layer:
entries = super().get_entries(self._layer_collection, selection={'layer_name': layer_name})
entries = [Layer(e) for e in entries]
if entries is not None and len(entries) > 0:
return entries[0]
else:
return None
#endregion
#region ClusterSet
def add_clusterset(self, cluster_set: ClusterSet):
super().insert_entry(self._clusterset_collection, cluster_set.to_serializable_dict())
def get_clustersets(self) -> List[ClusterSet]:
entries = super().get_entries(self._clusterset_collection)
return [ClusterSet(cluster_set_dict=e) for e in entries]
def get_clusterset_names(self) -> List[str]:
entries = super().get_entries(self._clusterset_collection, projection={'layer_name': 1})
return [e['layer_name'] for e in entries]
def get_clusterset(self, layer_name) -> ClusterSet:
entries = super().get_entries(self._clusterset_collection, selection={'layer_name': layer_name})
entries = [ClusterSet(cluster_set_dict=e) for e in entries]
if entries is not None and len(entries) > 0:
return entries[0]
else:
return None
#endregion
......@@ -2,9 +2,20 @@ import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from typing import List, Dict
from typing import List, Dict, Any, TypeVar
from deprecated import deprecated
T = TypeVar('T')
ClusterGroup = Dict[Any, List[Dict]]
class Clusterer:
'''
Clusterer for applying density-based clustering on datasets.
The clustering is done with DBSCAN.
:param epsilon: Epsilon used in DBSCAN
:param min_points: Min_points used in DBSCAN
'''
def __init__(self, epsilon=11, min_points=2):
self.epsilon = epsilon
self.min_points = min_points
......@@ -43,7 +54,8 @@ class Clusterer:
return fig
def create_labels(self, features:np.ndarray) -> List:
def create_labels(self, features:np.ndarray) -> List[int]:
'''Creates labels for the items based on DBSCAN.'''
if features is None or len(features) == 0:
return features # trash in trash out
......@@ -53,13 +65,25 @@ class Clusterer:
return labels.tolist()
@deprecated(reason="Use generic version instead")
def extract_location_features(self, locations: List[dict]) -> np.ndarray:
return np.asarray([(float(l['latitude']), float(l['longitude'])) for l in locations])
@deprecated(reason="Use generic version instead")
def extract_time_features(self, times: List[Dict]) -> np.ndarray:
return np.asarray([((t['timestamp']), 0) for t in times])
return np.asarray([[float(t['timestamp'])] for t in times])
def _extract_features(self, dataset: List[Dict], features:List[str]) -> np.ndarray:
'''Extracts the feature values from the dataset into a np array in the same order as the original dataset.'''
extracted_features = []
for data in dataset:
entry = [float(data[feature]) for feature in features]
extracted_features.append(entry)
return np.asarray(extracted_features)
def label_dataset(self, dataset:List[Dict], labels:List) -> List:
def label_dataset(self, dataset:List[Dict], labels:List[Any]) -> List:
'''Adds each label to the dataset element at the same index, under the new key cluster_label.'''
if dataset is None or labels is None:
return
......@@ -67,16 +91,21 @@ class Clusterer:
raise ValueError("dataset and labels has to have same length")
for i in range(len(dataset)):
if 'cluster_label' in dataset[i]:
continue
dataset[i]['cluster_label'] = labels[i]
def group_by_clusters(self, dataset:List[Dict], labels:List) -> Dict[int, List[Dict]]:
def group_by_clusters(self, dataset:List[Dict], labels:List[Any]) -> ClusterGroup:
self.label_dataset(dataset, labels)
clusters = {}
for label in labels:
clusters[label] = [ds for ds in dataset if ds['cluster_label'] == label]
return clusters
def cluster_locations(self, locations:List[Dict]) -> Dict[int, List[Dict]]:
@deprecated(reason="Use generic version instead")
def cluster_locations(self, locations:List[Dict]) -> ClusterGroup:
'''Returns a dictionary with identified clusters and their locations copied from the input'''
if locations is None or len(locations) == 0:
# raise Exception("locations has to contain something")
......@@ -88,12 +117,29 @@ class Clusterer:
self.label_dataset(locations, labels)
return self.group_by_clusters(locations, labels)
def cluster_times(self, times:List[Dict]) -> Dict[int, List[Dict]]:
@deprecated(reason="Use generic version instead")
def cluster_times(self, times:List[Dict]) -> ClusterGroup:
'''Returns a dictionary with identified clusters and their times copied from the input'''
features = self.extract_time_features(times)
labels = self.create_labels(features)
self.label_dataset(times, labels)
return self.group_by_clusters(times, labels)
\ No newline at end of file
return self.group_by_clusters(times, labels)
def cluster_dataset(self, dataset:List[Dict], features:List[str]) -> ClusterGroup:
'''
Returns the identified clusters containing a subset of nodes from the dataset.
:param dataset: The nodes to assign to clusters
:param features: The feature names of the nodes to use for clustering
:returns: A dictionary of clusters, where each value is a non-empty subset of dataset if dataset was not empty
'''
arr = self._extract_features(dataset, features)
labels = self.create_labels(arr)
return self.group_by_clusters(dataset, labels)
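A usage sketch for the new generic entry point, mirroring the tests added later in this commit (the sample values are arbitrary):

# Editor's sketch: clustering an arbitrary dataset by named features.
from processing.clustering.clusterer import Clusterer

nodes = [
    {"latitude": 1.0, "longitude": 2.0},
    {"latitude": 2.0, "longitude": 2.0},
    {"latitude": 20.0, "longitude": 20.0},
]

clusterer = Clusterer(epsilon=11, min_points=2)
groups = clusterer.cluster_dataset(nodes, ["latitude", "longitude"])
# groups maps cluster labels to labeled nodes, e.g. {0: [...], -1: [...]} where -1 is DBSCAN noise
for label, members in groups.items():
    print(label, members)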
import yaml
from typing import Generator
### init logging ###
import logging
LOG_FORMAT = (
'%(levelname) -5s %(asctime)s %(name)s:%(funcName) -35s %(lineno) -5d: %(message)s')
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
LOGGER = logging.getLogger(__name__)
class ClusteringConfig:
'''Contains the configuration for the clustering algorithm defined in configs/clustering.yaml.'''
config_path = 'configs/clustering.yaml'
config: dict = None
def __init__(self):
self.config = self._load_config()
def _load_config(self) -> dict:
'''Loads the whole configuration from file.'''
config = None
with open(self.config_path, 'r') as stream:
try:
config = yaml.safe_load(stream)
except yaml.YAMLError as exc:
LOGGER.error(exc)
config = {}
return config
def get_config(self):
return self.config
def get_layer_configs(self) -> Generator[dict, None, None]:
"""
Returns a generator over the individual layer configs.
Each layer config is a dict that includes its 'layer-name'.
"""
for key, layer in self.config['layers'].items():
layer['layer-name'] = key
yield layer
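For the clustering.yaml shown at the top of this diff, get_layer_configs() should yield one dict per layer key. A sketch of how it could be consumed (assuming the process runs from the service's working directory so that configs/clustering.yaml resolves; the exact values depend on the full config file):

# Editor's sketch: iterating the per-layer clustering configuration.
from processing.clustering.clustering_config import ClusteringConfig

config = ClusteringConfig()
for layer_config in config.get_layer_configs():
    # e.g. {'properties': ['Latitude_StartingPoint', 'Longitude_StartingPoint'],
    #       'layer-name': 'starting-point'}
    print(layer_config['layer-name'], layer_config.get('properties'))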
......@@ -5,9 +5,11 @@ certifi==2019.11.28
chardet==3.0.4
Click==7.0
clickclick==1.2.2
colorama==0.4.3
connexion==2.6.0
cycler==0.10.0
decorator==4.4.1
Deprecated==1.2.7
Flask==1.1.1
idna==2.8
importlib-metadata==1.5.0
......
import io
from flask import request, Response
from db.repository import Repository
from processing.clusterer import Clusterer
from processing.clustering.clusterer import Clusterer
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
repo = Repository()
......
from flask import request, Response
from db.repository import Repository
from db.entities import ClusterSet
repo = Repository()
def get():
return [c.to_serializable_dict() for c in repo.get_clustersets()]
def get_names():
return repo.get_clusterset_names()
def get_by_name(name):
res = repo.get_clusterset(name)
if res is not None:
return res.to_serializable_dict()
else:
return Response(status=404)
\ No newline at end of file
from flask import request, Response
from db.repository import Repository
from db.entities import Layer
repo = Repository()
def post():
body = request.json
_insert_layer(body)
return Response(status=201)
def _insert_layer(layer_data: dict):
# convert keys from the external source's naming (LayerName, Nodes, Properties) to the internal snake_case schema
layer_data['layer_name'] = layer_data.pop('LayerName')
layer_data['nodes'] = layer_data.pop('Nodes')
layer_data['properties'] = layer_data.pop('Properties')
repo.add_layer(Layer(layer_data))
def get():
return [l.to_serializable_dict() for l in repo.get_layers()]
def get_names():
return repo.get_layer_names()
def get_by_name(name):
res = repo.get_layer(name)
if res is not None:
return res.to_serializable_dict()
else:
return Response(status=404)
\ No newline at end of file
......@@ -4,15 +4,40 @@ modules_path = '../../../modules/'
if os.path.exists(modules_path):
sys.path.insert(1, modules_path)
from db.entities import Location, PopularLocation, LocationCluster, TimeCluster
from db.entities import *
from typing import List, Dict, Tuple
from db.repository import Repository
from processing.clusterer import Clusterer
from db.repository import Repository, AgiRepository
from processing.clustering.clusterer import Clusterer
DEBUG = False
repo = Repository()
test_repo = AgiRepository()
def run_generic_clustering():
'''Runs the clustering for all layers found in the repository.'''
all_layers:List[Layer] = repo.get_layers()
for layer in all_layers:
print(f"Clustering {layer.layer_name}")
clusters = run_clustering_for_layer(layer)
cluster_set = ClusterSet(layer.layer_name, clusters)
repo.add_clusterset(cluster_set)
def run_clustering_for_layer(layer: Layer) -> List[Cluster]:
clusterer = Clusterer()
res = clusterer.cluster_dataset(
layer.nodes,
layer.properties
)
return [Cluster(key, value) for key, value in res.items()]
def run_location_clustering():
user_clusterer = Clusterer()
......@@ -74,5 +99,7 @@ def store_clusters(type: str, clusters: List):
if __name__ == "__main__":
run_location_clustering()
run_time_clustering()
run_generic_clustering()
# run_location_clustering()
# run_time_clustering()
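Taken together, the generic pipeline introduced by this branch is: store layers via POST /layers, run this script, then read the resulting cluster sets back via /clustersets/{name}. A condensed sketch of what run_clustering_for_layer produces for a small layer (illustrative values only, not part of the commit):

# Editor's sketch: the shape of run_clustering_for_layer's result.
from db.entities import Layer, Cluster
from processing.clustering.clusterer import Clusterer

layer = Layer({
    "layer_name": "Destination",
    "nodes": [{"latitude": 1.0, "longitude": 2.0}, {"latitude": 2.0, "longitude": 2.0}],
    "properties": ["latitude", "longitude"],
})

res = Clusterer().cluster_dataset(layer.nodes, layer.properties)
clusters = [Cluster(label, nodes) for label, nodes in res.items()]
# -> e.g. [Cluster(cluster_label=0, nodes=[{'latitude': 1.0, ..., 'cluster_label': 0}, ...])]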
import unittest
import sys
sys.path.insert(1, '../')
for path in ['../', './']:
sys.path.insert(1, path)
# python -m unittest discover
from processing.clusterer import Clusterer
from processing.clustering.clusterer import Clusterer
class TestClusterer(unittest.TestCase):
clusterer:Clusterer = None
......@@ -71,11 +72,50 @@ class TestClusterer(unittest.TestCase):
self.assertEqual(3, len(locations))
self.assertHaveLabelsAsNewKey(locations, labels)
def test_cluster_locations_multInput_correctlyLabeled(self):
locations = [self.location(1,2), self.location(2,2), self.location(20,20)]
labels = [0,0,-1]
res = self.clusterer.cluster_locations(locations)
self.assertHaveLabelsAsNewKey(locations, labels)
self.assertDictEqual(res, {0: [{'latitude': 1, 'longitude': 2, 'cluster_label': 0}, {'latitude': 2, 'longitude': 2, 'cluster_label': 0}], -1: [{'latitude': 20, 'longitude': 20, 'cluster_label': -1}]})
def test_cluster_times_multInput_correctlyLabeled(self):
times = [self.time(123), self.time(128), self.time(223)]
labels = [0,0,-1]
res = self.clusterer.cluster_times(times)
self.assertHaveLabelsAsNewKey(times, labels)
self.assertDictEqual(res, {0: [{'timestamp': 123, 'cluster_label': 0}, {'timestamp': 128, 'cluster_label': 0}], -1: [{'timestamp': 223, 'cluster_label': -1}]})
def test_cluster_dataset_locationsMultInput_correctlyLabeled(self):
locations = [self.location(1,2), self.location(2,2), self.location(20,20)]
labels = [0,0,-1]
res = self.clusterer.cluster_dataset(locations, ['latitude', 'longitude'])
self.assertHaveLabelsAsNewKey(locations, labels)
self.assertDictEqual(res, {0: [{'latitude': 1, 'longitude': 2, 'cluster_label': 0}, {'latitude': 2, 'longitude': 2, 'cluster_label': 0}], -1: [{'latitude': 20, 'longitude': 20, 'cluster_label': -1}]})
def test_cluster_dataset_timesMultInput_correctlyLabeled(self):
times = [self.time(123), self.time(128), self.time(223)]
labels = [0,0,-1]
res = self.clusterer.cluster_dataset(times, ['timestamp'])
self.assertHaveLabelsAsNewKey(times, labels)
self.assertDictEqual(res, {0: [{'timestamp': 123, 'cluster_label': 0}, {'timestamp': 128, 'cluster_label': 0}], -1: [{'timestamp': 223, 'cluster_label': -1}]})
# helper methods:
def location(self, lat, long_) -> dict:
return {'latitude': lat, 'longitude':long_}
def time(self, ts) -> dict:
return {'timestamp': ts}
def assertHaveLabelsAsNewKey(self, locations, labels):
for i in range(len(locations)):
self.assertEqual(labels[i], locations[i]['cluster_label'])
......
import unittest
import sys
for path in ['../', './']:
sys.path.insert(1, path)
# python -m unittest discover
from processing.clustering.clustering_config import ClusteringConfig
class TestClusteringConfig(unittest.TestCase):
def setUp(self):
self.clustering_config = ClusteringConfig()
def test_get_layer_configs_noneInput_noneOutput(self):
for layer_config in self.clustering_config.get_layer_configs():
self.assertIn('layer-name', layer_config)
if __name__ == '__main__':
unittest.main()