Commit f11c4d13 authored by Alexander Lercher

added basic clustering

parent 2ac95a70
......@@ -28,3 +28,83 @@ paths:
responses:
200:
description: "Successful echo of request data"
/location:
post:
operationId: "rest.location.post"
tags:
- "Locations"
summary: "Add new location data"
parameters:
- in: body
name: "Location"
description: "The location data to be added"
required: true
schema:
$ref: "#/definitions/Location"
responses:
201:
description: "Successful operation"
400:
description: "Invalid input"
get:
operationId: "rest.location.get"
tags:
- "Locations"
summary: "Get location data"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/Location"
/cluster:
get:
operationId: "rest.cluster.get"
tags:
- "Clusters"
summary: "Get clustered data"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/Cluster"
/cluster.png:
get:
operationId: "rest.cluster.get_image"
tags:
- "Clusters"
summary: "Get clustered data as image"
parameters: []
produces:
- "image/png"
responses:
200:
description: "Successful operation"
# Schemas referenced from the path entries through "$ref".
definitions:
# Location: request body of POST /location and response schema of GET /location.
Location:
type: "object"
properties:
id:
type: string
format: uuid
username:
type: "string"
latitude:
type: "number"
longitude:
type: "number"
timestamp:
type: "number"
# Cluster: response schema of GET /cluster.
# NOTE(review): only "id" is declared here - confirm this matches the payload
# that rest.cluster.get actually returns.
Cluster:
type: "object"
properties:
id:
type: string
format: uuid
\ No newline at end of file
from __future__ import annotations
class LocationDatastore:
    """Singleton that simulates a location database with an in-memory list."""

    # The one shared instance; created lazily by get_instance().
    _instance = None

    @staticmethod
    def get_instance() -> "LocationDatastore":
        """Return the shared datastore, creating it on first use."""
        if LocationDatastore._instance is None:
            LocationDatastore._instance = LocationDatastore()
        return LocationDatastore._instance

    def __init__(self):
        """Create an empty store.

        Raises:
            Exception: if the shared instance has already been created;
                use get_instance() instead of constructing a second one.
        """
        if LocationDatastore._instance is not None:
            raise Exception("This class is a singleton!")
        self.locations = []

    def add(self, location):
        """Append one location record to the store."""
        self.locations.append(location)

    def get(self):
        """Return the internal list of stored locations (not a copy)."""
        return self.locations
\ No newline at end of file
from db.location_datastore import LocationDatastore
class Repository:
    """Thin facade over the shared LocationDatastore instance."""

    def __init__(self):
        # All repositories talk to the same datastore singleton.
        datastore = LocationDatastore.get_instance()
        self.store = datastore

    def addLocation(self, location):
        """Hand one location record over to the datastore."""
        self.store.add(location)

    def getLocations(self):
        """Return whatever the datastore's get() yields."""
        stored = self.store.get()
        return stored
### init logging ###
import logging
LOG_FORMAT = ('%(levelname) -5s %(asctime)s %(name)s:%(funcName) -35s %(lineno) -5d: %(message)s')
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
LOGGER = logging.getLogger(__name__)
#############################
import connexion
# load swagger config
......@@ -10,4 +17,4 @@ def api_root():
# start app
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)
app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=True)
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from typing import List, Dict
class Clusterer:
    """Clusters location records with DBSCAN and renders them as a scatter plot.

    A location record is a dict that carries at least the numeric keys
    'latitude' and 'longitude'.
    """

    def __init__(self, epsilon=11, min_points=2):
        """Remember the DBSCAN settings.

        Args:
            epsilon: passed to DBSCAN as ``eps``.
            min_points: passed to DBSCAN as ``min_samples``.
        """
        self.epsilon = epsilon
        self.min_points = min_points

    # Third-party types in annotations are quoted so they are not evaluated
    # when the class body is executed.
    def draw_locations(self, locations:List, labels:List=None) -> "plt.Figure":
        """Plot the locations, coloured by cluster label.

        Args:
            locations: location records; None or empty yields an empty plot.
            labels: one label per location. When missing, or when its length
                differs from ``locations``, labels are computed with
                create_labels().

        Returns:
            The figure holding the scatter plot.
        """
        if locations is None or len(locations) == 0:
            return self._draw_locations()
        if labels is None or len(locations) != len(labels):
            labels = self.create_labels(locations)
        return self._draw_locations(
            locations=self._to_coordinates(locations),
            partition_info=labels,
        )

    @staticmethod
    def _to_coordinates(locations:List) -> "np.ndarray":
        """Turn location records into an (n, 2) array of (latitude, longitude)."""
        return np.asarray([(loc['latitude'], loc['longitude']) for loc in locations])

    @staticmethod
    def _color_indices(partition_info) -> List[int]:
        """Map every label to a dense index in range(number of distinct labels).

        Non-negative labels come first in ascending order, negative ones
        (DBSCAN marks noise with -1) last. For DBSCAN output this reproduces
        plain ``palette[label]`` indexing, while arbitrary caller-supplied
        labels such as [17, 2, 20] no longer run past the palette.
        """
        ordered = sorted(set(partition_info), key=lambda label: (label < 0, label))
        index_of = {label: index for index, label in enumerate(ordered)}
        return [index_of[label] for label in partition_info]

    def _draw_locations(self, locations:"np.ndarray"=None, centroids:"np.ndarray"=None, partition_info=None) -> "plt.Figure":
        """Build the scatter plot.

        Args:
            locations: (n, 2) coordinate array, or None to draw no points.
            centroids: (m, 2) coordinate array drawn as black crosses, or None.
            partition_info: one label per location; points sharing a label
                share a colour. Without it every point gets its own colour.
        """
        fig = plt.Figure()
        axis = fig.add_subplot(1, 1, 1)
        if locations is not None:
            if partition_info is not None:
                color_indices = self._color_indices(partition_info)
                palette = plt.cm.rainbow(np.linspace(0, 1, len(set(color_indices))))
                colors = [palette[index] for index in color_indices]
            else:
                colors = plt.cm.rainbow(np.linspace(0, 1, len(locations)))
            axis.scatter(locations[:,0], locations[:,1], c=colors)
        if centroids is not None:
            # draw black centroids
            axis.scatter(centroids[:,0], centroids[:,1], c='k', marker='x', s=80)
        return fig

    def create_labels(self, locations:List) -> List:
        """Run DBSCAN over the locations and return one label per location.

        None or an empty list is handed back unchanged.
        """
        if locations is None or len(locations) == 0:
            return locations  # garbage in, garbage out
        coordinates = self._to_coordinates(locations)
        dbscan = DBSCAN(eps=self.epsilon, min_samples=self.min_points)
        return dbscan.fit(coordinates).labels_.tolist()

    def label_locations(self, locations:List[Dict], labels:List) -> List:
        """Write labels[i] into locations[i]['cluster_label'] in place.

        Does nothing when either argument is None.

        Raises:
            ValueError: if both are given but differ in length.
        """
        if locations is None or labels is None:
            return
        if len(locations) != len(labels):
            raise ValueError("locations and labels has to have same length")
        for location, label in zip(locations, labels):
            location['cluster_label'] = label

    def run(self, locations:List[Dict]) -> Dict[int, List[Dict]]:
        """Cluster the locations and group them by label.

        The records are modified in place: each one gains a 'cluster_label'
        key. Returns a dict mapping every label to the list of its records,
        or an empty dict for None / empty input.
        """
        if locations is None or len(locations) == 0:
            return {}
        labels = self.create_labels(locations)
        self.label_locations(locations, labels)
        # One pass over the records instead of one full scan per label.
        clusters = {}
        for location in locations:
            clusters.setdefault(location['cluster_label'], []).append(location)
        return clusters
\ No newline at end of file
connexion[swagger-ui]
numpy
matplotlib
scikit-learn
\ No newline at end of file
import io
from flask import request, Response
from db.repository import Repository
from processing.clusterer import Clusterer
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
repo = Repository()
clusterer = Clusterer()
def get():
    """Cluster the stored locations and return the result of Clusterer.run()."""
    return clusterer.run(repo.getLocations())
def get_image():
    """Draw the stored locations and return the plot as an image/png response."""
    figure = clusterer.draw_locations(repo.getLocations())
    png_buffer = io.BytesIO()
    canvas = FigureCanvas(figure)
    # The Agg canvas writes the PNG bytes into the in-memory buffer.
    canvas.print_png(png_buffer)
    return Response(png_buffer.getvalue(), mimetype="image/png")
\ No newline at end of file
from flask import request, Response
from db.repository import Repository
repo = Repository()
def post():
    """Store the request's JSON body as a new location and answer with 201."""
    repo.addLocation(request.json)
    return Response(status=201)
def get():
    """Return the locations currently held by the repository."""
    stored_locations = repo.getLocations()
    return stored_locations
import unittest
import sys
sys.path.insert(1, './')
# python -m unittest discover -v tests
from processing.clusterer import Clusterer
class TestClusterer(unittest.TestCase):
    """Unit tests for Clusterer.create_labels and Clusterer.label_locations."""

    clusterer:Clusterer = None

    def setUp(self):
        # every test starts from a fresh instance
        self.clusterer = Clusterer(epsilon=10, min_points=2)

    # --- create_labels ---

    def test_create_labels_noneInput_noneOutput(self):
        result = self.clusterer.create_labels(None)
        self.assertEqual(None, result)

    def test_create_labels_emptyInput_emptyOutput(self):
        result = self.clusterer.create_labels([])
        self.assertEqual([], result)

    def test_create_labels_singleInput_singleCluster(self):
        single = [self.location(1,2)]
        result = self.clusterer.create_labels(single)
        self.assertEqual(1, len(result))

    def test_create_labels_nearInputs_singleCluster(self):
        near_points = [self.location(1,2), self.location(2,2)]
        result = self.clusterer.create_labels(near_points)
        self.assertEqual(2, len(result))
        first, second = result[0], result[1]
        self.assertEqual(first, second)

    def test_create_labels_nearInputs_twoClusters(self):
        points = [self.location(1,2), self.location(2,2), self.location(20,20)]
        result = self.clusterer.create_labels(points)
        self.assertEqual(3, len(result))
        # the two near points share a label, the far one does not
        self.assertEqual(result[0], result[1])
        self.assertNotEqual(result[0], result[2])

    # --- label_locations ---

    def test_label_locations_NoneLocations_NoException(self):
        self.clusterer.label_locations(None, [])

    def test_label_locations_NoneLabels_NoException(self):
        self.clusterer.label_locations([], None)

    def test_label_locations_emptyInput_emptyOutput(self):
        empty = []
        self.clusterer.label_locations(empty, [])
        self.assertEqual(0, len(empty))

    def test_label_locations_diffInputLengths_ValueError_1(self):
        with self.assertRaises(ValueError):
            self.clusterer.label_locations([], [1])

    def test_label_locations_diffInputLengths_ValueError_2(self):
        one_location = [self.location(1,2)]
        with self.assertRaises(ValueError):
            self.clusterer.label_locations(one_location, [])

    def test_label_locations_multInput_correctlyLabeled(self):
        points = [self.location(1,2), self.location(2,2), self.location(20,20)]
        expected = [17,2,20]
        self.clusterer.label_locations(points, expected)
        self.assertEqual(3, len(points))
        self.assertHaveLabelsAsNewKey(points, expected)

    # helper methods:

    def location(self, lat, long_) -> dict:
        """Build a minimal location record."""
        return dict(latitude=lat, longitude=long_)

    def assertHaveLabelsAsNewKey(self, locations, labels):
        """Assert that locations[i]['cluster_label'] equals labels[i] for every i."""
        for index, entry in enumerate(locations):
            self.assertEqual(labels[index], entry['cluster_label'])
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment