Commit 85e47ff6 authored by Alexander Lercher

Merge branch 'feature/clustering' into develop

parents 2ac95a70 ad3f1b64
......@@ -2,13 +2,13 @@ import os
import shutil
import sys
if len(sys.argv) != 2:
raise Exception("Push to Docker Hub will not work, please provide username as argument")
# if len(sys.argv) != 2:
# raise Exception("Push to Docker Hub will not work, please provide username as argument")
DOCKER_COMPOSE_NAME = "Dockerfile"
ROOT = './'
SOURCEPATH = f'{ROOT}src/'
DOCKER_USERNAME = sys.argv[1]
DOCKER_USERNAME = "alexx882"
paths = []
for r, _, f in os.walk(SOURCEPATH):
......@@ -33,9 +33,11 @@ for command_arg in command_args:
exit_val = os.system(f"docker image build -t {image_name} {ROOT}")
os.remove(os.path.join(ROOT, DOCKER_COMPOSE_NAME))
res_str.append(f"### {image_name} built with exit code {exit_val}")
res_str.append(f"{image_name} built with exit code {exit_val}")
os.system(f"docker push {image_name}")
res_str.append(f"{image_name} pushed with exit code {exit_val}")
print(f"Found {len(command_args)} images")
for s in res_str:
......
......@@ -5,13 +5,12 @@ ENV http_proxy http://proxy.uni-klu.ac.at:3128/
ENV https_proxy http://proxy.uni-klu.ac.at:3128/
RUN apt-get update
RUN pip install flask
RUN pip install connexion[swagger-ui]
EXPOSE 5000
WORKDIR /app
COPY src/data-hub/community-detection-microservice/app/ /app/
RUN pip install -r requirements.txt
RUN chmod a+x main.py
CMD ["python", "./main.py"]
\ No newline at end of file
......@@ -28,3 +28,109 @@ paths:
responses:
200:
description: "Successful echo of request data"
/location:
post:
operationId: "rest.location.post"
tags:
- "Locations"
summary: "Add new location data"
parameters:
- in: body
name: "Location"
description: "The location data to be added"
required: true
schema:
$ref: "#/definitions/Location"
responses:
201:
description: "Successful operation"
400:
description: "Invalid input"
get:
operationId: "rest.location.get"
tags:
- "Locations"
summary: "Get location data"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/Location"
/cluster:
get:
operationId: "rest.cluster.get"
tags:
- "Clusters"
summary: "Get clustered data"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/Cluster"
/cluster/cluster.png:
get:
operationId: "rest.cluster.get_image"
tags:
- "Clusters"
summary: "Get clustered data as image"
parameters: []
produces:
- "image/png"
responses:
200:
description: "Successful operation"
/agi/cluster:
get:
operationId: "rest.agi_cluster.get"
tags:
- "Clusters"
summary: "Get clustered data"
parameters: []
responses:
200:
description: "Successful operation"
schema:
$ref: "#/definitions/Cluster"
/agi/cluster/cluster.png:
get:
operationId: "rest.agi_cluster.get_image"
tags:
- "Clusters"
summary: "Get clustered data as image"
parameters: []
produces:
- "image/png"
responses:
200:
description: "Successful operation"
definitions:
Location:
type: "object"
properties:
id:
type: string
format: uuid
username:
type: "string"
latitude:
type: "number"
longitude:
type: "number"
timestamp:
type: "number"
Cluster:
type: "object"
properties:
id:
type: string
format: uuid
\ No newline at end of file
import json
from typing import List, Dict
class AgiRepository:
    '''Read-only access to the travel data stored in ./db/agi/travels.json.'''

    def getLocations(self) -> List:
        '''Return one location dict per started travel.

        A travel is taken into account when its 'status' is at least 2.
        Each result carries the travel id, the coordinates stored under
        'startPlace.latitude' / 'startPlace.longitude', the timestamp 0
        and an empty username.
        '''
        # only take started travels
        started = [t for t in self.readDataFromFile() if t['status'] >= 2]

        # todo work on locations: once the data set is complete, collect
        # self._locations_per_user(travel) instead of the start place only
        return [
            self.location(travel["id"],
                          travel['startPlace.latitude'],
                          travel['startPlace.longitude'],
                          0,
                          '')
            for travel in started
        ]

    def _locations_per_user(self, travel: Dict) -> List[Dict]:
        '''Build one location per (startedBy, users) pair of a travel.

        Work in progress and not called yet. Only as many entries as both
        lists provide are used.
        '''
        # todo number of complete travels with startlocation and user data
        return [
            self.location(f'{travel["id"]}-{cur_location["moment"]}',
                          cur_location['coordinate']['latitude'],
                          cur_location['coordinate']['longitude'],
                          cur_location['moment'],
                          # todo user in travel startedBy not available from dataset - currently using user list
                          cur_user['userId'])
            for cur_location, cur_user in zip(travel['startedBy'], travel['users'])
        ]

    def readDataFromFile(self) -> List[Dict]:
        '''Load and return the parsed content of ./db/agi/travels.json.

        The path is relative to the current working directory.
        '''
        # JSON is exchanged as UTF-8; do not depend on the platform default encoding
        with open('./db/agi/travels.json', 'r', encoding='utf-8') as f_travels:
            return json.load(f_travels)

    def location(self, id_, lat, long_, timestamp, username) -> dict:
        '''Assemble a location dict from its individual fields.'''
        return {
            "id": id_,
            'latitude': lat,
            'longitude': long_,
            "timestamp": timestamp,
            "username": username
        }
from __future__ import annotations
class LocationDatastore:
    '''This Singleton simulates a location database.

    Obtain the shared object through get_instance(); calling the
    constructor while an instance already exists raises an Exception.
    '''

    # the one shared instance, created lazily by get_instance()
    _instance = None

    @staticmethod
    def get_instance() -> "LocationDatastore":
        '''Return the shared instance, creating it on first use.'''
        if LocationDatastore._instance is None:
            LocationDatastore._instance = LocationDatastore()
        return LocationDatastore._instance

    def __init__(self):
        '''Create the store with an empty list of locations.

        Raises an Exception if the shared instance already exists.
        '''
        if LocationDatastore._instance is not None:
            raise Exception("This class is a singleton!")
        self.locations = []

    def add(self, location):
        '''Append a location to the store.'''
        self.locations.append(location)

    def get(self):
        '''Return the list of stored locations (the internal list, not a copy).'''
        return self.locations
\ No newline at end of file
from db.location_datastore import LocationDatastore
class Repository:
    '''Thin access layer in front of the shared LocationDatastore.'''

    def __init__(self):
        '''Bind this repository to the shared datastore instance.'''
        self.store = LocationDatastore.get_instance()

    def addLocation(self, location):
        '''Hand the given location over to the datastore.'''
        self.store.add(location)

    def getLocations(self):
        '''Return whatever the datastore currently holds.'''
        stored = self.store.get()
        return stored
### init logging ###
import logging
LOG_FORMAT = ('%(levelname) -5s %(asctime)s %(name)s:%(funcName) -35s %(lineno) -5d: %(message)s')
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
LOGGER = logging.getLogger(__name__)
#############################
import connexion
# load swagger config
# load swagger config
app = connexion.App(__name__, specification_dir='configs/')
app.add_api('swagger.yml')
......@@ -9,5 +16,5 @@ def api_root():
return 'Endpoint of community-detection-microservice!'
# start app
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from typing import List, Dict
class Clusterer:
    '''Clusters location dicts with DBSCAN and plots them.

    A location is a dict providing at least the keys 'latitude' and
    'longitude'.
    '''

    def __init__(self, epsilon=11, min_points=2):
        '''Store the DBSCAN parameters.

        :param epsilon: handed to DBSCAN as eps
        :param min_points: handed to DBSCAN as min_samples
        '''
        self.epsilon = epsilon
        self.min_points = min_points

    @staticmethod
    def _to_coordinates(locations: List) -> np.ndarray:
        '''Convert location dicts to an array with one (latitude, longitude) row each.'''
        return np.asarray([(l['latitude'], l['longitude']) for l in locations])

    def draw_locations(self, locations: List, labels: List = None) -> "plt.Figure":
        '''Plot the locations, coloured by label.

        :param locations: location dicts; None or empty yields an empty plot
        :param labels: one label per location; when missing or of a different
            length than locations, labels are computed with create_labels()
        :returns: the figure containing the scatter plot
        '''
        if locations is None or len(locations) == 0:
            return self._draw_locations()

        if labels is None or len(locations) != len(labels):
            labels = self.create_labels(locations)

        return self._draw_locations(
            locations=self._to_coordinates(locations),
            partition_info=labels
        )

    def _draw_locations(self, locations: np.ndarray = None, centroids: np.ndarray = None, partition_info=None) -> "plt.Figure":
        '''Create a figure with the given points and, optionally, centroids.

        :param locations: array whose columns 0 and 1 are plotted against each other
        :param centroids: array drawn as black crosses, same column layout
        :param partition_info: one label per location; equal labels share a colour
        '''
        fig = plt.Figure()
        axis = fig.add_subplot(1, 1, 1)

        if locations is not None:
            if partition_info is not None:
                # Map every distinct label to its own colour. Indexing the
                # palette with the raw label would fail for labels that are
                # not 0..k-1 and would rely on wrap-around for the label -1.
                distinct_labels = list(dict.fromkeys(partition_info))
                palette = plt.cm.rainbow(np.linspace(0, 1, len(distinct_labels)))
                color_by_label = dict(zip(distinct_labels, palette))
                colors = [color_by_label[label] for label in partition_info]
            else:
                # no partitioning known: one colour per location
                colors = plt.cm.rainbow(np.linspace(0, 1, len(locations)))

            axis.scatter(locations[:, 0],
                         locations[:, 1],
                         c=colors)

        if centroids is not None:
            # draw black centroids
            axis.scatter(centroids[:, 0], centroids[:, 1], c='k', marker='x', s=80)

        return fig

    def create_labels(self, locations: List) -> List:
        '''Run DBSCAN on the locations and return one label per location.

        None or empty input is returned unchanged.
        '''
        if locations is None or len(locations) == 0:
            return locations  # trash in trash out

        dbsc = DBSCAN(eps=self.epsilon, min_samples=self.min_points)
        dbsc = dbsc.fit(self._to_coordinates(locations))
        return dbsc.labels_.tolist()

    def label_locations(self, locations: List[Dict], labels: List) -> List:
        '''Store labels[i] under the key 'cluster_label' of locations[i], in place.

        Does nothing when either argument is None and always returns None.

        :raises ValueError: if both sequences differ in length
        '''
        if locations is None or labels is None:
            return

        if len(locations) != len(labels):
            raise ValueError("locations and labels has to have same length")

        for location, label in zip(locations, labels):
            location['cluster_label'] = label

    def run(self, locations: List[Dict]) -> Dict[int, List[Dict]]:
        '''Label the locations in place and group them by label.

        :returns: dict mapping each label to its locations, both in order of
            first appearance; an empty dict for None or empty input
        '''
        if locations is None or len(locations) == 0:
            # raise Exception("locations has to contain something")
            return {}

        labels = self.create_labels(locations)
        self.label_locations(locations, labels)

        # single pass instead of re-filtering all locations for every label
        clusters = {}
        for location, label in zip(locations, labels):
            clusters.setdefault(label, []).append(location)
        return clusters
\ No newline at end of file
connexion[swagger-ui]
numpy
matplotlib
scikit-learn
\ No newline at end of file
import io
from flask import request, Response
from db.agi.agi_repository import AgiRepository
from processing.clusterer import Clusterer
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
# module-level objects shared by the handlers in this module
repo = AgiRepository()
clusterer = Clusterer()
def get():
    '''Cluster the locations of the AGI repository and return the clusters.'''
    return clusterer.run(repo.getLocations())
def get_image():
    '''Draw the locations of the AGI repository and return the plot as PNG response.'''
    figure = clusterer.draw_locations(repo.getLocations())

    # render the figure into memory instead of a file
    png_buffer = io.BytesIO()
    FigureCanvas(figure).print_png(png_buffer)

    return Response(png_buffer.getvalue(), mimetype="image/png")
\ No newline at end of file
import io
from flask import request, Response
from db.repository import Repository
from processing.clusterer import Clusterer
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
# module-level objects shared by the handlers in this module
repo = Repository()
clusterer = Clusterer()
def get():
    '''Cluster the locations held by the repository and return the clusters.'''
    return clusterer.run(repo.getLocations())
def get_image():
    '''Draw the locations held by the repository and return the plot as PNG response.'''
    figure = clusterer.draw_locations(repo.getLocations())

    # render the figure into memory instead of a file
    png_buffer = io.BytesIO()
    FigureCanvas(figure).print_png(png_buffer)

    return Response(png_buffer.getvalue(), mimetype="image/png")
\ No newline at end of file
from flask import request, Response
from db.repository import Repository
# module-level repository shared by the handlers in this module
repo = Repository()
def post():
    '''Add the JSON body of the current request to the repository and answer with 201.'''
    repo.addLocation(request.json)
    return Response(status=201)
def get():
    '''Return the locations currently held by the repository.'''
    stored_locations = repo.getLocations()
    return stored_locations
import unittest
import sys
sys.path.insert(1, './')
# python -m unittest discover -v tests
from processing.clusterer import Clusterer
class TestClusterer(unittest.TestCase):
    '''Tests for Clusterer.create_labels and Clusterer.label_locations.'''

    clusterer:Clusterer = None

    def setUp(self):
        # fresh clusterer with fixed parameters for every test
        self.clusterer = Clusterer(epsilon=10, min_points=2)

    # create_labels

    def test_create_labels_noneInput_noneOutput(self):
        self.assertIsNone(self.clusterer.create_labels(None))

    def test_create_labels_emptyInput_emptyOutput(self):
        self.assertEqual([], self.clusterer.create_labels([]))

    def test_create_labels_singleInput_singleCluster(self):
        single = [self.location(1,2)]
        self.assertEqual(1, len(self.clusterer.create_labels(single)))

    def test_create_labels_nearInputs_singleCluster(self):
        near_points = [self.location(1,2), self.location(2,2)]

        result = self.clusterer.create_labels(near_points)

        self.assertEqual(2, len(result))
        first, second = result
        self.assertEqual(first, second)

    def test_create_labels_nearInputs_twoClusters(self):
        points = [self.location(1,2), self.location(2,2), self.location(20,20)]

        result = self.clusterer.create_labels(points)

        self.assertEqual(3, len(result))
        first, second, far_away = result
        self.assertEqual(first, second)
        self.assertNotEqual(first, far_away)

    # label_locations

    def test_label_locations_NoneLocations_NoException(self):
        self.clusterer.label_locations(None, [])

    def test_label_locations_NoneLabels_NoException(self):
        self.clusterer.label_locations([], None)

    def test_label_locations_emptyInput_emptyOutput(self):
        nothing = []
        self.clusterer.label_locations(nothing, [])
        self.assertEqual(0, len(nothing))

    def test_label_locations_diffInputLengths_ValueError_1(self):
        with self.assertRaises(ValueError):
            self.clusterer.label_locations([], [1])

    def test_label_locations_diffInputLengths_ValueError_2(self):
        with self.assertRaises(ValueError):
            self.clusterer.label_locations([self.location(1,2)], [])

    def test_label_locations_multInput_correctlyLabeled(self):
        points = [self.location(1,2), self.location(2,2), self.location(20,20)]
        expected_labels = [17,2,20]

        self.clusterer.label_locations(points, expected_labels)

        self.assertEqual(3, len(points))
        self.assertHaveLabelsAsNewKey(points, expected_labels)

    # helper methods:

    def location(self, lat, long_) -> dict:
        '''Build a minimal location dict for the given coordinates.'''
        return {'latitude': lat, 'longitude':long_}

    def assertHaveLabelsAsNewKey(self, locations, labels):
        '''Assert that locations[i]['cluster_label'] equals labels[i] for every i.'''
        for index, labeled in enumerate(locations):
            self.assertEqual(labels[index], labeled['cluster_label'])


# allow running this module directly as a script
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment