Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
SMART
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
UNI-KLU
SMART
Commits
85e47ff6
Commit
85e47ff6
authored
Jan 24, 2020
by
Alexander Lercher
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'feature/clustering' into develop
parents
2ac95a70
ad3f1b64
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
27390 additions
and
9 deletions
+27390
-9
build.py
bin/build.py
+6
-4
Dockerfile
src/data-hub/community-detection-microservice/Dockerfile
+1
-2
swagger.yml
.../community-detection-microservice/app/configs/swagger.yml
+106
-0
agi_repository.py
...unity-detection-microservice/app/db/agi/agi_repository.py
+49
-0
travels.json
.../community-detection-microservice/app/db/agi/travels.json
+26958
-0
location_datastore.py
...unity-detection-microservice/app/db/location_datastore.py
+24
-0
repository.py
...hub/community-detection-microservice/app/db/repository.py
+11
-0
main.py
src/data-hub/community-detection-microservice/app/main.py
+10
-3
clusterer.py
...munity-detection-microservice/app/processing/clusterer.py
+80
-0
requirements.txt
...hub/community-detection-microservice/app/requirements.txt
+4
-0
agi_cluster.py
.../community-detection-microservice/app/rest/agi_cluster.py
+24
-0
cluster.py
...-hub/community-detection-microservice/app/rest/cluster.py
+24
-0
location.py
...hub/community-detection-microservice/app/rest/location.py
+12
-0
test_clusterer.py
...munity-detection-microservice/app/tests/test_clusterer.py
+81
-0
No files found.
bin/build.py
View file @
85e47ff6
...
@@ -2,13 +2,13 @@ import os
...
@@ -2,13 +2,13 @@ import os
import
shutil
import
shutil
import
sys
import
sys
if
len
(
sys
.
argv
)
!=
2
:
#
if len(sys.argv) != 2:
raise
Exception
(
"Push to Docker Hub will not work, please provide username as argument"
)
#
raise Exception("Push to Docker Hub will not work, please provide username as argument")
DOCKER_COMPOSE_NAME
=
"Dockerfile"
DOCKER_COMPOSE_NAME
=
"Dockerfile"
ROOT
=
'./'
ROOT
=
'./'
SOURCEPATH
=
f
'{ROOT}src/'
SOURCEPATH
=
f
'{ROOT}src/'
DOCKER_USERNAME
=
sys
.
argv
[
1
]
DOCKER_USERNAME
=
"alexx882"
paths
=
[]
paths
=
[]
for
r
,
_
,
f
in
os
.
walk
(
SOURCEPATH
):
for
r
,
_
,
f
in
os
.
walk
(
SOURCEPATH
):
...
@@ -33,10 +33,12 @@ for command_arg in command_args:
...
@@ -33,10 +33,12 @@ for command_arg in command_args:
exit_val
=
os
.
system
(
f
"docker image build -t {image_name} {ROOT}"
)
exit_val
=
os
.
system
(
f
"docker image build -t {image_name} {ROOT}"
)
os
.
remove
(
os
.
path
.
join
(
ROOT
,
DOCKER_COMPOSE_NAME
))
os
.
remove
(
os
.
path
.
join
(
ROOT
,
DOCKER_COMPOSE_NAME
))
res_str
.
append
(
f
"
###
{image_name} built with exit code {exit_val}"
)
res_str
.
append
(
f
"{image_name} built with exit code {exit_val}"
)
os
.
system
(
f
"docker push {image_name}"
)
os
.
system
(
f
"docker push {image_name}"
)
res_str
.
append
(
f
"{image_name} pushed with exit code {exit_val}"
)
print
(
f
"Found {len(command_args)} images"
)
print
(
f
"Found {len(command_args)} images"
)
for
s
in
res_str
:
for
s
in
res_str
:
print
(
s
)
print
(
s
)
src/data-hub/community-detection-microservice/Dockerfile
View file @
85e47ff6
...
@@ -5,13 +5,12 @@ ENV http_proxy http://proxy.uni-klu.ac.at:3128/
...
@@ -5,13 +5,12 @@ ENV http_proxy http://proxy.uni-klu.ac.at:3128/
ENV
https_proxy http://proxy.uni-klu.ac.at:3128/
ENV
https_proxy http://proxy.uni-klu.ac.at:3128/
RUN
apt-get update
RUN
apt-get update
RUN
pip
install
flask
RUN
pip
install
connexion[swagger-ui]
EXPOSE
5000
EXPOSE
5000
WORKDIR
/app
WORKDIR
/app
COPY
src/data-hub/community-detection-microservice/app/ /app/
COPY
src/data-hub/community-detection-microservice/app/ /app/
RUN
pip
install
-r
requirements.txt
RUN
chmod
a+x main.py
RUN
chmod
a+x main.py
CMD
["python", "./main.py"]
CMD
["python", "./main.py"]
\ No newline at end of file
src/data-hub/community-detection-microservice/app/configs/swagger.yml
View file @
85e47ff6
...
@@ -28,3 +28,109 @@ paths:
...
@@ -28,3 +28,109 @@ paths:
responses
:
responses
:
200
:
200
:
description
:
"
Successful
echo
of
request
data"
description
:
"
Successful
echo
of
request
data"
/location
:
post
:
operationId
:
"
rest.location.post"
tags
:
-
"
Locations"
summary
:
"
Add
new
location
data"
parameters
:
-
in
:
body
name
:
"
Location"
description
:
"
The
location
data
to
be
added"
required
:
true
schema
:
$ref
:
"
#/definitions/Location"
responses
:
201
:
description
:
"
Successful
operation"
400
:
description
:
"
Invalid
input"
get
:
operationId
:
"
rest.location.get"
tags
:
-
"
Locations"
summary
:
"
Get
location
data"
parameters
:
[]
responses
:
200
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/Location"
/cluster
:
get
:
operationId
:
"
rest.cluster.get"
tags
:
-
"
Clusters"
summary
:
"
Get
clustered
data"
parameters
:
[]
responses
:
200
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/Cluster"
/cluster/cluster.png
:
get
:
operationId
:
"
rest.cluster.get_image"
tags
:
-
"
Clusters"
summary
:
"
Get
clustered
data
as
image"
parameters
:
[]
produces
:
-
"
image/png"
responses
:
200
:
description
:
"
Successful
operation"
/agi/cluster
:
get
:
operationId
:
"
rest.agi_cluster.get"
tags
:
-
"
Clusters"
summary
:
"
Get
clustered
data"
parameters
:
[]
responses
:
200
:
description
:
"
Successful
operation"
schema
:
$ref
:
"
#/definitions/Cluster"
/agi/cluster/cluster.png
:
get
:
operationId
:
"
rest.agi_cluster.get_image"
tags
:
-
"
Clusters"
summary
:
"
Get
clustered
data
as
image"
parameters
:
[]
produces
:
-
"
image/png"
responses
:
200
:
description
:
"
Successful
operation"
definitions
:
Location
:
type
:
"
object"
properties
:
id
:
type
:
string
format
:
uuid
username
:
type
:
"
string"
latitude
:
type
:
"
number"
longitude
:
type
:
"
number"
timestamp
:
type
:
"
number"
Cluster
:
type
:
"
object"
properties
:
id
:
type
:
string
format
:
uuid
\ No newline at end of file
src/data-hub/community-detection-microservice/app/db/agi/agi_repository.py
0 → 100644
View file @
85e47ff6
import
json
from
typing
import
List
,
Dict
class AgiRepository:
    '''Read-only access to the AGI travel dataset stored in a JSON file.'''

    def getLocations(self) -> List:
        '''Return one location dict per started travel.

        The travels are loaded with `readDataFromFile`; only entries whose
        'status' is at least 2 are kept. Every kept travel contributes its
        start place, with timestamp 0 and an empty username.
        '''
        # only take started travels
        started_travels = [t for t in self.readDataFromFile() if t['status'] >= 2]

        # todo work on locations
        # todo number of complete travels with startlocation and user data
        # The per-user start locations built by _started_by_locations are
        # intentionally not part of the result yet; previously that code sat
        # unreachable behind an unconditional `continue`.
        return [
            self.location(
                travel["id"],
                travel['startPlace.latitude'],
                travel['startPlace.longitude'],
                0,
                '')
            for travel in started_travels
        ]

    def _started_by_locations(self, travel: Dict) -> List:
        '''Build one location per paired 'startedBy' / 'users' entry of `travel`.

        Entries are paired by position and surplus entries of the longer list
        are ignored. Currently not called by `getLocations`.
        '''
        # todo user in travel startedBy not available from dataset - currently using user list
        return [
            self.location(
                f'{travel["id"]}-{start["moment"]}',
                start['coordinate']['latitude'],
                start['coordinate']['longitude'],
                start['moment'],
                user['userId'])
            for start, user in zip(travel['startedBy'], travel['users'])
        ]

    def readDataFromFile(self) -> List[Dict]:
        '''Parse './db/agi/travels.json' (relative to the working directory).'''
        # JSON text is UTF-8; do not rely on the platform's default encoding.
        with open('./db/agi/travels.json', 'r', encoding='utf-8') as f_travels:
            return json.load(f_travels)

    def location(self, id_, lat, long_, timestamp, username) -> dict:
        '''Assemble a location dict from its individual fields.'''
        return {
            "id": id_,
            'latitude': lat,
            'longitude': long_,
            "timestamp": timestamp,
            "username": username
        }
src/data-hub/community-detection-microservice/app/db/agi/travels.json
0 → 100644
View file @
85e47ff6
This source diff could not be displayed because it is too large. You can
view the blob
instead.
src/data-hub/community-detection-microservice/app/db/location_datastore.py
0 → 100644
View file @
85e47ff6
from
__future__
import
annotations
class LocationDatastore:
    '''This Singleton simulates a location database'''

    # the one shared instance, created lazily by get_instance()
    _instance = None

    @staticmethod
    def get_instance() -> "LocationDatastore":
        '''Return the shared instance, creating it on first use.'''
        if LocationDatastore._instance is None:
            LocationDatastore._instance = LocationDatastore()
        return LocationDatastore._instance

    def __init__(self):
        '''Create the empty store.

        Raises:
            Exception: if the shared instance already exists.
        '''
        if LocationDatastore._instance is not None:
            raise Exception("This class is a singleton!")

        self.locations = []

    def add(self, location):
        '''Append `location` to the in-memory list.'''
        self.locations.append(location)

    def get(self):
        '''Return the internal list of stored locations (not a copy).'''
        return self.locations
\ No newline at end of file
src/data-hub/community-detection-microservice/app/db/repository.py
0 → 100644
View file @
85e47ff6
from
db.location_datastore
import
LocationDatastore
class Repository:
    '''Thin facade that forwards location reads and writes to the LocationDatastore.'''

    def __init__(self):
        # all Repository objects talk to the datastore's single shared instance
        datastore = LocationDatastore.get_instance()
        self.store = datastore

    def addLocation(self, location):
        '''Hand `location` over to the datastore.'''
        datastore = self.store
        datastore.add(location)

    def getLocations(self):
        '''Return whatever the datastore's `get()` yields.'''
        stored_locations = self.store.get()
        return stored_locations
src/data-hub/community-detection-microservice/app/main.py
View file @
85e47ff6
### init logging ###
import
logging
LOG_FORMAT
=
(
'
%(levelname) -5
s
%(asctime)
s
%(name)
s:
%(funcName) -35
s
%(lineno) -5
d:
%(message)
s'
)
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
LOG_FORMAT
)
LOGGER
=
logging
.
getLogger
(
__name__
)
#############################
import
connexion
import
connexion
# load swagger config
# load swagger config
...
...
src/data-hub/community-detection-microservice/app/processing/clusterer.py
0 → 100644
View file @
85e47ff6
import
json
import
numpy
as
np
import
matplotlib.pyplot
as
plt
from
sklearn.cluster
import
DBSCAN
from
typing
import
List
,
Dict
class Clusterer:
    '''Clusters location dicts with DBSCAN and draws them as a scatter plot.

    A location is a dict with at least the keys 'latitude' and 'longitude'.
    The raw coordinate values are handed to DBSCAN unchanged, so `epsilon`
    is expressed in the same units as the coordinates.
    '''

    def __init__(self, epsilon=11, min_points=2):
        '''Store the DBSCAN parameters.

        Args:
            epsilon: passed to DBSCAN as `eps`.
            min_points: passed to DBSCAN as `min_samples`.
        '''
        self.epsilon = epsilon
        self.min_points = min_points

    @staticmethod
    def _to_coordinates(locations: List) -> np.ndarray:
        '''Convert location dicts into an array of (latitude, longitude) rows.'''
        return np.asarray([(loc['latitude'], loc['longitude']) for loc in locations])

    def draw_locations(self, locations: List, labels: List = None) -> "plt.Figure":
        '''Draw `locations`, coloured by cluster label, and return the figure.

        Args:
            locations: location dicts; None or empty yields an empty plot.
            labels: one integer label per location. When missing or of a
                different length than `locations`, labels are computed with
                `create_labels`.
        '''
        if locations is None or len(locations) == 0:
            return self._draw_locations()

        if labels is None or len(locations) != len(labels):
            labels = self.create_labels(locations)

        return self._draw_locations(
            locations=self._to_coordinates(locations),
            partition_info=labels
        )

    def _draw_locations(self, locations: np.ndarray = None, centroids: np.ndarray = None, partition_info=None) -> "plt.Figure":
        '''Scatter-plot coordinates and optional centroids on a new figure.

        Args:
            locations: array whose columns 0 and 1 are plotted as x and y.
            centroids: array plotted the same way as black 'x' markers.
            partition_info: one integer label per row of `locations`; rows
                with equal labels share a colour. Without it every row gets
                its own colour.
        '''
        fig = plt.Figure()
        axis = fig.add_subplot(1, 1, 1)

        if locations is not None:
            if partition_info is not None:
                # Map every distinct label to a colour slot instead of using
                # the label itself as an index, which fails for arbitrary
                # label values. Non-negative labels come first in ascending
                # order and negative ones (DBSCAN noise, -1) last, so DBSCAN
                # output keeps the colours it had with direct indexing.
                distinct_labels = sorted(set(partition_info), key=lambda label: (label < 0, label))
                slot_of = {label: slot for slot, label in enumerate(distinct_labels)}
                distinct_colors = plt.cm.rainbow(np.linspace(0, 1, len(distinct_labels)))
                colors = [distinct_colors[slot_of[label]] for label in partition_info]
            else:
                colors = plt.cm.rainbow(np.linspace(0, 1, len(locations)))

            # draw locations with the colors chosen above
            axis.scatter(locations[:, 0], locations[:, 1], c=colors)

        if centroids is not None:
            # draw black centroids
            axis.scatter(centroids[:, 0], centroids[:, 1], c='k', marker='x', s=80)

        return fig

    def create_labels(self, locations: List) -> List:
        '''Run DBSCAN on `locations` and return one label per location.

        None or an empty list is returned unchanged.
        '''
        if locations is None or len(locations) == 0:
            return locations  # trash in trash out

        coordinates = self._to_coordinates(locations)
        dbsc = DBSCAN(eps=self.epsilon, min_samples=self.min_points).fit(coordinates)

        return dbsc.labels_.tolist()

    def label_locations(self, locations: List[Dict], labels: List) -> None:
        '''Write each label into its location dict under 'cluster_label'.

        The dicts are modified in place; nothing is returned. Does nothing
        when either argument is None.

        Raises:
            ValueError: if `locations` and `labels` differ in length.
        '''
        if locations is None or labels is None:
            return

        if len(locations) != len(labels):
            raise ValueError("locations and labels has to have same length")

        for location, label in zip(locations, labels):
            location['cluster_label'] = label

    def run(self, locations: List[Dict]) -> Dict[int, List[Dict]]:
        '''Cluster `locations` and return them grouped by cluster label.

        Every location dict additionally receives a 'cluster_label' entry.
        None or an empty list yields an empty dict.
        '''
        if locations is None or len(locations) == 0:
            # raise Exception("locations has to contain something")
            return {}

        labels = self.create_labels(locations)
        self.label_locations(locations, labels)

        # single pass; keys appear in order of first occurrence
        clusters = {}
        for location, label in zip(locations, labels):
            clusters.setdefault(label, []).append(location)

        return clusters
\ No newline at end of file
src/data-hub/community-detection-microservice/app/requirements.txt
0 → 100644
View file @
85e47ff6
connexion[swagger-ui]
numpy
matplotlib
scikit-learn
\ No newline at end of file
src/data-hub/community-detection-microservice/app/rest/agi_cluster.py
0 → 100644
View file @
85e47ff6
import
io
from
flask
import
request
,
Response
from
db.agi.agi_repository
import
AgiRepository
from
processing.clusterer
import
Clusterer
from
matplotlib.backends.backend_agg
import
FigureCanvasAgg
as
FigureCanvas
# Module-level collaborators, created once at import time and used by the
# request handlers of this module.
repo = AgiRepository()
clusterer = Clusterer()
def get():
    '''Cluster the locations from the AGI repository and return the clusters.'''
    return clusterer.run(repo.getLocations())
def get_image():
    '''Draw the AGI repository's locations and return the plot as a PNG response.'''
    figure = clusterer.draw_locations(repo.getLocations())

    # render the figure into an in-memory buffer
    png_buffer = io.BytesIO()
    canvas = FigureCanvas(figure)
    canvas.print_png(png_buffer)

    png_bytes = png_buffer.getvalue()
    return Response(png_bytes, mimetype="image/png")
\ No newline at end of file
src/data-hub/community-detection-microservice/app/rest/cluster.py
0 → 100644
View file @
85e47ff6
import
io
from
flask
import
request
,
Response
from
db.repository
import
Repository
from
processing.clusterer
import
Clusterer
from
matplotlib.backends.backend_agg
import
FigureCanvasAgg
as
FigureCanvas
# Module-level collaborators, created once at import time and used by the
# request handlers of this module.
repo = Repository()
clusterer = Clusterer()
def get():
    '''Cluster the locations held by the repository and return the clusters.'''
    return clusterer.run(repo.getLocations())
def get_image():
    '''Draw the repository's locations and return the plot as a PNG response.'''
    figure = clusterer.draw_locations(repo.getLocations())

    # render the figure into an in-memory buffer
    png_buffer = io.BytesIO()
    canvas = FigureCanvas(figure)
    canvas.print_png(png_buffer)

    png_bytes = png_buffer.getvalue()
    return Response(png_bytes, mimetype="image/png")
\ No newline at end of file
src/data-hub/community-detection-microservice/app/rest/location.py
0 → 100644
View file @
85e47ff6
from
flask
import
request
,
Response
from
db.repository
import
Repository
# Module-level repository, created once at import time and used by the
# request handlers of this module.
repo = Repository()
def post():
    '''Store the JSON body of the current request as a location and answer 201.'''
    repo.addLocation(request.json)
    return Response(status=201)
def get():
    '''Return all locations currently held by the repository.'''
    stored_locations = repo.getLocations()
    return stored_locations
src/data-hub/community-detection-microservice/app/tests/test_clusterer.py
0 → 100644
View file @
85e47ff6
import
unittest
import
sys
sys
.
path
.
insert
(
1
,
'./'
)
# python -m unittest discover -v tests
from
processing.clusterer
import
Clusterer
class TestClusterer(unittest.TestCase):
    '''Unit tests for Clusterer.create_labels and Clusterer.label_locations.'''

    clusterer: Clusterer = None

    def setUp(self):
        # fresh clusterer per test
        self.clusterer = Clusterer(epsilon=10, min_points=2)

    def test_create_labels_noneInput_noneOutput(self):
        result = self.clusterer.create_labels(None)
        self.assertEqual(None, result)

    def test_create_labels_emptyInput_emptyOutput(self):
        result = self.clusterer.create_labels([])
        self.assertEqual([], result)

    def test_create_labels_singleInput_singleCluster(self):
        single = [self.location(1, 2)]
        result = self.clusterer.create_labels(single)
        self.assertEqual(1, len(result))

    def test_create_labels_nearInputs_singleCluster(self):
        near_points = [self.location(1, 2), self.location(2, 2)]

        result = self.clusterer.create_labels(near_points)

        self.assertEqual(2, len(result))
        first, second = result[0], result[1]
        self.assertEqual(first, second)

    def test_create_labels_nearInputs_twoClusters(self):
        points = [self.location(1, 2), self.location(2, 2), self.location(20, 20)]

        result = self.clusterer.create_labels(points)

        self.assertEqual(3, len(result))
        self.assertEqual(result[0], result[1])
        self.assertNotEqual(result[0], result[2])

    def test_label_locations_NoneLocations_NoException(self):
        self.clusterer.label_locations(None, [])

    def test_label_locations_NoneLabels_NoException(self):
        self.clusterer.label_locations([], None)

    def test_label_locations_emptyInput_emptyOutput(self):
        empty = []
        self.clusterer.label_locations(empty, [])
        self.assertEqual(0, len(empty))

    def test_label_locations_diffInputLengths_ValueError_1(self):
        with self.assertRaises(ValueError):
            self.clusterer.label_locations([], [1])

    def test_label_locations_diffInputLengths_ValueError_2(self):
        one_location = [self.location(1, 2)]
        with self.assertRaises(ValueError):
            self.clusterer.label_locations(one_location, [])

    def test_label_locations_multInput_correctlyLabeled(self):
        points = [self.location(1, 2), self.location(2, 2), self.location(20, 20)]
        expected_labels = [17, 2, 20]

        self.clusterer.label_locations(points, expected_labels)

        self.assertEqual(3, len(points))
        self.assertHaveLabelsAsNewKey(points, expected_labels)

    # helper methods:
    def location(self, lat, long_) -> dict:
        '''Build a minimal location dict for the tests.'''
        return {'latitude': lat, 'longitude': long_}

    def assertHaveLabelsAsNewKey(self, locations, labels):
        '''Assert that every location carries its label under 'cluster_label'.'''
        for idx, current in enumerate(locations):
            self.assertEqual(labels[idx], current['cluster_label'])
# Run the tests when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment