Commit c94aba76 authored by Alexander

preparation for mongodb storage

parent 72491c09
...@@ -43,6 +43,7 @@ class Clusterer: ...@@ -43,6 +43,7 @@ class Clusterer:
return fig return fig
# TODO refactor for other input
def create_labels(self, locations:List) -> List: def create_labels(self, locations:List) -> List:
if locations is None or len(locations) == 0: if locations is None or len(locations) == 0:
return locations # trash in trash out return locations # trash in trash out
...@@ -58,6 +59,7 @@ class Clusterer: ...@@ -58,6 +59,7 @@ class Clusterer:
def extract_location_data(self, locations: List[dict]) -> np.ndarray: def extract_location_data(self, locations: List[dict]) -> np.ndarray:
return np.asarray([(float(l['latitude']), float(l['longitude'])) for l in locations]) return np.asarray([(float(l['latitude']), float(l['longitude'])) for l in locations])
# TODO refactor for other input
def label_locations(self, locations:List[Dict], labels:List) -> List: def label_locations(self, locations:List[Dict], labels:List) -> List:
if locations is None or labels is None: if locations is None or labels is None:
return return
...@@ -84,24 +86,19 @@ class Clusterer: ...@@ -84,24 +86,19 @@ class Clusterer:
return clusters return clusters
def cluster_times(self, times:List[Dict]) -> Dict[int, List[Dict]]: def cluster_times(self, times:List[Dict]) -> Dict[int, List[Dict]]:
times1 = np.asarray([((t['timestamp']), 0) for t in times])
# times.sort(key=lambda x: x['timestamp']) # TODO refactor for other input
times1 = np.asarray([(float(t['timestamp']), float(0)) for t in times])
# print(times)
dbsc = DBSCAN(eps = self.epsilon, min_samples = self.min_points) dbsc = DBSCAN(eps = self.epsilon, min_samples = self.min_points)
dbsc = dbsc.fit(times1) dbsc = dbsc.fit(times1)
labels = dbsc.labels_ labels = dbsc.labels_.tolist()
print(labels)
self.label_locations(times, labels) self.label_locations(times, labels)
times = [t for t in times if t['cluster_label'] != -1]
print(times)
info = [l for l in labels if l != -1] clusters = {}
print(info) for label in labels:
clusters[label] = [l for l in times if l['cluster_label'] == label]
times1 = np.asarray([(float(t['timestamp']), float(0)) for t in times]) # fig = self._draw_locations(locations=times1, partition_info=labels)
fig = self._draw_locations(locations=times1, partition_info=info) # fig.savefig('img.png')
fig.savefig('img.png') return clusters
\ No newline at end of file \ No newline at end of file
...@@ -10,7 +10,7 @@ from db.repository import Repository ...@@ -10,7 +10,7 @@ from db.repository import Repository
from processing.clusterer import Clusterer from processing.clusterer import Clusterer
DEBUG = False DEBUG = True
repo = Repository() repo = Repository()
...@@ -30,11 +30,16 @@ def run_location_clustering(): ...@@ -30,11 +30,16 @@ def run_location_clustering():
def run_time_clustering(): def run_time_clustering():
user_clusterer = Clusterer(epsilon=10**5.8)
all_location_traces = repo.get_agi_locations() all_location_traces = repo.get_agi_locations()
user_clusterer = Clusterer(epsilon=10**5.8) cluster_result = user_clusterer.cluster_times([l.to_serializable_dict() for l in all_location_traces])
user_clusterer.cluster_times([l.to_serializable_dict() for l in all_location_traces]) clusters = [UserCluster(key, value)
for key, value in cluster_result.items()]
store_user_clusters(clusters)
def store_user_clusters(user_clusters: List[UserCluster]): def store_user_clusters(user_clusters: List[UserCluster]):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment