Commit c94aba76 authored by Alexander

preparation for mongodb storage

parent 72491c09
......@@ -43,6 +43,7 @@ class Clusterer:
return fig
# TODO refactor for other input
def create_labels(self, locations:List) -> List:
if locations is None or len(locations) == 0:
return locations # trash in trash out
......@@ -58,6 +59,7 @@ class Clusterer:
def extract_location_data(self, locations: List[dict]) -> np.ndarray:
return np.asarray([(float(l['latitude']), float(l['longitude'])) for l in locations])
# TODO refactor for other input
def label_locations(self, locations:List[Dict], labels:List) -> List:
if locations is None or labels is None:
return
......@@ -84,24 +86,19 @@ class Clusterer:
return clusters
def cluster_times(self, times:List[Dict]) -> Dict[int, List[Dict]]:
times1 = np.asarray([((t['timestamp']), 0) for t in times])
# times.sort(key=lambda x: x['timestamp'])
times1 = np.asarray([(float(t['timestamp']), float(0)) for t in times])
# print(times)
# TODO refactor for other input
dbsc = DBSCAN(eps = self.epsilon, min_samples = self.min_points)
dbsc = dbsc.fit(times1)
labels = dbsc.labels_
print(labels)
labels = dbsc.labels_.tolist()
self.label_locations(times, labels)
times = [t for t in times if t['cluster_label'] != -1]
print(times)
info = [l for l in labels if l != -1]
print(info)
clusters = {}
for label in labels:
clusters[label] = [l for l in times if l['cluster_label'] == label]
times1 = np.asarray([(float(t['timestamp']), float(0)) for t in times])
fig = self._draw_locations(locations=times1, partition_info=info)
fig.savefig('img.png')
\ No newline at end of file
# fig = self._draw_locations(locations=times1, partition_info=labels)
# fig.savefig('img.png')
return clusters
\ No newline at end of file
......@@ -10,7 +10,7 @@ from db.repository import Repository
from processing.clusterer import Clusterer
DEBUG = False
DEBUG = True
repo = Repository()
......@@ -30,11 +30,16 @@ def run_location_clustering():
def run_time_clustering():
user_clusterer = Clusterer(epsilon=10**5.8)
all_location_traces = repo.get_agi_locations()
user_clusterer = Clusterer(epsilon=10**5.8)
cluster_result = user_clusterer.cluster_times([l.to_serializable_dict() for l in all_location_traces])
user_clusterer.cluster_times([l.to_serializable_dict() for l in all_location_traces])
clusters = [UserCluster(key, value)
for key, value in cluster_result.items()]
store_user_clusters(clusters)
def store_user_clusters(user_clusters: List[UserCluster]):
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment