Commit 72491c09 authored by Alexander

clustering based on time

parent 9bca8889
......@@ -21,7 +21,7 @@ class Clusterer:
partition_info = labels
)
def _draw_locations(self, locations:np.ndarray=None, centroids:np.ndarray=None, partition_info=None) -> plt.Figure:
def _draw_locations(self, locations:np.ndarray=None, centroids:np.ndarray=None, partition_info:List=None) -> plt.Figure:
fig = plt.Figure()
axis = fig.add_subplot(1, 1, 1)
......@@ -55,6 +55,9 @@ class Clusterer:
return labels.tolist()
def extract_location_data(self, locations: List[dict]) -> np.ndarray:
    '''Builds a numpy array of (latitude, longitude) float pairs.

    Every entry of locations must provide the keys 'latitude' and
    'longitude' with values that float() accepts. The rows of the result
    are in the same order as the input entries.
    '''
    coordinates = []
    for entry in locations:
        latitude = float(entry['latitude'])
        longitude = float(entry['longitude'])
        coordinates.append((latitude, longitude))
    return np.asarray(coordinates)
def label_locations(self, locations:List[Dict], labels:List) -> List:
if locations is None or labels is None:
return
......@@ -65,7 +68,7 @@ class Clusterer:
for i in range(len(locations)):
locations[i]['cluster_label'] = labels[i]
def run(self, locations:List[Dict]) -> Dict[int, List[Dict]]:
def cluster_locations(self, locations:List[Dict]) -> Dict[int, List[Dict]]:
'''Returns a dictionary with identified clusters and their locations copied from the input'''
if locations is None or len(locations) == 0:
# raise Exception("locations has to contain something")
......@@ -80,5 +83,25 @@ class Clusterer:
return clusters
def extract_location_data(self, locations: List[dict]) -> np.ndarray:
    '''Converts location dicts into an array with one (latitude, longitude) row each.

    The values stored under 'latitude' and 'longitude' are converted with
    float(); the input order is preserved.
    '''
    def to_pair(entry):
        return (float(entry['latitude']), float(entry['longitude']))

    return np.asarray(list(map(to_pair, locations)))
\ No newline at end of file
def cluster_times(self, times:List[Dict]) -> Dict[int, List[Dict]]:
    '''Clusters the given entries by their timestamp using DBSCAN.

    Every entry must provide a 'timestamp' value that float() accepts.
    Each entry is tagged in place with a 'cluster_label' key (through
    label_locations), the clustered entries are plotted and the plot is
    written to 'img.png'.

    Returns a dictionary that maps each cluster label to the input entries
    (the same dict objects, not copies) assigned to that cluster. Entries
    labelled -1 (DBSCAN noise) are left out. None or empty input yields
    an empty dictionary.
    '''
    if times is None or len(times) == 0:
        # nothing to fit; DBSCAN rejects an empty sample set
        return {}

    # DBSCAN expects 2D samples, so every timestamp is padded with a constant 0
    samples = np.asarray([(float(t['timestamp']), float(0)) for t in times])

    dbsc = DBSCAN(eps = self.epsilon, min_samples = self.min_points)
    # plain python ints instead of numpy scalars for the labels
    labels = dbsc.fit(samples).labels_.tolist()

    self.label_locations(times, labels)

    # the label -1 is used by DBSCAN for noise, i.e. entries without a cluster
    clustered = [t for t in times if t['cluster_label'] != -1]
    info = [label for label in labels if label != -1]

    # reshape keeps the (n, 2) layout even if every entry was classified as noise
    points = np.asarray(
        [(float(t['timestamp']), float(0)) for t in clustered]).reshape(-1, 2)
    fig = self._draw_locations(locations=points, partition_info=info)
    fig.savefig('img.png')

    clusters = {}
    for entry in clustered:
        clusters.setdefault(entry['cluster_label'], []).append(entry)
    return clusters
\ No newline at end of file
......@@ -12,16 +12,15 @@ from processing.clusterer import Clusterer
DEBUG = False
# used to cluster the users based on their main location
user_clusterer = Clusterer()
repo = Repository()
def run_location_clustering():
all_location_traces = repo.get_locations()
user_clusterer = Clusterer()
all_location_traces = repo.get_agi_locations()
cluster_result = user_clusterer.run(
cluster_result = user_clusterer.cluster_locations(
[l.to_serializable_dict() for l in all_location_traces])
clusters = [UserCluster(key, value)
......@@ -30,6 +29,14 @@ def run_location_clustering():
store_user_clusters(clusters)
def run_time_clustering():
    '''Loads the locations from the repository and clusters them by time.

    The traces are converted to serializable dicts and handed to a Clusterer
    created with epsilon = 10**5.8.
    NOTE(review): the unit of this epsilon depends on the timestamp format - confirm.
    '''
    traces = repo.get_agi_locations()
    time_clusterer = Clusterer(epsilon=10**5.8)
    time_clusterer.cluster_times(
        [trace.to_serializable_dict() for trace in traces])
def store_user_clusters(user_clusters: List[UserCluster]):
if DEBUG:
print(user_clusters)
......@@ -40,4 +47,4 @@ def store_user_clusters(user_clusters: List[UserCluster]):
# Entry point: the calls below only run when this module is executed directly as a script.
if __name__ == "__main__":
run_location_clustering()
run_time_clustering()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment