Commit c66bd0dd authored by Alexander Lercher

Slicing for all clusters using start- and end-timestamp

parent 9885bbf7
...@@ -11,47 +11,51 @@ from db.entities.timeslice import TimeSlice ...@@ -11,47 +11,51 @@ from db.entities.timeslice import TimeSlice
from db.entities import ClusterSet from db.entities import ClusterSet
from typing import Tuple, Dict, Any from typing import Tuple, Dict, Any
TimeSliceKey = Tuple[int, int]
def convert_to_time_slice_key(timestamp: str) -> Tuple[int, int]: def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
'''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.''' '''Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.'''
timestamp = datetime.fromtimestamp(float(timestamp[0:10])) time = datetime.utcfromtimestamp(float(timestamp[0:10]))
(y, w, _) = timestamp.isocalendar() (y, w, _) = time.isocalendar()
return (y, w) return (y, w)
def split_clustersets_by_time(clustersets) -> Dict[Any, TimeSlice]: def split_clusterset_by_time(clustersets) -> Dict[TimeSliceKey, TimeSlice]:
''' '''
Partitions all nodes of each clusterset into idividual time slices based on their timestamp. The information about the cluster is kept. Distributes all nodes of a single clusterset into individual time slices based on their timestamps.
If a node spans over multiple slices it will be added to all of them.
Information about clusters and the nodes in the clusters will not be changed.
:params clustersets: The clustersets whichs nodes are split :params clustersets: The clusterset whichs nodes are split
:returns: A dict of time slices where the key is the time info and value is the information about the time slice :returns: A dict of time slices where the key is the time info and value is the information about the time slice
''' '''
cnt = 0
time_slices: Dict[Any, TimeSlice] = {} time_slices: Dict[Any, TimeSlice] = {}
for clusterset in clustersets:
for cluster_no in clusterset.clusters: for cluster_no in clusterset.clusters:
for node in cluster_no.nodes: for node in cluster_no.nodes:
# assign the nodes to time slices and recreate the clusters there
# TODO use start and end time for assignment
time_key = convert_to_time_slice_key(str(node['Finished_time']))
time_keys = {
convert_to_time_slice_key(str(node['Finished_time'])),
convert_to_time_slice_key(str(node['Starting_time']))
}
for time_key in time_keys:
if time_key not in time_slices: if time_key not in time_slices:
time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name) time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)
time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node) time_slices[time_key].add_node_to_cluster(cluster_no.cluster_label, node)
return time_slices return time_slices
if __name__ == "__main__": if __name__ == "__main__":
repo = Repository() repo = Repository()
clustersets = [repo.get_clusterset('Destination_Layer')] repo.remove_all_time_slices()
time_slices = split_clustersets_by_time(clustersets)
# sort chronologically clustersets = repo.get_clustersets()
keys = list(time_slices.keys()) for clusterset in clustersets:
keys.sort() time_slices = split_clusterset_by_time(clusterset)
repo.remove_all_time_slices()
for k,v in time_slices.items(): for k,v in time_slices.items():
repo.add_time_slice(v) repo.add_time_slice(v)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment