Commit c66bd0dd authored by Alexander Lercher

Slicing for all clusters using start- and end-timestamp

parent 9885bbf7
......@@ -11,47 +11,51 @@ from db.entities.timeslice import TimeSlice
from db.entities import ClusterSet
from typing import Tuple, Dict, Any
TimeSliceKey = Tuple[int, int]


def convert_to_time_slice_key(timestamp: str) -> TimeSliceKey:
    '''
    Returns the tuple (year, week_of_year) from a timestamp. This is used as the key for the slicing.

    Only the first 10 characters of the timestamp are parsed, so a 13-digit
    millisecond value is reduced to its 10-digit seconds part.
    The value is interpreted as UTC, which keeps the key independent of the
    time zone of the machine running the code.
    Year and week follow the ISO 8601 calendar, i.e. the first or last days
    of a calendar year can belong to a week of the neighbouring year.

    :params timestamp: Unix timestamp as a string
    :returns: The tuple (ISO year, ISO week number)
    '''
    # Function-scope import so the block does not depend on the file-level import list.
    from datetime import timezone

    # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp().
    time = datetime.fromtimestamp(float(timestamp[0:10]), tz=timezone.utc)
    (year, week, _) = time.isocalendar()
    return (year, week)
def split_clusterset_by_time(clustersets) -> Dict[TimeSliceKey, TimeSlice]:
    '''
    Distributes all nodes of a single clusterset into individual time slices based on their timestamps.
    A node is added to the slice of its 'Starting_time' and to the slice of its 'Finished_time'
    (only once if both fall into the same slice).
    Each node is passed on unchanged together with the label of its cluster.

    :params clustersets: The single clusterset whose nodes are split (the plural parameter name is kept for existing callers)
    :returns: A dict of time slices where the key is the (year, week_of_year) tuple and the value is the time slice for that week
    '''
    # Bind the argument to the name used below. Without this the body would read a
    # module-level `clusterset` variable, which only exists when the file runs as a script.
    clusterset = clustersets
    time_slices: Dict[TimeSliceKey, TimeSlice] = {}

    for cluster in clusterset.clusters:
        for node in cluster.nodes:
            # A set collapses the two keys into one when start and end lie in the same week.
            # NOTE(review): weeks strictly between start and end are not covered — confirm
            # whether nodes can span more than two slices.
            time_keys = {
                convert_to_time_slice_key(str(node['Starting_time'])),
                convert_to_time_slice_key(str(node['Finished_time'])),
            }

            for time_key in time_keys:
                # Create the slice lazily the first time its key is seen.
                if time_key not in time_slices:
                    time_slices[time_key] = TimeSlice(time_key, clusterset.layer_name)
                time_slices[time_key].add_node_to_cluster(cluster.cluster_label, node)

    return time_slices
if __name__ == "__main__":
    # Rebuild the stored time slices for every clusterset in the repository.
    repo = Repository()

    # Wipe the old slices once, before the loop; doing it per clusterset would
    # delete the slices stored for the clustersets processed earlier.
    repo.remove_all_time_slices()

    for clusterset in repo.get_clustersets():
        time_slices = split_clusterset_by_time(clusterset)

        # Only the slices themselves are stored; the keys are not needed here.
        for time_slice in time_slices.values():
            repo.add_time_slice(time_slice)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment