Source code for pm4py.algo.discovery.inductive.variants.im_f.splitting_infrequent

'''
    This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

    PM4Py is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PM4Py is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PM4Py.  If not, see <https://www.gnu.org/licenses/>.
'''
from pm4py.objects.log import obj


[docs]def filter_trace_on_cut_partition(trace, partition, activity_key): filtered_trace = obj.Trace() for event in trace: if event[activity_key] in partition: filtered_trace.append(event) return filtered_trace
[docs]def find_split_point(trace, cut_partition, start, ignore, activity_key): possibly_best_before_first_activity = False least_cost = start position_with_least_cost = start cost = float(0) i = start while i < len(trace): if trace[i][activity_key] in cut_partition: cost = cost-1 elif trace[i][activity_key] not in ignore: # use bool variable for the case, that the best split is before the first activity if i == 0: possibly_best_before_first_activity = True cost = cost+1 if cost <= least_cost: least_cost = cost position_with_least_cost = i+1 i += 1 if possibly_best_before_first_activity and position_with_least_cost == 1: position_with_least_cost = 0 return position_with_least_cost
[docs]def cut_trace_between_two_points(trace, point_a, point_b): cutted_trace = obj.Trace() # we have to use <= although in the paper the intervall is [) because our index starts at 0 while point_a < point_b: cutted_trace.append(trace[point_a]) point_a += 1 return cutted_trace
[docs]def split_xor_infrequent(cut, l, activity_key): # TODO think of empty logs # creating the empty L_1,...,L_n from the second code-line on page 205 n = len(cut) new_logs = [obj.EventLog() for i in range(0, n)] for trace in l: # for all traces number_of_events_in_trace = 0 index_of_cut_partition = 0 i = 0 # use i as index here so that we can write in L_i for i in range(0, len(cut)): # for all cut partitions temp_counter = 0 for event in trace: # for all events in current trace if event[activity_key] in cut[i]: # count amount of events from trace in partition temp_counter += 1 if temp_counter > number_of_events_in_trace: number_of_events_in_trace = temp_counter index_of_cut_partition = i filtered_trace = filter_trace_on_cut_partition(trace, cut[index_of_cut_partition], activity_key) new_logs[index_of_cut_partition].append(filtered_trace) return new_logs
[docs]def split_sequence_infrequent(cut, l, activity_key): # write L_1,...,L_n like in second line of code on page 206 n = len(cut) new_logs = [obj.EventLog() for j in range(0, n)] ignore = [] split_points_list = [0] * len(l) for i in range(0, n): split_point = 0 # write our ignore list with all elements from past cut partitions if i != 0: for element in cut[i-1]: ignore.append(element) for j in range(len(l)): trace = l[j] new_split_point = find_split_point(trace, cut[i], split_points_list[j], ignore, activity_key) cutted_trace = cut_trace_between_two_points(trace, split_points_list[j], new_split_point) filtered_trace = filter_trace_on_cut_partition(cutted_trace, cut[i], activity_key) new_logs[i].append(filtered_trace) split_points_list[j] = new_split_point return new_logs
[docs]def split_loop_infrequent(cut, l, activity_key): n = len(cut) new_logs = [obj.EventLog() for i in range(0, n)] for trace in l: s = cut[0] st = obj.Trace() for act in trace: if act in s: st.insert(act) else: j = 0 for j in range(0, len(cut)): if cut[j] == s: break new_logs[j].append(st) st = obj.Trace() for partition in cut: if act[activity_key] in partition: s.append(partition) # L_j <- L_j + [st] with sigma_j = s j = 0 for j in range(0, len(cut)): if cut[j] == s: break new_logs[j].append(st) if s != cut[0]: new_logs[0].append(obj.EventLog()) return new_logs