Source code for pm4py.objects.log.util.get_class_representation

'''
    This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

    PM4Py is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PM4Py is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PM4Py.  If not, see <https://www.gnu.org/licenses/>.
'''
import numpy as np
from pm4py.util.business_hours import BusinessHours


def get_class_representation_by_str_ev_attr_value_presence(log, str_attr_name, str_attr_value):
    """
    Build the target part for decision tree learning by checking, trace by
    trace, whether at least one event carries the given value for a (string)
    event attribute

    Parameters
    -------------
    log
        Trace log
    str_attr_name
        Name of the event attribute to inspect
    str_attr_value
        Attribute value whose presence is tested

    Returns
    -------------
    target
        Numpy array holding, for each trace, the index of its class
    classes
        Name of the classes ("True" / "False"), in order of first appearance
    """
    class_index = {}
    encoded = []

    for trace in log:
        found = any(
            str_attr_name in event and event[str_attr_name] == str_attr_value
            for event in trace
        )
        # setdefault hands out the next free index the first time a label is
        # met, and returns the already assigned index afterwards
        encoded.append(class_index.setdefault(str(found), len(class_index)))

    # dict insertion order is the order in which the classes were discovered
    return np.array(encoded), list(class_index)
def get_class_representation_by_str_ev_attr_value_value(log, str_attr_name):
    """
    Build the target part for decision tree learning when every (string)
    value of an event attribute is a class of its own. Each trace is labelled
    with the last non-empty value of the attribute found among its events, or
    with "UNDEFINED" when no event provides one

    Parameters
    ------------
    log
        Trace log
    str_attr_name
        Name of the event attribute to inspect

    Returns
    ------------
    target
        Numpy array holding, for each trace, the index of its class
    classes
        Name of the classes, in order of first appearance
    """
    label_to_index = {}
    class_names = []
    encoded = []

    for trace in log:
        label = "UNDEFINED"
        for event in trace:
            if str_attr_name not in event:
                continue
            candidate = event[str_attr_name]
            # falsy values (e.g. empty strings) never override the label
            if candidate:
                label = candidate

        key = str(label)
        if key not in label_to_index:
            # the next free index equals the number of classes seen so far
            label_to_index[key] = len(class_names)
            class_names.append(key)
        encoded.append(label_to_index[key])

    return np.array(encoded), class_names
def get_class_representation_by_trace_duration(log, target_trace_duration, timestamp_key="time:timestamp",
                                               parameters=None):
    """
    Build the target part for decision tree learning by comparing the
    duration of each trace (time between its first and its last event, in
    seconds) against a threshold

    Parameters
    ------------
    log
        Trace log
    target_trace_duration
        Threshold duration; traces strictly above it are "GREATER",
        all the others are "LESSEQUAL"
    timestamp_key
        Event attribute holding the timestamp
    parameters
        Optional dictionary of settings:
            - "business_hours": when true, the duration is obtained from
              BusinessHours instead of a plain timestamp difference
              (default: False)
            - "worktiming": passed to BusinessHours (default: [7, 17])
            - "weekends": passed to BusinessHours (default: [6, 7])

    Returns
    ------------
    target
        Numpy array holding, for each trace, the index of its class
    classes
        Name of the classes, in order of first appearance
    """
    if parameters is None:
        parameters = {}

    business_hours = parameters.get("business_hours", False)
    worktiming = parameters.get("worktiming", [7, 17])
    weekends = parameters.get("weekends", [6, 7])

    index_of = {}
    class_names = []
    encoded = []

    for trace in log:
        # traces that are empty or lack the timestamp on their first or last
        # event cannot be measured and stay in the "LESSEQUAL" class
        exceeds = False
        if len(trace) > 0 and timestamp_key in trace[0] and timestamp_key in trace[-1]:
            started = trace[0][timestamp_key]
            ended = trace[-1][timestamp_key]
            if business_hours:
                # timezone information is stripped before handing the
                # timestamps to BusinessHours
                calculator = BusinessHours(started.replace(tzinfo=None), ended.replace(tzinfo=None),
                                           worktiming=worktiming, weekends=weekends)
                elapsed = calculator.getseconds()
            else:
                elapsed = (ended - started).total_seconds()
            exceeds = elapsed > target_trace_duration

        label = "GREATER" if exceeds else "LESSEQUAL"
        if label not in index_of:
            index_of[label] = len(class_names)
            class_names.append(label)
        encoded.append(index_of[label])

    return np.array(encoded), class_names