Source code for pm4py.org

'''
    This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

    PM4Py is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PM4Py is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PM4Py.  If not, see <https://www.gnu.org/licenses/>.
'''
from typing import Union

import pandas as pd

from pm4py.objects.log.obj import EventLog, EventStream
from pm4py.util.pandas_utils import check_is_pandas_dataframe, check_pandas_dataframe_columns
from pm4py.utils import get_properties
from pm4py.util import constants, xes_constants
from typing import Dict, Tuple, Any


def discover_handover_of_work_network(log: Union[EventLog, pd.DataFrame], beta=0):
    """
    Computes the handover of work network of an event log.

    The handover of work network is essentially the directly-follows graph
    of the log in which the nodes are the resources instead of the
    activities. Resource information therefore has to be available in the
    event log.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe
    beta
        beta parameter for the Handover metric; forwarded to the algorithm
        under the "beta" parameter key

    Returns
    ---------------
    metric_values
        Values of the metric

    Raises
    ---------------
    Exception
        If the type of ``log`` is not exactly one of pd.DataFrame, EventLog
        or EventStream
    """
    accepted_types = (pd.DataFrame, EventLog, EventStream)
    # exact type match on purpose: subclasses are rejected as well
    if type(log) not in accepted_types:
        raise Exception("the method can be applied only to a traditional event log!")

    # imported lazily, only once the input has been validated
    from pm4py.algo.organizational_mining.sna import algorithm as sna_algorithm

    sna_parameters = get_properties(log)
    sna_parameters["beta"] = beta

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        chosen_variant = sna_algorithm.Variants.HANDOVER_PANDAS
    else:
        chosen_variant = sna_algorithm.Variants.HANDOVER_LOG

    return sna_algorithm.apply(log, variant=chosen_variant, parameters=sna_parameters)


def discover_working_together_network(log: Union[EventLog, pd.DataFrame]):
    """
    Computes the working together network of the process.

    Two resources are connected in the graph if they collaborate on an
    instance of the process.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe

    Returns
    ---------------
    metric_values
        Values of the metric

    Raises
    ---------------
    Exception
        If the type of ``log`` is not exactly one of pd.DataFrame, EventLog
        or EventStream
    """
    accepted_types = (pd.DataFrame, EventLog, EventStream)
    # exact type match on purpose: subclasses are rejected as well
    if type(log) not in accepted_types:
        raise Exception("the method can be applied only to a traditional event log!")

    # imported lazily, only once the input has been validated
    from pm4py.algo.organizational_mining.sna import algorithm as sna_algorithm

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        chosen_variant = sna_algorithm.Variants.WORKING_TOGETHER_PANDAS
    else:
        chosen_variant = sna_algorithm.Variants.WORKING_TOGETHER_LOG

    # properties are read only after the dataframe columns have been checked
    return sna_algorithm.apply(log, variant=chosen_variant, parameters=get_properties(log))


def discover_activity_based_resource_similarity(log: Union[EventLog, pd.DataFrame]):
    """
    Computes the similarity between the resources of the event log, based
    on their activity profiles.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe

    Returns
    ---------------
    metric_values
        Values of the metric

    Raises
    ---------------
    Exception
        If the type of ``log`` is not exactly one of pd.DataFrame, EventLog
        or EventStream
    """
    accepted_types = (pd.DataFrame, EventLog, EventStream)
    # exact type match on purpose: subclasses are rejected as well
    if type(log) not in accepted_types:
        raise Exception("the method can be applied only to a traditional event log!")

    # imported lazily, only once the input has been validated
    from pm4py.algo.organizational_mining.sna import algorithm as sna_algorithm

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        chosen_variant = sna_algorithm.Variants.JOINTACTIVITIES_PANDAS
    else:
        chosen_variant = sna_algorithm.Variants.JOINTACTIVITIES_LOG

    # properties are read only after the dataframe columns have been checked
    return sna_algorithm.apply(log, variant=chosen_variant, parameters=get_properties(log))


def discover_subcontracting_network(log: Union[EventLog, pd.DataFrame], n=2):
    """
    Computes the subcontracting network of the process.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe
    n
        n parameter for the Subcontracting metric; forwarded to the
        algorithm under the "n" parameter key

    Returns
    ---------------
    metric_values
        Values of the metric

    Raises
    ---------------
    Exception
        If the type of ``log`` is not exactly one of pd.DataFrame, EventLog
        or EventStream
    """
    accepted_types = (pd.DataFrame, EventLog, EventStream)
    # exact type match on purpose: subclasses are rejected as well
    if type(log) not in accepted_types:
        raise Exception("the method can be applied only to a traditional event log!")

    # imported lazily, only once the input has been validated
    from pm4py.algo.organizational_mining.sna import algorithm as sna_algorithm

    sna_parameters = get_properties(log)
    sna_parameters["n"] = n

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        chosen_variant = sna_algorithm.Variants.SUBCONTRACTING_PANDAS
    else:
        chosen_variant = sna_algorithm.Variants.SUBCONTRACTING_LOG

    return sna_algorithm.apply(log, variant=chosen_variant, parameters=sna_parameters)


def discover_organizational_roles(log: Union[EventLog, pd.DataFrame]):
    """
    Mines the organizational roles from the event log.

    Parameters
    ---------------
    log
        Event log or Pandas dataframe

    Returns
    ---------------
    roles
        Organizational roles. List where each role is a sublist with two
        elements:
        - The first element of the sublist is the list of activities
          belonging to the role. Each activity belongs to a single role
        - The second element of the sublist is a dictionary containing the
          resources of the role and the number of times they executed
          activities belonging to the role.

    Raises
    ---------------
    Exception
        If the type of ``log`` is not exactly one of pd.DataFrame, EventLog
        or EventStream
    """
    accepted_types = (pd.DataFrame, EventLog, EventStream)
    # exact type match on purpose: subclasses are rejected as well
    if type(log) not in accepted_types:
        raise Exception("the method can be applied only to a traditional event log!")

    # imported lazily, only once the input has been validated
    from pm4py.algo.organizational_mining.roles import algorithm as roles_algorithm

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        chosen_variant = roles_algorithm.Variants.PANDAS
    else:
        chosen_variant = roles_algorithm.Variants.LOG

    # properties are read only after the dataframe columns have been checked
    return roles_algorithm.apply(log, variant=chosen_variant, parameters=get_properties(log))


def discover_network_analysis(log: Union[pd.DataFrame, EventLog, EventStream], out_column: str, in_column: str, node_column_source: str, node_column_target: str, edge_column: str, edge_reference: str = "_out", performance: bool = False, sorting_column: str = xes_constants.DEFAULT_TIMESTAMP_KEY, timestamp_column: str = xes_constants.DEFAULT_TIMESTAMP_KEY) -> Dict[Tuple[str, str], Dict[str, Any]]:
    """
    Performs a network analysis of the log based on the provided parameters.

    The output is a multigraph.
    Two events EV1 and EV2 of the log are merged (independently from the
    case notion) based on having EV1.OUT_COLUMN = EV2.IN_COLUMN.
    Then, an aggregation is applied on the couple of events (NODE_COLUMN)
    to obtain the nodes that are connected.
    The edges between these nodes are aggregated based on some property of
    the event selected through EDGE_REFERENCE (EDGE_COLUMN).

    Parameters
    ------------------
    log
        Event log / Pandas dataframe
    out_column
        The source column of the link (e.g. the case identifier, so that
        events of the same case are linked)
    in_column
        The target column of the link (e.g. the case identifier, so that
        events of the same case are linked)
    node_column_source
        The attribute to be used for the node definition of the source
        event (e.g. the resource of the log, org:resource)
    node_column_target
        The attribute to be used for the node definition of the target
        event (e.g. the resource of the log, org:resource)
    edge_column
        The attribute to be used for the edge definition (e.g. the activity
        of the log, concept:name)
    edge_reference
        Decides from which event the edge attribute should be picked.
        Values:
        - _out => the source event (default)
        - _in => the target event
    performance
        Boolean value that enables the performance calculation on the edges
        of the network analysis
    sorting_column
        The column that should be used to sort the log before performing
        the network analysis (default: time:timestamp)
    timestamp_column
        The column that should be used as timestamp for the
        performance-related analysis (default: time:timestamp)

    Returns
    ------------------
    network_analysis
        Edges of the network analysis (first key: edge; second key: type;
        value: number of occurrences)

    Raises
    ------------------
    Exception
        If the type of ``log`` is not exactly one of pd.DataFrame, EventLog
        or EventStream
    """
    accepted_types = (pd.DataFrame, EventLog, EventStream)
    # exact type match on purpose: subclasses are rejected as well
    if type(log) not in accepted_types:
        raise Exception("the method can be applied only to a traditional event log!")

    # imported lazily, only once the input has been validated
    from pm4py.algo.organizational_mining.network_analysis.variants import dataframe
    from pm4py.algo.organizational_mining.network_analysis import algorithm as network_analysis

    # the parameter keys are defined by the dataframe variant
    param_keys = dataframe.Parameters
    analysis_parameters = {
        param_keys.OUT_COLUMN: out_column,
        param_keys.IN_COLUMN: in_column,
        param_keys.NODE_COLUMN_SOURCE: node_column_source,
        param_keys.NODE_COLUMN_TARGET: node_column_target,
        param_keys.EDGE_COLUMN: edge_column,
        param_keys.EDGE_REFERENCE: edge_reference,
        param_keys.SORTING_COLUMN: sorting_column,
        param_keys.TIMESTAMP_KEY: timestamp_column,
        param_keys.INCLUDE_PERFORMANCE: performance,
    }

    return network_analysis.apply(log, parameters=analysis_parameters)