Source code for pm4py.objects.log.util.split_train_test

'''
    This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

    PM4Py is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PM4Py is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PM4Py.  If not, see <https://www.gnu.org/licenses/>.
'''
from pm4py.objects.log.obj import EventLog
from typing import Tuple
import random
import math


def split(log: EventLog, train_percentage: float = 0.8) -> Tuple[EventLog, EventLog]:
    """
    Randomly partition an event log into a training log and a test log
    (for machine learning purposes)

    The positions of the traces are shuffled with the module-level ``random``
    generator, so the result is reproducible only if the caller seeds
    ``random`` beforehand.

    Parameters
    --------------
    log
        Event log
    train_percentage
        Fraction of traces to be included in the training log (from 0.0 to 1.0)

    Returns
    --------------
    training_log
        Training event log
    test_log
        Test event log

    Notes
    --------------
    The cut position is ``floor(len(log) * train_percentage) + 1``, i.e. for a
    fraction in [0.0, 1.0] the training log gets one trace more than the plain
    floor of the requested fraction (limited by the size of the log). On small
    logs this can leave the test log empty.
    Both returned logs are built from the attributes, extensions, classifiers,
    omni_present and properties objects of the input log, and the objects
    returned by ``log[idx]`` are appended as-is (no copy is made here).
    """
    shuffled_positions = list(range(len(log)))
    random.shuffle(shuffled_positions)

    # NOTE: the "+ 1" is intentional to keep the historical behaviour; see the
    # Notes section of the docstring.
    cut = 1 + math.floor(len(shuffled_positions) * train_percentage)

    def _empty_log_like_input() -> EventLog:
        # New, empty EventLog carrying the same log-level metadata as the input.
        return EventLog(
            list(),
            attributes=log.attributes,
            extensions=log.extensions,
            classifiers=log.classifiers,
            omni_present=log.omni_present,
            properties=log.properties,
        )

    train_log = _empty_log_like_input()
    test_log = _empty_log_like_input()

    # Everything before the cut goes to training, the remainder to test.
    partitions = (
        (train_log, shuffled_positions[:cut]),
        (test_log, shuffled_positions[cut:]),
    )
    for target_log, positions in partitions:
        for position in positions:
            target_log.append(log[position])

    return train_log, test_log