diff --git a/openml/extensions/extension_interface.py b/openml/extensions/extension_interface.py
index 2d06b69e0..4529ad163 100644
--- a/openml/extensions/extension_interface.py
+++ b/openml/extensions/extension_interface.py
@@ -229,6 +229,21 @@ def obtain_parameter_values(
             - ``oml:component`` : int: flow id to which the parameter belongs
         """
 
+    @abstractmethod
+    def check_if_model_fitted(self, model: Any) -> bool:
+        """Returns True/False denoting if the model has already been fitted/trained.
+
+        Parameters
+        ----------
+        model : Any
+            The model to inspect.
+
+        Returns
+        -------
+        bool
+            True if the model has already been fitted, False otherwise.
+        """
+
     ################################################################################################
     # Abstract methods for hyperparameter optimization
 
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 1cd979af5..0d049c4fd 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -1537,6 +1537,49 @@ def _seed_current_object(current_value):
         model.set_params(**random_states)
         return model
 
+    def check_if_model_fitted(self, model: Any) -> bool:
+        """Returns True/False denoting if the model has already been fitted/trained
+
+        The check probes the model by calling ``predict`` on a small dummy array and
+        interpreting the outcome; the predictions themselves are discarded.
+
+        Parameters
+        ----------
+        model : Any
+            The scikit-learn model to inspect.
+
+        Returns
+        -------
+        bool
+            False if ``predict`` raises ``NotFittedError`` or if the model has no usable
+            ``predict`` method (its state cannot be probed), True otherwise.
+        """
+        from sklearn.exceptions import NotFittedError
+
+        # Models without 'predict' (e.g. pure transformers) cannot be probed this way. Report
+        # them as not fitted instead of letting an AttributeError abort the run.
+        predict = getattr(model, "predict", None)
+        if not callable(predict):
+            return False
+
+        # Creating dummy data of arbitrary size. A private RandomState is used so that the
+        # global numpy random state, which the subsequent run may depend on, is left untouched.
+        dummy_data = np.random.RandomState(0).uniform(size=(10, 3))
+        try:
+            # Using 'predict' instead of 'sklearn.utils.validation.check_is_fitted' for a more
+            # robust check that works across sklearn versions and models. Internally, 'predict'
+            # should call 'check_is_fitted' for every concerned attribute, thus offering a more
+            # assured check than explicit calls to 'check_is_fitted'
+            predict(dummy_data)
+        except NotFittedError:  # needs to be the first exception to be caught
+            # Model is not fitted, as is required
+            return False
+        except ValueError:
+            # Will reach here if the model was fit on a dataset with more or less than 3 features
+            return True
+        # Will reach here if the model was fit on a dataset with 3 features
+        return True
+
     def _run_model_on_fold(
         self,
         model: Any,
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index a08c84df8..194e4b598 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -250,6 +250,12 @@ def run_flow_on_task(
     run_environment = flow.extension.get_version_information()
     tags = ["openml-python", run_environment[1]]
 
+    if flow.extension.check_if_model_fitted(flow.model):
+        warnings.warn(
+            "The model is already fitted!"
+            " This might cause inconsistency in comparison of results."
+        )
+
     # execute the run
     res = _run_task_get_arffcontent(
         flow=flow,