From 5665c9788d9fb4f8d4e491a134118b1ef193abd9 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 24 Jul 2019 15:39:48 -0400
Subject: [PATCH 01/17] test against scikit-learn 0.21

---
 .travis.yml                                   |  2 +-
 .../test_sklearn_extension.py                 | 31 +++++++++----------
 tests/test_flows/test_flow.py                 | 11 +++----
 tests/test_runs/test_run.py                   | 10 +++---
 tests/test_runs/test_run_functions.py         | 21 +++++++------
 tests/test_study/test_study_examples.py       |  3 +-
 6 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 675186469..96effeee9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,7 +18,7 @@ env:
   - DISTRIB="conda" PYTHON_VERSION="3.5" SKLEARN_VERSION="0.20.0"
   - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.20.0"
   - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.0" RUN_FLAKE8="true" SKIP_TESTS="true"
-  - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.0" COVERAGE="true" DOCPUSH="true"
+  - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true"
   # Checks for older scikit-learn versions (which also don't nicely work with
   # Python3.7)
   - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2"
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 2728076fe..67b5cc419 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -27,10 +27,6 @@
 import sklearn.tree
 import sklearn.cluster
 
-if LooseVersion(sklearn.__version__) < "0.20":
-    from sklearn.preprocessing import Imputer
-else:
-    from sklearn.impute import SimpleImputer as Imputer
 
 import openml
 from openml.extensions.sklearn import SklearnExtension
@@ -39,6 +35,7 @@
 from openml.flows.functions import assert_flows_equal
 from openml.runs.trace import OpenMLRunTrace
 from openml.testing import TestBase
+from openml._backport import SimpleImputer
 
 this_directory = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(this_directory)
@@ -941,7 +938,7 @@ def test_illegal_parameter_names(self):
     def test_illegal_parameter_names_pipeline(self):
         # illegal name: steps
         steps = [
-            ('Imputer', Imputer(strategy='median')),
+            ('Imputer', SimpleImputer(strategy='median')),
             ('OneHotEncoder',
              sklearn.preprocessing.OneHotEncoder(sparse=False,
                                                  handle_unknown='ignore')),
@@ -954,7 +951,7 @@ def test_illegal_parameter_names_featureunion(self):
         # illegal name: transformer_list
         transformer_list = [
             ('transformer_list',
-             Imputer(strategy='median')),
+             SimpleImputer(strategy='median')),
             ('OneHotEncoder',
              sklearn.preprocessing.OneHotEncoder(sparse=False,
                                                  handle_unknown='ignore'))
@@ -1045,7 +1042,7 @@ def test_deserialize_with_defaults(self):
         # used the 'initialize_with_defaults' flag of the deserialization
         # method to return a flow that contains default hyperparameter
         # settings.
-        steps = [('Imputer', Imputer()),
+        steps = [('Imputer', SimpleImputer()),
                  ('OneHotEncoder', sklearn.preprocessing.OneHotEncoder()),
                  ('Estimator', sklearn.tree.DecisionTreeClassifier())]
         pipe_orig = sklearn.pipeline.Pipeline(steps=steps)
@@ -1069,7 +1066,7 @@ def test_deserialize_adaboost_with_defaults(self):
         # used the 'initialize_with_defaults' flag of the deserialization
         # method to return a flow that contains default hyperparameter
         # settings.
-        steps = [('Imputer', Imputer()),
+        steps = [('Imputer', SimpleImputer()),
                  ('OneHotEncoder', sklearn.preprocessing.OneHotEncoder()),
                  ('Estimator', sklearn.ensemble.AdaBoostClassifier(
                      sklearn.tree.DecisionTreeClassifier()))]
@@ -1095,7 +1092,7 @@ def test_deserialize_complex_with_defaults(self):
         # method to return a flow that contains default hyperparameter
         # settings.
         steps = [
-            ('Imputer', Imputer()),
+            ('Imputer', SimpleImputer()),
             ('OneHotEncoder', sklearn.preprocessing.OneHotEncoder()),
             (
                 'Estimator',
@@ -1299,7 +1296,7 @@ def test_run_model_on_fold_classification_1(self):
         y_test = y[test_indices]
 
         pipeline = sklearn.pipeline.Pipeline(steps=[
-            ('imp', sklearn.preprocessing.Imputer()),
+            ('imp',  SimpleImputer()),
             ('clf', sklearn.tree.DecisionTreeClassifier()),
         ])
         # TODO add some mocking here to actually test the innards of this function, too!
@@ -1425,11 +1422,11 @@ def predict_proba(*args, **kwargs):
             y_train = y[train_indices]
             X_test = X[test_indices]
             clf1 = sklearn.pipeline.Pipeline(steps=[
-                ('imputer', sklearn.preprocessing.Imputer()),
+                ('imputer',  SimpleImputer()),
                 ('estimator', sklearn.naive_bayes.GaussianNB())
             ])
             clf2 = sklearn.pipeline.Pipeline(steps=[
-                ('imputer', sklearn.preprocessing.Imputer()),
+                ('imputer',  SimpleImputer()),
                 ('estimator', HardNaiveBayes())
             ])
 
@@ -1482,7 +1479,7 @@ def test_run_model_on_fold_regression(self):
         y_test = y[test_indices]
 
         pipeline = sklearn.pipeline.Pipeline(steps=[
-            ('imp', sklearn.preprocessing.Imputer()),
+            ('imp',  SimpleImputer()),
             ('clf', sklearn.tree.DecisionTreeRegressor()),
         ])
         # TODO add some mocking here to actually test the innards of this function, too!
@@ -1527,7 +1524,7 @@ def test_run_model_on_fold_clustering(self):
         X = task.get_X(dataset_format='array')
 
         pipeline = sklearn.pipeline.Pipeline(steps=[
-            ('imp', sklearn.preprocessing.Imputer()),
+            ('imp',  SimpleImputer()),
             ('clf', sklearn.cluster.KMeans()),
         ])
         # TODO add some mocking here to actually test the innards of this function, too!
@@ -1616,7 +1613,7 @@ def test_trim_flow_name(self):
         long = """sklearn.pipeline.Pipeline(
                     columntransformer=sklearn.compose._column_transformer.ColumnTransformer(
                         numeric=sklearn.pipeline.Pipeline(
-                            imputer=sklearn.preprocessing.imputation.Imputer,
+                            SimpleImputer=sklearn.preprocessing.imputation.Imputer,
                             standardscaler=sklearn.preprocessing.data.StandardScaler),
                         nominal=sklearn.pipeline.Pipeline(
                             simpleimputer=sklearn.impute.SimpleImputer,
@@ -1640,7 +1637,7 @@ def test_trim_flow_name(self):
         self.assertEqual(short, SklearnExtension.trim_flow_name(long_stripped))
 
         long = """sklearn.pipeline.Pipeline(
-                    Imputer=sklearn.preprocessing.imputation.Imputer,
+                    SimpleImputer=sklearn.preprocessing.imputation.Imputer,
                     VarianceThreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, # noqa: E501
                     Estimator=sklearn.model_selection._search.RandomizedSearchCV(
                         estimator=sklearn.tree.tree.DecisionTreeClassifier))"""
@@ -1650,7 +1647,7 @@ def test_trim_flow_name(self):
 
         long = """sklearn.model_selection._search.RandomizedSearchCV(
                     estimator=sklearn.pipeline.Pipeline(
-                        Imputer=sklearn.preprocessing.imputation.Imputer,
+                        SimpleImputer=sklearn.preprocessing.imputation.Imputer,
                         classifier=sklearn.ensemble.forest.RandomForestClassifier))"""
         short = "sklearn.RandomizedSearchCV(Pipeline(Imputer,RandomForestClassifier))"
         long_stripped, _ = re.subn(r'\s', '', long)
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index 6e7eb7fbb..f6829838c 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -19,10 +19,7 @@
 import sklearn.naive_bayes
 import sklearn.tree
 
-if LooseVersion(sklearn.__version__) < "0.20":
-    from sklearn.preprocessing import Imputer
-else:
-    from sklearn.impute import SimpleImputer as Imputer
+from openml._backport import SimpleImputer
 
 import xmltodict
 
@@ -76,6 +73,8 @@ def test_get_flow(self):
         self.assertEqual(subflow_3.parameters['L'], '-1')
         self.assertEqual(len(subflow_3.components), 0)
 
+
+
     def test_get_structure(self):
         # also responsible for testing: flow.get_subflow
         # We need to use the production server here because 4024 is not the
@@ -302,8 +301,8 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock):
                                                                 flow.flow_id))
 
         fixture = (
-            "The flow on the server is inconsistent with the local flow. "
-            "The server flow ID is 1. Please check manually and remove "
+            "Flow was not stored correctly on the server. "
+            "New flow ID is 1. Please check manually and remove "
             "the flow if necessary! Error is:\n"
             "'Flow sklearn.ensemble.forest.RandomForestClassifier: "
             "values for attribute 'name' differ: "
diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py
index 23ab43df0..a9651a785 100644
--- a/tests/test_runs/test_run.py
+++ b/tests/test_runs/test_run.py
@@ -7,8 +7,8 @@
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import Imputer
 
+from openml._backport import SimpleImputer
 from openml.testing import TestBase
 import openml
 import openml.extensions.sklearn
@@ -106,7 +106,7 @@ def _check_array(array, type_):
     def test_to_from_filesystem_vanilla(self):
 
         model = Pipeline([
-            ('imputer', Imputer(strategy='mean')),
+            ('imputer', SimpleImputer(strategy='mean')),
             ('classifier', DecisionTreeClassifier(max_depth=1)),
         ])
         task = openml.tasks.get_task(119)
@@ -139,7 +139,7 @@ def test_to_from_filesystem_vanilla(self):
     def test_to_from_filesystem_search(self):
 
         model = Pipeline([
-            ('imputer', Imputer(strategy='mean')),
+            ('imputer', SimpleImputer(strategy='mean')),
             ('classifier', DecisionTreeClassifier(max_depth=1)),
         ])
         model = GridSearchCV(
@@ -175,7 +175,7 @@ def test_to_from_filesystem_search(self):
     def test_to_from_filesystem_no_model(self):
 
         model = Pipeline([
-            ('imputer', Imputer(strategy='mean')),
+            ('imputer', SimpleImputer(strategy='mean')),
             ('classifier', DummyClassifier()),
         ])
         task = openml.tasks.get_task(119)
@@ -205,7 +205,7 @@ def test_publish_with_local_loaded_flow(self):
         extension = openml.extensions.sklearn.SklearnExtension()
 
         model = Pipeline([
-            ('imputer', Imputer(strategy='mean')),
+            ('imputer', SimpleImputer(strategy='mean')),
             ('classifier', DummyClassifier()),
         ])
         task = openml.tasks.get_task(119)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index bd123cd37..56ee7d909 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -24,11 +24,12 @@
 )
 from openml.runs.trace import OpenMLRunTrace
 from openml.tasks import TaskTypeEnum
+from openml._backport import SimpleImputer
 
 from sklearn.naive_bayes import GaussianNB
 from sklearn.model_selection._search import BaseSearchCV
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.preprocessing.imputation import Imputer
+
 from sklearn.dummy import DummyClassifier
 from sklearn.preprocessing import StandardScaler
 from sklearn.feature_selection import VarianceThreshold
@@ -550,7 +551,7 @@ def get_ct_cf(nominal_indices, numeric_indices):
             '62501', sentinel=sentinel)
 
     def test_run_and_upload_decision_tree_pipeline(self):
-        pipeline2 = Pipeline(steps=[('Imputer', Imputer(strategy='median')),
+        pipeline2 = Pipeline(steps=[('Imputer', SimpleImputer(strategy='median')),
                                     ('VarianceThreshold', VarianceThreshold()),
                                     ('Estimator', RandomizedSearchCV(
                                         DecisionTreeClassifier(),
@@ -657,7 +658,7 @@ def test_learning_curve_task_2(self):
         num_folds = 10
         num_samples = 8
 
-        pipeline2 = Pipeline(steps=[('Imputer', Imputer(strategy='median')),
+        pipeline2 = Pipeline(steps=[('Imputer', SimpleImputer(strategy='median')),
                                     ('VarianceThreshold', VarianceThreshold()),
                                     ('Estimator', RandomizedSearchCV(
                                         DecisionTreeClassifier(),
@@ -734,7 +735,7 @@ def _test_local_evaluations(self, run):
     def test_local_run_swapped_parameter_order_model(self):
 
         # construct sci-kit learn classifier
-        clf = Pipeline(steps=[('imputer', Imputer(strategy='median')),
+        clf = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                               ('estimator', RandomForestClassifier())])
 
         # download task
@@ -752,7 +753,7 @@ def test_local_run_swapped_parameter_order_model(self):
     def test_local_run_swapped_parameter_order_flow(self):
 
         # construct sci-kit learn classifier
-        clf = Pipeline(steps=[('imputer', Imputer(strategy='median')),
+        clf = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                               ('estimator', RandomForestClassifier())])
 
         flow = self.extension.model_to_flow(clf)
@@ -771,7 +772,7 @@ def test_local_run_swapped_parameter_order_flow(self):
     def test_local_run_metric_score(self):
 
         # construct sci-kit learn classifier
-        clf = Pipeline(steps=[('imputer', Imputer(strategy='median')),
+        clf = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                               ('estimator', RandomForestClassifier())])
 
         # download task
@@ -798,7 +799,7 @@ def test_online_run_metric_score(self):
 
     def test_initialize_model_from_run(self):
         clf = sklearn.pipeline.Pipeline(steps=[
-            ('Imputer', Imputer(strategy='median')),
+            ('Imputer', SimpleImputer(strategy='median')),
             ('VarianceThreshold', VarianceThreshold(threshold=0.05)),
             ('Estimator', GaussianNB())])
         task = openml.tasks.get_task(11)
@@ -882,12 +883,12 @@ def test__run_exists(self):
         rs = 1
         clfs = [
             sklearn.pipeline.Pipeline(steps=[
-                ('Imputer', Imputer(strategy='mean')),
+                ('Imputer', SimpleImputer(strategy='mean')),
                 ('VarianceThreshold', VarianceThreshold(threshold=0.05)),
                 ('Estimator', DecisionTreeClassifier(max_depth=4))
             ]),
             sklearn.pipeline.Pipeline(steps=[
-                ('Imputer', Imputer(strategy='most_frequent')),
+                ('Imputer', SimpleImputer(strategy='most_frequent')),
                 ('VarianceThreshold', VarianceThreshold(threshold=0.1)),
                 ('Estimator', DecisionTreeClassifier(max_depth=4))]
             )
@@ -1251,7 +1252,7 @@ def test_run_on_dataset_with_missing_labels(self):
         flow.name = 'dummy'
         task = openml.tasks.get_task(2)
 
-        model = Pipeline(steps=[('Imputer', Imputer(strategy='median')),
+        model = Pipeline(steps=[('Imputer', SimpleImputer(strategy='median')),
                                 ('Estimator', DecisionTreeClassifier())])
 
         data_content, _, _, _ = _run_task_get_arffcontent(
diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py
index 62d1a98c8..c4919abb9 100644
--- a/tests/test_study/test_study_examples.py
+++ b/tests/test_study/test_study_examples.py
@@ -30,12 +30,13 @@ def test_Figure1a(self):
         import sklearn.pipeline
         import sklearn.preprocessing
         import sklearn.tree
+        from openml._backport import SimpleImputer
         benchmark_suite = openml.study.get_study(
             'OpenML100', 'tasks'
         )  # obtain the benchmark suite
         clf = sklearn.pipeline.Pipeline(
             steps=[
-                ('imputer', sklearn.preprocessing.Imputer()),
+                ('imputer', SimpleImputer()),
                 ('estimator', sklearn.tree.DecisionTreeClassifier())
             ]
         )  # build a sklearn classifier

From fba9efa6c6e6d8b7cbb2939f443b961281fe64ce Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 11:27:27 -0400
Subject: [PATCH 02/17] fix call to roc_auc

---
 tests/test_runs/test_run_functions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 56ee7d909..746546c04 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -715,9 +715,9 @@ def _test_local_evaluations(self, run):
         np.testing.assert_array_almost_equal(accuracy_scores_provided,
                                              accuracy_scores)
 
-        # also check if we can obtain some other scores: # TODO: how to do AUC?
+        # also check if we can obtain some other scores:
         tests = [(sklearn.metrics.cohen_kappa_score, {'weights': None}),
-                 (sklearn.metrics.auc, {'reorder': True}),
+                 (sklearn.metrics.roc_auc, {}),
                  (sklearn.metrics.average_precision_score, {}),
                  (sklearn.metrics.jaccard_similarity_score, {}),
                  (sklearn.metrics.precision_score, {'average': 'macro'}),

From 29b13c1927a8f8b6d457ba68443be8e6b3cba9e1 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 11:44:18 -0400
Subject: [PATCH 03/17] added verbose parameter to pipeline in 0.21

---
 .../test_sklearn_extension/test_sklearn_extension.py  | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index a3e6f8d00..5073c605f 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -1012,18 +1012,25 @@ def test_paralizable_check(self):
                 self.extension._prevent_optimize_n_jobs(model)
 
     def test__get_fn_arguments_with_defaults(self):
-        if LooseVersion(sklearn.__version__) < "0.19":
+        sklearn_version = LooseVersion(sklearn.__version__)
+        if sklearn_version < "0.19":
             fns = [
                 (sklearn.ensemble.RandomForestRegressor.__init__, 15),
                 (sklearn.tree.DecisionTreeClassifier.__init__, 12),
                 (sklearn.pipeline.Pipeline.__init__, 0)
             ]
-        else:
+        elif sklearn_version < "0.21":
             fns = [
                 (sklearn.ensemble.RandomForestRegressor.__init__, 16),
                 (sklearn.tree.DecisionTreeClassifier.__init__, 13),
                 (sklearn.pipeline.Pipeline.__init__, 1)
             ]
+        else:
+            fns = [
+                (sklearn.ensemble.RandomForestRegressor.__init__, 16),
+                (sklearn.tree.DecisionTreeClassifier.__init__, 13),
+                (sklearn.pipeline.Pipeline.__init__, 2)
+            ]
 
         for fn, num_params_with_defaults in fns:
             defaults, defaultless = (

From 609ad77cf896fe0c5cbc8d70eb0a9b6906c511cb Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 11:46:41 -0400
Subject: [PATCH 04/17] remove no-longer-existent categorical_features parameter

---
 .../test_sklearn_extension/test_sklearn_extension.py            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 5073c605f..3b87ab0d0 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -621,7 +621,7 @@ def test_serialize_feature_union_switched_names(self):
             .format(module_name_encoder))
 
     def test_serialize_complex_flow(self):
-        ohe = sklearn.preprocessing.OneHotEncoder(categorical_features=[0])
+        ohe = sklearn.preprocessing.OneHotEncoder()
         scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
         boosting = sklearn.ensemble.AdaBoostClassifier(
             base_estimator=sklearn.tree.DecisionTreeClassifier())

From 1b5dbd125a761ea5587b2e35b25a29dd6fa7766a Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 11:52:05 -0400
Subject: [PATCH 05/17] more pipeline parameter checks

---
 .../test_sklearn_extension/test_sklearn_extension.py  | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 3b87ab0d0..e502d309c 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -282,11 +282,14 @@ def test_serialize_pipeline(self):
         # Comparing the pipeline
         # The parameters only have the name of base objects(not the whole flow)
         # as value
-        # memory parameter has been added in 0.19
+        # memory parameter has been added in 0.19, verbose in 0.21
         if LooseVersion(sklearn.__version__) < "0.19":
             self.assertEqual(len(serialization.parameters), 1)
-        else:
+        elif LooseVersion(sklearn.__version__) < "0.21":
             self.assertEqual(len(serialization.parameters), 2)
+        else:
+            self.assertEqual(len(serialization.parameters), 3)
+
         # Hard to compare two representations of a dict due to possibly
         # different sorting. Making a json makes it easier
         self.assertEqual(
@@ -371,8 +374,10 @@ def test_serialize_pipeline_clustering(self):
         # memory parameter has been added in 0.19
         if LooseVersion(sklearn.__version__) < "0.19":
             self.assertEqual(len(serialization.parameters), 1)
-        else:
+        elif LooseVersion(sklearn.__version__) < "0.21":
             self.assertEqual(len(serialization.parameters), 2)
+        else:
+            self.assertEqual(len(serialization.parameters), 3)
         # Hard to compare two representations of a dict due to possibly
         # different sorting. Making a json makes it easier
         self.assertEqual(

From ebaa18fdca666226c42577fa8d36b38812a1b1d4 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 11:52:13 -0400
Subject: [PATCH 06/17] more imputer replacements

---
 tests/test_flows/test_flow.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index f6829838c..f539a86e5 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -74,7 +74,6 @@ def test_get_flow(self):
         self.assertEqual(len(subflow_3.components), 0)
 
 
-
     def test_get_structure(self):
         # also responsible for testing: flow.get_subflow
         # We need to use the production server here because 4024 is not the
@@ -317,8 +316,8 @@ def test_illegal_flow(self):
         # should throw error as it contains two imputers
         illegal = sklearn.pipeline.Pipeline(
             steps=[
-                ('imputer1', Imputer()),
-                ('imputer2', Imputer()),
+                ('imputer1', SimpleImputer()),
+                ('imputer2', SimpleImputer()),
                 ('classif', sklearn.tree.DecisionTreeClassifier())
             ]
         )
@@ -349,7 +348,7 @@ def test_existing_flow_exists(self):
         if LooseVersion(sklearn.__version__) >= '0.20':
             ohe_params['categories'] = 'auto'
         steps = [
-            ('imputation', Imputer(strategy='median')),
+            ('imputation', SimpleImputer(strategy='median')),
             ('hotencoding', sklearn.preprocessing.OneHotEncoder(**ohe_params)),
             (
                 'variencethreshold',

From 1098594b5ac91cc11b46697204f002107f588fc8 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 11:53:42 -0400
Subject: [PATCH 07/17] don't break on dev versions

---
 tests/test_flows/test_flow_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py
index de933731a..95b4fa3f0 100644
--- a/tests/test_flows/test_flow_functions.py
+++ b/tests/test_flows/test_flow_functions.py
@@ -288,7 +288,7 @@ def test_get_flow_reinstantiate_model_no_extension(self):
     def test_get_flow_reinstantiate_model_wrong_version(self):
         # Note that CI does not test against 0.19.1.
         openml.config.server = self.production_server
-        _, sklearn_major, _ = LooseVersion(sklearn.__version__).version
+        _, sklearn_major, _ = LooseVersion(sklearn.__version__).version[:3]
         flow = 8175
         expected = 'Trying to deserialize a model with dependency sklearn==0.19.1 not satisfied.'
         self.assertRaisesRegex(ValueError,

From 2eb7f7b2e90f50100f9dd5a2cce00246afeb6d41 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 11:53:51 -0400
Subject: [PATCH 08/17] typo on roc_auc_score name

---
 tests/test_runs/test_run_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 746546c04..ab4999586 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -717,7 +717,7 @@ def _test_local_evaluations(self, run):
 
         # also check if we can obtain some other scores:
         tests = [(sklearn.metrics.cohen_kappa_score, {'weights': None}),
-                 (sklearn.metrics.roc_auc, {}),
+                 (sklearn.metrics.roc_auc_score, {}),
                  (sklearn.metrics.average_precision_score, {}),
                  (sklearn.metrics.jaccard_similarity_score, {}),
                  (sklearn.metrics.precision_score, {'average': 'macro'}),

From 0b033d9a938f0b3187e4bc78403a741c1012a509 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 12:07:30 -0400
Subject: [PATCH 09/17] use ordered dicts, avoid nan comparison

---
 .../test_sklearn_extension.py                 | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index e502d309c..3554555d1 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -749,15 +749,16 @@ def test_serialize_simple_parameter_grid(self):
         # Examples from the scikit-learn documentation
         models = [sklearn.svm.SVC(), sklearn.ensemble.RandomForestClassifier()]
         grids = \
-            [[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
-              {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
-               'kernel': ['rbf']}],
-             {"max_depth": [3, None],
-              "max_features": [1, 3, 10],
-              "min_samples_split": [1, 3, 10],
-              "min_samples_leaf": [1, 3, 10],
-              "bootstrap": [True, False],
-              "criterion": ["gini", "entropy"]}]
+            [[OrderedDict({'C': [1, 10, 100, 1000], 'kernel': ['linear']}),
+              OrderedDict({'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
+                           'kernel': ['rbf']})],
+             OrderedDict({"bootstrap": [True, False],
+                          "criterion": ["gini", "entropy"],
+                          "max_depth": [3, None],
+                          "max_features": [1, 3, 10],
+                          "min_samples_leaf": [1, 3, 10],
+                          "min_samples_split": [1, 3, 10]
+                          })]
 
         for grid, model in zip(grids, models):
             serialized = self.extension.model_to_flow(grid)
@@ -765,9 +766,9 @@ def test_serialize_simple_parameter_grid(self):
 
             self.assertEqual(deserialized, grid)
             self.assertIsNot(deserialized, grid)
-
+            # providing error_score because nan != nan
             hpo = sklearn.model_selection.GridSearchCV(
-                param_grid=grid, estimator=model)
+                param_grid=grid, estimator=model, error_score=-1000)
 
             serialized = self.extension.model_to_flow(hpo)
             deserialized = self.extension.flow_to_model(serialized)

From 4e9f75c8fb6f5c120f340f548c1947772ff699c5 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 12:09:56 -0400
Subject: [PATCH 10/17] undid weird merge artifact

---
 tests/test_flows/test_flow.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index f539a86e5..841e9d1cb 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -300,8 +300,8 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock):
                                                                 flow.flow_id))
 
         fixture = (
-            "Flow was not stored correctly on the server. "
-            "New flow ID is 1. Please check manually and remove "
+            "The flow on the server is inconsistent with the local flow. "
+            "The server flow ID is 1. Please check manually and remove "
             "the flow if necessary! Error is:\n"
             "'Flow sklearn.ensemble.forest.RandomForestClassifier: "
             "values for attribute 'name' differ: "

From 54382a4ea0ab0ff4bbc0aa0df205932494f40f45 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 13:05:15 -0400
Subject: [PATCH 11/17] add missing file whoops

---
 openml/_backport.py | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 openml/_backport.py

diff --git a/openml/_backport.py b/openml/_backport.py
new file mode 100644
index 000000000..b8642dd10
--- /dev/null
+++ b/openml/_backport.py
@@ -0,0 +1,6 @@
+try:
+    from sklearn.impute import SimpleImputer
+except ImportError:
+    from sklearn.preprocessing.impute import Imputer as SimpleImputer
+
+__all__ = ['SimpleImputer']

From a8083571ffd14c9e20d7acc861f02958d9a16d11 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 13:07:01 -0400
Subject: [PATCH 12/17] flake8

---
 .../test_sklearn_extension/test_sklearn_extension.py   | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 3554555d1..49c753ee8 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -1311,7 +1311,7 @@ def test_run_model_on_fold_classification_1(self):
         y_test = y[test_indices]
 
         pipeline = sklearn.pipeline.Pipeline(steps=[
-            ('imp',  SimpleImputer()),
+            ('imp', SimpleImputer()),
             ('clf', sklearn.tree.DecisionTreeClassifier()),
         ])
         # TODO add some mocking here to actually test the innards of this function, too!
@@ -1437,11 +1437,11 @@ def predict_proba(*args, **kwargs):
             y_train = y[train_indices]
             X_test = X[test_indices]
             clf1 = sklearn.pipeline.Pipeline(steps=[
-                ('imputer',  SimpleImputer()),
+                ('imputer', SimpleImputer()),
                 ('estimator', sklearn.naive_bayes.GaussianNB())
             ])
             clf2 = sklearn.pipeline.Pipeline(steps=[
-                ('imputer',  SimpleImputer()),
+                ('imputer', SimpleImputer()),
                 ('estimator', HardNaiveBayes())
             ])
 
@@ -1494,7 +1494,7 @@ def test_run_model_on_fold_regression(self):
         y_test = y[test_indices]
 
         pipeline = sklearn.pipeline.Pipeline(steps=[
-            ('imp',  SimpleImputer()),
+            ('imp', SimpleImputer()),
             ('clf', sklearn.tree.DecisionTreeRegressor()),
         ])
         # TODO add some mocking here to actually test the innards of this function, too!
@@ -1539,7 +1539,7 @@ def test_run_model_on_fold_clustering(self):
         X = task.get_X(dataset_format='array')
 
         pipeline = sklearn.pipeline.Pipeline(steps=[
-            ('imp',  SimpleImputer()),
+            ('imp', SimpleImputer()),
             ('clf', sklearn.cluster.KMeans()),
         ])
         # TODO add some mocking here to actually test the innards of this function, too!

From 5d4db1ebfdd807c3c7955ad1f274333a68602d51 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 25 Jul 2019 14:00:32 -0400
Subject: [PATCH 13/17] try fixing import in backport, pep8

---
 openml/_backport.py           | 2 +-
 tests/test_flows/test_flow.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/openml/_backport.py b/openml/_backport.py
index b8642dd10..cc68ab222 100644
--- a/openml/_backport.py
+++ b/openml/_backport.py
@@ -1,6 +1,6 @@
 try:
     from sklearn.impute import SimpleImputer
 except ImportError:
-    from sklearn.preprocessing.impute import Imputer as SimpleImputer
+    from sklearn.preprocessing import Imputer as SimpleImputer
 
 __all__ = ['SimpleImputer']
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index 841e9d1cb..eb5d23e7f 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -73,7 +73,6 @@ def test_get_flow(self):
         self.assertEqual(subflow_3.parameters['L'], '-1')
         self.assertEqual(len(subflow_3.components), 0)
 
-
     def test_get_structure(self):
         # also responsible for testing: flow.get_subflow
         # We need to use the production server here because 4024 is not the

From 1686dfd75014cb603e8ae85e162f2ca958c24fc1 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 26 Jul 2019 10:47:13 -0400
Subject: [PATCH 14/17] move SimpleImputer to testing module

---
 openml/_backport.py                                       | 6 ------
 openml/testing.py                                         | 8 +++++++-
 .../test_sklearn_extension/test_sklearn_extension.py      | 4 ++--
 tests/test_flows/test_flow.py                             | 4 +---
 tests/test_runs/test_run_functions.py                     | 3 +--
 tests/test_study/test_study_examples.py                   | 4 ++--
 6 files changed, 13 insertions(+), 16 deletions(-)
 delete mode 100644 openml/_backport.py

diff --git a/openml/_backport.py b/openml/_backport.py
deleted file mode 100644
index cc68ab222..000000000
--- a/openml/_backport.py
+++ /dev/null
@@ -1,6 +0,0 @@
-try:
-    from sklearn.impute import SimpleImputer
-except ImportError:
-    from sklearn.preprocessing import Imputer as SimpleImputer
-
-__all__ = ['SimpleImputer']
diff --git a/openml/testing.py b/openml/testing.py
index dad1aa9f5..c5a12068c 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -319,4 +319,10 @@ def _check_fold_timing_evaluations(
                         self.assertLessEqual(evaluation, max_val)
 
 
-__all__ = ['TestBase']
+try:
+    from sklearn.impute import SimpleImputer
+except ImportError:
+    from sklearn.preprocessing import Imputer as SimpleImputer
+
+
+__all__ = ['TestBase', 'SimpleImputer']
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 49c753ee8..3fbe94b5d 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -35,8 +35,8 @@
 from openml.flows import OpenMLFlow
 from openml.flows.functions import assert_flows_equal
 from openml.runs.trace import OpenMLRunTrace
-from openml.testing import TestBase
-from openml._backport import SimpleImputer
+from openml.testing import TestBase, SimpleImputer
+
 
 this_directory = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(this_directory)
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index eb5d23e7f..25e2dacfb 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -19,15 +19,13 @@
 import sklearn.naive_bayes
 import sklearn.tree
 
-from openml._backport import SimpleImputer
-
 import xmltodict
 
 import openml
 from openml._api_calls import _perform_api_call
 import openml.exceptions
 import openml.extensions.sklearn
-from openml.testing import TestBase
+from openml.testing import TestBase, SimpleImputer
 import openml.utils
 
 
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index ab4999586..2b09ef501 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -17,14 +17,13 @@
 import pandas as pd
 
 import openml.extensions.sklearn
-from openml.testing import TestBase
+from openml.testing import TestBase, SimpleImputer
 from openml.runs.functions import (
     _run_task_get_arffcontent,
     run_exists,
 )
 from openml.runs.trace import OpenMLRunTrace
 from openml.tasks import TaskTypeEnum
-from openml._backport import SimpleImputer
 
 from sklearn.naive_bayes import GaussianNB
 from sklearn.model_selection._search import BaseSearchCV
diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py
index c4919abb9..1d9c56d54 100644
--- a/tests/test_study/test_study_examples.py
+++ b/tests/test_study/test_study_examples.py
@@ -1,4 +1,4 @@
-from openml.testing import TestBase
+from openml.testing import TestBase, SimpleImputer
 
 
 class TestStudyFunctions(TestBase):
@@ -30,7 +30,7 @@ def test_Figure1a(self):
         import sklearn.pipeline
         import sklearn.preprocessing
         import sklearn.tree
-        from openml._backport import SimpleImputer
+
         benchmark_suite = openml.study.get_study(
             'OpenML100', 'tasks'
         )  # obtain the benchmark suite

From 34c24236523252e976ebc7e92641d16193c14207 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 26 Jul 2019 10:51:47 -0400
Subject: [PATCH 15/17] don't trust dicts to be ordered

---
 .../test_sklearn_extension.py                 | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 3fbe94b5d..7a854a20e 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -749,16 +749,16 @@ def test_serialize_simple_parameter_grid(self):
         # Examples from the scikit-learn documentation
         models = [sklearn.svm.SVC(), sklearn.ensemble.RandomForestClassifier()]
         grids = \
-            [[OrderedDict({'C': [1, 10, 100, 1000], 'kernel': ['linear']}),
-              OrderedDict({'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
-                           'kernel': ['rbf']})],
-             OrderedDict({"bootstrap": [True, False],
-                          "criterion": ["gini", "entropy"],
-                          "max_depth": [3, None],
-                          "max_features": [1, 3, 10],
-                          "min_samples_leaf": [1, 3, 10],
-                          "min_samples_split": [1, 3, 10]
-                          })]
+            [[OrderedDict([('C', [1, 10, 100, 1000]), ('kernel', ['linear'])]),
+              OrderedDict([('C', [1, 10, 100, 1000]), ('gamma', [0.001, 0.0001]),
+                           ('kernel', ['rbf'])])],
+             OrderedDict([("bootstrap", [True, False]),
+                          ("criterion", ["gini", "entropy"]),
+                          ("max_depth", [3, None]),
+                          ("max_features", [1, 3, 10]),
+                          ("min_samples_leaf", [1, 3, 10]),
+                          ("min_samples_split", [1, 3, 10])
+                          ])]
 
         for grid, model in zip(grids, models):
             serialized = self.extension.model_to_flow(grid)

From 87a1366b1d36b6318614fdf8366ba19679d5421b Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 26 Jul 2019 11:06:01 -0400
Subject: [PATCH 16/17] run CI mostly on 0.21.2

---
 .travis.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 96effeee9..beaa3b53e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,10 +15,11 @@ env:
   - TEST_DIR=/tmp/test_dir/
   - MODULE=openml
   matrix:
-  - DISTRIB="conda" PYTHON_VERSION="3.5" SKLEARN_VERSION="0.20.0"
-  - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.20.0"
-  - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.0" RUN_FLAKE8="true" SKIP_TESTS="true"
+  - DISTRIB="conda" PYTHON_VERSION="3.5" SKLEARN_VERSION="0.21.2"
+  - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2"
+  - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true"
   - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true"
+  - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2"
   # Checks for older scikit-learn versions (which also don't nicely work with
   # Python3.7)
   - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2"

From bdbb1f43244f32a6d33026fc405326882eef8e55 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 26 Jul 2019 11:15:23 -0400
Subject: [PATCH 17/17] failed to save lol

---
 .../test_sklearn_extension/test_sklearn_extension.py           | 2 +-
 tests/test_runs/test_run.py                                    | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 2203a568e..8bc615516 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -1247,7 +1247,7 @@ def test_run_model_on_task(self):
         class MyPipe(sklearn.pipeline.Pipeline):
             pass
         task = openml.tasks.get_task(1)
-        pipe = MyPipe([('imp', Imputer()),
+        pipe = MyPipe([('imp', SimpleImputer()),
                        ('dummy', sklearn.dummy.DummyClassifier())])
         openml.runs.run_model_on_task(pipe, task)
 
diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py
index a9651a785..88fe8d6ef 100644
--- a/tests/test_runs/test_run.py
+++ b/tests/test_runs/test_run.py
@@ -8,8 +8,7 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
 
-from openml._backport import SimpleImputer
-from openml.testing import TestBase
+from openml.testing import TestBase, SimpleImputer
 import openml
 import openml.extensions.sklearn