Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 24 additions & 33 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def create_dataset(name, description, creator, contributor,
licence, attributes, data,
default_target_attribute,
ignore_attribute, citation,
row_id_attribute=None, format=None,
row_id_attribute=None,
original_data_url=None, paper_url=None,
update_comment=None, version_label=None):
"""Create a dataset.
Expand Down Expand Up @@ -473,11 +473,6 @@ def create_dataset(name, description, creator, contributor,
be discarded.
.. versionadded: 0.8
Inference of ``row_id_attribute`` from a dataframe.
format : str, optional
Format of the dataset which can be either 'arff' or 'sparse_arff'.
By default, the format is automatically inferred.
.. deprecated: 0.8
``format`` is deprecated in 0.8 and will be removed in 0.10.
original_data_url : str, optional
For derived data, the url to the original dataset.
paper_url : str, optional
Expand Down Expand Up @@ -536,34 +531,29 @@ def create_dataset(name, description, creator, contributor,
else:
data = data.values

if format is not None:
warn("The format parameter will be deprecated in the future,"
" the method will determine the format of the ARFF "
"based on the given data.", DeprecationWarning)
d_format = format

# Determine ARFF format from the dataset
else:
if isinstance(data, (list, np.ndarray)):
if isinstance(data[0], (list, np.ndarray)):
d_format = 'arff'
elif isinstance(data[0], dict):
d_format = 'sparse_arff'
else:
raise ValueError(
'When giving a list or a numpy.ndarray, '
'they should contain a list/ numpy.ndarray '
'for dense data or a dictionary for sparse '
'data. Got {!r} instead.'
.format(data[0])
)
elif isinstance(data, coo_matrix):
d_format = 'sparse_arff'
if isinstance(data, (list, np.ndarray)):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, don't you need to check for pd.DataFrame and pd.SparseDataFrame, too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True

if isinstance(data[0], (list, np.ndarray)):
data_format = 'arff'
elif isinstance(data[0], dict):
data_format = 'sparse_arff'
else:
raise ValueError(
'Invalid data type. The data type can be a list, '
'a numpy ndarray or a scipy.sparse.coo_matrix'
'When giving a list or a numpy.ndarray, '
'they should contain a list/ numpy.ndarray '
'for dense data or a dictionary for sparse '
'data. Got {!r} instead.'
.format(data[0])
)
elif isinstance(data, coo_matrix):
data_format = 'sparse_arff'
else:
raise ValueError(
'When giving a list or a numpy.ndarray, '
'they should contain a list/ numpy.ndarray '
'for dense data or a dictionary for sparse '
'data. Got {!r} instead.'
.format(data[0])
)

arff_object = {
'relation': name,
Expand All @@ -577,10 +567,11 @@ def create_dataset(name, description, creator, contributor,
try:
# check if ARFF is valid
decoder = arff.ArffDecoder()
return_type = arff.COO if data_format == 'sparse_arff' else arff.DENSE
decoder.decode(
arff_dataset,
encode_nominal=True,
return_type=arff.COO if d_format == 'sparse_arff' else arff.DENSE
return_type=return_type
)
except arff.ArffException:
raise ValueError("The arguments you have provided \
Expand All @@ -589,7 +580,7 @@ def create_dataset(name, description, creator, contributor,
return OpenMLDataset(
name,
description,
data_format=d_format,
data_format=data_format,
creator=creator,
contributor=contributor,
collection_date=collection_date,
Expand Down
18 changes: 0 additions & 18 deletions tests/test_datasets/test_dataset_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,18 +683,6 @@ def test_create_invalid_dataset(self):
**param
)

def test_create_dataset_warning(self):

parameters = self._get_empty_param_for_dataset()
parameters['format'] = 'arff'
with catch_warnings():
filterwarnings('error')
self.assertRaises(
DeprecationWarning,
create_dataset,
**parameters
)

def test_get_online_dataset_arff(self):

# Australian dataset
Expand Down Expand Up @@ -768,7 +756,6 @@ def test_create_dataset_pandas(self):
citation=citation,
attributes='auto',
data=df,
format=None,
version_label='test',
original_data_url=original_data_url,
paper_url=paper_url
Expand Down Expand Up @@ -803,7 +790,6 @@ def test_create_dataset_pandas(self):
citation=citation,
attributes='auto',
data=df,
format=None,
version_label='test',
original_data_url=original_data_url,
paper_url=paper_url
Expand Down Expand Up @@ -840,7 +826,6 @@ def test_create_dataset_pandas(self):
citation=citation,
attributes=attributes,
data=df,
format=None,
version_label='test',
original_data_url=original_data_url,
paper_url=paper_url
Expand Down Expand Up @@ -892,7 +877,6 @@ def test_create_dataset_row_id_attribute_error(self):
attributes='auto',
data=df,
row_id_attribute='unknown_row_id',
format=None,
version_label='test',
original_data_url=original_data_url,
paper_url=paper_url
Expand Down Expand Up @@ -939,7 +923,6 @@ def test_create_dataset_row_id_attribute_inference(self):
attributes='auto',
data=df,
row_id_attribute=row_id,
format=None,
version_label='test',
original_data_url=original_data_url,
paper_url=paper_url
Expand Down Expand Up @@ -986,7 +969,6 @@ def test_create_dataset_attributes_auto_without_df(self):
citation=citation,
attributes=attributes,
data=data,
format=None,
version_label='test',
original_data_url=original_data_url,
paper_url=paper_url
Expand Down