Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion openml/_api_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import xmltodict

from . import config
from .exceptions import OpenMLServerError, OpenMLServerException
from .exceptions import (OpenMLServerError, OpenMLServerException,
OpenMLServerNoResult)


def _perform_api_call(call, data=None, file_dictionary=None,
Expand Down Expand Up @@ -138,4 +139,6 @@ def _parse_server_exception(response):
additional = None
if 'oml:additional_information' in server_exception['oml:error']:
additional = server_exception['oml:error']['oml:additional_information']
if code in [370, 372]:
return OpenMLServerNoResult(code, message, additional)
return OpenMLServerException(code, message, additional)
38 changes: 27 additions & 11 deletions openml/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import logging
import os
import six
import sys

import arff

Expand All @@ -26,9 +25,9 @@ class OpenMLDataset(object):

Parameters
----------
name : string
name : str
Name of the dataset
description : string
description : str
Description of the dataset
FIXME : which of these do we actually need?
"""
Expand Down Expand Up @@ -82,7 +81,7 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
feature = OpenMLDataFeature(int(xmlfeature['oml:index']),
xmlfeature['oml:name'],
xmlfeature['oml:data_type'],
None, #todo add nominal values (currently not in database)
None, # todo add nominal values (currently not in database)
int(xmlfeature.get('oml:number_of_missing_values', 0)))
if idx != feature.index:
raise ValueError('Data features not provided in right order')
Expand Down Expand Up @@ -129,6 +128,28 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
logger.debug("Saved dataset %d: %s to file %s" %
(self.dataset_id, self.name, self.data_pickle_file))

def push_tag(self, tag):
    """Attach a tag to this dataset on the server.

    Sends this dataset's id together with the tag to the ``/data/tag``
    API call.

    Parameters
    ----------
    tag : str
        Tag to attach to the dataset.
    """
    _perform_api_call("/data/tag",
                      data={'data_id': self.dataset_id, 'tag': tag})

def remove_tag(self, tag):
    """Removes a tag from this dataset on the server.

    Sends this dataset's id together with the tag to the ``/data/untag``
    API call.

    Parameters
    ----------
    tag : str
        Tag to remove from the dataset.
    """
    data = {'data_id': self.dataset_id, 'tag': tag}
    _perform_api_call("/data/untag", data=data)

def __eq__(self, other):
if type(other) != OpenMLDataset:
return False
Expand Down Expand Up @@ -315,7 +336,6 @@ def retrieve_class_labels(self, target_name='class'):
else:
return None


def get_features_by_type(self, data_type, exclude=None,
exclude_ignore_attributes=True,
exclude_row_id_attribute=True):
Expand Down Expand Up @@ -377,11 +397,7 @@ def publish(self):

Returns
-------
return_code : int
Return code from server

return_value : string
xml return from server
self
"""

file_elements = {'description': self._to_xml()}
Expand All @@ -401,7 +417,7 @@ def _to_xml(self):

Returns
-------
xml_dataset : string
xml_dataset : str
XML description of the data.
"""
xml_dataset = ('<oml:data_set_description '
Expand Down
7 changes: 5 additions & 2 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import xmltodict

from .dataset import OpenMLDataset
from ..exceptions import OpenMLCacheException
from ..exceptions import OpenMLCacheException, OpenMLServerNoResult
from .. import config
from .._api_calls import _perform_api_call, _read_url

Expand Down Expand Up @@ -178,7 +178,10 @@ def list_datasets(offset=None, size=None, tag=None):

def _list_datasets(api_call):
# TODO add proper error handling here!
xml_string = _perform_api_call(api_call)
try:
xml_string = _perform_api_call(api_call)
except OpenMLServerNoResult:
return []
datasets_dict = xmltodict.parse(xml_string, force_list=('oml:dataset',))

# Minimalistic check if the XML is useful
Expand Down
7 changes: 6 additions & 1 deletion openml/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class OpenMLServerError(PyOpenMLError):
def __init__(self, message):
super(OpenMLServerError, self).__init__(message)

#

class OpenMLServerException(OpenMLServerError):
"""exception for when the result of the server was
not 200 (e.g., listing call w/o results). """
Expand All @@ -22,6 +22,11 @@ def __init__(self, code, message, additional=None):
super(OpenMLServerException, self).__init__(message)


class OpenMLServerNoResult(OpenMLServerException):
    """Server exception signalling that a request produced no results.

    Adds nothing to ``OpenMLServerException``; it exists as a distinct
    type so that callers can catch the empty-result case on its own.
    """


class OpenMLCacheException(PyOpenMLError):
"""Dataset / task etc not found in cache"""
def __init__(self, message):
Expand Down
20 changes: 20 additions & 0 deletions tests/test_datasets/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
from scipy import sparse
import six
from time import time

from openml.testing import TestBase
import openml
Expand Down Expand Up @@ -90,6 +91,25 @@ def test_get_data_with_ignore_attributes(self):
# TODO test multiple ignore attributes!


class OpenMLDatasetTestOnTestServer(TestBase):
    """Tag / untag round trip for a dataset fetched from the server."""

    def setUp(self):
        super(OpenMLDatasetTestOnTestServer, self).setUp()
        # Dataset 125 is longley, a really small dataset.
        self.dataset = openml.datasets.get_dataset(125)

    def test_tagging(self):
        """A pushed tag lists the dataset until the tag is removed."""
        # Test id plus a timestamp: the tag is expected to be unused so far,
        # which is why the first listing must come back empty.
        unique_tag = "testing_tag_{}_{}".format(self.id(), time())

        listed = openml.datasets.list_datasets(tag=unique_tag)
        self.assertEqual(len(listed), 0)

        # After tagging, exactly this dataset is listed under the tag.
        self.dataset.push_tag(unique_tag)
        listed = openml.datasets.list_datasets(tag=unique_tag)
        self.assertEqual(len(listed), 1)
        self.assertIn(125, listed)

        # After untagging, the listing is empty again.
        self.dataset.remove_tag(unique_tag)
        listed = openml.datasets.list_datasets(tag=unique_tag)
        self.assertEqual(len(listed), 0)


class OpenMLDatasetTestSparse(TestBase):
_multiprocess_can_split_ = True

Expand Down
1 change: 0 additions & 1 deletion tests/test_datasets/test_dataset_functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import unittest
import os
import os
import sys

if sys.version_info[0] >= 3:
Expand Down