Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions openml/evaluations/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

class OpenMLEvaluation(object):
'''
"""
Contains all meta-information about a run / evaluation combination,
according to the evaluation/list function

Expand All @@ -26,11 +26,13 @@ class OpenMLEvaluation(object):
the time of evaluation
value : float
the value of this evaluation
values : List[float]
the values per repeat and fold (if requested)
array_data : str
list of information per class (e.g., in case of precision, auroc, recall)
'''
"""
def __init__(self, run_id, task_id, setup_id, flow_id, flow_name,
data_id, data_name, function, upload_time, value,
data_id, data_name, function, upload_time, value, values,
array_data=None):
self.run_id = run_id
self.task_id = task_id
Expand All @@ -42,4 +44,5 @@ def __init__(self, run_id, task_id, setup_id, flow_id, flow_name,
self.function = function
self.upload_time = upload_time
self.value = value
self.values = values
self.array_data = array_data
41 changes: 29 additions & 12 deletions openml/evaluations/functions.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import json
import xmltodict

from openml.exceptions import OpenMLServerNoResult
import openml.utils
import openml._api_calls
from ..evaluations import OpenMLEvaluation


def list_evaluations(function, offset=None, size=None, id=None, task=None,
                     setup=None, flow=None, uploader=None, tag=None,
                     per_fold=None):
    """List all run-evaluation pairs matching all of the given filters.

    (Supports large amount of results)

    Parameters
    ----------
    function : str
        the evaluation function, e.g., predictive_accuracy
    offset : int, optional
        the number of results to skip, starting from the first
    size : int, optional
        the maximum number of results to return
    id : list, optional
        run ids to filter on
    task : list, optional
        task ids to filter on
    setup : list, optional
        setup ids to filter on
    flow : list, optional
        flow ids to filter on
    uploader : list, optional
        uploader ids to filter on
    tag : str, optional

    per_fold : bool, optional

    Returns
    -------
    dict
    """
    # The REST API expects the per_fold flag as a lowercase string
    # ("true"/"false"); None means "omit the filter entirely".
    per_fold_arg = per_fold if per_fold is None else str(per_fold).lower()

    return openml.utils._list_all(
        _list_evaluations, function,
        offset=offset, size=size, id=id, task=task, setup=setup,
        flow=flow, uploader=uploader, tag=tag, per_fold=per_fold_arg)


def _list_evaluations(function, id=None, task=None,
Expand Down Expand Up @@ -97,24 +104,34 @@ def __list_evaluations(api_call):
evals_dict = xmltodict.parse(xml_string, force_list=('oml:evaluation',))
# Minimalistic check if the XML is useful
if 'oml:evaluations' not in evals_dict:
raise ValueError('Error in return XML, does not contain "oml:evaluations": %s'
% str(evals_dict))
raise ValueError('Error in return XML, does not contain '
'"oml:evaluations": %s' % str(evals_dict))

assert type(evals_dict['oml:evaluations']['oml:evaluation']) == list, \
type(evals_dict['oml:evaluations'])

evals = dict()
for eval_ in evals_dict['oml:evaluations']['oml:evaluation']:
run_id = int(eval_['oml:run_id'])
value = None
values = None
array_data = None
if 'oml:value' in eval_:
value = float(eval_['oml:value'])
if 'oml:values' in eval_:
values = json.loads(eval_['oml:values'])
if 'oml:array_data' in eval_:
array_data = eval_['oml:array_data']

evals[run_id] = OpenMLEvaluation(int(eval_['oml:run_id']), int(eval_['oml:task_id']),
int(eval_['oml:setup_id']), int(eval_['oml:flow_id']),
eval_['oml:flow_name'], eval_['oml:data_id'],
eval_['oml:data_name'], eval_['oml:function'],
eval_['oml:upload_time'], float(eval_['oml:value']),
array_data)
evals[run_id] = OpenMLEvaluation(int(eval_['oml:run_id']),
int(eval_['oml:task_id']),
int(eval_['oml:setup_id']),
int(eval_['oml:flow_id']),
eval_['oml:flow_name'],
eval_['oml:data_id'],
eval_['oml:data_name'],
eval_['oml:function'],
eval_['oml:upload_time'],
value, values, array_data)

return evals
2 changes: 1 addition & 1 deletion openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def _publish_flow_if_necessary(flow):
except OpenMLServerException as e:
if e.message == "flow already exists":
# TODO: JvR: the following lines of code can be replaced by
# a pass (after changing the unit test) as run_flow_on_task does
# a pass (after changing the unit tests) as run_flow_on_task does
# not longer rely on it
flow_id = openml.flows.flow_exists(flow.name,
flow.external_version)
Expand Down
48 changes: 45 additions & 3 deletions tests/test_evaluations/test_evaluation_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import openml.evaluations
from openml.testing import TestBase


class TestEvaluationFunctions(TestBase):
_multiprocess_can_split_ = True

Expand All @@ -15,6 +16,10 @@ def test_evaluation_list_filter_task(self):
self.assertGreater(len(evaluations), 100)
for run_id in evaluations.keys():
self.assertEquals(evaluations[run_id].task_id, task_id)
# default behaviour of this method: return aggregated results (not
# per fold)
self.assertIsNotNone(evaluations[run_id].value)
self.assertIsNone(evaluations[run_id].values)

def test_evaluation_list_filter_uploader_ID_16(self):
openml.config.server = self.production_server
Expand All @@ -23,7 +28,7 @@ def test_evaluation_list_filter_uploader_ID_16(self):

evaluations = openml.evaluations.list_evaluations("predictive_accuracy", uploader=[uploader_id])

self.assertGreater(len(evaluations), 100)
self.assertGreater(len(evaluations), 50)

def test_evaluation_list_filter_uploader_ID_10(self):
openml.config.server = self.production_server
Expand All @@ -32,9 +37,13 @@ def test_evaluation_list_filter_uploader_ID_10(self):

evaluations = openml.evaluations.list_evaluations("predictive_accuracy", setup=[setup_id])

self.assertGreater(len(evaluations), 100)
self.assertGreater(len(evaluations), 50)
for run_id in evaluations.keys():
self.assertEquals(evaluations[run_id].setup_id, setup_id)
# default behaviour of this method: return aggregated results (not
# per fold)
self.assertIsNotNone(evaluations[run_id].value)
self.assertIsNone(evaluations[run_id].values)

def test_evaluation_list_filter_flow(self):
openml.config.server = self.production_server
Expand All @@ -46,17 +55,25 @@ def test_evaluation_list_filter_flow(self):
self.assertGreater(len(evaluations), 2)
for run_id in evaluations.keys():
self.assertEquals(evaluations[run_id].flow_id, flow_id)
# default behaviour of this method: return aggregated results (not
# per fold)
self.assertIsNotNone(evaluations[run_id].value)
self.assertIsNone(evaluations[run_id].values)

def test_evaluation_list_filter_run(self):
    # Evaluation listings only exist on the production server, not on
    # the test server.
    openml.config.server = self.production_server

    run_id = 12

    evaluations = openml.evaluations.list_evaluations("predictive_accuracy",
                                                      id=[run_id])

    self.assertEquals(len(evaluations), 1)
    for rid, evaluation in evaluations.items():
        self.assertEquals(evaluation.run_id, rid)
        # default behaviour of this method: return aggregated results (not
        # per fold), i.e. a scalar `value` and no per-fold `values`
        self.assertIsNotNone(evaluation.value)
        self.assertIsNone(evaluation.values)

def test_evaluation_list_limit(self):
openml.config.server = self.production_server
Expand All @@ -70,3 +87,28 @@ def test_list_evaluations_empty(self):
raise ValueError('UnitTest Outdated, got somehow results')

self.assertIsInstance(evaluations, dict)

def test_evaluation_list_per_fold(self):
    """Check the per_fold flag of list_evaluations.

    With per_fold=True, each evaluation must carry per-repeat/fold
    `values` and no aggregated `value`; with per_fold=False, the
    reverse must hold.
    """
    # Evaluation listings only exist on the production server.
    openml.config.server = self.production_server
    size = 1000
    task_ids = [6]
    uploader_ids = [1]
    flow_ids = [6969]

    evaluations = openml.evaluations.list_evaluations(
        "predictive_accuracy", size=size, offset=0, task=task_ids,
        flow=flow_ids, uploader=uploader_ids, per_fold=True)

    # assertEqual (not the deprecated assertEquals alias, removed in
    # Python 3.12), consistent with test_get_runs_list.
    self.assertEqual(len(evaluations), size)
    for run_id in evaluations.keys():
        self.assertIsNone(evaluations[run_id].value)
        self.assertIsNotNone(evaluations[run_id].values)
    # potentially we could also test array values, but these might be
    # added in the future

    # The same query without per-fold results must return only the
    # aggregated value.
    evaluations = openml.evaluations.list_evaluations(
        "predictive_accuracy", size=size, offset=0, task=task_ids,
        flow=flow_ids, uploader=uploader_ids, per_fold=False)
    for run_id in evaluations.keys():
        self.assertIsNotNone(evaluations[run_id].value)
        self.assertIsNone(evaluations[run_id].values)
2 changes: 1 addition & 1 deletion tests/test_runs/test_run_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -999,7 +999,7 @@ def _check_run(self, run):
def test_get_runs_list(self):
    # TODO: comes from live, no such lists on test
    openml.config.server = self.production_server
    run_id = 2
    runs = openml.runs.list_runs(id=[run_id], show_errors=True)
    self.assertEqual(len(runs), 1)
    for run in runs.values():
        self._check_run(run)
Expand Down