From 5359f3ad081abbec16d8e09d8dbc61ab4bb53ad4 Mon Sep 17 00:00:00 2001 From: janvanrijn Date: Tue, 22 Jan 2019 18:44:56 -0500 Subject: [PATCH 1/3] added ability to obtain per fold evaluation measures --- openml/evaluations/evaluation.py | 9 ++-- openml/evaluations/functions.py | 42 +++++++++++----- openml/runs/functions.py | 2 +- .../test_evaluation_functions.py | 48 +++++++++++++++++-- 4 files changed, 82 insertions(+), 19 deletions(-) diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py index 70acf0029..f297d7054 100644 --- a/openml/evaluations/evaluation.py +++ b/openml/evaluations/evaluation.py @@ -1,6 +1,6 @@ class OpenMLEvaluation(object): - ''' + """ Contains all meta-information about a run / evaluation combination, according to the evaluation/list function @@ -26,11 +26,13 @@ class OpenMLEvaluation(object): the time of evaluation value : float the value of this evaluation + values : List[float] + the values per repeat and fold (if requested) array_data : str list of information per class (e.g., in case of precision, auroc, recall) - ''' + """ def __init__(self, run_id, task_id, setup_id, flow_id, flow_name, - data_id, data_name, function, upload_time, value, + data_id, data_name, function, upload_time, value, values, array_data=None): self.run_id = run_id self.task_id = task_id @@ -42,4 +44,5 @@ def __init__(self, run_id, task_id, setup_id, flow_id, flow_name, self.function = function self.upload_time = upload_time self.value = value + self.values = values self.array_data = array_data diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index a7691a72e..88916026d 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -1,13 +1,14 @@ +import csv import xmltodict -from openml.exceptions import OpenMLServerNoResult import openml.utils import openml._api_calls from ..evaluations import OpenMLEvaluation def list_evaluations(function, offset=None, size=None, id=None, task=None, - setup=None, flow=None, uploader=None, tag=None): + setup=None, flow=None, uploader=None, tag=None, + per_fold=None): """ List all run-evaluation pairs matching all of the given filters. (Supports large amount of results) @@ -33,13 +34,19 @@ def list_evaluations(function, offset=None, size=None, id=None, task=None, tag : str, optional + per_fold : bool, optional + Returns ------- dict """ + if per_fold is not None: + per_fold = str(per_fold).lower() - return openml.utils._list_all(_list_evaluations, function, offset=offset, size=size, - id=id, task=task, setup=setup, flow=flow, uploader=uploader, tag=tag) + return openml.utils._list_all(_list_evaluations, function, offset=offset, + size=size, id=id, task=task, setup=setup, + flow=flow, uploader=uploader, tag=tag, + per_fold=per_fold) def _list_evaluations(function, id=None, task=None, @@ -94,11 +101,12 @@ def _list_evaluations(function, id=None, task=None, def __list_evaluations(api_call): """Helper function to parse API calls which are lists of runs""" xml_string = openml._api_calls._perform_api_call(api_call) + print(xml_string) evals_dict = xmltodict.parse(xml_string, force_list=('oml:evaluation',)) # Minimalistic check if the XML is useful if 'oml:evaluations' not in evals_dict: - raise ValueError('Error in return XML, does not contain "oml:evaluations": %s' - % str(evals_dict)) + raise ValueError('Error in return XML, does not contain ' + '"oml:evaluations": %s' % str(evals_dict)) assert type(evals_dict['oml:evaluations']['oml:evaluation']) == list, \ type(evals_dict['oml:evaluations']) @@ -106,15 +114,25 @@ def __list_evaluations(api_call): evals = dict() for eval_ in evals_dict['oml:evaluations']['oml:evaluation']: run_id = int(eval_['oml:run_id']) + value = None + values = None array_data = None + if 'oml:value' in eval_: + value = float(eval_['oml:value']) + if 'oml:values' in eval_: + values = csv.reader(eval_['oml:values']) if 'oml:array_data' in eval_: array_data = eval_['oml:array_data'] - evals[run_id] = OpenMLEvaluation(int(eval_['oml:run_id']), int(eval_['oml:task_id']), - int(eval_['oml:setup_id']), int(eval_['oml:flow_id']), - eval_['oml:flow_name'], eval_['oml:data_id'], - eval_['oml:data_name'], eval_['oml:function'], - eval_['oml:upload_time'], float(eval_['oml:value']), - array_data) + evals[run_id] = OpenMLEvaluation(int(eval_['oml:run_id']), + int(eval_['oml:task_id']), + int(eval_['oml:setup_id']), + int(eval_['oml:flow_id']), + eval_['oml:flow_name'], + eval_['oml:data_id'], + eval_['oml:data_name'], + eval_['oml:function'], + eval_['oml:upload_time'], + value, values, array_data) return evals diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 9dcb96a42..5dbfe1948 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -179,7 +179,7 @@ def _publish_flow_if_necessary(flow): except OpenMLServerException as e: if e.message == "flow already exists": # TODO: JvR: the following lines of code can be replaced by - # a pass (after changing the unit test) as run_flow_on_task does + # a pass (after changing the unit tests) as run_flow_on_task does # not longer rely on it flow_id = openml.flows.flow_exists(flow.name, flow.external_version) diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py index be55c2cd8..598655de9 100644 --- a/tests/test_evaluations/test_evaluation_functions.py +++ b/tests/test_evaluations/test_evaluation_functions.py @@ -2,6 +2,7 @@ import openml.evaluations from openml.testing import TestBase + class TestEvaluationFunctions(TestBase): _multiprocess_can_split_ = True @@ -15,6 +16,10 @@ def test_evaluation_list_filter_task(self): self.assertGreater(len(evaluations), 100) for run_id in evaluations.keys(): self.assertEquals(evaluations[run_id].task_id, task_id) + # default behaviour of this method: return aggregated results (not + # per fold) + self.assertIsNotNone(evaluations[run_id].value) + self.assertIsNone(evaluations[run_id].values) def test_evaluation_list_filter_uploader_ID_16(self): openml.config.server = self.production_server @@ -23,7 +28,7 @@ def test_evaluation_list_filter_uploader_ID_16(self): evaluations = openml.evaluations.list_evaluations("predictive_accuracy", uploader=[uploader_id]) - self.assertGreater(len(evaluations), 100) + self.assertGreater(len(evaluations), 50) def test_evaluation_list_filter_uploader_ID_10(self): openml.config.server = self.production_server @@ -32,9 +37,13 @@ def test_evaluation_list_filter_uploader_ID_10(self): evaluations = openml.evaluations.list_evaluations("predictive_accuracy", setup=[setup_id]) - self.assertGreater(len(evaluations), 100) + self.assertGreater(len(evaluations), 50) for run_id in evaluations.keys(): self.assertEquals(evaluations[run_id].setup_id, setup_id) + # default behaviour of this method: return aggregated results (not + # per fold) + self.assertIsNotNone(evaluations[run_id].value) + self.assertIsNone(evaluations[run_id].values) def test_evaluation_list_filter_flow(self): openml.config.server = self.production_server @@ -46,17 +55,25 @@ def test_evaluation_list_filter_flow(self): self.assertGreater(len(evaluations), 2) for run_id in evaluations.keys(): self.assertEquals(evaluations[run_id].flow_id, flow_id) + # default behaviour of this method: return aggregated results (not + # per fold) + self.assertIsNotNone(evaluations[run_id].value) + self.assertIsNone(evaluations[run_id].values) def test_evaluation_list_filter_run(self): openml.config.server = self.production_server - run_id = 1 + run_id = 12 evaluations = openml.evaluations.list_evaluations("predictive_accuracy", id=[run_id]) self.assertEquals(len(evaluations), 1) for run_id in evaluations.keys(): self.assertEquals(evaluations[run_id].run_id, run_id) + # default behaviour of this method: return aggregated results (not + # per fold) + self.assertIsNotNone(evaluations[run_id].value) + self.assertIsNone(evaluations[run_id].values) def test_evaluation_list_limit(self): openml.config.server = self.production_server @@ -70,3 +87,28 @@ def test_list_evaluations_empty(self): raise ValueError('UnitTest Outdated, got somehow results') self.assertIsInstance(evaluations, dict) + + def test_evaluation_list_per_fold(self): + openml.config.server = self.production_server + size = 1000 + task_ids = [6] + uploader_ids = [1] + flow_ids = [6969] + + evaluations = openml.evaluations.list_evaluations( + "predictive_accuracy", size=size, offset=0, task=task_ids, + flow=flow_ids, uploader=uploader_ids, per_fold=True) + + self.assertEquals(len(evaluations), size) + for run_id in evaluations.keys(): + self.assertIsNone(evaluations[run_id].value) + self.assertIsNotNone(evaluations[run_id].values) + # potentially we could also test array values, but these might be + # added in the future + + evaluations = openml.evaluations.list_evaluations( + "predictive_accuracy", size=size, offset=0, task=task_ids, + flow=flow_ids, uploader=uploader_ids, per_fold=False) + for run_id in evaluations.keys(): + self.assertIsNotNone(evaluations[run_id].value) + self.assertIsNone(evaluations[run_id].values) From 1884c6c5894803e371d2744266c33a81ab7ed7d4 Mon Sep 17 00:00:00 2001 From: janvanrijn Date: Wed, 23 Jan 2019 15:17:38 -0500 Subject: [PATCH 2/3] added json loads --- openml/evaluations/functions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index 88916026d..02a3152bb 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -1,4 +1,4 @@ -import csv +import json import xmltodict import openml.utils @@ -101,7 +101,6 @@ def _list_evaluations(function, id=None, task=None, def __list_evaluations(api_call): """Helper function to parse API calls which are lists of runs""" xml_string = openml._api_calls._perform_api_call(api_call) - print(xml_string) evals_dict = xmltodict.parse(xml_string, force_list=('oml:evaluation',)) # Minimalistic check if the XML is useful if 'oml:evaluations' not in evals_dict: @@ -120,7 +119,7 @@ def __list_evaluations(api_call): if 'oml:value' in eval_: value = float(eval_['oml:value']) if 'oml:values' in eval_: - values = csv.reader(eval_['oml:values']) + values = json.loads(eval_['oml:values']) if 'oml:array_data' in eval_: array_data = eval_['oml:array_data'] From 1457bc052a85c749f8af1ae7dc0f542e53074b4f Mon Sep 17 00:00:00 2001 From: janvanrijn Date: Mon, 11 Feb 2019 15:52:34 +0100 Subject: [PATCH 3/3] updated unit test --- tests/test_runs/test_run_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 0c983d861..1bee66d3d 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -999,7 +999,7 @@ def _check_run(self, run): def test_get_runs_list(self): # TODO: comes from live, no such lists on test openml.config.server = self.production_server - runs = openml.runs.list_runs(id=[2]) + runs = openml.runs.list_runs(id=[2], show_errors=True) self.assertEqual(len(runs), 1) for rid in runs: self._check_run(runs[rid])