From 5e0d214e8b5c0d41176ecec6333c3d7493223e3d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 28 Apr 2021 16:26:26 +0200
Subject: [PATCH 1/5] Create a non-linear retry policy.
---
openml/_api_calls.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index aee67d8c6..a9c3c98af 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -3,7 +3,9 @@
import time
import hashlib
import logging
+import math
import pathlib
+import random
import requests
import urllib.parse
import xml
@@ -262,7 +264,10 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
if retry_counter >= n_retries:
raise
else:
- time.sleep(retry_counter)
+ wait = retry_counter + (1 / (1 + math.exp(-(retry_counter - 6)))) * 20
+ variation = random.gauss(0, wait / 10)
+ wait_time = max(1.0, wait + variation)
+ time.sleep(wait_time)
if response is None:
raise ValueError("This should never happen!")
return response
From 761ad9d7f76e4eef78ab0d01ac946bb7a396b3c1 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 3 May 2021 10:21:09 +0200
Subject: [PATCH 2/5] Add retry_policy documentation
---
doc/progress.rst | 1 +
doc/usage.rst | 9 +++++++--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/doc/progress.rst b/doc/progress.rst
index 8d3f4ec1d..22728f0a7 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -9,6 +9,7 @@ Changelog
0.12.2
~~~~~~
+* ADD #1065: Add a ``retry_policy`` configuration option that determines the frequency and number of times to attempt to retry server requests.
* DOC: Fixes a few broken links in the documentation.
* MAINT/DOC: Automatically check for broken external links when building the documentation.
* MAINT/DOC: Fail documentation building on warnings. This will make the documentation building
diff --git a/doc/usage.rst b/doc/usage.rst
index 7bf247f4d..10928fac0 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -52,9 +52,14 @@ which are separated by newlines. The following keys are defined:
* if set to ``True``, when ``run_flow_on_task`` or similar methods are called a lookup is performed to see if there already exists such a run on the server. If so, download those results instead.
* if not given, will default to ``True``.
+* retry_policy:
+ * Defines how to react when the server is unavailable or experiencing high load. It determines both how often to attempt to reconnect and how quickly to do so. Please don't use ``human`` in an automated script that you run more than one instance of, it might increase the time to complete your jobs and that of others.
+ * human: For people running openml in interactive fashion. Try only a few times, but in quick succession.
+ * robot: For people using openml in an automated fashion. Keep trying to reconnect for a longer time, quickly increasing the time between retries.
+
* connection_n_retries:
- * number of connection retries.
- * default: 2. Maximum number of retries: 20.
+ * number of connection retries
+ * default depends on retry_policy (5 for ``human``, 50 for ``robot``)
* verbosity:
* 0: normal output
From 40e349390fcca958eed531f62e1ca633e59b1383 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 3 May 2021 11:04:30 +0200
Subject: [PATCH 3/5] Remove max_retries, add retry_policy to config
Retry_policy is not yet used to affect retry frequency.
Max_retries is removed because it's mostly useless, server overload is
caused by many users (or parallel scripts) not a single user
reconnecting. The retry policies will have acceptable times between
tries either way.
---
doc/usage.rst | 2 +-
openml/_api_calls.py | 2 +-
openml/cli.py | 10 +++----
openml/config.py | 46 ++++++++++++++++++++------------
tests/test_openml/test_config.py | 4 +--
5 files changed, 37 insertions(+), 27 deletions(-)
diff --git a/doc/usage.rst b/doc/usage.rst
index 10928fac0..d9212099d 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -54,7 +54,7 @@ which are separated by newlines. The following keys are defined:
* retry_policy:
* Defines how to react when the server is unavailable or experiencing high load. It determines both how often to attempt to reconnect and how quickly to do so. Please don't use ``human`` in an automated script that you run more than one instance of, it might increase the time to complete your jobs and that of others.
- * human: For people running openml in interactive fashion. Try only a few times, but in quick succession.
+ * human (default): For people running openml in interactive fashion. Try only a few times, but in quick succession.
* robot: For people using openml in an automated fashion. Keep trying to reconnect for a longer time, quickly increasing the time between retries.
* connection_n_retries:
diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index a9c3c98af..5526cdd97 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -219,7 +219,7 @@ def __is_checksum_equal(downloaded_file, md5_checksum=None):
def _send_request(request_method, url, data, files=None, md5_checksum=None):
- n_retries = max(1, min(config.connection_n_retries, config.max_retries))
+ n_retries = max(1, config.connection_n_retries)
response = None
with requests.Session() as session:
diff --git a/openml/cli.py b/openml/cli.py
index b26e67d2e..ded1be54c 100644
--- a/openml/cli.py
+++ b/openml/cli.py
@@ -149,11 +149,9 @@ def check_cache_dir(path: str) -> str:
def configure_connection_n_retries(value: str) -> None:
def valid_connection_retries(n: str) -> str:
if not n.isdigit():
- return f"Must be an integer number (smaller than {config.max_retries})."
- if int(n) > config.max_retries:
- return f"connection_n_retries may not exceed {config.max_retries}."
- if int(n) == 0:
- return "connection_n_retries must be non-zero."
+ return f"'{n}' is not a valid positive integer."
+ if int(n) <= 0:
+ return "connection_n_retries must be positive."
return ""
configure_field(
@@ -161,7 +159,7 @@ def valid_connection_retries(n: str) -> str:
value=value,
check_with_message=valid_connection_retries,
intro_message="Configuring the number of times to attempt to connect to the OpenML Server",
- input_message=f"Enter an integer between 0 and {config.max_retries}: ",
+ input_message="Enter a positive integer: ",
)
diff --git a/openml/config.py b/openml/config.py
index 7295ea82e..50e635761 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -9,7 +9,7 @@
import os
from pathlib import Path
import platform
-from typing import Tuple, cast, Any
+from typing import Tuple, cast, Any, Optional
from io import StringIO
import configparser
@@ -94,11 +94,10 @@ def set_file_log_level(file_output_level: int):
else os.path.join("~", ".openml")
),
"avoid_duplicate_runs": "True",
- "connection_n_retries": "10",
- "max_retries": "20",
+ "retry_policy": "human",
+ "connection_n_retries": "5",
}
-
# Default values are actually added here in the _setup() function which is
# called at the end of this module
server = str(_defaults["server"]) # so mypy knows it is a string
@@ -121,9 +120,26 @@ def get_server_base_url() -> str:
cache_directory = str(_defaults["cachedir"]) # so mypy knows it is a string
avoid_duplicate_runs = True if _defaults["avoid_duplicate_runs"] == "True" else False
-# Number of retries if the connection breaks
+retry_policy = _defaults["retry_policy"]
connection_n_retries = int(_defaults["connection_n_retries"])
-max_retries = int(_defaults["max_retries"])
+
+
+def set_retry_policy(value: str, n_retries: Optional[int] = None) -> None:
+ global retry_policy
+ global connection_n_retries
+ default_retries_by_policy = dict(human=5, robot=50)
+
+ if value not in default_retries_by_policy:
+ raise ValueError(
+ f"Detected retry_policy '{value}' but must be one of {default_retries_by_policy}"
+ )
+ if n_retries is not None and not isinstance(n_retries, int):
+ raise TypeError(f"`n_retries` must be of type `int` or `None` but is `{type(n_retries)}`.")
+ if isinstance(n_retries, int) and n_retries < 1:
+ raise ValueError(f"`n_retries` is '{n_retries}' but must be positive.")
+
+ retry_policy = value
+ connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries
class ConfigurationForExamples:
@@ -200,8 +216,6 @@ def _setup(config=None):
global server
global cache_directory
global avoid_duplicate_runs
- global connection_n_retries
- global max_retries
config_file = determine_config_file_path()
config_dir = config_file.parent
@@ -233,8 +247,12 @@ def _get(config, key):
apikey = _get(config, "apikey")
server = _get(config, "server")
short_cache_dir = _get(config, "cachedir")
- connection_n_retries = int(_get(config, "connection_n_retries"))
- max_retries = int(_get(config, "max_retries"))
+
+ n_retries = _get(config, "connection_n_retries")
+ if n_retries is not None:
+ n_retries = int(n_retries)
+
+ set_retry_policy(_get(config, "retry_policy"), n_retries)
cache_directory = os.path.expanduser(short_cache_dir)
# create the cache subdirectory
@@ -256,12 +274,6 @@ def _get(config, key):
"not working properly." % config_dir
)
- if connection_n_retries > max_retries:
- raise ValueError(
- "A higher number of retries than {} is not allowed to keep the "
- "server load reasonable".format(max_retries)
- )
-
def set_field_in_config_file(field: str, value: Any):
""" Overwrites the `field` in the configuration file with the new `value`. """
@@ -312,7 +324,7 @@ def get_config_as_dict():
config["cachedir"] = cache_directory
config["avoid_duplicate_runs"] = avoid_duplicate_runs
config["connection_n_retries"] = connection_n_retries
- config["max_retries"] = max_retries
+ config["retry_policy"] = retry_policy
return config
diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py
index 2e2c609db..415dd5a2e 100644
--- a/tests/test_openml/test_config.py
+++ b/tests/test_openml/test_config.py
@@ -45,7 +45,7 @@ def test_get_config_as_dict(self):
_config["cachedir"] = self.workdir
_config["avoid_duplicate_runs"] = False
_config["connection_n_retries"] = 10
- _config["max_retries"] = 20
+ _config["retry_policy"] = "human"
self.assertIsInstance(config, dict)
self.assertEqual(len(config), 6)
self.assertDictEqual(config, _config)
@@ -57,8 +57,8 @@ def test_setup_with_config(self):
_config["server"] = "https://www.openml.org/api/v1/xml"
_config["cachedir"] = self.workdir
_config["avoid_duplicate_runs"] = True
+ _config["retry_policy"] = "human"
_config["connection_n_retries"] = 100
- _config["max_retries"] = 1000
orig_config = openml.config.get_config_as_dict()
openml.config._setup(_config)
updated_config = openml.config.get_config_as_dict()
From 9f522f34ce20cbc29bff41dcafa5ecd681f951d8 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 3 May 2021 11:18:19 +0200
Subject: [PATCH 4/5] Add retry_policy to the cli config
---
openml/cli.py | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/openml/cli.py b/openml/cli.py
index ded1be54c..15654cfc6 100644
--- a/openml/cli.py
+++ b/openml/cli.py
@@ -215,6 +215,35 @@ def is_zero_through_two(verbosity: str) -> str:
)
+def configure_retry_policy(value: str) -> None:
+ def is_known_policy(policy: str) -> str:
+ if policy in ["human", "robot"]:
+ return ""
+ return "Must be 'human' or 'robot'."
+
+ def autocomplete_policy(policy: str) -> str:
+ for option in ["human", "robot"]:
+ if option.startswith(policy.lower()):
+ return option
+ return policy
+
+ intro_message = (
+ "Set the retry policy which determines how to react if the server is unresponsive."
+ "We recommend 'human' for interactive usage and 'robot' for scripts."
+ "'human': try a few times in quick succession, less reliable but quicker response."
+ "'robot': try many times with increasing intervals, more reliable but slower response."
+ )
+
+ configure_field(
+ field="retry_policy",
+ value=value,
+ check_with_message=is_known_policy,
+ intro_message=intro_message,
+ input_message="Enter 'human' or 'robot': ",
+ sanitize=autocomplete_policy,
+ )
+
+
def configure_field(
field: str,
value: Union[None, str],
@@ -270,6 +299,7 @@ def configure(args: argparse.Namespace):
"apikey": configure_apikey,
"server": configure_server,
"cachedir": configure_cachedir,
+ "retry_policy": configure_retry_policy,
"connection_n_retries": configure_connection_n_retries,
"avoid_duplicate_runs": configure_avoid_duplicate_runs,
"verbosity": configure_verbosity,
From 94927bca19ddededf975f36ae0ebd994ab658bf0 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 6 May 2021 11:38:01 +0200
Subject: [PATCH 5/5] Use policy to determine wait time
Some unit tests needed to be updated.
In particular for the hash exception unit test, because we expect a hash
exception, we don't want to use the ``robot`` policy as that will make
it query and wait unnecessarily.
---
openml/_api_calls.py | 15 +++++++++++----
openml/testing.py | 4 +++-
tests/test_datasets/test_dataset_functions.py | 5 +++++
tests/test_openml/test_api_calls.py | 2 +-
tests/test_openml/test_config.py | 4 ++--
5 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index 2a7532cfd..b5ed976bc 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -263,10 +263,17 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
if retry_counter >= n_retries:
raise
else:
- wait = retry_counter + (1 / (1 + math.exp(-(retry_counter - 6)))) * 20
- variation = random.gauss(0, wait / 10)
- wait_time = max(1.0, wait + variation)
- time.sleep(wait_time)
+
+ def robot(n: int) -> float:
+ wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60
+ variation = random.gauss(0, wait / 10)
+ return max(1.0, wait + variation)
+
+ def human(n: int) -> float:
+ return max(1.0, n)
+
+ delay = {"human": human, "robot": robot}[config.retry_policy](retry_counter)
+ time.sleep(delay)
if response is None:
raise ValueError("This should never happen!")
return response
diff --git a/openml/testing.py b/openml/testing.py
index f8e22bb4c..922d373b2 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -94,8 +94,9 @@ def setUp(self, n_levels: int = 1):
openml.config.cache_directory = self.workdir
# Increase the number of retries to avoid spurious server failures
+ self.retry_policy = openml.config.retry_policy
self.connection_n_retries = openml.config.connection_n_retries
- openml.config.connection_n_retries = 10
+ openml.config.set_retry_policy("robot", n_retries=20)
def tearDown(self):
os.chdir(self.cwd)
@@ -109,6 +110,7 @@ def tearDown(self):
raise
openml.config.server = self.production_server
openml.config.connection_n_retries = self.connection_n_retries
+ openml.config.retry_policy = self.retry_policy
@classmethod
def _mark_entity_for_removal(self, entity_type, entity_id):
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index ec9dd6c53..9d67ee177 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -506,6 +506,9 @@ def test__getarff_md5_issue(self):
"oml:md5_checksum": "abc",
"oml:url": "https://www.openml.org/data/download/61",
}
+ n = openml.config.connection_n_retries
+ openml.config.connection_n_retries = 1
+
self.assertRaisesRegex(
OpenMLHashException,
"Checksum of downloaded file is unequal to the expected checksum abc when downloading "
@@ -514,6 +517,8 @@ def test__getarff_md5_issue(self):
description,
)
+ openml.config.connection_n_retries = n
+
def test__get_dataset_features(self):
features_file = _get_dataset_features_file(self.workdir, 2)
self.assertIsInstance(features_file, str)
diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py
index 459a0cdf5..16bdbc7df 100644
--- a/tests/test_openml/test_api_calls.py
+++ b/tests/test_openml/test_api_calls.py
@@ -29,4 +29,4 @@ def test_retry_on_database_error(self, Session_class_mock, _):
):
openml._api_calls._send_request("get", "/abc", {})
- self.assertEqual(Session_class_mock.return_value.__enter__.return_value.get.call_count, 10)
+ self.assertEqual(Session_class_mock.return_value.__enter__.return_value.get.call_count, 20)
diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py
index 415dd5a2e..638f02420 100644
--- a/tests/test_openml/test_config.py
+++ b/tests/test_openml/test_config.py
@@ -44,8 +44,8 @@ def test_get_config_as_dict(self):
_config["server"] = "https://test.openml.org/api/v1/xml"
_config["cachedir"] = self.workdir
_config["avoid_duplicate_runs"] = False
- _config["connection_n_retries"] = 10
- _config["retry_policy"] = "human"
+ _config["connection_n_retries"] = 20
+ _config["retry_policy"] = "robot"
self.assertIsInstance(config, dict)
self.assertEqual(len(config), 6)
self.assertDictEqual(config, _config)