Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 44 additions & 37 deletions rosette/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,11 @@

"""
Python client for the Rosette API.

Copyright (c) 2014-2015 Basis Technology Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand All @@ -24,10 +21,16 @@
import logging
import sys
import pprint
from datetime import datetime

_ACCEPTABLE_SERVER_VERSION = "0.5"
_GZIP_BYTEARRAY = bytearray([0x1F, 0x8b, 0x08])
N_RETRIES = 1
N_RETRIES = 3
HTTP_CONNECTION = None
REUSE_CONNECTION = True
CONNECTION_TYPE = ""
CONNECTION_START = datetime.now()
CONNECTION_REFRESH_DURATION = 86400


_IsPy3 = sys.version_info[0] == 3
Expand Down Expand Up @@ -67,22 +70,39 @@ def _my_loads(obj):


def _retrying_request(op, url, data, headers):
global HTTP_CONNECTION
global REUSE_CONNECTION
global CONNECTION_TYPE
global CONNECTION_START
global CONNECTION_REFRESH_DURATION

timeDelta = datetime.now() - CONNECTION_START
totalTime = timeDelta.days * 86400 + timeDelta.seconds
parsed = urlparse.urlparse(url)
if parsed.scheme != CONNECTION_TYPE:
totalTime = CONNECTION_REFRESH_DURATION

if not REUSE_CONNECTION or HTTP_CONNECTION is None or totalTime >= CONNECTION_REFRESH_DURATION:
parsed = urlparse.urlparse(url)
loc = parsed.netloc
CONNECTION_TYPE = parsed.scheme
CONNECTION_START = datetime.now()
if parsed.scheme == "https":
HTTP_CONNECTION = httplib.HTTPSConnection(loc)
else:
HTTP_CONNECTION = httplib.HTTPConnection(loc)

message = None
code = "unknownError"
parsed = urlparse.urlparse(url)
loc = parsed.netloc
if parsed.scheme == "https":
conn = httplib.HTTPSConnection(loc)
else:
conn = httplib.HTTPConnection(loc)
rdata = None
for i in range(N_RETRIES + 1):
conn.request(op, url, data, headers)
response = conn.getresponse()
HTTP_CONNECTION.request(op, url, data, headers)
response = HTTP_CONNECTION.getresponse()
status = response.status
rdata = response.read()
if status < 500:
conn.close()
if not REUSE_CONNECTION:
HTTP_CONNECTION.close()
return rdata, status
if rdata is not None:
try:
Expand All @@ -93,12 +113,13 @@ def _retrying_request(op, url, data, headers):
code = the_json["code"]
except:
pass
conn.close()
# Do not wait to retry -- the model is that a bunch of dynamically-routed
# resources has failed -- Retry means some other set of servlets and their
# underlings will be called up, and maybe they'll do better.
# This will not help with a persistent or impassable delay situation,
# but the former case is thought to be more likely.
if not REUSE_CONNECTION:
HTTP_CONECTION.close()

if message is None:
message = "A retryable network operation has not succeeded after " + str(N_RETRIES) + " attempts"
Expand Down Expand Up @@ -136,7 +157,6 @@ def add_query(orig_url, key, value):

class RosetteException(Exception):
"""Exception thrown by all Rosette API operations for errors local and remote.

TBD. Right now, the only valid operation is conversion to __str__.
"""

Expand Down Expand Up @@ -253,9 +273,7 @@ class DocumentParameters(_DocumentParamSetBase):
convenience instance methods L{DocumentParameters.load_document_file}
and L{DocumentParameters.load_document_string}. The unit size and
data format are defaulted to L{InputUnit.DOC} and L{DataFormat.SIMPLE}.

Using subscripts instead of instance variables facilitates diagnosis.

If the field C{contentUri} is set to the URL of a web page (only
protocols C{http, https, ftp, ftps} are accepted), the server will
fetch the content from that web page. In this case, neither C{content}
Expand Down Expand Up @@ -334,21 +352,13 @@ class NameTranslationParameters(_DocumentParamSetBase):
All are optional except C{name} and C{targetLanguage}. Scripts are in
ISO15924 codes, and languages in ISO639 (two- or three-letter) codes. See the Name Translation documentation for
more description of these terms, as well as the content of the return result.

C{name} The name to be translated.

C{targetLanguage} The language into which the name is to be translated.

C{entityType} The entity type (TBD) of the name.

C{sourceLanguageOfOrigin} The language of origin of the name.

C{sourceLanguageOfUse} The language of use of the name.

C{sourceScript} The script in which the name is supplied.

C{targetScript} The script into which the name should be translated.

C{targetScheme} The transliteration scheme by which the translated name should be rendered.
"""

Expand All @@ -366,19 +376,12 @@ def validate(self):
class NameMatchingParameters(_DocumentParamSetBase):
"""Parameter object for C{matched_name} endpoint.
All are required.

C{name1} The name to be matched, a C{name} object.

C{name2} The name to be matched, a C{name} object.

The C{name} object contains these fields:

C{text} Text of the name, required.

C{language} Language of the name in ISO639 three-letter code, optional.

C{script} The ISO15924 code of the name, optional.

C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional.
"""

Expand All @@ -399,11 +402,9 @@ class EndpointCaller:
of the Rosette server, specified at its creation. Use the specific
instance methods of the L{API} object to create L{EndpointCaller} objects bound to
corresponding endpoints.

Use L{EndpointCaller.ping} to ping, and L{EndpointCaller.info} to retrieve server info.
For all other types of requests, use L{EndpointCaller.call}, which accepts
an argument specifying the data to be processed and certain metadata.

The results of all operations are returned as python dictionaries, whose
keys and values correspond exactly to those of the corresponding
JSON return value described in the Rosette web service documentation.
Expand Down Expand Up @@ -489,11 +490,9 @@ def call(self, parameters):
endpoints except C{translated_name} and C{matched_name}, it must be a L{DocumentParameters}
object; for C{translated_name}, it must be an L{NameTranslationParameters} object;
for C{matched_name}, it must be an L{NameMatchingParameters} object.

In all cases, the result is returned as a python dictionary
conforming to the JSON object described in the endpoint's entry
in the Rosette web service documentation.

@param parameters: An object specifying the data,
and possible metadata, to be processed by the endpoint. See the
details for those object types.
Expand Down Expand Up @@ -525,7 +524,7 @@ class API:
Call instance methods upon this object to obtain L{EndpointCaller} objects
which can communicate with particular Rosette server endpoints.
"""
def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1', retries=1, debug=False):
def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1', retries=3, reuse_connection=True, refresh_duration=86400, debug=False):
""" Create an L{API} object.
@param user_key: (Optional; required for servers requiring authentication.) An authentication string to be sent
as user_key with all requests. The default Rosette server requires authentication.
Expand All @@ -540,10 +539,18 @@ def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1',
self.debug = debug
self.useMultipart = False
self.version_checked = False

global N_RETRIES
global REUSE_CONNECTION
global CONNECTION_REFRESH_DURATION

if (retries < 1):
retries = 1
if refresh_duration < 60:
refresh_duration = 60
N_RETRIES = retries
REUSE_CONNECTION = reuse_connection
CONNECTION_REFRESH_DURATION = refresh_duration

def check_version(self):
if self.version_checked:
Expand Down
Loading