diff --git a/.cspell.json b/.cspell.json
index 89cde1ce77..486b63295a 100644
--- a/.cspell.json
+++ b/.cspell.json
@@ -66,6 +66,7 @@
     "emscripten",
     "excs",
     "finalizer",
+    "finalizers",
     "GetSet",
     "groupref",
     "internable",
@@ -117,11 +118,13 @@
     "sysmodule",
     "tracebacks",
     "typealiases",
+    "uncollectable",
     "unhashable",
     "uninit",
     "unraisable",
     "unresizable",
     "wasi",
+    "weaked",
     "zelf",
     // unix
     "posixshmem",
diff --git a/Cargo.lock b/Cargo.lock
index 1ce839a96d..95cbd94b52 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3006,6 +3006,7 @@ dependencies = [
  "ascii",
  "bitflags 2.10.0",
  "cfg-if",
+ "crossbeam-epoch",
  "getrandom 0.3.4",
  "itertools 0.14.0",
  "libc",
diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py
index 3e98489419..abb748519c 100644
--- a/Lib/_opcode_metadata.py
+++ b/Lib/_opcode_metadata.py
@@ -136,10 +136,102 @@
     'JUMP_IF_FALSE_OR_POP': 129,
     'JUMP_IF_TRUE_OR_POP': 130,
     'JUMP_IF_NOT_EXC_MATCH': 131,
-    'SET_EXC_INFO': 134,
-    'SUBSCRIPT': 135,
+    'SET_EXC_INFO': 132,
+    'SUBSCRIPT': 133,
     'RESUME': 149,
-    'LOAD_CLOSURE': 253,
+    'BINARY_OP_ADD_FLOAT': 150,
+    'BINARY_OP_ADD_INT': 151,
+    'BINARY_OP_ADD_UNICODE': 152,
+    'BINARY_OP_MULTIPLY_FLOAT': 153,
+    'BINARY_OP_MULTIPLY_INT': 154,
+    'BINARY_OP_SUBTRACT_FLOAT': 155,
+    'BINARY_OP_SUBTRACT_INT': 156,
+    'BINARY_SUBSCR_DICT': 157,
+    'BINARY_SUBSCR_GETITEM': 158,
+    'BINARY_SUBSCR_LIST_INT': 159,
+    'BINARY_SUBSCR_STR_INT': 160,
+    'BINARY_SUBSCR_TUPLE_INT': 161,
+    'CALL_ALLOC_AND_ENTER_INIT': 162,
+    'CALL_BOUND_METHOD_EXACT_ARGS': 163,
+    'CALL_BOUND_METHOD_GENERAL': 164,
+    'CALL_BUILTIN_CLASS': 165,
+    'CALL_BUILTIN_FAST': 166,
+    'CALL_BUILTIN_FAST_WITH_KEYWORDS': 167,
+    'CALL_BUILTIN_O': 168,
+    'CALL_ISINSTANCE': 169,
+    'CALL_LEN': 170,
+    'CALL_LIST_APPEND': 171,
+    'CALL_METHOD_DESCRIPTOR_FAST': 172,
+    'CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS': 173,
+    'CALL_METHOD_DESCRIPTOR_NOARGS': 174,
+    'CALL_METHOD_DESCRIPTOR_O': 175,
+    'CALL_NON_PY_GENERAL': 176,
+    'CALL_PY_EXACT_ARGS': 177,
+    'CALL_PY_GENERAL': 178,
+    'CALL_STR_1': 179,
+    'CALL_TUPLE_1': 180,
+    'CALL_TYPE_1': 181,
+    'COMPARE_OP_FLOAT': 182,
+    'COMPARE_OP_INT': 183,
+    'COMPARE_OP_STR': 184,
+    'CONTAINS_OP_DICT': 185,
+    'CONTAINS_OP_SET': 186,
+    'FOR_ITER_GEN': 187,
+    'FOR_ITER_LIST': 188,
+    'FOR_ITER_RANGE': 189,
+    'FOR_ITER_TUPLE': 190,
+    'LOAD_ATTR_CLASS': 191,
+    'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 192,
+    'LOAD_ATTR_INSTANCE_VALUE': 193,
+    'LOAD_ATTR_METHOD_LAZY_DICT': 194,
+    'LOAD_ATTR_METHOD_NO_DICT': 195,
+    'LOAD_ATTR_METHOD_WITH_VALUES': 196,
+    'LOAD_ATTR_MODULE': 197,
+    'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 198,
+    'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 199,
+    'LOAD_ATTR_PROPERTY': 200,
+    'LOAD_ATTR_SLOT': 201,
+    'LOAD_ATTR_WITH_HINT': 202,
+    'LOAD_GLOBAL_BUILTIN': 203,
+    'LOAD_GLOBAL_MODULE': 204,
+    'LOAD_SUPER_ATTR_ATTR': 205,
+    'LOAD_SUPER_ATTR_METHOD': 206,
+    'RESUME_CHECK': 207,
+    'SEND_GEN': 208,
+    'STORE_ATTR_INSTANCE_VALUE': 209,
+    'STORE_ATTR_SLOT': 210,
+    'STORE_ATTR_WITH_HINT': 211,
+    'STORE_SUBSCR_DICT': 212,
+    'STORE_SUBSCR_LIST_INT': 213,
+    'TO_BOOL_ALWAYS_TRUE': 214,
+    'TO_BOOL_BOOL': 215,
+    'TO_BOOL_INT': 216,
+    'TO_BOOL_LIST': 217,
+    'TO_BOOL_NONE': 218,
+    'TO_BOOL_STR': 219,
+    'UNPACK_SEQUENCE_LIST': 220,
+    'UNPACK_SEQUENCE_TUPLE': 221,
+    'UNPACK_SEQUENCE_TWO_TUPLE': 222,
+    'INSTRUMENTED_RESUME': 236,
+    'INSTRUMENTED_END_FOR': 237,
+    'INSTRUMENTED_END_SEND': 238,
+    'INSTRUMENTED_RETURN_VALUE': 239,
+    'INSTRUMENTED_RETURN_CONST': 240,
+    'INSTRUMENTED_YIELD_VALUE': 241,
+    'INSTRUMENTED_LOAD_SUPER_ATTR': 242,
+    'INSTRUMENTED_FOR_ITER': 243,
+    'INSTRUMENTED_CALL': 244,
+    'INSTRUMENTED_CALL_KW': 245,
+    'INSTRUMENTED_CALL_FUNCTION_EX': 246,
+    'INSTRUMENTED_INSTRUCTION': 247,
+    'INSTRUMENTED_JUMP_FORWARD': 248,
+    'INSTRUMENTED_JUMP_BACKWARD': 249,
+    'INSTRUMENTED_POP_JUMP_IF_TRUE': 250,
+    'INSTRUMENTED_POP_JUMP_IF_FALSE': 251,
+    'INSTRUMENTED_POP_JUMP_IF_NONE': 252,
+    'INSTRUMENTED_POP_JUMP_IF_NOT_NONE': 253,
+    'INSTRUMENTED_LINE': 254,
+    'LOAD_CLOSURE': 255,
     'JUMP': 256,
     'JUMP_NO_INTERRUPT': 257,
     'RESERVED_258': 258,
diff --git a/Lib/http/__init__.py b/Lib/http/__init__.py
index bf8d7d6886..17a47b180e 100644
--- a/Lib/http/__init__.py
+++ b/Lib/http/__init__.py
@@ -1,14 +1,15 @@
-from enum import IntEnum
+from enum import StrEnum, IntEnum, _simple_enum
 
-__all__ = ['HTTPStatus']
+__all__ = ['HTTPStatus', 'HTTPMethod']
 
 
-class HTTPStatus(IntEnum):
+@_simple_enum(IntEnum)
+class HTTPStatus:
     """HTTP status codes and reason phrases
 
     Status codes from the following RFCs are all observed:
 
-    * RFC 7231: Hypertext Transfer Protocol (HTTP/1.1), obsoletes 2616
+    * RFC 9110: HTTP Semantics, obsoletes 7231, which obsoleted 2616
     * RFC 6585: Additional HTTP Status Codes
     * RFC 3229: Delta encoding in HTTP
     * RFC 4918: HTTP Extensions for WebDAV, obsoletes 2518
@@ -25,11 +26,30 @@ class HTTPStatus(IntEnum):
     def __new__(cls, value, phrase, description=''):
         obj = int.__new__(cls, value)
         obj._value_ = value
-
         obj.phrase = phrase
         obj.description = description
         return obj
 
+    @property
+    def is_informational(self):
+        return 100 <= self <= 199
+
+    @property
+    def is_success(self):
+        return 200 <= self <= 299
+
+    @property
+    def is_redirection(self):
+        return 300 <= self <= 399
+
+    @property
+    def is_client_error(self):
+        return 400 <= self <= 499
+
+    @property
+    def is_server_error(self):
+        return 500 <= self <= 599
+
     # informational
     CONTINUE = 100, 'Continue', 'Request received, please continue'
     SWITCHING_PROTOCOLS = (101, 'Switching Protocols',
@@ -94,22 +114,25 @@ def __new__(cls, value, phrase, description=''):
         'Client must specify Content-Length')
     PRECONDITION_FAILED = (412, 'Precondition Failed',
         'Precondition in headers is false')
-    REQUEST_ENTITY_TOO_LARGE = (413, 'Request Entity Too Large',
-        'Entity is too large')
-    REQUEST_URI_TOO_LONG = (414, 'Request-URI Too Long',
+    CONTENT_TOO_LARGE = (413, 'Content Too Large',
+        'Content is too large')
+    REQUEST_ENTITY_TOO_LARGE = CONTENT_TOO_LARGE
+    URI_TOO_LONG = (414, 'URI Too Long',
         'URI is too long')
+    REQUEST_URI_TOO_LONG = URI_TOO_LONG
     UNSUPPORTED_MEDIA_TYPE = (415, 'Unsupported Media Type',
         'Entity body in unsupported format')
-    REQUESTED_RANGE_NOT_SATISFIABLE = (416,
-        'Requested Range Not Satisfiable',
+    RANGE_NOT_SATISFIABLE = (416, 'Range Not Satisfiable',
         'Cannot satisfy request range')
+    REQUESTED_RANGE_NOT_SATISFIABLE = RANGE_NOT_SATISFIABLE
     EXPECTATION_FAILED = (417, 'Expectation Failed',
         'Expect condition could not be satisfied')
     IM_A_TEAPOT = (418, 'I\'m a Teapot',
        'Server refuses to brew coffee because it is a teapot.')
     MISDIRECTED_REQUEST = (421, 'Misdirected Request',
         'Server is not able to produce a response')
-    UNPROCESSABLE_ENTITY = 422, 'Unprocessable Entity'
+    UNPROCESSABLE_CONTENT = 422, 'Unprocessable Content'
+    UNPROCESSABLE_ENTITY = UNPROCESSABLE_CONTENT
     LOCKED = 423, 'Locked'
     FAILED_DEPENDENCY = 424, 'Failed Dependency'
     TOO_EARLY = 425, 'Too Early'
@@ -148,3 +171,32 @@ def __new__(cls, value, phrase, description=''):
     NETWORK_AUTHENTICATION_REQUIRED = (511,
         'Network Authentication Required',
         'The client needs to authenticate to gain network access')
+
+
+@_simple_enum(StrEnum)
+class HTTPMethod:
+    """HTTP methods and descriptions
+
+    Methods from the following RFCs are all observed:
+
+    * RFC 9110: HTTP Semantics, obsoletes 7231, which obsoleted 2616
+    * RFC 5789: PATCH Method for HTTP
+    """
+    def __new__(cls, value, description):
+        obj = str.__new__(cls, value)
+        obj._value_ = value
+        obj.description = description
+        return obj
+
+    def __repr__(self):
+        return "<%s.%s>" % (self.__class__.__name__, self._name_)
+
+    CONNECT = 'CONNECT', 'Establish a connection to the server.'
+    DELETE = 'DELETE', 'Remove the target.'
+    GET = 'GET', 'Retrieve the target.'
+    HEAD = 'HEAD', 'Same as GET, but only retrieve the status line and header section.'
+    OPTIONS = 'OPTIONS', 'Describe the communication options for the target.'
+    PATCH = 'PATCH', 'Apply partial modifications to a target.'
+    POST = 'POST', 'Perform target-specific processing with the request payload.'
+    PUT = 'PUT', 'Replace the target with the request payload.'
+    TRACE = 'TRACE', 'Perform a message loop-back test along the path to the target.'
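
Note on the HTTPStatus/HTTPMethod additions above: the range properties replace manual numeric comparisons, the RFC 9110 renames keep the old spellings as enum aliases, and HTTPMethod members are plain strings. A quick illustrative sketch (standard library only, names as in this diff):

    from http import HTTPStatus, HTTPMethod

    # Range properties replace manual 400 <= code <= 499 checks.
    status = HTTPStatus(418)
    assert status.is_client_error and not status.is_success

    # Legacy names are aliases of the RFC 9110 spellings.
    assert HTTPStatus.REQUEST_ENTITY_TOO_LARGE is HTTPStatus.CONTENT_TOO_LARGE

    # StrEnum members compare equal to their string values.
    method = HTTPMethod.POST
    assert method == "POST" and method.description.startswith("Perform")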
diff --git a/Lib/http/client.py b/Lib/http/client.py
index a6ab135b2c..dd5f4136e9 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -111,6 +111,11 @@
 _MAXLINE = 65536
 _MAXHEADERS = 100
 
+# Data larger than this will be read in chunks, to prevent extreme
+# overallocation.
+_MIN_READ_BUF_SIZE = 1 << 20
+
+
 # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
 #
 # VCHAR          = %x21-7E
@@ -172,6 +177,13 @@ def _encode(data, name='data'):
             "if you want to send it encoded in UTF-8." %
             (name.title(), data[err.start:err.end], name)) from None
 
+def _strip_ipv6_iface(enc_name: bytes) -> bytes:
+    """Remove interface scope from IPv6 address."""
+    enc_name, percent, _ = enc_name.partition(b"%")
+    if percent:
+        assert enc_name.startswith(b'['), enc_name
+        enc_name += b']'
+    return enc_name
 
 class HTTPMessage(email.message.Message):
     # XXX The only usage of this method is in
@@ -221,8 +233,9 @@ def _read_headers(fp):
             break
     return headers
 
-def parse_headers(fp, _class=HTTPMessage):
-    """Parses only RFC2822 headers from a file pointer.
+def _parse_header_lines(header_lines, _class=HTTPMessage):
+    """
+    Parses only RFC 5322 headers from header lines.
 
     email Parser wants to see strings rather than bytes.
     But a TextIOWrapper around self.rfile would buffer too many bytes
@@ -231,10 +244,15 @@
     to parse.
 
""" - headers = _read_headers(fp) - hstring = b''.join(headers).decode('iso-8859-1') + hstring = b''.join(header_lines).decode('iso-8859-1') return email.parser.Parser(_class=_class).parsestr(hstring) +def parse_headers(fp, _class=HTTPMessage): + """Parses only RFC 5322 headers from a file pointer.""" + + headers = _read_headers(fp) + return _parse_header_lines(headers, _class) + class HTTPResponse(io.BufferedIOBase): @@ -448,6 +466,7 @@ def isclosed(self): return self.fp is None def read(self, amt=None): + """Read and return the response body, or up to the next amt bytes.""" if self.fp is None: return b"" @@ -458,7 +477,7 @@ def read(self, amt=None): if self.chunked: return self._read_chunked(amt) - if amt is not None: + if amt is not None and amt >= 0: if self.length is not None and amt > self.length: # clip the read to the "end of response" amt = self.length @@ -576,13 +595,11 @@ def _get_chunk_left(self): def _read_chunked(self, amt=None): assert self.chunked != _UNKNOWN + if amt is not None and amt < 0: + amt = None value = [] try: - while True: - chunk_left = self._get_chunk_left() - if chunk_left is None: - break - + while (chunk_left := self._get_chunk_left()) is not None: if amt is not None and amt <= chunk_left: value.append(self._safe_read(amt)) self.chunk_left = chunk_left - amt @@ -593,8 +610,8 @@ def _read_chunked(self, amt=None): amt -= chunk_left self.chunk_left = 0 return b''.join(value) - except IncompleteRead: - raise IncompleteRead(b''.join(value)) + except IncompleteRead as exc: + raise IncompleteRead(b''.join(value)) from exc def _readinto_chunked(self, b): assert self.chunked != _UNKNOWN @@ -627,10 +644,25 @@ def _safe_read(self, amt): reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. """ - data = self.fp.read(amt) - if len(data) < amt: - raise IncompleteRead(data, amt-len(data)) - return data + cursize = min(amt, _MIN_READ_BUF_SIZE) + data = self.fp.read(cursize) + if len(data) >= amt: + return data + if len(data) < cursize: + raise IncompleteRead(data, amt - len(data)) + + data = io.BytesIO(data) + data.seek(0, 2) + while True: + # This is a geometric increase in read size (never more than + # doubling out the current length of data per loop iteration). + delta = min(cursize, amt - cursize) + data.write(self.fp.read(delta)) + if data.tell() >= amt: + return data.getvalue() + cursize += delta + if data.tell() < cursize: + raise IncompleteRead(data.getvalue(), amt - data.tell()) def _safe_readinto(self, b): """Same as _safe_read, but for reading into a buffer.""" @@ -655,6 +687,8 @@ def read1(self, n=-1): self._close_conn() elif self.length is not None: self.length -= len(result) + if not self.length: + self._close_conn() return result def peek(self, n=-1): @@ -679,6 +713,8 @@ def readline(self, limit=-1): self._close_conn() elif self.length is not None: self.length -= len(result) + if not self.length: + self._close_conn() return result def _read1_chunked(self, n): @@ -786,6 +822,20 @@ def getcode(self): ''' return self.status + +def _create_https_context(http_version): + # Function also used by urllib.request to be able to set the check_hostname + # attribute on a context object. 
@@ -847,6 +897,7 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
         self._tunnel_host = None
         self._tunnel_port = None
         self._tunnel_headers = {}
+        self._raw_proxy_headers = None
 
         (self.host, self.port) = self._get_hostport(host, port)
 
@@ -859,9 +910,9 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
     def set_tunnel(self, host, port=None, headers=None):
         """Set up host and port for HTTP CONNECT tunnelling.
 
-        In a connection that uses HTTP CONNECT tunneling, the host passed to the
-        constructor is used as a proxy server that relays all communication to
-        the endpoint passed to `set_tunnel`. This done by sending an HTTP
+        In a connection that uses HTTP CONNECT tunnelling, the host passed to
+        the constructor is used as a proxy server that relays all communication
+        to the endpoint passed to `set_tunnel`. This is done by sending an HTTP
         CONNECT request to the proxy server when the connection is established.
 
         This method must be called before the HTTP connection has been
@@ -869,6 +920,13 @@ def set_tunnel(self, host, port=None, headers=None):
 
         The headers argument should be a mapping of extra HTTP headers to
         send with the CONNECT request.
+
+        As HTTP/1.1 is used for the HTTP CONNECT tunnelling request, as per the
+        RFC (https://tools.ietf.org/html/rfc7231#section-4.3.6), an HTTP Host:
+        header must be provided, matching the authority-form of the request
+        target provided as the destination for the CONNECT request. If an
+        HTTP Host: header is not provided via the headers argument, one
+        is generated and transmitted automatically.
""" if self.sock: @@ -876,10 +934,15 @@ def set_tunnel(self, host, port=None, headers=None): self._tunnel_host, self._tunnel_port = self._get_hostport(host, port) if headers: - self._tunnel_headers = headers + self._tunnel_headers = headers.copy() else: self._tunnel_headers.clear() + if not any(header.lower() == "host" for header in self._tunnel_headers): + encoded_host = self._tunnel_host.encode("idna").decode("ascii") + self._tunnel_headers["Host"] = "%s:%d" % ( + encoded_host, self._tunnel_port) + def _get_hostport(self, host, port): if port is None: i = host.rfind(':') @@ -895,17 +958,24 @@ def _get_hostport(self, host, port): host = host[:i] else: port = self.default_port - if host and host[0] == '[' and host[-1] == ']': - host = host[1:-1] + if host and host[0] == '[' and host[-1] == ']': + host = host[1:-1] return (host, port) def set_debuglevel(self, level): self.debuglevel = level + def _wrap_ipv6(self, ip): + if b':' in ip and ip[0] != b'['[0]: + return b"[" + ip + b"]" + return ip + def _tunnel(self): - connect = b"CONNECT %s:%d HTTP/1.0\r\n" % ( - self._tunnel_host.encode("ascii"), self._tunnel_port) + connect = b"CONNECT %s:%d %s\r\n" % ( + self._wrap_ipv6(self._tunnel_host.encode("idna")), + self._tunnel_port, + self._http_vsn_str.encode("ascii")) headers = [connect] for header, value in self._tunnel_headers.items(): headers.append(f"{header}: {value}\r\n".encode("latin-1")) @@ -917,23 +987,35 @@ def _tunnel(self): del headers response = self.response_class(self.sock, method=self._method) - (version, code, message) = response._read_status() + try: + (version, code, message) = response._read_status() - if code != http.HTTPStatus.OK: - self.close() - raise OSError(f"Tunnel connection failed: {code} {message.strip()}") - while True: - line = response.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - if not line: - # for sites which EOF without sending a trailer - break - if line in (b'\r\n', b'\n', b''): - break + self._raw_proxy_headers = _read_headers(response.fp) if self.debuglevel > 0: - print('header:', line.decode()) + for header in self._raw_proxy_headers: + print('header:', header.decode()) + + if code != http.HTTPStatus.OK: + self.close() + raise OSError(f"Tunnel connection failed: {code} {message.strip()}") + + finally: + response.close() + + def get_proxy_response_headers(self): + """ + Returns a dictionary with the headers of the response + received from the proxy server to the CONNECT request + sent to set the tunnel. + + If the CONNECT request was not sent, the method returns None. 
+ """ + return ( + _parse_header_lines(self._raw_proxy_headers) + if self._raw_proxy_headers is not None + else None + ) def connect(self): """Connect to the host and port specified in __init__.""" @@ -942,7 +1024,7 @@ def connect(self): (self.host,self.port), self.timeout, self.source_address) # Might fail in OSs that don't implement TCP_NODELAY try: - self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) except OSError as e: if e.errno != errno.ENOPROTOOPT: raise @@ -980,14 +1062,11 @@ def send(self, data): print("send:", repr(data)) if hasattr(data, "read") : if self.debuglevel > 0: - print("sendIng a read()able") + print("sending a readable") encode = self._is_textIO(data) if encode and self.debuglevel > 0: print("encoding file using iso-8859-1") - while 1: - datablock = data.read(self.blocksize) - if not datablock: - break + while datablock := data.read(self.blocksize): if encode: datablock = datablock.encode("iso-8859-1") sys.audit("http.client.send", self, datablock) @@ -1013,14 +1092,11 @@ def _output(self, s): def _read_readable(self, readable): if self.debuglevel > 0: - print("sendIng a read()able") + print("reading a readable") encode = self._is_textIO(readable) if encode and self.debuglevel > 0: print("encoding file using iso-8859-1") - while True: - datablock = readable.read(self.blocksize) - if not datablock: - break + while datablock := readable.read(self.blocksize): if encode: datablock = datablock.encode("iso-8859-1") yield datablock @@ -1157,7 +1233,7 @@ def putrequest(self, method, url, skip_host=False, netloc_enc = netloc.encode("ascii") except UnicodeEncodeError: netloc_enc = netloc.encode("idna") - self.putheader('Host', netloc_enc) + self.putheader('Host', _strip_ipv6_iface(netloc_enc)) else: if self._tunnel_host: host = self._tunnel_host @@ -1173,9 +1249,9 @@ def putrequest(self, method, url, skip_host=False, # As per RFC 273, IPv6 address should be wrapped with [] # when used as Host header - - if host.find(':') >= 0: - host_enc = b'[' + host_enc + b']' + host_enc = self._wrap_ipv6(host_enc) + if ":" in host: + host_enc = _strip_ipv6_iface(host_enc) if port == self.default_port: self.putheader('Host', host_enc) @@ -1400,46 +1476,15 @@ class HTTPSConnection(HTTPConnection): default_port = HTTPS_PORT - # XXX Should key_file and cert_file be deprecated in favour of context? 
@@ -1400,46 +1476,15 @@ class HTTPSConnection(HTTPConnection):
 
     default_port = HTTPS_PORT
 
-    # XXX Should key_file and cert_file be deprecated in favour of context?
-
-    def __init__(self, host, port=None, key_file=None, cert_file=None,
-                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
-                 source_address=None, *, context=None,
-                 check_hostname=None, blocksize=8192):
+    def __init__(self, host, port=None,
+                 *, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+                 source_address=None, context=None, blocksize=8192):
         super(HTTPSConnection, self).__init__(host, port, timeout,
                                               source_address,
                                               blocksize=blocksize)
-        if (key_file is not None or cert_file is not None or
-                check_hostname is not None):
-            import warnings
-            warnings.warn("key_file, cert_file and check_hostname are "
-                          "deprecated, use a custom context instead.",
-                          DeprecationWarning, 2)
-        self.key_file = key_file
-        self.cert_file = cert_file
         if context is None:
-            context = ssl._create_default_https_context()
-            # send ALPN extension to indicate HTTP/1.1 protocol
-            if self._http_vsn == 11:
-                context.set_alpn_protocols(['http/1.1'])
-            # enable PHA for TLS 1.3 connections if available
-            if context.post_handshake_auth is not None:
-                context.post_handshake_auth = True
-        will_verify = context.verify_mode != ssl.CERT_NONE
-        if check_hostname is None:
-            check_hostname = context.check_hostname
-        if check_hostname and not will_verify:
-            raise ValueError("check_hostname needs a SSL context with "
-                             "either CERT_OPTIONAL or CERT_REQUIRED")
-        if key_file or cert_file:
-            context.load_cert_chain(cert_file, key_file)
-            # cert and key file means the user wants to authenticate.
-            # enable TLS 1.3 PHA implicitly even for custom contexts.
-            if context.post_handshake_auth is not None:
-                context.post_handshake_auth = True
+            context = _create_https_context(self._http_vsn)
         self._context = context
-        if check_hostname is not None:
-            self._context.check_hostname = check_hostname
 
     def connect(self):
         "Connect to a host on a given (SSL) port."
diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py
index 685f6a0b97..9a2f0fb851 100644
--- a/Lib/http/cookiejar.py
+++ b/Lib/http/cookiejar.py
@@ -34,10 +34,7 @@
 import re
 import time
 import urllib.parse, urllib.request
-try:
-    import threading as _threading
-except ImportError:
-    import dummy_threading as _threading
+import threading as _threading
 import http.client  # only for the default HTTP port
 from calendar import timegm
 
@@ -92,8 +89,7 @@ def _timegm(tt):
 DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
 MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
           "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
-MONTHS_LOWER = []
-for month in MONTHS: MONTHS_LOWER.append(month.lower())
+MONTHS_LOWER = [month.lower() for month in MONTHS]
 
 def time2isoz(t=None):
     """Return a string representing time in seconds since epoch, t.
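
After the constructor cleanup above, HTTPSConnection no longer accepts key_file/cert_file/check_hostname; every TLS knob lives on the SSLContext, and the remaining parameters are keyword-only. A usage sketch under those assumptions (client.pem is a placeholder path):

    import ssl
    import http.client

    # Equivalent of the removed key_file/cert_file parameters: configure
    # the context yourself and pass it in.
    context = ssl.create_default_context()
    # context.load_cert_chain("client.pem")  # client auth, if needed (placeholder)

    conn = http.client.HTTPSConnection("example.org", timeout=10, context=context)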
@@ -108,9 +104,9 @@ def time2isoz(t=None):
 
     """
     if t is None:
-        dt = datetime.datetime.utcnow()
+        dt = datetime.datetime.now(tz=datetime.UTC)
     else:
-        dt = datetime.datetime.utcfromtimestamp(t)
+        dt = datetime.datetime.fromtimestamp(t, tz=datetime.UTC)
     return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
         dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
 
@@ -126,9 +122,9 @@ def time2netscape(t=None):
 
     """
     if t is None:
-        dt = datetime.datetime.utcnow()
+        dt = datetime.datetime.now(tz=datetime.UTC)
     else:
-        dt = datetime.datetime.utcfromtimestamp(t)
+        dt = datetime.datetime.fromtimestamp(t, tz=datetime.UTC)
     return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
         DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
         dt.year, dt.hour, dt.minute, dt.second)
 
@@ -434,6 +430,7 @@ def split_header_words(header_values):
     if pairs: result.append(pairs)
     return result
 
+HEADER_JOIN_TOKEN_RE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
 HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
 def join_header_words(lists):
     """Do the inverse (almost) of the conversion done by split_header_words.
@@ -441,10 +438,10 @@ def join_header_words(lists):
     Takes a list of lists of (key, value) pairs and produces a single header
     value.  Attribute values are quoted if needed.
 
-    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
-    'text/plain; charset="iso-8859-1"'
-    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
-    'text/plain, charset="iso-8859-1"'
+    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
+    'text/plain; charset="iso-8859/1"'
+    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
+    'text/plain, charset="iso-8859/1"'
 
     """
     headers = []
@@ -452,7 +449,7 @@
         attr = []
         for k, v in pairs:
             if v is not None:
-                if not re.search(r"^\w+$", v):
+                if not HEADER_JOIN_TOKEN_RE.fullmatch(v):
                     v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
                     v = '"%s"' % v
                 k = "%s=%s" % (k, v)
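
The new HEADER_JOIN_TOKEN_RE above quotes a value only when it is not a valid token, which is also why the doctest switched to iso-8859/1: the hyphenated iso-8859-1 is itself a token and, unlike under the old \w+ test, is no longer quoted. With this patch applied:

    from http.cookiejar import join_header_words

    # '-' is a token character, '/' is not.
    assert join_header_words([[("charset", "iso-8859-1")]]) == "charset=iso-8859-1"
    assert join_header_words([[("charset", "iso-8859/1")]]) == 'charset="iso-8859/1"'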
@@ -644,7 +641,7 @@ def eff_request_host(request):
 
     """
     erhn = req_host = request_host(request)
-    if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
+    if "." not in req_host:
         erhn = req_host + ".local"
     return req_host, erhn
 
@@ -1047,12 +1044,13 @@ def set_ok_domain(self, cookie, request):
             else:
                 undotted_domain = domain
             embedded_dots = (undotted_domain.find(".") >= 0)
-            if not embedded_dots and domain != ".local":
+            if not embedded_dots and not erhn.endswith(".local"):
                 _debug("   non-local domain %s contains no embedded dot",
                        domain)
                 return False
             if cookie.version == 0:
-                if (not erhn.endswith(domain) and
+                if (not (erhn.endswith(domain) or
+                         erhn.endswith(f"{undotted_domain}.local")) and
                     (not erhn.startswith(".") and
                      not ("."+erhn).endswith(domain))):
                     _debug("   effective request-host %s (even with added "
@@ -1227,14 +1225,9 @@ def path_return_ok(self, path, request):
         _debug("  %s does not path-match %s", req_path, path)
         return False
 
-def vals_sorted_by_key(adict):
-    keys = sorted(adict.keys())
-    return map(adict.get, keys)
-
 def deepvalues(mapping):
-    """Iterates over nested mapping, depth-first, in sorted order by key."""
-    values = vals_sorted_by_key(mapping)
-    for obj in values:
+    """Iterates over nested mapping, depth-first"""
+    for obj in list(mapping.values()):
         mapping = False
         try:
             obj.items
@@ -1898,7 +1891,10 @@ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
             if self.filename is not None: filename = self.filename
             else: raise ValueError(MISSING_FILENAME_TEXT)
 
-        with open(filename, "w") as f:
+        with os.fdopen(
+            os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600),
+            'w',
+        ) as f:
             # There really isn't an LWP Cookies 2.0 format, but this indicates
             # that there is extra information in here (domain_dot and
             # port_spec) while still being compatible with libwww-perl, I hope.
@@ -1923,9 +1919,7 @@ def _really_load(self, f, filename, ignore_discard, ignore_expires):
                   "comment", "commenturl")
 
         try:
-            while 1:
-                line = f.readline()
-                if line == "": break
+            while (line := f.readline()) != "":
                 if not line.startswith(header):
                     continue
                 line = line[len(header):].strip()
@@ -1993,7 +1987,7 @@ class MozillaCookieJar(FileCookieJar):
 
     This class differs from CookieJar only in the format it uses to save and
     load cookies to and from a file.  This class uses the Mozilla/Netscape
-    `cookies.txt' format.  lynx uses this file format, too.
+    `cookies.txt' format.  curl and lynx use this file format, too.
 
     Don't expect cookies saved while the browser is running to be noticed by
     the browser (in fact, Mozilla on unix will overwrite your saved cookies if
@@ -2025,12 +2019,9 @@ def _really_load(self, f, filename, ignore_discard, ignore_expires):
                               filename)
 
         try:
-            while 1:
-                line = f.readline()
+            while (line := f.readline()) != "":
                 rest = {}
 
-                if line == "": break
-
                 # httponly is a cookie flag as defined in rfc6265
                 # when encoded in a netscape cookie file,
                 # the line is prepended with "#HttpOnly_"
@@ -2094,7 +2085,10 @@ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
             if self.filename is not None: filename = self.filename
             else: raise ValueError(MISSING_FILENAME_TEXT)
 
-        with open(filename, "w") as f:
+        with os.fdopen(
+            os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600),
+            'w',
+        ) as f:
             f.write(NETSCAPE_HEADER_TEXT)
             now = time.time()
             for cookie in self:
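
Both save() methods above switch from open(filename, "w"), which honours only the umask, to creating the cookie file with mode 0o600, so cookie data is never group- or world-readable. The pattern in isolation (cookies.txt is a placeholder name):

    import os

    # Permissions are applied atomically at creation; a pre-existing file
    # keeps its current mode and is only truncated.
    with os.fdopen(
        os.open("cookies.txt", os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600),
        "w",
    ) as f:
        f.write("# Netscape HTTP Cookie File\n")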
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 58abadf737..0ec479003a 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -2,18 +2,18 @@
 
 Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
 SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
-and CGIHTTPRequestHandler for CGI scripts.
+and (deprecated) CGIHTTPRequestHandler for CGI scripts.
 
-It does, however, optionally implement HTTP/1.1 persistent connections,
-as of version 0.3.
+It does, however, optionally implement HTTP/1.1 persistent connections.
 
 Notes on CGIHTTPRequestHandler
 ------------------------------
 
-This class implements GET and POST requests to cgi-bin scripts.
+This class is deprecated. It implements GET and POST requests to cgi-bin scripts.
 
-If the os.fork() function is not present (e.g. on Windows),
-subprocess.Popen() is used as a fallback, with slightly altered semantics.
+If the os.fork() function is not present (Windows), subprocess.Popen() is used,
+with slightly altered but never documented semantics.  Use from a threaded
+process is likely to trigger a warning at os.fork() time.
 
 In all cases, the implementation is intentionally naive -- all requests are
 executed synchronously.
@@ -93,6 +93,7 @@
 import html
 import http.client
 import io
+import itertools
 import mimetypes
 import os
 import posixpath
@@ -109,11 +110,10 @@
 
 # Default error message template
 DEFAULT_ERROR_MESSAGE = """\
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-        "http://www.w3.org/TR/html4/strict.dtd">
-<html>
+<!DOCTYPE HTML>
+<html lang="en">
     <head>
-        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
+        <meta charset="utf-8">
         <title>Error response</title>
     </head>
     <body>
@@ -127,6 +127,10 @@
 
 DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
 
+# Data larger than this will be read in chunks, to prevent extreme
+# overallocation.
+_MIN_READ_BUF_SIZE = 1 << 20
+
 
 class HTTPServer(socketserver.TCPServer):
 
     allow_reuse_address = 1    # Seems to make sense in testing environment
@@ -275,6 +279,7 @@ def parse_request(self):
         error response has already been sent back.
 
         """
+        is_http_0_9 = False
         self.command = None  # set in case of error on the first line
         self.request_version = version = self.default_request_version
         self.close_connection = True
@@ -300,6 +305,10 @@ def parse_request(self):
             #   - Leading zeros MUST be ignored by recipients.
             if len(version_number) != 2:
                 raise ValueError
+            if any(not component.isdigit() for component in version_number):
+                raise ValueError("non digit in http version")
+            if any(len(component) > 10 for component in version_number):
+                raise ValueError("unreasonable length http version")
             version_number = int(version_number[0]), int(version_number[1])
         except (ValueError, IndexError):
             self.send_error(
@@ -328,8 +337,21 @@ def parse_request(self):
                     HTTPStatus.BAD_REQUEST,
                     "Bad HTTP/0.9 request type (%r)" % command)
                 return False
+            is_http_0_9 = True
         self.command, self.path = command, path
 
+        # gh-87389: The purpose of replacing '//' with '/' is to protect
+        # against open redirect attacks possibly triggered if the path starts
+        # with '//' because http clients treat //path as an absolute URI
+        # without scheme (similar to http://path) rather than a path.
+        if self.path.startswith('//'):
+            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /
+
+        # For HTTP/0.9, headers are not expected at all.
+        if is_http_0_9:
+            self.headers = {}
+            return True
+
         # Examine the headers and look for a Connection directive.
         try:
             self.headers = http.client.parse_headers(self.rfile,
@@ -556,6 +578,11 @@ def log_error(self, format, *args):
 
         self.log_message(format, *args)
 
+    # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
+    _control_char_table = str.maketrans(
+            {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))})
+    _control_char_table[ord('\\')] = r'\\'
+
     def log_message(self, format, *args):
         """Log an arbitrary message.
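
The translation table added above neutralizes log injection: C0 and C1 control characters (plus the backslash, to keep output unambiguous) are rewritten as \xNN escapes when the formatted message is logged. The same table, built standalone:

    import itertools

    # Map C0 (0x00-0x1f) and C1 (0x7f-0x9f) controls to \xNN escapes.
    table = {c: fr"\x{c:02x}"
             for c in itertools.chain(range(0x20), range(0x7f, 0xa0))}
    table[ord("\\")] = r"\\"
    control_char_table = str.maketrans(table)

    print("GET /\r\nFake-Log: line".translate(control_char_table))
    # -> GET /\x0d\x0aFake-Log: line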
@@ -571,12 +598,16 @@ def log_message(self, format, *args):
 
         The client ip and current date/time are prefixed to
         every message.
 
+        Unicode control characters are replaced with escaped hex
+        before writing the output to stderr.
+
         """
 
+        message = format % args
         sys.stderr.write("%s - - [%s] %s\n" %
                          (self.address_string(),
                           self.log_date_time_string(),
-                          format%args))
+                          message.translate(self._control_char_table)))
 
     def version_string(self):
         """Return the server software version string."""
@@ -637,6 +668,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
     """
 
     server_version = "SimpleHTTP/" + __version__
+    index_pages = ("index.html", "index.htm")
     extensions_map = _encodings_map_default = {
         '.gz': 'application/gzip',
         '.Z': 'application/octet-stream',
@@ -680,7 +712,7 @@ def send_head(self):
         f = None
         if os.path.isdir(path):
             parts = urllib.parse.urlsplit(self.path)
-            if not parts.path.endswith('/'):
+            if not parts.path.endswith(('/', '%2f', '%2F')):
                 # redirect browser - doing basically what apache does
                 self.send_response(HTTPStatus.MOVED_PERMANENTLY)
                 new_parts = (parts[0], parts[1], parts[2] + '/',
@@ -690,9 +722,9 @@ def send_head(self):
                 self.send_header("Content-Length", "0")
                 self.end_headers()
                 return None
-            for index in "index.html", "index.htm":
+            for index in self.index_pages:
                 index = os.path.join(path, index)
-                if os.path.exists(index):
+                if os.path.isfile(index):
                     path = index
                     break
             else:
@@ -702,7 +734,7 @@ def send_head(self):
         # The test for this was added in test_httpserver.py
         # However, some OS platforms accept a trailingSlash as a filename
         # See discussion on python-dev and Issue34711 regarding
-        # parseing and rejection of filenames with a trailing slash
+        # parsing and rejection of filenames with a trailing slash
         if path.endswith("/"):
             self.send_error(HTTPStatus.NOT_FOUND, "File not found")
             return None
@@ -770,21 +802,23 @@ def list_directory(self, path):
             return None
         list.sort(key=lambda a: a.lower())
         r = []
+        displaypath = self.path
+        displaypath = displaypath.split('#', 1)[0]
+        displaypath = displaypath.split('?', 1)[0]
         try:
-            displaypath = urllib.parse.unquote(self.path,
+            displaypath = urllib.parse.unquote(displaypath,
                                                errors='surrogatepass')
         except UnicodeDecodeError:
-            displaypath = urllib.parse.unquote(path)
+            displaypath = urllib.parse.unquote(displaypath)
         displaypath = html.escape(displaypath, quote=False)
         enc = sys.getfilesystemencoding()
-        title = 'Directory listing for %s' % displaypath
-        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
-                 '"http://www.w3.org/TR/html4/strict.dtd">')
-        r.append('<html>\n<head>')
-        r.append('<meta http-equiv="Content-Type" '
-                 'content="text/html; charset=%s">' % enc)
-        r.append('<title>%s</title>\n</head>' % title)
-        r.append('<body>\n<h1>%s</h1>' % title)
+        title = f'Directory listing for {displaypath}'
+        r.append('<!DOCTYPE HTML>')
+        r.append('<html lang="en">')
+        r.append('<head>')
+        r.append(f'<meta charset="{enc}">')
+        r.append(f'<title>{title}</title>\n</head>')
+        r.append(f'<body>\n<h1>{title}</h1>')
         r.append('<hr>\n<ul>')
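
Since the index-page probe above now reads the class attribute index_pages (and uses os.path.isfile(), so a directory named index.html no longer short-circuits the listing), a subclass can extend the list without copying send_head(); a brief sketch:

    from http.server import HTTPServer, SimpleHTTPRequestHandler

    class Handler(SimpleHTTPRequestHandler):
        index_pages = ("index.xhtml",) + SimpleHTTPRequestHandler.index_pages

    # HTTPServer(("127.0.0.1", 8000), Handler).serve_forever()  # example wiring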