Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 35 additions & 38 deletions prometheus_client/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,44 +62,35 @@ def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str
# The label name is before the equal, or if there's no equal, that's the
# metric name.

term, sub_labels = _next_term(sub_labels, openmetrics)
if not term:
name_term, value_term, sub_labels = _next_term(sub_labels, openmetrics)
if not value_term:
if openmetrics:
raise ValueError("empty term in line: " + labels_string)
continue

quoted_name = False
operator_pos = _next_unquoted_char(term, '=')
if operator_pos == -1:
quoted_name = True
label_name = "__name__"
else:
value_start = _next_unquoted_char(term, '=')
label_name, quoted_name = _unquote_unescape(term[:value_start])
term = term[value_start + 1:]
label_name, quoted_name = _unquote_unescape(name_term)

if not quoted_name and not _is_valid_legacy_metric_name(label_name):
raise ValueError("unquoted UTF-8 metric name")

# Check for missing quotes
term = term.strip()
if not term or term[0] != '"':
if not value_term or value_term[0] != '"':
raise ValueError

# The first quote is guaranteed to be after the equal.
# Find the last unescaped quote.
# Make sure that the next unescaped quote is the last character.
i = 1
while i < len(term):
i = term.index('"', i)
if not _is_character_escaped(term[:i], i):
while i < len(value_term):
i = value_term.index('"', i)
if not _is_character_escaped(value_term[:i], i):
break
i += 1

# The label value is between the first and last quote
quote_end = i + 1
if quote_end != len(term):
if quote_end != len(value_term):
raise ValueError("unexpected text after quote: " + labels_string)
label_value, _ = _unquote_unescape(term[:quote_end])

label_value, _ = _unquote_unescape(value_term)
if label_name == '__name__':
_validate_metric_name(label_name)
else:
Expand All @@ -112,11 +103,10 @@ def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str
raise ValueError("Invalid labels: " + labels_string)


def _next_term(text: str, openmetrics: bool) -> Tuple[str, str]:
"""Extract the next comma-separated label term from the text.

Returns the stripped term and the stripped remainder of the string,
including the comma.
def _next_term(text: str, openmetrics: bool) -> Tuple[str, str, str]:
"""Extract the next comma-separated label term from the text. The results
are stripped terms for the label name, label value, and then the remainder
of the string including the final , or }.

Raises ValueError if the term is empty and we're in openmetrics mode.
"""
Expand All @@ -125,41 +115,48 @@ def _next_term(text: str, openmetrics: bool) -> Tuple[str, str]:
if text[0] == ',':
text = text[1:]
if not text:
return "", ""
return "", "", ""
if text[0] == ',':
raise ValueError("multiple commas")
splitpos = _next_unquoted_char(text, ',}')

splitpos = _next_unquoted_char(text, '=,}')
if splitpos >= 0 and text[splitpos] == "=":
labelname = text[:splitpos]
text = text[splitpos + 1:]
splitpos = _next_unquoted_char(text, ',}')
else:
labelname = "__name__"

if splitpos == -1:
splitpos = len(text)
term = text[:splitpos]
if not term and openmetrics:
raise ValueError("empty term:", term)

sublabels = text[splitpos:]
return term.strip(), sublabels.strip()
rest = text[splitpos:]
return labelname, term.strip(), rest.strip()


def _next_unquoted_char(text: str, chs: str, startidx: int = 0) -> int:
def _next_unquoted_char(text: str, chs: Optional[str], startidx: int = 0) -> int:
"""Return position of the next unquoted character that is in chs, or -1 if not found.

It is always assumed that the first character being checked is not already
inside quotes.
"""
i = startidx
in_quotes = False
if chs is None:
chs = string.whitespace
while i < len(text):
if text[i] == '"' and not _is_character_escaped(text, i):

for i, c in enumerate(text[startidx:]):
if c == '"' and not _is_character_escaped(text, startidx + i):
in_quotes = not in_quotes
if not in_quotes:
if text[i] in chs:
return i
i += 1
if c in chs:
return startidx + i
return -1


def _last_unquoted_char(text: str, chs: str) -> int:
def _last_unquoted_char(text: str, chs: Optional[str]) -> int:
"""Return position of last unquoted character in list, or -1 if not found."""
i = len(text) - 1
in_quotes = False
Expand Down Expand Up @@ -253,7 +250,7 @@ def _parse_sample(text):
value, timestamp = _parse_value_and_timestamp(remaining_text)
return Sample(name, {}, value, timestamp)
name = text[:label_start].strip()
label_end = _next_unquoted_char(text, '}')
label_end = _next_unquoted_char(text[label_start:], '}') + label_start
labels = parse_labels(text[label_start + 1:label_end], False)
if not name:
# Name might be in the labels
Expand Down
54 changes: 54 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,5 +371,59 @@ def collect(self):
self.assertEqual(text.encode('utf-8'), generate_latest(registry, ALLOWUTF8))


def test_benchmark_text_string_to_metric_families(benchmark):
text = """# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 0.013300656000000001
go_gc_duration_seconds{quantile="0.25"} 0.013638736
go_gc_duration_seconds{quantile="0.5"} 0.013759906
go_gc_duration_seconds{quantile="0.75"} 0.013962066
go_gc_duration_seconds{quantile="1"} 0.021383540000000003
go_gc_duration_seconds_sum 56.12904785
go_gc_duration_seconds_count 7476.0
# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 166.0
# HELP prometheus_local_storage_indexing_batch_duration_milliseconds Quantiles for batch indexing duration in milliseconds.
# TYPE prometheus_local_storage_indexing_batch_duration_milliseconds summary
prometheus_local_storage_indexing_batch_duration_milliseconds{quantile="0.5"} NaN
prometheus_local_storage_indexing_batch_duration_milliseconds{quantile="0.9"} NaN
prometheus_local_storage_indexing_batch_duration_milliseconds{quantile="0.99"} NaN
prometheus_local_storage_indexing_batch_duration_milliseconds_sum 871.5665949999999
prometheus_local_storage_indexing_batch_duration_milliseconds_count 229.0
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 29323.4
# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 2.478268416e+09
# HELP prometheus_build_info A metric with a constant '1' value labeled by version, revision, and branch from which Prometheus was built.
# TYPE prometheus_build_info gauge
prometheus_build_info{branch="HEAD",revision="ef176e5",version="0.16.0rc1"} 1.0
# HELP prometheus_local_storage_chunk_ops_total The total number of chunk operations by their type.
# TYPE prometheus_local_storage_chunk_ops_total counter
prometheus_local_storage_chunk_ops_total{type="clone"} 28.0
prometheus_local_storage_chunk_ops_total{type="create"} 997844.0
prometheus_local_storage_chunk_ops_total{type="drop"} 1.345758e+06
prometheus_local_storage_chunk_ops_total{type="load"} 1641.0
prometheus_local_storage_chunk_ops_total{type="persist"} 981408.0
prometheus_local_storage_chunk_ops_total{type="pin"} 32662.0
prometheus_local_storage_chunk_ops_total{type="transcode"} 980180.0
prometheus_local_storage_chunk_ops_total{type="unpin"} 32662.0
# TYPE hist histogram
# HELP hist help
hist_bucket{le="1"} 0
hist_bucket{le="+Inf"} 3
hist_count 3
hist_sum 2
"""

@benchmark
def _():
# We need to convert the generator to a full list in order to
# accurately measure the time to yield everything.
return list(text_string_to_metric_families(text))


if __name__ == '__main__':
unittest.main()
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ envlist = coverage-clean,py{3.9,3.10,3.11,3.12,3.13,py3.9,3.9-nooptionals},cover
deps =
coverage
pytest
pytest-benchmark
attrs
{py3.9,pypy3.9}: twisted
# NOTE: Pinned due to https://github.com/prometheus/client_python/issues/1020
Expand Down