prometheus · csmarchbanks · Jul 8, 2025 · Jun 26, 2025 · Jul 2, 2025 · Jul 2, 2025
diff --git a/prometheus_client/parser.py b/prometheus_client/parser.py
@@ -62,44 +62,35 @@ def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str
             # The label name is before the equal, or if there's no equal, that's the
             # metric name.
 
-            term, sub_labels = _next_term(sub_labels, openmetrics)
-            if not term:
+            name_term, value_term, sub_labels = _next_term(sub_labels, openmetrics)
+            if not value_term:
                 if openmetrics:
                     raise ValueError("empty term in line: " + labels_string)
                 continue
 
-            quoted_name = False
-            operator_pos = _next_unquoted_char(term, '=')
-            if operator_pos == -1:
-                quoted_name = True
-                label_name = "__name__"
-            else:
-                value_start = _next_unquoted_char(term, '=')
-                label_name, quoted_name = _unquote_unescape(term[:value_start])
-                term = term[value_start + 1:]
+            label_name, quoted_name = _unquote_unescape(name_term)
 
             if not quoted_name and not _is_valid_legacy_metric_name(label_name):
                 raise ValueError("unquoted UTF-8 metric name")
 
             # Check for missing quotes 
-            term = term.strip()
-            if not term or term[0] != '"':
+            if not value_term or value_term[0] != '"':
                 raise ValueError
 
             # The first quote is guaranteed to be after the equal.
-            # Find the last unescaped quote.
+            # Make sure that the next unescaped quote is the last character.
             i = 1
-            while i < len(term):
-                i = term.index('"', i)
-                if not _is_character_escaped(term[:i], i):
+            while i < len(value_term):
+                i = value_term.index('"', i)
+                if not _is_character_escaped(value_term[:i], i):
                     break
                 i += 1
-
             # The label value is between the first and last quote
             quote_end = i + 1
-            if quote_end != len(term):
+            if quote_end != len(value_term):
                 raise ValueError("unexpected text after quote: " + labels_string)
-            label_value, _ = _unquote_unescape(term[:quote_end])
+
+            label_value, _ = _unquote_unescape(value_term)
             if label_name == '__name__':
                 _validate_metric_name(label_name)
             else:
@@ -112,11 +103,10 @@ def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str
         raise ValueError("Invalid labels: " + labels_string)
 
 
-def _next_term(text: str, openmetrics: bool) -> Tuple[str, str]:
-    """Extract the next comma-separated label term from the text.
-
-    Returns the stripped term and the stripped remainder of the string, 
-    including the comma.
+def _next_term(text: str, openmetrics: bool) -> Tuple[str, str, str]:
+    """Extract the next comma-separated label term from the text. The results
+    are the unstripped label name ('__name__' if there is no unquoted '='),
+    the stripped label value, and the stripped remainder starting at the , or }.
 
     Raises ValueError if the term is empty and we're in openmetrics mode.
     """
@@ -125,41 +115,48 @@ def _next_term(text: str, openmetrics: bool) -> Tuple[str, str]:
     if text[0] == ',':
         text = text[1:]
         if not text:
-            return "", ""
+            return "", "", ""
         if text[0] == ',':
             raise ValueError("multiple commas")
-    splitpos = _next_unquoted_char(text, ',}')
+
+    splitpos = _next_unquoted_char(text, '=,}')
+    if splitpos >= 0 and text[splitpos] == "=":
+        labelname = text[:splitpos]
+        text = text[splitpos + 1:]
+        splitpos = _next_unquoted_char(text, ',}')
+    else:
+        labelname = "__name__"
+
     if splitpos == -1:
         splitpos = len(text)
     term = text[:splitpos]
     if not term and openmetrics:
         raise ValueError("empty term:", term)
 
-    sublabels = text[splitpos:]
-    return term.strip(), sublabels.strip()
+    rest = text[splitpos:]
+    return labelname, term.strip(), rest.strip()
 
 
-def _next_unquoted_char(text: str, chs: str, startidx: int = 0) -> int:
+def _next_unquoted_char(text: str, chs: Optional[str], startidx: int = 0) -> int:
     """Return position of next unquoted character in tuple, or -1 if not found.
 
     It is always assumed that the first character being checked is not already
     inside quotes.
     """
-    i = startidx
     in_quotes = False
     if chs is None:
         chs = string.whitespace
-    while i < len(text):
-        if text[i] == '"' and not _is_character_escaped(text, i):
+
+    for i, c in enumerate(text[startidx:]):
+        if c == '"' and not _is_character_escaped(text, startidx + i):
             in_quotes = not in_quotes
         if not in_quotes:
-            if text[i] in chs:
-                return i
-        i += 1
+            if c in chs:
+                return startidx + i
     return -1
 
 
-def _last_unquoted_char(text: str, chs: str) -> int:
+def _last_unquoted_char(text: str, chs: Optional[str]) -> int:
     """Return position of last unquoted character in list, or -1 if not found."""
     i = len(text) - 1
     in_quotes = False
@@ -253,7 +250,7 @@ def _parse_sample(text):
         value, timestamp = _parse_value_and_timestamp(remaining_text)
         return Sample(name, {}, value, timestamp)
     name = text[:label_start].strip()
-    label_end = _next_unquoted_char(text, '}')
+    label_end = _next_unquoted_char(text[label_start:], '}') + label_start
     labels = parse_labels(text[label_start + 1:label_end], False)
     if not name:
         # Name might be in the labels

diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -371,5 +371,59 @@ def collect(self):
         self.assertEqual(text.encode('utf-8'), generate_latest(registry, ALLOWUTF8))
 
 
+def test_benchmark_text_string_to_metric_families(benchmark):
+    text = """# HELP go_gc_duration_seconds A summary of the GC invocation durations.
+# TYPE go_gc_duration_seconds summary
+go_gc_duration_seconds{quantile="0"} 0.013300656000000001
+go_gc_duration_seconds{quantile="0.25"} 0.013638736
+go_gc_duration_seconds{quantile="0.5"} 0.013759906
+go_gc_duration_seconds{quantile="0.75"} 0.013962066
+go_gc_duration_seconds{quantile="1"} 0.021383540000000003
+go_gc_duration_seconds_sum 56.12904785
+go_gc_duration_seconds_count 7476.0
+# HELP go_goroutines Number of goroutines that currently exist.
+# TYPE go_goroutines gauge
+go_goroutines 166.0
+# HELP prometheus_local_storage_indexing_batch_duration_milliseconds Quantiles for batch indexing duration in milliseconds.
+# TYPE prometheus_local_storage_indexing_batch_duration_milliseconds summary
+prometheus_local_storage_indexing_batch_duration_milliseconds{quantile="0.5"} NaN
+prometheus_local_storage_indexing_batch_duration_milliseconds{quantile="0.9"} NaN
+prometheus_local_storage_indexing_batch_duration_milliseconds{quantile="0.99"} NaN
+prometheus_local_storage_indexing_batch_duration_milliseconds_sum 871.5665949999999
+prometheus_local_storage_indexing_batch_duration_milliseconds_count 229.0
+# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
+# TYPE process_cpu_seconds_total counter
+process_cpu_seconds_total 29323.4
+# HELP process_virtual_memory_bytes Virtual memory size in bytes.
+# TYPE process_virtual_memory_bytes gauge
+process_virtual_memory_bytes 2.478268416e+09
+# HELP prometheus_build_info A metric with a constant '1' value labeled by version, revision, and branch from which Prometheus was built.
+# TYPE prometheus_build_info gauge
+prometheus_build_info{branch="HEAD",revision="ef176e5",version="0.16.0rc1"} 1.0
+# HELP prometheus_local_storage_chunk_ops_total The total number of chunk operations by their type.
+# TYPE prometheus_local_storage_chunk_ops_total counter
+prometheus_local_storage_chunk_ops_total{type="clone"} 28.0
+prometheus_local_storage_chunk_ops_total{type="create"} 997844.0
+prometheus_local_storage_chunk_ops_total{type="drop"} 1.345758e+06
+prometheus_local_storage_chunk_ops_total{type="load"} 1641.0
+prometheus_local_storage_chunk_ops_total{type="persist"} 981408.0
+prometheus_local_storage_chunk_ops_total{type="pin"} 32662.0
+prometheus_local_storage_chunk_ops_total{type="transcode"} 980180.0
+prometheus_local_storage_chunk_ops_total{type="unpin"} 32662.0
+# TYPE hist histogram
+# HELP hist help
+hist_bucket{le="1"} 0
+hist_bucket{le="+Inf"} 3
+hist_count 3
+hist_sum 2
+"""
+
+    @benchmark
+    def _():
+        # We need to convert the generator to a full list in order to
+        # accurately measure the time to yield everything.
+        return list(text_string_to_metric_families(text))
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/tox.ini b/tox.ini
@@ -5,6 +5,7 @@ envlist = coverage-clean,py{3.9,3.10,3.11,3.12,3.13,py3.9,3.9-nooptionals},cover
 deps =
     coverage
     pytest
+    pytest-benchmark
     attrs
     {py3.9,pypy3.9}: twisted
     # NOTE: Pinned due to https://github.com/prometheus/client_python/issues/1020