From 9227999b20a1a5dd1590679b468005ada24cf4a0 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Thu, 18 Dec 2025 16:46:31 -0500
Subject: [PATCH 01/12] Added a "recent times" deque that allows us to track
 query times.

---
 api/server.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/api/server.py b/api/server.py
index 1a2965c4..65cc17e4 100755
--- a/api/server.py
+++ b/api/server.py
@@ -13,6 +13,7 @@
 import warnings
 import os
 import re
+from collections import deque
 from typing import Dict, List, Union, Annotated, Optional
 
 from fastapi import Body, FastAPI, Query
@@ -38,6 +39,10 @@
     allow_headers=["*"],
 )
 
+# We track the time taken for each Solr query for the last 1000 queries so we can track performance via /status.
+RECENT_TIMES_COUNT = os.getenv("RECENT_TIMES_COUNT", 1000)
+recent_query_times = deque(maxlen=RECENT_TIMES_COUNT)
+
 # ENDPOINT /
 # If someone tries accessing /, we should redirect them to the Swagger interface.
 @app.get("/", include_in_schema=False)
@@ -95,6 +100,11 @@ async def status() -> Dict:
             'segmentCount': index.get('segmentCount', ''),
             'lastModified': index.get('lastModified', ''),
             'size': index.get('size', ''),
+            'recent_queries': {
+                'count': len(recent_query_times),
+                'mean_time_ms': sum(recent_query_times) / len(recent_query_times) if recent_query_times else -1,
+                'recent_time_ms': list(recent_query_times),
+            }
         }
     else:
         return {
@@ -532,9 +542,11 @@ async def lookup(string: str,
                            types=[f"biolink:{d}" for d in doc.get("types", [])]))
 
     time_end = time.time_ns()
+    time_taken_ms = (time_end - time_start)/1_000_000
+    recent_query_times.append(time_taken_ms)
     logger.info(f"Lookup query to Solr for {json.dumps(string)} " +
                  f"(autocomplete={autocomplete}, highlighting={highlighting}, offset={offset}, limit={limit}, biolink_types={biolink_types}, only_prefixes={only_prefixes}, exclude_prefixes={exclude_prefixes}, only_taxa={only_taxa}) "
-                 f"took {(time_end - time_start)/1_000_000:.2f}ms (with {(time_solr_end - time_solr_start)/1_000_000:.2f}ms waiting for Solr)"
+                 f"took {time_taken_ms:.2f}ms (with {(time_solr_end - time_solr_start)/1_000_000:.2f}ms waiting for Solr)"
     )
 
     return outputs

From ddeb95db45be88e99bcdd37cb8edafdfb17dd7c4 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Thu, 18 Dec 2025 16:53:48 -0500
Subject: [PATCH 02/12] Improved name.

---
 api/server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/server.py b/api/server.py
index 65cc17e4..842eb067 100755
--- a/api/server.py
+++ b/api/server.py
@@ -103,7 +103,7 @@ async def status() -> Dict:
             'recent_queries': {
                 'count': len(recent_query_times),
                 'mean_time_ms': sum(recent_query_times) / len(recent_query_times) if recent_query_times else -1,
-                'recent_time_ms': list(recent_query_times),
+                'recent_times_ms': list(recent_query_times),
             }
         }
     else:

From e17e784801cc788721b77b94ebca5448d74d5840 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Tue, 7 Apr 2026 10:40:16 -0600
Subject: [PATCH 03/12] Cleaned up code.

---
 api/server.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/api/server.py b/api/server.py
index 84cf17d3..b672818b 100755
--- a/api/server.py
+++ b/api/server.py
@@ -616,10 +616,11 @@ async def lookup(string: str,
 
     time_end = time.time_ns()
     time_taken_ms = (time_end - time_start)/1_000_000
+    time_taken_ms_solr = (time_solr_end - time_solr_start)/1_000_000
     recent_query_times.append(time_taken_ms)
     logger.info(f"Lookup query to Solr for {json.dumps(string)} " +
                  f"(autocomplete={autocomplete}, highlighting={highlighting}, offset={offset}, limit={limit}, biolink_types={biolink_types}, only_prefixes={only_prefixes}, exclude_prefixes={exclude_prefixes}, only_taxa={only_taxa}): "
-                 f"took {time_taken_ms:.2f}ms (with {(time_solr_end - time_solr_start)/1_000_000:.2f}ms waiting for Solr)"
+                 f"took {time_taken_ms:.2f}ms (with {time_taken_ms_solr:.2f}ms waiting for Solr)"
     )
 
     return outputs

From 580d6f8559402b579f0e9fbaf5b4920426abe7f0 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Tue, 7 Apr 2026 10:56:28 -0600
Subject: [PATCH 04/12] Add Solr native metrics and separate Solr latency
 tracking to /status

- Track Solr-only wait time in a separate deque so mean_solr_time_ms
  can be distinguished from total API time in /status
- Pull query handler (requests/errors/timeouts/p75/p95/p99), cache
  (hitratio/evictions), and JVM (heap %, CPU load) metrics from Solr's
  /admin/metrics API; if that call fails, /status still responds and
  reports solr_metrics: null
- Fix RECENT_TIMES_COUNT env var type cast (int) to prevent deque crash
- Replace -1 sentinel with None for empty mean; remove verbose
  recent_times_ms list from response

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 api/server.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 59 insertions(+), 4 deletions(-)

diff --git a/api/server.py b/api/server.py
index b672818b..db4a2889 100755
--- a/api/server.py
+++ b/api/server.py
@@ -37,8 +37,9 @@
 )
 
 # We track the time taken for each Solr query for the last 1000 queries so we can track performance via /status.
-RECENT_TIMES_COUNT = os.getenv("RECENT_TIMES_COUNT", 1000)
+RECENT_TIMES_COUNT = int(os.getenv("RECENT_TIMES_COUNT", 1000))
 recent_query_times = deque(maxlen=RECENT_TIMES_COUNT)
+recent_solr_times = deque(maxlen=RECENT_TIMES_COUNT)
 
 # ENDPOINT /
 # If someone tries accessing /, we should redirect them to the Swagger interface.
@@ -63,10 +64,62 @@ async def status_get() -> Dict:
 async def status() -> Dict:
     """ Return a dictionary containing status and count information for the underlying Solr instance. """
     query_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/admin/cores"
+    metrics_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/admin/metrics"
     async with httpx.AsyncClient(timeout=None) as client:
         response = await client.get(query_url, params={
             'action': 'STATUS'
         })
+
+        # Fetch Solr query handler, cache, and JVM metrics for strain detection.
+        solr_metrics = None
+        try:
+            core_metrics_resp = await client.get(metrics_url, params={
+                'group': 'core',
+                'prefix': 'QUERY./select,CACHE.core.queryResultCache',
+                'wt': 'json',
+            })
+            jvm_metrics_resp = await client.get(metrics_url, params={
+                'group': 'jvm',
+                'prefix': 'memory.heap,os.processCpuLoad',
+                'wt': 'json',
+            })
+            if core_metrics_resp.status_code < 300 and jvm_metrics_resp.status_code < 300:
+                cm = core_metrics_resp.json().get('metrics', {})
+                jm = jvm_metrics_resp.json().get('metrics', {})
+
+                # Core metrics are keyed by "solr.core.<corename>:<metric>"
+                core_key = next((k for k in cm if k.startswith('solr.core.')), None)
+                core_data = cm.get(core_key, {}) if core_key else {}
+
+                qh = core_data.get('QUERY./select.requestTimes', {})
+                cache = core_data.get('CACHE.core.queryResultCache', {})
+                heap = jm.get('solr.jvm', {}).get('memory.heap', {})
+                cpu = jm.get('solr.jvm', {}).get('os.processCpuLoad', None)
+
+                solr_metrics = {
+                    'query_handler': {
+                        'requests': core_data.get('QUERY./select.requests'),
+                        'errors': core_data.get('QUERY./select.errors'),
+                        'timeouts': core_data.get('QUERY./select.timeouts'),
+                        'mean_ms': qh.get('mean_ms'),
+                        'p75_ms': qh.get('p75_ms'),
+                        'p95_ms': qh.get('p95_ms'),
+                        'p99_ms': qh.get('p99_ms'),
+                    },
+                    'cache': {
+                        'hitratio': cache.get('hitratio'),
+                        'evictions': cache.get('evictions'),
+                        'size': cache.get('size'),
+                    },
+                    'jvm': {
+                        'heap_used_mb': round(heap.get('used', 0) / 1_048_576, 1) if 'used' in heap else None,
+                        'heap_max_mb': round(heap.get('max', 0) / 1_048_576, 1) if 'max' in heap else None,
+                        'heap_used_pct': round(heap.get('used', 0) / heap['max'] * 100, 1) if heap.get('max') else None,
+                        'cpu_load': cpu,
+                    },
+                }
+        except Exception:
+            logger.warning("Failed to retrieve Solr metrics for /status", exc_info=True)
     if response.status_code >= 300:
         logger.error("Solr error on accessing /solr/admin/cores?action=STATUS: %s", response.text)
         response.raise_for_status()
@@ -117,9 +170,10 @@ async def status() -> Dict:
             'size': index.get('size', ''),
             'recent_queries': {
                 'count': len(recent_query_times),
-                'mean_time_ms': sum(recent_query_times) / len(recent_query_times) if recent_query_times else -1,
-                'recent_times_ms': list(recent_query_times),
-            }
+                'mean_time_ms': sum(recent_query_times) / len(recent_query_times) if recent_query_times else None,
+                'mean_solr_time_ms': sum(recent_solr_times) / len(recent_solr_times) if recent_solr_times else None,
+            },
+            'solr_metrics': solr_metrics,
         }
     else:
         return {
@@ -618,6 +672,7 @@ async def lookup(string: str,
     time_taken_ms = (time_end - time_start)/1_000_000
     time_taken_ms_solr = (time_solr_end - time_solr_start)/1_000_000
     recent_query_times.append(time_taken_ms)
+    recent_solr_times.append(time_taken_ms_solr)
     logger.info(f"Lookup query to Solr for {json.dumps(string)} " +
                  f"(autocomplete={autocomplete}, highlighting={highlighting}, offset={offset}, limit={limit}, biolink_types={biolink_types}, only_prefixes={only_prefixes}, exclude_prefixes={exclude_prefixes}, only_taxa={only_taxa}): "
                  f"took {time_taken_ms:.2f}ms (with {time_taken_ms_solr:.2f}ms waiting for Solr)"

From a2e998ee9037092df84e4751d9573b6678cd14a3 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Tue, 7 Apr 2026 10:59:54 -0600
Subject: [PATCH 05/12] Address code review feedback on /status metrics

- Combine two /admin/metrics calls into one (group=core&group=jvm),
  halving the round-trip overhead per /status request
- Pin the core key to name_lookup_shard1_replica_n1 instead of relying on
  a non-deterministic next() iteration over the metrics dict
- Move raise_for_status() inside the async with block so all Solr I/O
  is co-located
- Add test_status_shape and test_status_recent_queries_populated tests
- Update API.md with recent_queries and solr_metrics response fields

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 api/server.py         | 42 +++++++++++++++++++-----------------------
 documentation/API.md  | 33 ++++++++++++++++++++++++++++++++-
 tests/test_service.py | 42 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 92 insertions(+), 25 deletions(-)

diff --git a/api/server.py b/api/server.py
index db4a2889..c4d4babf 100755
--- a/api/server.py
+++ b/api/server.py
@@ -69,32 +69,30 @@ async def status() -> Dict:
         response = await client.get(query_url, params={
             'action': 'STATUS'
         })
+        if response.status_code >= 300:
+            logger.error("Solr error on accessing /solr/admin/cores?action=STATUS: %s", response.text)
+            response.raise_for_status()
 
         # Fetch Solr query handler, cache, and JVM metrics for strain detection.
+        # A single call with group=core&group=jvm retrieves both in one round-trip.
+        SOLR_CORE_NAME = 'name_lookup_shard1_replica_n1'
         solr_metrics = None
         try:
-            core_metrics_resp = await client.get(metrics_url, params={
-                'group': 'core',
-                'prefix': 'QUERY./select,CACHE.core.queryResultCache',
-                'wt': 'json',
-            })
-            jvm_metrics_resp = await client.get(metrics_url, params={
-                'group': 'jvm',
-                'prefix': 'memory.heap,os.processCpuLoad',
-                'wt': 'json',
-            })
-            if core_metrics_resp.status_code < 300 and jvm_metrics_resp.status_code < 300:
-                cm = core_metrics_resp.json().get('metrics', {})
-                jm = jvm_metrics_resp.json().get('metrics', {})
-
-                # Core metrics are keyed by "solr.core.<corename>:<metric>"
-                core_key = next((k for k in cm if k.startswith('solr.core.')), None)
-                core_data = cm.get(core_key, {}) if core_key else {}
-
+            metrics_resp = await client.get(metrics_url, params=[
+                ('group', 'core'),
+                ('group', 'jvm'),
+                ('prefix', 'QUERY./select,CACHE.core.queryResultCache'),
+                ('prefix', 'memory.heap,os.processCpuLoad'),
+                ('wt', 'json'),
+            ])
+            if metrics_resp.status_code < 300:
+                all_metrics = metrics_resp.json().get('metrics', {})
+
+                core_data = all_metrics.get(f'solr.core.{SOLR_CORE_NAME}', {})
                 qh = core_data.get('QUERY./select.requestTimes', {})
                 cache = core_data.get('CACHE.core.queryResultCache', {})
-                heap = jm.get('solr.jvm', {}).get('memory.heap', {})
-                cpu = jm.get('solr.jvm', {}).get('os.processCpuLoad', None)
+                heap = all_metrics.get('solr.jvm', {}).get('memory.heap', {})
+                cpu = all_metrics.get('solr.jvm', {}).get('os.processCpuLoad', None)
 
                 solr_metrics = {
                     'query_handler': {
@@ -120,9 +118,7 @@ async def status() -> Dict:
                 }
         except Exception:
             logger.warning("Failed to retrieve Solr metrics for /status", exc_info=True)
-    if response.status_code >= 300:
-        logger.error("Solr error on accessing /solr/admin/cores?action=STATUS: %s", response.text)
-        response.raise_for_status()
+
     result = response.json()
 
     # Do we know the Babel version and version URL? It will be stored in an environmental variable if we do.
diff --git a/documentation/API.md b/documentation/API.md
index 57bcbdea..7524e965 100644
--- a/documentation/API.md
+++ b/documentation/API.md
@@ -333,6 +333,37 @@ Solr database.
   "version": 34838,
   "segmentCount": 57,
   "lastModified": "2025-09-24T19:09:56.524Z",
-  "size": "142.17 GB"
+  "size": "142.17 GB",
+  "recent_queries": {
+    "count": 1000,
+    "mean_time_ms": 42.3,
+    "mean_solr_time_ms": 38.1
+  },
+  "solr_metrics": {
+    "query_handler": {
+      "requests": 9842301,
+      "errors": 0,
+      "timeouts": 0,
+      "mean_ms": 41.2,
+      "p75_ms": 55.0,
+      "p95_ms": 120.3,
+      "p99_ms": 340.7
+    },
+    "cache": {
+      "hitratio": 0.91,
+      "evictions": 1240,
+      "size": 512
+    },
+    "jvm": {
+      "heap_used_mb": 4096.0,
+      "heap_max_mb": 8192.0,
+      "heap_used_pct": 50.0,
+      "cpu_load": 0.12
+    }
+  }
 }
 ```
+
+`recent_queries` tracks the last 1000 `/lookup` queries handled by this NameRes instance (configurable via the `RECENT_TIMES_COUNT` environment variable). `mean_time_ms` is the total end-to-end time; `mean_solr_time_ms` isolates the time spent waiting for Solr, which helps distinguish Solr-side strain from NameRes processing overhead. Both fields are `null` if no queries have been handled since startup.
+
+`solr_metrics` is populated directly from Solr's `/admin/metrics` API and provides native Solr health indicators: cumulative query handler statistics (useful for detecting errors or timeouts), queryResultCache hit ratio (a low ratio indicates memory pressure or cache thrashing), and JVM heap/CPU metrics. This field is `null` if the Solr metrics API is unavailable.
diff --git a/tests/test_service.py b/tests/test_service.py
index 2fa9a242..ece44c4d 100644
--- a/tests/test_service.py
+++ b/tests/test_service.py
@@ -259,4 +259,44 @@ def test_only_taxa_queries():
     })
     results_ftd_disease_with_only_taxon = response.json()
     assert len(results_ftd_disease_with_only_taxon) == 1
-    assert results_ftd_disease_with_only_taxon[0]['curie'] == 'MONDO:0010857'
\ No newline at end of file
+    assert results_ftd_disease_with_only_taxon[0]['curie'] == 'MONDO:0010857'
+
+
+def test_status_shape():
+    """Verify /status returns expected fields including recent_queries and solr_metrics."""
+    client = TestClient(app)
+    response = client.get("/status")
+    assert response.status_code == 200
+    data = response.json()
+
+    assert data['status'] == 'ok'
+    assert 'numDocs' in data
+
+    # recent_queries should always be present; count is 0 and the means are None before any queries.
+    rq = data['recent_queries']
+    assert 'count' in rq
+    assert 'mean_time_ms' in rq
+    assert 'mean_solr_time_ms' in rq
+
+    # solr_metrics may be None if Solr's metrics API is unavailable, but if present
+    # it must contain the expected structure.
+    assert 'solr_metrics' in data
+    if data['solr_metrics'] is not None:
+        sm = data['solr_metrics']
+        assert 'query_handler' in sm
+        assert 'cache' in sm
+        assert 'jvm' in sm
+        assert 'requests' in sm['query_handler']
+        assert 'hitratio' in sm['cache']
+        assert 'heap_used_pct' in sm['jvm']
+
+
+def test_status_recent_queries_populated():
+    """After a lookup, recent_queries should reflect at least one recorded time."""
+    client = TestClient(app)
+    client.get("/lookup", params={'string': 'alzheimer'})
+    response = client.get("/status")
+    data = response.json()
+    assert data['recent_queries']['count'] >= 1
+    assert data['recent_queries']['mean_time_ms'] is not None
+    assert data['recent_queries']['mean_solr_time_ms'] is not None
\ No newline at end of file

From c8e876e4a387c34394f154b170381cdd714c96b3 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Tue, 7 Apr 2026 11:03:43 -0600
Subject: [PATCH 06/12] Gate solr_metrics behind ?metrics=true to avoid adding
 latency to /status

The Solr metrics round-trip is skipped unless the caller explicitly
passes ?metrics=true. The solr_metrics key is omitted from the response
entirely when not requested. Tests and API.md updated accordingly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 api/server.py         | 92 ++++++++++++++++++++++---------------------
 documentation/API.md  |  2 +-
 tests/test_service.py | 15 ++++++-
 3 files changed, 61 insertions(+), 48 deletions(-)

diff --git a/api/server.py b/api/server.py
index c4d4babf..994459e6 100755
--- a/api/server.py
+++ b/api/server.py
@@ -56,12 +56,12 @@ async def docs_redirect():
          description="<p>This endpoint will return status information and a list of counts from the underlying Solr database instance for this NameRes instance.</p>"
                      "<p>You can find out more about this endpoint in the <a href=\"https://github.com/NCATSTranslator/NameResolution/blob/master/documentation/API.md#status\">API documentation</a>.</p>"
          )
-async def status_get() -> Dict:
+async def status_get(metrics: bool = False) -> Dict:
     """ Return status and count information from the underyling Solr instance. """
-    return await status()
+    return await status(metrics)
 
 
-async def status() -> Dict:
+async def status(include_metrics: bool = False) -> Dict:
     """ Return a dictionary containing status and count information for the underlying Solr instance. """
     query_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/admin/cores"
     metrics_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/admin/metrics"
@@ -75,49 +75,51 @@ async def status() -> Dict:
 
         # Fetch Solr query handler, cache, and JVM metrics for strain detection.
         # A single call with group=core&group=jvm retrieves both in one round-trip.
+        # Only performed when the caller passes ?metrics=true, as it adds latency.
         SOLR_CORE_NAME = 'name_lookup_shard1_replica_n1'
         solr_metrics = None
-        try:
-            metrics_resp = await client.get(metrics_url, params=[
-                ('group', 'core'),
-                ('group', 'jvm'),
-                ('prefix', 'QUERY./select,CACHE.core.queryResultCache'),
-                ('prefix', 'memory.heap,os.processCpuLoad'),
-                ('wt', 'json'),
-            ])
-            if metrics_resp.status_code < 300:
-                all_metrics = metrics_resp.json().get('metrics', {})
-
-                core_data = all_metrics.get(f'solr.core.{SOLR_CORE_NAME}', {})
-                qh = core_data.get('QUERY./select.requestTimes', {})
-                cache = core_data.get('CACHE.core.queryResultCache', {})
-                heap = all_metrics.get('solr.jvm', {}).get('memory.heap', {})
-                cpu = all_metrics.get('solr.jvm', {}).get('os.processCpuLoad', None)
-
-                solr_metrics = {
-                    'query_handler': {
-                        'requests': core_data.get('QUERY./select.requests'),
-                        'errors': core_data.get('QUERY./select.errors'),
-                        'timeouts': core_data.get('QUERY./select.timeouts'),
-                        'mean_ms': qh.get('mean_ms'),
-                        'p75_ms': qh.get('p75_ms'),
-                        'p95_ms': qh.get('p95_ms'),
-                        'p99_ms': qh.get('p99_ms'),
-                    },
-                    'cache': {
-                        'hitratio': cache.get('hitratio'),
-                        'evictions': cache.get('evictions'),
-                        'size': cache.get('size'),
-                    },
-                    'jvm': {
-                        'heap_used_mb': round(heap.get('used', 0) / 1_048_576, 1) if 'used' in heap else None,
-                        'heap_max_mb': round(heap.get('max', 0) / 1_048_576, 1) if 'max' in heap else None,
-                        'heap_used_pct': round(heap.get('used', 0) / heap['max'] * 100, 1) if heap.get('max') else None,
-                        'cpu_load': cpu,
-                    },
-                }
-        except Exception:
-            logger.warning("Failed to retrieve Solr metrics for /status", exc_info=True)
+        if include_metrics:
+            try:
+                metrics_resp = await client.get(metrics_url, params=[
+                    ('group', 'core'),
+                    ('group', 'jvm'),
+                    ('prefix', 'QUERY./select,CACHE.core.queryResultCache'),
+                    ('prefix', 'memory.heap,os.processCpuLoad'),
+                    ('wt', 'json'),
+                ])
+                if metrics_resp.status_code < 300:
+                    all_metrics = metrics_resp.json().get('metrics', {})
+
+                    core_data = all_metrics.get(f'solr.core.{SOLR_CORE_NAME}', {})
+                    qh = core_data.get('QUERY./select.requestTimes', {})
+                    cache = core_data.get('CACHE.core.queryResultCache', {})
+                    heap = all_metrics.get('solr.jvm', {}).get('memory.heap', {})
+                    cpu = all_metrics.get('solr.jvm', {}).get('os.processCpuLoad', None)
+
+                    solr_metrics = {
+                        'query_handler': {
+                            'requests': core_data.get('QUERY./select.requests'),
+                            'errors': core_data.get('QUERY./select.errors'),
+                            'timeouts': core_data.get('QUERY./select.timeouts'),
+                            'mean_ms': qh.get('mean_ms'),
+                            'p75_ms': qh.get('p75_ms'),
+                            'p95_ms': qh.get('p95_ms'),
+                            'p99_ms': qh.get('p99_ms'),
+                        },
+                        'cache': {
+                            'hitratio': cache.get('hitratio'),
+                            'evictions': cache.get('evictions'),
+                            'size': cache.get('size'),
+                        },
+                        'jvm': {
+                            'heap_used_mb': round(heap.get('used', 0) / 1_048_576, 1) if 'used' in heap else None,
+                            'heap_max_mb': round(heap.get('max', 0) / 1_048_576, 1) if 'max' in heap else None,
+                            'heap_used_pct': round(heap.get('used', 0) / heap['max'] * 100, 1) if heap.get('max') else None,
+                            'cpu_load': cpu,
+                        },
+                    }
+            except Exception:
+                logger.warning("Failed to retrieve Solr metrics for /status", exc_info=True)
 
     result = response.json()
 
@@ -169,7 +171,7 @@ async def status() -> Dict:
                 'mean_time_ms': sum(recent_query_times) / len(recent_query_times) if recent_query_times else None,
                 'mean_solr_time_ms': sum(recent_solr_times) / len(recent_solr_times) if recent_solr_times else None,
             },
-            'solr_metrics': solr_metrics,
+            **(({'solr_metrics': solr_metrics}) if include_metrics else {}),
         }
     else:
         return {
diff --git a/documentation/API.md b/documentation/API.md
index 7524e965..453f2c1f 100644
--- a/documentation/API.md
+++ b/documentation/API.md
@@ -366,4 +366,4 @@ Solr database.
 
 `recent_queries` tracks the last 1000 `/lookup` queries handled by this NameRes instance (configurable via the `RECENT_TIMES_COUNT` environment variable). `mean_time_ms` is the total end-to-end time; `mean_solr_time_ms` isolates the time spent waiting for Solr, which helps distinguish Solr-side strain from NameRes processing overhead. Both fields are `null` if no queries have been handled since startup.
 
-`solr_metrics` is populated directly from Solr's `/admin/metrics` API and provides native Solr health indicators: cumulative query handler statistics (useful for detecting errors or timeouts), queryResultCache hit ratio (a low ratio indicates memory pressure or cache thrashing), and JVM heap/CPU metrics. This field is `null` if the Solr metrics API is unavailable.
+`solr_metrics` is only included when the `?metrics=true` query parameter is passed, as fetching it requires an additional round-trip to Solr. It is populated directly from Solr's `/admin/metrics` API and provides native Solr health indicators: cumulative query handler statistics (useful for detecting errors or timeouts), queryResultCache hit ratio (a low ratio indicates memory pressure or cache thrashing), and JVM heap/CPU metrics. This field is `null` within the response if the Solr metrics API is unavailable.
diff --git a/tests/test_service.py b/tests/test_service.py
index ece44c4d..2654c376 100644
--- a/tests/test_service.py
+++ b/tests/test_service.py
@@ -263,7 +263,7 @@ def test_only_taxa_queries():
 
 
 def test_status_shape():
-    """Verify /status returns expected fields including recent_queries and solr_metrics."""
+    """Verify /status returns expected fields including recent_queries; solr_metrics absent by default."""
     client = TestClient(app)
     response = client.get("/status")
     assert response.status_code == 200
@@ -278,9 +278,20 @@ def test_status_shape():
     assert 'mean_time_ms' in rq
     assert 'mean_solr_time_ms' in rq
 
+    # solr_metrics should not be present unless ?metrics=true is passed.
+    assert 'solr_metrics' not in data
+
+
+def test_status_metrics_param():
+    """With ?metrics=true, solr_metrics is included and has the expected structure."""
+    client = TestClient(app)
+    response = client.get("/status", params={'metrics': 'true'})
+    assert response.status_code == 200
+    data = response.json()
+
+    assert 'solr_metrics' in data
     # solr_metrics may be None if Solr's metrics API is unavailable, but if present
     # it must contain the expected structure.
-    assert 'solr_metrics' in data
     if data['solr_metrics'] is not None:
         sm = data['solr_metrics']
         assert 'query_handler' in sm

From e09415300c126eba09154f8ab0011b504432fa6b Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Tue, 7 Apr 2026 13:19:12 -0400
Subject: [PATCH 07/12] Update api/server.py: pass each Solr metrics prefix
 as its own parameter

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 api/server.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/api/server.py b/api/server.py
index 994459e6..792e17b7 100755
--- a/api/server.py
+++ b/api/server.py
@@ -83,8 +83,10 @@ async def status(include_metrics: bool = False) -> Dict:
                 metrics_resp = await client.get(metrics_url, params=[
                     ('group', 'core'),
                     ('group', 'jvm'),
-                    ('prefix', 'QUERY./select,CACHE.core.queryResultCache'),
-                    ('prefix', 'memory.heap,os.processCpuLoad'),
+                    ('prefix', 'QUERY./select'),
+                    ('prefix', 'CACHE.core.queryResultCache'),
+                    ('prefix', 'memory.heap'),
+                    ('prefix', 'os.processCpuLoad'),
                     ('wt', 'json'),
                 ])
                 if metrics_resp.status_code < 300:

From 2e9bfa99e5f5512fb60259a4f4ee6b1b8768576d Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Tue, 7 Apr 2026 11:18:23 -0600
Subject: [PATCH 08/12] Cleaned up solr_metrics inclusion.

---
 api/server.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/api/server.py b/api/server.py
index 792e17b7..194e7549 100755
--- a/api/server.py
+++ b/api/server.py
@@ -77,7 +77,9 @@ async def status(include_metrics: bool = False) -> Dict:
         # A single call with group=core&group=jvm retrieves both in one round-trip.
         # Only performed when the caller passes ?metrics=true, as it adds latency.
         SOLR_CORE_NAME = 'name_lookup_shard1_replica_n1'
-        solr_metrics = None
+        solr_metrics = {
+            "message": "Use /status?metrics=true to retrieve these metrics."
+        }
         if include_metrics:
             try:
                 metrics_resp = await client.get(metrics_url, params=[
@@ -173,7 +175,7 @@ async def status(include_metrics: bool = False) -> Dict:
                 'mean_time_ms': sum(recent_query_times) / len(recent_query_times) if recent_query_times else None,
                 'mean_solr_time_ms': sum(recent_solr_times) / len(recent_solr_times) if recent_solr_times else None,
             },
-            **(({'solr_metrics': solr_metrics}) if include_metrics else {}),
+            solr_metrics: solr_metrics,
         }
     else:
         return {
@@ -187,6 +189,7 @@ async def status(include_metrics: bool = False) -> Dict:
                 'download_url': biolink_model_download_url,
             },
             'nameres_version': nameres_version,
+            solr_metrics: solr_metrics,
         }
 
 

From a4a1d1d93ea41e593f45b3264f49a8a40c95f1df Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Tue, 7 Apr 2026 11:23:02 -0600
Subject: [PATCH 09/12] Dedup SOLR_CORE_NAME.

---
 api/server.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/api/server.py b/api/server.py
index 194e7549..198184c4 100755
--- a/api/server.py
+++ b/api/server.py
@@ -36,8 +36,12 @@
     allow_headers=["*"],
 )
 
+# Solr core name for this application.
+SOLR_CORE_NAME = 'name_lookup_shard1_replica_n1'
+
 # We track the time taken for each Solr query for the last 1000 queries so we can track performance via /status.
-RECENT_TIMES_COUNT = int(os.getenv("RECENT_TIMES_COUNT", 1000))
+DEFAULT_RECENT_TIMES_COUNT = 1000
+RECENT_TIMES_COUNT = int(os.getenv("RECENT_TIMES_COUNT", DEFAULT_RECENT_TIMES_COUNT))
 recent_query_times = deque(maxlen=RECENT_TIMES_COUNT)
 recent_solr_times = deque(maxlen=RECENT_TIMES_COUNT)
 
@@ -76,7 +80,6 @@ async def status(include_metrics: bool = False) -> Dict:
         # Fetch Solr query handler, cache, and JVM metrics for strain detection.
         # A single call with group=core&group=jvm retrieves both in one round-trip.
         # Only performed when the caller passes ?metrics=true, as it adds latency.
-        SOLR_CORE_NAME = 'name_lookup_shard1_replica_n1'
         solr_metrics = {
             "message": "Use /status?metrics=true to retrieve these metrics."
         }
@@ -143,9 +146,9 @@ async def status(include_metrics: bool = False) -> Dict:
     if 'version' in app_info and app_info['version']:
         nameres_version = 'v' + app_info['version']
 
-    # We should have a status for name_lookup_shard1_replica_n1.
-    if 'status' in result and 'name_lookup_shard1_replica_n1' in result['status']:
-        core = result['status']['name_lookup_shard1_replica_n1']
+    # We should have a status for SOLR_CORE_NAME.
+    if 'status' in result and SOLR_CORE_NAME in result['status']:
+        core = result['status'][SOLR_CORE_NAME]
 
         index = {}
         if 'index' in core:
@@ -182,7 +185,7 @@ async def status(include_metrics: bool = False) -> Dict:
             'status': 'error',
             'message': 'Expected core not found.',
             'babel_version': babel_version,
-            'babel_version_url': babel_version_url,
+            'babel_version_urlg': babel_version_url,
             'biolink_model': {
                 'tag': biolink_model_tag,
                 'url': biolink_model_url,

From e8a4a00a5c807bed2eea6ae2d62571f44d2a5634 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Tue, 7 Apr 2026 11:26:20 -0600
Subject: [PATCH 10/12] Fix typo.

---
 api/server.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/api/server.py b/api/server.py
index 198184c4..5b34170d 100755
--- a/api/server.py
+++ b/api/server.py
@@ -178,7 +178,7 @@ async def status(include_metrics: bool = False) -> Dict:
                 'mean_time_ms': sum(recent_query_times) / len(recent_query_times) if recent_query_times else None,
                 'mean_solr_time_ms': sum(recent_solr_times) / len(recent_solr_times) if recent_solr_times else None,
             },
-            solr_metrics: solr_metrics,
+            'solr_metrics': solr_metrics,
         }
     else:
         return {
@@ -192,7 +192,7 @@ async def status(include_metrics: bool = False) -> Dict:
                 'download_url': biolink_model_download_url,
             },
             'nameres_version': nameres_version,
-            solr_metrics: solr_metrics,
+            'solr_metrics': solr_metrics,
         }
 
 

From 17c4107358d120532632eceba549796308cad141 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Tue, 7 Apr 2026 11:30:58 -0600
Subject: [PATCH 11/12] Attempt to reorganize and fix tests.

---
 tests/test_service.py | 50 -------------------------------------------
 tests/test_status.py  | 50 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/tests/test_service.py b/tests/test_service.py
index 2654c376..55c675fc 100644
--- a/tests/test_service.py
+++ b/tests/test_service.py
@@ -261,53 +261,3 @@ def test_only_taxa_queries():
     assert len(results_ftd_disease_with_only_taxon) == 1
     assert results_ftd_disease_with_only_taxon[0]['curie'] == 'MONDO:0010857'
 
-
-def test_status_shape():
-    """Verify /status returns expected fields including recent_queries; solr_metrics absent by default."""
-    client = TestClient(app)
-    response = client.get("/status")
-    assert response.status_code == 200
-    data = response.json()
-
-    assert data['status'] == 'ok'
-    assert 'numDocs' in data
-
-    # recent_queries should always be present; count/means are None before any queries.
-    rq = data['recent_queries']
-    assert 'count' in rq
-    assert 'mean_time_ms' in rq
-    assert 'mean_solr_time_ms' in rq
-
-    # solr_metrics should not be present unless ?metrics=true is passed.
-    assert 'solr_metrics' not in data
-
-
-def test_status_metrics_param():
-    """With ?metrics=true, solr_metrics is included and has the expected structure."""
-    client = TestClient(app)
-    response = client.get("/status", params={'metrics': 'true'})
-    assert response.status_code == 200
-    data = response.json()
-
-    assert 'solr_metrics' in data
-    # solr_metrics may be None if Solr's metrics API is unavailable, but if present
-    # it must contain the expected structure.
-    if data['solr_metrics'] is not None:
-        sm = data['solr_metrics']
-        assert 'query_handler' in sm
-        assert 'cache' in sm
-        assert 'jvm' in sm
-        assert 'requests' in sm['query_handler']
-        assert 'hitratio' in sm['cache']
-        assert 'heap_used_pct' in sm['jvm']
-
-
-def test_status_recent_queries_populated():
-    """After a lookup, recent_queries should reflect at least one recorded time."""
-    client = TestClient(app)
-    client.get("/lookup", params={'string': 'alzheimer'})
-    response = client.get("/status")
-    data = response.json()
-    assert data['recent_queries']['count'] >= 1
-    assert data['recent_queries']['mean_time_ms'] is not None
-    assert data['recent_queries']['mean_solr_time_ms'] is not None
\ No newline at end of file
diff --git a/tests/test_status.py b/tests/test_status.py
index b48ddc07..f6290a5e 100644
--- a/tests/test_status.py
+++ b/tests/test_status.py
@@ -27,3 +27,53 @@ def test_status():
     assert status['maxDoc'] == 89
     assert status['deletedDocs'] == 0
 
+
+def test_status_shape():
+    """Verify /status returns expected fields including recent_queries; solr_metrics holds only a placeholder message by default."""
+    client = TestClient(app)
+    response = client.get("/status")
+    assert response.status_code == 200
+    data = response.json()
+
+    assert data['status'] == 'ok'
+    assert 'numDocs' in data
+
+    # recent_queries should always be present; count is 0 and the means are None before any queries.
+    rq = data['recent_queries']
+    assert 'count' in rq
+    assert 'mean_time_ms' in rq
+    assert 'mean_solr_time_ms' in rq
+
+    # solr_metrics should be present but with only a message unless ?metrics=true is passed.
+    assert 'solr_metrics' in data and 'message' in data['solr_metrics']
+
+
+def test_status_metrics_param():
+    """With ?metrics=true, solr_metrics is included and has the expected structure."""
+    client = TestClient(app)
+    response = client.get("/status", params={'metrics': 'true'})
+    assert response.status_code == 200
+    data = response.json()
+
+    assert 'solr_metrics' in data
+    # solr_metrics may hold only a placeholder 'message' instead of real metrics (presumably when
+    # Solr's metrics API is unavailable -- TODO confirm); otherwise it must contain the expected structure.
+    if 'message' not in data['solr_metrics']:
+        sm = data['solr_metrics']
+        assert 'query_handler' in sm
+        assert 'cache' in sm
+        assert 'jvm' in sm
+        assert 'requests' in sm['query_handler']
+        assert 'hitratio' in sm['cache']
+        assert 'heap_used_pct' in sm['jvm']
+
+
+def test_status_recent_queries_populated():
+    """After a lookup, recent_queries should reflect at least one recorded time."""
+    client = TestClient(app)
+    client.get("/lookup", params={'string': 'alzheimer'})
+    response = client.get("/status")
+    data = response.json()
+    assert data['recent_queries']['count'] >= 1
+    assert data['recent_queries']['mean_time_ms'] is not None
+    assert data['recent_queries']['mean_solr_time_ms'] is not None

From 38677c309fb0913707002dccf2e5395ab0e981f7 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Tue, 7 Apr 2026 11:42:26 -0600
Subject: [PATCH 12/12] Improved recent_queries output.

---
 api/server.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/api/server.py b/api/server.py
index 5b34170d..00ae2de2 100755
--- a/api/server.py
+++ b/api/server.py
@@ -146,6 +146,14 @@ async def status(include_metrics: bool = False) -> Dict:
     if 'version' in app_info and app_info['version']:
         nameres_version = 'v' + app_info['version']
 
+    # Prepare recent times for reporting.
+    recent_queries = {
+        'max': RECENT_TIMES_COUNT,
+        'count': len(recent_query_times),
+        'mean_time_ms': sum(recent_query_times) / len(recent_query_times) if recent_query_times else None,
+        'mean_solr_time_ms': sum(recent_solr_times) / len(recent_solr_times) if recent_solr_times else None,
+    }
+
     # We should have a status for SOLR_CORE_NAME.
     if 'status' in result and SOLR_CORE_NAME in result['status']:
         core = result['status'][SOLR_CORE_NAME]
@@ -173,11 +181,7 @@ async def status(include_metrics: bool = False) -> Dict:
             'segmentCount': index.get('segmentCount', ''),
             'lastModified': index.get('lastModified', ''),
             'size': index.get('size', ''),
-            'recent_queries': {
-                'count': len(recent_query_times),
-                'mean_time_ms': sum(recent_query_times) / len(recent_query_times) if recent_query_times else None,
-                'mean_solr_time_ms': sum(recent_solr_times) / len(recent_solr_times) if recent_solr_times else None,
-            },
+            'recent_queries': recent_queries,
             'solr_metrics': solr_metrics,
         }
     else:
@@ -185,12 +189,13 @@ async def status(include_metrics: bool = False) -> Dict:
             'status': 'error',
             'message': 'Expected core not found.',
             'babel_version': babel_version,
-            'babel_version_urlg': babel_version_url,
+            'babel_version_url': babel_version_url,
             'biolink_model': {
                 'tag': biolink_model_tag,
                 'url': biolink_model_url,
                 'download_url': biolink_model_download_url,
             },
+            'recent_queries': recent_queries,
             'nameres_version': nameres_version,
             'solr_metrics': solr_metrics,
         }