Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
3d37f2e
Renamed LOGGER to logger.
gaurav Dec 18, 2025
4d271de
Added logging to /synonyms API endpoint.
gaurav Dec 18, 2025
096f79c
Added logging to Lookup.
gaurav Dec 18, 2025
923e079
Added logging for bulk_lookup().
gaurav Dec 18, 2025
c8ae693
Added NameRes version to /status.
gaurav Feb 20, 2026
bc02650
Added BIOLINK_MODEL_TAG.
gaurav Feb 20, 2026
9c255ec
Added basic tests for /status.
gaurav Feb 20, 2026
27f9f4b
Improved tests.
gaurav Feb 20, 2026
d4d23b1
Attempted to fix issues.
gaurav Feb 20, 2026
b19316c
Fixed key name.
gaurav Feb 20, 2026
7f02500
Update api/server.py
gaurav Feb 20, 2026
3a2bbc9
Update api/server.py
gaurav Feb 20, 2026
f7267dc
Added a "recent times" that allows us to track query times.
gaurav Dec 18, 2025
9c89abf
Improved name.
gaurav Dec 18, 2025
a0707a6
Apply suggestion from @gaurav
gaurav Apr 7, 2026
a3e00f8
Add Solr performance diagnostics to /status and logging
gaurav Apr 7, 2026
fc168b8
Added on:pull_request trigger for testing.
gaurav Apr 7, 2026
3979968
Add query rate estimation to /status
gaurav Apr 7, 2026
688737e
Incremented version to v1.5.2.
gaurav Apr 7, 2026
a05e4b8
Group Solr metrics under a single 'solr' key in /status
gaurav Apr 7, 2026
c9367b6
Merge query_log and expand rate statistics in /status
gaurav Apr 7, 2026
a211880
Remove recent_times_ms from /status response
gaurav Apr 7, 2026
66b5654
Fix negative inter_arrival_ms from out-of-order log entries
gaurav Apr 7, 2026
03672b0
Fix three bugs identified in PR review
gaurav Apr 7, 2026
9fcb4e8
Add physical_memory_used_pct to /status Solr OS metrics
gaurav Apr 8, 2026
c9fc44c
Extract Solr logic from server.py into api/solr.py with OO design
gaurav Apr 8, 2026
3624ca6
Added on:pull_request trigger for testing.
gaurav Apr 8, 2026
f1d3b2e
Add latency bucket proportions to recent_queries in /status
gaurav Apr 8, 2026
82e9e1a
Add ?full=true parameter to /status to reduce Solr load
gaurav Apr 8, 2026
36c7f11
Document ?full query parameter in FastAPI schema for /status
gaurav Apr 8, 2026
ddcd29e
Simplify latency bucket counting: single pass + named constants
gaurav Apr 8, 2026
85cb011
Fixed bug: changed default infores to NameRes, not NodeNorm.
gaurav Apr 8, 2026
05e0a0a
Fix doc inaccuracies found during sync-docs audit
gaurav Apr 8, 2026
da3f360
Replaced time_ns() with perf_counter_ns().
gaurav Apr 8, 2026
9a805d8
Removed on:pull_request trigger after testing.
gaurav Apr 8, 2026
ba2a9b9
Tweaked tests to get them to pass.
gaurav Apr 8, 2026
0e9b80e
Oops, messed with the wrong test.
gaurav Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 75 additions & 15 deletions api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
* Matching names are returned first, followed by non-matching names
"""
import json
import logging, warnings
import logging
import time
import warnings
import os
import re
from collections import deque
from typing import Dict, List, Union, Annotated, Optional

from fastapi import Body, FastAPI, Query
Expand All @@ -19,13 +22,14 @@
from pydantic import BaseModel, conint, Field
from starlette.middleware.cors import CORSMiddleware

from .apidocs import get_app_info, construct_open_api_schema
from api.apidocs import get_app_info, construct_open_api_schema
Comment thread
gaurav marked this conversation as resolved.
Outdated

LOGGER = logging.getLogger(__name__)
SOLR_HOST = os.getenv("SOLR_HOST", "localhost")
SOLR_PORT = os.getenv("SOLR_PORT", "8983")

app = FastAPI(**get_app_info())
logger = logging.getLogger(__name__)
logging.basicConfig(level=os.getenv("LOGLEVEL", logging.INFO))
Comment thread
gaurav marked this conversation as resolved.

app.add_middleware(
CORSMiddleware,
Expand All @@ -35,6 +39,10 @@
allow_headers=["*"],
)

# We track the time taken by the most recent Solr queries so we can report performance via /status.
# The window size defaults to 1000 and can be overridden with the RECENT_TIMES_COUNT environment variable.
# os.getenv() returns a string whenever the variable is set, while deque(maxlen=...) requires an integer,
# so the value must be converted explicitly (otherwise the module fails at import time with a TypeError).
RECENT_TIMES_COUNT = int(os.getenv("RECENT_TIMES_COUNT", "1000"))
recent_query_times = deque(maxlen=RECENT_TIMES_COUNT)

# ENDPOINT /
# If someone tries accessing /, we should redirect them to the Swagger interface.
@app.get("/", include_in_schema=False)
Expand Down Expand Up @@ -63,14 +71,26 @@ async def status() -> Dict:
'action': 'STATUS'
})
if response.status_code >= 300:
LOGGER.error("Solr error on accessing /solr/admin/cores?action=STATUS: %s", response.text)
logger.error("Solr error on accessing /solr/admin/cores?action=STATUS: %s", response.text)
response.raise_for_status()
result = response.json()

# Do we know the Babel version and version URL? It will be stored in an environmental variable if we do.
babel_version = os.environ.get("BABEL_VERSION", "unknown")
babel_version_url = os.environ.get("BABEL_VERSION_URL", "")

# Look up the BIOLINK_MODEL_TAG.
# Note: this should be a tag from the Biolink Model repo, e.g. "master" or "v4.3.6".
biolink_model_tag = os.environ.get("BIOLINK_MODEL_TAG", "master")
biolink_model_url = f"https://github.com/biolink/biolink-model/tree/{biolink_model_tag}"
biolink_model_download_url = f"https://raw.githubusercontent.com/biolink/biolink-model/{biolink_model_tag}/biolink-model.yaml"

# Figure out the NameRes version.
nameres_version = "master"
app_info = get_app_info()
if 'version' in app_info and app_info['version']:
nameres_version = 'v' + app_info['version']

# We should have a status for name_lookup_shard1_replica_n1.
if 'status' in result and 'name_lookup_shard1_replica_n1' in result['status']:
core = result['status']['name_lookup_shard1_replica_n1']
Expand All @@ -84,6 +104,12 @@ async def status() -> Dict:
'message': 'Reporting results from primary core.',
'babel_version': babel_version,
'babel_version_url': babel_version_url,
'biolink_model': {
'tag': biolink_model_tag,
'url': biolink_model_url,
'download_url': biolink_model_download_url,
},
'nameres_version': nameres_version,
'startTime': core['startTime'],
'numDocs': index.get('numDocs', ''),
'maxDoc': index.get('maxDoc', ''),
Expand All @@ -92,11 +118,24 @@ async def status() -> Dict:
'segmentCount': index.get('segmentCount', ''),
'lastModified': index.get('lastModified', ''),
'size': index.get('size', ''),
'recent_queries': {
'count': len(recent_query_times),
'mean_time_ms': sum(recent_query_times) / len(recent_query_times) if recent_query_times else -1,
'recent_times_ms': list(recent_query_times),
}
}
else:
return {
'status': 'error',
'message': 'Expected core not found.'
'message': 'Expected core not found.',
'babel_version': babel_version,
'babel_version_url': babel_version_url,
'biolink_model': {
'tag': biolink_model_tag,
'url': biolink_model_url,
'download_url': biolink_model_download_url,
},
'nameres_version': nameres_version,
}


Expand Down Expand Up @@ -125,7 +164,7 @@ async def reverse_lookup_get(
)
) -> Dict[str, Dict]:
"""Returns a list of synonyms for a particular CURIE."""
return await reverse_lookup(curies)
return await curie_lookup(curies)


@app.get(
Expand All @@ -135,14 +174,14 @@ async def reverse_lookup_get(
response_model=Dict[str, Dict],
tags=["lookup"],
)
async def lookup_names_get(
async def synonyms_get(
preferred_curies: List[str]= Query(
example=["MONDO:0005737", "MONDO:0009757"],
description="A list of CURIEs to look up synonyms for."
)
) -> Dict[str, Dict]:
"""Returns a list of synonyms for a particular CURIE."""
return await reverse_lookup(preferred_curies)
return await curie_lookup(preferred_curies)


@app.post(
Expand All @@ -159,7 +198,7 @@ async def lookup_names_post(
}),
) -> Dict[str, Dict]:
"""Returns a list of synonyms for a particular CURIE."""
return await reverse_lookup(request.curies)
return await curie_lookup(request.curies)


@app.post(
Expand All @@ -169,17 +208,18 @@ async def lookup_names_post(
response_model=Dict[str, Dict],
tags=["lookup"],
)
async def lookup_names_post(
async def synonyms_post(
request: SynonymsRequest = Body(..., example={
"preferred_curies": ["MONDO:0005737", "MONDO:0009757"],
}),
) -> Dict[str, Dict]:
"""Returns a list of synonyms for a particular CURIE."""
return await reverse_lookup(request.preferred_curies)
return await curie_lookup(request.preferred_curies)


async def reverse_lookup(curies) -> Dict[str, Dict]:
async def curie_lookup(curies) -> Dict[str, Dict]:
"""Returns a list of synonyms for a particular CURIE."""
time_start = time.time_ns()
query = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/name_lookup/select"
curie_filter = " OR ".join(
f"curie:\"{curie}\""
Expand All @@ -199,6 +239,10 @@ async def reverse_lookup(curies) -> Dict[str, Dict]:
}
for doc in response_json["response"]["docs"]:
output[doc["curie"]] = doc
time_end = time.time_ns()

logger.info(f"CURIE Lookup on {len(curies)} CURIEs {json.dumps(curies)} took {(time_end - time_start)/1_000_000:.2f}ms")

return output

class LookupResult(BaseModel):
Expand Down Expand Up @@ -351,6 +395,8 @@ async def lookup(string: str,
will be returned, rather than filtering to concepts that are both PhenotypicFeature and Disease.
"""

time_start = time.time_ns()
Comment thread
gaurav marked this conversation as resolved.
Outdated

# First, we strip and lowercase the query since all our indexes are case-insensitive.
string_lc = string.strip().lower()

Expand Down Expand Up @@ -459,16 +505,18 @@ async def lookup(string: str,
"fields": "*, score",
"params": inner_params,
}
logging.debug(f"Query: {json.dumps(params, indent=2)}")
logger.debug(f"Query: {json.dumps(params, indent=2)}")

time_solr_start = time.time_ns()
query_url = f"http://{SOLR_HOST}:{SOLR_PORT}/solr/name_lookup/select"
async with httpx.AsyncClient(timeout=None) as client:
response = await client.post(query_url, json=params)
if response.status_code >= 300:
LOGGER.error("Solr REST error: %s", response.text)
logger.error("Solr REST error: %s", response.text)
response.raise_for_status()
response = response.json()
logging.debug(f"Solr response: {json.dumps(response, indent=2)}")
time_solr_end = time.time_ns()
logger.debug(f"Solr response: {json.dumps(response, indent=2)}")

# Associate highlighting information with search results.
highlighting_response = response.get("highlighting", {})
Expand Down Expand Up @@ -511,6 +559,14 @@ async def lookup(string: str,
clique_identifier_count=doc.get("clique_identifier_count", 0),
types=[f"biolink:{d}" for d in doc.get("types", [])]))

time_end = time.time_ns()
time_taken_ms = (time_end - time_start)/1_000_000
recent_query_times.append(time_taken_ms)
logger.info(f"Lookup query to Solr for {json.dumps(string)} " +
f"(autocomplete={autocomplete}, highlighting={highlighting}, offset={offset}, limit={limit}, biolink_types={biolink_types}, only_prefixes={only_prefixes}, exclude_prefixes={exclude_prefixes}, only_taxa={only_taxa}) "
f"took {time_taken_ms:.2f}ms (with {(time_solr_end - time_solr_start)/1_000_000:.2f}ms waiting for Solr)"
)

return outputs

## BULK ENDPOINT
Expand Down Expand Up @@ -579,6 +635,7 @@ class NameResQuery(BaseModel):
tags=["lookup"]
)
async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]:
time_start = time.time_ns()
result = {}
for string in query.strings:
result[string] = await lookup(
Expand All @@ -591,6 +648,9 @@ async def bulk_lookup(query: NameResQuery) -> Dict[str, List[LookupResult]]:
query.only_prefixes,
query.exclude_prefixes,
query.only_taxa)
time_end = time.time_ns()
logger.info(f"Bulk lookup query for {len(query.strings)} strings ({query}) took {(time_end - time_start)/1_000_000:.2f}ms")

return result


Expand Down
29 changes: 29 additions & 0 deletions tests/test_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import logging

from api.server import app
from fastapi.testclient import TestClient

# Turn on debugging for tests.
logging.basicConfig(level=logging.DEBUG)

def test_status():
    """Check that GET /status reports a healthy core plus version metadata.

    The document counts asserted at the end are pinned to the test dataset
    (89 documents, none deleted), so this test expects that dataset to be loaded.
    """
    payload = TestClient(app).get("/status").json()

    # Overall health and the human-readable message.
    assert payload['status'] == 'ok'
    assert payload['message'] != ''

    # Version metadata describing Babel, the Biolink Model and NameRes itself.
    for key in ('babel_version', 'babel_version_url', 'biolink_model'):
        assert key in payload
    assert 'tag' in payload['biolink_model']
    assert 'nameres_version' in payload

    # Index details reported for the core.
    assert payload['version'] > 1
    assert payload['size'] != ''
    assert payload['startTime']

    # Count the specific number of test documents we load.
    expected_counts = {'numDocs': 89, 'maxDoc': 89, 'deletedDocs': 0}
    for field, expected in expected_counts.items():
        assert payload[field] == expected
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test asserts legacy top-level /status fields (e.g., numDocs/maxDoc/deletedDocs/version/size/startTime). In the updated /status response those fields are nested under the solr key, so these assertions will fail. Update the test to read from status['solr'][...] (and adjust the version assertion accordingly).

Suggested change
assert status['version'] > 1
assert status['size'] != ''
assert status['startTime']
# Count the specific number of test documents we load.
assert status['numDocs'] == 89
assert status['maxDoc'] == 89
assert status['deletedDocs'] == 0
assert 'solr' in status
assert status['solr']['version'] != ''
assert status['solr']['size'] != ''
assert status['solr']['startTime']
# Count the specific number of test documents we load.
assert status['solr']['numDocs'] == 89
assert status['solr']['maxDoc'] == 89
assert status['solr']['deletedDocs'] == 0

Copilot uses AI. Check for mistakes.

Loading