Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions src/nodenorm/handlers/normalized_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ async def get_normalized_nodes(

async def create_normalized_node(
aggregate_node: NormalizedNode,
include_descriptions: bool = True,
include_descriptions: bool = False,
include_individual_types: bool = False,
conflations: dict = None,
) -> dict:
Expand Down Expand Up @@ -254,16 +254,19 @@ async def create_normalized_node(
else:
normal_node = {"id": {"identifier": aggregate_node.canonical_identifier}}

# if descriptions are enabled, look for the first available description and use that
# if descriptions are enabled, collect all available descriptions and use the first as the preferred one
if include_descriptions:
descriptions = list(
map(
lambda x: x[0],
filter(lambda x: len(x) > 0, [eid["d"] for eid in aggregate_node.identifiers if "d" in eid]),
)
descriptions = unique_list(
[
description
for identifier in aggregate_node.identifiers
for description in identifier.get("d", [])
if description
]
)
if len(descriptions) > 0:
normal_node["id"]["description"] = descriptions[0]
normal_node["descriptions"] = descriptions

# now need to reformat the identifier keys. It could be cleaner but we have to worry about if there is a label
normal_node["equivalent_identifiers"] = []
Expand Down
46 changes: 46 additions & 0 deletions tests/test_normalized_nodes_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ def load_normalized_nodes_module():
# Load the handler module once via the helper above, then bind the functions
# and the NormalizedNode class under test to module-level names so the test
# bodies below can call them directly.
normalized_nodes = load_normalized_nodes_module()
_lookup_curie_metadata = normalized_nodes._lookup_curie_metadata
_lookup_equivalent_identifiers = normalized_nodes._lookup_equivalent_identifiers
create_normalized_node = normalized_nodes.create_normalized_node
NormalizedNode = normalized_nodes.NormalizedNode


class FakeAsyncElasticsearch:
Expand Down Expand Up @@ -78,6 +80,50 @@ def no_hit_response():
return {"hits": {"total": {"value": 0}, "hits": []}}


@pytest.mark.asyncio
async def test_create_normalized_node_aggregates_descriptions_when_requested():
    """Check the description fields produced when ``include_descriptions=True``.

    The fixture carries two distinct descriptions, one repeated description
    paired with an empty string, and one identifier without a ``"d"`` key.
    The expected top-level list contains each non-empty description once, in
    first-seen order, and the first of them is the preferred description.
    """
    equivalent_ids = [
        {"i": "MONDO:0004976", "l": "amyotrophic lateral sclerosis", "d": ["first description"]},
        {"i": "NCIT:C34373", "l": "Amyotrophic Lateral Sclerosis", "d": ["second description"]},
        # Repeats an earlier description and adds a blank one.
        {"i": "UMLS:C0002736", "l": "Amyotrophic Lateral Sclerosis", "d": ["first description", ""]},
        # No "d" key at all.
        {"i": "MESH:D000690", "l": "Amyotrophic Lateral Sclerosis"},
    ]
    aggregate = NormalizedNode(
        curie="NCIT:C34373",
        canonical_identifier="MONDO:0004976",
        preferred_label="amyotrophic lateral sclerosis",
        information_content=74.9,
        identifiers=equivalent_ids,
        types=["biolink:Disease"],
        taxa=[],
    )

    result = await create_normalized_node(aggregate, include_descriptions=True)

    # Preferred description and the aggregated list.
    assert result["id"]["description"] == "first description"
    assert result["descriptions"] == ["first description", "second description"]

    # Each equivalent identifier keeps its own description.
    equivalents = result["equivalent_identifiers"]
    assert equivalents[0]["description"] == "first description"
    assert equivalents[1]["description"] == "second description"


@pytest.mark.asyncio
async def test_create_normalized_node_hides_descriptions_by_default():
    """Check that no description fields appear when no flags are passed.

    The single identifier does carry a description, so any description key in
    the output would mean descriptions were emitted without being requested.
    """
    sole_identifier = {
        "i": "MONDO:0004976",
        "l": "amyotrophic lateral sclerosis",
        "d": ["first description"],
    }
    aggregate = NormalizedNode(
        curie="NCIT:C34373",
        canonical_identifier="MONDO:0004976",
        preferred_label="amyotrophic lateral sclerosis",
        information_content=74.9,
        identifiers=[sole_identifier],
        types=["biolink:Disease"],
        taxa=[],
    )

    # Called with defaults only: include_descriptions is not passed.
    result = await create_normalized_node(aggregate)

    assert "description" not in result["id"]
    assert "descriptions" not in result
    assert "description" not in result["equivalent_identifiers"][0]


@pytest.mark.asyncio
async def test_lookup_equivalent_identifiers_uses_shared_msearch_index():
namespace = fake_namespace([[hit_response("CHEBI:17310"), no_hit_response()]])
Expand Down