Collect every description for a normalized node; make descriptions opt-in.

* create_normalized_node: include_descriptions now defaults to False.
* When enabled, all non-empty "d" entries across the node's identifiers are
  gathered and passed through unique_list; the first one is still exposed as
  id.description and the whole list is exposed under "descriptions".
* Tests cover the opt-in path and the default (no descriptions) path.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; confirm context-line whitespace
against the target tree before applying.

diff --git a/src/nodenorm/handlers/normalized_nodes.py b/src/nodenorm/handlers/normalized_nodes.py
index 8da380d..64df0e8 100644
--- a/src/nodenorm/handlers/normalized_nodes.py
+++ b/src/nodenorm/handlers/normalized_nodes.py
@@ -201,7 +201,7 @@ async def get_normalized_nodes(
 
 async def create_normalized_node(
     aggregate_node: NormalizedNode,
-    include_descriptions: bool = True,
+    include_descriptions: bool = False,
     include_individual_types: bool = False,
     conflations: dict = None,
 ) -> dict:
@@ -254,16 +254,19 @@ async def create_normalized_node(
     else:
         normal_node = {"id": {"identifier": aggregate_node.canonical_identifier}}
 
-    # if descriptions are enabled, look for the first available description and use that
+    # if descriptions are enabled, collect all available descriptions and use the first as the preferred one
     if include_descriptions:
-        descriptions = list(
-            map(
-                lambda x: x[0],
-                filter(lambda x: len(x) > 0, [eid["d"] for eid in aggregate_node.identifiers if "d" in eid]),
-            )
+        descriptions = unique_list(
+            [
+                description
+                for identifier in aggregate_node.identifiers
+                for description in identifier.get("d", [])
+                if description
+            ]
         )
         if len(descriptions) > 0:
             normal_node["id"]["description"] = descriptions[0]
+            normal_node["descriptions"] = descriptions
 
     # now need to reformat the identifier keys. It could be cleaner but we have to worry about if there is a label
     normal_node["equivalent_identifiers"] = []
diff --git a/tests/test_normalized_nodes_lookup.py b/tests/test_normalized_nodes_lookup.py
index 118eecf..418726f 100644
--- a/tests/test_normalized_nodes_lookup.py
+++ b/tests/test_normalized_nodes_lookup.py
@@ -41,6 +41,8 @@ def load_normalized_nodes_module():
 normalized_nodes = load_normalized_nodes_module()
 _lookup_curie_metadata = normalized_nodes._lookup_curie_metadata
 _lookup_equivalent_identifiers = normalized_nodes._lookup_equivalent_identifiers
+create_normalized_node = normalized_nodes.create_normalized_node
+NormalizedNode = normalized_nodes.NormalizedNode
 
 
 class FakeAsyncElasticsearch:
@@ -78,6 +80,50 @@ def no_hit_response():
     return {"hits": {"total": {"value": 0}, "hits": []}}
 
 
+@pytest.mark.asyncio
+async def test_create_normalized_node_aggregates_descriptions_when_requested():
+    node = NormalizedNode(
+        curie="NCIT:C34373",
+        canonical_identifier="MONDO:0004976",
+        preferred_label="amyotrophic lateral sclerosis",
+        information_content=74.9,
+        identifiers=[
+            {"i": "MONDO:0004976", "l": "amyotrophic lateral sclerosis", "d": ["first description"]},
+            {"i": "NCIT:C34373", "l": "Amyotrophic Lateral Sclerosis", "d": ["second description"]},
+            {"i": "UMLS:C0002736", "l": "Amyotrophic Lateral Sclerosis", "d": ["first description", ""]},
+            {"i": "MESH:D000690", "l": "Amyotrophic Lateral Sclerosis"},
+        ],
+        types=["biolink:Disease"],
+        taxa=[],
+    )
+
+    response = await create_normalized_node(node, include_descriptions=True)
+
+    assert response["id"]["description"] == "first description"
+    assert response["descriptions"] == ["first description", "second description"]
+    assert response["equivalent_identifiers"][0]["description"] == "first description"
+    assert response["equivalent_identifiers"][1]["description"] == "second description"
+
+
+@pytest.mark.asyncio
+async def test_create_normalized_node_hides_descriptions_by_default():
+    node = NormalizedNode(
+        curie="NCIT:C34373",
+        canonical_identifier="MONDO:0004976",
+        preferred_label="amyotrophic lateral sclerosis",
+        information_content=74.9,
+        identifiers=[{"i": "MONDO:0004976", "l": "amyotrophic lateral sclerosis", "d": ["first description"]}],
+        types=["biolink:Disease"],
+        taxa=[],
+    )
+
+    response = await create_normalized_node(node)
+
+    assert "description" not in response["id"]
+    assert "descriptions" not in response
+    assert "description" not in response["equivalent_identifiers"][0]
+
+
 @pytest.mark.asyncio
 async def test_lookup_equivalent_identifiers_uses_shared_msearch_index():
     namespace = fake_namespace([[hit_response("CHEBI:17310"), no_hit_response()]])