Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e88ad80
biolog templates
realmarcin Mar 4, 2025
c200198
Add py classes for biolog templates
caufieldjh Mar 4, 2025
ac0db1c
Import core NamedEntity and use example for paper title
caufieldjh Mar 4, 2025
0c81c17
Fix prompt example
caufieldjh Mar 4, 2025
59b22a6
Extract full author list
caufieldjh Mar 4, 2025
0bb91e3
Restructure experiments slot
caufieldjh Mar 4, 2025
f694b9c
Tuning slots in Experiment class
caufieldjh Mar 4, 2025
7a6d63b
Tuning for subclasses
caufieldjh Mar 4, 2025
0b60a23
Add description field to Experiment
caufieldjh Mar 4, 2025
098bf6e
gpt 4.5 preview
realmarcin Mar 5, 2025
7441b0c
gpt 4.5 preview
realmarcin Mar 5, 2025
89d76f4
adding METPO annotator
realmarcin Mar 19, 2025
faeaa6b
Fixes for the table extractions
caufieldjh Mar 19, 2025
0b33b52
Refine organism extraction; add slot for group name
caufieldjh Mar 19, 2025
815a976
Add trait extraction
caufieldjh Mar 19, 2025
4f2674e
Add traits extraction to study and experiment
caufieldjh Mar 19, 2025
014c1d5
Adjust how METPO is used to annotate traits
caufieldjh Mar 19, 2025
5d8ee7d
Prompt tuning in plate name extraction
caufieldjh Mar 19, 2025
b115fb3
Expand protocol step extraction; move culture conditions into step
caufieldjh Mar 19, 2025
9c685e1
growth experiment
realmarcin Mar 25, 2025
53513e0
Schema tuning
caufieldjh Mar 25, 2025
81eeadf
Further prompt tuning
caufieldjh Mar 25, 2025
2d09094
Further tweaks for more consistent parsing
caufieldjh Mar 25, 2025
7a70fb6
biolog PM01
realmarcin Apr 1, 2025
468d0d4
adding ethics and data governance module
realmarcin Apr 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,772 changes: 3,772 additions & 0 deletions fmicb-12-651282__ontogpt_gpt4.5preview.txt

Large diffs are not rendered by default.

3,772 changes: 3,772 additions & 0 deletions fmicb-12-651282__ontogpt_gpto4.5preview.txt

Large diffs are not rendered by default.

845 changes: 845 additions & 0 deletions src/ontogpt/templates/biolog.py

Large diffs are not rendered by default.

560 changes: 560 additions & 0 deletions src/ontogpt/templates/biolog.yaml

Large diffs are not rendered by default.

585 changes: 585 additions & 0 deletions src/ontogpt/templates/biolog_PM01.yaml

Large diffs are not rendered by default.

124 changes: 124 additions & 0 deletions src/ontogpt/templates/biolog_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from __future__ import annotations

import re
import sys
from datetime import (
date,
datetime,
time
)
from decimal import Decimal
from enum import Enum
from typing import (
Any,
ClassVar,
Dict,
List,
Literal,
Optional,
Union
)

from pydantic import (
BaseModel,
ConfigDict,
Field,
RootModel,
field_validator
)


# Module-level version markers (presumably emitted by the LinkML pydantic
# generator -- confirm before editing by hand).
# NOTE: metamodel_version is the literal string "None", not the None singleton.
metamodel_version = "None"
# Same value as the `version` key of the source schema, biolog_simple.yaml.
version = "0.1.0"


class ConfiguredBaseModel(BaseModel):
    # Shared pydantic base class: Paper, Author and Experiment all inherit
    # the configuration below.
    model_config = ConfigDict(
        strict=False,  # lax mode: compatible input types are coerced
        use_enum_values=True,
        arbitrary_types_allowed=True,
        extra="forbid",  # undeclared fields are a validation error
        validate_default=True,  # default values are validated too
        validate_assignment=True,  # re-validate on attribute assignment
    )




class LinkMLMeta(RootModel):
    # Thin wrapper around a dict of LinkML metadata. Attribute lookups that
    # are not found on the model fall through to the wrapped dict, and the
    # subscript / `in` operators are forwarded to it.
    #
    # `frozen=True` stops `root` from being re-assigned; it does not stop the
    # dict itself from being mutated in place through __setitem__.
    model_config = ConfigDict(frozen=True)
    root: Dict[str, Any] = {}

    def __contains__(self, key: str) -> bool:
        """Return True when *key* is a key of the wrapped dict."""
        mapping = self.root
        return key in mapping

    def __getitem__(self, key: str):
        """Return the value stored under *key* in the wrapped dict."""
        mapping = self.root
        return mapping[key]

    def __setitem__(self, key: str, value):
        """Store *value* under *key* in the wrapped dict (in-place)."""
        mapping = self.root
        mapping[key] = value

    def __getattr__(self, key: str):
        """Forward an otherwise-unresolved attribute lookup to the dict."""
        mapping = self.root
        return getattr(mapping, key)


# Schema-level LinkML metadata for PaperExtractionSchema, wrapped in
# LinkMLMeta. `source_file` names the YAML schema this metadata describes.
linkml_meta = LinkMLMeta(
    {
        "default_prefix": "https://example.org/PaperExtractionSchema/",
        "description": (
            "A simplified schema describing the fields to extract from a "
            "paper that includes study metadata and a minimal notion of "
            "experiments.\n"
        ),
        "id": "https://example.org/PaperExtractionSchema",
        "imports": ["linkml:types"],
        "name": "PaperExtractionSchema",
        "prefixes": {
            "linkml": {
                "prefix_prefix": "linkml",
                "prefix_reference": "https://w3id.org/linkml/",
            },
            "xsd": {
                "prefix_prefix": "xsd",
                "prefix_reference": "http://www.w3.org/2001/XMLSchema#",
            },
        },
        "source_file": "src/ontogpt/templates/biolog_simple.yaml",
        "title": "Paper Extraction Schema",
        "types": {
            "boolean": {
                "base": "bool",
                "from_schema": "https://example.org/PaperExtractionSchema",
                "name": "boolean",
            },
            "string": {
                "base": "str",
                "from_schema": "https://example.org/PaperExtractionSchema",
                "name": "string",
            },
        },
    }
)


class Paper(ConfiguredBaseModel):
    """
    Top-level class representing a single paper/study.
    """

    # Class-level LinkML metadata; `tree_root` marks this as the schema root.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "from_schema": "https://example.org/PaperExtractionSchema",
            "tree_root": True,
        }
    )

    # --- required slots -------------------------------------------------
    study_title: str = Field(
        default=...,
        description="Title of the study.",
        json_schema_extra={
            "linkml_meta": {"alias": "study_title", "domain_of": ["Paper"]}
        },
    )
    # Author is defined later in the module; the reference is resolved by the
    # model_rebuild() calls at the bottom of the file.
    authors: List[Author] = Field(
        default=...,
        description="List of authors of the paper.",
        json_schema_extra={
            "linkml_meta": {"alias": "authors", "domain_of": ["Paper"]}
        },
    )

    # --- optional slots (default to None) -------------------------------
    doi: Optional[str] = Field(
        default=None,
        description="DOI of the publication.",
        json_schema_extra={
            "linkml_meta": {"alias": "doi", "domain_of": ["Paper"]}
        },
    )
    # Stored as free text (str). Inside this class body the name `date`
    # shadows the `datetime.date` imported at the top of the module.
    date: Optional[str] = Field(
        default=None,
        description="Date of the publication or the study.",
        json_schema_extra={
            "linkml_meta": {"alias": "date", "domain_of": ["Paper"]}
        },
    )
    # Experiment is also defined later in the module (see note on `authors`).
    experiments: Optional[List[Experiment]] = Field(
        default=None,
        description="List of experiments mentioned in the paper.",
        json_schema_extra={
            "linkml_meta": {"alias": "experiments", "domain_of": ["Paper"]}
        },
    )


class Author(ConfiguredBaseModel):
    """
    Represents an author of the paper.
    """

    # Class-level LinkML metadata pointing back at the originating schema.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "https://example.org/PaperExtractionSchema"}
    )

    # The only slot on Author, and it is required.
    name: str = Field(
        default=...,
        description="Name of the author.",
        json_schema_extra={
            "linkml_meta": {"alias": "name", "domain_of": ["Author"]}
        },
    )


class Experiment(ConfiguredBaseModel):
    """
    A minimal experiment class (could be Biolog or otherwise).
    """

    # Class-level LinkML metadata pointing back at the originating schema.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "https://example.org/PaperExtractionSchema"}
    )

    # Both slots are optional and default to None when not supplied.
    experiment_title: Optional[str] = Field(
        default=None,
        description="Title or short description of the experiment.",
        json_schema_extra={
            "linkml_meta": {
                "alias": "experiment_title",
                "domain_of": ["Experiment"],
            }
        },
    )
    is_biolog_experiment: Optional[bool] = Field(
        default=None,
        description="Flag indicating if this experiment is a Biolog experiment.",
        json_schema_extra={
            "linkml_meta": {
                "alias": "is_biolog_experiment",
                "domain_of": ["Experiment"],
            }
        },
    )


# Model rebuild
# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
#
# Paper's annotations name Author and Experiment, which are only defined
# further down the module, and `from __future__ import annotations` keeps every
# annotation as a string. Each model is rebuilt here, once all three classes
# exist, so that those references can be resolved.
Paper.model_rebuild()
Author.model_rebuild()
Experiment.model_rebuild()

98 changes: 98 additions & 0 deletions src/ontogpt/templates/biolog_simple.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# LinkML schema: study-level metadata for a paper plus a minimal list of
# experiments. Paper is the tree root; Author and Experiment are nested
# under it through the `authors` and `experiments` slots.
id: https://example.org/PaperExtractionSchema
name: PaperExtractionSchema
title: Paper Extraction Schema
description: >
  A simplified schema describing the fields to extract from a paper that includes
  study metadata and a minimal notion of experiments.
version: 0.1.0

prefixes:
  linkml: "https://w3id.org/linkml/"
  xsd: "http://www.w3.org/2001/XMLSchema#"

imports:
  - linkml:types

# Root types declared locally. Each one carries both the Python `base` and
# the XSD datatype `uri` (written with the `xsd` prefix declared above), so
# the type definitions are complete on their own.
types:
  string:
    base: str
    uri: xsd:string
  boolean:
    base: bool
    uri: xsd:boolean

########################
# CLASSES
########################

classes:
  # The top-level class (root) for the paper
  Paper:
    description: "Top-level class representing a single paper/study."
    tree_root: true
    slots:
      - study_title
      - authors
      - doi
      - date
      - experiments

  Author:
    description: "Represents an author of the paper."
    slots:
      - name

  Experiment:
    description: "A minimal experiment class (could be Biolog or otherwise)."
    slots:
      - experiment_title
      - is_biolog_experiment

########################
# SLOTS
########################

slots:
  # Paper-level slots
  study_title:
    description: "Title of the study."
    range: string
    required: true

  # One Author object per author; at least the list itself is mandatory.
  authors:
    description: "List of authors of the paper."
    range: Author
    multivalued: true
    required: true

  doi:
    description: "DOI of the publication."
    range: string
    required: false

  # Kept as a free-text string rather than a date type.
  date:
    description: "Date of the publication or the study."
    range: string
    required: false

  experiments:
    description: "List of experiments mentioned in the paper."
    range: Experiment
    multivalued: true
    required: false

  # Author-level slot
  name:
    description: "Name of the author."
    range: string
    required: true

  # Experiment-level slots
  experiment_title:
    description: "Title or short description of the experiment."
    range: string
    required: false

  is_biolog_experiment:
    description: "Flag indicating if this experiment is a Biolog experiment."
    range: boolean
    required: false

Loading