From 455a396ff29f2360f96913d8d55f3a8a28a995e5 Mon Sep 17 00:00:00 2001 From: Dave Thaler Date: Sat, 20 Jun 2026 14:47:54 +0000 Subject: [PATCH] Merge main into v7.1 (excluding extracted-files) --- .github/copilot-instructions.md | 13 +- .github/workflows/generate-files.yml | 2 +- .github/workflows/propagate-main-to-v7.1.yml | 2 +- .github/workflows/validate-yaml.yml | 2 +- LICENSE | 2 +- NOTICE | 2 +- README.md | 4 +- build/Makefile | 1 + build/extract-yaml.py | 8 +- build/hyperlink-code.py | 1 - build/hyperlink.py | 6 +- build/yaml-to-tsv.py | 11 +- changelog.md | 47 ++ specification/gedcom-0-introduction.md | 7 +- .../gedcom-1-hierarchical-container-format.md | 12 +- specification/gedcom-2-data-types.md | 4 +- .../gedcom-3-structures-1-organization.md | 212 ++++----- .../gedcom-3-structures-3-meaning.md | 434 ++++++++---------- .../gedcom-3-structures-4-enumerations.md | 203 ++++---- specification/gedcom-6-appendix-calendars.md | 6 +- 20 files changed, 469 insertions(+), 510 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 85d52c4f..e69e6d9c 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -37,7 +37,8 @@ make If the build completes but the `extracted-files/tags/` directory is empty, run the URI extraction manually: ```bash cd build -python3 uri-def.py ../specification/gedcom*.md ../extracted-files/tags +python3 extract-yaml.py --spec=../specification/ --dest=../extracted-files/ +python3 yaml-to-tsv.py --dest=../extracted-files/ ../extracted-files/tags ``` This command generates: @@ -94,7 +95,8 @@ mkdir -p ../extracted-files/tags make # If tags directory is empty, run URI extraction manually -python3 uri-def.py ../specification/gedcom*.md ../extracted-files/tags +python3 extract-yaml.py --spec=../specification/ --dest=../extracted-files/ +python3 yaml-to-tsv.py --dest=../extracted-files/ ../extracted-files/tags # Verify generated files exist ls -la ../specification/gedcom.html 
../specification/gedcom.pdf @@ -117,7 +119,7 @@ The repository has automated workflows that run on pushes and pull requests: - Creates PRs with updated extracted files if changes detected - Uses commands: - `python3 extract-grammars.py ../specification/gedcom*.md ../extracted-files/` - - `python3 uri-def.py ../specification/gedcom*.md ../extracted-files/tags` + - `python3 extract-yaml.py --spec=../specification/ --dest=../extracted-files/` ## Repository Structure @@ -144,7 +146,8 @@ The repository has automated workflows that run on pushes and pull requests: - `hyperlink.py` - Adds hyperlinks to markdown - `hyperlink-code.py` - Adds hyperlinks to code blocks in HTML - `extract-grammars.py` - Extracts ABNF and structure grammars -- `uri-def.py` - Extracts tag definitions and generates YAML files +- `extract-yaml.py` - Extracts tag definitions and generates YAML files +- `yaml-to-tsv.py` - Extracts TSV files from YAML files - `push_to_gedcomio.py` - Uploads to gedcom.io (requires special access) ## Common Development Tasks @@ -195,4 +198,4 @@ If build fails: The build process emits CSS-related warnings from weasyprint - these are normal and documented. Only stop the build for actual errors, not warnings. ### File Publishing -Publishing to gedcom.io requires access to the separate GEDCOM.io repository and is not part of normal development workflows. \ No newline at end of file +Publishing to gedcom.io requires access to the separate GEDCOM.io repository and is not part of normal development workflows. 
diff --git a/.github/workflows/generate-files.yml b/.github/workflows/generate-files.yml index f37f134d..3a5bbd65 100644 --- a/.github/workflows/generate-files.yml +++ b/.github/workflows/generate-files.yml @@ -27,7 +27,7 @@ jobs: steps: - name: Check out GEDCOM - uses: actions/checkout@v6 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 - name: Get the branch name id: extract_branch diff --git a/.github/workflows/propagate-main-to-v7.1.yml b/.github/workflows/propagate-main-to-v7.1.yml index b17d02bc..c5f690c5 100644 --- a/.github/workflows/propagate-main-to-v7.1.yml +++ b/.github/workflows/propagate-main-to-v7.1.yml @@ -24,7 +24,7 @@ jobs: steps: - name: Check out GEDCOM - uses: actions/checkout@v6 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 - name: Set git config env: diff --git a/.github/workflows/validate-yaml.yml b/.github/workflows/validate-yaml.yml index 70e2fb92..89db1122 100644 --- a/.github/workflows/validate-yaml.yml +++ b/.github/workflows/validate-yaml.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Checkout GEDCOM - uses: actions/checkout@v6 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 - name: Validate YAML run: yamllint . diff --git a/LICENSE b/LICENSE index 8aae145c..2455ff08 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 1984-2025 Intellectual Reserve, Inc. All rights reserved. A service provided by The Church of Jesus Christ of Latter-day Saints. + Copyright 1984-2026 Intellectual Reserve, Inc. All rights reserved. A service provided by The Church of Jesus Christ of Latter-day Saints. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/NOTICE b/NOTICE index d3423d84..576452cf 100644 --- a/NOTICE +++ b/NOTICE @@ -1,7 +1,7 @@ NOTICE: This work comprises, is based on, or is derived from the FAMILYSEARCH GEDCOM™ -Specification, © 1984-2025 Intellectual Reserve, Inc. All rights reserved. +Specification, © 1984-2026 Intellectual Reserve, Inc. All rights reserved. "FAMILYSEARCH GEDCOM™" and "FAMILYSEARCH®" are trademarks of Intellectual Reserve, Inc. and may not be used except as allowed by the Apache 2.0 license diff --git a/README.md b/README.md index f6893b50..1bd06d9c 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,13 @@ If you are looking for FamilySearch's GEDCOM 5.5.1 Java parser, which previously - `specification/gedcom-`number`-`title`.md` files are the source documents used to define the FamilySearch GEDCOM specification. It is written in pandoc-flavor markdown and is intended to be more easily written than read. It is split into several files (ordered by the integer in their names) to facilitate comparing files. - In a local check-out, this is also where the build scripts place rendered files `gedcom.html` and `gedcom.pdf`; see [releases](releases/latest) for a pre-rendered copy of these. - [`specification/terms/`](specification/terms/) - - YAML files to be served in the namespace, augmenting those automatically extracted from the specification itself by [`build/uri-def.py`](build/uri-def.py). + - YAML files to be served in the namespace, augmenting those automatically extracted from the specification itself by [`build/extract-yaml.py`](build/extract-yaml.py). - [`build/`](build/) contains files needed to render the specification - See [`build/README.md`](build/) for more - [`extracted-files/`](extracted-files/) contains digested information automatically extracted from the specification. All files in this directory are automatically generated by scripts in the [`build/`](build/) directory. 
- [`extracted-files/grammar.abnf`](extracted-files/grammar.abnf) contains all the character-level ABNF for parsing lines and datatypes. - [`extracted-files/grammar.gedstruct`](extracted-files/grammar.gedstruct) contains a custom structure organization metasyntax. - - various `.tsv` files to assist automated validation of files, including: + - various `.tsv` files to assist automated validation of files, extracted from the YAML files by [`build/yaml-to-tsv.py`](build/yaml-to-tsv.py), including: - [`extracted-files/cardinalities.tsv`](extracted-files/cardinalities.tsv) with columns "superstructure type ID, substructure type ID, cardinality marker" - [`extracted-files/enumerations.tsv`](extracted-files/enumerations.tsv) with columns "superstructure type ID, enumeration string, enumeration ID" - [`extracted-files/payloads.tsv`](extracted-files/payloads.tsv) with columns "structure type ID, payload type" diff --git a/build/Makefile b/build/Makefile index 8b97cf85..0d010db0 100644 --- a/build/Makefile +++ b/build/Makefile @@ -21,6 +21,7 @@ $(HTML_FILE): hyperlink-code.py GEDCOM-tmp.html python3 hyperlink-code.py GEDCOM-tmp.html $(HTML_FILE) $(TAGDEFS): $(MD_FILES) $(TERMS_FILES) $(EXTDIR)grammar.gedstruct extract-yaml.py + mkdir -p $(TAGDEFS) python3 extract-yaml.py --spec=$(SPECDIR) --dest=$(EXTDIR) rsync -au $(TERMS_FILES) $(EXTDIR)tags python3 yaml-to-tsv.py --dest=$(EXTDIR) $(TAGDEFS) diff --git a/build/extract-yaml.py b/build/extract-yaml.py index 9aa656aa..1fd7cfad 100644 --- a/build/extract-yaml.py +++ b/build/extract-yaml.py @@ -157,12 +157,12 @@ def type_specific(self) -> list[str]: if val is None: ans.append(key+': null') elif val == [] or isinstance(val, bool): ans.append(key+': '+str(val).lower()) elif isinstance(val, str): - assert '"' not in val and '\n' not in val, f"Simplified serialization failed for {uri}'s {key}" + assert '"' not in val and '\n' not in val, f"Simplified serialization failed for {self.uri}'s {key}" ans.append(key+': "'+val+'"') else: 
entry = key+':' for v in (sorted(val) if key != 'months' else val): - assert '"' not in v and '\n' not in v, f"Simplified serialization failed for {uri}'s {key}" + assert '"' not in v and '\n' not in v, f"Simplified serialization failed for {self.uri}'s {key}" entry += '\n - "'+v+'"' ans.append(entry) @@ -392,7 +392,7 @@ def do_pfx(uri:str) -> str: if not uri.startswith('https://gedcom.io'): continue # not ours to define if uri not in data: data[uri] = Concept('data type', uri) data[uri].set('label', header) - if re.search(f'^{typename.replace(':','-')} +=', section, flags=re.M): + if re.search(f"^{typename.replace(':','-')} +=", section, flags=re.M): data[uri].set('abnf_production', typename.replace(':','-')) data[uri].spec.append(section) @@ -436,7 +436,7 @@ def do_pfx(uri:str) -> str: # step 1: read the files src_gedstruct = open(Path(args.dest, 'grammar.gedstruct')).read() - src_markdown = '\n\n'.join(open(s).read().replace('\xA0',' ') for s in args.spec.glob('gedcom*.md')) + src_markdown = '\n\n'.join(open(s).read().replace('\xA0',' ') for s in sorted(args.spec.glob('gedcom*.md'))) # step 2: find all tables and convert them to {section header: [{column header: column value}]} tables = all_tables(src_markdown) diff --git a/build/hyperlink-code.py b/build/hyperlink-code.py index e9fca01d..d8baaf7c 100644 --- a/build/hyperlink-code.py +++ b/build/hyperlink-code.py @@ -44,7 +44,6 @@ def anchorify(m): return full doc = re.sub(r'(g7:[^<]*)\1(g7.1:[^<]*)\1]*ged(?:struct|com)[^>]*>.*?)', doc, flags=re.DOTALL) diff --git a/build/hyperlink.py b/build/hyperlink.py index 1fe7a24e..4dd9abf4 100644 --- a/build/hyperlink.py +++ b/build/hyperlink.py @@ -20,10 +20,6 @@ def slugify(bit): si = bit.rfind('`g7:')+4 ei = bit.find('`', si) slug = bit[si:ei].replace('#','-') - elif '`g7.1:' in bit: - si = bit.rfind('`g7.1:')+6 - ei = bit.find('`', si) - slug = bit[si:ei].replace('#','-') elif '`' in bit: bit = re.search('`[A-Z0-9_`.]+`', bit) slug = 
bit.group(0).replace('`','').replace('.','-') @@ -91,7 +87,7 @@ def abnf(m): slug = table_tags[m.group(1)] return linkify(m.group(0), slug) return m.group(0) - uried = re.sub(r'(? NOTICE: > - > This work comprises, is based on, or is derived from the FAMILYSEARCH GEDCOM™ Specification, © 1984-2025 Intellectual Reserve, Inc. All rights reserved. + > This work comprises, is based on, or is derived from the FAMILYSEARCH GEDCOM™ Specification, © 1984-2026 Intellectual Reserve, Inc. All rights reserved. > > "FAMILYSEARCH GEDCOM™" and "FAMILYSEARCH®" are trademarks of Intellectual Reserve, Inc. and may not be used except as allowed by the Apache 2.0 license that governs this work or as expressly authorized in writing and in advance by Intellectual Reserve, Inc. ::: @@ -147,7 +147,6 @@ is shorthand for a URI beginning with the corresponding URI prefix | Short Prefix | URI Prefix | |:-------------|:------------------------------------| | `g7` | `https://gedcom.io/terms/v7/` | -| `g7.1` | `https://gedcom.io/terms/v7.1/` | | `xsd` | `http://www.w3.org/2001/XMLSchema#` | | `dcat` | `http://www.w3.org/ns/dcat#` | diff --git a/specification/gedcom-1-hierarchical-container-format.md b/specification/gedcom-1-hierarchical-container-format.md index 13fb90b0..fed052af 100644 --- a/specification/gedcom-1-hierarchical-container-format.md +++ b/specification/gedcom-1-hierarchical-container-format.md @@ -176,9 +176,9 @@ The tag `ADOP` is used in this document to represent two structure types. 
Which one is meant can be identified by the superstructure type as follows: | Superstructure type | Structure type identified by tag `ADOP` | -|---------------------|-----------------------------------------| -| `g7.1:record-INDI` | `g7:ADOP` | -| `g7:ADOP-FAMC` | `g7:FAMC-ADOP` | +|------------------|------------------| +| `g7:record-INDI` | `g7:ADOP` | +| `g7:ADOP-FAMC` | `g7:FAMC-ADOP` | An [extension-defined substructure](#extensions) could also be used to place either of these structure types in extension superstructures. @@ -319,7 +319,7 @@ Extensions cannot change existing meanings, cardinalities, or calendars. A **tagged extension structure** is a structure whose tag matches production `extTag`. Tagged extension structures may appear as records or substructures of any other structure. Their meaning is defined by their tag, as is discussed more fully in the section [Extension Tags]. Any substructure of a tagged extension structure that uses a tag matching `stdTag` is an **extension-defined substructure**. -Substructures of an extension-defined substructure that uses a tag matching `stdTag` are also extension-defined substructures, but this specification deprecates using a `stdTag` with a definition that does not match any standard type with that tag. +Substructures of an extension-defined substructure that uses a tag matching `stdTag` are also extension-defined substructures. The meaning and use of each extension-defined substructure is defined by the tagged extension structure it occurs within, not by its tag alone nor by this specification. :::example @@ -343,7 +343,9 @@ deprecated. - Even though both `DATE`s appear to have `g7:type-DATE` payloads, we can't know that is the intended data type without consulting the defining specifications of `_LOC` and `_POP`, respectively. The first might be a `g7:type-DATE#period` and the second a `g7:type-DATE#exact`, for example. 
::: -If an extension-defined substructure has a tag that is also used by one or more standard structures, its meaning and payload type should match at least one of those standard structure types. +Extension-defined substructures should match the structure type, payload, and substructure collection of at least one +standard type with the same tag, though they can add more substructures to the substructure collection. +This specification deprecates using a `stdTag` with a definition that does not match any standard type with that tag. :::example An extension-defined substructure with tag "`DATE`" should provide a date or date period relevant to its superstructure, as do all `DATE`-tagged structures in this specification. Extensions should not use "`DATE`" to tag a structure describing anything else (even something that might reasonably be abbreviated "date", such as someone an individual dated). diff --git a/specification/gedcom-2-data-types.md b/specification/gedcom-2-data-types.md index 08afb038..880aac37 100644 --- a/specification/gedcom-2-data-types.md +++ b/specification/gedcom-2-data-types.md @@ -380,7 +380,7 @@ rather, they are used as machine-readable identifiers with formally-defined mean The payload is a "URI Reference" as defined in [RFC 3986 section 4.1](https://www.rfc-editor.org/rfc/rfc3986#section-4.1) with ABNF production `URI-reference`. The URI Reference is a more restrictive syntax than the URL Strings permitted by the [File Path] data type, -faciltiating easier automated equality tests between URIs. +facilitating easier automated equality tests between URIs. Relative URIs should be avoided in datasets that are expected to be shared on the web or with unknown parties, but may be appropriate for close collaboration between parties with a shared base URI. 
@@ -433,7 +433,7 @@ Minutes and seconds are not used and should be converted to fractional degrees p The number of degrees is limited by definition to be between 0 (the prime meridian) and 180 (the 180th meridian). ```abnf -Longitude = ("N" / "S") upto180 [ "." 1*digit] +Longitude = ("E" / "W") upto180 [ "." 1*digit] upto180 = "180" / "1" upto7 digit / [["0"] digit] digit upto7 = "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7" ``` diff --git a/specification/gedcom-3-structures-1-organization.md b/specification/gedcom-3-structures-1-organization.md index 83445db7..69a1e093 100644 --- a/specification/gedcom-3-structures-1-organization.md +++ b/specification/gedcom-3-structures-1-organization.md @@ -70,7 +70,7 @@ The intent of this metasyntax is to resemble the line encoding of allowable stru - `@@` means a pointer to a structure with this cross-reference template; `@VOID@` is also permitted. - `<`data type`>` means a non-pointer payload, as described in [Data types](#datatypes). If the data type allows the empty string, the payload may be omitted. - - `[`text`|]` means the payload is optional but if present must be the given text. + - `[`text`|]` means the payload is optional but if present must be the given text. The `[`text`|]` payload descriptor is only used in this version of the specification for `[Y|]` in event structures, as explained in [Events](#events). If there is a payload descriptor, a payload that matches the payload is required of the described structure unless the descriptor says the payload is optional. @@ -129,7 +129,7 @@ n <> {1:1} #### `HEADER` := ```gedstruct -n HEAD {1:1} g7.1:HEAD +n HEAD {1:1} g7:HEAD +1 GEDC {1:1} g7:GEDC +2 VERS {1:1} g7:GEDC-VERS +1 SCHMA {0:1} g7:SCHMA @@ -150,20 +150,12 @@ n HEAD {1:1} g7.1:HEAD +1 DEST {0:1} g7:DEST +1 DATE {0:1} g7:HEAD-DATE +2 TIME