diff --git a/gnomad_qc/federated/__init__.py b/gnomad_qc/federated/__init__.py
new file mode 100644
index 000000000..2746f4ac1
--- /dev/null
+++ b/gnomad_qc/federated/__init__.py
@@ -0,0 +1 @@
+# noqa: D104
diff --git a/gnomad_qc/v5/configs/__init__.py b/gnomad_qc/federated/configs/__init__.py
similarity index 100%
rename from gnomad_qc/v5/configs/__init__.py
rename to gnomad_qc/federated/configs/__init__.py
diff --git a/gnomad_qc/v5/configs/validity_inputs_config.json b/gnomad_qc/federated/configs/validity_inputs_config.json
similarity index 75%
rename from gnomad_qc/v5/configs/validity_inputs_config.json
rename to gnomad_qc/federated/configs/validity_inputs_config.json
index ae54ceb5e..0d2be12cf 100644
--- a/gnomad_qc/v5/configs/validity_inputs_config.json
+++ b/gnomad_qc/federated/configs/validity_inputs_config.json
@@ -1,6 +1,5 @@
 {
-    "missingness_threshold": 0.5,
-    "struct_annotations_for_missingness": [],
+    "struct_annotations_to_skip_missingness": ["vep"],
     "freq_fields": {"freq": "freq", "freq_meta": "freq_meta", "freq_meta_sample_count": "freq_meta_sample_count"},
     "faf_fields": {},
     "freq_annotations_to_sum": ["AC", "AN", "homozygote_count"],
@@ -8,5 +7,6 @@
     "nhomalt_metric": "homozygote_count",
     "subsets": [""],
     "variant_filter_field": "",
-    "check_mono_and_only_het" : true
+    "data_type": "genomes",
+    "check_mono_and_only_het": true
 }
diff --git a/gnomad_qc/v5/configs/validity_inputs_schema.py b/gnomad_qc/federated/configs/validity_inputs_schema.py
similarity index 88%
rename from gnomad_qc/v5/configs/validity_inputs_schema.py
rename to gnomad_qc/federated/configs/validity_inputs_schema.py
index fa418b50d..88bc9e517 100644
--- a/gnomad_qc/v5/configs/validity_inputs_schema.py
+++ b/gnomad_qc/federated/configs/validity_inputs_schema.py
@@ -3,7 +3,10 @@
 schema = {
     "type": "object",
     "properties": {
-        "missingness_threshold": {"type": "number"},
+        "struct_annotations_to_skip_missingness": {
+            "type": "array",
+            "items": {"type": "string"},
+        },
         "struct_annotations_for_missingness": {
             "type": "array",
             "items": {"type": "string"},
@@ -46,10 +49,10 @@
             "items": {"type": "string"},
         },
         "variant_filter_field": {"type": "string"},
+        "data_type": {"type": "string", "enum": ["exomes", "genomes"]},
         "check_mono_and_only_het": {"type": "boolean"},
     },
     "required": [
-        "missingness_threshold",
         "freq_fields",
         "freq_annotations_to_sum",
         "sort_order",
@@ -57,6 +60,7 @@
         "subsets",
         "variant_filter_field",
         "check_mono_and_only_het",
+        "data_type",
     ],
     "additionalProperties": False,
 }
diff --git a/gnomad_qc/v5/data_ingestion/__init__.py b/gnomad_qc/federated/data_ingestion/__init__.py
similarity index 100%
rename from gnomad_qc/v5/data_ingestion/__init__.py
rename to gnomad_qc/federated/data_ingestion/__init__.py
diff --git a/gnomad_qc/v5/data_ingestion/create_vds.py b/gnomad_qc/federated/data_ingestion/create_vds.py
similarity index 100%
rename from gnomad_qc/v5/data_ingestion/create_vds.py
rename to gnomad_qc/federated/data_ingestion/create_vds.py
diff --git a/gnomad_qc/v5/data_ingestion/field_md_to_html.py b/gnomad_qc/federated/data_ingestion/field_md_to_html.py
similarity index 100%
rename from gnomad_qc/v5/data_ingestion/field_md_to_html.py
rename to gnomad_qc/federated/data_ingestion/field_md_to_html.py
diff --git a/gnomad_qc/v5/data_ingestion/field_requirements.html b/gnomad_qc/federated/data_ingestion/field_requirements.html
similarity index 56%
rename from gnomad_qc/v5/data_ingestion/field_requirements.html
rename to gnomad_qc/federated/data_ingestion/field_requirements.html
index f188a18ed..70e4f1f72 100644
--- a/gnomad_qc/v5/data_ingestion/field_requirements.html
+++ b/gnomad_qc/federated/data_ingestion/field_requirements.html
@@ -14,9 +14,9 @@ <h1>Global Fields Specification</h1>
 A variant is considered to fail the "adj" criteria if any of the following conditions are met:
  <strong>GQ</strong> &lt; 20, <strong>DP</strong> &lt; 10, or <strong>AB</strong> &lt; 0.2 (heterozygous calls).</p>
 <p>All histograms annotations must use the same bin edges as defined in their respective 'Description' columns.</p>
-<table style="border-collapse: collapse; border: 1px solid #000; width: 100%;"><thead><tr style="background-color:#f8f8f8;"><th style="border: 1px solid #000; padding: 5px; text-align: left;">Field</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Type</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Description</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Example</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Field Necessity</th></tr></thead><tbody><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>freq_meta</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;dict&lt;str, str&gt;&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Array of frequency metadata dictionaries containing the frequency aggregation group for each element of the ‘freq’ array row annotation. Each dictionary should have the following keys: 'gen_anc', 'group', 'sex'. A 'downsampling' key is optional. The 'adj' group should always be the first value of the array, and the 'raw' group should be the second value. Required 'group' values are 'adj' and 'raw'. Required 'sex' values are 'XX' and 'XY'. 
Specific values are not required for 'gen_anc'.</td><td style="border: 1px solid #000; padding: 5px;"><code>[{'group': 'adj'},{'group': 'raw'},{'gen_anc': 'afr', 'group': 'adj'},{'gen_anc': 'amr', 'group': 'adj'},{'group': 'adj', 'sex': 'XX'},{'group': 'adj', 'sex': 'XY'}, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><strong>freq_index_dict</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>dict&lt;str, int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Dictionary keyed by specified label grouping combinations ('group': 'adj'/'raw', 'gen_anc': inferred genetic ancestry group, 'sex': sex karyotype), with values describing the corresponding index of each grouping entry in the ‘freq’ array row annotation. <br/><br/>If provided, keys need to be formatted in the order of 'gen_anc'_ 'sex'_'group'. If a 'downsampling' key is included, the order should be 'downsampling'_'gen_anc'_ 'sex'_'group'. The 'adj' value should always be at index 0 and the 'raw' value at index 1. A more detailed description can be found at https://gnomad.broadinstitute.org/help/v4-hts.</td><td style="border: 1px solid #000; padding: 5px;"><code>{"adj": 0, "raw": 1, "afr_adj": 2, "amr_adj": 3, "XX_adj": 4, "XY_adj": 5, ...}</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>freq_meta_sample_count</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">A sample count per sample grouping defined in the 'freq_meta' global annotation. 
Must be in the same order as 'freq'/'freq_meta'.</td><td style="border: 1px solid #000; padding: 5px;"><code>[730947, 730947, 16740, 15001, 50000, 680947, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>faf_meta</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;dict&lt;str, str&gt;&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Filtering allele frequency metadata. An ordered list containing the frequency aggregation group for each element of the ‘faf’ array row annotation.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>faf_index_dict</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>dict&lt;str, int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Dictionary keyed by specified label grouping combinations ('group': 'adj'/'raw', 'gen_anc': inferred genetic ancestry group, 'sex': sex karyotype), with values describing the corresponding index of each grouping entry in the filtering allele frequency (‘faf’) row annotation.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>age_distribution</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Callset-wide age histogram. Cohorts for gnomAD vary in how they report age (some report the age at diagnosis, others report the age of last visit, etc), so the ages associated with the gnomAD data can be thought of as the last known age of the individual. 
Information on age is not available for all gnomAD samples. This field is required with the acknowledgement that age data may not be available for all samples. If no age is available for any samples, set all <code>bin_freq</code> values to <code>0</code>.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">age_distribution.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for age histogram: <code>30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[30.0, 35.0, 40.0, 45.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">age_distribution.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the age histogram. 
This is the number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[101, 122, 85, 4, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">age_distribution.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling below lowest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>1000</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">age_distribution.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling above highest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>30</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>downsamplings</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>dict&lt;str, array&lt;int32&gt;&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Dictionary keyed by dataset with values corresponding to available downsampled sample counts.</td><td style="border: 1px solid #000; padding: 5px;"><code>{'gnomad': [10, 100, 500...]}</code></td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>filtering_model</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">The variant filtering model used and its specific cutoffs.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>filter_name</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Variant filtering model name used in the 'filters' row annotation, indicating the variant was filtered by this model during variant QC.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>score_name</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Name of the score used in filtering.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>snv_cutoff</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">SNV filtering cutoff information.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.snv_cutoff.</span>bin</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Filtering percentile cutoff for SNVs.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.snv_cutoff.</span>min_score</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Minimum score at SNV filtering percentile cutoff.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>indel_cutoff</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Indel filtering cutoff information.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.indel_cutoff.</span>bin</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Filtering percentile cutoff for indels.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.indel_cutoff.</span>min_score</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Minimum score at indel filtering percentile cutoff.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>snv_training_variables</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Variant annotations used as features in the SNV filtering model.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>indel_training_variables</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Variant annotations used as features in the indel 
filtering model.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>inbreeding_coeff_cutoff</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Inbreeding Coefficient threshold used to hard filter variants.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>excess_het_cutoff</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Excess heterozygosity threshold used to hard filter variants.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>tool_versions</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Versions of in silico predictors used in the callset.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.cadd_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Combined Annotation Dependent Depletion (CADD) version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.revel_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Rare Exome Variant Ensemble Learner (REVEL) version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.spliceai_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">SpliceAI version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.pangolin_version</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Pangolin version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 
5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.phylop_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">phyloP version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.dbsnp_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">dbSNP version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.sift_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Sorting Intolerant from Tolerant (SIFT) version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.polyphen_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Polymorphism Phenotyping v2 (Polyphen-v2) version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><strong>vrs_versions</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">The Variant Representation Specification version that was used to compute IDs on the callset. Global and row VRS annotations are optional, but the global annotaions must be filled out if the row annotations are provided.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vrs_versions</span>.vrs_schema_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">The version of the VRS schema that is used to represent variants and compute identifiers. Must be <code>2.0.1</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>"2.0.1"</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vrs_versions</span>.vrs_python_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">The version of the vrs-python library that was used to compute IDs on the callset. Must be <code>2.1.3</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>"2.1.3"</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vrs_versions</span>.seqrepo_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">The version of the SeqRepo database that was used in VRS computations. 
Must be <code>2024-12-20</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>"2024-12-20"</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>vep_globals</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Information about VEP annotations.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep_globals</span>.vep_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">VEP version that was run on the callset.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep_globals</span>.vep_help</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Output from vep --help.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep_globals</span>.vep_config</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">VEP configuration to run VEP version with Hail. 
File created using command within VEP init shell script in https://github.com/broadinstitute/gnomad_methods/tree/main.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep_globals</span>.gencode_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">GENCODE version used in VEP.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep_globals</span>.mane_select_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">MANE select version used in VEP.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>frequency_README</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Explanation of how to use the 'freq_index_dict' global annotation to extract frequencies from the 'freq' row annotation.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>date</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Date Hail Table was created.</td><td style="border: 1px solid #000; padding: 5px;"><code>"2025-04-09"</code></td><td style="border: 1px solid 
#000; padding: 5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>version</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Version of the file.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr></tbody></table>
+<table style="border-collapse: collapse; border: 1px solid #000; width: 100%;"><thead><tr style="background-color:#f8f8f8;"><th style="border: 1px solid #000; padding: 5px; text-align: left;">Field</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Type</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Description</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Example</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Field Necessity</th></tr></thead><tbody><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>freq_meta</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;dict&lt;str, str&gt;&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Array of frequency metadata dictionaries containing the frequency aggregation group for each element of the ‘freq’ array row annotation. Each dictionary should have the following keys: 'gen_anc', 'group', 'sex'. A 'downsampling' key is optional. The 'adj' group should always be the first value of the array, and the 'raw' group should be the second value. Required 'group' values are 'adj' and 'raw'. Required 'sex' values are 'XX' and 'XY'. 
Specific values are not required for 'gen_anc'.</td><td style="border: 1px solid #000; padding: 5px;"><code>[{'group': 'adj'},{'group': 'raw'},{'gen_anc': 'afr', 'group': 'adj'},{'gen_anc': 'amr', 'group': 'adj'},{'group': 'adj', 'sex': 'XX'},{'group': 'adj', 'sex': 'XY'}, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><strong>freq_index_dict</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>dict&lt;str, int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Dictionary keyed by specified label grouping combinations ('group': 'adj'/'raw', 'gen_anc': inferred genetic ancestry group, 'sex': sex karyotype), with values describing the corresponding index of each grouping entry in the ‘freq’ array row annotation. <br/><br/>If provided, keys need to be formatted in the order of 'gen_anc'_'sex'_'group'. If a 'downsampling' key is included, the order should be 'downsampling'_'gen_anc'_'sex'_'group'. The 'adj' value should always be at index 0 and the 'raw' value at index 1. A more detailed description can be found at https://gnomad.broadinstitute.org/help/v4-hts.</td><td style="border: 1px solid #000; padding: 5px;"><code>{"adj": 0, "raw": 1, "afr_adj": 2, "amr_adj": 3, "XX_adj": 4, "XY_adj": 5, ...}</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>freq_meta_sample_count</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">A sample count per sample grouping defined in the 'freq_meta' global annotation. 
Must be in the same order as 'freq'/'freq_meta'.</td><td style="border: 1px solid #000; padding: 5px;"><code>[730947, 730947, 16740, 15001, 50000, 680947, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>faf_meta</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;dict&lt;str, str&gt;&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Filtering allele frequency metadata. An ordered list containing the frequency aggregation group for each element of the ‘faf’ array row annotation.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>faf_index_dict</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>dict&lt;str, int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Dictionary keyed by specified label grouping combinations ('group': 'adj'/'raw', 'gen_anc': inferred genetic ancestry group, 'sex': sex karyotype), with values describing the corresponding index of each grouping entry in the filtering allele frequency (‘faf’) row annotation.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>age_distribution</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Callset-wide age histogram. Cohorts for gnomAD vary in how they report age (some report the age at diagnosis, others report the age of last visit, etc), so the ages associated with the gnomAD data can be thought of as the last known age of the individual. 
Information on age is not available for all gnomAD samples. This field is required with the acknowledgement that age data may not be available for all samples. If no age is available for any samples, set all <code>bin_freq</code> values to <code>0</code>.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">age_distribution.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for age histogram: 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[30.0, 35.0, 40.0, 45.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">age_distribution.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the age histogram. 
This is the number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[101, 122, 85, 4, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">age_distribution.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling below lowest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>1000</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">age_distribution.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling above highest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>30</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>downsamplings</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>dict&lt;str, array&lt;int32&gt;&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Dictionary keyed by dataset with values corresponding to available downsampled sample counts.</td><td style="border: 1px solid #000; padding: 5px;"><code>{'gnomad': [10, 100, 500...]}</code></td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>filtering_model</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">The variant filtering model used and its specific cutoffs.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>filter_name</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Variant filtering model name used in the 'filters' row annotation, indicating the variant was filtered by this model during variant QC.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>score_name</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Name of the score used in filtering.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>snv_cutoff</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">SNV filtering cutoff information.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.snv_cutoff.</span>bin</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Filtering percentile cutoff for SNVs.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.snv_cutoff.</span>min_score</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Minimum score at SNV filtering percentile cutoff.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>indel_cutoff</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Indel filtering cutoff information.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.indel_cutoff.</span>bin</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Filtering percentile cutoff for indels.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.indel_cutoff.</span>min_score</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Minimum score at indel filtering percentile cutoff.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>snv_training_variables</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Variant annotations used as features in the SNV filtering model.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">filtering_model.</span>indel_training_variables</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Variant annotations used as features in the indel 
filtering model.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>inbreeding_coeff_cutoff</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Inbreeding Coefficient threshold used to hard filter variants.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>excess_het_cutoff</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Excess heterozygosity threshold used to hard filter variants.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>tool_versions</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Versions of in silico predictors used in the callset.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.cadd_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Combined Annotation Dependent Depletion (CADD) version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.revel_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Rare Exome Variant Ensemble Learner (REVEL) version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.spliceai_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">SpliceAI version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.pangolin_version</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Pangolin version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 
5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.phylop_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">phyloP version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.dbsnp_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">dbSNP version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.sift_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Sorting Intolerant from Tolerant (SIFT) version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">tool_versions</span>.polyphen_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Polymorphism Phenotyping v2 (Polyphen-v2) version.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><strong>vrs_versions</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">The Variant Representation Specification version that was used to compute IDs on the callset. Global and row VRS annotations are optional, but the global annotations must be filled out if the row annotations are provided.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vrs_versions</span>.vrs_schema_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">The version of the VRS schema that is used to represent variants and compute identifiers. Must be <code>2.0.1</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>"2.0.1"</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vrs_versions</span>.vrs_python_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">The version of the vrs-python library that was used to compute IDs on the callset. Must be <code>2.1.3</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>"2.1.3"</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vrs_versions</span>.seqrepo_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">The version of the SeqRepo database that was used in VRS computations. 
Must be <code>2024-12-20</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>"2024-12-20"</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>vep_globals</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Information about VEP annotations.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep_globals</span>.vep_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">VEP version that was run on the callset.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep_globals</span>.vep_help</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Output from vep --help.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep_globals</span>.vep_config</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">VEP configuration to run VEP version with Hail. 
File created using command within VEP init shell script in https://github.com/broadinstitute/gnomad_methods/tree/main.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep_globals</span>.gencode_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">GENCODE version used in VEP.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep_globals</span>.mane_select_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">MANE select version used in VEP.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>vep115_globals</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Information about VEP 115 annotations.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep115_globals</span>.vep_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">VEP version that was run on the callset (115).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep115_globals</span>.vep_help</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Output from vep --help for VEP 115.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep115_globals</span>.vep_config</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">VEP 115 configuration to run VEP version with Hail.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep115_globals</span>.gencode_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">GENCODE version used in VEP 115.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not 
Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vep115_globals</span>.mane_select_version</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">MANE select version used in VEP 115.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>frequency_README</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Explanation of how to use the 'freq_index_dict' global annotation to extract frequencies from the 'freq' row annotation.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>date</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Date Hail Table was created.</td><td style="border: 1px solid #000; padding: 5px;"><code>"2025-04-09"</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>version</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Version of the file.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr></tbody></table>
 <h1>Row Fields Specification</h1>
 <p>This table provides a view of each row field. Every leaf field (i.e., non-struct field) is listed on its own row with the full annotation. The parent portion is shown in a lighter color to indicate context. Cell colors indicate the field necessity: <span style="background-color:#fff0f0; padding:2px 6px; border-radius:4px;">"Required" in red</span>, <span style="background-color:#f0faff; padding:2px 6px; border-radius:4px;">"Optional" in blue</span>, <span style="background-color:#dedbe4; padding:2px 6px; border-radius:4px;">"Not Needed" in grey</span>.</p>
 <p>All histograms annotations must use the same bin edges as defined in their respective 'Description' columns.</p>
 <p>Data must be supplied as a "split" dataset, where multiallelic variants are split so that information for each alternate allele is in a separate row.</p>
-<table style="border-collapse: collapse; border: 1px solid #000; width: 100%;"><thead><tr style="background-color:#f8f8f8;"><th style="border: 1px solid #000; padding: 5px; text-align: left;">Field</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Type</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Description</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Example</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Field Necessity</th></tr></thead><tbody><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>locus</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>locus&lt;GRCh38&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Variant locus. Contains contig and position information. Must be build GRCh38.</td><td style="border: 1px solid #000; padding: 5px;"><code>chr1:12345</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>alleles</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Variant alleles (reference and alternate).</td><td style="border: 1px solid #000; padding: 5px;"><code>["A", "G"]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>freq</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;struct {...}&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Array of allele frequency information (AC, AN, AF, homozygote count) for each frequency aggregation group corresponding to each frequency metadata group. 
The 'adj' value should always be the first value of the array, and the 'raw' value should be the second value.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">freq</span>.AC</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Alternate allele count.</td><td style="border: 1px solid #000; padding: 5px;"><code>10</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">freq</span>.AF</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Alternate allele frequency, (AC/AN).</td><td style="border: 1px solid #000; padding: 5px;"><code>0.1</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">freq</span>.AN</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Total number of alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>100</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">freq</span>.homozygote_count</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Count of homozygous alternate individuals for the alternate allele.</td><td style="border: 1px solid #000; padding: 5px;"><code>2</code></td><td style="border: 1px solid #000; padding: 
5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>grpmax</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct {...}</code></td><td style="border: 1px solid #000; padding: 5px;">Allele frequency information (AC, AN, AF, homozygote count) for the group with maximum allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">grpmax</span>.AC</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Alternate allele count in the group with the maximum allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">grpmax</span>.AF</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Maximum alternate allele frequency, (AC/AN), across all groups.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">grpmax</span>.AN</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Total number of alleles in the group with the maximum allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span 
style="color:#999;">grpmax</span>.homozygote_count</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Count of homozygous individuals in the group with the maximum allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">grpmax</span>.gen_anc</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Genetic ancestry corresponding to the maximum frequency group.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>faf</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;struct {...}&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Filtering allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">faf</span>.faf95</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">FAF95: Filtering allele frequency (using Poisson 95% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">faf</span>.faf99</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">FAF99: Filtering allele frequency 
(using Poisson 99% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>fafmax</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct {...}</code></td><td style="border: 1px solid #000; padding: 5px;">Information about the genetic ancestry group with the maximum filtering allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">fafmax</span>.faf95_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Maximum filtering allele frequency (using Poisson 95% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">fafmax</span>.faf95_max_gen_anc</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Genetic ancestry group with the maximum filtering allele frequency (95% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">fafmax</span>.faf99_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Maximum filtering allele frequency (using Poisson 99% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 
5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">fafmax</span>.faf99_max_gen_anc</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Genetic ancestry group with the maximum filtering allele frequency (99% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>a_index</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">The original index of this alternate allele in the multiallelic representation (1 is the first alternate allele or the only alternate allele in a biallelic variant).</td><td style="border: 1px solid #000; padding: 5px;"><code>1</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>was_split</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">True if this variant was originally multiallelic, otherwise False.</td><td style="border: 1px solid #000; padding: 5px;"><code>False</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>rsid</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>set&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">dbSNP reference SNP identification (rsID) numbers.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid 
#000; padding: 5px;"><strong>filters</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>set&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Variant filters; 'AC0': Allele count is zero after filtering out low-confidence genotypes (GQ &lt; 20; DP &lt; 10; or AB &lt; 0.2 for het calls), 'AS_VQSR': Failed allele-specific VQSR filtering thresholds, 'InbreedingCoeff': GATK InbreedingCoeff &lt; -0.3. An empty set in this field indicates that the variant passed all variant filters.</td><td style="border: 1px solid #000; padding: 5px;"><code>{"AC0","AS_VQSR"}</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>info</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct {...}</code></td><td style="border: 1px solid #000; padding: 5px;">Struct containing typical GATK allele-specific (AS) info fields and additional variant QC fields.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.FS</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Phred-scaled p-value of Fisher's exact test for strand bias.</td><td style="border: 1px solid #000; padding: 5px;"><code>7.30e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.MQ</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Root mean square of the mapping quality of reads across all samples.</td><td style="border: 1px solid #000; padding: 
5px;"><code>3.48e+01</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.MQRankSum</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Z-score from Wilcoxon rank sum test of alternate vs. reference read mapping qualities.</td><td style="border: 1px solid #000; padding: 5px;"><code>6.70e-02</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.</span>MQRankSum_cdf</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ...}</code></td><td style="border: 1px solid #000; padding: 5px;">CDF summary of overall MQRankSum values. The <code>_raw</code> parameter must be set to True to return an internal representation of the CDF approximation. This annotation is generated using Hail's function: <code>hl.agg.approx_cdf(x, k=XX, _raw=True)</code>. The <code>k</code> value must be set to 200 in order to merge with the gnomAD dataset. This method is non-deterministic: computing approx_cdf multiple times will give slightly different results each time. 
It is currently not possible to seed the aggregator.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.MQRankSum_cdf.</span>levels</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">List indicating how many items are stored at each compression level.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0,3]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.MQRankSum_cdf.</span>items</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Ordered sample of values from the MQRankSum distribution.</td><td style="border: 1px solid #000; padding: 5px;"><code>[-9.38e-01,-2.27e+00,-1.34e+00]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.MQRankSum_cdf.</span>_compaction_counts</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Used internally to support downstream error estimation.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.QUALapprox</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Sum of PL[0] values; used to 
approximate the QUAL score.</td><td style="border: 1px solid #000; padding: 5px;"><code>96</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.QD</td><td style="border: 1px solid #000; padding: 5px;"><code>float32</code></td><td style="border: 1px solid #000; padding: 5px;">Variant call confidence normalized by depth of sample reads supporting a variant.</td><td style="border: 1px solid #000; padding: 5px;"><code>2.74e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.ReadPosRankSum</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Z-score from Wilcoxon rank sum test of alternate vs. reference read position bias.</td><td style="border: 1px solid #000; padding: 5px;"><code>-1.07e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.</span>ReadPosRankSum_cdf</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ...}</code></td><td style="border: 1px solid #000; padding: 5px;">CDF summary of overall ReadPosRankSum values. The <code>_raw</code> parameter must be set to True to return an internal representation of the CDF approximation. This annotation is generated using Hail's function: <code>hl.agg.approx_cdf(x, k=XX, _raw=True)</code>. The <code>k</code> value must be set to 200 in order to merge with the gnomAD dataset. This method is non-deterministic: computing approx_cdf multiple times will give slightly different results each time. 
It is currently not possible to seed the aggregator.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.ReadPosRankSum_cdf.</span>levels</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">List indicating how many items are stored at each compression level.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0,3]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.ReadPosRankSum_cdf.</span>items</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Ordered sample of values from the ReadPosRankSum distribution.</td><td style="border: 1px solid #000; padding: 5px;"><code>[9.67e-01,-9.67e-01,9.67e-01]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.ReadPosRankSum_cdf.</span>_compaction_counts</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Used internally to support downstream error estimation.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.SB</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Aggregate 
counts of strand depth across all non-homozygous-reference calls. The values are the depth of reference allele on forward strand, depth of the reference allele on reverse strand, depth of all alternate alleles on forward strand, depth of all alternate alleles on reverse strand.</td><td style="border: 1px solid #000; padding: 5px;"><code>[21,6,4,4]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.SOR</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Strand bias estimated by the symmetric odds ratio test.</td><td style="border: 1px solid #000; padding: 5px;"><code>9.60e-02</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.VarDP</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Depth over variant genotypes (does not include depth of reference samples).</td><td style="border: 1px solid #000; padding: 5px;"><code>35</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_FS</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific phred-scaled p-value of Fisher's exact test for strand bias.</td><td style="border: 1px solid #000; padding: 5px;"><code>5.10e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_MQ</td><td 
style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific root mean square of the mapping quality of reads across all samples.</td><td style="border: 1px solid #000; padding: 5px;"><code>3.51e+01</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_MQRankSum</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific z-score from Wilcoxon rank sum test of alternate vs. reference read mapping qualities.</td><td style="border: 1px solid #000; padding: 5px;"><code>-5.72e-01</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.</span>AS_MQRankSum_cdf</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ...}</code></td><td style="border: 1px solid #000; padding: 5px;">CDF summary of allele-specific MQRankSum values. The <code>_raw</code> parameter must be set to True to return an internal representation of the CDF approximation. This annotation is generated using Hail's function: <code>hl.agg.approx_cdf(x, k=XX, _raw=True)</code>. The <code>k</code> value must be set to 200 in order to merge with the gnomAD dataset. This method is non-deterministic: computing approx_cdf multiple times will give slightly different results each time. 
It is currently not possible to seed the aggregator.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_MQRankSum_cdf.</span>levels</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">List indicating how many items are stored at each compression level.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0,3]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_MQRankSum_cdf.</span>items</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Ordered sample of values from the allele-specific MQRankSum distribution.</td><td style="border: 1px solid #000; padding: 5px;"><code>[-1.38e+00,4.31e-01,-9.67e-01]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_MQRankSum_cdf.</span>_compaction_counts</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Used internally to support downstream error estimation for allele-specific MQRankSum CDF.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_pab_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid 
#000; padding: 5px;">Maximum p-value over callset for binomial test of observed allele balance for a heterozygous genotype, given expectation of 0.5.</td><td style="border: 1px solid #000; padding: 5px;"><code>6.87e-01</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_QUALapprox</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific sum of PL[0] values; used to approximate the QUAL score.</td><td style="border: 1px solid #000; padding: 5px;"><code>77</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_QD</td><td style="border: 1px solid #000; padding: 5px;"><code>float32</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific variant call confidence normalized by depth of sample reads supporting a variant.</td><td style="border: 1px solid #000; padding: 5px;"><code>2.96e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_ReadPosRankSum</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific z-score from Wilcoxon rank sum test of alternate vs. 
reference read position bias.</td><td style="border: 1px solid #000; padding: 5px;"><code>-1.38e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.</span>AS_ReadPosRankSum_cdf</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ...}</code></td><td style="border: 1px solid #000; padding: 5px;">CDF summary of allele-specific ReadPosRankSum values. The <code>_raw</code> parameter must be set to True to return an internal representation of the CDF approximation. This annotation is generated using Hail's function: <code>hl.agg.approx_cdf(x, k=XX, _raw=True)</code>. The <code>k</code> value must be set to 200 in order to merge with the gnomAD dataset. This method is non-deterministic: computing approx_cdf multiple times will give slightly different results each time. It is currently not possible to seed the aggregator.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_ReadPosRankSum_cdf.</span>levels</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">List indicating how many items are stored at each compression level.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0,1]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_ReadPosRankSum_cdf.</span>items</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Ordered sample of values from the allele-specific ReadPosRankSum 
distribution.</td><td style="border: 1px solid #000; padding: 5px;"><code>[-1.78e-01]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_ReadPosRankSum_cdf.</span>_compaction_counts</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Used internally to support downstream error estimation for allele-specific ReadPosRankSum CDF.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_SB_TABLE</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific forward/reverse read counts for strand bias tests.</td><td style="border: 1px solid #000; padding: 5px;"><code>[21,6,3,3]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_SOR</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific strand bias estimated by the symmetric odds ratio test.</td><td style="border: 1px solid #000; padding: 5px;"><code>9.64e-02</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_VarDP</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific depth over variant genotypes (does 
not include depth of reference samples).</td><td style="border: 1px solid #000; padding: 5px;"><code>26</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.singleton</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant is seen once in the callset.</td><td style="border: 1px solid #000; padding: 5px;"><code>True</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.transmitted_singleton</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant was a callset-wide doubleton that was transmitted within a family from a parent to a child (i.e., a singleton amongst unrelated samples in cohort).</td><td style="border: 1px solid #000; padding: 5px;"><code>True</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.sibling_singleton</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant was a callset-wide doubleton that was present only in two siblings (i.e., a singleton amongst unrelated samples in cohort).</td><td style="border: 1px solid #000; padding: 5px;"><code>True</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.omni</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 
5px;">Variant is present on the Omni 2.5 genotyping array and found in 1000 Genomes data.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.mills</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Indel is present in the Mills and Devine data.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.monoallelic</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">All samples are homozygous alternate for the variant.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.only_het</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">All samples are heterozygous for the variant (no homozygous reference or alternate genotype calls).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.inbreeding_coeff</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Inbreeding coefficient, the excess heterozygosity at a variant site, computed as <code>1 - (the number of heterozygous genotypes) / 
(the number of heterozygous genotypes expected under Hardy-Weinberg equilibrium)</code>.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.excess_het</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Phred-scaled p-value for exact test of excess heterozygosity.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.vrs</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Struct containing information related to the Global Alliance for Genomics and Health (GA4GH) Variant Representation Specification (VRS) standard. VRS annotations must be created using the following tool versions: (vrs_schema_version=<code>2.0.1</code>, vrs_python_version=<code>2.1.3</code>, seqrepo_version=<code>2024-12-20</code>). 
<br/><br/></td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_Allele_IDs</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">The computed identifiers for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>["ga4gh:VA.oTAtTrgYxm81O9fu6Mrhfo1t3eHsgg4L","ga4gh:VA.Y283OnlLjyi1T1IT_JzvW255rC6YJsW6"]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_Starts</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Interresidue coordinates used as the location starts for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>[10030,10030]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_Ends</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Interresidue coordinates used as the location ends for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>[10031,10031]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 
1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_States</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">The literal sequence states used for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>["T","C"]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_Lengths</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">The length values from ReferenceLengthExpression states for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>[1,NA]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_RepeatSubunitLengths</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">The repeatSubunitLength values from ReferenceLengthExpression states for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>[1,NA]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>vep</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">VEP annotations generated by the VEP tool (to be re-annotated).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>vqsr_results</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct</code></td><td style="border: 1px solid #000; padding: 5px;">VQSR related variant annotations.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vqsr_results</span>.AS_VQSLOD</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific log-odds ratio of being a true variant versus being a false positive under the trained VQSR Gaussian mixture model.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vqsr_results</span>.AS_culprit</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific worst-performing annotation in the VQSR Gaussian mixture model.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vqsr_results</span>.positive_train_site</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant was used to build the 
positive training set of high-quality variants for VQSR.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vqsr_results</span>.negative_train_site</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant was used to build the negative training set of low-quality variants for VQSR.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>region_flags</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct</code></td><td style="border: 1px solid #000; padding: 5px;">Struct containing flags about regions.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">region_flags</span>.non_par</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant falls within a non-pseudoautosomal region.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">region_flags</span>.lcr</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant falls within a low complexity region.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr 
style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">region_flags</span>.segdup</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant falls within a segmental duplication region.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>allele_info</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct</code></td><td style="border: 1px solid #000; padding: 5px;">Allele information.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">allele_info</span>.allele_type</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Allele type (one of: 'snv', 'insertion', 'deletion', or 'mixed').</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">allele_info</span>.n_alt_alleles</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Total number of alternate alleles observed at variant locus.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">allele_info</span>.variant_type</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td 
style="border: 1px solid #000; padding: 5px;">Variant type (one of: 'snv', 'indel', 'multi-snv', 'multi-indel', or 'mixed').</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">allele_info</span>.was_mixed</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant type was mixed.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>histograms</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Variant information histograms.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.</span>qual_hists</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Genotype quality metric histograms for high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.</span>gq_hist_all</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for GQ calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the GQ histogram calculated on high quality genotypes are: <code>0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the GQ histogram calculated on high quality genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[50, 56, 101, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values falling below the lowest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values falling above the highest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.</span>dp_hist_all</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for DP calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the DP histogram calculated on high quality genotypes are: <code>0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the DP histogram calculated on high quality genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[2, 2, 16, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values falling below the lowest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values falling above the highest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>500</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.</span>gq_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for GQ in non-reference individuals calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of GQ in non-reference individuals calculated on high quality genotypes are: <code>0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of GQ in non-reference individuals calculated on high quality genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0, 0, 1, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values in non-reference individuals falling below the lowest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values in non-reference individuals falling above the highest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.</span>dp_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for DP in non-reference individuals calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of DP in non-reference individuals calculated on high quality genotypes are: <code>0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of DP in non-reference individuals calculated on high quality genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[1, 1, 10, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values in non-reference individuals falling below the lowest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values in non-reference individuals falling above highest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>10</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.</span>ab_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for AB in heterozygous individuals calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of AB in heterozygous individuals calculated on high quality genotypes are: <code>0.00 | 0.05 | 0.10 | 0.15 | 0.20 | 0.25 | 0.30 | 0.35 | 0.40 | 0.45 | 0.50 | 0.55 | 0.60 | 0.65 | 0.70 | 0.75 | 0.80 | 0.85 | 0.90 | 0.95 | 1.00</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.00, 0.05, 0.10, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of AB in heterozygous individuals calculated on high quality genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0 , 0 , 5, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of AB values in heterozygous individuals falling below the lowest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of AB values in heterozygous individuals falling above the highest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.</span>raw_qual_hists</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Genotype quality metric histograms for all genotypes as opposed to high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.</span>gq_hist_all</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for GQ calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the GQ histogram calculated on all genotypes are: <code>0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the GQ histogram calculated on all genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[60, 76, 130, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values falling below lowest histogram bin edge, for GQ calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values falling above highest histogram bin edge, for GQ calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.</span>dp_hist_all</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for DP calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the DP histogram calculated on all genotypes are: <code>0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the DP histogram calculated on all genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[4, 7, 20, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values falling below lowest histogram bin edge, for DP calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values falling above highest histogram bin edge, for DP calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>600</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.</span>gq_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for GQ in non-reference individuals calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of GQ in non-reference individuals calculated on all genotypes are: <code>0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of GQ in non-reference individuals calculated on all genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[3, 4, 10, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values in non-reference individuals falling below lowest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values in non-reference individuals falling above highest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.</span>dp_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for DP in non-reference individuals calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of DP in non-reference individuals calculated on all genotypes are: <code>0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of DP in non-reference individuals calculated on all genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[2, 2, 6, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values in non-reference individuals falling below lowest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values in non-reference individuals falling above highest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>11</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.</span>ab_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for AB in heterozygous individuals calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of AB in heterozygous individuals calculated on all genotypes are: <code>0.00 | 0.05 | 0.10 | 0.15 | 0.20 | 0.25 | 0.30 | 0.35 | 0.40 | 0.45 | 0.50 | 0.55 | 0.60 | 0.65 | 0.70 | 0.75 | 0.80 | 0.85 | 0.90 | 0.95 | 1.00</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.00, 0.05, 0.10, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of AB in heterozygous individuals calculated on all genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0 , 0 , 6, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of AB values in heterozygous individuals falling below lowest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of AB values in heterozygous individuals falling above highest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.</span>age_hists</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Histograms containing age information for high quality genotypes.  Cohorts for gnomAD vary in how they report age (some report the age at diagnosis, others report the age of last visit, etc), so the ages associated with the gnomAD data can be thought of as the last known age of the individual. Information on age is not available for all gnomAD samples. 
This field is required with the acknowledgement that age data may not be available for all samples. If no age is available for any samples, set all <code>bin_freq</code> values to <code>0</code>.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.</span>age_hist_het</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for age in all heterozygous samples calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_het.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the age histogram: <code>30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[30.0, 35.0,  40.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_het.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the age histogram. 
This is the number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0, 3,  4, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_het.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling below lowest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>1</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_het.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling above highest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.</span>age_hist_hom</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for age in all homozygous samples calculated on high quality genotypes. 
If the variant is in the non-pseudoautosomal regions of chrX or chrY, this histogram also includes age counts of hemizygous samples.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_hom.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the age histogram: <code>30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0</code>.</td><td style="border: 1px solid #000; padding: 5px;"><code>[30.0, 35.0,  40.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_hom.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the age histogram. 
This is the number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0, 2,  2, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_hom.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling below lowest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_hom.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling above highest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>in_silico_predictors</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Variant prediction annotations.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>cadd</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Score used to predict deleteriousness of SNVs and indels.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.cadd.</span>phred</td><td style="border: 1px solid #000; padding: 5px;"><code>float32</code></td><td style="border: 1px solid #000; padding: 5px;">CADD Phred-like scaled C-scores ranging from 1 to 99 based on the rank of each variant relative to all possible 8.6 billion substitutions in the human reference genome. Larger values indicate increased predicted deleteriousness.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.cadd.</span>raw_score</td><td style="border: 1px solid #000; padding: 5px;"><code>float32</code></td><td style="border: 1px solid #000; padding: 5px;">Unscaled CADD scores indicating whether a variant is likely to be "observed" (negative values) vs "simulated" (positive values). 
Larger values indicate increased predicted deleteriousness.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>revel_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">An ensemble score for predicting the pathogenicity of missense variants (based on 13 other variant predictors).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>spliceai_ds_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float32</code></td><td style="border: 1px solid #000; padding: 5px;">Maximum delta score across 4 splicing consequences, which reflects the probability of the variant being splice-altering.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>pangolin_largest_ds</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Largest delta score across 2 splicing consequences, which reflects the probability of the variant being splice-altering.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>phylop</td><td style="border: 1px solid #000; padding: 
5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Base-wise conservation score across the 241 placental mammals in the Zoonomia project. Score ranges from -20 to 9.28, and reflects acceleration (faster evolution than expected under neutral drift, assigned negative scores) as well as conservation (slower than expected evolution, assigned positive scores).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>sift_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Score reflecting the scaled probability of the amino acid substitution being tolerated, ranging from 0 to 1.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>polyphen_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Score that predicts the possible impact of an amino acid substitution on the structure and function of a human protein, ranging from 0.0 (tolerated) to 1.0 (deleterious).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr></tbody></table>
+<table style="border-collapse: collapse; border: 1px solid #000; width: 100%;"><thead><tr style="background-color:#f8f8f8;"><th style="border: 1px solid #000; padding: 5px; text-align: left;">Field</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Type</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Description</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Example</th><th style="border: 1px solid #000; padding: 5px; text-align: left;">Field Necessity</th></tr></thead><tbody><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>locus</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>locus&lt;GRCh38&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Variant locus. Contains contig and position information. Must be build GRCh38.</td><td style="border: 1px solid #000; padding: 5px;"><code>chr1:12345</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>alleles</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Variant alleles (reference and alternate).</td><td style="border: 1px solid #000; padding: 5px;"><code>["A", "G"]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>freq</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;struct {...}&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Array of allele frequency information (AC, AN, AF, homozygote count) for each frequency aggregation group corresponding to each frequency metadata group. 
The 'adj' value should always be the first value of the array, and the 'raw' value should be the second value.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">freq</span>.AC</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Alternate allele count.</td><td style="border: 1px solid #000; padding: 5px;"><code>10</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">freq</span>.AF</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Alternate allele frequency, (AC/AN).</td><td style="border: 1px solid #000; padding: 5px;"><code>0.1</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">freq</span>.AN</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Total number of alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>100</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">freq</span>.homozygote_count</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Count of homozygous alternate individuals for the alternate allele.</td><td style="border: 1px solid #000; padding: 5px;"><code>2</code></td><td style="border: 1px solid #000; padding: 
5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>grpmax</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct {...}</code></td><td style="border: 1px solid #000; padding: 5px;">Allele frequency information (AC, AN, AF, homozygote count) for the group with maximum allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">grpmax</span>.AC</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Alternate allele count in the group with the maximum allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">grpmax</span>.AF</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Maximum alternate allele frequency, (AC/AN), across all groups.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">grpmax</span>.AN</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Total number of alleles in the group with the maximum allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span 
style="color:#999;">grpmax</span>.homozygote_count</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Count of homozygous individuals in the group with the maximum allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">grpmax</span>.gen_anc</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Genetic ancestry corresponding to the maximum frequency group.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>faf</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;struct {...}&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Filtering allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">faf</span>.faf95</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">FAF95: Filtering allele frequency (using Poisson 95% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">faf</span>.faf99</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">FAF99: Filtering allele frequency 
(using Poisson 99% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>fafmax</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct {...}</code></td><td style="border: 1px solid #000; padding: 5px;">Information about the genetic ancestry group with the maximum filtering allele frequency.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">fafmax</span>.faf95_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Maximum filtering allele frequency (using Poisson 95% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">fafmax</span>.faf95_max_gen_anc</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Genetic ancestry group with the maximum filtering allele frequency (95% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">fafmax</span>.faf99_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Maximum filtering allele frequency (using Poisson 99% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 
5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">fafmax</span>.faf99_max_gen_anc</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Genetic ancestry group with the maximum filtering allele frequency (99% CI).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>a_index</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">The original index of this alternate allele in the multiallelic representation (1 is the first alternate allele or the only alternate allele in a biallelic variant).</td><td style="border: 1px solid #000; padding: 5px;"><code>1</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>was_split</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">True if this variant was originally multiallelic, otherwise False.</td><td style="border: 1px solid #000; padding: 5px;"><code>False</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>rsid</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>set&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">dbSNP reference SNP identification (rsID) numbers.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid 
#000; padding: 5px;"><strong>filters</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>set&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Variant filters; 'AC0': Allele count is zero after filtering out low-confidence genotypes (GQ &lt; 20; DP &lt; 10; or AB &lt; 0.2 for het calls), 'AS_VQSR': Failed allele-specific VQSR filtering thresholds, 'InbreedingCoeff': GATK InbreedingCoeff &lt; -0.3. An empty set in this field indicates that the variant passed all variant filters.</td><td style="border: 1px solid #000; padding: 5px;"><code>{"AC0","AS_VQSR"}</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>info</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct {...}</code></td><td style="border: 1px solid #000; padding: 5px;">Struct containing typical GATK allele-specific (AS) info fields and additional variant QC fields.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.FS</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Phred-scaled p-value of Fisher's exact test for strand bias.</td><td style="border: 1px solid #000; padding: 5px;"><code>7.30e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.MQ</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Root mean square of the mapping quality of reads across all samples.</td><td style="border: 1px solid #000; padding: 
5px;"><code>3.48e+01</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.MQRankSum</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Z-score from Wilcoxon rank sum test of alternate vs. reference read mapping qualities.</td><td style="border: 1px solid #000; padding: 5px;"><code>6.70e-02</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.</span>MQRankSum_cdf</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ...}</code></td><td style="border: 1px solid #000; padding: 5px;">CDF summary of overall MQRankSum values. The <code>_raw</code> parameter must be set to True to return an internal representation of the CDF approximation. This annotation is generated using Hail's function: <code>hl.agg.approx_cdf(x, k=XX, _raw=True)</code>. The <code>k</code> value must be set to 200 in order to merge with the gnomAD dataset. This method is non-deterministic: computing approx_cdf multiple times will give slightly different results each time. 
It is currently not possible to seed the aggregator.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.MQRankSum_cdf.</span>levels</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">List indicating how many items are stored at each compression level.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0,3]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.MQRankSum_cdf.</span>items</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Ordered sample of values from the MQRankSum distribution.</td><td style="border: 1px solid #000; padding: 5px;"><code>[-9.38e-01,-2.27e+00,-1.34e+00]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.MQRankSum_cdf.</span>_compaction_counts</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Used internally to support downstream error estimation.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.QUALapprox</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Sum of PL[0] values; used to 
approximate the QUAL score.</td><td style="border: 1px solid #000; padding: 5px;"><code>96</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.QD</td><td style="border: 1px solid #000; padding: 5px;"><code>float32</code></td><td style="border: 1px solid #000; padding: 5px;">Variant call confidence normalized by depth of sample reads supporting a variant.</td><td style="border: 1px solid #000; padding: 5px;"><code>2.74e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.ReadPosRankSum</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Z-score from Wilcoxon rank sum test of alternate vs. reference read position bias.</td><td style="border: 1px solid #000; padding: 5px;"><code>-1.07e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.</span>ReadPosRankSum_cdf</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ...}</code></td><td style="border: 1px solid #000; padding: 5px;">CDF summary of overall ReadPosRankSum values. The <code>_raw</code> parameter must be set to True to return an internal representation of the CDF approximation. This annotation is generated using Hail's function: <code>hl.agg.approx_cdf(x, k=XX, _raw=True)</code>. The <code>k</code> value must be set to 200 in order to merge with the gnomAD dataset. This method is non-deterministic: computing approx_cdf multiple times will give slightly different results each time. 
It is currently not possible to seed the aggregator.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.ReadPosRankSum_cdf.</span>levels</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">List indicating how many items are stored at each compression level.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0,3]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.ReadPosRankSum_cdf.</span>items</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Ordered sample of values from the ReadPosRankSum distribution.</td><td style="border: 1px solid #000; padding: 5px;"><code>[9.67e-01,-9.67e-01,9.67e-01]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.ReadPosRankSum_cdf.</span>_compaction_counts</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Used internally to support downstream error estimation.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.SB</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Aggregate 
counts of strand depth across all non-homozygous-reference calls. The values are the depth of reference allele on forward strand, depth of the reference allele on reverse strand, depth of all alternate alleles on forward strand, depth of all alternate alleles on reverse strand.</td><td style="border: 1px solid #000; padding: 5px;"><code>[21,6,4,4]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.SOR</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Strand bias estimated by the symmetric odds ratio test.</td><td style="border: 1px solid #000; padding: 5px;"><code>9.60e-02</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.VarDP</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Depth over variant genotypes (does not include depth of reference samples).</td><td style="border: 1px solid #000; padding: 5px;"><code>35</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_FS</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific phred-scaled p-value of Fisher's exact test for strand bias.</td><td style="border: 1px solid #000; padding: 5px;"><code>5.10e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_MQ</td><td 
style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific root mean square of the mapping quality of reads across all samples.</td><td style="border: 1px solid #000; padding: 5px;"><code>3.51e+01</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_MQRankSum</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific z-score from Wilcoxon rank sum test of alternate vs. reference read mapping qualities.</td><td style="border: 1px solid #000; padding: 5px;"><code>-5.72e-01</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.</span>AS_MQRankSum_cdf</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ...}</code></td><td style="border: 1px solid #000; padding: 5px;">CDF summary of allele-specific MQRankSum values. The <code>_raw</code> parameter must be set to True to return an internal representation of the CDF approximation. This annotation is generated using Hail's function: <code>hl.agg.approx_cdf(x, k=XX, _raw=True)</code>. The <code>k</code> value must be set to 200 in order to merge with the gnomAD dataset. This method is non-deterministic: computing approx_cdf multiple times will give slightly different results each time. 
It is currently not possible to seed the aggregator.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_MQRankSum_cdf.</span>levels</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">List indicating how many items are stored at each compression level.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0,3]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_MQRankSum_cdf.</span>items</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Ordered sample of values from the allele-specific MQRankSum distribution.</td><td style="border: 1px solid #000; padding: 5px;"><code>[-1.38e+00,4.31e-01,-9.67e-01]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_MQRankSum_cdf.</span>_compaction_counts</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Used internally to support downstream error estimation for allele-specific MQRankSum CDF.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_pab_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid 
#000; padding: 5px;">Maximum p-value over callset for binomial test of observed allele balance for a heterozygous genotype, given expectation of 0.5.</td><td style="border: 1px solid #000; padding: 5px;"><code>6.87e-01</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_QUALapprox</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific sum of PL[0] values; used to approximate the QUAL score.</td><td style="border: 1px solid #000; padding: 5px;"><code>77</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_QD</td><td style="border: 1px solid #000; padding: 5px;"><code>float32</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific variant call confidence normalized by depth of sample reads supporting a variant.</td><td style="border: 1px solid #000; padding: 5px;"><code>2.96e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_ReadPosRankSum</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific z-score from Wilcoxon rank sum test of alternate vs. 
reference read position bias.</td><td style="border: 1px solid #000; padding: 5px;"><code>-1.38e+00</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.</span>AS_ReadPosRankSum_cdf</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ...}</code></td><td style="border: 1px solid #000; padding: 5px;">CDF summary of allele-specific ReadPosRankSum values. The <code>_raw</code> parameter must be set to True to return an internal representation of the CDF approximation. This annotation is generated using Hail's function: <code>hl.agg.approx_cdf(x, k=XX, _raw=True)</code>. The <code>k</code> value must be set to 200 in order to merge with the gnomAD dataset. This method is non-deterministic: computing approx_cdf multiple times will give slightly different results each time. It is currently not possible to seed the aggregator.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_ReadPosRankSum_cdf.</span>levels</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">List indicating how many items are stored at each compression level.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0,1]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_ReadPosRankSum_cdf.</span>items</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Ordered sample of values from the allele-specific ReadPosRankSum 
distribution.</td><td style="border: 1px solid #000; padding: 5px;"><code>[-1.78e-01]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.AS_ReadPosRankSum_cdf.</span>_compaction_counts</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Used internally to support downstream error estimation for allele-specific ReadPosRankSum CDF.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_SB_TABLE</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific forward/reverse read counts for strand bias tests.</td><td style="border: 1px solid #000; padding: 5px;"><code>[21,6,3,3]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_SOR</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific strand bias estimated by the symmetric odds ratio test.</td><td style="border: 1px solid #000; padding: 5px;"><code>9.64e-02</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.AS_VarDP</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific depth over variant genotypes (does 
not include depth of reference samples).</td><td style="border: 1px solid #000; padding: 5px;"><code>26</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.singleton</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant is seen once in the callset.</td><td style="border: 1px solid #000; padding: 5px;"><code>True</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.transmitted_singleton</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant was a callset-wide doubleton that was transmitted within a family from a parent to a child (i.e., a singleton amongst unrelated samples in cohort).</td><td style="border: 1px solid #000; padding: 5px;"><code>True</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.sibling_singleton</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant was a callset-wide doubleton that was present only in two siblings (i.e., a singleton amongst unrelated samples in cohort).</td><td style="border: 1px solid #000; padding: 5px;"><code>True</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.omni</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 
5px;">Variant is present on the Omni 2.5 genotyping array and found in 1000 Genomes data.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.mills</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Indel is present in the Mills and Devine data.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.monoallelic</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">All samples are homozygous alternate for the variant.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.only_het</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">All samples are heterozygous for the variant (no homozygous reference or alternate genotype calls).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.inbreeding_coeff</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Inbreeding coefficient, the excess heterozygosity at a variant site, computed as <code>1 - (the number of heterozygous genotypes) / 
(the number of heterozygous genotypes expected under Hardy-Weinberg equilibrium)</code>.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.excess_het</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Phred-scaled p-value for exact test of excess heterozygosity.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info</span>.vrs</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Struct containing information related to the Global Alliance for Genomics and Health (GA4GH) Variant Representation Specification (VRS) standard. VRS annotations must be created using the following tool versions: (vrs_schema_version=<code>2.0.1</code>, vrs_python_version=<code>2.1.3</code>, seqrepo_version=<code>2024-12-20</code>). 
<br/><br/></td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_Allele_IDs</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">The computed identifiers for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>["ga4gh:VA.oTAtTrgYxm81O9fu6Mrhfo1t3eHsgg4L","ga4gh:VA.Y283OnlLjyi1T1IT_JzvW255rC6YJsW6"]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_Starts</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Interresidue coordinates used as the location starts for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>[10030,10030]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_Ends</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Interresidue coordinates used as the location ends for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>[10031,10031]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 
1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_States</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;str&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">The literal sequence states used for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>["T","C"]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_Lengths</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">The length values from ReferenceLengthExpression states for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>[1,NA]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">info.vrs</span>.VRS_RepeatSubunitLengths</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int32&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">The repeatSubunitLength values from ReferenceLengthExpression states for the GA4GH VRS Alleles corresponding to the GT indexes of the reference and alternate alleles.</td><td style="border: 1px solid #000; padding: 5px;"><code>[1,NA]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>vep</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">VEP annotations generated by the VEP tool (to be re-annotated).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>vqsr_results</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct</code></td><td style="border: 1px solid #000; padding: 5px;">VQSR related variant annotations.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vqsr_results</span>.AS_VQSLOD</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific log-odds ratio of being a true variant versus being a false positive under the trained VQSR Gaussian mixture model.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vqsr_results</span>.AS_culprit</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Allele-specific worst-performing annotation in the VQSR Gaussian mixture model.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vqsr_results</span>.positive_train_site</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant was used to build the 
positive training set of high-quality variants for VQSR.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">vqsr_results</span>.negative_train_site</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant was used to build the negative training set of low-quality variants for VQSR.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>region_flags</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct</code></td><td style="border: 1px solid #000; padding: 5px;">Struct containing flags about regions.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">region_flags</span>.non_par</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant falls within a non-pseudoautosomal region.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">region_flags</span>.lcr</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant falls within a low complexity region.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr 
style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">region_flags</span>.segdup</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant falls within a segmental duplication region.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>allele_info</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct</code></td><td style="border: 1px solid #000; padding: 5px;">Allele information.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">allele_info</span>.allele_type</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td style="border: 1px solid #000; padding: 5px;">Allele type (one of: 'snv', 'insertion', 'deletion', or 'mixed').</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">allele_info</span>.n_alt_alleles</td><td style="border: 1px solid #000; padding: 5px;"><code>int32</code></td><td style="border: 1px solid #000; padding: 5px;">Total number of alternate alleles observed at variant locus.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">allele_info</span>.variant_type</td><td style="border: 1px solid #000; padding: 5px;"><code>str</code></td><td 
style="border: 1px solid #000; padding: 5px;">Variant type (one of: 'snv', 'indel', 'multi-snv', 'multi-indel', or 'mixed').</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">allele_info</span>.was_mixed</td><td style="border: 1px solid #000; padding: 5px;"><code>bool</code></td><td style="border: 1px solid #000; padding: 5px;">Variant type was mixed.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><strong>histograms</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Variant information histograms.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.</span>qual_hists</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Genotype quality metric histograms for high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.</span>gq_hist_all</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for GQ calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the GQ histogram calculated on high quality genotypes are: 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the GQ histogram calculated on high quality genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[50, 56, 101, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values falling below the lowest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values falling above the highest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.</span>dp_hist_all</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for DP calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the DP histogram calculated on high quality genotypes are: 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the DP histogram calculated on high quality genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[2, 2, 16, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values falling below the lowest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values falling above the highest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>500</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.</span>gq_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for GQ in non-reference individuals calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of GQ in non-reference individuals calculated on high quality genotypes are: 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of GQ in non-reference individuals calculated on high quality genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0, 0, 1, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values in non-reference individuals falling below the lowest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values in non-reference individuals falling above the highest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.</span>dp_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for DP in non-reference individuals calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of DP in non-reference individuals calculated on high quality genotypes are: 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of DP in non-reference individuals calculated on high quality genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[1, 1, 10, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values in non-reference individuals falling below the lowest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values in non-reference individuals falling above highest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>10</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.</span>ab_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for AB in heterozygous individuals calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of AB in heterozygous individuals calculated on high quality genotypes are: 0.00 | 0.05 | 0.10 | 0.15 | 0.20 | 0.25 | 0.30 | 0.35 | 0.40 | 0.45 | 0.50 | 0.55 | 0.60 | 0.65 | 0.70 | 0.75 | 0.80 | 0.85 | 0.90 | 0.95 | 1.00.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.00 , 0.05 , 0.10, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of AB in heterozygous individuals calculated on high quality genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0 , 0 , 5, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of AB values in heterozygous individuals falling below the lowest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of AB values in heterozygous individuals falling above the highest histogram bin edge, calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.</span>raw_qual_hists</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Genotype quality metric histograms for all genotypes as opposed to high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.</span>gq_hist_all</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for GQ calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the GQ histogram calculated on all genotypes are: 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the GQ histogram calculated on all genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[60, 76, 130, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values falling below lowest histogram bin edge, for GQ calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values falling above highest histogram bin edge, for GQ calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.</span>dp_hist_all</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for DP calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the DP histogram calculated on all genotypes are: 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the DP histogram calculated on all genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[4, 7, 20, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values falling below lowest histogram bin edge, for DP calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values falling above highest histogram bin edge, for DP calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>600</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.</span>gq_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for GQ in non-reference individuals calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of GQ in non-reference individuals calculated on all genotypes are: 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of GQ in non-reference individuals calculated on all genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[3, 4, 10, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values in non-reference individuals falling below lowest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of GQ values in non-reference individuals falling above highest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.</span>dp_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for DP in non-reference individuals calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of DP in non-reference individuals calculated on all genotypes are: 0.0 | 5.0 | 10.0 | 15.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.0, 5.0, 10.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of DP in non-reference individuals calculated on all genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[2, 2, 6, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values in non-reference individuals falling below lowest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of DP values in non-reference individuals falling above highest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>11</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.</span>ab_hist_alt</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for AB in heterozygous individuals calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the histogram of AB in heterozygous individuals calculated on all genotypes are: 0.00 | 0.05 | 0.10 | 0.15 | 0.20 | 0.25 | 0.30 | 0.35 | 0.40 | 0.45 | 0.50 | 0.55 | 0.60 | 0.65 | 0.70 | 0.75 | 0.80 | 0.85 | 0.90 | 0.95 | 1.00.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0.00 , 0.05 , 0.10, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the histogram of AB in heterozygous individuals calculated on all genotypes. 
The number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0 , 0 , 6, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of AB values in heterozygous individuals falling below lowest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#f0faff;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of AB values in heterozygous individuals falling above highest histogram bin edge, calculated on all genotypes.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Optional</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.</span>age_hists</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Histograms containing age information for high quality genotypes.  Cohorts for gnomAD vary in how they report age (some report the age at diagnosis, others report the age of last visit, etc), so the ages associated with the gnomAD data can be thought of as the last known age of the individual. Information on age is not available for all gnomAD samples. 
This field is required with the acknowledgement that age data may not be available for all samples. If no age is available for any samples, set all <code>bin_freq</code> values to <code>0</code>.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.</span>age_hist_het</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for age in all heterozygous samples calculated on high quality genotypes.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_het.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the age histogram: 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[30.0, 35.0,  40.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_het.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the age histogram. 
This is the number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0, 3,  4, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_het.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling below lowest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>1</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_het.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling above highest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.</span>age_hist_hom</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Histogram for age in all homozygous samples calculated on high quality genotypes. 
If the variant is in the non-pseudoautosomal regions of chrX or chrY, this histogram also includes age counts of hemizygous samples.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_hom.</span>bin_edges</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;float64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin edges for the age histogram: 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0.</td><td style="border: 1px solid #000; padding: 5px;"><code>[30.0, 35.0,  40.0, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_hom.</span>bin_freq</td><td style="border: 1px solid #000; padding: 5px;"><code>array&lt;int64&gt;</code></td><td style="border: 1px solid #000; padding: 5px;">Bin frequencies for the age histogram. 
This is the number of records found in each bin.</td><td style="border: 1px solid #000; padding: 5px;"><code>[0, 2,  2, ...]</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_hom.</span>n_smaller</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling below lowest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#fff0f0;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">histograms.age_hists.age_hist_hom.</span>n_larger</td><td style="border: 1px solid #000; padding: 5px;"><code>int64</code></td><td style="border: 1px solid #000; padding: 5px;">Count of age values falling above highest histogram bin edge.</td><td style="border: 1px solid #000; padding: 5px;"><code>0</code></td><td style="border: 1px solid #000; padding: 5px;">Required</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>in_silico_predictors</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... }</code></td><td style="border: 1px solid #000; padding: 5px;">Variant prediction annotations.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>cadd</td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">Score used to predict deleteriousness of SNVs and indels.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.cadd.</span>phred</td><td style="border: 1px solid #000; padding: 5px;"><code>float32</code></td><td style="border: 1px solid #000; padding: 5px;">CADD Phred-like scaled C-scores ranging from 1 to 99 based on the rank of each variant relative to all possible 8.6 billion substitutions in the human reference genome. Larger values indicate increased predicted deleteriousness.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.cadd.</span>raw_score</td><td style="border: 1px solid #000; padding: 5px;"><code>float32</code></td><td style="border: 1px solid #000; padding: 5px;">Unscaled CADD scores indicating whether a variant is likely to be "observed" (negative values) vs "simulated" (positive values). 
Larger values indicate increased predicted deleteriousness.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>revel_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">An ensemble score for predicting the pathogenicity of missense variants (based on 13 other variant predictors).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>spliceai_ds_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float32</code></td><td style="border: 1px solid #000; padding: 5px;">Maximum delta score across 4 splicing consequences, which reflects the probability of the variant being splice-altering.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>pangolin_largest_ds</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Largest delta score across 2 splicing consequences, which reflects the probability of the variant being splice-altering.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>phylop</td><td style="border: 1px solid #000; padding: 
5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Base-wise conservation score across the 241 placental mammals in the Zoonomia project. Score ranges from -20 to 9.28, and reflects acceleration (faster evolution than expected under neutral drift, assigned negative scores) as well as conservation (slower than expected evolution, assigned positive scores).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>sift_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Score reflecting the scaled probability of the amino acid substitution being tolerated, ranging from 0 to 1.</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><span style="color:#999;">in_silico_predictors.</span>polyphen_max</td><td style="border: 1px solid #000; padding: 5px;"><code>float64</code></td><td style="border: 1px solid #000; padding: 5px;">Score that predicts the possible impact of an amino acid substitution on the structure and function of a human protein, ranging from 0.0 (tolerated) to 1.0 (deleterious).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr><tr style="background-color:#dedbe4;"><td style="border: 1px solid #000; padding: 5px;"><strong>vep115</strong></td><td style="border: 1px solid #000; padding: 5px;"><code>struct { ... 
}</code></td><td style="border: 1px solid #000; padding: 5px;">VEP 115 annotations generated by the VEP tool (to be re-annotated).</td><td style="border: 1px solid #000; padding: 5px;">—</td><td style="border: 1px solid #000; padding: 5px;">Not Needed</td></tr></tbody></table>
\ No newline at end of file
diff --git a/gnomad_qc/v5/data_ingestion/field_requirements.md b/gnomad_qc/federated/data_ingestion/field_requirements.md
similarity index 92%
rename from gnomad_qc/v5/data_ingestion/field_requirements.md
rename to gnomad_qc/federated/data_ingestion/field_requirements.md
index 1cc4745bc..2b15cc7ec 100644
--- a/gnomad_qc/v5/data_ingestion/field_requirements.md
+++ b/gnomad_qc/federated/data_ingestion/field_requirements.md
@@ -17,7 +17,7 @@ All histograms annotations must use the same bin edges as defined in their respe
 | **faf_meta** | `array<dict<str, str>>` | Filtering allele frequency metadata. An ordered list containing the frequency aggregation group for each element of the ‘faf’ array row annotation. | — | Not Needed |
 | **faf_index_dict** | `dict<str, int32>` | Dictionary keyed by specified label grouping combinations ('group': 'adj'/'raw', 'gen_anc': inferred genetic ancestry group, 'sex': sex karyotype), with values describing the corresponding index of each grouping entry in the filtering allele frequency (‘faf’) row annotation. | — | Not Needed |
 | **age_distribution** | `struct { ... }` | Callset-wide age histogram. Cohorts for gnomAD vary in how they report age (some report the age at diagnosis, others report the age of last visit, etc), so the ages associated with the gnomAD data can be thought of as the last known age of the individual. Information on age is not available for all gnomAD samples. This field is required with the acknowledgement that age data may not be available for all samples. If no age is available for any samples, set all `bin_freq` values to `0`. | — | Required |
-| <span style="color:#999;">age_distribution.</span>bin_edges | `array<float64>` | Bin edges for age histogram: `30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0`. | `[30.0, 35.0, 40.0, 45.0, ...]` | Required |
+| <span style="color:#999;">age_distribution.</span>bin_edges | `array<float64>` | Bin edges for age histogram: 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0. | `[30.0, 35.0, 40.0, 45.0, ...]` | Required |
 | <span style="color:#999;">age_distribution.</span>bin_freq | `array<int32>` | Bin frequencies for the age histogram. This is the number of records found in each bin. | `[101, 122, 85, 4, ...]` | Required |
 | <span style="color:#999;">age_distribution.</span>n_smaller | `int32` | Count of age values falling below lowest histogram bin edge. | `1000` | Required |
 | <span style="color:#999;">age_distribution.</span>n_larger | `int32` | Count of age values falling above highest histogram bin edge. | `30` | Required |
@@ -54,6 +54,12 @@ All histograms annotations must use the same bin edges as defined in their respe
 | <span style="color:#999;">vep_globals</span>.vep_config | `str` | VEP configuration to run VEP version with Hail. File created using command within VEP init shell script in https://github.com/broadinstitute/gnomad_methods/tree/main. | — | Not Needed |
 | <span style="color:#999;">vep_globals</span>.gencode_version | `str` | GENCODE version used in VEP. | — | Not Needed |
 | <span style="color:#999;">vep_globals</span>.mane_select_version | `str` | MANE select version used in VEP. | — | Not Needed |
+| **vep115_globals** | `struct { ... }` | Information about VEP 115 annotations. | — | Not Needed |
+| <span style="color:#999;">vep115_globals</span>.vep_version | `str` | VEP version that was run on the callset (115). | — | Not Needed |
+| <span style="color:#999;">vep115_globals</span>.vep_help | `str` | Output from vep --help for VEP 115. | — | Not Needed |
+| <span style="color:#999;">vep115_globals</span>.vep_config | `str` | VEP 115 configuration to run VEP version with Hail. | — | Not Needed |
+| <span style="color:#999;">vep115_globals</span>.gencode_version | `str` | GENCODE version used in VEP 115. | — | Not Needed |
+| <span style="color:#999;">vep115_globals</span>.mane_select_version | `str` | MANE select version used in VEP 115. | — | Not Needed |
 | **frequency_README** | `str` | Explanation of how to use the 'freq_index_dict' global annotation to extract frequencies from the 'freq' row annotation. | — | Not Needed |
 | **date** | `str` | Date Hail Table was created. | `"2025-04-09"` | Required |
 | **version** | `str` | Version of the file. | — | Not Needed |
@@ -164,64 +170,64 @@ Data must be supplied as a "split" dataset, where multiallelic variants are spli
 | **histograms** | `struct { ... }` | Variant information histograms. | — | Required |
 | <span style="color:#999;">histograms.</span>qual_hists | `struct { ... }` | Genotype quality metric histograms for high quality genotypes. | — | Required |
 | <span style="color:#999;">histograms.qual_hists.</span>gq_hist_all | `struct { ... }` | Histogram for GQ calculated on high quality genotypes. | — | Required |
-| <span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>bin_edges | `array<float64>` | Bin edges for the GQ histogram calculated on high quality genotypes are: `0.0 | 5.0 | 10.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0`. | `[0.0, 5.0, 10.0, ...]` | Required |
+| <span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>bin_edges | `array<float64>` | Bin edges for the GQ histogram calculated on high quality genotypes are: 0.0 \| 5.0 \| 10.0 \| 15.0 \| 20.0 \| 25.0 \| 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0 \| 85.0 \| 90.0 \| 95.0 \| 100.0. | `[0.0, 5.0, 10.0, ...]` | Required |
 | <span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>bin_freq | `array<int64>` | Bin frequencies for the GQ histogram calculated on high quality genotypes. The number of records found in each bin. | `[50, 56, 101, ...]` | Required |
 | <span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>n_smaller | `int64` | Count of GQ values falling below the lowest histogram bin edge, calculated on high quality genotypes. | `0` | Required |
 | <span style="color:#999;">histograms.qual_hists.gq_hist_all.</span>n_larger | `int64` | Count of GQ values falling above the highest histogram bin edge, calculated on high quality genotypes. | `0` | Required |
 | <span style="color:#999;">histograms.qual_hists.</span>dp_hist_all | `struct { ... }` | Histogram for DP calculated on high quality genotypes. | — | Required |
-| <span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>bin_edges | `array<float64>` | Bin edges for the DP histogram calculated on high quality genotypes are: `0.0 | 5.0 | 10.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0`. | `[0.0, 5.0, 10.0, ...]` | Required |
+| <span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>bin_edges | `array<float64>` | Bin edges for the DP histogram calculated on high quality genotypes are: 0.0 \| 5.0 \| 10.0 \| 15.0 \| 20.0 \| 25.0 \| 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0 \| 85.0 \| 90.0 \| 95.0 \| 100.0. | `[0.0, 5.0, 10.0, ...]` | Required |
 | <span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>bin_freq | `array<int64>` | Bin frequencies for the DP histogram calculated on high quality genotypes. The number of records found in each bin. | `[2, 2, 16, ...]` | Required |
 | <span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>n_smaller | `int64` | Count of DP values falling below the lowest histogram bin edge, calculated on high quality genotypes. | `0` | Required |
 | <span style="color:#999;">histograms.qual_hists.dp_hist_all.</span>n_larger | `int64` | Count of DP values falling above the highest histogram bin edge, calculated on high quality genotypes. | `500` | Required |
 | <span style="color:#999;">histograms.qual_hists.</span>gq_hist_alt | `struct { ... }` | Histogram for GQ in non-reference individuals calculated on high quality genotypes. | — | Required |
-| <span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of GQ in non-reference individuals calculated on high quality genotypes are: `0.0 | 5.0 | 10.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0`. | `[0.0, 5.0, 10.0, ...]` | Required |
+| <span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of GQ in non-reference individuals calculated on high quality genotypes are: 0.0 \| 5.0 \| 10.0 \| 15.0 \| 20.0 \| 25.0 \| 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0 \| 85.0 \| 90.0 \| 95.0 \| 100.0. | `[0.0, 5.0, 10.0, ...]` | Required |
 | <span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>bin_freq | `array<int64>` | Bin frequencies for the histogram of GQ in non-reference individuals calculated on high quality genotypes. The number of records found in each bin. | `[0, 0 , 1, ...]` | Required |
 | <span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>n_smaller | `int64` | Count of GQ values in non-reference individuals falling below the lowest histogram bin edge, calculated on high quality genotypes. | `0` | Required |
 | <span style="color:#999;">histograms.qual_hists.gq_hist_alt.</span>n_larger | `int64` | Count of GQ values in non-reference individuals falling above the highest histogram bin edge, calculated on high quality genotypes. | `0` | Required |
 | <span style="color:#999;">histograms.qual_hists.</span>dp_hist_alt | `struct { ... }` | Histogram for DP in non-reference individuals calculated on high quality genotypes. | — | Required |
-| <span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of DP in non-reference individuals calculated on high quality genotypes are: `0.0 | 5.0 | 10.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0`. | `[0.0, 5.0, 10.0, ...]` | Required |
+| <span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of DP in non-reference individuals calculated on high quality genotypes are: 0.0 \| 5.0 \| 10.0 \| 15.0 \| 20.0 \| 25.0 \| 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0 \| 85.0 \| 90.0 \| 95.0 \| 100.0. | `[0.0, 5.0, 10.0, ...]` | Required |
 | <span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>bin_freq | `array<int64>` | Bin frequencies for the histogram of DP in non-reference individuals calculated on high quality genotypes. The number of records found in each bin. | `[1, 1, 10, ...]` | Required |
 | <span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>n_smaller | `int64` | Count of DP values in non-reference individuals falling below the lowest histogram bin edge, calculated on high quality genotypes. | `0` | Required |
 | <span style="color:#999;">histograms.qual_hists.dp_hist_alt.</span>n_larger | `int64` | Count of DP values in non-reference individuals falling above highest histogram bin edge, calculated on high quality genotypes. | `10` | Required |
 | <span style="color:#999;">histograms.qual_hists.</span>ab_hist_alt | `struct { ... }` | Histogram for AB in heterozygous individuals calculated on high quality genotypes. | — | Required |
-| <span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of AB in heterozygous individuals calculated on high quality genotypes are: `0.00 | 0.05 | 0.10 | 0.20 | 0.25 | 0.30 | 0.35 | 0.40 | 0.45 | 0.50 | 0.55 | 0.60 | 0.65 | 0.70 | 0.75 | 0.80 | 0.85 | 0.90 | 0.95 | 1.00`. | `[0.00 , 0.05 , 0.10, ...]` | Required |
+| <span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of AB in heterozygous individuals calculated on high quality genotypes are: 0.00 \| 0.05 \| 0.10 \| 0.15 \| 0.20 \| 0.25 \| 0.30 \| 0.35 \| 0.40 \| 0.45 \| 0.50 \| 0.55 \| 0.60 \| 0.65 \| 0.70 \| 0.75 \| 0.80 \| 0.85 \| 0.90 \| 0.95 \| 1.00. | `[0.00 , 0.05 , 0.10, ...]` | Required |
 | <span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>bin_freq | `array<int64>` | Bin frequencies for the histogram of AB in heterozygous individuals calculated on high quality genotypes. The number of records found in each bin. | `[0 , 0 , 5, ...]` | Required |
 | <span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>n_smaller | `int64` | Count of AB values in heterozygous individuals falling below the lowest histogram bin edge, calculated on high quality genotypes. | `0` | Required |
 | <span style="color:#999;">histograms.qual_hists.ab_hist_alt.</span>n_larger | `int64` | Count of AB values in heterozygous individuals falling above the highest histogram bin edge, calculated on high quality genotypes. | `0` | Required |
 | <span style="color:#999;">histograms.</span>raw_qual_hists | `struct { ... }` | Genotype quality metric histograms for all genotypes as opposed to high quality genotypes. | — | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.</span>gq_hist_all | `struct { ... }` | Histogram for GQ calculated on all genotypes. | — | Optional |
-| <span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>bin_edges | `array<float64>` | Bin edges for the GQ histogram calculated on all genotypes are: `0.0 | 5.0 | 10.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0`. | `[0.0, 5.0, 10.0, ...]` | Optional |
+| <span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>bin_edges | `array<float64>` | Bin edges for the GQ histogram calculated on all genotypes are: 0.0 \| 5.0 \| 10.0 \| 15.0 \| 20.0 \| 25.0 \| 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0 \| 85.0 \| 90.0 \| 95.0 \| 100.0. | `[0.0, 5.0, 10.0, ...]` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>bin_freq | `array<int64>` | Bin frequencies for the GQ histogram calculated on all genotypes. The number of records found in each bin. | `[60, 76, 130, ...]` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>n_smaller | `int64` | Count of GQ values falling below lowest histogram bin edge, for GQ calculated on all genotypes. | `0` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.gq_hist_all.</span>n_larger | `int64` | Count of GQ values falling above highest histogram bin edge, for GQ calculated on all genotypes. | `0` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.</span>dp_hist_all | `struct { ... }` | Histogram for DP calculated on all genotypes. | — | Optional |
-| <span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>bin_edges | `array<float64>` | Bin edges for the DP histogram calculated on all genotypes are: `0.0 | 5.0 | 10.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0`. | `[0.0, 5.0, 10.0, ...]` | Optional |
+| <span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>bin_edges | `array<float64>` | Bin edges for the DP histogram calculated on all genotypes are: 0.0 \| 5.0 \| 10.0 \| 15.0 \| 20.0 \| 25.0 \| 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0 \| 85.0 \| 90.0 \| 95.0 \| 100.0. | `[0.0, 5.0, 10.0, ...]` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>bin_freq | `array<int64>` | Bin frequencies for the DP histogram calculated on all genotypes. The number of records found in each bin. | `[4, 7, 20, ...]` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>n_smaller | `int64` | Count of DP values falling below lowest histogram bin edge, for DP calculated on all genotypes. | `0` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.dp_hist_all.</span>n_larger | `int64` | Count of DP values falling above highest histogram bin edge, for DP calculated on all genotypes. | `600` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.</span>gq_hist_alt | `struct { ... }` | Histogram for GQ in non-reference individuals calculated on all genotypes. | — | Optional |
-| <span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of GQ in non-reference individuals calculated on all genotypes are: `0.0 | 5.0 | 10.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0`. | `[0.0, 5.0, 10.0, ...]` | Optional |
+| <span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of GQ in non-reference individuals calculated on all genotypes are: 0.0 \| 5.0 \| 10.0 \| 15.0 \| 20.0 \| 25.0 \| 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0 \| 85.0 \| 90.0 \| 95.0 \| 100.0. | `[0.0, 5.0, 10.0, ...]` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>bin_freq | `array<int64>` | Bin frequencies for the histogram of GQ in non-reference individuals calculated on all genotypes. The number of records found in each bin. | `[3, 4, 10, ...]` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>n_smaller | `int64` | Count of GQ values in non-reference individuals falling below lowest histogram bin edge, calculated on all genotypes. | `0` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.gq_hist_alt.</span>n_larger | `int64` | Count of GQ values in non-reference individuals falling above highest histogram bin edge, calculated on all genotypes. | `0` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.</span>dp_hist_alt | `struct { ... }` | Histogram for DP in non-reference individuals calculated on all genotypes. | — | Optional |
-| <span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of DP in non-reference individuals calculated on all genotypes are: `0.0 | 5.0 | 10.0 | 20.0 | 25.0 | 30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0 | 85.0 | 90.0 | 95.0 | 100.0`. | `[0.0, 5.0, 10.0, ...]` | Optional |
+| <span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of DP in non-reference individuals calculated on all genotypes are: 0.0 \| 5.0 \| 10.0 \| 15.0 \| 20.0 \| 25.0 \| 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0 \| 85.0 \| 90.0 \| 95.0 \| 100.0. | `[0.0, 5.0, 10.0, ...]` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>bin_freq | `array<int64>` | Bin frequencies for the histogram of DP in non-reference individuals calculated on all genotypes. The number of records found in each bin. | `[2, 2, 6, ...]` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>n_smaller | `int64` | Count of DP values in non-reference individuals falling below lowest histogram bin edge, calculated on all genotypes. | `0` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.dp_hist_alt.</span>n_larger | `int64` | Count of DP values in non-reference individuals falling above highest histogram bin edge, calculated on all genotypes. | `11` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.</span>ab_hist_alt | `struct { ... }` | Histogram for AB in heterozygous individuals calculated on all genotypes. | — | Optional |
-| <span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of AB in heterozygous individuals calculated on all genotypes are: `0.00 | 0.05 | 0.10 | 0.20 | 0.25 | 0.30 | 0.35 | 0.40 | 0.45 | 0.50 | 0.55 | 0.60 | 0.65 | 0.70 | 0.75 | 0.80 | 0.85 | 0.90 | 0.95 | 1.00`. | `[0.00 , 0.05 , 0.10, ...]` | Optional |
+| <span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>bin_edges | `array<float64>` | Bin edges for the histogram of AB in heterozygous individuals calculated on all genotypes are: 0.00 \| 0.05 \| 0.10 \| 0.15 \| 0.20 \| 0.25 \| 0.30 \| 0.35 \| 0.40 \| 0.45 \| 0.50 \| 0.55 \| 0.60 \| 0.65 \| 0.70 \| 0.75 \| 0.80 \| 0.85 \| 0.90 \| 0.95 \| 1.00. | `[0.00 , 0.05 , 0.10, ...]` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>bin_freq | `array<int64>` | Bin frequencies for the histogram of AB in heterozygous individuals calculated on all genotypes. The number of records found in each bin. | `[0 , 0 , 6, ...]` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>n_smaller | `int64` | Count of AB values in heterozygous individuals falling below lowest histogram bin edge, calculated on all genotypes. | `0` | Optional |
 | <span style="color:#999;">histograms.raw_qual_hists.ab_hist_alt.</span>n_larger | `int64` | Count of AB values in heterozygous individuals falling above highest histogram bin edge, calculated on all genotypes. | `0` | Optional |
 | <span style="color:#999;">histograms.</span>age_hists | `struct { ... }` | Histograms containing age information for high quality genotypes.  Cohorts for gnomAD vary in how they report age (some report the age at diagnosis, others report the age of last visit, etc), so the ages associated with the gnomAD data can be thought of as the last known age of the individual. Information on age is not available for all gnomAD samples. This field is required with the acknowledgement that age data may not be available for all samples. If no age is available for any samples, set all `bin_freq` values to `0`. | — | Required |
 | <span style="color:#999;">histograms.age_hists.</span>age_hist_het | `struct { ... }` | Histogram for age in all heterozygous samples calculated on high quality genotypes. | — | Required |
-| <span style="color:#999;">histograms.age_hists.age_hist_het.</span>bin_edges | `array<float64>` | Bin edges for the age histogram: `30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0`. | `[30.0, 35.0,  40.0, ...]` | Required |
+| <span style="color:#999;">histograms.age_hists.age_hist_het.</span>bin_edges | `array<float64>` | Bin edges for the age histogram: 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0. | `[30.0, 35.0,  40.0, ...]` | Required |
 | <span style="color:#999;">histograms.age_hists.age_hist_het.</span>bin_freq | `array<int64>` | Bin frequencies for the age histogram. This is the number of records found in each bin. | `[0, 3,  4, ...]` | Required |
 | <span style="color:#999;">histograms.age_hists.age_hist_het.</span>n_smaller | `int64` | Count of age values falling below lowest histogram bin edge. | `1` | Required |
 | <span style="color:#999;">histograms.age_hists.age_hist_het.</span>n_larger | `int64` | Count of age values falling above highest histogram bin edge. | `0` | Required |
 | <span style="color:#999;">histograms.age_hists.</span>age_hist_hom | `struct { ... }` | Histogram for age in all homozygous samples calculated on high quality genotypes. If variant is in the pseudoautosomal regions of chrX or chrY, this histogram also includes age counts of hemizygous samples. | — | Required |
-| <span style="color:#999;">histograms.age_hists.age_hist_hom.</span>bin_edges | `array<float64>` | Bin edges for the age histogram: `30.0 | 35.0 | 40.0 | 45.0 | 50.0 | 55.0 | 60.0 | 65.0 | 70.0 | 75.0 | 80.0`. | `[30.0, 35.0,  40.0, ...]`| Required |
+| <span style="color:#999;">histograms.age_hists.age_hist_hom.</span>bin_edges | `array<float64>` | Bin edges for the age histogram: 30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0. | `[30.0, 35.0,  40.0, ...]`| Required |
 | <span style="color:#999;">histograms.age_hists.age_hist_hom.</span>bin_freq | `array<int64>` | Bin frequencies for the age histogram. This is the number of records found in each bin. | `[0, 2,  2, ...]` | Required |
 | <span style="color:#999;">histograms.age_hists.age_hist_hom.</span>n_smaller | `int64` | Count of age values falling below lowest histogram bin edge. | `0` | Required |
 | <span style="color:#999;">histograms.age_hists.age_hist_hom.</span>n_larger | `int64` | Count of age values falling above highest histogram bin edge. | `0` | Required |
@@ -235,3 +241,4 @@ Data must be supplied as a "split" dataset, where multiallelic variants are spli
 | <span style="color:#999;">in_silico_predictors.</span>phylop | `float64` | Base-wise conservation score across the 241 placental mammals in the Zoonomia project. Score ranges from -20 to 9.28, and reflects acceleration (faster evolution than expected under neutral drift, assigned negative scores) as well as conservation (slower than expected evolution, assigned positive scores). | — | Not Needed |
 | <span style="color:#999;">in_silico_predictors.</span>sift_max | `float64` | Score reflecting the scaled probability of the amino acid substitution being tolerated, ranging from 0 to 1. | — | Not Needed |
 | <span style="color:#999;">in_silico_predictors.</span>polyphen_max | `float64` | Score that predicts the possible impact of an amino acid substitution on the structure and function of a human protein, ranging from 0.0 (tolerated) to 1.0 (deleterious). | — | Not Needed |
+| **vep115** | `struct { ... }` | VEP 115 annotations generated by the VEP tool (to be re-annotated). | — | Not Needed |
diff --git a/gnomad_qc/v5/data_ingestion/federated_validity_checks.py b/gnomad_qc/federated/federated_validity_checks.py
similarity index 66%
rename from gnomad_qc/v5/data_ingestion/federated_validity_checks.py
rename to gnomad_qc/federated/federated_validity_checks.py
index 5c3fd44f6..5c5a9b86e 100644
--- a/gnomad_qc/v5/data_ingestion/federated_validity_checks.py
+++ b/gnomad_qc/federated/federated_validity_checks.py
@@ -1,12 +1,16 @@
 """Script to perform validity checks on input federated data or final release files."""
 
 import argparse
+import importlib
+import inspect
 import json
 import logging
 import re
 from collections import defaultdict
+from copy import deepcopy
 from io import StringIO
-from typing import Any, Dict, List, Tuple
+from pprint import pformat
+from typing import Any, Dict, List, Optional, Tuple
 
 import hail as hl
 from bs4 import BeautifulSoup
@@ -18,23 +22,20 @@
     check_raw_and_adj_callstats,
     check_sex_chr_metrics,
     compare_subset_freqs,
-    compute_missingness,
     flatten_missingness_struct,
     sum_group_callstats,
     summarize_variant_filters,
     summarize_variants,
     unfurl_array_annotations,
 )
-from gnomad.resources.grch38.gnomad import public_release
+from gnomad.resources.resource_utils import VersionedTableResource
+from gnomad.utils.filtering import remove_fields_from_constant
 from gnomad.utils.reference_genome import get_reference_genome
+from gnomad.utils.vcf import ALLELE_TYPE_FIELDS, REGION_FLAG_FIELDS
 from jsonschema import validate
 from jsonschema.exceptions import ValidationError
 
-from gnomad_qc.v4.create_release.validate_and_export_vcf import (
-    ALLELE_TYPE_FIELDS,
-    REGION_FLAG_FIELDS,
-)
-from gnomad_qc.v5.configs.validity_inputs_schema import schema
+from gnomad_qc.federated.configs.validity_inputs_schema import schema
 from gnomad_qc.v5.resources.basics import get_logging_path
 
 for handler in logging.root.handlers[:]:
@@ -61,12 +62,38 @@
 memory_handler.setFormatter(formatter)
 logger.addHandler(memory_handler)
 
-ALLELE_TYPE_FIELDS = ALLELE_TYPE_FIELDS["genomes"]
-REGION_FLAG_FIELDS = REGION_FLAG_FIELDS["genomes"]
+# Drop `original_alleles` because it contains non-releasable alleles.
+ALLELE_TYPE_FIELDS = deepcopy(ALLELE_TYPE_FIELDS)
+ALLELE_TYPE_FIELDS = remove_fields_from_constant(
+    ALLELE_TYPE_FIELDS, ["original_alleles"]
+)
+# Copy for genomes so dropping has_star cannot also alter the exomes list.
+ALLELE_TYPE_FIELDS = {
+    "exomes": ALLELE_TYPE_FIELDS,
+    "genomes": remove_fields_from_constant(deepcopy(ALLELE_TYPE_FIELDS), ["has_star"]),
+}
+
+# Drop decoy (still doesn't exist on GRCh38) and nonpar.
+REGION_FLAG_FIELDS = deepcopy(REGION_FLAG_FIELDS)
+REGION_FLAG_FIELDS = remove_fields_from_constant(
+    REGION_FLAG_FIELDS, ["decoy", "nonpar"]
+)
+REGION_FLAG_FIELDS = {
+    "exomes": (
+        REGION_FLAG_FIELDS
+        + [
+            "fail_interval_qc",
+            "outside_ukb_capture_region",
+            "outside_broad_capture_region",
+        ]
+    ),
+    "genomes": REGION_FLAG_FIELDS,
+}
 
 
 def get_table_kind(lines, header_index) -> str:
-    """Determine whether a markdown table corresponds to "global" or "row" fields by scanning upward from the table header line.
+    """
+    Determine whether a markdown table corresponds to "global" or "row" fields by scanning upward from the table header line.
 
     :param lines: The full list of lines from the markdown document.
     :param header_index: The index of the table header line (the line with column names).
@@ -343,12 +370,16 @@ def validate_config_fields_in_ht(ht: hl.Table, config: Dict[str, Any]) -> None:
     missing_fields["globals"] = missing_global_fields
 
     # Check that specified row annotations are present.
-    row_fields = array_struct_annotations + config["struct_annotations_for_missingness"]
+    structs_to_skip_missingness = config.get(
+        "struct_annotations_to_skip_missingness", []
+    )
+
+    row_fields = array_struct_annotations + structs_to_skip_missingness
 
     missing_row_fields = [i for i in row_fields if i not in ht.row]
     missing_fields["rows"] = missing_row_fields
 
-    # Check that specified info annotations are present.
+    # Check that specified info annotations are present when configured.
     if config.get("check_mono_and_only_het"):
         info_annotations = ["monoallelic", "only_het"]
         info_fields = list(ht.info.dtype)
@@ -509,65 +540,257 @@ def _check_field_exists_and_type(
     return field_issues, type_issues, fields_validated, types_validated
 
 
+def check_fields_not_in_requirements(
+    ht: hl.Table, field_types: Dict[str, Dict[str, Any]]
+) -> None:
+    """
+    Log a warning listing global and row fields that are in the HT but absent from the requirements.
+
+    :param ht: Hail Table whose global and row schemas are inspected.
+    :param field_types: Nested dictionary of both global and row fields and their expected types. There should be two keys: "global_field_types" and "row_field_types". Only the field names (keys) are used here.
+    :return: None.
+    """
+
+    def _flatten_dtype(dtype: hl.expr.types.HailType, prefix: str = "") -> List[str]:
+        """Recursively collect dot-joined names of the non-struct, non-container fields in a Hail type."""
+        names = []
+
+        # Structs: extend the dotted path with each field name and recurse.
+        if isinstance(dtype, hl.tstruct):
+            for field, field_dtype in dtype.items():
+                name = f"{prefix}.{field}" if prefix else field
+                # Recurse so a nested struct or container contributes its own inner names.
+                names.extend(_flatten_dtype(field_dtype, name))
+        # Arrays and sets: descend into the element type without changing the path.
+        elif isinstance(dtype, (hl.tarray, hl.tset)):
+            names.extend(_flatten_dtype(dtype.element_type, prefix))
+        # Dicts: only the value type is traversed; the key type adds no names.
+        elif isinstance(dtype, hl.tdict):
+            names.extend(_flatten_dtype(dtype.value_type, prefix))
+        else:  # Any other type ends the recursion; record its path if there is one.
+            if prefix:
+                names.append(prefix)
+
+        return names
+
+    # Pair each HT component (label, schema) with its key in the requirements dict.
+    tasks = [
+        ("Global", ht.globals.dtype, "global_field_types"),
+        ("Row", ht.row.dtype, "row_field_types"),
+    ]
+
+    for label, dtype, req_key in tasks:
+        table_fields = set(_flatten_dtype(dtype))
+        required_fields = set(field_types.get(req_key, {}).keys())
+
+        unexpected = table_fields - required_fields
+
+        if unexpected:
+            logger.warning(
+                "%s fields present in Table but missing from requirements: %s",
+                label,
+                ", ".join(sorted(unexpected)),
+            )
+            )
+
+
+def filter_to_test_partitions(
+    ht: hl.Table,
+    test_n_partitions: int = 2,
+) -> hl.Table:
+    """
+    Subset the Table to a few partitions of the full Table, of chrX, and of chrY for testing purposes.
+
+    :param ht: Input Table.
+    :param test_n_partitions: Number of partitions to keep from each of the three subsets (full Table, chrX, chrY). Default is 2.
+    :return: Union of the three partition-filtered subsets.
+    """
+    test_ht = ht._filter_partitions(range(test_n_partitions))
+    x_ht = hl.filter_intervals(
+        ht, [hl.parse_locus_interval("chrX")]
+    )._filter_partitions(range(test_n_partitions))
+
+    y_ht = hl.filter_intervals(
+        ht, [hl.parse_locus_interval("chrY")]
+    )._filter_partitions(range(test_n_partitions))
+
+    ht = test_ht.union(x_ht, y_ht)
+
+    return ht
+
+
 def check_missingness(
     ht: hl.Table,
     missingness_threshold: float = 0.5,
-    struct_annotations: List[str] = ["grpmax", "fafmax", "histograms"],
+    structs_to_not_traverse: Optional[Tuple[str, ...]] = ("vep",),
 ) -> None:
     """
-    Check for and report the fraction of missing data in the Table.
+    Check for and report the fraction of missing data in row annotations.
+
+    For struct annotations, missingness is checked recursively unless the
+    annotation name is included in `structs_to_not_traverse`, in which case
+    only top-level missingness of the struct itself is checked.
 
     :param ht: Input Table.
-    :param missingness_threshold: Upper cutoff for allowed amount of missingness. Default is 0.50.
-    :param struct_annotations: List of struct annotations to check for missingness. Default is ['grpmax', 'fafmax', 'histograms'].
+    :param missingness_threshold: Upper cutoff for allowed amount of
+        missingness. Default is 0.50.
+    :param structs_to_not_traverse: Optional tuple of top-level struct row
+        annotations that should be treated as a single field rather than
+        recursively traversed. Default is ("vep",).
     :return: None
     """
-    logger.info("Checking for missingness within struct annotations...")
-    logger.info("Struct annotations being checked: %s.", struct_annotations)
-    # Determine missingness of each struct annotation.
+    n_sites = ht.count()
+    structs_to_not_traverse = tuple(structs_to_not_traverse or ())
+
+    logger.info(
+        "Missingness threshold (upper cutoff for allowed missingness): %.2f",
+        missingness_threshold,
+    )
+
     metric_missingness = {}
-    for metric in struct_annotations:
-        metric_missingness.update(check_missingness_of_struct(ht[metric], metric))
+    struct_annotations_checked = []
+    non_struct_annotations_checked = []
+    non_traversed_struct_annotations = []
+
+    for field, dtype in ht.row.dtype.items():
+        field_expr = ht[field]
+
+        if isinstance(dtype, hl.tstruct):
+            if field in structs_to_not_traverse:
+                non_traversed_struct_annotations.append(field)
+                metric_missingness[field] = hl.agg.sum(hl.is_missing(field_expr))
+            else:
+                struct_annotations_checked.append(field)
+                metric_missingness.update(
+                    check_missingness_of_struct(field_expr, field)
+                )
+        else:
+            non_struct_annotations_checked.append(field)
+            metric_missingness[field] = hl.agg.sum(hl.is_missing(field_expr))
+
+    logger.info(
+        "Struct annotations being recursively checked: %s.",
+        struct_annotations_checked,
+    )
+    logger.info(
+        "Struct annotations checked only at the top level: %s.",
+        non_traversed_struct_annotations,
+    )
+    logger.info(
+        "Non-struct annotations being checked: %s.",
+        non_struct_annotations_checked,
+    )
+    n_annotations = len(metric_missingness)
+    logger.info("Checking missingness for %d annotations.", n_annotations)
 
-    missingness_struct = ht.aggregate(hl.struct(**metric_missingness))
-    missingness_dict = flatten_missingness_struct(missingness_struct)
+    output = flatten_missingness_struct(ht.aggregate(hl.struct(**metric_missingness)))
 
-    # Report whether or not each metric pass or fails the missingness check
-    # based on the missingness_threshold.
-    for field, missingness in missingness_dict.items():
-        if missingness > missingness_threshold:
+    n_fail = 0
+    for field, n_missing in output.items():
+        # An empty Table has no sites to divide by; report 0% missing instead
+        # of raising ZeroDivisionError.
+        frac_missing = n_missing / n_sites if n_sites else 0.0
+
+        if frac_missing > missingness_threshold:
             logger.info(
-                "FAILED missingness check for %s: %.2f%% missing",
+                "FAILED missingness check for %s: %d sites or %.2f%% missing",
                 field,
-                100 * missingness,
+                n_missing,
+                100 * frac_missing,
             )
+            n_fail += 1
         else:
             logger.info(
-                "Passed missingness check for %s: %.2f%% missing",
+                "Passed missingness check for %s: %d sites or %.2f%% missing",
                 field,
-                100 * missingness,
+                n_missing,
+                100 * frac_missing,
             )
 
-    logger.info("Checking for missingness of info and non-info fields...")
-    # Gather info and non-info metrics (or if doesn't exist, set to an empty list)
-    # and substract missingness dict.
-    info_metrics = (
-        set(ht.row.info) - missingness_dict.keys() if "info" in ht.row else set()
-    )
-    non_info_metrics = set(ht.row) - {"info"} - missingness_dict.keys()
-    n_sites = ht.count()
-    logger.info("Info metrics are %s", info_metrics)
-    logger.info("Non-info metrics are %s", non_info_metrics)
-    compute_missingness(
-        ht, info_metrics, non_info_metrics, n_sites, missingness_threshold
+    logger.warning("%d missingness checks failed.", n_fail)
+
+
+def run_row_to_globals_length_check(
+    ht: hl.Table,
+    config: Dict[str, Any],
+    check_all_rows: bool = True,
+) -> None:
+    """
+    Derive the row-to-globals mapping from the config and run the length check.
+
+    The mapping is keyed by the row annotation name and lists the global
+    annotations passed to `check_global_and_row_annot_lengths` for that row field.
+
+    :param ht: Hail Table to check.
+    :param config: Configuration dictionary containing 'freq_fields' and, optionally, 'faf_fields'.
+    :param check_all_rows: Forwarded to `check_global_and_row_annot_lengths`. Default is True.
+    :return: None
+    """
+    freq_fields = config["freq_fields"]
+    freq_key = freq_fields["freq"]
+    freq_globals = [freq_fields["freq_meta"], freq_fields["freq_meta_sample_count"]]
+    # The index dict is optional; include it only when the config names one.
+    if freq_fields.get("freq_index_dict"):
+        freq_globals.append(freq_fields["freq_index_dict"])
+    row_to_globals_check = {freq_key: freq_globals}
+
+    faf_fields = config.get("faf_fields")
+    if faf_fields:
+        faf_globals = [faf_fields["faf_meta"]]
+        faf_key = faf_fields["faf"]
+        if faf_fields.get("faf_index_dict"):
+            faf_globals.append(faf_fields["faf_index_dict"])
+        row_to_globals_check[faf_key] = faf_globals
+
+    check_global_and_row_annot_lengths(
+        t=ht, row_to_globals_check=row_to_globals_check, check_all_rows=check_all_rows
     )
 
 
+def add_info_annotations(
+    ht: hl.Table, region_flag_fields: List[str], allele_type_fields: List[str]
+) -> hl.Table:
+    """
+    Copy selected `region_flags` and `allele_info` fields into the `info` struct.
+
+    A requested field is copied only when its parent struct is a row field and
+    contains it. Requested fields absent from a present parent struct are
+    reported in a warning; nothing is reported when the parent struct itself
+    is absent.
+
+    :param ht: Table to annotate. Must already have an `info` row annotation.
+    :param region_flag_fields: Fields to copy from `region_flags`.
+    :param allele_type_fields: Fields to copy from `allele_info`.
+    :return: Table with the copied fields added to `info`.
+    """
+    # (parent struct, requested fields, warning emitted for absent fields).
+    sources = (
+        ("region_flags", region_flag_fields, "Missing region_flag fields: %s"),
+        ("allele_info", allele_type_fields, "Missing allele type fields: %s"),
+    )
+
+    new_info_fields = {}
+    for parent, requested, warning_msg in sources:
+        if parent not in ht.row:
+            continue
+
+        parent_expr = ht[parent]
+        # Requested fields that the parent struct does not carry.
+        absent = [name for name in requested if name not in parent_expr]
+        new_info_fields.update(
+            {name: parent_expr[name] for name in requested if name in parent_expr}
+        )
+        if absent:
+            logger.warning(warning_msg, absent)
+
+    # Add the collected fields to the existing `info` struct.
+    return ht.annotate(info=ht.info.annotate(**new_info_fields))
+
+
 def validate_federated_data(
     ht: hl.Table,
     freq_meta_expr: hl.expr.ArrayExpression,
     missingness_threshold: float = 0.50,
-    struct_annotations_for_missingness: List[str] = ["grpmax", "fafmax", "histograms"],
+    struct_annotations_to_skip_missingness: Optional[List[str]] = None,
     freq_annotations_to_sum: List[str] = ["AC", "AN", "homozygote_count"],
     sort_order: List[str] = ["subset", "downsampling", "gen_anc", "sex", "group"],
     nhomalt_metric: str = "nhomalt",
@@ -584,6 +807,9 @@ def validate_federated_data(
     :param freq_meta_expr: Metadata expression that contains the values of the elements in
         `meta_indexed_expr`. The most often used expression is `freq_meta` to index into
         a 'freq' array (example: ht.freq_meta).
+    :param missingness_threshold: Upper cutoff for allowed amount of
+        missingness. Default is 0.50.
+    :param struct_annotations_to_skip_missingness: Optional list of top-level struct row annotations that should be treated as a single field rather than recursively traversed when checking missingness. Default is None.
     :param freq_annotations_to_sum: List of annotation fields within `meta_expr` to sum. Default is ['AC', 'AN', 'homozygote_count'].
     :param sort_order: Order in which groupings are unfurled into flattened annotations. Default is ["subset", "downsampling", gen_anc", "sex", "group"].
     :param nhomalt_metric: Name of metric denoting homozygous alternate count. Default is "nhomalt".
@@ -613,7 +839,7 @@ def validate_federated_data(
     check_missingness(
         ht,
         missingness_threshold,
-        struct_annotations=struct_annotations_for_missingness,
+        structs_to_not_traverse=struct_annotations_to_skip_missingness,
     )
 
     # Check that subset totals sum to expected totals.
@@ -681,6 +907,16 @@ def validate_federated_data(
             metrics=freq_annotations_to_sum,
         )
 
+    logger.info("Printing schema of annotations and globals...")
+
+    full_description = ht._type.pretty()
+    logger.info("Table describe:\n%s", full_description)
+    global_eval = {g: hl.eval(ht[g]) for g in ht.globals}
+    logger.info(
+        "Globals eval:\n%s",
+        pformat(global_eval, sort_dicts=False, compact=True, width=160),
+    )
+
 
 def create_logtest_ht(exclude_xnonpar_y: bool = False) -> hl.Table:
     """
@@ -885,6 +1121,7 @@ def create_logtest_ht(exclude_xnonpar_y: bool = False) -> hl.Table:
         faf_meta=faf_meta,
         freq_meta_sample_count=freq_meta_sample_count,
         faf_meta_sample_count=faf_meta_sample_count,
+        extra_global_field="extra_global_field",
     )
 
     # Add in retired terms to globals.
@@ -939,12 +1176,111 @@ def create_logtest_ht(exclude_xnonpar_y: bool = False) -> hl.Table:
 
     ht = ht.annotate(grpmax=grpmax, fafmax=fafmax)
     # Add monoallelic and only_het annotations.
-    ht = ht.annotate(monoallelic=hl.rand_bool(0.50), only_het=hl.rand_bool(0.10))
+    ht = ht.annotate(
+        info=ht.info.annotate(
+            monoallelic=hl.rand_bool(0.50), only_het=hl.rand_bool(0.10)
+        )
+    )
     ht = ht.key_by("locus", "alleles")
 
     return ht
 
 
+def load_gnomad_data(
+    gnomad_input_file: str,
+    version: str,
+    data_type: str = "genomes",
+    test: bool = False,
+    sample_set: Optional[str] = None,
+    public_release: Optional[bool] = None,
+    environment: Optional[str] = None,
+) -> hl.Table:
+    """
+    Load gnomAD data based on specified input file and parameters.
+
+    :param gnomad_input_file: Name of resource to load, either "freq" or "release_sites".
+    :param version: Version to load. For example "4.0", "4.1", "5.0".
+    :param data_type: Type of gnomAD data to load, either "exomes" or "genomes". Default is "genomes".
+    :param test: If True, load test version of the data. Default is False.
+    :param sample_set: Sample set of annotation resource. One of "aou", "gnomad", or "merged". If None, uses the default defined by the underlying resource function. Default is None.
+    :param public_release: Whether or not to use the public version of the release. If None, uses the default defined by the underlying resource function. Default is None.
+    :param environment: Environment to use. Must be one of "rwb", "batch", or
+        "dataproc". If None, uses the default defined by the underlying resource function. Default is None.
+    :return: Hail Table of the specified gnomAD data.
+    :raises ValueError: If the major version or input file is not supported, or if
+        the requested version is not available on a versioned resource.
+    """
+    major_v = version.split(".")[0]
+
+    # Define module mapping based on major version.
+    module_mapping = {
+        "4": {
+            "freq": ("gnomad_qc.v4.resources.annotations", "get_freq"),
+            "release_sites": ("gnomad_qc.v4.resources.release", "release_sites"),
+        },
+        "5": {
+            "freq": ("gnomad_qc.v5.resources.annotations", "get_freq"),
+            "release_sites": ("gnomad_qc.v5.resources.release", "release_sites"),
+        },
+    }
+
+    if major_v not in module_mapping:
+        raise ValueError(f"Major version {major_v} not supported.")
+
+    if gnomad_input_file not in module_mapping[major_v]:
+        raise ValueError(f"Input '{gnomad_input_file}' not found for v{major_v}")
+
+    module_path, function_name = module_mapping[major_v][gnomad_input_file]
+
+    # Import the module and get the function to call.
+    module = importlib.import_module(module_path)
+    resource_func = getattr(module, function_name)
+
+    logger.info("Loading %s version %s (%s)...", gnomad_input_file, major_v, data_type)
+
+    # Collect all possible params for the function.
+    all_params = {
+        "data_type": data_type,
+        "test": test,
+        "version": version,
+        "sample_set": sample_set,
+        "public": public_release,
+        "environment": environment,
+    }
+
+    # Keep only the non-None parameters that the resource function accepts.
+    sig_params = inspect.signature(resource_func).parameters
+    valid_args = {
+        k: v for k, v in all_params.items() if k in sig_params and v is not None
+    }
+
+    logger.info("Using valid parameters %s for function %s", valid_args, function_name)
+
+    # Log which file and params are being used.
+    arg_preview = ", ".join(f"{k}={v}" for k, v in valid_args.items())
+    logger.info("Calling %s.%s(%s)", module_path, function_name, arg_preview)
+
+    resource = resource_func(**valid_args)
+
+    # Some resources (e.g. v4 release_sites) return a VersionedTableResource and do
+    # not accept a version argument in their function signature. Select the requested
+    # version explicitly instead of relying on the resource default.
+    if isinstance(resource, VersionedTableResource):
+        if version not in resource.versions:
+            available_versions = ", ".join(sorted(resource.versions.keys()))
+            raise ValueError(
+                f"Requested version '{version}' is not available for "
+                f"{gnomad_input_file}. Available versions: {available_versions}"
+            )
+
+        logger.info(
+            "Using resource version '%s' for %s.", version, gnomad_input_file
+        )
+        return resource.versions[version].ht()
+
+    return resource.ht()
+
+
 def main(args):
     """Perform validity checks for federated data."""
     hl.init(
@@ -955,12 +1291,21 @@ def main(args):
     test_n_partitions = args.test_n_partitions
     config_path = args.config_path
     verbose = args.verbose
+    output_base = args.output_base
 
     if args.exclude_xnonpar_y_in_logtest and not args.use_logtest_ht:
         raise ValueError(
             "exclude_xnonpar_y_in_logtest can only be used with use_logtest_ht."
         )
 
+    if not args.use_logtest_ht and (
+        args.gnomad_input_file is None or args.gnomad_version is None
+    ):
+        raise ValueError(
+            "When --use-logtest-ht is not set, both --gnomad-input-file and "
+            "--gnomad-version are required."
+        )
+
     try:
         # Read in config file and validate.
         with hl.hadoop_open(config_path, "r") as f:
@@ -968,6 +1313,10 @@ def main(args):
 
         validate_config(config, schema)
 
+        data_type = config["data_type"]
+        allele_type_fields = ALLELE_TYPE_FIELDS[data_type]
+        region_flag_fields = REGION_FLAG_FIELDS[data_type]
+
         # Read in field necessity markdown file.
         # When submitting hail dataproc job, include "--files field_requirements.md".
         try:
@@ -985,10 +1334,20 @@ def main(args):
         if args.use_logtest_ht:
             logger.info("Using logtest ht...")
             ht = create_logtest_ht(args.exclude_xnonpar_y_in_logtest)
+            validate_config_fields_in_ht(ht=ht, config=config)
 
         else:
-            # TODO: Add resources to intake federated data once obtained.
-            ht = public_release(data_type="genomes").ht()
+            # Load data from the specified gnomAD resource function.
+            ht = load_gnomad_data(
+                gnomad_input_file=args.gnomad_input_file,
+                version=args.gnomad_version,
+                data_type=data_type,
+                test=args.gnomad_test,
+                sample_set=args.gnomad_sample_set,
+                public_release=args.gnomad_public_release,
+                environment=args.gnomad_environment,
+            )
+            output_base = f"{output_base}/{data_type}/{args.gnomad_input_file}"
 
             # Check that fields specified in the config are present in the Table.
             validate_config_fields_in_ht(ht=ht, config=config)
@@ -998,48 +1357,17 @@ def main(args):
             if build != "GRCh38":
                 raise ValueError(f"Reference genome is {build}, not GRCh38!")
 
-            # Filter to test partitions if specified.
             if test_n_partitions:
                 logger.info(
                     "Filtering to %d partitions and sex chromosomes...",
                     test_n_partitions,
                 )
-                test_ht = ht._filter_partitions(range(test_n_partitions))
-
-                x_ht = hl.filter_intervals(
-                    ht, [hl.parse_locus_interval("chrX")]
-                )._filter_partitions(range(test_n_partitions))
-
-                y_ht = hl.filter_intervals(
-                    ht, [hl.parse_locus_interval("chrY")]
-                )._filter_partitions(range(test_n_partitions))
-
-                ht = test_ht.union(x_ht, y_ht)
-
-        row_to_globals_check = {
-            config["freq_fields"]["freq"]: [
-                config["freq_fields"]["freq_meta"],
-                config["freq_fields"]["freq_meta_sample_count"],
-            ]
-        }
-        if config["freq_fields"].get("freq_index_dict"):
-            row_to_globals_check[config["freq_fields"]["freq"]].append(
-                config["freq_fields"]["freq_index_dict"]
-            )
-
-        if config.get("faf_fields"):
-            row_to_globals_check[config["faf_fields"]["faf"]] = [
-                config["faf_fields"]["faf_meta"],
-            ]
-            if config["faf_fields"].get("faf_index_dict"):
-                row_to_globals_check[config["faf_fields"]["faf"]].append(
-                    config["faf_fields"]["faf_index_dict"]
-                )
+                ht = filter_to_test_partitions(ht, test_n_partitions)
 
         logger.info("Check that row and global annotations lengths match...")
-        check_global_and_row_annot_lengths(
-            t=ht,
-            row_to_globals_check=row_to_globals_check,
+        run_row_to_globals_length_check(
+            ht=ht,
+            config=config,
             check_all_rows=not args.check_only_first_rows_to_globals,
         )
         check_globals_for_retired_terms(ht)
@@ -1069,6 +1397,8 @@ def main(args):
             field_issues, fields_validated, type_issues, types_validated
         )
 
+        check_fields_not_in_requirements(ht, field_types)
+
         # TODO: Add in lof per person check.
         logger.info("Unfurl array annotations...")
         annotations = unfurl_array_annotations(
@@ -1078,38 +1408,8 @@ def main(args):
         )
         ht = ht.annotate(info=ht.info.annotate(**annotations))
 
-        info_dict = {}
-
-        # Add region_flag fields if present.
-        missing_region_flags = []
-        if "region_flags" in ht.row:
-            for field in REGION_FLAG_FIELDS:
-                if field in ht["region_flags"]:
-                    info_dict[field] = ht["region_flags"][field]
-                else:
-                    missing_region_flags.append(field)
-        region_flags = [f for f in REGION_FLAG_FIELDS if f not in missing_region_flags]
-        if missing_region_flags:
-            logger.warning("Missing region_flag fields: %s", missing_region_flags)
-
-        # Add allele_info fields if present.
-        missing_allele_info = []
-        if "allele_info" in ht.row:
-            for field in ALLELE_TYPE_FIELDS:
-                if field in ht["allele_info"]:
-                    info_dict[field] = ht["allele_info"][field]
-                else:
-                    missing_allele_info.append(field)
-        if missing_allele_info:
-            logger.warning("Missing allele type fields: %s", missing_allele_info)
-
-        # Add monoallelic and only_het fields to info dict.
-        if "monoallelic" in ht.row:
-            info_dict["monoallelic"] = ht["monoallelic"]
-        if "only_het" in ht.row:
-            info_dict["only_het"] = ht["only_het"]
-
-        ht = ht.annotate(info=ht.info.annotate(**info_dict))
+        logger.info("Creating info annotations...")
+        ht = add_info_annotations(ht, region_flag_fields, allele_type_fields)
 
         # If config specifies to check for monoallelic and only heterozygous sites,
         # create the site_gt_check_expr to pass to validate_federated_data.
@@ -1121,12 +1421,14 @@ def main(args):
         else:
             site_gt_check_expr = None
 
+        region_flags = [f for f in region_flag_fields if f in ht.info]
+
         validate_federated_data(
             ht=ht,
-            missingness_threshold=config["missingness_threshold"],
-            struct_annotations_for_missingness=config[
-                "struct_annotations_for_missingness"
-            ],
+            missingness_threshold=args.missingness_threshold,
+            struct_annotations_to_skip_missingness=config.get(
+                "struct_annotations_to_skip_missingness"
+            ),
             freq_meta_expr=ht[config["freq_fields"]["freq_meta"]],
             freq_annotations_to_sum=config["freq_annotations_to_sum"],
             sort_order=config["sort_order"],
@@ -1138,18 +1440,19 @@ def main(args):
             site_gt_check_expr=site_gt_check_expr,
         )
 
-        handler.flush()
+        memory_handler.flush()
         log_output = log_stream.getvalue()
 
         # TODO: Create resource functions when know organization of federated data.
-        log_file = args.output_base + ".log"
-        output_file = args.output_base + ".html"
+        log_file = output_base + ".log"
+        output_file = output_base + ".html"
 
         # Write parsed log to html file.
         with hl.hadoop_open(log_file, "w") as f:
             f.write(log_output)
 
         parsed_logs = parse_log_file(log_file)
+        logger.info("Writing html file to %s...", output_file)
         generate_html_report(parsed_logs, output_file)
 
     finally:
@@ -1158,16 +1461,16 @@ def main(args):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
 
-    # Create a mutually exclusive group for --test-n-partitions and --use-test-ht.
+    # Create a mutually exclusive group for --test-n-partitions and --use-logtest-ht.
     test_group = parser.add_mutually_exclusive_group()
 
     test_group.add_argument(
         "--test-n-partitions",
         help=(
             "Use only N partitions of the input (as well as sex chromosomes) for testing purposes. Defaults"
-            "to 2 if passed without a value. Cannot be used if --use-logtest-ht is set."
+            " to 2 if passed without a value. Cannot be used if --use-logtest-ht is set."
         ),
         nargs="?",
         const=2,
@@ -1191,21 +1494,31 @@ def main(args):
     parser.add_argument(
         "--config-path",
         help=(
-            "Path to JSON config file for defining parameters. Paramters to define are as follows:"
-            "missingness_threshold: Float defining upper cutoff for allowed amount of missingness. Missingness above this value will be flagged as 'FAILED'."
-            "struct_annotations_for_missingness: List of struct annotations to check for missingness."
-            "freq_fields: Dictionary containing the names of frequency-related fields ('freq': Name of annotation containing the array of frequency metric objects "
-            "corresponding to each frequency metadata group; 'freq_meta': Name of annotation containing allele frequency metadata, an ordered list containing the frequency aggregation group for "
-            "each element of the 'freq' array row annotation, with at least the following groups: ('group': adj/raw, 'gen_anc': inferred genetic ancestry group, 'sex': sex karyotype).; 'freq_meta_sample_count': Name of "
-            "annotation containing sample count per sample grouping defined in the 'freq_meta' global annotation."
-            "faf_fields: Dictionary containing the names of filtering allele frequency (FAF) related fields ('faf': Name of annotation containing structs of FAF information; 'faf_meta': Name of annotation "
-            "for FAF metadata, an ordered list containing the frequency aggregation group for each element of the 'faf' arrays, with at least the following groups: ('group': adj/raw, 'gen_anc': inferred genetic ancestry group, 'sex': sex karyotype). "
-            "freq_annotations_to_sum: List of annotation fields within `freq_meta` to sum. Example: ['AC', 'AN', 'homozygote_count']."
-            "sort_order: Order in which groupings are unfurled into flattened annotations. Default is ['gen_anc', 'sex', 'group']."
-            "nhomalt_metric: Name of metric denoting homozygous alternate count."
-            "subsets: List of sample subsets to include for the subset validity check."
-            "variant_filter_field: String of variant filtration used in the filters annotation of the Hail Table (e.g. 'RF', 'VQSR', 'AS_VQSR')."
-            "check_mono_and_only_het: Boolean indicating whether to check for monoallelic and 100 percent heterozygous sites in the Table ('monoallelic' and 'only_het' annotations must be present)."
+            "Path to JSON config file for defining parameters. Parameters to define are as follows:\n"
+            " - struct_annotations_to_skip_missingness: Optional list of top-level struct annotations to skip during missingness checks.\n"
+            " - freq_fields: Dictionary containing the names of frequency-related fields:\n"
+            "      * freq: Name of annotation containing the array of frequency metric objects\n"
+            "        corresponding to each frequency metadata group.\n"
+            "      * freq_meta: Name of annotation containing allele frequency metadata, an\n"
+            "        ordered list containing the frequency aggregation group for each element\n"
+            "        of the freq array row annotation, with at least the following groups:\n"
+            "        group (adj/raw), gen_anc (inferred genetic ancestry group), and sex\n"
+            "        (sex karyotype).\n"
+            "      * freq_meta_sample_count: Name of annotation containing sample count per\n"
+            "        sample grouping defined in the freq_meta global annotation.\n"
+            " - faf_fields: Dictionary containing the names of filtering allele frequency (FAF) related fields:\n"
+            "      * faf: Name of annotation containing structs of FAF information.\n"
+            "      * faf_meta: Name of annotation for FAF metadata, an ordered list\n"
+            "        containing the frequency aggregation group for each element of the faf\n"
+            "        arrays, with at least the following groups: group (adj/raw), gen_anc\n"
+            "        (inferred genetic ancestry group), and sex (sex karyotype).\n"
+            " - freq_annotations_to_sum: List of annotation fields within `freq_meta` to sum. Example: ['AC', 'AN', 'homozygote_count'].\n"
+            " - sort_order: Order in which groupings are unfurled into flattened annotations. Default is ['gen_anc', 'sex', 'group'].\n"
+            " - nhomalt_metric: Name of metric denoting homozygous alternate count.\n"
+            " - subsets: List of sample subsets to include for the subset validity check.\n"
+            " - variant_filter_field: String of variant filtration used in the filters annotation of the Hail Table (e.g. 'RF', 'VQSR', 'AS_VQSR').\n"
+            " - data_type: Data type to run checks on. One of 'exomes' or 'genomes'.\n"
+            " - check_mono_and_only_het: Whether to run the check for monoallelic and 100 percent heterozygous sites in the Table('monoallelic' and 'only_het' annotations must be present)."
         ),
         type=str,
     )
@@ -1227,6 +1540,53 @@ def main(args):
         type=str,
         default="gs://gnomad-tmp/federated_validity_checks/federated_validity_checks",
     )
-
+    parser.add_argument(
+        "--missingness-threshold",
+        help="Float defining upper cutoff for allowed amount of missingness. Missingness above this value will be flagged as 'FAILED'.",
+        type=float,
+        default=0.50,
+    )
+    # Create a group for gnomAD input arguments.
+    gnomad_group = parser.add_argument_group("gnomad", "gnomAD input options")
+    gnomad_group.add_argument(
+        "--gnomad-input-file",
+        help="Source to load gnomAD data from. 'freq' loads from get_freq and 'release_sites' loads from release_sites. Default is None.",
+        choices=["freq", "release_sites"],
+        type=str,
+        default=None,
+    )
+    gnomad_group.add_argument(
+        "--gnomad-version",
+        help="Version of gnomAD resources to use. Default is None.",
+        choices=["4.0", "4.1", "4.1.1", "5.0"],
+        default=None,
+        type=str,
+    )
+    gnomad_group.add_argument(
+        "--gnomad-test",
+        help="Load test dataset (smaller subset for testing).",
+        action="store_true",
+    )
+    gnomad_group.add_argument(
+        "--gnomad-sample-set",
+        help="Sample set of annotation resource to load, if applicable. One of 'aou', 'gnomad', or 'merged'. Default is None.",
+        choices=["aou", "gnomad", "merged"],
+        type=str,
+        default=None,
+    )
+    gnomad_group.add_argument(
+        "--gnomad-public-release",
+        help="Whether or not to use the public version of the release when loading data. Only applicable when loading 'release_sites'.",
+        action="store_true",
+    )
+    gnomad_group.add_argument(
+        "--gnomad-environment",
+        help=(
+            "Environment to use when loading gnomAD data. Must be one of 'rwb', 'batch', or 'dataproc'. Default is None."
+        ),
+        choices=["rwb", "batch", "dataproc"],
+        type=str,
+        default=None,
+    )
     args = parser.parse_args()
     main(args)

Field	Type	Description	Example	Field Necessity
freq_meta	`array<dict<str, str>>`	Array of frequency metadata dictionaries containing the frequency aggregation group for each element of the ‘freq’ array row annotation. Each dictionary should have the following keys: 'gen_anc', 'group', 'sex'. A 'downsampling' key is optional. The 'adj' group should always be the first value of the array, and the 'raw' group should be the second value. Required 'group' values are 'adj' and 'raw'. Required 'sex' values are 'XX' and 'XY'. Specific values are not required for 'gen_anc'.	`[{'group': 'adj'},{'group': 'raw'},{'gen_anc': 'afr', 'group': 'adj'},{'gen_anc': 'amr', 'group': 'adj'},{'group': 'adj', 'sex': 'XX'},{'group': 'adj', 'sex': 'XY'}, ...]`	Required
freq_index_dict	`dict<str, int32>`	Dictionary keyed by specified label grouping combinations ('group': 'adj'/'raw', 'gen_anc': inferred genetic ancestry group, 'sex': sex karyotype), with values describing the corresponding index of each grouping entry in the ‘freq’ array row annotation. If provided, keys must be formatted in the order 'gen_anc'_'sex'_'group'. If a 'downsampling' key is included, the order should be 'downsampling'_'gen_anc'_'sex'_'group'. The 'adj' value should always be at index 0 and the 'raw' value at index 1. A more detailed description can be found at https://gnomad.broadinstitute.org/help/v4-hts.	`{"adj": 0, "raw": 1, "afr_adj": 2, "amr_adj": 3, "XX_adj": 4, "XY_adj": 5, ...}`	Optional
freq_meta_sample_count	`array<int32>`	A sample count per sample grouping defined in the 'freq_meta' global annotation. Must be in the same order as 'freq'/'freq_meta'.	`[730947, 730947, 16740, 15001, 50000, 680947, ...]`	Required
faf_meta	`array<dict<str, str>>`	Filtering allele frequency metadata. An ordered list containing the frequency aggregation group for each element of the ‘faf’ array row annotation.	—	Not Needed
faf_index_dict	`dict<str, int32>`	Dictionary keyed by specified label grouping combinations ('group': 'adj'/'raw', 'gen_anc': inferred genetic ancestry group, 'sex': sex karyotype), with values describing the corresponding index of each grouping entry in the filtering allele frequency (‘faf’) row annotation.	—	Not Needed
age_distribution	`struct { ... }`	Callset-wide age histogram. Cohorts for gnomAD vary in how they report age (some report the age at diagnosis, others report the age of last visit, etc.), so the ages associated with the gnomAD data can be thought of as the last known age of the individual. Information on age is not available for all gnomAD samples. This field is required with the acknowledgement that age data may not be available for all samples. If no age is available for any samples, set all `bin_freq` values to `0`.	—	Required
age_distribution.bin_edges	`array<float64>`	Bin edges for age histogram: `30.0 \| 35.0 \| 40.0 \| 45.0 \| 50.0 \| 55.0 \| 60.0 \| 65.0 \| 70.0 \| 75.0 \| 80.0`.	`[30.0, 35.0, 40.0, 45.0, ...]`	Required
age_distribution.bin_freq	`array<int32>`	Bin frequencies for the age histogram. This is the number of records found in each bin.	`[101, 122, 85, 4, ...]`	Required
age_distribution.n_smaller	`int32`	Count of age values falling below lowest histogram bin edge.	`1000`	Required
age_distribution.n_larger	`int32`	Count of age values falling above highest histogram bin edge.	`30`	Required
downsamplings	`dict<str, array<int32>>`	Dictionary keyed by dataset with values corresponding to available downsampled sample counts.	`{'gnomad': [10, 100, 500...]}`	Not Needed
filtering_model	`struct { ... }`	The variant filtering model used and its specific cutoffs.	—	Not Needed
filtering_model.filter_name	`str`	Variant filtering model name used in the 'filters' row annotation, indicating the variant was filtered by this model during variant QC.	—	Not Needed
filtering_model.score_name	`str`	Name of the score used in filtering.	—	Not Needed
filtering_model.snv_cutoff	`struct { ... }`	SNV filtering cutoff information.	—	Not Needed
filtering_model.snv_cutoff.bin	`int32`	Filtering percentile cutoff for SNVs.	—	Not Needed
filtering_model.snv_cutoff.min_score	`float64`	Minimum score at SNV filtering percentile cutoff.	—	Not Needed
filtering_model.indel_cutoff	`struct { ... }`	Indel filtering cutoff information.	—	Not Needed
filtering_model.indel_cutoff.bin	`int32`	Filtering percentile cutoff for indels.	—	Not Needed
filtering_model.indel_cutoff.min_score	`float64`	Minimum score at indel filtering percentile cutoff.	—	Not Needed
filtering_model.snv_training_variables	`array<str>`	Variant annotations used as features in the SNV filtering model.	—	Not Needed
filtering_model.indel_training_variables	`array<str>`	Variant annotations used as features in the indel filtering model.	—	Not Needed
inbreeding_coeff_cutoff	`float64`	Inbreeding Coefficient threshold used to hard filter variants.	—	Not Needed
excess_het_cutoff	`float64`	Excess heterozygosity threshold used to hard filter variants.	—	Not Needed
tool_versions	`struct { ... }`	Versions of in silico predictors used in the callset.	—	Not Needed
tool_versions.cadd_version	`str`	Combined Annotation Dependent Depletion (CADD) version.	—	Not Needed
tool_versions.revel_version	`str`	Rare Exome Variant Ensemble Learner (REVEL) version.	—	Not Needed
tool_versions.spliceai_version	`str`	SpliceAI version.	—	Not Needed
tool_versions.pangolin_version	`array<str>`	Pangolin version.	—	Not Needed
tool_versions.phylop_version	`str`	phyloP version.	—	Not Needed
tool_versions.dbsnp_version	`str`	dbSNP version.	—	Not Needed
tool_versions.sift_version	`str`	Sorting Intolerant from Tolerant (SIFT) version.	—	Not Needed
tool_versions.polyphen_version	`str`	Polymorphism Phenotyping v2 (Polyphen-v2) version.	—	Not Needed
vrs_versions	`struct { ... }`	The Variant Representation Specification (VRS) version that was used to compute IDs on the callset. Global and row VRS annotations are optional, but the global annotations must be filled out if the row annotations are provided.	—	Optional
vrs_versions.vrs_schema_version	`str`	The version of the VRS schema that is used to represent variants and compute identifiers. Must be `2.0.1`.	`"2.0.1"`	Optional
vrs_versions.vrs_python_version	`str`	The version of the vrs-python library that was used to compute IDs on the callset. Must be `2.1.3`.	`"2.1.3"`	Optional
vrs_versions.seqrepo_version	`str`	The version of the SeqRepo database that was used in VRS computations. Must be `2024-12-20`.	`"2024-12-20"`	Optional
vep_globals	`struct { ... }`	Information about VEP annotations.	—	Not Needed
vep_globals.vep_version	`str`	VEP version that was run on the callset.	—	Not Needed
vep_globals.vep_help	`str`	Output from vep --help.	—	Not Needed
vep_globals.vep_config	`str`	VEP configuration to run VEP version with Hail. File created using command within VEP init shell script in https://github.com/broadinstitute/gnomad_methods/tree/main.	—	Not Needed
vep_globals.gencode_version	`str`	GENCODE version used in VEP.	—	Not Needed
vep_globals.mane_select_version	`str`	MANE select version used in VEP.	—	Not Needed
frequency_README	`str`	Explanation of how to use the 'freq_index_dict' global annotation to extract frequencies from the 'freq' row annotation.	—	Not Needed
date	`str`	Date Hail Table was created.	`"2025-04-09"`	Required
version	`str`	Version of the file.	—	Not Needed