Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions docs/src/format/table/transaction.md
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ are rebaseable conflicts with Update:

### UpdateConfig

Modifies table configuration, table metadata, schema metadata, or field metadata without changing data.
Modifies table configuration, table metadata, schema metadata, field metadata, or fragment metadata without changing data.

<details>
<summary>UpdateConfig protobuf message</summary>
Expand All @@ -423,11 +423,14 @@ Modifies table configuration, table metadata, schema metadata, or field metadata

#### UpdateConfig Compatibility

An UpdateConfig operation only modifies table config and tends to be compatible with other operations. Here
An UpdateConfig operation only modifies metadata and tends to be compatible with other operations. Here
are the operations that conflict with UpdateConfig:

- Overwrite
- UpdateConfig (only if the two operations modify the same config)
- Overwrite (if the UpdateConfig modifies schema, field, or fragment metadata)
- Restore (if the UpdateConfig modifies schema, field, or fragment metadata)
- UpdateConfig (only if the two operations modify the same config or the same fragment/field/schema metadata)
- Delete (only if the UpdateConfig modifies metadata on a fragment that was deleted)
- Update (only if the UpdateConfig modifies metadata on a fragment that was removed)

### DataReplacement

Expand Down
4 changes: 3 additions & 1 deletion docs/src/format/table/versioning.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ they should return an "unsupported" error on any read or write operation.
| 4 | `FLAG_USE_V2_FORMAT_DEPRECATED` | No | No | Files are written with the new v2 format. This flag is deprecated and no longer used. |
| 8 | `FLAG_TABLE_CONFIG` | No | Yes | Table config is present in the manifest. |
| 16 | `FLAG_BASE_PATHS` | Yes | Yes | Dataset uses multiple base paths (for shallow clones or multi-base datasets). |
| 32 | `FLAG_DISABLE_TRANSACTION_FILE` | No | Yes | Inline transaction in manifest; no separate transaction file written. |
| 64 | `FLAG_FRAGMENT_METADATA` | No | Yes | Fragments may contain free-form key-value metadata. |

</div>

Flags with bit values 32 and above are unknown and will cause implementations to reject the dataset with an "unsupported" error.
Flags with bit values 128 and above are unknown and will cause implementations to reject the dataset with an "unsupported" error.
4 changes: 4 additions & 0 deletions protos/table.proto
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,10 @@ message DataFragment {
// deletion tombstones. To compute the current number of rows, subtract
// `deletion_file.num_deleted_rows` from this value.
uint64 physical_rows = 4;

// Free-form key-value metadata for the fragment.
// Intended for lightweight metadata such as creation time, process ID, etc.
map<string, string> metadata = 11;
}

message DataFile {
Expand Down
3 changes: 2 additions & 1 deletion protos/transaction.proto
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,13 @@ message Transaction {
}

// An operation that updates the table config, table metadata, schema metadata,
// or field metadata.
// field metadata, or fragment metadata.
message UpdateConfig {
UpdateMap config_updates = 6;
UpdateMap table_metadata_updates = 7;
UpdateMap schema_metadata_updates = 8;
map<int32, UpdateMap> field_metadata_updates = 9;
map<uint64, UpdateMap> fragment_metadata_updates = 10;

// Deprecated -------------------------------
map<string, string> upsert_values = 1;
Expand Down
2 changes: 2 additions & 0 deletions rust/lance-table/benches/manifest_intern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ fn make_uniform_pb_fragments(n: u64, num_fields: usize) -> Vec<pb::DataFragment>
version_bytes.clone(),
),
),
metadata: Default::default(),
})
.collect()
}
Expand Down Expand Up @@ -148,6 +149,7 @@ fn make_diverse_pb_fragments(
version_payloads[version_idx].clone(),
),
),
metadata: Default::default(),
}
})
.collect()
Expand Down
57 changes: 56 additions & 1 deletion rust/lance-table/src/feature_flags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ pub const FLAG_TABLE_CONFIG: u64 = 8;
pub const FLAG_BASE_PATHS: u64 = 16;
/// Disable writing the transaction file under `_transaction/`. This flag is set when the
/// transaction should only be written inline in the manifest.
pub const FLAG_DISABLE_TRANSACTION_FILE: u64 = 32;
/// Fragments may contain free-form key-value metadata
pub const FLAG_FRAGMENT_METADATA: u64 = 64;
/// The first bit that is unknown as a feature flag
pub const FLAG_UNKNOWN: u64 = 64;
pub const FLAG_UNKNOWN: u64 = 128;

/// Set the reader and writer feature flags in the manifest based on the contents of the manifest.
pub fn apply_feature_flags(
Expand Down Expand Up @@ -74,6 +76,15 @@ pub fn apply_feature_flags(
if disable_transaction_file {
manifest.writer_feature_flags |= FLAG_DISABLE_TRANSACTION_FILE;
}

let has_fragment_metadata = manifest
.fragments
.iter()
.any(|frag| !frag.metadata.is_empty());
if has_fragment_metadata {
manifest.writer_feature_flags |= FLAG_FRAGMENT_METADATA;
}

Ok(())
}

Expand Down Expand Up @@ -103,6 +114,7 @@ mod tests {
assert!(can_read_dataset(super::FLAG_TABLE_CONFIG));
assert!(can_read_dataset(super::FLAG_BASE_PATHS));
assert!(can_read_dataset(super::FLAG_DISABLE_TRANSACTION_FILE));
assert!(can_read_dataset(super::FLAG_FRAGMENT_METADATA));
assert!(can_read_dataset(
super::FLAG_DELETION_FILES
| super::FLAG_STABLE_ROW_IDS
Expand All @@ -120,12 +132,14 @@ mod tests {
assert!(can_write_dataset(super::FLAG_TABLE_CONFIG));
assert!(can_write_dataset(super::FLAG_BASE_PATHS));
assert!(can_write_dataset(super::FLAG_DISABLE_TRANSACTION_FILE));
assert!(can_write_dataset(super::FLAG_FRAGMENT_METADATA));
assert!(can_write_dataset(
super::FLAG_DELETION_FILES
| super::FLAG_STABLE_ROW_IDS
| super::FLAG_USE_V2_FORMAT_DEPRECATED
| super::FLAG_TABLE_CONFIG
| super::FLAG_BASE_PATHS
| super::FLAG_FRAGMENT_METADATA
));
assert!(!can_write_dataset(super::FLAG_UNKNOWN));
}
Expand Down Expand Up @@ -181,4 +195,45 @@ mod tests {
0
);
}

#[test]
fn test_fragment_metadata_feature_flag() {
    use crate::format::{DataStorageFormat, Fragment, Manifest};
    use arrow_schema::{Field as ArrowField, Schema as ArrowSchema};
    use lance_core::datatypes::Schema;
    use std::collections::HashMap;
    use std::sync::Arc;

    // A single non-nullable Int64 column is enough; only fragment metadata matters here.
    let x_field = ArrowField::new("x", arrow_schema::DataType::Int64, false);
    let arrow_schema = ArrowSchema::new(vec![x_field]);
    let schema = Schema::try_from(&arrow_schema).unwrap();

    // Wraps the given fragment in a fresh manifest, applies the feature flags, and
    // returns the resulting `(writer_feature_flags, reader_feature_flags)` pair.
    let flags_for = |frag: Fragment| {
        let mut manifest = Manifest::new(
            schema.clone(),
            Arc::new(vec![frag]),
            DataStorageFormat::default(),
            HashMap::new(),
        );
        apply_feature_flags(&mut manifest, false, false).unwrap();
        (manifest.writer_feature_flags, manifest.reader_feature_flags)
    };

    // A fragment without metadata must not set the flag on either side.
    let (writer_flags, reader_flags) = flags_for(Fragment::new(0));
    assert_eq!(writer_flags & FLAG_FRAGMENT_METADATA, 0);
    assert_eq!(reader_flags & FLAG_FRAGMENT_METADATA, 0);

    // A fragment carrying metadata sets the writer flag and leaves the reader flag clear.
    let mut tagged = Fragment::new(0);
    tagged
        .metadata
        .insert("created_at".into(), "2026-05-22".into());
    let (writer_flags, reader_flags) = flags_for(tagged);
    assert_ne!(writer_flags & FLAG_FRAGMENT_METADATA, 0);
    assert_eq!(reader_flags & FLAG_FRAGMENT_METADATA, 0);
}
}
46 changes: 46 additions & 0 deletions rust/lance-table/src/format/fragment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ impl DataFileFieldInterner {
physical_rows,
last_updated_at_version_meta,
created_at_version_meta,
metadata: p.metadata,
})
}
}
Expand Down Expand Up @@ -503,6 +504,10 @@ pub struct Fragment {
/// Created at version metadata
#[serde(skip_serializing_if = "Option::is_none")]
pub created_at_version_meta: Option<RowDatasetVersionMeta>,

/// Free-form key-value metadata for the fragment.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub metadata: HashMap<String, String>,
}

impl Fragment {
Expand All @@ -515,6 +520,7 @@ impl Fragment {
physical_rows: None,
last_updated_at_version_meta: None,
created_at_version_meta: None,
metadata: HashMap::new(),
}
}

Expand Down Expand Up @@ -554,6 +560,7 @@ impl Fragment {
row_id_meta: None,
last_updated_at_version_meta: None,
created_at_version_meta: None,
metadata: HashMap::new(),
}
}

Expand Down Expand Up @@ -680,6 +687,7 @@ impl TryFrom<pb::DataFragment> for Fragment {
.created_at_version_sequence
.map(RowDatasetVersionMeta::try_from)
.transpose()?,
metadata: p.metadata,
})
}
}
Expand Down Expand Up @@ -721,6 +729,7 @@ impl From<&Fragment> for pb::DataFragment {
physical_rows: f.physical_rows.unwrap_or_default() as u64,
last_updated_at_version_sequence,
created_at_version_sequence,
metadata: f.metadata.clone(),
}
}
}
Expand Down Expand Up @@ -786,6 +795,43 @@ mod tests {
assert_eq!(fragment, fragment2);
}

#[test]
fn test_roundtrip_fragment_with_metadata() {
    let arrow_schema = ArrowSchema::new(vec![ArrowField::new("x", DataType::Float16, true)]);
    let lance_schema = Schema::try_from(&arrow_schema).unwrap();

    // Fragment with one legacy data file and two metadata entries.
    let mut original = Fragment::new(1);
    original.add_file_legacy("data.lance", &lance_schema);
    original.metadata.extend([
        (
            "created_at".to_string(),
            "2026-05-22T00:00:00Z".to_string(),
        ),
        ("process_id".to_string(), "pid-42".to_string()),
    ]);

    // Converting to protobuf must carry both metadata entries across.
    let encoded = pb::DataFragment::from(&original);
    assert_eq!(encoded.metadata.len(), 2);
    assert_eq!(
        encoded.metadata.get("created_at").unwrap(),
        "2026-05-22T00:00:00Z"
    );

    // Converting back must reproduce the original fragment exactly.
    let decoded = Fragment::try_from(encoded).unwrap();
    assert_eq!(original, decoded);
}

#[test]
fn test_roundtrip_fragment_empty_metadata() {
    let arrow_schema = ArrowSchema::new(vec![ArrowField::new("x", DataType::Float16, true)]);
    let lance_schema = Schema::try_from(&arrow_schema).unwrap();

    // Fragment with one legacy data file and no metadata entries.
    let mut original = Fragment::new(2);
    original.add_file_legacy("data.lance", &lance_schema);

    // The protobuf form must have an empty metadata map.
    let encoded = pb::DataFragment::from(&original);
    assert!(encoded.metadata.is_empty());

    // Converting back must reproduce the original fragment, still without metadata.
    let decoded = Fragment::try_from(encoded).unwrap();
    assert_eq!(original, decoded);
    assert!(decoded.metadata.is_empty());
}

#[test]
fn test_to_json() {
let mut fragment = Fragment::new(123);
Expand Down
2 changes: 2 additions & 0 deletions rust/lance-table/src/format/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1321,6 +1321,7 @@ mod tests {
physical_rows: None,
created_at_version_meta: None,
last_updated_at_version_meta: None,
metadata: HashMap::new(),
},
Fragment {
id: 1,
Expand All @@ -1333,6 +1334,7 @@ mod tests {
physical_rows: None,
created_at_version_meta: None,
last_updated_at_version_meta: None,
metadata: HashMap::new(),
},
];

Expand Down
1 change: 1 addition & 0 deletions rust/lance/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3288,6 +3288,7 @@ impl Dataset {
table_metadata_updates: None,
schema_metadata_updates: None,
field_metadata_updates,
fragment_metadata_updates: HashMap::new(),
},
)
.await
Expand Down
4 changes: 4 additions & 0 deletions rust/lance/src/dataset/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,18 +80,21 @@ impl<'a> std::future::IntoFuture for UpdateMetadataBuilder<'a> {
table_metadata_updates: None,
schema_metadata_updates: None,
field_metadata_updates: HashMap::new(),
fragment_metadata_updates: HashMap::new(),
},
MetadataType::TableMetadata => Operation::UpdateConfig {
config_updates: None,
table_metadata_updates: Some(update_map),
schema_metadata_updates: None,
field_metadata_updates: HashMap::new(),
fragment_metadata_updates: HashMap::new(),
},
MetadataType::SchemaMetadata => Operation::UpdateConfig {
config_updates: None,
table_metadata_updates: None,
schema_metadata_updates: Some(update_map),
field_metadata_updates: HashMap::new(),
fragment_metadata_updates: HashMap::new(),
},
};

Expand Down Expand Up @@ -167,6 +170,7 @@ impl<'a> std::future::IntoFuture for UpdateFieldMetadataBuilder<'a> {
table_metadata_updates: None,
schema_metadata_updates: None,
field_metadata_updates: self.field_metadata_updates,
fragment_metadata_updates: HashMap::new(),
},
)
.await?;
Expand Down
1 change: 1 addition & 0 deletions rust/lance/src/dataset/optimize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1691,6 +1691,7 @@ mod tests {
physical_rows: Some(0),
last_updated_at_version_meta: None,
created_at_version_meta: None,
metadata: HashMap::new(),
};
let single_bin = CandidateBin {
fragments: vec![fragment.clone()],
Expand Down
1 change: 1 addition & 0 deletions rust/lance/src/dataset/schema_evolution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1046,6 +1046,7 @@ mod test {
physical_rows: Some(50),
last_updated_at_version_meta: None,
created_at_version_meta: None,
metadata: HashMap::new(),
}))
} else {
Ok(None)
Expand Down
Loading
Loading