Skip to content
Open
37 changes: 37 additions & 0 deletions docs/configuration/storage-config.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Quickwit currently supports four types of storage providers:

Storage URIs refer to different storage providers identified by a URI "protocol" or "scheme". Quickwit supports the following storage URI protocols:
- `s3://` for Amazon S3 and S3-compatible
- `s3+<name>://` for additional S3-compatible backends configured under `storage.s3.named.<name>` (see [Named S3 backends](#named-s3-backends))
- `azure://` for Azure Blob Storage
- `file://` for local file systems
- `gs://` for Google Cloud Storage
Expand Down Expand Up @@ -104,6 +105,42 @@ storage:
endpoint: https://storage.googleapis.com
```

#### Named S3 backends

In addition to the primary `s3:` block, you can declare any number of additional S3-compatible backends under `storage.s3.named.<name>`. Each entry is an independent endpoint with its own credentials, region, and flags. Indexes route to a named backend via the URI scheme `s3+<name>://bucket/path` (plain `s3://` continues to use the primary endpoint).

Each named entry accepts the same fields as the primary `s3:` block, *except* `named` itself: named backends cannot be nested. If `access_key_id` / `secret_access_key` are omitted on a named entry, the global AWS SDK credential chain is used (environment variables, instance metadata, etc.).

Named backends are self-contained: the process-wide `QW_S3_ENDPOINT` and `QW_S3_FORCE_PATH_STYLE_ACCESS` overrides apply to the primary `s3:` backend only. A named backend always uses its own `endpoint` and `force_path_style_access` values.

```yaml
storage:
  s3:
    # Primary backend — addressed by plain `s3://...` URIs.
    endpoint: https://s3.us-east-1.amazonaws.com
    region: us-east-1
    named:
      # Addressed by `s3+secondary://bucket/path` URIs.
      secondary:
        endpoint: https://s3.eu-west-3.amazonaws.com
        region: eu-west-3
        access_key_id: ${SECONDARY_S3_ACCESS_KEY_ID}
        secret_access_key: ${SECONDARY_S3_SECRET_ACCESS_KEY}
      # Addressed by `s3+seaweed://bucket/path` URIs. Falls back to the
      # global AWS SDK credentials when keys are omitted.
      seaweed:
        endpoint: http://seaweedfs-s3:8333
        region: us-east-1
        force_path_style_access: true
```

An index pointed at a named backend declares its URI accordingly:

```yaml
index_id: logs-eu
index_uri: s3+secondary://logs-bucket/logs-eu
```

### Azure storage configuration

| Property | Description | Default value |
Expand Down
85 changes: 79 additions & 6 deletions quickwit/quickwit-common/src/uri.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ impl FromStr for Protocol {
"ram" => Ok(Protocol::Ram),
"s3" => Ok(Protocol::S3),
"gs" => Ok(Protocol::Google),
// `s3+<name>://...` for a named S3-compatible backend configured under
// `storage.s3.named.<name>`. Routes to the same factory as `s3://`.
s if s.starts_with("s3+") && s.len() > 3 => Ok(Protocol::S3),
Comment thread
papaharry marked this conversation as resolved.
_ => bail!("unknown URI protocol `{protocol}`"),
}
}
Expand Down Expand Up @@ -186,13 +189,33 @@ impl Uri {
let parent_path = path.parent()?;

Some(Self {
uri: format!("{protocol}{PROTOCOL_SEPARATOR}{}", parent_path.display()),
// Preserve the scheme verbatim so an `s3+<name>` qualifier survives.
uri: format!(
"{}{PROTOCOL_SEPARATOR}{}",
self.scheme(),
parent_path.display()
),
protocol,
})
}

/// Returns the scheme exactly as it is written in the stored URI, i.e. the
/// text preceding the first `://` separator.
///
/// For named S3 backends this keeps the `s3+<name>` qualifier, so the result
/// can differ from the canonical protocol string (e.g. `s3+alt` rather than
/// `s3`). When the stored URI contains no separator, the canonical protocol
/// string is returned instead.
fn scheme(&self) -> &str {
    if let Some((raw_scheme, _rest)) = self.uri.split_once(PROTOCOL_SEPARATOR) {
        return raw_scheme;
    }
    self.protocol.as_str()
}

/// Returns the path portion of the URI as a `Path`: everything after the
/// first `://` separator, or the whole URI string when no separator is found.
fn path(&self) -> &Path {
// NOTE(review): the next line looks like the pre-change implementation shown
// side by side with its replacement in this diff view — confirm it is not
// present in the actual file.
Path::new(&self.uri[self.protocol.as_str().len() + PROTOCOL_SEPARATOR.len()..])
// Slice at the actual `://` separator rather than assuming the scheme
// equals the canonical protocol — `s3+<name>` schemes are longer.
let path = match self.uri.split_once(PROTOCOL_SEPARATOR) {
Some((_scheme, path)) => path,
// No separator: treat the entire stored string as the path.
None => &self.uri,
};
Path::new(path)
}

/// Returns the last component of the URI.
Expand Down Expand Up @@ -262,9 +285,13 @@ impl Uri {
if uri_str.is_empty() {
bail!("failed to parse empty URI");
}
let (protocol, mut path) = match uri_str.split_once(PROTOCOL_SEPARATOR) {
None => (Protocol::File, uri_str.to_string()),
Some((protocol, path)) => (Protocol::from_str(protocol)?, path.to_string()),
let (scheme_opt, protocol, mut path) = match uri_str.split_once(PROTOCOL_SEPARATOR) {
None => (None, Protocol::File, uri_str.to_string()),
Some((scheme, path)) => (
Some(scheme.to_string()),
Protocol::from_str(scheme)?,
path.to_string(),
),
};
if protocol == Protocol::File {
if path.starts_with('~') {
Expand Down Expand Up @@ -292,8 +319,14 @@ impl Uri {
.to_string_lossy()
.to_string();
}
// Preserve `s3+<name>` qualifier so the storage resolver can route to
// the named backend; other schemes normalize to canonical form.
let display_scheme: &str = match scheme_opt.as_deref() {
Some(s) if s.starts_with("s3+") => s,
_ => protocol.as_str(),
};
Ok(Self {
uri: format!("{protocol}{PROTOCOL_SEPARATOR}{path}"),
uri: format!("{display_scheme}{PROTOCOL_SEPARATOR}{path}"),
protocol,
})
}
Expand Down Expand Up @@ -663,6 +696,26 @@ mod tests {
);
}

#[test]
fn test_uri_named_s3_scheme() {
    // A named-backend URI keeps its `s3+<name>` scheme verbatim while still
    // resolving to the S3 protocol. `parent` must carry the qualifier over,
    // and `file_name` must be computed from the path after the real scheme
    // (not after the shorter canonical `s3`).
    let nested_uri = Uri::for_test("s3+alt://bucket/foo/bar");
    assert_eq!(nested_uri.as_str(), "s3+alt://bucket/foo/bar");
    assert_eq!(nested_uri.protocol(), Protocol::S3);
    assert_eq!(nested_uri.parent().unwrap(), "s3+alt://bucket/foo");
    assert_eq!(nested_uri.file_name().unwrap(), Path::new("bar"));

    // Backend names containing a dash behave the same way.
    let dashed_uri = Uri::for_test("s3+with-dash://bucket/key");
    assert_eq!(dashed_uri.parent().unwrap(), "s3+with-dash://bucket");
    assert_eq!(dashed_uri.file_name().unwrap(), Path::new("key"));

    // Same guard as for plain `s3`: a URI that only names the bucket has no
    // parent, with or without a trailing slash.
    for bucket_only in ["s3+alt://bucket", "s3+alt://bucket/"] {
        assert!(Uri::for_test(bucket_only).parent().is_none());
    }
}

#[test]
fn test_uri_file_name() {
assert!(Uri::for_test("file:///").file_name().is_none());
Expand Down Expand Up @@ -812,4 +865,24 @@ mod tests {
serde_json::Value::String("s3://bucket/key".to_string())
);
}

#[test]
fn test_uri_s3_named_preserved() {
    // `s3+<name>` is the routing token that selects a named S3-compatible
    // backend (`storage.s3.named.<name>`). Parsing and serialization must
    // both keep it, otherwise the storage resolver cannot recover the backend
    // name after deserialization and the URI would silently fall back to the
    // primary endpoint.
    const NAMED_URI: &str = "s3+alt://bucket/key";

    // Parsing keeps the qualifier while mapping to the S3 protocol.
    let parsed = Uri::from_str(NAMED_URI).unwrap();
    assert_eq!(parsed.protocol(), Protocol::S3);
    assert_eq!(parsed.as_str(), NAMED_URI);

    // Serialization emits the qualified URI as a plain JSON string.
    let serialized = serde_json::to_value(&parsed).unwrap();
    assert_eq!(
        serialized,
        serde_json::Value::String(NAMED_URI.to_string())
    );

    // Deserialization yields the same qualified URI and protocol.
    let deserialized: Uri = serde_json::from_value(serialized).unwrap();
    assert_eq!(deserialized.as_str(), NAMED_URI);
    assert_eq!(deserialized.protocol(), Protocol::S3);
}
}
Loading