diff --git a/.github/workflows/rust-data-daemon.yaml b/.github/workflows/rust-data-daemon.yaml new file mode 100644 index 000000000..e84016672 --- /dev/null +++ b/.github/workflows/rust-data-daemon.yaml @@ -0,0 +1,81 @@ +name: Rust Data Daemon + +on: + pull_request: + branches: + - main + paths: + - 'rust/**' + - '.github/workflows/rust-data-daemon.yaml' + push: + branches: + - main + paths: + - 'rust/**' + - '.github/workflows/rust-data-daemon.yaml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-and-test: + runs-on: ubuntu-22.04 + defaults: + run: + working-directory: rust + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - name: Cache cargo build + uses: Swatinem/rust-cache@v2 + with: + workspaces: rust + + # PyO3 builds need a discoverable Python interpreter; the runner's default + # /usr/bin/python is fine, but actions/setup-python pins the version we + # test the cdylib against. + - name: Set up Python + id: setup-python + uses: actions/setup-python@v6 + with: + python-version: "3.11" + + - name: Check formatting + run: cargo fmt --check + + - name: Lint with clippy (workspace) + env: + PYO3_PYTHON: ${{ steps.setup-python.outputs.python-path }} + run: cargo clippy --workspace --all-targets -- -D warnings + + # The video-encoder tests early-return when ffmpeg is absent, so without + # this the encode/preflight path (and the ffmpeg-4.4.2 `-vsync` vs + # `-fps_mode` compatibility the daemon defends against) is never exercised + # in CI. ubuntu-22.04 ships ffmpeg 4.4.x, matching the target host. 
+ - name: Install ffmpeg + run: | + sudo apt-get update + sudo apt-get install -y ffmpeg + + - name: Test (workspace) + env: + PYO3_PYTHON: ${{ steps.setup-python.outputs.python-path }} + run: cargo test --workspace --verbose + + - name: Build release (workspace) + env: + PYO3_PYTHON: ${{ steps.setup-python.outputs.python-path }} + run: cargo build --release --workspace --verbose + + - name: Build documentation + env: + RUSTDOCFLAGS: -D warnings + PYO3_PYTHON: ${{ steps.setup-python.outputs.python-path }} + run: cargo doc --no-deps --document-private-items diff --git a/.gitignore b/.gitignore index b4443a765..69f03c827 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,10 @@ examples/logs/ examples/test_streaming/ .data_daemon_test_state/ + +# Bundled rust artefacts built into the package tree by +# `rust/scripts/build_wheel_artefacts.sh`. Both are per-machine and per-build, +# so we never commit them; the wheel-build CI job recreates them on every run. +# See docs/rust_data_daemon_development.md#packaging-the-wheel. +neuracore/data_daemon/bin/ +neuracore/data_daemon/_native_producer*.so diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 53d0f2962..22fa5a2b9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -69,3 +69,24 @@ repos: - .git/COMMIT_EDITMSG stages: [commit-msg] always_run: true + + # Rust formatting and linting for the data-daemon rewrite crate. + # Uses the system cargo toolchain (rustup) so the rustfmt / clippy version + # matches the developer's local install and the version CI uses. + - repo: local + hooks: + - id: cargo-fmt + name: cargo fmt + description: Format Rust sources in the data-daemon crate. + entry: cargo fmt --manifest-path rust/data_daemon/Cargo.toml -- + language: system + files: ^rust/data_daemon/.*\.rs$ + pass_filenames: false + + - id: cargo-clippy + name: cargo clippy + description: Lint the data-daemon crate with clippy (warnings denied). 
+ entry: cargo clippy --manifest-path rust/data_daemon/Cargo.toml --all-targets -- -D warnings + language: system + files: ^rust/data_daemon/.*\.(rs|toml|lock)$ + pass_filenames: false diff --git a/README.md b/README.md index b764fc07d..7c6807ad0 100644 --- a/README.md +++ b/README.md @@ -152,6 +152,7 @@ predictions = policy.predict(timeout=5) - [Environment Variables](./docs/environment_variable.md) - [Contribution Guide](./docs/contribution_guide.md) - [Data Daemon](./docs/data_daemon.md) +- [Rust Data Daemon — Developer Guide](./docs/rust_data_daemon_development.md) — building the [rust/](./rust/) workspace that ships inside the wheel as the data-daemon binary + `neuracore.data_daemon._native_producer` cdylib. # 💬 Community diff --git a/cSpell.json b/cSpell.json index cfd3492c3..d6be1195c 100644 --- a/cSpell.json +++ b/cSpell.json @@ -18,6 +18,5 @@ "language": "en,en-GB", "dictionaries": [ "neuracore-dictionary" - ], - "words": [] + ] } \ No newline at end of file diff --git a/docs/contribution_guide.md b/docs/contribution_guide.md index 5591f256b..78d70817d 100644 --- a/docs/contribution_guide.md +++ b/docs/contribution_guide.md @@ -323,6 +323,32 @@ If you encounter issues with your algorithm: - When uploading as a ZIP, make sure your module imports are correctly structured +## Development environment + +### Python + +```bash +git clone https://github.com/neuracoreai/neuracore +cd neuracore +pip install -e .[dev,ml] +pre-commit install +``` + +### Rust toolchain (data daemon) + +The data daemon is being rewritten in Rust under [rust/](../rust/). The pre-commit configuration runs `cargo fmt` and `cargo clippy` against the `data_daemon` crate, and CI ([.github/workflows/rust-data-daemon.yaml](../.github/workflows/rust-data-daemon.yaml)) builds and tests the whole Rust workspace on every PR that touches `rust/**`. + +The hooks invoke `cargo` from your `PATH` (`language: system`), so you need a working Rust toolchain locally to commit changes that touch the crate. 
Install one via [rustup](https://rustup.rs/): + +```bash +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +rustup component add rustfmt clippy +``` + +If you do not touch any file under `rust/data_daemon/`, the cargo hooks are skipped and rustup is not required. + +For the full developer workflow on the Rust crates β€” workspace layout, build/test/lint commands, running the daemon locally, the PyO3 producer cdylib, and SQLite state inspection β€” see [rust_data_daemon_development.md](rust_data_daemon_development.md). + ## Release Process ### Branch Strategy diff --git a/docs/data_daemon.md b/docs/data_daemon.md index e37e57c46..f5599fdfe 100644 --- a/docs/data_daemon.md +++ b/docs/data_daemon.md @@ -15,7 +15,7 @@ Profiles are optional. If you do not use a named profile, the daemon uses the de - How to run the daemon (CLI or from a script) - How profiles work (optional) and where they are stored - The configuration fields you can set -- Environment variables that control DB path, recordings root, and upload concurrency +- Environment variables that control DB path, recordings root, and other runtime settings - The order of precedence (defaults, profile, environment variables, CLI) - What happens to old daemon databases at startup (automatic schema migration) - A full CLI reference for the commands currently in use @@ -32,13 +32,21 @@ It does not explain internal implementation details. pip install -e . ``` -Optional, but recommended for video performance: +Recommended for video recording, and **required** when running the Rust daemon +(`NCD_RUST_DAEMON=1`): ```bash sudo apt-get update && sudo apt-get install -y ffmpeg ``` -The data daemon prefers the `ffmpeg` CLI encoder for recording. If the binary is not installed or encoder init fails, it automatically falls back to PyAV. 
+Both daemons encode video with the `ffmpeg` CLI, but they differ when `ffmpeg` +is missing or fails to initialise: +- The **Rust daemon** shells out to `ffmpeg` and runs a preflight at startup; if + the binary is missing or the build is incompatible it fails fast with a clear + message rather than starting and dropping every video recording. Install + `ffmpeg` before launching. +- The **legacy Python daemon** prefers the `ffmpeg` CLI but automatically falls + back to PyAV when `ffmpeg` is unavailable or the encoder fails to initialise. ### 2) Launch the daemon @@ -115,13 +123,18 @@ When you run: neuracore data-daemon launch ``` -the CLI starts the daemon as a separate Python process by running: +the CLI launches the daemon as a separate background process. There are two +daemon implementations and the launcher picks one based on the `NCD_RUST_DAEMON` +flag (see [rust_data_daemon_development.md](rust_data_daemon_development.md)): -```text -python -m neuracore.data_daemon.runner_entry -``` +- **Rust daemon** β€” when `NCD_RUST_DAEMON` is truthy, + the launcher `exec`s the bundled native binary shipped in the wheel at + `neuracore/data_daemon/bin/data-daemon`. This is the implementation described + throughout this guide. +- **Legacy Python daemon (default)** β€” when `NCD_RUST_DAEMON` is unset or not + truthy, the launcher runs the Python implementation instead. -That daemon process: +Either daemon process: - boots the internal components it needs - starts its main loop - stays running until you stop it (or the machine shuts down) @@ -132,20 +145,16 @@ You may see simple messages when it stops: ### Startup and schema migration -On startup, the daemon initializes the SQLite store and ensures schema compatibility. 
- -If an older single-table schema is detected (legacy `traces.status` format), the daemon -automatically migrates data to the current schema: +On startup the daemon opens its SQLite store (WAL mode) and applies any pending +schema migrations before serving requests. -- `traces` rows are transformed into lifecycle fields: - - `write_status` - - `registration_status` - - `upload_status` -- `recordings` rows are generated per unique `recording_id` -- Existing trace metadata/bytes/error fields are preserved -- Migration runs before normal startup reconciliation - -Migration runs once per DB file. After a successful migration, startup continues normally. +The Rust daemon's schema is defined by the SQL migrations under +[rust/data_daemon/migrations/](../rust/data_daemon/migrations/) and applied +automatically with `sqlx::migrate!`. A fresh database is created and migrated on +first launch; an existing one has only the not-yet-applied migrations run. There +is no legacy single-table conversion β€” the migrations are the single source of +truth for the schema. To inspect the live database see +[rust_data_daemon_development.md#sqlite-state-inspection](rust_data_daemon_development.md#sqlite-state-inspection). --- @@ -290,11 +299,6 @@ export NEURACORE_DAEMON_DB_PATH=/workspaces/neuracore/data_daemon_state.db export NEURACORE_DAEMON_RECORDINGS_ROOT=/workspaces/neuracore/recordings ``` -Recommended upload concurrency: -- Most machines: `5-10` -- Start at `5`, increase only if CPU/network/disk are stable -- Very high values can increase retries, memory pressure, and shutdown latency - --- ## CLI reference @@ -366,9 +370,11 @@ Notes: ### `neuracore data-daemon launch` ```bash -neuracore data-daemon launch [--profile ] [--background] +neuracore data-daemon launch [--profile ] [--background] [--debug] ``` +`--debug` raises the log level (equivalent to setting `NDD_DEBUG=1`). 
+ Examples: ```bash @@ -395,6 +401,18 @@ neuracore data-daemon status neuracore data-daemon stop ``` +### `neuracore data-daemon reset` + +Stops the daemon (if running) and removes **all** of its local state: the +recordings tree, the SQLite database, the PID file, and the shared-memory +artefacts. This is destructive and cannot be undone β€” use it to return a wedged +host to a clean slate. + +```bash +neuracore data-daemon reset # prompts for confirmation +neuracore data-daemon reset --yes # skip the prompt (for scripts) +``` + --- ## Offline Recordings @@ -463,17 +481,23 @@ neuracore data-daemon launch ### Which video encoder backend is being used -The recording encoder selects backend at runtime: -- Uses `ffmpeg` CLI when `ffmpeg` is available on `PATH` -- Falls back to PyAV when `ffmpeg` is unavailable or fails to initialize +Both daemons encode video with `ffmpeg`, but they handle a missing or broken +`ffmpeg` differently: +- **Rust daemon** (`NCD_RUST_DAEMON=1`) β€” verifies `ffmpeg` at startup. If the + binary is missing from `PATH`, or the local build cannot run the encode the + daemon needs, the preflight fails and the daemon refuses to start (rather than + starting and silently dropping every video recording). +- **Legacy Python daemon** (default) β€” uses the `ffmpeg` CLI when it is on + `PATH` and falls back to PyAV when `ffmpeg` is unavailable or fails to + initialise. -Quick check: +Confirm `ffmpeg` is installed and runnable: ```bash ffmpeg -version ``` -If this command succeeds, the daemon will use the FFmpeg backend for new recordings. +If that command fails, install `ffmpeg` (see [Quick start](#1-install-from-repo-root)) for the Rust daemon, or rely on the PyAV fallback under the Python daemon. 
### Migration issues on startup diff --git a/docs/presentations/rust_daemon_intro.html b/docs/presentations/rust_daemon_intro.html new file mode 100644 index 000000000..23721a6bd --- /dev/null +++ b/docs/presentations/rust_daemon_intro.html @@ -0,0 +1,708 @@ + + + + + + Intro to the Rust Data Daemon + + + + + + + + + +
Neuracore · Data Daemon
+
+
+ + +
+ Engineering · Internal +

Intro to the Rust Data Daemon

+

The successor to our Python recording daemon:
why we're building it, how it works, and how we roll it out.

+

Press S for speaker notes · → to advance · ESC for overview

+ +
+ + +
+ Roadmap +

What we'll cover

+
+

1 · Background

Why a rewrite makes sense now.

+

2 · Why Rust

What the POC told us, and why not Go / Zig / C.

+

3 · What's changed

The overall architecture, plus the data hot path.

+

4 · Rollout

From flag-gated opt-in to default to Python removal.

+

5 · Q&A

We'll close with time for your questions.

+
+

Headline: mostly the same system, with the bottlenecks rebuilt.

+ +
+ + +
+ Part 1 +

Background

+

Why does a rewrite make sense?

+ +
+ + +
+

The Python daemon has had a storied development

+
    +
  • It grew organically as the product did: multiple transports, process models, and recording strategies layered over time.
  • +
  • It works, and it taught us what the problem actually is.
  • +
  • But a lot of it is now legacy bloat: state and machinery we kept for reasons that no longer apply.
  • +
+
+

✓ Keep the learnings

Recording lifecycle, the API/CLI contract, cloud upload semantics, hard-won edge cases.

+

✗ Leave behind the bloat

Redundant buffering & double-handling, scattered producer state, fighting the interpreter.

+
+

A rewrite lets us carry the knowledge forward without carrying the cruft.

+ +
+ + +
+ Part 2 +

Why Rust?

+

If the daemon is mostly I/O bound… why a systems language at all?

+ +
+ + +
+

We're reversing an earlier call

+
    +
  • The POC showed no substantial throughput difference between Python and Rust. The daemon is I/O bound: network and disk, not CPU.
  • +
  • So we built the daemon out in Python, for simplicity. A reasonable call at the time.
  • +
  • In practice it took more complexity to reach similar performance, and it still ends up slightly slower and more resource hungry.
  • +
+
+

This rewrite reverses that decision.

+

Same lessons, rebuilt in Rust. As a bonus we gain consistency: no interpreter and no GC pauses mean steadier tail latency, which is exactly where recording integrity lives or dies.

+
+ +
+ + +
+

Why not another language?

+

A balance of memory safety, maturity, and complexity.

+
+
Go + Go: great ergonomics, but it's garbage-collected. The same tail-latency risk we're trying to escape.
+
Zig + Zig: promising and fast, but too immature for a production data path today.
+
C + C: no GC, but no memory-safety net. Unacceptable risk for long-running data integrity.
+
C++ + C++: capable, but high complexity and footgun surface for the team to maintain.
+
+
+ Rs + Rust: memory-safe without a GC, mature tooling and ecosystem, and the compiler keeps a concurrent data pipeline honest. The combination is the win. +
+ +
+ + +
+ Part 3 +

What's changed

+

Spoiler: less than you'd think.

+ +
+ + +
+
+

The overall architecture · three Rust crates + the existing Python

+
+ +
+ Python process · unchanged API + CLI +
+
neuracore SDKlog_joints · log_frame · start/stop · same public API
+
crate ① data_daemon_producerPyO3 .so linked in-process · batch · spool · publish
+
+
+ + + + +
+ crate ③ data_daemon · the daemon binary +
+
IPC listenerdrain shared mem
+
Dispatcherwindows · integer recording id
+
Trace actorsone per stream
+
+
+
Encodingchunked video · metadata
+
State storesingle DB writer
+
Cloudregister · resumable upload
+
+
+ +
+ 💾 local disk spool + SQLite + 🎞 ffmpeg encode + ☁️ Cloud / GCS upload +
+
+

It's mostly the same: the old daemon is unchanged in most places; in a few spots it now calls into the Rust daemon. To the end user the CLI and API are identical.

+ +
+ + +
+

crate ② · data_daemon_shared

+

The whole contract is one enum

+
// data_daemon_shared/src/lib.rs: linked by BOTH the producer and the daemon
+pub enum Envelope {
+    // lifecycle: fire-and-forget, tagged only by its source
+    StartRecording  { robot_id: String, robot_instance: i64, started_at_ns: i64, /* … */ },
+    StopRecording   { robot_id: String, robot_instance: i64, stopped_at_ns: i64 },
+    CancelRecording { robot_id: String, robot_instance: i64 },
+
+    // data: opaque payload, no trace identity on the wire
+    Data            { data_type: String, timestamp_ns: i64, payload: Vec<u8>, /* … */ },
+    BatchedData     { items: Vec<BatchedDataItem>, timestamp_ns: i64, /* … */ }, // N joints → 1 msg
+    VideoChunkReady { byte_count: u64, frame_count: u32, /* … */ },              // pixels are on disk
+}
+
+pub fn encode(&self) -> Result<Vec<u8>, _> { postcard::to_allocvec(self) }
+pub fn decode(b: &[u8]) -> Result<Self, _> { postcard::from_bytes(b) }
+

No recording_id or trace_id on the wire, only the source: the daemon owns recording identity. Encoded with postcard, so f64 values round-trip bit-exact.

+ +
+ +
+ + +
+
+

The hot path · logged → spooled → encoded → uploaded

+
+
+
RGB
+
+ log_frame()→ + batch→ + spool pixels → .nut chunk 💾 + ⋯"chunk ready" · IPC +
+
+
+
Joints
+
+ log_joints()→ + batch N joints→ + one BatchedData envelope · IPC +
+
+ +
iceoryx2 shared memory · only lightweight envelopes cross the boundary
+ +
+
Daemon
+
+ IPC listener→ + dispatcher (window · int id)→ + trace actor→ + 🎞 chunked encode (lossy+lossless)→ + concat→ + single DB writer→ + register + upload ☁️ +
+
+
+ +

What actually changed

+
    +
  • Spooling is more efficient: no double-handling, so pixels hit the disk once and stay there.
  • +
  • The producer is nearly stateless: just light batching before it sends.
  • +
  • Recordings are identified by an integer locally; the cloud id is resolved separately.
  • +
  • Encoding is chunked: it balances CPU against disk, then concatenates into the full video.
  • +
  • A single database writer: no write contention.
  • +
+ +
+ + +
+

crate ① · data_daemon_producer

+

Stateless producer: N joints → one message

+
// data_daemon_producer/src/lib.rs: runs INSIDE the Python process
+#[pyfunction]
+fn log_joints(py, robot_id, robot_instance, data_type,
+              items: Vec<(String, f64)>, timestamp_ns, timestamp_s) -> PyResult<()> {
+    py.allow_threads(|| {
+        let mut batch_items = Vec::with_capacity(items.len());
+        for (name, value) in items {
+            let payload = serde_json::to_vec(&ScalarFrameEntry { timestamp, value })?;
+            batch_items.push(BatchedDataItem { data_type, sensor_name: Some(name), payload });
+        }
+        // one envelope for every joint captured at this instant
+        publish(&Envelope::BatchedData {
+            robot_id, robot_instance,
+            publish_timestamp_ns: now_ns(), timestamp_ns, timestamp_s,
+            items: batch_items,
+        })
+    })
+}
+

No per-stream state to manage: pack the batch and publish. One IPC message carries every joint for the timestep.

+ +
+ + +
+

crate ① · data_daemon_producer

+

Pixels go to disk, not over IPC

+
// data_daemon_producer/src/lib.rs: log_frame()
+// SAFETY: hold the GIL and pass a zero-copy view from numpy into the NUT
+// writer; releasing it would force a multi-MiB memcpy per frame.
+let payload_slice: &[u8] =
+    unsafe { std::slice::from_raw_parts(payload.buf_ptr() as *const u8, actual_bytes) };
+
+// …append straight to the (source, sensor) on-disk NUT chunk.
+// Pixels NEVER touch the IPC bus. When the chunk fills, announce only metadata:
+publish(&Envelope::VideoChunkReady {
+    byte_count, frame_count, frame_timestamps_ns, /* … */
+})?;
+

No double-handling: the bulk data is written once. Only the chunk's metadata crosses the bus.

+ +
+ + +
+

crate ③ · data_daemon · dispatcher

+

The daemon mints the local integer id

+
// pipeline/dispatcher.rs: StartRecording handler (one tokio task owns this)
+let recording_index = match self.store.create_recording(new).await {
+    Ok(row)    => row.recording_index,     // ← local integer id, assigned here
+    Err(error) => { tracing::warn!(%error, "failed to create recording row"); return; }
+};
+
+// open the source's active window; data is bucketed by capture time
+entry.live = Some(ActiveWindow {
+    recording_index,
+    started_at_ns,
+    stopped_at_ns: None,        // set on StopRecording
+    traces: HashMap::new(),
+});
+

Recordings are identified by a local integer; the dispatcher buckets each datum into the matching window. The cloud id is resolved separately.

+ +
+ + +
+

crate ③ · data_daemon · encoding

+

Chunked encode, then stream-copy concat

+
// encoding/video_encoder.rs: encode_chunk() turns one NUT chunk into two mp4s
+Command::new(ffmpeg)
+    .args(["-i", raw_nut, "-map", "0:v"])
+    .args(["-c:v", "libx264", "-pix_fmt", "yuv420p",     "-preset", "ultrafast", "-qp", "23"])
+    .arg(lossy_out)        // chunk_NNNN_lossy.mp4
+    .args(["-map", "0:v"])
+    .args(["-c:v", "libx264", "-pix_fmt", "yuv444p10le", "-preset", "ultrafast", "-qp", "0"])
+    .arg(lossless_out);    // chunk_NNNN_lossless.mp4  (mathematically lossless)
+
+// on EndTrace: concat_segments() stream-copies the chunks into the final
+// lossy.mp4 / lossless.mp4, with no second decode/encode pass.
+

Transcode each chunk as it lands (balancing CPU against disk); stitch the segments at the end with a cheap stream-copy, no re-encode.

+ +
+ + +
+

crate ③ · data_daemon · state

+

One writer task: batched and coalesced

+
// state/trace_writer.rs: ONE tokio task owns every DB write
+pub fn spawn(store: Arc<SqliteStateStore>) -> (TraceWriteHandle, TraceWriter) {
+    let (tx, rx) = mpsc::unbounded_channel();
+    let join = tokio::spawn(run(store, rx));     // ← the one and only writer
+    (TraceWriteHandle { tx: tx.clone() }, TraceWriter { tx, join })
+}
+
+async fn run(store: Arc<SqliteStateStore>, mut rx: Receiver<Message>) {
+    let mut pending: HashMap<String, CoalescedTraceWrite> = HashMap::new();
+    loop {
+        tokio::select! {
+            Some(Message::Write(w)) = rx.recv() => merge(&mut pending, w),       // last-writer-wins
+            _ = ticker.tick()                   => flush(&store, &mut pending).await, // 1 batched txn
+        }
+    }
+}
+

Every actor sends writes to one task: no write contention, and per-frame updates coalesce into batched transactions.

+ +
+ +
+ + +
+ Part 4 +

The rollout plan

+

Opt-in → bundled → default → Python removed.

+ +
+ + +
+

A staged, reversible path

+
    +
  • + Now: introduce, behind a flag + Initial PRs land the Rust daemon. Built & enabled explicitly via NCD_RUST_DAEMON. +
  • +
  • + Bundle in Linux wheels + Move to pyproject.toml and ship the daemon in our Linux wheels with maturin. +
  • +
  • + Default on Linux (opt-out) + Rust daemon becomes the default for Linux users; Python is still there to fall back to. +
  • +
  • + macOS & Windows support + Extend the daemon beyond Linux. +
  • +
  • + Opt-out on all platforms + Rust becomes the default everywhere. +
  • +
  • + Remove the Python daemon + Once we're confident across platforms, delete the legacy code. +
  • +
+ +
+ + +
+ Step 0 · today +

Where we are right now

+
+

Flag-gated

The daemon must be built and enabled with NCD_RUST_DAEMON before it does anything.

+

Build tooling

You'll need cargo and clang to build the daemon locally.

+

Linux only, not in wheels

Binaries aren't built into our wheels yet, but you can start trying it out today.

+

Early signal

We had initial success and good feedback using it at ICRA.

+
+

Low risk to try: it does nothing unless you opt in, and the Python daemon is untouched.

+ +
+ + +
+

Thanks for listening

+

Any questions?

+ +
+ +
+
+ + + + + + + diff --git a/docs/rust_data_daemon_development.md b/docs/rust_data_daemon_development.md new file mode 100644 index 000000000..d4d792a61 --- /dev/null +++ b/docs/rust_data_daemon_development.md @@ -0,0 +1,319 @@ +# Rust Data Daemon β€” Developer Guide + +This guide is for developers working on the Rust data daemon under [rust/](../rust/). For the end-user CLI (profiles, launch, stop, troubleshooting) see [data_daemon.md](data_daemon.md). + +--- + +## Workspace layout + +The [rust/](../rust/) directory is a Cargo workspace with three members declared in [rust/Cargo.toml](../rust/Cargo.toml): + +| Crate | Path | What it is | +|---|---|---| +| `data-daemon` | [rust/data_daemon/](../rust/data_daemon/) | The daemon binary β€” CLI, lifecycle, SQLite state, IPC listener, per-trace pipeline, encoding. | +| `data_daemon_shared` | [rust/data_daemon_shared/](../rust/data_daemon_shared/) | Shared library β€” IPC envelope types and service-name constants, plus the daemon configuration model and filesystem-path resolution the two processes must compute identically. Linked by both the daemon and the producer crate. | +| `data_daemon_producer` | [rust/data_daemon_producer/](../rust/data_daemon_producer/) | PyO3 `cdylib` β€” producer-side IPC client exposed to Python as `neuracore.data_daemon._native_producer`. | + +Shared workspace dependencies and the Rust edition (`2021`) are pinned in [rust/Cargo.toml](../rust/Cargo.toml); individual crates inherit them via `.workspace = true`. + +--- + +## Architecture + +The producer is a *thin shipper*: it publishes source/sensor/timestamp-tagged +data and fire-and-forget lifecycle events, and the daemon owns all recording +identity and routing. Pixel data never travels the IPC bus β€” the producer spools +NUT chunks to disk and only announces them. + +```mermaid +flowchart LR + subgraph SDK["Python SDK"] + LOG["log_joints / log_json / log_frame
start / stop / cancel_recording"] + end + subgraph PROD["data_daemon_producer (PyO3 cdylib)"] + LOG -->|GIL released| PUBT["publisher thread"] + LOG -->|RGB frames| WRT["writer thread β†’ NUT spool on disk"] + end + PUBT -->|postcard envelopes| BUS[("iceoryx2 commands")] + WRT -. VideoChunkReady .-> PUBT + BUS --> LIS["ipc::listener (adaptive poll)"] + LIS --> DISP["dispatcher
single task Β· holdback Β· publish-clock routing"] + DISP --> ACT["per-trace actors"] + ACT -->|fire-and-forget| TW["trace_writer
batched DB write-behind"] + ACT -->|fire-and-forget| JW["json_writer (IO thread)"] + ACT -->|spawn_blocking| FF["ffmpeg chunk encode"] + TW --> DB[("SQLite WAL")] + LIS -->|recording-id queries| DB + subgraph CLOUD["cloud coordinators"] + REG["registration"] --> UP["uploader"] --> STA["status"] + PR["progress"] + NOT["start / stop / cancel notifiers"] + RP["reaper"] + end + DB --> CLOUD + UP -->|PUT chunks| GCS[("GCS / backend")] +``` + +A recording's lifecycle β€” the daemon assigns the local `recording_index` +immediately and the cloud `recording_id` is backfilled asynchronously: + +```mermaid +sequenceDiagram + participant SDK + participant Producer + participant Dispatcher + participant Store as SQLite + participant StartN as start-notifier + participant Backend + + SDK->>Producer: start_recording(source) + Producer->>Dispatcher: StartRecording envelope + Dispatcher->>Store: insert recording (recording_index) + Dispatcher-->>StartN: RecordingStarted (event bus) + StartN->>Backend: POST /recording/start + Backend-->>StartN: cloud recording_id + StartN->>Store: persist recording_id + SDK->>Producer: log_* (data / frames) + Producer->>Dispatcher: Data / VideoChunkReady (routed by publish ts) + SDK->>Producer: stop_recording + Producer->>Dispatcher: StopRecording envelope + Dispatcher->>Store: mark stopped + Note over StartN,Backend: stop-notifier POSTs /recording/stop + Note over StartN,Backend: uploader PUTs traces, reaper reclaims once fully uploaded +``` + +--- + +## Prerequisites + +### Rust toolchain + +The pre-commit hooks and CI ([.github/workflows/rust-data-daemon.yaml](../.github/workflows/rust-data-daemon.yaml)) invoke `cargo` from your `PATH`. Install via [rustup](https://rustup.rs/): + +```bash +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +rustup component add rustfmt clippy +``` + +CI uses `stable` (via `dtolnay/rust-toolchain@stable`), so any recent stable toolchain works locally. 
+ +### System dependencies + +- **ffmpeg + ffprobe** β€” required by the video-encoder subprocess and the `encoding::video_encoder` / `encoding::nut_writer` test suites (tests that need ffmpeg self-skip if it's missing, but the daemon itself depends on it at runtime): + + ```bash + sudo apt-get update && sudo apt-get install -y ffmpeg + ``` + +- **maturin** (only when working on the `data_daemon_producer` PyO3 crate): + + ```bash + pip install maturin + ``` + +--- + +## Common commands + +Run all `cargo` commands from the workspace root [rust/](../rust/) unless noted otherwise. Targeting a specific crate uses `-p ` (the workspace member names from the table above). + +### Build + +```bash +# Whole workspace (debug) +cargo build --workspace + +# Release binary, daemon only +cargo build --release -p data-daemon + +# Producer cdylib only +cargo build -p data_daemon_producer +``` + +The release binary lands at [rust/target/release/data-daemon](../rust/target/release/data-daemon). + +### Test + +```bash +# Whole workspace +cargo test --workspace + +# A specific crate +cargo test -p data-daemon +cargo test -p data_daemon_shared + +# A specific module or test name (partial match) +cargo test -p data-daemon pipeline::dispatcher +cargo test -p data-daemon encoding::metadata::fixture_matches_python_video_trace_output +``` + +Tests that shell out to `ffmpeg` / `ffprobe` self-skip on hosts without those binaries β€” install them (see above) to exercise the full encoding suite. + +### Format and lint + +These are the gates the pre-commit hooks and CI enforce; run them before pushing. 
+ +```bash +cargo fmt --check # passive β€” fails if anything is unformatted +cargo fmt # apply formatting +cargo clippy --all-targets -- -D warnings # workspace-wide, warnings denied +``` + +The pre-commit hooks in [.pre-commit-config.yaml](../.pre-commit-config.yaml) run `cargo fmt` and `cargo clippy` against [rust/data_daemon/](../rust/data_daemon/) only β€” `language: system`, so they need the local toolchain. If you don't touch any file under `rust/data_daemon/`, the cargo hooks are skipped. + +### Documentation + +```bash +RUSTDOCFLAGS="-D warnings" cargo doc --no-deps --document-private-items +``` + +The `-D warnings` flag matches CI; broken intra-doc links and missing items fail the build. Generated HTML lands at [rust/target/doc/](../rust/target/doc/). + +--- + +## Running the daemon locally + +Once built, run the CLI directly through cargo. The command tree mirrors the user-facing CLI documented in [data_daemon.md#cli-reference](data_daemon.md#cli-reference): + +```bash +cargo run -p data-daemon -- profile list +cargo run -p data-daemon -- profile create dev +cargo run -p data-daemon -- launch --profile dev +cargo run -p data-daemon -- status +cargo run -p data-daemon -- stop +``` + +### Pointing the daemon at scratch paths + +To avoid polluting your real `~/.neuracore`, override the runtime paths (also documented in [data_daemon.md#runtime-path-environment-variables](data_daemon.md#runtime-path-environment-variables)): + +```bash +export NEURACORE_DAEMON_PID_PATH=/tmp/ndd-dev/daemon.pid +export NEURACORE_DAEMON_DB_PATH=/tmp/ndd-dev/state.db +export NEURACORE_DAEMON_RECORDINGS_ROOT=/tmp/ndd-dev/recordings +cargo run -p data-daemon -- launch +``` + +### Foreground vs background + +- **Foreground** (default): logs stream to stderr, Ctrl-C triggers graceful shutdown. Use this for almost everything during development. 
+- **Background** (`launch --background`): double-forks via [lifecycle::daemonize](../rust/data_daemon/src/lifecycle/daemonize.rs); logs go to a `daemon.log` sibling of the SQLite DB. Use this when you specifically need to test the daemonized path or PID-file handling. + +### Debug logging + +The daemon defaults to the `warn` tracing level. `--debug` (or `NDD_DEBUG=1`) bumps it to `debug`. `RUST_LOG` overrides both — for example: + +```bash +RUST_LOG=data_daemon=trace,iceoryx2=warn cargo run -p data-daemon -- launch +``` + +--- + +## Working on the PyO3 producer + +The `data_daemon_producer` crate compiles to a `cdylib` that Python imports as `neuracore.data_daemon._native_producer`. During development, use `maturin develop` from the producer crate directory to build and install it into your active virtualenv in one step: + +```bash +cd rust/data_daemon_producer +maturin develop +python -c "import neuracore.data_daemon._native_producer as p; print(p)" +``` + +To route the Python SDK through the native producer instead of the legacy zmq one, set the rollout flag: + +```bash +export NCD_RUST_DAEMON=1 +python your_script.py +``` + +Selection logic lives in [neuracore/data_daemon/rust_selection.py](../neuracore/data_daemon/rust_selection.py); both the daemon binary handoff and the SDK's `DataStream` construction read it. A small shim bridges the native producer to the Python `ProducerChannel` contract. 
+ +--- + +## Packaging the wheel + +The Python wheel ships two Rust artefacts inside the `neuracore.data_daemon` package: + +| Artefact | Wheel location | Source crate | Imported / executed as | +|---|---|---|---| +| Daemon binary | `neuracore/data_daemon/bin/data-daemon` | `data-daemon` (bin) | Re-exec'd by [neuracore/data_daemon/__main__.py](../neuracore/data_daemon/__main__.py) when `NCD_RUST_DAEMON` is truthy | +| Producer cdylib | `neuracore/data_daemon/_native_producer*.so` | `data_daemon_producer` (cdylib) | `import neuracore.data_daemon._native_producer` from the SDK producer shim | + +Both paths are inside the Python package tree, so vanilla setuptools `package_data` is enough to package them once they're built — there is no `pyproject.toml`/maturin build-backend migration. The trade-off is that each wheel build runs cargo twice (once per crate) before `python -m build` packages the result. + +### One-shot local build + +Use the helper script to compile both crates in release mode and copy the artefacts into the package tree at the locations the runtime expects: + +```bash +./rust/scripts/build_wheel_artefacts.sh +``` + +What it does: + +1. Runs `cargo build --release -p data-daemon` and copies the binary to [neuracore/data_daemon/bin/data-daemon](../neuracore/data_daemon/bin/data-daemon). +2. Runs `cargo build --release -p data_daemon_producer` and copies the cdylib to [neuracore/data_daemon/_native_producer.so](../neuracore/data_daemon/_native_producer.so) (renames `libdata_daemon_producer.so` → `_native_producer.so` so PyO3's `PyInit__native_producer` is discoverable). + +Both targets are gitignored (`neuracore/data_daemon/bin/` and `neuracore/data_daemon/_native_producer*.so`); the script is idempotent so re-running it after a `cargo` edit refreshes the in-tree copies. `pip install -e .` after the script picks the new artefacts up automatically via `package_data`. 
+ +For day-to-day iteration on the producer crate only, prefer `maturin develop` from [rust/data_daemon_producer/](../rust/data_daemon_producer/) — it skips the binary build, only refreshes the cdylib, and is faster. + +### Building a wheel + +```bash +./rust/scripts/build_wheel_artefacts.sh +python -m build --wheel +``` + +The wheel is platform-tagged (Linux x86_64 today) because [setup.py](../setup.py) sets `Distribution.has_ext_modules`, which makes setuptools tag the wheel for the host platform — without that hook setuptools would tag it `py3-none-any` and pip would happily install a Linux .so onto macOS. `package_data` ships both artefacts; `MANIFEST.in` ships the script and the `rust/` sources for the sdist. + +### CI + +The wheel job runs in [.github/workflows/build-wheels.yaml](../.github/workflows/build-wheels.yaml): + +1. Installs the Rust toolchain + ffmpeg (for unit tests). +2. Runs the helper script above. +3. Runs `python -m build --wheel` to produce the wheel. +4. Uploads the wheel as an artefact for the release job to consume. + +The matrix is Linux x86_64 only for v1; aarch64 ships when there's demand (the script is platform-agnostic — only the cross-compilation toolchain would need to grow). Each wheel is one Python version × one platform, matching the cdylib's ABI. + +### Release path + +The [release workflow](../.github/workflows/release.yaml) wires the wheel job into its publish step: it depends on `build-wheels.yaml`, downloads the matrix of wheels, and `twine upload`s them alongside the sdist. The sdist remains useful as a portable fallback (users build the Rust artefacts themselves at install time) but is not the recommended install path — the bundled-binary wheel is. + +--- + +## SQLite state inspection + +The daemon stores its state at `NEURACORE_DAEMON_DB_PATH` (default `~/.neuracore/data_daemon/state.db`), opened in WAL mode. 
Migrations live in [rust/data_daemon/migrations/](../rust/data_daemon/migrations/) and run automatically on startup via `sqlx::migrate!`. To poke at the live DB: + +```bash +sqlite3 "$NEURACORE_DAEMON_DB_PATH" ".tables" +sqlite3 "$NEURACORE_DAEMON_DB_PATH" "SELECT trace_id, write_status, registration_status, upload_status FROM traces;" +``` + +The schema is defined by the `sqlx` migrations under [rust/data_daemon/migrations/](../rust/data_daemon/migrations/). + +--- + +## Before committing + +The pre-commit hooks cover formatting and lint, but CI also runs the test suite and builds the release binary and the docs. To catch failures locally before pushing: + +```bash +cargo fmt --check +cargo clippy --all-targets -- -D warnings +cargo test --workspace +cargo build --release -p data-daemon +RUSTDOCFLAGS="-D warnings" cargo doc --no-deps --document-private-items +``` + +Run `pre-commit run --all-files` from the repo root to exercise the full hook chain (including the Python checks). + +--- + +## Further reading + +- [data_daemon.md](data_daemon.md) — end-user CLI, profiles, environment variables, troubleshooting. +- [contribution_guide.md](contribution_guide.md) — repo-wide contribution flow, release process, PR conventions. 
diff --git a/neuracore-dictionary.txt b/neuracore-dictionary.txt index 8f1d2932b..9178b380d 100644 --- a/neuracore-dictionary.txt +++ b/neuracore-dictionary.txt @@ -1,3 +1,5 @@ +absolutises +absolutising acked acks adarms @@ -9,10 +11,13 @@ agilex aiolimiter aiortc allclose +allocvec altclip asarray ascontiguousarray assimp +asyncio +atfork attns autocast Autoencoders @@ -29,6 +34,7 @@ bigendian bigym blit blowaway +bodyless Brawner broadcastable buildtool @@ -36,16 +42,21 @@ byteswap calcsize caplog capsys +castagnoli castshadow categoryarray categoryorder cdef +cdylib +chdir checkpointed checkpointing childbody childinertia chonk +chrono CLIPMLP +clippy CLOEXEC closedness cmap @@ -58,9 +69,13 @@ colab Colab colcon Colormaps +colorspace colwise conaffinity +concatenator +concats condim +condvar connectionstatechange conq Conq @@ -72,6 +87,9 @@ ctrllimited ctrlrange cuda cudnn +daemonising +daemonization +dashmap dataconfig dataformats dataid @@ -79,15 +97,27 @@ ddim DDIM ddpm DDPM +debouncer delenv +demux +demuxer +demuxers +demuxes denoise denoised denoising +Deque +descheduled +deserialise +devnull diaginertia dinov dinov2 +distclass distilbert docstrings +doctests +DONTNEED dtype EADDRINUSE eigenpy @@ -95,8 +125,15 @@ elementwise elems embs Emika +ENOENT +EPERM +EPIPE erfinv +errno +ESRCH +EWOULDBLOCK excinfo +execv expanduser extr extractall @@ -105,18 +142,27 @@ Extrinsics faceadr facecolor facenum +fadvise +falsey +fcntl feedforward +fflags +ffprobe figsize +filtergraph finetune finetuning finfo fixturenames fogend fogstart +footgun forcelimited forcerange +fourcc fovy FRACT +framecode FRANKA freqs frictionloss @@ -129,12 +175,14 @@ ftruncate gdown geglu gelu +genpts geomadr geomid geomnum getbuffer getpgid getpid +gettid ghjdidnia gptj GPTJ @@ -145,6 +193,7 @@ hparams hstack huggingface hyperparameters +iceoryx iiwa imageio imagenet @@ -170,17 +219,22 @@ jobname jointbody jsons jtps +keepalive keepdim kernelspec keypoints killpg KINOVA kwargs +lavfi 
layernorm lecun lerobot LEROBOT levelname +libavformat +libc +libcs libgl libglew libglib @@ -191,6 +245,7 @@ listconfig llava Llava logdir +loglevel loglik logsigmoid logvar @@ -220,6 +275,7 @@ multihead Multihead multinode multirun +muxer nans nbconvert nbformat @@ -237,6 +293,10 @@ newbyteorder nhead nheads njoints +nokey +Nonblock +noprint +nostdin nprocs numpy octomap @@ -244,6 +304,7 @@ offsamples oieb Oieb OIEB +oneshot openarm openarm_description opencv @@ -264,6 +325,7 @@ pbar pbtxt perceptrons pgoa +pidfile Pieb pinnochio PJRT @@ -278,11 +340,17 @@ preds preexec pretrained pretraining +prio proprio proprios PSNR +pthread pyav +pycache +pydantic +pyfunction pygments +pymodule pyquaternion pytest Qbcaa @@ -302,26 +370,40 @@ RDEM RDWR reannounce rels +renice +renicing +reqwest reraises restartability +resumably RETRYABLE rgba rgbd rgbs rlds +rlib Robotiq rosdep rotvec roundoff rowwise +rposition +rsplit rtype +rustls +rustup Safetensors schematypens SCTP +sdecode +sdist sdpa secho +sencode seqlen +serde sess +setpriority setsid setuptools shadowsize @@ -332,22 +414,32 @@ silu softmax solimp solref +splitn +sqlx squaredcos +startcode +startcodes staticmethod +statvfs subdirs synchronizable +syncpoint +syncpoints targetbody TDVB teleop temb tensorboard +testsrc tfds TFDS tfrecord +thiserror thres timestep timesteps tinyxml +tlsv tmpfs tobytes tolist @@ -357,22 +449,36 @@ torq tqdm traj triu +trunc tryfirst TTYNTK +typer UFACTORY +ultrawide unet Unet Unet's +unflushed +uninit +unistd UNITREE +unitreeh unitreeh1 +unniced unnormalization unpackb +unparseable +unregisterable upserts URDF urdfdom usefixtures +userspace utaustin +varint Vaswani +vdecode +vencode vertadr vertnum VIPERX @@ -380,10 +486,12 @@ viser vlln Vsqo vsync +waitpid wakelock WAKELOCK widowx worldbody +writeback WRONLY wxyz XARM diff --git a/rust/Cargo.lock b/rust/Cargo.lock new file mode 100644 index 000000000..94bea9cd9 --- /dev/null +++ b/rust/Cargo.lock @@ -0,0 +1,3797 @@ +# This file is 
automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-polyfill" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4" +dependencies = [ + "critical-section", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64ct" +version = "1.8.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +dependencies = [ + "serde_core", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cc" +version = "1.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "num-traits", + "serde", + "windows-link", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror 2.0.18", +] + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853" + +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "data-daemon" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "base64", + "bytes", + "chrono", + "clap", + "crc32c", + "dashmap", + "data_daemon_shared", + "dirs", + "iceoryx2", + "libc", + "nix", + "reqwest", + "serde", + "serde_json", + "serde_yaml", + "sqlx", + "tempfile", + "thiserror 1.0.69", + "tokio", + "tracing", + "tracing-subscriber", + "url", + "uuid", + "wiremock", +] + +[[package]] +name = "data_daemon_producer" +version = "0.1.0" +dependencies = [ + "data_daemon_shared", + "iceoryx2", + "libc", + "pyo3", + "serde", + "serde_json", + "tempfile", + "thiserror 1.0.69", + "tracing", +] + 
+[[package]] +name = "data_daemon_shared" +version = "0.1.0" +dependencies = [ + "dirs", + "nix", + "postcard", + "serde", + "serde_json", + "serde_yaml", + "tempfile", + "thiserror 1.0.69", + "tracing", +] + +[[package]] +name = "deadpool" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" +dependencies = [ + "deadpool-runtime", + "lazy_static", + "num_cpus", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "const-oid", + "crypto-common", + "subtle", +] + +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.48.0", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", 
+ "syn", +] + +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +dependencies = [ + "serde", +] + +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + +[[package]] +name = "enum-iterator" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4549325971814bda7a44061bf3fe7e487d447cba01e4220a4b454d630d7a016" +dependencies = [ + "enum-iterator-derive", +] + +[[package]] +name = "enum-iterator-derive" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "685adfa4d6f3d765a26bc5dbc936577de9abf756c1feeb3089b01dd395034842" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "futures-core", + "futures-sink", + "spin", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + 
"futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = 
"futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "h2" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", + 
"tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hash32" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" +dependencies = [ + "byteorder", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "heapless" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f" +dependencies = [ + "atomic-polyfill", + "hash32", + "rustc_version", + "serde", + "spin", + "stable_deref_trait", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "hex" +version = "0.4.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" +dependencies = [ + "http", + "hyper", + "hyper-util", + "rustls", + "tokio", + "tokio-rustls", + "tower-service", + "webpki-roots 1.0.7", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "iceoryx2" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"13ef16a640893a859edaadf15983acaff5e2101971cb8871c7aa4c75ab082d6d" +dependencies = [ + "iceoryx2-bb-concurrency", + "iceoryx2-bb-container", + "iceoryx2-bb-derive-macros", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-lock-free", + "iceoryx2-bb-memory", + "iceoryx2-bb-posix", + "iceoryx2-bb-system-types", + "iceoryx2-cal", + "iceoryx2-log", + "iceoryx2-loggers", + "iceoryx2-pal-configuration", + "serde", + "tiny-fn", + "toml", +] + +[[package]] +name = "iceoryx2-bb-concurrency" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843e04d61a3672277f2fa8798ecdbdcc0493396cc46e93d07fcaf685d21b1be2" +dependencies = [ + "iceoryx2-bb-elementary-traits", + "iceoryx2-pal-concurrency-sync", +] + +[[package]] +name = "iceoryx2-bb-container" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c5ef37b55baeb467b4446593728ce7a9cb4e46c2b758bc39fbc5246065abb26" +dependencies = [ + "iceoryx2-bb-concurrency", + "iceoryx2-bb-derive-macros", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-log", + "serde", +] + +[[package]] +name = "iceoryx2-bb-derive-macros" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "795556d27b6e7aa92589e08b35b6fdd142daaf228cc7f46a2d68f3bab7feedea" +dependencies = [ + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "iceoryx2-bb-elementary" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f64584974d7341e8c1ec1ecab11d59d8ff20d9b4d02104943fbcd2b76c319d3" +dependencies = [ + "iceoryx2-bb-concurrency", + "iceoryx2-bb-elementary-traits", +] + +[[package]] +name = "iceoryx2-bb-elementary-traits" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cbb7bbe8a55d0df1b716c72e79d3a568d74f2e6a73d97025b7f9c9676e893aa4" + +[[package]] +name = "iceoryx2-bb-linux" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dd3e2047b1977c6eba67f3ff9b92cbcf5b0c8560b2afc51341f37d549be17c9" +dependencies = [ + "iceoryx2-bb-concurrency", + "iceoryx2-bb-container", + "iceoryx2-bb-posix", + "iceoryx2-bb-system-types", + "iceoryx2-log", + "iceoryx2-pal-os-api", + "iceoryx2-pal-posix", +] + +[[package]] +name = "iceoryx2-bb-lock-free" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d03af3a743d2e9a2aab3f466d07e1643adba9c2f844acfdde7a5abaf85531ecf" +dependencies = [ + "iceoryx2-bb-concurrency", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-log", +] + +[[package]] +name = "iceoryx2-bb-memory" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c93fc98adc0b9e4cdb2b3205fa7fba21222c5c399f20379df0a08592ea9abb16" +dependencies = [ + "iceoryx2-bb-concurrency", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-lock-free", + "iceoryx2-bb-posix", + "iceoryx2-log", +] + +[[package]] +name = "iceoryx2-bb-posix" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50ce62d48ce40169d985a968b91bb3effa1db9be50993a023b725a3c3ac0eacf" +dependencies = [ + "enum-iterator", + "iceoryx2-bb-concurrency", + "iceoryx2-bb-container", + "iceoryx2-bb-derive-macros", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-system-types", + "iceoryx2-log", + "iceoryx2-pal-configuration", + "iceoryx2-pal-posix", + "lazy_static", + "serde", + "tiny-fn", +] + +[[package]] +name = "iceoryx2-bb-system-types" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b192b5448d30d9a0957eff438e9e0358df3476ac9b210c158081c6cf7ae9145" +dependencies = [ + 
"iceoryx2-bb-container", + "iceoryx2-bb-derive-macros", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-log", + "iceoryx2-pal-configuration", + "iceoryx2-pal-posix", + "serde", +] + +[[package]] +name = "iceoryx2-cal" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81e142ae2fbb3c4bdcc17a23013dd17e58ac9a55fc2effc8cf757575fb8b5c57" +dependencies = [ + "iceoryx2-bb-concurrency", + "iceoryx2-bb-container", + "iceoryx2-bb-derive-macros", + "iceoryx2-bb-elementary", + "iceoryx2-bb-elementary-traits", + "iceoryx2-bb-linux", + "iceoryx2-bb-lock-free", + "iceoryx2-bb-memory", + "iceoryx2-bb-posix", + "iceoryx2-bb-system-types", + "iceoryx2-log", + "lazy_static", + "postcard", + "serde", + "sha1_smol", + "tiny-fn", + "toml", +] + +[[package]] +name = "iceoryx2-log" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b6c959532c7e18463a3c68c6beaaa489a139907edf5b26f7c606e5c64361b3e" +dependencies = [ + "iceoryx2-log-types", + "iceoryx2-pal-concurrency-sync", +] + +[[package]] +name = "iceoryx2-log-types" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f97fa5729b562d2317e35b1c2b32eb5d8e965eb63ad6c4a040cd6b1cbbb2394" + +[[package]] +name = "iceoryx2-loggers" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec234d4a279b9e849dab6b91b8189325205a9e6e77f0ab21f9bfcd7ff370621e" +dependencies = [ + "iceoryx2-log-types", + "iceoryx2-pal-concurrency-sync", +] + +[[package]] +name = "iceoryx2-pal-concurrency-sync" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90753053905f56465c28015d5699e413fad76a9c3ad3f2398c05fbcaeac4916e" + +[[package]] +name = "iceoryx2-pal-configuration" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"62156f568065d979cc9200ab160ecfbe99bcc3186e5947642472fa2fa9db3cba" + +[[package]] +name = "iceoryx2-pal-os-api" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1db27a7ee0e2d5a3b873b16bb1edaa52d71ff0cf5f9c8f9d93d328b46d777472" +dependencies = [ + "bindgen", + "cc", + "iceoryx2-pal-posix", +] + +[[package]] +name = "iceoryx2-pal-posix" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "047637116257281d1102490689e2d0073d94b8d5f4cda20b00b65aed35c09e6e" +dependencies = [ + "bindgen", + "cc", + "iceoryx2-pal-concurrency-sync", + "iceoryx2-pal-configuration", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + +[[package]] +name = 
"ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "js-sys" +version = "0.3.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = 
"libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "libredox" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" +dependencies = [ + "bitflags", + "libc", + "plain", + "redox_syscall 0.7.5", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] 
+name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "mio" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.6", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.5.18", + "smallvec", + "windows-link", +] + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + +[[package]] +name 
= "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "postcard" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "heapless", + "serde", +] + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + 
"unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2", + "thiserror 2.0.18", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +dependencies = [ + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.4", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.52.0", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_syscall" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4666a1a60d8412eab19d94f6d13dcc9cea0a5ef4fdf6a5db306537413c661b1b" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 1.0.69", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "reqwest" +version = "0.12.28" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tokio-util", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "webpki-roots 1.0.7", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rsa" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "signature", + "spki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.23.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" +dependencies = [ + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" 
+dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_spanned" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6662b5879511e06e8999a8a235d848113e942c9124f211511b16466ee2995f26" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha1_smol" +version = "1.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 
0.61.2", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "sqlx" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" +dependencies = [ + "base64", + "bytes", + "chrono", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashbrown 0.15.5", + "hashlink", + "indexmap", + "log", + "memchr", + "once_cell", + "percent-encoding", + "rustls", + "serde", + "serde_json", + "sha2", + "smallvec", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tracing", + "url", + "webpki-roots 0.26.11", +] + +[[package]] +name = "sqlx-macros" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" +dependencies = [ + "dotenvy", + "either", + "heck", + 
"hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" +dependencies = [ + "atoi", + "base64", + "bitflags", + "byteorder", + "bytes", + "chrono", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.6", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.18", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" +dependencies = [ + "atoi", + "base64", + "bitflags", + "byteorder", + "chrono", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand 0.8.6", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.18", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" +dependencies = [ + "atoi", + "chrono", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "serde_urlencoded", + "sqlx-core", + "thiserror 2.0.18", + "tracing", + "url", +] + 
+[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tiny-fn" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9659b108631d1e1cf3e8e489f894bee40bc9d68fd6cc67ec4d4ce9b72d565228" + +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.52.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" +dependencies = [ + "bytes", + "libc", + "mio", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml" +version = "0.9.12+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863" +dependencies = [ + "serde_core", + 
"serde_spanned", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow 0.7.15", +] + +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow 1.0.3", +] + +[[package]] +name = "toml_writer" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db" + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" +dependencies = [ + "bitflags", + "bytes", + "futures-util", + "http", + "http-body", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", + "url", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] 
+name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.7", +] + +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "whoami" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +dependencies = [ + "libredox", + "wasite", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + 
+[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" + +[[package]] +name = "winnow" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" + +[[package]] +name = "wiremock" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" +dependencies = [ + "assert-json-diff", + "base64", + "deadpool", + "futures", + "http", + "http-body-util", + "hyper", + "hyper-util", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", + "url", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + 
"stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 000000000..0e20c8537 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,68 @@ +[workspace] +members = ["data_daemon", "data_daemon_shared", "data_daemon_producer"] +resolver = "2" + +[workspace.package] +edition = "2021" +license = "MIT" +version = "0.1.0" + +[workspace.dependencies] +anyhow = "1" +async-trait = "0.1" +base64 = "0.22" +bytes = "1" +chrono = { version = "0.4", default-features = false, features = [ + "clock", + "serde", +] } +clap = { version = "4", features = ["derive"] } +crc32c = "0.6" +dashmap = "6" +dirs = "5" +iceoryx2 = "0.8" +libc = "0.2" +nix = { version = "0.29", features = ["fs", "process", "signal"] } +once_cell = "1" +postcard = { version = "1", features = ["alloc"] } +pyo3 = { version = "0.22", features = ["extension-module"] } +reqwest = { version = "0.12", default-features = false, features = [ + "rustls-tls", + "http2", + "json", + "stream", +] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +serde_yaml = "0.9" +sqlx = { version = "0.8", default-features = false, features = [ + "runtime-tokio-rustls", + "sqlite", + "chrono", + "migrate", + "macros", +] } +thiserror = "1" +tokio = { version = "1", features = [ + "rt-multi-thread", + "macros", + "process", + "signal", + "sync", + "time", + "fs", + "io-util", +] } +tracing = "0.1" +tracing-subscriber = { version = 
"0.3", features = ["env-filter"] } +url = "2" +uuid = { version = "1", features = ["v4"] } + +# Dev-only deps are declared directly in each crate's [dev-dependencies] (not +# here) so a test-only crate like the wiremock HTTP *server* can never slip into +# a prod [dependencies] table by inheriting `.workspace = true`. + +[profile.release] +# Strip symbols from the shipped daemon binary / producer cdylib (~3 MB off the +# 16 MB debug-symbol binary); the wheel ships only the stripped artefacts. +strip = true diff --git a/rust/data_daemon/.gitignore b/rust/data_daemon/.gitignore new file mode 100644 index 000000000..ea8c4bf7f --- /dev/null +++ b/rust/data_daemon/.gitignore @@ -0,0 +1 @@ +/target diff --git a/rust/data_daemon/Cargo.toml b/rust/data_daemon/Cargo.toml new file mode 100644 index 000000000..3aa175f6b --- /dev/null +++ b/rust/data_daemon/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "data-daemon" +version.workspace = true +edition.workspace = true +license.workspace = true +description = "Neuracore data daemon." + +[[bin]] +name = "data-daemon" +path = "src/main.rs" + +[dependencies] +anyhow.workspace = true +async-trait.workspace = true +base64.workspace = true +bytes.workspace = true +chrono.workspace = true +clap.workspace = true +crc32c.workspace = true +dashmap.workspace = true +data_daemon_shared = { path = "../data_daemon_shared" } +dirs.workspace = true +iceoryx2.workspace = true +libc.workspace = true +nix.workspace = true +reqwest.workspace = true +serde.workspace = true +# `preserve_order` keeps `serde_json::Map` insertion-ordered (via IndexMap) +# so the video-trace sidecar serialises with a stable, predictable key +# ordering. 
+serde_json = { workspace = true, features = ["preserve_order"] } +serde_yaml.workspace = true +sqlx.workspace = true +thiserror.workspace = true +tokio.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true +url.workspace = true +uuid.workspace = true + +[dev-dependencies] +tempfile = "3" +tokio = { workspace = true, features = ["test-util"] } +wiremock = "0.6" diff --git a/rust/data_daemon/migrations/0001_initial.sql b/rust/data_daemon/migrations/0001_initial.sql new file mode 100644 index 000000000..e969d75e9 --- /dev/null +++ b/rust/data_daemon/migrations/0001_initial.sql @@ -0,0 +1,82 @@ +-- Initial schema for the Rust data daemon state store. +-- +-- The daemon owns recording/trace identity: `recordings` are keyed by a local +-- autoincrement `recording_index` (the cloud `recording_id` is backfilled +-- asynchronously by the start notifier), and `traces` are keyed by a +-- daemon-minted UUID. Column names and the status-enum strings are part of the +-- behavioural contract the integration suite relies on (see +-- tests/integration/platform/data_daemon/shared/db_constants.py). + +CREATE TABLE IF NOT EXISTS recordings ( + recording_index INTEGER PRIMARY KEY AUTOINCREMENT, + -- Cloud handle. NULL until the recording-start notifier POSTs + -- `/recording/start`. + recording_id TEXT, + robot_id TEXT, + robot_instance INTEGER, + dataset_id TEXT, + -- Caller capture timestamps (ns); routed window bounds live in memory. + start_timestamp_ns INTEGER, + stop_timestamp_ns INTEGER, + expected_trace_count INTEGER, + expected_trace_count_reported INTEGER NOT NULL DEFAULT 0, + progress_reported TEXT NOT NULL DEFAULT 'pending', + -- Daemon wall-clock lifecycle timestamps. + stopped_at DATETIME, + cancelled_at DATETIME, + -- Cloud-notify bookkeeping. 
+ backend_start_notified_at DATETIME, + backend_stop_notified_at DATETIME, + backend_cancel_notified_at DATETIME, + created_at DATETIME NOT NULL, + last_updated DATETIME NOT NULL +); + +CREATE TABLE IF NOT EXISTS traces ( + trace_id TEXT PRIMARY KEY, + recording_index INTEGER NOT NULL, + write_status TEXT NOT NULL DEFAULT 'pending', + registration_status TEXT NOT NULL DEFAULT 'pending', + upload_status TEXT NOT NULL DEFAULT 'pending', + data_type TEXT, + data_type_name TEXT, + path TEXT, + bytes_written INTEGER NOT NULL DEFAULT 0, + total_bytes INTEGER NOT NULL DEFAULT 0, + bytes_uploaded INTEGER NOT NULL DEFAULT 0, + error_code TEXT, + error_message TEXT, + upload_session_uris TEXT, + created_at DATETIME NOT NULL, + last_updated DATETIME NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_recordings_stopped_at + ON recordings(stopped_at); +CREATE INDEX IF NOT EXISTS idx_recordings_cancelled_at + ON recordings(cancelled_at); +CREATE INDEX IF NOT EXISTS idx_recordings_source + ON recordings(robot_id, robot_instance, recording_index); +CREATE INDEX IF NOT EXISTS idx_recordings_start_notify + ON recordings(recording_id, backend_start_notified_at); +-- `traces.recording_index` deliberately carries NO foreign key, and +-- `PRAGMA foreign_keys` is left at SQLite's OFF default. The recording → trace +-- cascade is hand-rolled in `SqliteStateStore::delete_recording_cascade` (two +-- DELETEs in one transaction) as the single, deliberate integrity mechanism: +-- it keeps the reaper's delete free of FK-ordering constraints and avoids +-- enabling the pragma on every pooled connection. If that cascade is ever +-- split or reordered, orphan trace rows become possible — keep both deletes in +-- one transaction. +-- +-- No `idx_traces_recording_index` on `traces(recording_index)`: the composite +-- `idx_traces_recording_upload` below has `recording_index` as its leading +-- column, so SQLite already uses it for plain `WHERE recording_index = ?` +-- lookups.
A separate single-column index would be pure write amplification. +CREATE INDEX IF NOT EXISTS idx_traces_recording_upload + ON traces(recording_index, upload_status); +CREATE INDEX IF NOT EXISTS idx_traces_write_status + ON traces(write_status); +CREATE INDEX IF NOT EXISTS idx_traces_registration_status + ON traces(registration_status); +CREATE INDEX IF NOT EXISTS idx_traces_upload_status + ON traces(upload_status); diff --git a/rust/data_daemon/src/api/auth.rs b/rust/data_daemon/src/api/auth.rs new file mode 100644 index 000000000..4c6216e90 --- /dev/null +++ b/rust/data_daemon/src/api/auth.rs @@ -0,0 +1,425 @@ +//! Auth provider for the Neuracore API client. +//! +//! Reads the API key from `~/.neuracore/config.json` — the same file the +//! Python SDK writes after a successful `nc.login()` — and exchanges it for a +//! short-lived JWT via `POST {api_url}/auth/verify-api-key`. The JWT is the +//! actual bearer token sent to the backend; `nrc_…` API keys are not accepted +//! directly. If the file already contains an `access_token` (set by tooling +//! that does the exchange itself), the provider uses it verbatim and skips +//! the exchange. On a 401 response the API client calls +//! [`AuthProvider::reload`], which drops the cached JWT and forces a fresh +//! exchange on the next call. +//! +//! Tests rely on a custom provider via [`AuthProvider`] (the trait) so we can +//! inject a fixed token without touching the user's home directory. + +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use async_trait::async_trait; +use reqwest::{Client, StatusCode}; +use serde::Deserialize; +use thiserror::Error; +use tokio::fs; +use tokio::sync::Mutex; + +/// Errors surfaced by the auth provider. +#[derive(Debug, Error)] +pub enum AuthError { + /// Underlying I/O error reading the config file.
+ #[error("failed to read auth config {path}: {source}")] + Io { + path: PathBuf, + #[source] + source: std::io::Error, + }, + /// Config file present but did not deserialise. + #[error("failed to parse auth config {path}: {source}")] + Parse { + path: PathBuf, + #[source] + source: serde_json::Error, + }, + /// Config loaded but contained no API key / access token. + #[error("auth config {path} is missing an access token")] + Missing { path: PathBuf }, + /// `verify-api-key` request failed at the transport level. + #[error("verify-api-key request to {url} failed: {source}")] + ExchangeTransport { + url: String, + #[source] + source: reqwest::Error, + }, + /// `verify-api-key` returned a non-2xx response. + #[error("verify-api-key at {url} returned HTTP {status}: {body}")] + ExchangeStatus { + url: String, + status: StatusCode, + body: String, + }, + /// `verify-api-key` returned a 2xx response without an `access_token`. + #[error("verify-api-key at {url} returned no access_token")] + ExchangeMissingToken { url: String }, +} + +/// Trait implemented by every auth source — the file-backed implementation in +/// production, and the in-memory stub used by tests. +#[async_trait] +pub trait AuthProvider: Send + Sync { + /// Return the current bearer token. Cached internally; cheap to call. + async fn bearer_token(&self) -> Result<String, AuthError>; + + /// Drop the cached token and re-load on the next call. Invoked by the + /// HTTP client after a 401 response. + async fn reload(&self) -> Result<(), AuthError>; +} + +/// On-disk config shape — matches `neuracore.core.config.config_manager.Config`. +/// +/// The Python SDK writes the `api_key` field after a successful `nc.login()` +/// and trades it for an in-memory `access_token` via `auth/verify-api-key`. +/// The daemon does the same exchange at the boundary so it never relies on +/// the SDK persisting a JWT to disk (it doesn't).
A pre-populated +/// `access_token` is still honoured for tests and tooling that wants to +/// bypass the exchange. +#[derive(Debug, Default, Deserialize)] +struct AuthConfig { + api_key: Option<String>, + #[serde(default)] + access_token: Option<String>, +} + +/// Response body for `POST /auth/verify-api-key`. Matches the +/// `neuracore.core.auth.AccessTokenResponse` shape on the Python side. +#[derive(Debug, Deserialize)] +struct VerifyApiKeyResponse { + #[serde(default)] + access_token: Option<String>, +} + +/// HTTP timeout for the verify-api-key exchange. Matches the default +/// per-request budget of the main API client so a stalled identity service +/// can't pin the registration coordinator indefinitely. +const VERIFY_API_KEY_TIMEOUT: Duration = Duration::from_secs(30); +/// Cap on the response body the provider reads back when surfacing a non-2xx +/// error; the JSON payload is ~100 bytes and a runaway HTML page from a +/// misconfigured proxy could otherwise blow up the trace log line. +const VERIFY_API_KEY_ERROR_BODY_LIMIT: usize = 4096; + +/// Default auth source: reads `~/.neuracore/config.json` lazily, exchanges +/// the API key for a JWT, and caches the JWT until [`AuthProvider::reload`] +/// is called. +pub struct FileAuthProvider { + path: PathBuf, + api_url: String, + http: Client, + cached: Mutex<Option<String>>, +} + +impl FileAuthProvider { + /// Build a provider that reads from `path` and exchanges the API key via + /// `{api_url}/auth/verify-api-key`. + pub fn new(path: impl Into<PathBuf>, api_url: impl Into<String>) -> Result<Self, AuthError> { + // A fresh `reqwest::Client` per provider instance is fine — the + // provider itself is a long-lived `Arc` shared by every coordinator, + // so the underlying connection pool is reused across the daemon's + // lifetime.
+ let http = Client::builder() + .timeout(VERIFY_API_KEY_TIMEOUT) + .build() + .map_err(|source| AuthError::ExchangeTransport { + url: String::new(), + source, + })?; + Ok(Self { + path: path.into(), + api_url: api_url.into(), + http, + cached: Mutex::new(None), + }) + } + + /// Build a provider that reads from `~/.neuracore/config.json`. + #[allow(dead_code)] + pub fn default_path(api_url: impl Into<String>) -> Result<Self, AuthError> { + let path = dirs::home_dir() + .map(|home| home.join(".neuracore").join("config.json")) + .unwrap_or_else(|| PathBuf::from(".neuracore/config.json")); + Self::new(path, api_url) + } + + /// Borrow the file path the provider reads. + #[allow(dead_code)] + pub fn path(&self) -> &std::path::Path { + &self.path + } + + /// Read the config file at `self.path` and parse it as JSON. + async fn read_config(&self) -> Result<AuthConfig, AuthError> { + let bytes = fs::read(&self.path).await.map_err(|source| AuthError::Io { + path: self.path.clone(), + source, + })?; + serde_json::from_slice(&bytes).map_err(|source| AuthError::Parse { + path: self.path.clone(), + source, + }) + } + + /// Resolve a bearer token: a pre-populated `access_token` if the config + /// has one, otherwise the result of exchanging the `api_key`. + async fn load(&self) -> Result<String, AuthError> { + let config = self.read_config().await?; + // A pre-populated access_token wins so tests / tooling can pin a + // specific JWT without standing up a verify-api-key endpoint.
+ if let Some(token) = config.access_token { + return Ok(token); + } + let api_key = config.api_key.ok_or_else(|| AuthError::Missing { + path: self.path.clone(), + })?; + self.exchange_api_key(&api_key).await + } + + /// Exchange `api_key` for a JWT via `POST {api_url}/auth/verify-api-key`. + /// + /// # Errors + /// + /// - [`AuthError::ExchangeTransport`] if the request cannot be sent or the + /// 2xx body cannot be decoded. + /// - [`AuthError::ExchangeStatus`] on a non-2xx response; the captured body + /// is capped at `VERIFY_API_KEY_ERROR_BODY_LIMIT` bytes. + /// - [`AuthError::ExchangeMissingToken`] if the 2xx body has no + /// `access_token`. + async fn exchange_api_key(&self, api_key: &str) -> Result<String, AuthError> { + let url = verify_api_key_url(&self.api_url); + let response = self + .http + .post(&url) + .json(&serde_json::json!({ "api_key": api_key })) + .send() + .await + .map_err(|source| AuthError::ExchangeTransport { + url: url.clone(), + source, + })?; + let status = response.status(); + if !status.is_success() { + let mut body = response.text().await.unwrap_or_default(); + if body.len() > VERIFY_API_KEY_ERROR_BODY_LIMIT { + // `String::truncate` panics unless the cut lands on a UTF-8 char + // boundary, so back up to the nearest boundary at or below the + // cap. Index 0 is always a boundary, so the loop terminates. + let mut cut = VERIFY_API_KEY_ERROR_BODY_LIMIT; + while !body.is_char_boundary(cut) { + cut -= 1; + } + body.truncate(cut); + } + return Err(AuthError::ExchangeStatus { url, status, body }); + } + let parsed: VerifyApiKeyResponse = + response + .json() + .await + .map_err(|source| AuthError::ExchangeTransport { + url: url.clone(), + source, + })?; + parsed + .access_token + .ok_or(AuthError::ExchangeMissingToken { url }) + } +} + +/// Compose `{api_url}/auth/verify-api-key` without doubling up the separator +/// when `api_url` already ends in a slash. +fn verify_api_key_url(api_url: &str) -> String { + let base = api_url.trim_end_matches('/'); + format!("{base}/auth/verify-api-key") +} + +#[async_trait] +impl AuthProvider for FileAuthProvider { + async fn bearer_token(&self) -> Result<String, AuthError> { + // The cache lock is intentionally held across `load().await` (the + // verify-api-key exchange, up to ~30 s on a slow link). This makes the + // method single-flight: a cold-cache thundering herd triggers exactly + // ONE backend exchange while the rest wait, then all observe the cached + // token — instead of every caller firing its own verify. The cost is + // that those concurrent callers serialise for the duration of that one + // exchange; acceptable because it is bounded by the client request + // timeout and only happens on a cold/just-reloaded cache.
+ let mut cached = self.cached.lock().await; + if let Some(token) = cached.as_ref() { + return Ok(token.clone()); + } + let token = self.load().await?; + *cached = Some(token.clone()); + Ok(token) + } + + async fn reload(&self) -> Result<(), AuthError> { + let mut cached = self.cached.lock().await; + *cached = None; + Ok(()) + } +} + +/// In-memory provider for tests: returns a fixed token every call. +pub struct StaticAuthProvider { + token: Arc<Mutex<String>>, +} + +impl StaticAuthProvider { + /// Create a provider that returns `token` on every call. + pub fn new(token: impl Into<String>) -> Self { + Self { + token: Arc::new(Mutex::new(token.into())), + } + } + + /// Replace the cached token; useful for asserting reload behaviour. + #[allow(dead_code)] + pub async fn set_token(&self, token: impl Into<String>) { + let mut guard = self.token.lock().await; + *guard = token.into(); + } +} + +#[async_trait] +impl AuthProvider for StaticAuthProvider { + async fn bearer_token(&self) -> Result<String, AuthError> { + Ok(self.token.lock().await.clone()) + } + + async fn reload(&self) -> Result<(), AuthError> { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + use wiremock::matchers::{body_json, method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + #[tokio::test] + async fn file_provider_exchanges_api_key_for_jwt() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/auth/verify-api-key")) + .and(body_json(serde_json::json!({"api_key": "nrc_abc"}))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "access_token": "jwt-1" + }))) + .expect(1) + .mount(&server) + .await; + + let dir = TempDir::new().unwrap(); + let config = dir.path().join("config.json"); + tokio::fs::write(&config, r#"{"api_key":"nrc_abc"}"#) + .await + .unwrap(); + let provider = FileAuthProvider::new(&config, server.uri()).unwrap(); + assert_eq!(provider.bearer_token().await.unwrap(), "jwt-1"); + // Cached — second call must not
re-hit the exchange endpoint. + assert_eq!(provider.bearer_token().await.unwrap(), "jwt-1"); + } + + #[tokio::test] + async fn file_provider_reload_re_exchanges() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/auth/verify-api-key")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "access_token": "jwt-1" + }))) + .up_to_n_times(1) + .mount(&server) + .await; + Mock::given(method("POST")) + .and(path("/auth/verify-api-key")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "access_token": "jwt-2" + }))) + .expect(1) + .mount(&server) + .await; + + let dir = TempDir::new().unwrap(); + let config = dir.path().join("config.json"); + tokio::fs::write(&config, r#"{"api_key":"nrc_abc"}"#) + .await + .unwrap(); + let provider = FileAuthProvider::new(&config, server.uri()).unwrap(); + assert_eq!(provider.bearer_token().await.unwrap(), "jwt-1"); + provider.reload().await.unwrap(); + assert_eq!(provider.bearer_token().await.unwrap(), "jwt-2"); + } + + #[tokio::test] + async fn file_provider_prefers_pre_populated_access_token() { + // No mock server needed — a pre-populated access_token bypasses the + // exchange entirely so we should not hit the network at all.
+ let dir = TempDir::new().unwrap(); + let config = dir.path().join("config.json"); + tokio::fs::write( + &config, + r#"{"api_key":"nrc_abc","access_token":"jwt-pinned"}"#, + ) + .await + .unwrap(); + let provider = FileAuthProvider::new(&config, "http://127.0.0.1:1/unused").unwrap(); + assert_eq!(provider.bearer_token().await.unwrap(), "jwt-pinned"); + } + + #[tokio::test] + async fn file_provider_missing_token_errors() { + let dir = TempDir::new().unwrap(); + let config = dir.path().join("config.json"); + tokio::fs::write(&config, r#"{}"#).await.unwrap(); + let provider = FileAuthProvider::new(&config, "http://127.0.0.1:1/unused").unwrap(); + let err = provider.bearer_token().await.unwrap_err(); + assert!(matches!(err, AuthError::Missing { .. })); + } + + #[tokio::test] + async fn file_provider_exchange_non_2xx_surfaces_status() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/auth/verify-api-key")) + .respond_with(ResponseTemplate::new(401).set_body_string("nope")) + .mount(&server) + .await; + + let dir = TempDir::new().unwrap(); + let config = dir.path().join("config.json"); + tokio::fs::write(&config, r#"{"api_key":"nrc_abc"}"#) + .await + .unwrap(); + let provider = FileAuthProvider::new(&config, server.uri()).unwrap(); + let err = provider.bearer_token().await.unwrap_err(); + match err { + AuthError::ExchangeStatus { status, body, .. 
} => { + assert_eq!(status, StatusCode::UNAUTHORIZED); + assert!(body.contains("nope")); + } + other => panic!("unexpected error: {other:?}"), + } + } + + #[tokio::test] + async fn file_provider_exchange_missing_access_token_errors() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/auth/verify-api-key")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({}))) + .mount(&server) + .await; + + let dir = TempDir::new().unwrap(); + let config = dir.path().join("config.json"); + tokio::fs::write(&config, r#"{"api_key":"nrc_abc"}"#) + .await + .unwrap(); + let provider = FileAuthProvider::new(&config, server.uri()).unwrap(); + let err = provider.bearer_token().await.unwrap_err(); + assert!(matches!(err, AuthError::ExchangeMissingToken { .. })); + } + + #[test] + fn verify_api_key_url_dedupes_trailing_slash() { + assert_eq!( + verify_api_key_url("https://api/api"), + "https://api/api/auth/verify-api-key" + ); + assert_eq!( + verify_api_key_url("https://api/api/"), + "https://api/api/auth/verify-api-key" + ); + } +} diff --git a/rust/data_daemon/src/api/client.rs b/rust/data_daemon/src/api/client.rs new file mode 100644 index 000000000..66fb8d277 --- /dev/null +++ b/rust/data_daemon/src/api/client.rs @@ -0,0 +1,713 @@ +//! Shared HTTP client used by every cloud coordinator. +//! +//! Centralises the auth header, retry policy, timeouts, and base URL so each +//! coordinator (registration, uploader, status updater, progress reporter) +//! talks to the backend through a single configured instance. The retry +//! policy is max 3 attempts on `{408, 425, 429, 500, 502, 503, 504}` (see +//! `RETRYABLE_STATUS_CODES`; 501 is not retried), with exponential +//! backoff capped at 30 s.
+ +use std::collections::HashMap; +use std::sync::Arc; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION}; +use reqwest::{Client, Method, Request, Response, StatusCode}; +use serde::Serialize; +use thiserror::Error; +use tokio::time::sleep; + +use crate::api::auth::{AuthError, AuthProvider}; +use crate::api::models::{ + BatchRegisterResponse, RecordingStartResponse, RegisterTraceRequest, + ResumableUploadUrlResponse, TraceStatusUpdate, +}; + +/// Retry policy constants — match `const.py::BACKEND_API_*`. +pub const BACKEND_API_MAX_RETRIES: u32 = 3; +/// Cap for exponential backoff between retries (seconds). +pub const BACKEND_API_MAX_BACKOFF_SECONDS: u64 = 30; +/// Status codes the client retries on automatically. +pub const RETRYABLE_STATUS_CODES: &[u16] = &[408, 425, 429, 500, 502, 503, 504]; + +/// Construction-time configuration for [`ApiClient`]. +#[derive(Debug, Clone)] +pub struct ApiClientOptions { + /// Base URL, e.g. `https://api.neuracore.app/api`. + pub base_url: String, + /// Per-request timeout. Defaults to 30 seconds. + pub timeout: Duration, + /// Retry budget on retryable status codes. + pub max_retries: u32, + /// Cap on the exponential backoff between retries. + pub max_backoff: Duration, +} + +impl ApiClientOptions { + /// Build options for the given backend base URL with the policy defaults. + pub fn new(base_url: impl Into<String>) -> Self { + Self { + base_url: base_url.into(), + timeout: Duration::from_secs(30), + max_retries: BACKEND_API_MAX_RETRIES, + max_backoff: Duration::from_secs(BACKEND_API_MAX_BACKOFF_SECONDS), + } + } +} + +/// Errors raised by the API client. +#[derive(Debug, Error)] +pub enum ApiClientError { + /// Underlying transport failure (DNS, timeout, TLS, etc.). + #[error(transparent)] + Transport(#[from] reqwest::Error), + /// Auth provider failed to supply a token (file missing, malformed, etc.).
+ #[error(transparent)] + Auth(#[from] AuthError), + /// Non-success response status that was not, or could no longer be, retried. + #[error("backend responded with HTTP {status}: {body}")] + Status { + /// HTTP status code returned by the backend. + status: StatusCode, + /// Response body text; `send_with_retry` stores it whole (no truncation). + body: String, + }, + /// Response body did not deserialise. + #[error("failed to decode backend response: {0}")] + Decode(#[source] serde_json::Error), + /// Response was missing a header the client expected. + #[error("response missing required header {0}")] + MissingHeader(&'static str), +} + +impl ApiClientError { + /// True when the backend responded `404 Not Found`. + /// + /// The recording notifiers use this to treat "the recording is already not + /// open on the backend" as the desired post-condition rather than a + /// failure: a cancel/stop POST that 404s means another path already closed + /// the recording (a benign race between the cancel-notifier sweep and the + /// start-notifier's `resolve_prior_pending`), so there is nothing left to do. + pub fn is_not_found(&self) -> bool { + matches!(self, ApiClientError::Status { status, .. } if *status == StatusCode::NOT_FOUND) + } +} + +/// Upload connection-pool tuning for [`build_upload_client`]. Sized together for +/// the uploader's burst of concurrent per-file PUTs: +/// - `MAX_IDLE_PER_HOST` matches the uploader's `MAX_CONCURRENT_UPLOADS` so a +/// full burst can each keep its connection warm for the next file instead of +/// being evicted between files. +/// - `IDLE_TIMEOUT` keeps an idle connection warm across the gap between +/// consecutive recordings, so the next burst reuses the pool rather than +/// re-handshaking TLS. +/// - `TCP_KEEPALIVE` holds a warm-but-idle path open (and detects a dead one) +/// rather than letting it silently go half-open.
+const UPLOAD_POOL_MAX_IDLE_PER_HOST: usize = 128; +const UPLOAD_POOL_IDLE_TIMEOUT: Duration = Duration::from_secs(300); +const UPLOAD_TCP_KEEPALIVE: Duration = Duration::from_secs(60); + +/// Build the dedicated client the uploader PUTs file chunks through. +/// +/// Pinned to **HTTP/1.1** so the uploader's concurrent per-file PUTs open +/// *parallel* TCP connections (each with its own congestion window) instead of +/// multiplexing onto a single HTTP/2 connection. A single h2 connection shares +/// one slow-start congestion window across every upload, so a burst of small +/// files finishes before the window ramps and never reaches link speed; N +/// parallel h1 connections fill the bandwidth-delay product immediately. The +/// generous idle pool + TCP keepalive keep those connections warm so files +/// 2..N (and the next recording's burst) reuse them instead of re-handshaking. +/// +/// Deliberately sets **no client-level `timeout`**: a streaming chunk PUT is +/// bounded by `upload_transfer::CHUNK_UPLOAD_TIMEOUT` (a per-chunk tokio +/// timeout), which tolerates a full 16 MiB chunk on a slow link. The API +/// client's short overall timeout would cap that sustained-throughput floor. +fn build_upload_client() -> Result { + Client::builder() + .http1_only() + .pool_max_idle_per_host(UPLOAD_POOL_MAX_IDLE_PER_HOST) + .pool_idle_timeout(Some(UPLOAD_POOL_IDLE_TIMEOUT)) + .tcp_keepalive(Some(UPLOAD_TCP_KEEPALIVE)) + .build() +} + +/// Generic HTTP client wrapping a [`reqwest::Client`] with auth + retry. +pub struct ApiClient { + /// Control-plane client for the Neuracore API (registration, lifecycle, + /// status). HTTP/2-capable: many small request/response calls benefit from + /// multiplexing over one connection. + inner: Client, + /// Data-plane client for direct-to-GCS chunk PUTs. HTTP/1.1 with a warm + /// connection pool — see [`build_upload_client`].
+ upload_inner: Client, + options: ApiClientOptions, + auth: Arc, +} + +impl ApiClient { + /// Build a client with the given options and auth provider. + pub fn new( + options: ApiClientOptions, + auth: Arc, + ) -> Result { + let inner = Client::builder() + .timeout(options.timeout) + .http2_adaptive_window(true) + .pool_max_idle_per_host(128) + .build()?; + let upload_inner = build_upload_client()?; + Ok(Self { + inner, + upload_inner, + options, + auth, + }) + } + + /// Borrow the dedicated upload client — exposed for the uploader, which PUTs + /// chunks straight to GCS-issued URLs that are not relative to the configured + /// `base_url`. This is the HTTP/1.1, warm-pool client (see + /// [`build_upload_client`]), kept separate from the control-plane `inner` + /// client so concurrent file PUTs fan out across parallel connections. + pub fn raw_client(&self) -> &Client { + &self.upload_inner + } + + /// Borrow the configured auth provider. + pub fn auth(&self) -> &Arc { + &self.auth + } + + /// Borrow the configured options. + pub fn options(&self) -> &ApiClientOptions { + &self.options + } + + /// Build a URL beneath the configured `base_url`; `path` may start with `/`. + /// An absolute `http://` or `https://` `path` is returned unchanged. + pub fn url(&self, path: &str) -> String { + if path.starts_with("http://") || path.starts_with("https://") { + return path.to_string(); + } + let base = self.options.base_url.trim_end_matches('/'); + if let Some(stripped) = path.strip_prefix('/') { + format!("{base}/{stripped}") + } else { + format!("{base}/{path}") + } + } + + /// `HEAD /status/health` — used by the connection monitor. + /// + /// Returns `true` when the backend reports any non-5xx status. + pub async fn health_check(&self) -> Result { + let request = self.inner.head(self.url("/status/health")).build()?; + let response = self.inner.execute(request).await?; + Ok(response.status().as_u16() < 500) + } + + /// `POST /org/{org}/recording/traces/batch-register`.
+ pub async fn batch_register( + &self, + org_id: &str, + traces: &[RegisterTraceRequest], + ) -> Result { + let path = format!("/org/{org_id}/recording/traces/batch-register"); + #[derive(Serialize)] + struct Body<'a> { + traces: &'a [RegisterTraceRequest], + } + let body = Body { traces }; + let response = self + .send_with_retry(Method::POST, &path, |builder| builder.json(&body)) + .await?; + let bytes = response.bytes().await?; + serde_json::from_slice::(&bytes).map_err(ApiClientError::Decode) + } + + /// `GET /org/{org}/recording/{rec}/resumable_upload_url`. + pub async fn fetch_resumable_upload_url( + &self, + org_id: &str, + recording_id: &str, + filepath: &str, + content_type: &str, + ) -> Result { + let path = format!("/org/{org_id}/recording/{recording_id}/resumable_upload_url"); + let query = [("filepath", filepath), ("content_type", content_type)]; + let response = self + .send_with_retry(Method::GET, &path, |builder| builder.query(&query)) + .await?; + let bytes = response.bytes().await?; + let parsed: ResumableUploadUrlResponse = + serde_json::from_slice(&bytes).map_err(ApiClientError::Decode)?; + Ok(parsed.url) + } + + /// `PUT /org/{org}/recording/{rec}/traces/batch-update`. + pub async fn batch_update_traces( + &self, + org_id: &str, + recording_id: &str, + updates: &HashMap, + ) -> Result<(), ApiClientError> { + let path = format!("/org/{org_id}/recording/{recording_id}/traces/batch-update"); + #[derive(Serialize)] + struct Body<'a> { + updates: &'a HashMap, + } + let body = Body { updates }; + let _ = self + .send_with_retry(Method::PUT, &path, |builder| builder.json(&body)) + .await?; + Ok(()) + } + + /// `POST /org/{org}/recording/{rec}/traces-metadata`. 
+ pub async fn report_progress( + &self, + org_id: &str, + recording_id: &str, + traces: &HashMap, + ) -> Result<(), ApiClientError> { + let path = format!("/org/{org_id}/recording/{recording_id}/traces-metadata"); + #[derive(Serialize)] + struct Body<'a> { + traces: &'a HashMap, + } + let body = Body { traces }; + let _ = self + .send_with_retry(Method::POST, &path, |builder| builder.json(&body)) + .await?; + Ok(()) + } + + /// `POST /org/{org}/recording/stop` with a JSON body carrying + /// `recording_id` and the producer-captured `end_time` (Unix seconds). + /// + /// `end_time` is the recording window's real upper bound captured by the + /// producer, so the backend reports the true duration even for recordings + /// notified late (e.g. after reconnecting). + pub async fn recording_stop( + &self, + org_id: &str, + recording_id: &str, + end_time: f64, + ) -> Result<(), ApiClientError> { + self.recording_lifecycle_post(org_id, "stop", recording_id, end_time) + .await + } + + /// Shared body/send for the `/recording/stop` and + /// `/recording/cancel` POSTs — they differ only in the trailing URL segment + /// (`action`) and both carry `{recording_id, end_time}`. + async fn recording_lifecycle_post( + &self, + org_id: &str, + action: &str, + recording_id: &str, + end_time: f64, + ) -> Result<(), ApiClientError> { + let path = format!("/org/{org_id}/recording/{action}"); + #[derive(Serialize)] + struct Body<'a> { + recording_id: &'a str, + end_time: f64, + } + let body = Body { + recording_id, + end_time, + }; + let _ = self + .send_with_retry(Method::POST, &path, |builder| builder.json(&body)) + .await?; + Ok(()) + } + + /// `POST /org/{org}/recording/cancel` with a JSON body carrying + /// `recording_id` and `end_time` (the cancel time, Unix seconds) — the same + /// body shape the backend now requires for `/recording/stop`.
+ /// + /// Cancels the recording server-side, which also clears it as the robot + /// instance's *pending* recording so the next `/recording/start` mints a + /// fresh id instead of reusing this one. The daemon's cancel notifier makes + /// this call best-effort once it knows the cloud `recording_id`; the SDK + /// no longer calls it inline. + pub async fn recording_cancel( + &self, + org_id: &str, + recording_id: &str, + end_time: f64, + ) -> Result<(), ApiClientError> { + self.recording_lifecycle_post(org_id, "cancel", recording_id, end_time) + .await + } + + /// `POST /org/{org}/recording/start`. + /// + /// Opens a recording server-side and returns the backend-minted cloud + /// `recording_id`. Mirrors [`recording_stop`](Self::recording_stop): the + /// SDK no longer makes this call inline — the daemon's recording-start + /// notifier POSTs it in the background once the local recording row + /// exists, absorbing staging POST tail latency off the SDK's hot path. + /// The body carries the source identity plus the client-captured + /// `start_time` (Unix seconds) the backend requires; the response is + /// `{"id": "..."}`. + pub async fn recording_start( + &self, + org_id: &str, + robot_id: &str, + instance: i64, + dataset_id: &str, + start_time: f64, + ) -> Result { + let path = format!("/org/{org_id}/recording/start"); + #[derive(Serialize)] + struct Body<'a> { + robot_id: &'a str, + instance: i64, + dataset_id: &'a str, + start_time: f64, + } + let body = Body { + robot_id, + instance, + dataset_id, + start_time, + }; + let response = self + .send_with_retry(Method::POST, &path, |builder| builder.json(&body)) + .await?; + let bytes = response.bytes().await?; + let parsed: RecordingStartResponse = + serde_json::from_slice(&bytes).map_err(ApiClientError::Decode)?; + Ok(parsed.id) + } + + /// `PUT /org/{org}/recording/{rec}/expected-trace-count`.
+ /// + /// Tells the backend how many traces to expect for this recording so it + /// can promote the recording into its parent dataset once all traces are + /// uploaded. Without this the recording stays hidden from + /// `nc.get_dataset(...)` indefinitely even after every trace is uploaded. + pub async fn put_expected_trace_count( + &self, + org_id: &str, + recording_id: &str, + expected_trace_count: i64, + ) -> Result<(), ApiClientError> { + let path = format!("/org/{org_id}/recording/{recording_id}/expected-trace-count"); + #[derive(Serialize)] + struct Body { + expected_trace_count: i64, + } + let body = Body { + expected_trace_count, + }; + let _ = self + .send_with_retry(Method::PUT, &path, |builder| builder.json(&body)) + .await?; + Ok(()) + } + + /// Send a request with the daemon's standard retry policy. + /// + /// `build` is invoked on a fresh `RequestBuilder` for every attempt, so the + /// body / query it attaches is rebuilt each time (`execute` consumes the + /// built `Request`, so one cannot be carried across retries). The first 401 + /// triggers one auth reload and a retry that does not consume the budget. + async fn send_with_retry( + &self, + method: Method, + path: &str, + build: F, + ) -> Result + where + F: Fn(reqwest::RequestBuilder) -> reqwest::RequestBuilder, + { + let url = self.url(path); + let mut refreshed_auth = false; + let mut attempt: u32 = 0; + loop { + let headers = self.authorised_headers().await?; + let builder = self + .inner + .request(method.clone(), &url) + .headers(headers.clone()); + let builder = build(builder); + let request: Request = builder.build()?; + let response = match self.inner.execute(request).await { + Ok(response) => response, + Err(error) => { + // Transport-level failures — connection resets, DNS blips, + // TLS errors, and request timeouts — are the common failure + // on a flaky robot link and would otherwise bypass the + // status-code retry below entirely.
Retry the ones reqwest + // classifies as timeout or connect errors within the same attempt + // budget; anything else (or an exhausted budget) propagates. + if (error.is_timeout() || error.is_connect()) + && attempt + 1 < self.options.max_retries + { + attempt += 1; + let backoff = self.backoff(attempt); + tracing::warn!(%url, %error, attempt, "retrying after transport error"); + sleep(backoff).await; + continue; + } + return Err(error.into()); + } + }; + + let status = response.status(); + if status == StatusCode::UNAUTHORIZED && !refreshed_auth { + tracing::debug!(%url, "received 401, reloading auth token"); + self.auth.reload().await?; + refreshed_auth = true; + continue; + } + + if status.is_success() { + return Ok(response); + } + + if RETRYABLE_STATUS_CODES.contains(&status.as_u16()) + && attempt + 1 < self.options.max_retries + { + attempt += 1; + let backoff = self.backoff(attempt); + tracing::warn!( + %url, + %status, + attempt, + "retrying after retryable status" + ); + sleep(backoff).await; + continue; + } + + let body = response.text().await.unwrap_or_default(); + return Err(ApiClientError::Status { status, body }); + } + } + + /// Build the `Authorization` header map `send_with_retry` attaches to each attempt. + /// + /// Fetches a fresh bearer token from the auth provider and formats it as + /// `Authorization: Bearer <token>`. The token comes from user-controlled + /// config, so a value that cannot be encoded into a header byte string is + /// surfaced as [`ApiClientError::Decode`]. This is the only header set at the + /// client level — bodied `POST`/`PUT` calls add their own + /// `Content-Type: application/json` via the request builder.
+ async fn authorised_headers(&self) -> Result { + let token = self.auth.bearer_token().await?; + let mut headers = HeaderMap::new(); + let value = HeaderValue::from_str(&format!("Bearer {token}")).map_err(|_| { + // The token came from user-controlled JSON, but a value that + // cannot fit in a header byte string would mean the file is + // corrupt β€” surface it as a Decode error so it shows up in the + // tracing log alongside other parse failures. + ApiClientError::Decode(serde_json::Error::io(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "bearer token contains invalid header characters", + ))) + })?; + headers.insert(AUTHORIZATION, value); + // Only the Authorization header is shared across methods. The bodied + // POST/PUT calls set `Content-Type: application/json` themselves via + // `.json(..)`, so the bodyless GET (and HEAD) don't advertise a JSON body. + Ok(headers) + } + + fn backoff(&self, attempt: u32) -> Duration { + let secs = 2u64.saturating_pow(attempt.saturating_sub(1)); + let capped = secs.min(self.options.max_backoff.as_secs().max(1)); + // Equal jitter: a fixed half plus a random half in `[0, base/2]`. Pure + // `2^n` backoff makes every client that backed off together retry on the + // same tick, stampeding the backend the instant it recovers; spreading + // each client's wake over a window decorrelates them. (Mean wait is also + // ≀ the old fixed value.) + let base_ms = capped.saturating_mul(1000); + let half = base_ms / 2; + let jitter = if half > 0 { jitter_below(half) } else { 0 }; + Duration::from_millis(half + jitter) + } +} + +/// A pseudo-random value in `[0, upper]`, seeded from the wall clock's +/// nanosecond component. Good enough to decorrelate retry delays across +/// clients without pulling in a `rand` dependency for a non-cryptographic use. 
+fn jitter_below(upper: u64) -> u64 { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|elapsed| elapsed.as_nanos()) + .unwrap_or(0); + (nanos % (u128::from(upper) + 1)) as u64 +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::api::auth::StaticAuthProvider; + use std::sync::atomic::{AtomicUsize, Ordering}; + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + fn options(base_url: String) -> ApiClientOptions { + ApiClientOptions { + base_url, + timeout: Duration::from_secs(5), + max_retries: 3, + // Tighten the backoff cap so retry-tests run inside their own + // tokio time advance window without waiting real seconds. + max_backoff: Duration::from_secs(1), + } + } + + fn client(server: &MockServer) -> ApiClient { + let auth = Arc::new(StaticAuthProvider::new("test-token")); + ApiClient::new(options(server.uri()), auth).expect("client") + } + + #[tokio::test] + async fn health_check_returns_true_on_2xx() { + let server = MockServer::start().await; + Mock::given(method("HEAD")) + .and(path("/status/health")) + .respond_with(ResponseTemplate::new(200)) + .expect(1) + .mount(&server) + .await; + + let client = client(&server); + assert!(client.health_check().await.unwrap()); + } + + #[tokio::test] + async fn health_check_returns_false_on_5xx() { + let server = MockServer::start().await; + Mock::given(method("HEAD")) + .and(path("/status/health")) + .respond_with(ResponseTemplate::new(503)) + .expect(1) + .mount(&server) + .await; + + let client = client(&server); + assert!(!client.health_check().await.unwrap()); + } + + #[tokio::test] + async fn batch_register_round_trips_response() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/traces/batch-register")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "registered_traces": [{ + "trace_id": "trace-1", + "upload_session_uris": {"rgb/cam_0/lossy.mp4": 
"https://upload.example/1"} + }], + "failed_traces": [] + }))) + .expect(1) + .mount(&server) + .await; + let client = client(&server); + + let traces = vec![RegisterTraceRequest { + recording_id: "rec-1".to_string(), + data_type: "RGB_IMAGES".to_string(), + trace_id: "trace-1".to_string(), + cloud_files: vec![], + }]; + let outcome = client.batch_register("org-1", &traces).await.unwrap(); + assert_eq!(outcome.registered_traces.len(), 1); + assert_eq!(outcome.registered_traces[0].trace_id, "trace-1"); + } + + #[tokio::test] + async fn retry_on_5xx_until_success() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/traces/batch-register")) + .respond_with(ResponseTemplate::new(503)) + .up_to_n_times(2) + .mount(&server) + .await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/traces/batch-register")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "registered_traces": [], "failed_traces": [] + }))) + .expect(1) + .mount(&server) + .await; + + let client = client(&server); + let result = client.batch_register("org-1", &[]).await.unwrap(); + assert!(result.registered_traces.is_empty()); + } + + #[tokio::test] + async fn reloads_auth_on_401() { + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/org/org-1/recording/rec-1/resumable_upload_url")) + .respond_with(ResponseTemplate::new(401)) + .up_to_n_times(1) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path("/org/org-1/recording/rec-1/resumable_upload_url")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "url": "https://upload.example/abc" + }))) + .expect(1) + .mount(&server) + .await; + + let calls = Arc::new(AtomicUsize::new(0)); + struct CountingProvider { + calls: Arc, + } + #[async_trait::async_trait] + impl AuthProvider for CountingProvider { + async fn bearer_token(&self) -> Result { + Ok("token".to_string()) + } + async fn 
reload(&self) -> Result<(), AuthError> { + self.calls.fetch_add(1, Ordering::SeqCst); + Ok(()) + } + } + let auth = Arc::new(CountingProvider { + calls: Arc::clone(&calls), + }); + let client = ApiClient::new(options(server.uri()), auth).unwrap(); + let url = client + .fetch_resumable_upload_url("org-1", "rec-1", "path", "application/json") + .await + .unwrap(); + assert_eq!(url, "https://upload.example/abc"); + assert_eq!(calls.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn non_retryable_status_surfaces_error() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/traces/batch-register")) + .respond_with(ResponseTemplate::new(400).set_body_string("bad request")) + .expect(1) + .mount(&server) + .await; + + let client = client(&server); + let error = client.batch_register("org-1", &[]).await.unwrap_err(); + match error { + ApiClientError::Status { status, body } => { + assert_eq!(status, StatusCode::BAD_REQUEST); + assert!(body.contains("bad request")); + } + other => panic!("unexpected error: {other:?}"), + } + } +} diff --git a/rust/data_daemon/src/api/mod.rs b/rust/data_daemon/src/api/mod.rs new file mode 100644 index 000000000..b2afb14c8 --- /dev/null +++ b/rust/data_daemon/src/api/mod.rs @@ -0,0 +1,20 @@ +//! HTTP client, auth, and request/response types for the Neuracore backend. +//! +//! Centralises the construction of a single [`ApiClient`] used by every +//! upload coordinator so the bearer header, retry policy, and timeouts are +//! configured in exactly one place. The backend endpoints are exposed as +//! methods on the client. 
+ +pub mod auth; +pub mod client; +pub mod models; + +#[allow(unused_imports)] +pub use auth::{AuthError, AuthProvider, FileAuthProvider}; +#[allow(unused_imports)] +pub use client::{ApiClient, ApiClientError, ApiClientOptions}; +#[allow(unused_imports)] +pub use models::{ + BatchRegisterResponse, CloudFile, RegisterTraceRequest, ResumableUploadUrlResponse, + TraceStatusUpdate, TraceStatusValue, +}; diff --git a/rust/data_daemon/src/api/models.rs b/rust/data_daemon/src/api/models.rs new file mode 100644 index 000000000..d9228b6ee --- /dev/null +++ b/rust/data_daemon/src/api/models.rs @@ -0,0 +1,186 @@ +//! Request and response shapes for the Neuracore backend. +//! +//! The daemon's serde types are kept thin: only fields the daemon writes or +//! reads are modelled, so a schema change on a field the daemon ignores does +//! not break the client. + +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; + +/// One file the backend should expect for a trace registration request. +/// +/// Matches the body of `POST /org/{org}/recording/traces/batch-register`, +/// `traces[].cloud_files[]`. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct CloudFile { + /// Path inside the trace's cloud directory, e.g. `"video/cam_0/lossy.mp4"`. + pub filepath: String, + /// MIME type, e.g. `"video/mp4"`. + pub content_type: String, +} + +/// One trace inside a batch-register request body. +#[derive(Debug, Clone, Serialize, PartialEq, Eq)] +pub struct RegisterTraceRequest { + /// Recording the trace belongs to. + pub recording_id: String, + /// Wire data-type label (e.g. `"RGB_IMAGES"`). + pub data_type: String, + /// Trace identifier. + pub trace_id: String, + /// Files to register for this trace. + pub cloud_files: Vec, +} + +/// Backend response payload for `POST /traces/batch-register`. 
+#[derive(Debug, Clone, Deserialize, Default)] +pub struct BatchRegisterResponse { + /// Traces the backend accepted, with the GCS resumable session URIs the + /// daemon should PUT chunks to. + #[serde(default)] + pub registered_traces: Vec, + /// Traces the backend rejected, with a per-trace error message. + #[serde(default)] + pub failed_traces: Vec, +} + +/// One successful entry in the batch-register response. +#[derive(Debug, Clone, Deserialize)] +pub struct RegisteredTrace { + /// Trace identifier accepted by the backend. + pub trace_id: String, + /// Map of `cloud_file.filepath → resumable session URI`. Defaults to an empty + /// map when the backend omits the field, which it may do when the trace has + /// no upload targets (e.g. a metadata-only trace). + #[serde(default)] + pub upload_session_uris: BTreeMap, +} + +/// One failed entry in the batch-register response. +#[derive(Debug, Clone, Deserialize)] +pub struct FailedTrace { + /// Trace identifier the backend rejected. + pub trace_id: String, + /// Optional human-readable error message. + #[serde(default)] + pub error: Option, +} + +/// Response for `GET /recording/{rec}/resumable_upload_url`. +#[derive(Debug, Clone, Deserialize)] +pub struct ResumableUploadUrlResponse { + /// Fresh resumable session URI. + pub url: String, +} + +/// Response for `POST /recording/start`. +#[derive(Debug, Clone, Deserialize)] +pub struct RecordingStartResponse { + /// Backend-minted cloud `recording_id`. + pub id: String, +} + +/// Status value for the per-trace batch update API. Matches the wire enum +/// `RecordingDataTraceStatus` in `neuracore_types`. +#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)] +pub enum TraceStatusValue { + /// Trace has been queued for upload. + #[serde(rename = "QUEUED")] + Queued, + /// Upload of this trace has started. + #[serde(rename = "UPLOAD_STARTED")] + UploadStarted, + /// Upload of this trace has completed.
+ #[serde(rename = "UPLOAD_COMPLETE")] + UploadComplete, +} + +/// One per-trace update inside a batch-update request body. +/// +/// Fields are `Option`-wrapped and skipped when null so the wire body only +/// carries fields the caller actually wants to change. +#[derive(Debug, Clone, Serialize, Default)] +pub struct TraceStatusUpdate { + /// Lifecycle status. + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, + /// Bytes uploaded so far. + #[serde(skip_serializing_if = "Option::is_none")] + pub uploaded_bytes: Option, + /// Total bytes once finalised. + #[serde(skip_serializing_if = "Option::is_none")] + pub total_bytes: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn batch_register_body_matches_python_layout() { + let request = RegisterTraceRequest { + recording_id: "rec-1".to_string(), + data_type: "RGB_IMAGES".to_string(), + trace_id: "trace-1".to_string(), + cloud_files: vec![CloudFile { + filepath: "rgb/cam_0/lossy.mp4".to_string(), + content_type: "video/mp4".to_string(), + }], + }; + let body = serde_json::to_value(serde_json::json!({"traces": [request]})).unwrap(); + assert_eq!( + body, + serde_json::json!({ + "traces": [{ + "recording_id": "rec-1", + "data_type": "RGB_IMAGES", + "trace_id": "trace-1", + "cloud_files": [{ + "filepath": "rgb/cam_0/lossy.mp4", + "content_type": "video/mp4" + }] + }] + }) + ); + } + + #[test] + fn trace_status_update_strips_unset_fields() { + let update = TraceStatusUpdate { + status: Some(TraceStatusValue::UploadComplete), + uploaded_bytes: Some(42), + total_bytes: None, + }; + let json = serde_json::to_value(&update).unwrap(); + assert_eq!( + json, + serde_json::json!({"status": "UPLOAD_COMPLETE", "uploaded_bytes": 42}) + ); + } + + #[test] + fn batch_register_response_round_trips() { + let body = serde_json::json!({ + "registered_traces": [{ + "trace_id": "trace-1", + "upload_session_uris": {"rgb/cam_0/lossy.mp4": "https://upload.example/1"} + }], + "failed_traces": [{ + 
"trace_id": "trace-2", + "error": "bad cloud file" + }] + }); + let response: BatchRegisterResponse = serde_json::from_value(body).unwrap(); + assert_eq!(response.registered_traces.len(), 1); + assert_eq!(response.registered_traces[0].trace_id, "trace-1"); + assert_eq!( + response.registered_traces[0] + .upload_session_uris + .get("rgb/cam_0/lossy.mp4") + .map(String::as_str), + Some("https://upload.example/1") + ); + assert_eq!(response.failed_traces[0].trace_id, "trace-2"); + } +} diff --git a/rust/data_daemon/src/cli/coordinators.rs b/rust/data_daemon/src/cli/coordinators.rs new file mode 100644 index 000000000..ffd6b9fa5 --- /dev/null +++ b/rust/data_daemon/src/cli/coordinators.rs @@ -0,0 +1,154 @@ +//! Cloud-coordinator wiring for the daemon main loop. +//! +//! Builds the shared `ApiClient` and spawns the cloud-side coordinators +//! (registration, upload, status, progress, recording notifiers, connection +//! monitor, and org watcher), bundling their handles so `cli::launch` can join +//! them in a defined order at shutdown. + +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use anyhow::{Context, Result}; + +use crate::api::auth::FileAuthProvider; +use crate::api::client::{ApiClient, ApiClientOptions}; +use crate::cloud::{ + spawn_org_watcher, spawn_progress_reporter, spawn_recording_cancel_notifier, + spawn_recording_start_notifier, spawn_recording_stop_notifier, spawn_registration, + spawn_status_updater, spawn_uploader, OrgWatcherHandle, StatusUpdate, +}; +use crate::connection::spawn_connection_monitor; +use crate::state::{EventBus, SqliteStateStore, TraceWriteHandle}; + +/// Bundle of handles for the cloud coordinators. 
+pub(crate) struct CloudHandles { + connection: crate::connection::MonitorHandle, + org_watcher: OrgWatcherHandle, + registration: crate::cloud::RegistrationHandle, + uploader: crate::cloud::UploaderHandle, + status: crate::cloud::StatusUpdaterHandle, + progress: crate::cloud::ProgressReporterHandle, + recording_start: crate::cloud::NotifierHandle, + recording_stop: crate::cloud::NotifierHandle, + recording_cancel: crate::cloud::NotifierHandle, +} + +impl CloudHandles { + pub(crate) async fn join_all(self) { + // Connection monitor drops first because its tick is bounded by the + // health-check interval; the others have either bus subscriptions + // or pending requests that may need a moment to wrap up after the + // shutdown signal fires. + self.connection.join().await; + self.org_watcher.join().await; + self.registration.join().await; + self.uploader.join().await; + self.status.join().await; + self.progress.join().await; + self.recording_start.join().await; + self.recording_stop.join().await; + self.recording_cancel.join().await; + } +} + +pub(crate) fn build_api_client(api_url: &str, config_path: &Path) -> Result> { + let auth = Arc::new( + FileAuthProvider::new(config_path, api_url.to_string()) + .context("failed to construct auth provider")?, + ); + let options = ApiClientOptions::new(api_url.to_string()); + let client = ApiClient::new(options, auth).context("failed to build api client")?; + Ok(Arc::new(client)) +} + +#[allow(clippy::too_many_arguments)] +pub(crate) fn spawn_cloud_coordinators( + state_store: SqliteStateStore, + trace_writer: TraceWriteHandle, + event_bus: EventBus, + client: Arc, + recordings_root: Arc, + config_path: PathBuf, + fallback_org_id: Option, + shutdown_tx: crate::lifecycle::shutdown::ShutdownBroadcaster, +) -> CloudHandles { + let (status_tx, status_rx) = tokio::sync::mpsc::unbounded_channel::(); + // Watch the SDK config for the current org; every coordinator reads the + // live value at the moment it POSTs rather than a value 
frozen onto the + // recording row at creation time. + let (org_rx, org_watcher) = + spawn_org_watcher(config_path, fallback_org_id, shutdown_tx.subscribe()); + let connection = spawn_connection_monitor( + Arc::clone(&client), + event_bus.clone(), + shutdown_tx.subscribe(), + ); + let registration = spawn_registration( + state_store.clone(), + event_bus.clone(), + Arc::clone(&client), + org_rx.clone(), + shutdown_tx.subscribe(), + ); + let uploader = spawn_uploader( + state_store.clone(), + trace_writer, + event_bus.clone(), + Arc::clone(&client), + Arc::clone(&recordings_root), + org_rx.clone(), + status_tx.clone(), + shutdown_tx.subscribe(), + ); + // Drop the local sender so the only remaining sender is the uploader; once + // the uploader exits the inbox closes, which (alongside the shutdown + // broadcast) lets the status task exit cleanly without a dangling sender + // keeping the channel open. + drop(status_tx); + let status = spawn_status_updater( + state_store.clone(), + Arc::clone(&client), + org_rx.clone(), + status_rx, + shutdown_tx.subscribe(), + ); + let progress = spawn_progress_reporter( + state_store.clone(), + Arc::clone(&client), + org_rx.clone(), + shutdown_tx.subscribe(), + ); + let recording_start = spawn_recording_start_notifier( + state_store.clone(), + event_bus.clone(), + Arc::clone(&client), + org_rx.clone(), + shutdown_tx.subscribe(), + ); + let recording_stop = spawn_recording_stop_notifier( + state_store.clone(), + event_bus.clone(), + Arc::clone(&client), + org_rx.clone(), + shutdown_tx.subscribe(), + ); + let recording_cancel = spawn_recording_cancel_notifier( + state_store, + event_bus, + Arc::clone(&client), + org_rx, + shutdown_tx.subscribe(), + ); + + CloudHandles { + connection, + org_watcher, + registration, + uploader, + status, + progress, + recording_start, + recording_stop, + recording_cancel, + } +} diff --git a/rust/data_daemon/src/cli/launch.rs b/rust/data_daemon/src/cli/launch.rs new file mode 100644 index 
000000000..e6956d527 --- /dev/null +++ b/rust/data_daemon/src/cli/launch.rs @@ -0,0 +1,524 @@ +//! `launch` subcommand handler. +//! +//! Resolves configuration, then wires the daemon lifecycle: the PID-file +//! single-instance lock, signal handling, and optional background +//! detachment. The daemon main loop brings up the iceoryx2 IPC listener, the +//! per-trace pipeline, and the cloud coordinators, then waits for +//! SIGTERM/SIGINT to broadcast a graceful shutdown. + +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::Duration; + +use anyhow::{Context, Result}; + +use crate::cli::coordinators::{build_api_client, spawn_cloud_coordinators}; +use crate::cli::launch_logging::{init_tracing, log_path_for, report_failure}; +use crate::cloud::{read_org_id_from_config, spawn_recording_reaper}; +use crate::config::env::RuntimeEnv; +use crate::config::profile::{ProfileError, ProfileManager}; +use crate::config::{resolve_effective_config, DaemonConfig, DEFAULT_PROFILE_NAME}; +use crate::connection::spawn_wakelock; +use crate::encoding::video_encoder::VideoEncoder; +use crate::ipc::listener; +use crate::ipc::node::IpcTransport; +use crate::lifecycle::daemonize::{daemonize, DaemonizeOutcome, Readiness, ReadinessReporter}; +use crate::lifecycle::pidfile::{PidFile, PidFileError}; +use crate::lifecycle::recovery::{cleanup_stale_ipc, reclaim_stale_pid_file, PidReclaim}; +use crate::lifecycle::shutdown::{install_shutdown_handler, ShutdownSignal}; +use crate::pipeline::dispatcher::{self, DispatcherContext}; +use crate::pipeline::trace_actor::TraceActorContext; +use crate::state::{EventBus, SqliteStateStore}; +use crate::storage::budget::{StorageBudget, StoragePolicy}; + +/// Upper bound on how long we wait for the signal-capture task after the +/// listener returns. 
In normal operation it has already completed; the +/// timeout exists so a future bug that lets the listener exit without a +/// shutdown signal degrades to a `?signal=sigterm` log rather than a hang. +const SIGNAL_CAPTURE_TIMEOUT: Duration = Duration::from_secs(1); + +/// Run the launch command. +pub fn run(profile: Option, background: bool, debug: bool) -> Result<()> { + let runtime_env = RuntimeEnv::from_env(); + let profiles = ProfileManager::new(); + + // Ensure the default profile exists before resolving config. A missing + // *named* profile needs no separate existence pre-check: the + // `resolve_effective_config` call below surfaces it as + // `ProfileError::NotFound`, handled in the same match arm. + if let Err(error) = ensure_default_profile_exists(&profiles) { + eprintln!("Failed to create default profile '{DEFAULT_PROFILE_NAME}': {error}"); + std::process::exit(1); + } + + let selected_profile = profile + .or_else(|| runtime_env.profile.clone()) + .unwrap_or_else(|| DEFAULT_PROFILE_NAME.to_string()); + + let config = match resolve_effective_config(&profiles, Some(&selected_profile), None) { + Ok(config) => config, + Err(error) => { + eprintln!("{error}"); + std::process::exit(1); + } + }; + + let effective_debug = debug || runtime_env.debug; + + if background { + // Tracing is initialised inside `run_daemon` *after* the fork+stream + // redirect so the subscriber writes to a real destination instead of + // /dev/null. The original parent doesn't need tracing β€” it only + // prints status. + let log_path = log_path_for(&runtime_env); + match daemonize().context("failed to daemonize")? 
{ + DaemonizeOutcome::Parent(reader) => handle_parent_readiness(reader), + DaemonizeOutcome::Child(reporter) => run_daemon( + runtime_env, + config, + effective_debug, + Some(reporter), + Some(log_path), + ), + } + } else { + print_preflight(&runtime_env, &config, &selected_profile); + run_daemon(runtime_env, config, effective_debug, None, None) + } +} + +/// Handle the original-caller branch of `daemonize`: block on the readiness +/// pipe, then propagate the grandchild's startup status to the user's shell. +fn handle_parent_readiness(reader: crate::lifecycle::daemonize::ReadinessReader) -> Result<()> { + match reader.read().context("failed to read daemon readiness")? { + Readiness::Ready(pid) => { + println!("{pid}"); + Ok(()) + } + Readiness::Failed(message) => { + eprintln!("{message}"); + std::process::exit(1); + } + Readiness::Disconnected => { + eprintln!("Daemon failed to start (no status reported)"); + std::process::exit(1); + } + } +} + +/// Run the daemon main loop until a shutdown signal arrives. +/// +/// `reporter` is `Some` in background mode and must receive a single ready or +/// fail message before the original caller unblocks. `log_file` is `Some` in +/// background mode and points at the file the grandchild routes tracing to, +/// because its stderr has already been redirected to /dev/null. 
+fn run_daemon( + runtime_env: RuntimeEnv, + config: DaemonConfig, + debug: bool, + reporter: Option, + log_file: Option, +) -> Result<()> { + if let Err(error) = init_tracing(debug, log_file.as_deref()) { + let message = format!("failed to initialise logging: {error}"); + report_failure(reporter, &message); + return Err(error.context("failed to initialise logging")); + } + if debug { + tracing::debug!(?config, "effective configuration resolved"); + } + + // Sweep a stale PID file *before* acquire so the next `status` command (or + // diagnostics that read the file without taking the flock) doesn't report + // a misleading "running" against a dead PID in the window between SIGKILL + // and the new daemon's PID being written. `PidFile::acquire` would itself + // recover via flock + truncate even without this β€” but doing it eagerly + // keeps the on-disk PID file consistent for everyone, not just the + // launcher. + match reclaim_stale_pid_file(&runtime_env.pid_path) { + Ok(PidReclaim::RemovedStale(prev)) => { + tracing::info!(previous_pid = ?prev, "removed stale pid file from prior unclean exit"); + } + Ok(PidReclaim::StillRunning(_) | PidReclaim::Absent) => {} + Err(error) => { + // Non-fatal: `PidFile::acquire` below still recovers via flock + + // truncate. But a failure here (e.g. a permissions problem on the + // pid dir) is worth surfacing rather than silently discarding. + tracing::warn!( + %error, + path = %runtime_env.pid_path.display(), + "failed to reclaim stale pid file at startup; relying on acquire's flock recovery" + ); + } + } + let cleaned = cleanup_stale_ipc(); + if cleaned > 0 { + tracing::info!(count = cleaned, "cleaned stale ipc artefacts"); + } + + // Acquire the single-instance PID file *before* starting the Tokio runtime + // so a duplicate-launch error is reported promptly and doesn't leak a + // half-built runtime. 
+ let pid_file = match PidFile::acquire(&runtime_env.pid_path) { + Ok(pid_file) => pid_file, + Err(PidFileError::AlreadyRunning(pid)) => { + let message = format!("Daemon already running (pid={pid})"); + tracing::error!("{message}"); + report_failure(reporter, &message); + std::process::exit(1); + } + Err(PidFileError::Io(error)) => { + let context = format!("failed to acquire {}", runtime_env.pid_path.display()); + tracing::error!(%error, "{context}"); + report_failure(reporter, &format!("{context}: {error}")); + return Err(anyhow::Error::from(error).context(context)); + } + }; + + // Verify ffmpeg is present and supports the options the encoder depends on + // *before* standing up the runtime β€” an incompatible build (e.g. one that + // lacks `-vsync passthrough`, or is missing libx264) otherwise fails every + // video encode silently at recording time, marking traces `failed`. Mirrors + // the fail-fast PID-file acquisition above. Reused as the pipeline's encoder + // so the probe and the real encodes share one configured binary. + let video_encoder = VideoEncoder::new(); + match video_encoder.preflight() { + Ok(version) => tracing::info!(ffmpeg_version = %version, "ffmpeg preflight passed"), + Err(error) => { + let message = format!("ffmpeg preflight failed: {error}"); + tracing::error!("{message}"); + report_failure(reporter, &message); + return Err(anyhow::Error::new(error).context("ffmpeg preflight failed")); + } + } + + let mut runtime_builder = tokio::runtime::Builder::new_multi_thread(); + runtime_builder.enable_all(); + // Honour the configured worker-thread count (`NCD_NUM_THREADS` / the YAML + // `num_threads` / `--num-threads`); a non-positive value falls back to + // tokio's default (one worker per core). 
+ match config.num_threads { + Some(threads) if threads > 0 => { + runtime_builder.worker_threads(threads as usize); + } + Some(threads) => { + tracing::warn!( + num_threads = threads, + "ignoring non-positive num_threads; using default" + ); + } + None => {} + } + let runtime = match runtime_builder.build() { + Ok(runtime) => runtime, + Err(error) => { + let message = format!("failed to build tokio runtime: {error}"); + tracing::error!("{message}"); + report_failure(reporter, &message); + return Err(anyhow::Error::from(error).context("failed to build tokio runtime")); + } + }; + + // Signal readiness to the launcher *before* the main loop blocks so the + // user's shell prompt returns as soon as the daemon is actually up. + if let Some(reporter) = reporter { + if let Err(error) = reporter.ready(std::process::id()) { + tracing::warn!(%error, "failed to report readiness to launcher (continuing)"); + } + } + + let db_path = runtime_env.db_path.clone(); + let recordings_root = runtime_env.recordings_root.clone(); + // The recordings root is shared with the producer, which lives in a + // *separate* process and resolves it from `NEURACORE_DAEMON_RECORDINGS_ROOT` + // (or the db-dir sibling) β€” it never reads the daemon profile. So a + // profile `path_to_store_record` that disagrees with the effective root + // cannot be silently honoured here without stranding the producer's spooled + // video under a path the daemon never scans. Surface the mismatch loudly + // instead and point the operator at the knob that actually coordinates both + // processes. + if let Some(configured) = config + .path_to_store_record + .as_deref() + .filter(|value| !value.is_empty()) + { + if Path::new(configured) != recordings_root { + tracing::warn!( + configured, + effective = %recordings_root.display(), + "profile `path_to_store_record` is ignored; the recordings root is set by \ + NEURACORE_DAEMON_RECORDINGS_ROOT (read by both daemon and producer). 
\ + Set that env var to relocate recordings." + ); + } + } + let storage_policy = StoragePolicy { + storage_limit_bytes: config + .storage_limit + .and_then(|value| u64::try_from(value).ok()), + ..StoragePolicy::default() + }; + let api_url = runtime_env.api_url.clone(); + let config_for_runtime = config; + let outcome = runtime.block_on(async move { + let state_store = SqliteStateStore::open(&db_path) + .await + .with_context(|| format!("failed to open state store at {}", db_path.display()))?; + tracing::info!(path = %db_path.display(), "state store ready"); + crate::lifecycle::recovery::run_startup_sweeps(&state_store, &recordings_root).await; + let storage_budget = Arc::new(StorageBudget::new(&recordings_root, storage_policy)); + // Reconcile the storage budget (directory scan + `statvfs`) on a + // background interval instead of on the trace actors' per-frame + // `check` path: a raw `statvfs` on the shared spool periodically blocks + // for hundreds of ms behind an ext4 journal commit, and at the frame + // rate that stall would back-pressure the whole dispatcher β†’ IPC drain. + // `spawn_blocking` keeps the scan/syscall off the async runtime threads. + { + let refresh_budget = storage_budget.clone(); + let refresh_interval = refresh_budget.policy().refresh_interval; + if !refresh_interval.is_zero() { + tokio::spawn(async move { + loop { + tokio::time::sleep(refresh_interval).await; + let budget = refresh_budget.clone(); + if tokio::task::spawn_blocking(move || budget.refresh()) + .await + .is_err() + { + break; + } + } + }); + } + } + let event_bus = EventBus::new(); + // Write-behind for the per-trace actors' high-frequency progress / + // status / finalise writes: coalesced per trace and flushed in batches + // off the actors' hot path so they never contend on the store's single + // write mutex (see `state::trace_event_database_writer`). Drained + flushed at shutdown + // below, after the dispatcher (and so every actor) has stopped. 
+ let (trace_write_handle, trace_writer) = + crate::state::trace_event_database_writer::spawn(Arc::new(state_store.clone())); + // Write-behind for the per-trace `trace.json` appends: the blocking JSON + // `write()` periodically stalls behind an ext4 journal commit on the + // shared spool, so running it on the actor's hot path back-pressures the + // dispatcher and IPC listener and spikes producer `log_*` latency. The + // dedicated thread keeps that disk I/O off the drain path (see + // `pipeline::json_writer`). The join handle is dropped β€” the thread exits + // once the dispatcher and every actor (the last `JsonWriteHandle` holders) + // are gone at shutdown. + let (json_write_handle, _json_writer_owner) = crate::pipeline::json_writer::spawn(); + let actor_context = Arc::new( + TraceActorContext::new( + recordings_root.clone(), + storage_budget, + video_encoder, + trace_write_handle.clone(), + json_write_handle, + ) + .with_event_bus(event_bus.clone()), + ); + + // Run the wait loop in a nested block so the state store can be + // closed in both the success and error paths before the runtime + // tears connections down. + let result: Result = async { + let (shutdown_tx, shutdown_rx) = + install_shutdown_handler().context("failed to install shutdown handlers")?; + + // Bring up iceoryx2 *before* the dispatcher: a failure here is + // user-visible (the daemon can't accept IPC at all) and must + // unwind cleanly through the same path as a normal shutdown. + let transport = + IpcTransport::bring_up().context("failed to bring up iceoryx2 transport")?; + + // Resolve the initial org_id from the local SDK-managed config, + // falling back to the daemon profile (NCD_CURRENT_ORG_ID or the + // YAML profile override). This is only the seed value: the cloud + // coordinators spawn a watcher over `config_path` and read the + // *current* org live, so an org selected after launch is picked up + // without restarting the daemon. 
The profile override remains the + // documented escape hatch for tests (the file watcher's fallback). + let config_path = dirs::home_dir() + .map(|home| home.join(".neuracore").join("config.json")) + .unwrap_or_else(|| std::path::PathBuf::from(".neuracore/config.json")); + let org_id = read_org_id_from_config(&config_path) + .or_else(|| config_for_runtime.current_org_id.clone()); + + // Spawn the cloud-side coordinators *before* the dispatcher so + // they have an active subscription to the event bus by the time + // any `TraceWritten` / `RecordingStopped` fires. A late + // subscriber sees no replay (broadcast channels don't replay), + // so a coordinator that races behind a fast end-to-end trace + // would otherwise miss its trigger event and have to wait for + // the next periodic tick. Order is also load-bearing for + // ordered shutdown: dropping the dispatcher first guarantees + // no new `TraceWritten` lands while the coordinators drain. + let cloud_handles = if config_for_runtime.offline.unwrap_or(false) { + tracing::info!("offline mode β€” skipping cloud coordinator spawn"); + None + } else { + match build_api_client(&api_url, &config_path) { + Ok(api_client) => Some(spawn_cloud_coordinators( + state_store.clone(), + trace_write_handle.clone(), + event_bus.clone(), + api_client, + Arc::new(recordings_root.clone()), + config_path.clone(), + org_id.clone(), + shutdown_tx.clone(), + )), + Err(error) => { + tracing::warn!(%error, "failed to build API client; cloud uploads disabled"); + None + } + } + }; + + // Hold a wakelock while at least one trace is queued + // for upload. Spawned regardless of `offline` so a profile + // configured "online but flaky network" still keeps the host + // awake when traces queue up locally; the wakelock task does + // nothing on hosts without `systemd-inhibit`. 
+ let wakelock_handle = config_for_runtime + .keep_wakelock_while_upload + .unwrap_or(false) + .then(|| { + tracing::info!("wakelock-while-upload enabled"); + spawn_wakelock(event_bus.clone(), shutdown_tx.subscribe()) + }); + + let dispatcher_context = DispatcherContext { + event_bus: Some(event_bus.clone()), + }; + let (dispatcher_tx, dispatcher_handle) = dispatcher::spawn_with_context( + state_store.clone(), + Arc::clone(&actor_context), + dispatcher_context, + shutdown_tx.subscribe(), + ); + + // Reclaim fully-uploaded recordings' files + rows. Spawned + // regardless of `offline` so a daemon restarted offline still + // reaps recordings that completed in a prior online session; it + // only ever acts on recordings the backend already holds in full. + let reaper_handle = spawn_recording_reaper( + state_store.clone(), + Arc::new(recordings_root.clone()), + shutdown_tx.subscribe(), + ); + + // Capture the actual shutdown signal in a spawned task so we + // can log which signal triggered the exit *after* the listener + // returns. The listener itself cannot be `tokio::spawn`'d β€” + // iceoryx2 subscriber ports are `!Send` β€” so we run it inline + // and let the dispatcher + signal-capture tasks ride the + // multi-thread runtime in parallel. + let mut signal_rx = shutdown_tx.subscribe(); + let signal_task = tokio::spawn(async move { + signal_rx + .recv() + .await + .ok() + .unwrap_or(ShutdownSignal::Sigterm) + }); + // Drain the primary handler-installed receiver so it doesn't + // accumulate broadcasts behind our back; we no longer need it. + drop(shutdown_rx); + + tracing::info!(?org_id, "daemon ready; awaiting shutdown signal"); + listener::run( + transport, + dispatcher_tx.clone(), + Arc::new(state_store.clone()), + shutdown_tx.subscribe(), + ) + .await; + + // Ordered shutdown β€” by the time `listener::run` has returned + // the iceoryx2 node has already been dropped (it lived inside + // the listener task's frame): + // 1. 
drop our local dispatcher sender so the dispatcher + // inbox closes, + // 2. dispatcher drains, clears the routing map, and the + // per-trace actors observe EOF and exit, + // 3. wait for the cloud coordinators to finish their + // in-flight requests, + // 4. read the captured shutdown signal for the log line. + drop(dispatcher_tx); + dispatcher_handle.shutdown().await; + // Every per-trace actor has now exited, so no further trace writes + // can be produced. Drain + flush the write-behind's final batch + // before the store closes so finalise/failed states are durable. + trace_writer.shutdown().await; + reaper_handle.join().await; + if let Some(handles) = cloud_handles { + handles.join_all().await; + } + if let Some(handle) = wakelock_handle { + handle.join().await; + } + // In normal operation the listener returns *because* a shutdown + // signal fired, so `signal_task` is already complete. Bound the + // wait so a future code path that lets the listener exit + // independently (panic, dispatcher dropped) can't hang the + // daemon's exit on a signal that never arrives. 
+ let signal = match tokio::time::timeout(SIGNAL_CAPTURE_TIMEOUT, signal_task).await { + Ok(Ok(captured)) => captured, + Ok(Err(error)) => { + tracing::warn!(?error, "signal-capture task join failed"); + ShutdownSignal::Sigterm + } + Err(_) => { + tracing::debug!( + "listener exited without a shutdown signal; defaulting to sigterm" + ); + ShutdownSignal::Sigterm + } + }; + + Ok(signal) + } + .await; + + state_store.close().await; + result + }); + + drop(pid_file); + runtime.shutdown_background(); + + match outcome { + Ok(signal) => tracing::info!(?signal, "daemon stopped"), + Err(error) => { + tracing::error!(%error, "daemon main loop returned error"); + return Err(error); + } + } + + Ok(()) +} + +fn ensure_default_profile_exists(profiles: &ProfileManager) -> Result<(), ProfileError> { + match profiles.create_profile(DEFAULT_PROFILE_NAME) { + Ok(()) | Err(ProfileError::AlreadyExists(_)) => Ok(()), + Err(error) => Err(error), + } +} + +fn print_preflight(runtime_env: &RuntimeEnv, config: &DaemonConfig, selected_profile: &str) { + let offline = config.offline.unwrap_or(false); + + // Only the foreground launch path prints the preflight. + println!("Daemon launch prepared."); + println!(" profile: {selected_profile}"); + println!(" offline: {offline}"); + println!(" pid file: {}", runtime_env.pid_path.display()); + println!(" database: {}", runtime_env.db_path.display()); + println!( + " recordings root: {}", + runtime_env.recordings_root.display() + ); + println!(" api url: {}", runtime_env.api_url); +} diff --git a/rust/data_daemon/src/cli/launch_logging.rs b/rust/data_daemon/src/cli/launch_logging.rs new file mode 100644 index 000000000..8034486da --- /dev/null +++ b/rust/data_daemon/src/cli/launch_logging.rs @@ -0,0 +1,84 @@ +//! Logging and readiness-reporting helpers for the `launch` subcommand. +//! +//! Resolves the background-mode log destination, configures +//! `tracing-subscriber`, and reports startup failures either to the launcher's +//! 
readiness pipe or stderr. + +use std::fs::OpenOptions; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result}; + +use crate::config::env::RuntimeEnv; +use crate::lifecycle::daemonize::ReadinessReporter; + +pub(crate) fn report_failure(reporter: Option, message: &str) { + if let Some(reporter) = reporter { + let _ = reporter.fail(message); + } else { + eprintln!("{message}"); + } +} + +/// Resolve the log-file destination for background mode. +/// +/// Defaults to a `daemon.log` sibling of the state database, which is itself +/// configurable via `NEURACORE_DAEMON_DB_PATH`. If the DB path is relative or +/// has no parent (e.g. a user override like `state.db`), falls back to +/// `~/.neuracore/data_daemon/daemon.log` rather than the launcher's CWD β€” +/// `daemonize` `chdir("/")`s the grandchild, so a relative log path would +/// otherwise land at the filesystem root. +pub(crate) fn log_path_for(runtime_env: &RuntimeEnv) -> PathBuf { + let candidate = runtime_env + .db_path + .parent() + .map(|parent| parent.join("daemon.log")); + if let Some(path) = candidate { + if path.is_absolute() { + return path; + } + } + if let Some(home) = dirs::home_dir() { + return home + .join(".neuracore") + .join("data_daemon") + .join("daemon.log"); + } + PathBuf::from("/tmp/neuracore-data-daemon.log") +} + +/// Configure `tracing-subscriber` from `RUST_LOG` / `NDD_DEBUG`. +/// +/// In background mode the caller passes `Some(log_path)`; otherwise tracing +/// writes to stderr. `try_init` is used to tolerate test harnesses that have +/// already installed a global subscriber. 
+pub(crate) fn init_tracing(debug: bool, log_file: Option<&Path>) -> Result<()> { + let default_level = if debug { "debug" } else { "warn" }; + let filter = tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_level)); + + let builder = tracing_subscriber::fmt() + .with_env_filter(filter) + .with_target(false); + + if let Some(path) = log_file { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent) + .with_context(|| format!("failed to create log directory {}", parent.display()))?; + } + let file = OpenOptions::new() + .create(true) + .append(true) + .open(path) + .with_context(|| format!("failed to open log file {}", path.display()))?; + let _ = builder + .with_writer(std::sync::Mutex::new(file)) + .with_ansi(false) + .try_init(); + } else { + // Write to stderr so the parent's stdout=DEVNULL plumbing in + // background mode does not silently swallow structured log output. + let _ = builder.with_writer(std::io::stderr).try_init(); + } + Ok(()) +} diff --git a/rust/data_daemon/src/cli/mod.rs b/rust/data_daemon/src/cli/mod.rs new file mode 100644 index 000000000..1af77e154 --- /dev/null +++ b/rust/data_daemon/src/cli/mod.rs @@ -0,0 +1,169 @@ +//! Command-line interface. +//! +//! The `clap` command tree β€” commands, flag names, aliases, and help strings +//! β€” is part of the daemon's public contract: `python -m neuracore.data_daemon +//! ` execs this binary, so the surface here must stay stable. + +mod coordinators; +mod launch; +mod launch_logging; +mod profile; +mod reset; +mod status; +mod stop; + +use anyhow::Result; +use clap::builder::styling::{AnsiColor, Styles}; +use clap::{Parser, Subcommand}; + +use crate::config::env::parse_bytes; + +/// Colour scheme for `--help` output: green section headers/usage and cyan +/// literals so commands and flags stand out from prose. 
// NOTE(review): several `Option` fields below appear without a type argument
// (`Option,`). That looks like an extraction artefact — angle-bracketed
// arguments are missing elsewhere in this patch too — confirm against the
// original file before building.

// Styling handed to clap through `styles = HELP_STYLES` on `Cli`: bold green
// headers and usage, bold cyan literals, cyan placeholders.
const HELP_STYLES: Styles = Styles::styled()
    .header(AnsiColor::Green.on_default().bold())
    .usage(AnsiColor::Green.on_default().bold())
    .literal(AnsiColor::Cyan.on_default().bold())
    .placeholder(AnsiColor::Cyan.on_default());

/// Neuracore Data Daemon CLI.
#[derive(Parser)]
#[command(
    name = "data-daemon",
    about = "Neuracore Data Daemon CLI.",
    disable_help_subcommand = true,
    styles = HELP_STYLES
)]
struct Cli {
    #[command(subcommand)]
    command: Command,
}

// Top-level subcommands. The `///` lines on variants and fields are left
// byte-identical on purpose: clap's derive turns them into `--help` text.
#[derive(Subcommand)]
enum Command {
    /// Launch the data daemon.
    Launch {
        /// Profile name to launch (from ~/.neuracore/data_daemon/profiles).
        #[arg(long)]
        profile: Option,
        /// Run the daemon in the background without terminal output.
        #[arg(long)]
        background: bool,
        /// Enable debug mode.
        #[arg(long)]
        debug: bool,
    },
    /// Stop the data daemon.
    Stop,
    /// Remove all daemon state: recordings, database, and iceoryx2/shared-memory artefacts.
    Reset {
        // NOTE(review): the dash in the help text below looks mis-encoded
        // (probably an em dash). It is emitted in `--help`, so fix it in the
        // original file rather than here.
        /// Skip the confirmation prompt β€” required for non-interactive use.
        #[arg(long, short = 'y', visible_alias = "force")]
        yes: bool,
    },
    /// Show daemon status.
    Status,
    /// Install the data daemon as a system service.
    Install,
    /// Uninstall the data daemon system service.
    Uninstall,
    /// Manage daemon profiles.
    Profile {
        #[command(subcommand)]
        command: ProfileCommand,
    },
}

// Subcommands of `profile`. Each `Update` option has a kebab-case long name
// plus a visible snake_case alias.
#[derive(Subcommand)]
enum ProfileCommand {
    /// Create a profile.
    Create {
        /// Profile name.
        name: String,
    },
    /// Update an existing profile.
    Update {
        /// Profile name to update.
        name: Option,
        /// Storage limit in bytes.
        #[arg(long = "storage-limit", visible_alias = "storage_limit", value_parser = parse_bytes)]
        storage_limit: Option,
        /// Bandwidth limit in bytes per second.
        #[arg(long = "bandwidth-limit", visible_alias = "bandwidth_limit", value_parser = parse_bytes)]
        bandwidth_limit: Option,
        /// Producer video spool-backlog cap in bytes (0 disables the bound).
        #[arg(long = "spool-limit", visible_alias = "spool_limit", value_parser = parse_bytes)]
        spool_limit: Option,
        /// Path where records should be stored.
        #[arg(
            long = "storage-path",
            visible_aliases = ["storage_path", "path_to_store_record"]
        )]
        storage_path: Option,
        /// Number of worker threads.
        #[arg(long = "num-threads", visible_alias = "num_threads")]
        num_threads: Option,
        // `--wakelock` and `--no-wakelock` override each other, so the one
        // given last on the command line is the one left set.
        /// Keep a wakelock while uploading.
        #[arg(long = "wakelock", overrides_with = "no_wakelock")]
        wakelock: bool,
        /// Do not keep a wakelock while uploading.
        #[arg(long = "no-wakelock", overrides_with = "wakelock")]
        no_wakelock: bool,
        // Same mutual-override arrangement for `--offline` / `--online`.
        /// Run in offline mode.
        #[arg(long = "offline", overrides_with = "online")]
        offline: bool,
        /// Run in online mode.
        #[arg(long = "online", overrides_with = "offline")]
        online: bool,
        /// API key used for authenticating the daemon.
        #[arg(long = "api-key", visible_alias = "api_key")]
        api_key: Option,
        /// Active organisation ID for scoping daemon operations.
        #[arg(long = "current-org-id", visible_alias = "current_org_id")]
        current_org_id: Option,
    },
    /// Get a profile's configuration.
    Get {
        /// Profile name to get.
        name: Option,
    },
    /// Delete a profile.
    Delete {
        /// Profile name to delete.
        name: String,
    },
    /// List all configured daemon profiles.
    List,
}
+pub fn run() -> Result<()> { + let cli = Cli::parse(); + match cli.command { + Command::Launch { + profile, + background, + debug, + } => launch::run(profile, background, debug), + Command::Stop => stop::run(), + Command::Reset { yes } => reset::run(yes), + Command::Status => status::run(), + Command::Install => { + println!("Install command is not implemented yet."); + Ok(()) + } + Command::Uninstall => { + println!("Uninstall command is not implemented yet."); + Ok(()) + } + Command::Profile { command } => { + profile::run(command); + Ok(()) + } + } +} diff --git a/rust/data_daemon/src/cli/profile.rs b/rust/data_daemon/src/cli/profile.rs new file mode 100644 index 000000000..62d776c22 --- /dev/null +++ b/rust/data_daemon/src/cli/profile.rs @@ -0,0 +1,127 @@ +//! `profile` subcommand handlers. +//! +//! Mirrors `config_manager/args_handler.py`: the `run_profile_*` functions, +//! including their exact stdout/stderr messages and exit codes. + +use super::ProfileCommand; +use crate::config::profile::ProfileManager; +use crate::config::{DaemonConfig, DEFAULT_PROFILE_NAME}; + +/// Print `message` to stderr and exit with status 1, mirroring +/// `typer.echo(..., err=True)` followed by `raise typer.Exit(code=1)`. +fn fail(message: impl std::fmt::Display) -> ! { + eprintln!("{message}"); + std::process::exit(1); +} + +/// Dispatch a `profile` subcommand. 
+pub fn run(command: ProfileCommand) { + let profiles = ProfileManager::new(); + match command { + ProfileCommand::Create { name } => create(&profiles, &name), + ProfileCommand::Update { + name, + storage_limit, + bandwidth_limit, + spool_limit, + storage_path, + num_threads, + wakelock, + no_wakelock, + offline, + online, + api_key, + current_org_id, + } => { + let updates = DaemonConfig { + storage_limit, + bandwidth_limit, + spool_limit, + path_to_store_record: storage_path, + num_threads, + keep_wakelock_while_upload: tristate(wakelock, no_wakelock), + offline: tristate(offline, online), + api_key, + current_org_id, + }; + update(&profiles, name.as_deref(), &updates); + } + ProfileCommand::Get { name } => get(&profiles, name.as_deref()), + ProfileCommand::Delete { name } => delete(&profiles, &name), + ProfileCommand::List => list(&profiles), + } +} + +/// Collapse a `--flag` / `--no-flag` pair into a tri-state `Option`: +/// unset stays `None`, otherwise the flag that `clap` left set (last one +/// wins) decides the value. 
/// Collapse a `--flag` / `--no-flag` pair into a tri-state `Option<bool>`.
///
/// Returns `None` when neither flag is set, `Some(true)` when `enabled` is
/// set, and `Some(false)` when only `disabled` is set. If both are set,
/// `enabled` wins.
fn tristate(enabled: bool, disabled: bool) -> Option<bool> {
    match (enabled, disabled) {
        (true, _) => Some(true),
        (false, true) => Some(false),
        (false, false) => None,
    }
}
returned to a clean slate without hand-deleting paths: the recordings tree, +//! the SQLite state database (plus its WAL/SHM sidecars), the PID file, and the +//! iceoryx2 discovery files together with the `/dev/shm` shared-memory segments +//! backing them. This mirrors the host cleanup that +//! `rust/scripts/run_integration_tests.sh` performs before a fresh run. +//! +//! The daemon is stopped first: removing its state while it is running would +//! corrupt an in-flight recording, and the live daemon would immediately +//! re-create the files we just deleted. + +use std::io::{IsTerminal, Write}; +use std::path::{Path, PathBuf}; + +use anyhow::Result; +use iceoryx2::config::Config; + +use crate::cli::stop; +use crate::config::env::{db_path, pid_path, recordings_root_path}; + +/// Prefix iceoryx2 gives the Python producer's shared frame slots in +/// `/dev/shm` (`_NEURACORE_SHARED_SLOT_PREFIX` in +/// `data_daemon/lifecycle/runtime_recovery.py`). +const NEURACORE_SHM_PREFIX: &str = "neuracore-"; + +/// POSIX shared-memory mount where both iceoryx2 and the producer place their +/// segments on Linux. +const SHM_DIR: &str = "/dev/shm"; + +/// Run the reset command. +/// +/// `assume_yes` (the `--yes` flag) skips the interactive confirmation for +/// scripted use; otherwise the operator must confirm at the prompt. +pub fn run(assume_yes: bool) -> Result<()> { + if !assume_yes && !confirm()? { + println!("Reset aborted; nothing was removed."); + return Ok(()); + } + // Stop first so nothing re-creates the state we are about to remove. `stop` + // is idempotent and a no-op when no daemon is running. 
+ stop::run()?; + + println!("Resetting daemon state:"); + + purge_path("recordings", &recordings_root_path()); + purge_database(&db_path()); + purge_path("pid file", &pid_path()); + purge_iceoryx_state(); + + println!("Reset complete."); + Ok(()) +} + +/// Prompt the operator to confirm the destructive reset, listing exactly what +/// will be removed so the blast radius is visible before they commit. +/// +/// Returns `Ok(true)` only when the operator types `y`/`yes`. A non-interactive +/// stdin (piped or redirected) cannot answer, so the reset is refused with +/// guidance to re-run with `--yes` rather than silently proceeding or hanging +/// on a read that never arrives. +fn confirm() -> Result { + let recordings_root = recordings_root_path(); + let recording_count = count_recordings(&recordings_root); + + println!("This permanently removes all data daemon state, including:"); + println!(" {recording_count} recording(s)"); + if !std::io::stdin().is_terminal() { + eprintln!("Refusing to reset: stdin is not a terminal. Re-run with --yes to confirm."); + return Ok(false); + } + + print!("Continue? [y/N] "); + std::io::stdout().flush()?; + + let mut answer = String::new(); + std::io::stdin().read_line(&mut answer)?; + let answer = answer.trim().to_lowercase(); + Ok(answer == "y" || answer == "yes") +} + +/// Count recording directories under `recordings_root` for the confirmation +/// summary. +/// +/// Recordings are the numeric per-recording directories; hidden entries (the +/// `.rgb_spool` staging tree) and stray files are ignored. A missing or +/// unreadable root counts as zero β€” the summary is advisory, not a precondition. 
+fn count_recordings(recordings_root: &Path) -> usize { + let Ok(entries) = std::fs::read_dir(recordings_root) else { + return 0; + }; + entries + .flatten() + .filter(|entry| { + !entry.file_name().to_string_lossy().starts_with('.') + && entry.file_type().is_ok_and(|file_type| file_type.is_dir()) + }) + .count() +} + +/// Remove the SQLite state database and its WAL-mode sidecars. +/// +/// SQLite keeps the write-ahead log and shared-memory index alongside the main +/// file; an orphaned sidecar would otherwise resurrect a half-state, so they are +/// purged together with the database itself. +fn purge_database(database: &Path) { + purge_path("database", database); + for suffix in ["-wal", "-shm", "-journal"] { + purge_path("database sidecar", &sidecar(database, suffix)); + } +} + +/// Remove iceoryx2's discovery files and the `/dev/shm` segments backing them. +/// +/// The root path and segment prefix are read from iceoryx2's global config so +/// they track any host configuration override rather than assuming the +/// `/tmp/iceoryx2` + `iox2_` defaults. +fn purge_iceoryx_state() { + let config = Config::global_config(); + + let root_path = PathBuf::from(config.global.root_path().to_string()); + purge_path("iceoryx2 state", &root_path); + + let iceoryx_prefix = config.global.prefix.to_string(); + purge_shm_segments("iceoryx2 shared memory", &iceoryx_prefix); + purge_shm_segments("producer shared memory", NEURACORE_SHM_PREFIX); +} + +/// Remove `/dev/shm` segments whose name starts with `prefix`. +/// +/// POSIX shared memory has no recursive-remove primitive, so the segments are +/// swept by listing the mount and unlinking matches by name. +fn purge_shm_segments(label: &str, prefix: &str) { + let entries = match std::fs::read_dir(SHM_DIR) { + Ok(entries) => entries, + Err(error) => { + eprintln!(" ! 
could not read {SHM_DIR} for {label}: {error}"); + return; + } + }; + + for entry in entries.flatten() { + if entry.file_name().to_string_lossy().starts_with(prefix) { + purge_path(label, &entry.path()); + } + } +} + +/// Remove a file, directory tree, or symlink at `path`, reporting the outcome. +/// +/// Cleanup is best-effort: a missing target is silently skipped and any other +/// failure is reported but never aborts the reset, so one undeletable artefact +/// cannot leave the rest of the state in place. +fn purge_path(label: &str, path: &Path) { + let metadata = match std::fs::symlink_metadata(path) { + Ok(metadata) => metadata, + Err(error) if error.kind() == std::io::ErrorKind::NotFound => return, + Err(error) => { + eprintln!(" ! {label} ({}): {error}", path.display()); + return; + } + }; + + // `symlink_metadata` does not follow links, so a symlink reports as a + // non-directory and is unlinked with `remove_file` rather than followed. + let result = if metadata.is_dir() { + std::fs::remove_dir_all(path) + } else { + std::fs::remove_file(path) + }; + + match result { + Ok(()) => println!(" - removed {label}: {}", path.display()), + Err(error) if error.kind() == std::io::ErrorKind::NotFound => {} + Err(error) => eprintln!(" ! {label} ({}): {error}", path.display()), + } +} + +/// Append `suffix` to a path's filename (`state.db` + `-wal` -> `state.db-wal`). +fn sidecar(path: &Path, suffix: &str) -> PathBuf { + let mut name = path.file_name().unwrap_or_default().to_os_string(); + name.push(suffix); + path.with_file_name(name) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn sidecar_appends_suffix_to_filename() { + assert_eq!( + sidecar(Path::new("/a/b/state.db"), "-wal"), + PathBuf::from("/a/b/state.db-wal") + ); + } + + #[test] + fn count_recordings_counts_dirs_and_ignores_spool_files_and_missing() { + // Missing root counts as zero. 
+ assert_eq!(count_recordings(Path::new("/no/such/root")), 0); + + let dir = tempdir().unwrap(); + let root = dir.path(); + std::fs::create_dir_all(root.join("1")).unwrap(); + std::fs::create_dir_all(root.join("2")).unwrap(); + // The hidden spool tree and stray files must not be counted. + std::fs::create_dir_all(root.join(".rgb_spool/robot")).unwrap(); + std::fs::write(root.join("state.db"), b"x").unwrap(); + + assert_eq!(count_recordings(root), 2); + } + + #[test] + fn purge_path_removes_files_dirs_and_is_quiet_on_missing() { + let dir = tempdir().unwrap(); + + let file = dir.path().join("state.db"); + std::fs::write(&file, b"x").unwrap(); + purge_path("file", &file); + assert!(!file.exists()); + + let tree = dir.path().join("recordings"); + std::fs::create_dir_all(tree.join("1/RGB")).unwrap(); + purge_path("tree", &tree); + assert!(!tree.exists()); + + // Missing path must not panic or error. + purge_path("missing", &dir.path().join("nope")); + } +} diff --git a/rust/data_daemon/src/cli/status.rs b/rust/data_daemon/src/cli/status.rs new file mode 100644 index 000000000..31853a950 --- /dev/null +++ b/rust/data_daemon/src/cli/status.rs @@ -0,0 +1,26 @@ +//! `status` subcommand handler. +//! +//! Reads `NEURACORE_DAEMON_PID_PATH` and reports whether the daemon process +//! it points at is alive. The output shape is a stable contract so external +//! scripts can parse it. + +use anyhow::Result; + +use crate::config::env::pid_path; +use crate::lifecycle::pidfile::{pid_is_running, read_pid_from_file}; + +/// Run the status command. 
+pub fn run() -> Result<()> { + let path = pid_path(); + let Some(pid_value) = read_pid_from_file(&path) else { + println!("Daemon not running."); + return Ok(()); + }; + + if pid_is_running(pid_value) { + println!("Daemon running (pid={pid_value})."); + } else { + println!("Daemon not running (stale pid file: {pid_value})."); + } + Ok(()) +} diff --git a/rust/data_daemon/src/cli/stop.rs b/rust/data_daemon/src/cli/stop.rs new file mode 100644 index 000000000..285bddf65 --- /dev/null +++ b/rust/data_daemon/src/cli/stop.rs @@ -0,0 +1,91 @@ +//! `stop` subcommand handler. +//! +//! Reads the daemon's PID from `NEURACORE_DAEMON_PID_PATH`, sends SIGTERM, +//! waits up to 10 s for graceful exit, and escalates to SIGKILL if the +//! daemon is still alive at the deadline. + +use std::path::Path; +use std::time::{Duration, Instant}; + +use anyhow::Result; +use nix::sys::signal::{kill, Signal}; +use nix::unistd::Pid; + +use crate::config::env::pid_path; +use crate::lifecycle::pidfile::{pid_is_running, read_pid_from_file}; +use crate::lifecycle::recovery::reclaim_stale_pid_file; + +const GRACEFUL_TIMEOUT: Duration = Duration::from_secs(10); +const SIGKILL_REAP_TIMEOUT: Duration = Duration::from_secs(5); +const POLL_INTERVAL: Duration = Duration::from_millis(100); + +/// Run the stop command. +pub fn run() -> Result<()> { + let path = pid_path(); + let Some(pid_value) = read_pid_from_file(&path) else { + println!("Daemon not running (no pid file at {}).", path.display()); + // Best-effort cleanup: if the file exists but is unreadable, drop it. 
+ let _ = reclaim_stale_pid_file(&path); + return Ok(()); + }; + + if !pid_is_running(pid_value) { + println!("Daemon not running (pid={pid_value}); removing stale pid file."); + let _ = reclaim_stale_pid_file(&path); + return Ok(()); + } + + let pid = Pid::from_raw(pid_value); + match kill(pid, Signal::SIGTERM) { + Ok(()) => {} + Err(nix::errno::Errno::ESRCH) => { + println!("Daemon exited before SIGTERM (pid={pid_value})."); + cleanup(&path); + return Ok(()); + } + Err(error) => { + eprintln!("Failed to send SIGTERM to pid={pid_value}: {error}"); + std::process::exit(1); + } + } + + if wait_for_exit(pid_value, GRACEFUL_TIMEOUT) { + println!("Daemon stopped (pid={pid_value})."); + cleanup(&path); + return Ok(()); + } + + eprintln!( + "Daemon (pid={pid_value}) did not exit within {}s; sending SIGKILL.", + GRACEFUL_TIMEOUT.as_secs() + ); + let _ = kill(pid, Signal::SIGKILL); + if !wait_for_exit(pid_value, SIGKILL_REAP_TIMEOUT) { + eprintln!("Daemon (pid={pid_value}) still alive after SIGKILL."); + std::process::exit(1); + } + cleanup(&path); + Ok(()) +} + +/// Poll until `pid_value` exits or `timeout` elapses. +/// +/// Uses `std::thread::sleep` because `stop` runs synchronously without a +/// Tokio runtime β€” see `cli::run` for the per-command runtime policy. Do not +/// call from async code. +fn wait_for_exit(pid_value: i32, timeout: Duration) -> bool { + let deadline = Instant::now() + timeout; + while Instant::now() < deadline { + if !pid_is_running(pid_value) { + return true; + } + std::thread::sleep(POLL_INTERVAL); + } + !pid_is_running(pid_value) +} + +fn cleanup(pid_path: &Path) { + // The daemon's `PidFile::Drop` removes the file on a clean exit; this is + // a defence-in-depth pass for the SIGKILL escalation path. 
+ let _ = reclaim_stale_pid_file(pid_path); +} diff --git a/rust/data_daemon/src/cloud/cloud_files.rs b/rust/data_daemon/src/cloud/cloud_files.rs new file mode 100644 index 000000000..53b4fd33f --- /dev/null +++ b/rust/data_daemon/src/cloud/cloud_files.rs @@ -0,0 +1,113 @@ +//! Derive the cloud-file list for a trace from its data-type label. +//! +//! The data type partitions into `JSON` vs `RGB` content. The classification +//! works directly off the wire string so an unrecognised data type still +//! registers (as JSON) rather than being refused. + +use crate::api::models::CloudFile; + +// The artefact filenames are wire-critical and owned by `storage::paths`, where +// the on-disk writers stamp them. Re-export rather than redefine so there is a +// single source of truth β€” two copies could silently drift and break the +// upload↔disk filename contract. +pub use crate::storage::paths::{ + LOSSLESS_VIDEO_FILENAME as LOSSLESS_VIDEO_NAME, LOSSY_VIDEO_FILENAME as LOSSY_VIDEO_NAME, + TRACE_JSON_FILENAME as TRACE_FILE, +}; + +/// Wire-side classification used to build the cloud-file list. The set of +/// data types that produce video is small and stable, so a hard-coded match +/// on the wire string is good enough. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ContentKind { + /// JSON-only payload (scalar / sensor / event traces). + Json, + /// RGB video payload: lossy + lossless mp4 plus a JSON sidecar. + Rgb, +} + +/// Classify a data-type wire label. +/// +/// Anything not recognised is treated as JSON. +/// +/// `DEPTH_IMAGES` is intentionally mapped to `Rgb`: the upload pipeline uses +/// the same `lossy.mp4` + `lossless.mp4` artefact pair to carry depth frames +/// packed into RGB channels. Diverging here would register a different +/// cloud-file set than the backend expects and break wire compatibility. 
+fn content_type_for(data_type: &str) -> ContentKind { + match data_type { + "RGB_IMAGES" | "DEPTH_IMAGES" => ContentKind::Rgb, + _ => ContentKind::Json, + } +} + +/// The MIME content-type the daemon registers (and re-acquires session URIs) +/// for an artefact, keyed off its filename suffix. The single source of truth +/// for the mapping, shared by [`cloud_file_list`] and the uploader's session +/// refresh so the two can't disagree. Only the `.mp4` video artefacts are +/// `video/mp4`; everything else (the JSON trace / sidecar) is `application/json`. +pub fn content_type_for_filename(filename: &str) -> &'static str { + if filename.ends_with(".mp4") { + "video/mp4" + } else { + "application/json" + } +} + +/// Build the cloud-file list for a trace. +/// +/// `data_type` is the wire label. `data_type_name` is the producer-supplied +/// alias (e.g. camera name); when missing we fall back to a single underscore +/// so the path is well-formed. +pub fn cloud_file_list(data_type: &str, data_type_name: Option<&str>) -> Vec { + let prefix = format!("{data_type}/{}", data_type_name.unwrap_or("_")); + let mut filenames = Vec::with_capacity(3); + if matches!(content_type_for(data_type), ContentKind::Rgb) { + filenames.push(LOSSY_VIDEO_NAME); + filenames.push(LOSSLESS_VIDEO_NAME); + } + filenames.push(TRACE_FILE); + filenames + .into_iter() + .map(|filename| CloudFile { + filepath: format!("{prefix}/{filename}"), + content_type: content_type_for_filename(filename).to_string(), + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn scalar_trace_lists_only_trace_json() { + let files = cloud_file_list("JOINT_POSITIONS", Some("arm0")); + assert_eq!(files.len(), 1); + assert_eq!(files[0].filepath, "JOINT_POSITIONS/arm0/trace.json"); + assert_eq!(files[0].content_type, "application/json"); + } + + #[test] + fn rgb_trace_lists_both_mp4_outputs_plus_sidecar() { + let files = cloud_file_list("RGB_IMAGES", Some("cam_0")); + let paths: Vec<&str> = 
files.iter().map(|f| f.filepath.as_str()).collect(); + assert_eq!( + paths, + vec![ + "RGB_IMAGES/cam_0/lossy.mp4", + "RGB_IMAGES/cam_0/lossless.mp4", + "RGB_IMAGES/cam_0/trace.json", + ] + ); + assert_eq!(files[0].content_type, "video/mp4"); + assert_eq!(files[1].content_type, "video/mp4"); + assert_eq!(files[2].content_type, "application/json"); + } + + #[test] + fn missing_data_type_name_falls_back_to_underscore() { + let files = cloud_file_list("JOINT_POSITIONS", None); + assert_eq!(files[0].filepath, "JOINT_POSITIONS/_/trace.json"); + } +} diff --git a/rust/data_daemon/src/cloud/coordinators/mod.rs b/rust/data_daemon/src/cloud/coordinators/mod.rs new file mode 100644 index 000000000..90b69f75e --- /dev/null +++ b/rust/data_daemon/src/cloud/coordinators/mod.rs @@ -0,0 +1,10 @@ +//! Cloud coordinators that drive each trace's lifecycle to the backend: batch +//! registration, resumable uploads, debounced status updates, and the periodic +//! progress reporter. Each exposes a single `spawn_*` entry point so the launch +//! routine can drive ordered shutdown by dropping the handle. + +pub mod progress; +pub mod registration; +pub mod status; +mod upload_transfer; +pub mod uploader; diff --git a/rust/data_daemon/src/cloud/coordinators/progress.rs b/rust/data_daemon/src/cloud/coordinators/progress.rs new file mode 100644 index 000000000..dc02e4f47 --- /dev/null +++ b/rust/data_daemon/src/cloud/coordinators/progress.rs @@ -0,0 +1,537 @@ +//! Periodic progress reporter. +//! +//! Every [`crate::intervals::PROGRESS_TICK`] the reporter sweeps the recordings still +//! pending a report ([`StateStore::recordings_pending_progress`] β€” a +//! server-side filter, so fully-settled recordings drop out of the scan) and, +//! for every stopped recording whose traces have all finished *writing* (and +//! whose `progress_reported` is still `Pending`), +//! POSTs `/org/{org}/recording/{rec}/traces-metadata` with the per-trace +//! `total_bytes` snapshot. 
This establishes the recording's upload +//! denominators on the backend up front β€” before uploads finish β€” so the +//! live per-trace `uploaded_bytes` stream renders as a partial-upload +//! percentage rather than a single jump to 100%. On success the recording +//! row flips to `progress_reported = 'reported'`. + +use std::collections::HashMap; +use std::sync::Arc; + +use tokio::sync::broadcast; +use tokio::task::JoinHandle; +use tokio::time::{interval, MissedTickBehavior}; + +use crate::api::ApiClient; +use crate::cloud::OrgIdRx; +use crate::lifecycle::shutdown::ShutdownSignal; +use crate::state::{ + ProgressReportStatus, RecordingRow, SqliteStateStore, StateStore, TraceRecord, TraceWriteStatus, +}; + +/// Handle returned by [`spawn_progress_reporter`]. +pub struct ProgressReporterHandle { + join: JoinHandle<()>, +} + +impl ProgressReporterHandle { + /// Wait for the reporter task to exit. + pub async fn join(self) { + if let Err(error) = self.join.await { + tracing::warn!(?error, "progress reporter join failed"); + } + } +} + +/// Spawn the progress reporter task on the current Tokio runtime. +pub fn spawn_progress_reporter( + store: SqliteStateStore, + client: Arc, + org_rx: OrgIdRx, + mut shutdown_rx: broadcast::Receiver, +) -> ProgressReporterHandle { + let store = Arc::new(store); + let join = tokio::spawn(async move { + let mut ticker = interval(crate::intervals::PROGRESS_TICK); + ticker.set_missed_tick_behavior(MissedTickBehavior::Delay); + + loop { + tokio::select! 
{ + biased; + signal = shutdown_rx.recv() => { + tracing::debug!(?signal, "progress reporter shutting down"); + break; + } + _ = ticker.tick() => { + sweep_once(&store, &client, &org_rx).await; + } + } + } + }); + ProgressReporterHandle { join } +} + +async fn sweep_once(store: &Arc, client: &Arc, org_rx: &OrgIdRx) { + // Server-side filter to stopped, non-cancelled, cloud-id-assigned + // recordings that still have reporting work outstanding, so fully-settled + // recordings drop out of the sweep instead of being re-scanned (and their + // traces re-fetched) on every tick. The cancelled/stopped/cloud-id guards + // below are kept as belt-and-braces against a row racing the query. + let recordings = match store.recordings_pending_progress().await { + Ok(rows) => rows, + Err(error) => { + tracing::warn!(%error, "progress reporter could not query pending recordings"); + return; + } + }; + for recording in recordings { + if recording.stopped_at.is_none() || recording.cancelled_at.is_some() { + continue; + } + let Some(org_id) = org_rx.borrow().clone() else { + continue; + }; + // Every cloud URL needs the backend `recording_id`. A None here means + // the start notifier hasn't populated the cloud id yet β€” skip until it + // has (e.g. a recording made while the daemon was offline). 
+ let Some(recording_id) = recording.recording_id.clone() else { + tracing::warn!( + recording_index = recording.recording_index, + "progress reporter skipping recording with no cloud recording_id yet" + ); + continue; + }; + let traces = match store + .list_traces_for_recording(recording.recording_index) + .await + { + Ok(rows) => rows, + Err(error) => { + tracing::warn!(%error, recording_index = recording.recording_index, "progress reporter could not list traces"); + continue; + } + }; + if traces.is_empty() { + continue; + } + report_expected_trace_count(store, client, &recording, &org_id, &recording_id, &traces) + .await; + if matches!(recording.progress_reported, ProgressReportStatus::Reported) { + continue; + } + report_progress(store, client, &recording, &org_id, &recording_id, &traces).await; + } +} + +/// Tell the backend how many traces this recording will have. Until this PUT +/// lands, the backend keeps the recording hidden from its parent dataset +/// regardless of how many trace blobs are already uploaded. Idempotent: +/// short-circuits once `expected_trace_count_reported` is non-zero. +async fn report_expected_trace_count( + store: &Arc, + client: &Arc, + recording: &RecordingRow, + org_id: &str, + recording_id: &str, + traces: &[TraceRecord], +) { + if recording.expected_trace_count_reported > 0 { + return; + } + // Wait until every trace has reached a terminal write state. Reporting + // the count too early would race the per-trace actors and risk telling + // the backend a number that excludes traces still being flushed. + if !traces.iter().all(write_status_is_terminal) { + return; + } + let count = i64::try_from(traces.len()).unwrap_or(i64::MAX); + + // Persist locally first so a transient PUT failure does not lose the + // count, and so a re-claim by the next tick sees the same value. 
+ if let Err(error) = store + .set_expected_trace_count(recording.recording_index, count) + .await + { + tracing::warn!( + %error, + recording_index = recording.recording_index, + "failed to persist expected trace count" + ); + return; + } + + match client + .put_expected_trace_count(org_id, recording_id, count) + .await + { + Ok(()) => { + if let Err(error) = store + .mark_expected_trace_count_reported(recording.recording_index, count) + .await + { + tracing::warn!( + %error, + recording_index = recording.recording_index, + "failed to mark expected trace count as reported" + ); + return; + } + tracing::info!( + recording_index = recording.recording_index, + recording_id, + count, + "expected trace count reported" + ); + } + Err(error) => { + tracing::warn!( + %error, + recording_index = recording.recording_index, + "expected trace count PUT failed" + ); + } + } +} + +async fn report_progress( + store: &Arc, + client: &Arc, + recording: &RecordingRow, + org_id: &str, + recording_id: &str, + traces: &[TraceRecord], +) { + // Send the snapshot of per-trace sizes (`total_bytes`) as soon as every + // trace has finished *writing* β€” not once it has finished *uploading*. + // This establishes the recording's denominators on the backend early, so + // the live per-trace `uploaded_bytes` stream (sent via the batch-update + // endpoint) can render a partial-upload percentage. Gating on upload + // completion instead would withhold the denominators until the whole + // recording is already uploaded, collapsing progress to a single 0β†’100% + // jump. Failed writes are terminal too, so one bad trace can't pin the + // recording in `progress_reported = pending` forever. + if !traces.iter().all(write_status_is_terminal) { + return; + } + let trace_map: HashMap = traces + .iter() + .map(|trace| (trace.trace_id.clone(), trace.total_bytes)) + .collect(); + // Move into a Reporting state so a slow request can't be re-issued + // by the next tick. 
+ match store + .set_progress_report_status( + recording.recording_index, + ProgressReportStatus::Pending, + ProgressReportStatus::Reporting, + ) + .await + { + Ok(Some(row)) if matches!(row.progress_reported, ProgressReportStatus::Reporting) => {} + _ => return, + } + + match client + .report_progress(org_id, recording_id, &trace_map) + .await + { + Ok(()) => { + let _ = store + .set_progress_report_status( + recording.recording_index, + ProgressReportStatus::Reporting, + ProgressReportStatus::Reported, + ) + .await; + tracing::info!( + recording_index = recording.recording_index, + recording_id, + "progress report sent" + ); + } + Err(error) => { + tracing::warn!(%error, recording_index = recording.recording_index, "progress report failed"); + let _ = store + .set_progress_report_status( + recording.recording_index, + ProgressReportStatus::Reporting, + ProgressReportStatus::Pending, + ) + .await; + } + } +} + +fn write_status_is_terminal(trace: &TraceRecord) -> bool { + matches!( + trace.write_status, + TraceWriteStatus::Written | TraceWriteStatus::Failed + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + use crate::api::auth::StaticAuthProvider; + use crate::api::client::ApiClientOptions; + use crate::state::store::{NewRecording, TraceUpdate}; + use crate::state::{TraceUploadStatus, TraceWriteStatus}; + use tempfile::TempDir; + use wiremock::matchers::{body_json, method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + async fn open_store() -> (SqliteStateStore, TempDir) { + let dir = TempDir::new().unwrap(); + let store = SqliteStateStore::open(&dir.path().join("state.db")) + .await + .unwrap(); + (store, dir) + } + + /// Create a recording stamped with `org-1` and the given cloud + /// `recording_id` so the wiremock URL expectations resolve. Returns the + /// local `recording_index`. 
+ async fn seed_recording(store: &SqliteStateStore, cloud_recording_id: &str) -> i64 { + let recording = store + .create_recording(NewRecording::default()) + .await + .unwrap(); + store + .mark_recording_start_notified(recording.recording_index, cloud_recording_id) + .await + .unwrap(); + recording.recording_index + } + + /// A live-org receiver fixed at `org`. The sender is leaked so the channel + /// stays open for the test's duration. + fn org_rx(org: Option<&str>) -> OrgIdRx { + let (org_tx, org_rx) = tokio::sync::watch::channel(org.map(str::to_string)); + Box::leak(Box::new(org_tx)); + org_rx + } + + fn client(server: &MockServer) -> Arc { + let auth = Arc::new(StaticAuthProvider::new("test")); + let mut options = ApiClientOptions::new(server.uri()); + options.max_backoff = Duration::from_millis(10); + Arc::new(ApiClient::new(options, auth).unwrap()) + } + + #[tokio::test] + async fn sweep_reports_count_and_progress_once_writes_settle() { + let server = MockServer::start().await; + Mock::given(method("PUT")) + .and(path("/org/org-1/recording/rec-1/expected-trace-count")) + .respond_with(ResponseTemplate::new(200)) + .expect(1) + .mount(&server) + .await; + // The progress snapshot must carry each trace's `total_bytes` (the + // upload denominator), not its `uploaded_bytes` β€” and it must fire as + // soon as writes settle, before uploads finish, so the backend can + // render a live percentage from the streamed byte counts. 
+ Mock::given(method("POST")) + .and(path("/org/org-1/recording/rec-1/traces-metadata")) + .and(body_json(serde_json::json!({ + "traces": { "t-1": 100, "t-2": 200 } + }))) + .respond_with(ResponseTemplate::new(200)) + .expect(1) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + let recording_index = seed_recording(&store, "rec-1").await; + // Two traces finished writing (with known sizes) but neither has + // uploaded yet β€” both the expected-count PUT and the progress POST + // must fire on write completion, not upload completion. + for (trace_id, total_bytes) in [("t-1", 100), ("t-2", 200)] { + store + .create_trace(recording_index, trace_id, Some("JOINT_POSITIONS"), None) + .await + .unwrap(); + store + .update_trace( + trace_id, + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + total_bytes: Some(total_bytes), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + } + store + .mark_recording_stopped(recording_index, 0) + .await + .unwrap(); + + let api = client(&server); + sweep_once(&Arc::new(store.clone()), &api, &org_rx(Some("org-1"))).await; + + let recording = store.get_recording(recording_index).await.unwrap().unwrap(); + assert_eq!(recording.expected_trace_count, Some(2)); + assert_eq!(recording.expected_trace_count_reported, 2); + // Progress reports once writes settle β€” uploads need not be done. + assert!(matches!( + recording.progress_reported, + ProgressReportStatus::Reported + )); + } + + #[tokio::test] + async fn sweep_skips_expected_count_while_writes_in_flight() { + let server = MockServer::start().await; + // No mock for the PUT β€” if the sweep fires it would 404 and we'd + // catch a state-change side effect via the assertion below. 
+ let (store, _dir) = open_store().await; + let recording_index = seed_recording(&store, "rec-1").await; + store + .create_trace(recording_index, "t-1", Some("JOINT_POSITIONS"), None) + .await + .unwrap(); + store + .update_trace( + "t-1", + TraceUpdate { + write_status: Some(TraceWriteStatus::Writing), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + store + .mark_recording_stopped(recording_index, 0) + .await + .unwrap(); + + let api = client(&server); + sweep_once(&Arc::new(store.clone()), &api, &org_rx(Some("org-1"))).await; + + let recording = store.get_recording(recording_index).await.unwrap().unwrap(); + assert_eq!(recording.expected_trace_count, None); + assert_eq!(recording.expected_trace_count_reported, 0); + } + + #[tokio::test] + async fn sweep_reports_when_one_trace_failed_and_rest_uploaded() { + // Mixed terminal state: one trace Uploaded, one trace Failed. + // The progress reporter should still POST and flip the + // recording's status β€” a single failure must not deadlock the + // whole recording. 
+ let server = MockServer::start().await; + Mock::given(method("PUT")) + .and(path("/org/org-1/recording/rec-1/expected-trace-count")) + .respond_with(ResponseTemplate::new(200)) + .expect(1) + .mount(&server) + .await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/rec-1/traces-metadata")) + .respond_with(ResponseTemplate::new(200)) + .expect(1) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + let recording_index = seed_recording(&store, "rec-1").await; + store + .create_trace(recording_index, "ok", Some("JOINT_POSITIONS"), None) + .await + .unwrap(); + store + .update_trace( + "ok", + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + upload_status: Some(TraceUploadStatus::Uploaded), + bytes_uploaded: Some(7), + total_bytes: Some(7), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + store + .create_trace(recording_index, "bad", Some("JOINT_POSITIONS"), None) + .await + .unwrap(); + store + .update_trace( + "bad", + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + upload_status: Some(TraceUploadStatus::Failed), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + store + .mark_recording_stopped(recording_index, 0) + .await + .unwrap(); + + let api = client(&server); + sweep_once(&Arc::new(store.clone()), &api, &org_rx(Some("org-1"))).await; + + let recording = store.get_recording(recording_index).await.unwrap().unwrap(); + assert!( + matches!(recording.progress_reported, ProgressReportStatus::Reported), + "progress should be reported even when one trace failed; \ + got {:?}", + recording.progress_reported + ); + } + + #[tokio::test] + async fn sweep_marks_recording_reported_after_post() { + let server = MockServer::start().await; + Mock::given(method("PUT")) + .and(path("/org/org-1/recording/rec-1/expected-trace-count")) + .respond_with(ResponseTemplate::new(200)) + .expect(1) + .mount(&server) + .await; + Mock::given(method("POST")) + 
.and(path("/org/org-1/recording/rec-1/traces-metadata")) + .respond_with(ResponseTemplate::new(200)) + .expect(1) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + let recording_index = seed_recording(&store, "rec-1").await; + store + .create_trace(recording_index, "trace-1", Some("JOINT_POSITIONS"), None) + .await + .unwrap(); + store + .update_trace( + "trace-1", + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + upload_status: Some(TraceUploadStatus::Uploaded), + bytes_uploaded: Some(42), + total_bytes: Some(42), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + store + .mark_recording_stopped(recording_index, 0) + .await + .unwrap(); + + let api = client(&server); + sweep_once(&Arc::new(store.clone()), &api, &org_rx(Some("org-1"))).await; + + let recording = store.get_recording(recording_index).await.unwrap().unwrap(); + assert!(matches!( + recording.progress_reported, + ProgressReportStatus::Reported + )); + } +} diff --git a/rust/data_daemon/src/cloud/coordinators/registration.rs b/rust/data_daemon/src/cloud/coordinators/registration.rs new file mode 100644 index 000000000..d4c38a879 --- /dev/null +++ b/rust/data_daemon/src/cloud/coordinators/registration.rs @@ -0,0 +1,768 @@ +//! Batch registration coordinator. +//! +//! Claims traces whose row exists (any write_status except `failed`) β€” not just +//! fully-written ones β€” buffers up to `BATCH_SIZE` (or `MAX_WAIT`) and POSTs +//! them to `/org/{org}/recording/traces/batch-register`. Registration only +//! needs the trace's *identity* (recording id, trace id, data type, cloud +//! files), all known at `/recording/start`, so it runs **while the recording is +//! still writing** β€” overlapping the round trip with the recording instead of +//! adding it to the post-stop tail ("pre-registration"). +//! +//! Because registration and the on-disk write can finish in either order, +//! 
`ReadyForUpload` is gated on BOTH states; [`publish_ready_traces`] owns that +//! promotion (and its write-behind-lag safety-net role). Registration failures +//! roll the status back to `Pending` so the next tick re-claims them. + +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; + +use tokio::sync::broadcast; +use tokio::task::JoinHandle; +use tokio::time::{interval, MissedTickBehavior}; + +use crate::api::models::RegisterTraceRequest; +use crate::api::ApiClient; +use crate::cloud::cloud_files::cloud_file_list; +use crate::cloud::OrgIdRx; +use crate::lifecycle::shutdown::ShutdownSignal; +use crate::state::store::TraceUpdate; +use crate::state::{ + DaemonEvent, EventBus, SqliteStateStore, StateStore, TraceRecord, TraceRegistrationStatus, +}; + +/// Maximum traces to register in a single call. Matches the +/// `claim_traces_for_registration` size trigger. +pub const BATCH_SIZE: usize = 50; +/// Maximum age before flushing a partial batch. +pub const MAX_WAIT: Duration = Duration::from_millis(200); +/// How many times a trace the backend explicitly rejects (returns in +/// `failed_traces`) is rolled back to `pending` and retried before being marked +/// terminally `failed`. Backend registration errors are frequently transient +/// (e.g. a staging "Unexpected error during registration" under a large +/// registration burst); terminally failing on the first one permanently wedges +/// the whole recording (its traces never upload, so it never reaches "all +/// uploaded" and is never reaped). A small bounded retry rides out the hiccup +/// while still terminating a genuinely-permanent failure. +const MAX_REGISTRATION_ATTEMPTS: u32 = 5; + +/// Handle returned by [`spawn_registration`]. +pub struct RegistrationHandle { + join: JoinHandle<()>, +} + +impl RegistrationHandle { + /// Wait for the coordinator task to exit. 
+ pub async fn join(self) { + if let Err(error) = self.join.await { + tracing::warn!(?error, "registration coordinator join failed"); + } + } +} + +/// Spawn the registration coordinator on the current Tokio runtime. +pub fn spawn_registration( + store: SqliteStateStore, + bus: EventBus, + client: Arc, + org_rx: OrgIdRx, + mut shutdown_rx: broadcast::Receiver, +) -> RegistrationHandle { + let mut subscriber = bus.subscribe(); + let store = Arc::new(store); + let join = tokio::spawn(async move { + let mut ticker = interval(crate::intervals::REGISTRATION_POLL); + ticker.set_missed_tick_behavior(MissedTickBehavior::Delay); + + // Per-trace count of backend-rejected registration attempts, kept for + // the coordinator's lifetime so the retry budget spans drains. Entries + // are removed once a trace registers or is terminally failed, so the map + // only ever holds currently-retrying traces. + let mut registration_attempts: HashMap = HashMap::new(); + + loop { + tokio::select! { + biased; + signal = shutdown_rx.recv() => { + tracing::debug!(?signal, "registration coordinator shutting down"); + break; + } + event = subscriber.recv() => { + match event { + Ok(DaemonEvent::TraceWritten { .. 
}) => { + drain_once(&store, &bus, &client, &org_rx, MAX_WAIT, &mut registration_attempts).await; + } + Ok(_) => {} + Err(broadcast::error::RecvError::Lagged(skipped)) => { + tracing::warn!( + skipped, + "registration coordinator missed bus events; \ + falling back to a drain" + ); + drain_once(&store, &bus, &client, &org_rx, MAX_WAIT, &mut registration_attempts).await; + } + Err(broadcast::error::RecvError::Closed) => break, + } + } + _ = ticker.tick() => { + drain_once(&store, &bus, &client, &org_rx, MAX_WAIT, &mut registration_attempts).await; + } + } + } + }); + RegistrationHandle { join } +} + +async fn drain_once( + store: &Arc, + bus: &EventBus, + client: &Arc, + org_rx: &OrgIdRx, + max_wait: Duration, + registration_attempts: &mut HashMap, +) { + // Safety net: promote any traces that became (registered + written) since + // the last drain. This runs even when there is nothing new to register, so + // the periodic tick eventually promotes a pre-registered trace once its + // write-behind `write_status = written` commit lands. + publish_ready_traces(store, bus).await; + + let claimed = match store + .claim_traces_for_registration(BATCH_SIZE, max_wait.as_secs_f64()) + .await + { + Ok(rows) => rows, + Err(error) => { + tracing::warn!(%error, "claim_traces_for_registration failed"); + return; + } + }; + if claimed.is_empty() { + return; + } + tracing::debug!(count = claimed.len(), "claimed traces for registration"); + submit_batch(store, bus, client, org_rx, claimed, registration_attempts).await; + publish_ready_traces(store, bus).await; +} + +async fn submit_batch( + store: &Arc, + bus: &EventBus, + client: &Arc, + org_rx: &OrgIdRx, + traces: Vec, + registration_attempts: &mut HashMap, +) { + // Group by recording so we can look up the recording row once per + // recording rather than once per trace; in practice every claim ships + // traces from a single recording but the protocol does not require that. 
+ let mut by_recording: HashMap> = HashMap::new(); + for trace in traces { + by_recording + .entry(trace.recording_index) + .or_default() + .push(trace); + } + + for (recording_index, traces) in by_recording { + let row = match store.get_recording(recording_index).await { + Ok(Some(row)) => row, + Ok(None) => { + tracing::warn!( + recording_index, + "recording row missing; rolling traces back to pending" + ); + rollback_to_pending(store, &traces).await; + continue; + } + Err(error) => { + tracing::warn!(%error, recording_index, "failed to read recording row"); + rollback_to_pending(store, &traces).await; + continue; + } + }; + + let Some(org_id) = org_rx.borrow().clone() else { + tracing::warn!( + recording_index, + "no current org_id configured yet; rolling traces back to pending" + ); + rollback_to_pending(store, &traces).await; + continue; + }; + + // The backend recording_id always comes from `/recording/start`. An + // offline recording (or one whose `/recording/start` POST has not yet + // landed) carries no cloud id, so there is nothing to register against + // yet β€” roll the traces back to pending and retry once the start + // notifier has populated the id. 
+ let Some(cloud_id) = row.recording_id.clone() else { + rollback_to_pending(store, &traces).await; + continue; + }; + + let payload: Vec = traces + .iter() + .map(|trace| RegisterTraceRequest { + recording_id: cloud_id.clone(), + data_type: trace.data_type.clone().unwrap_or_default(), + trace_id: trace.trace_id.clone(), + cloud_files: cloud_file_list( + trace.data_type.as_deref().unwrap_or(""), + trace.data_type_name.as_deref(), + ), + }) + .collect(); + + match client.batch_register(&org_id, &payload).await { + Ok(response) => { + let registered_ids: HashMap = response + .registered_traces + .into_iter() + .map(|entry| (entry.trace_id.clone(), entry.upload_session_uris)) + .collect(); + let failed_ids: HashMap> = response + .failed_traces + .into_iter() + .map(|entry| (entry.trace_id, entry.error)) + .collect(); + + for trace in &traces { + if let Some(uris) = registered_ids.get(&trace.trace_id) { + // A serialise failure must NOT mark the trace registered + // with a "{}" placeholder β€” that records an empty URI map + // and the uploader later finalises it as 0 bytes uploaded + // (silent data loss). Roll back to pending so the next + // tick re-registers it instead. + let serialised = match serde_json::to_string(uris) { + Ok(serialised) => serialised, + Err(error) => { + tracing::warn!(%error, trace_id = trace.trace_id, "failed to serialise session URIs; rolling back to pending"); + rollback_single_to_pending(store, &trace.trace_id).await; + continue; + } + }; + let update = TraceUpdate { + registration_status: Some(TraceRegistrationStatus::Registered), + upload_session_uris: Some(serialised), + ..TraceUpdate::default() + }; + if let Err(error) = store.update_trace(&trace.trace_id, update).await { + // The backend registered the trace but we couldn't + // persist it; leaving it in `registering` would wedge + // it for the session (no coordinator re-claims that + // state mid-session). Roll back to `pending` so the + // next tick re-claims and re-registers it. 
+ tracing::warn!(%error, trace_id = trace.trace_id, "failed to persist registration outcome; rolling back to pending"); + rollback_single_to_pending(store, &trace.trace_id).await; + continue; + } + // Registered β€” clear any accumulated retry budget. + registration_attempts.remove(&trace.trace_id); + bus.publish(DaemonEvent::TraceRegistered { + trace_id: trace.trace_id.clone(), + recording_index, + }); + } else if let Some(error) = failed_ids.get(&trace.trace_id) { + // Backend rejections are usually transient (e.g. a + // staging burst error); retry under the shared budget. + handle_registration_setback( + store, + registration_attempts, + &trace.trace_id, + error.clone(), + error.as_deref().unwrap_or("backend rejected trace"), + ) + .await; + } else { + // Backend returned neither a registered nor a failed + // entry for this trace; retry under the same bounded + // budget so a persistently-omitted trace can't loop + // forever. + handle_registration_setback( + store, + registration_attempts, + &trace.trace_id, + Some("backend returned no registration outcome".to_string()), + "backend silently dropped trace", + ) + .await; + } + } + } + Err(error) => { + tracing::warn!(%error, recording_index, "batch register request failed"); + rollback_to_pending(store, &traces).await; + } + } + } +} + +/// Promote any traces that are now both registered and written to `queued` and +/// emit `ReadyForUpload` for each. +/// +/// Run on every drain (including the periodic tick) so it doubles as the safety +/// net for the lag between the `TraceWritten` event and the write-behind commit +/// of `write_status`: a pre-registered trace is promoted on whichever drain +/// first sees both states committed, rather than depending on a single event. 
+async fn publish_ready_traces(store: &Arc, bus: &EventBus) { + match store.promote_ready_traces_to_queued().await { + Ok(ready) => { + for (trace_id, recording_index) in ready { + bus.publish(DaemonEvent::ReadyForUpload { + trace_id, + recording_index, + }); + } + } + Err(error) => { + tracing::warn!(%error, "failed to promote ready traces for upload"); + } + } +} + +async fn rollback_to_pending(store: &Arc, traces: &[TraceRecord]) { + for trace in traces { + rollback_single_to_pending(store, &trace.trace_id).await; + } +} + +/// Apply bounded-retry accounting to a trace the backend did not register β€” +/// either an explicit rejection or a silent omission. Rolls the trace back to +/// `pending` for another attempt, or terminally marks it `failed` once +/// [`MAX_REGISTRATION_ATTEMPTS`] is reached, so a persistently-unregisterable +/// trace can't re-claim and re-POST forever. `error_message` is the reason +/// persisted on terminal failure; `reason` is the human-readable log context. 
async fn handle_registration_setback(
    store: &Arc<SqliteStateStore>,
    registration_attempts: &mut HashMap<String, u32>,
    trace_id: &str,
    error_message: Option<String>,
    reason: &str,
) {
    // Charge this setback against the trace's budget and read back the total.
    let used = {
        let counter = registration_attempts
            .entry(trace_id.to_string())
            .or_insert(0);
        *counter += 1;
        *counter
    };

    let budget_exhausted = used >= MAX_REGISTRATION_ATTEMPTS;
    if !budget_exhausted {
        tracing::warn!(
            trace_id,
            reason,
            attempt = used,
            "trace registration setback; rolling back to pending for retry"
        );
        rollback_single_to_pending(store, trace_id).await;
        return;
    }

    tracing::warn!(
        trace_id,
        reason,
        attempts = used,
        "trace registration setback after retry budget exhausted; marking failed"
    );
    // The trace is leaving the retry loop, so stop tracking it.
    registration_attempts.remove(trace_id);

    let terminal = TraceUpdate {
        registration_status: Some(TraceRegistrationStatus::Failed),
        error_message: Some(error_message),
        ..TraceUpdate::default()
    };
    // Should the `failed` write itself fail, the row would stay in
    // `registering`, which nothing re-claims mid-session. Falling back to
    // `pending` lets the next tick pick it up again.
    if let Err(persist_error) = store.update_trace(trace_id, terminal).await {
        tracing::warn!(%persist_error, trace_id, "failed to persist registration failure; rolling back to pending");
        rollback_single_to_pending(store, trace_id).await;
    }
}

/// Reset one trace's registration status to `pending`. A store error is logged
/// and otherwise ignored.
async fn rollback_single_to_pending(store: &Arc<SqliteStateStore>, trace_id: &str) {
    let to_pending = TraceUpdate {
        registration_status: Some(TraceRegistrationStatus::Pending),
        ..TraceUpdate::default()
    };
    let outcome = store.update_trace(trace_id, to_pending).await;
    if let Err(error) = outcome {
        tracing::warn!(%error, trace_id, "failed to roll registration status back");
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::api::auth::StaticAuthProvider;
    use crate::api::client::ApiClientOptions;
    use crate::state::store::TraceUpdate;
    use crate::state::{NewRecording, TraceUploadStatus, TraceWriteStatus};
    use std::time::Duration;
    use tempfile::TempDir;
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    /// Open a fresh store backed by `state.db` inside a new temporary
    /// directory. The `TempDir` is returned so the caller keeps it alive.
    async fn open_store() -> (SqliteStateStore, TempDir) {
        let dir = TempDir::new().unwrap();
        let store = SqliteStateStore::open(&dir.path().join("state.db"))
            .await
            .unwrap();
        (store, dir)
    }

    /// A live-org receiver fixed at `org` for the duration of a test. The
    /// sender is leaked so the channel stays open and `borrow()` keeps
    /// returning the seeded value.
    fn org_rx(org: Option<&str>) -> OrgIdRx {
        let (org_tx, org_rx) = tokio::sync::watch::channel(org.map(str::to_string));
        Box::leak(Box::new(org_tx));
        org_rx
    }

    /// Seed a recording plus a single written trace under it, returning the
    /// local `recording_index`. When `cloud_id` is `Some`, the recording's
    /// cloud `recording_id` is persisted (as the start notifier would) so
    /// registration finds one; when `None`, the recording has no cloud id yet
    /// and registration must defer.
+ async fn seed_written_trace( + store: &SqliteStateStore, + trace_id: &str, + cloud_id: Option<&str>, + ) -> i64 { + let recording_index = store + .create_recording(NewRecording { + robot_id: Some("robot-1"), + robot_instance: Some(0), + dataset_id: Some("ds-1"), + start_timestamp_ns: 1_700_000_000_000_000_000, + }) + .await + .unwrap() + .recording_index; + if let Some(cloud_id) = cloud_id { + store + .mark_recording_start_notified(recording_index, cloud_id) + .await + .unwrap(); + } + store + .create_trace( + recording_index, + trace_id, + Some("JOINT_POSITIONS"), + Some("arm0"), + ) + .await + .unwrap(); + store + .update_trace( + trace_id, + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + recording_index + } + + fn client(server: &MockServer) -> Arc { + let auth = Arc::new(StaticAuthProvider::new("test-token")); + let mut options = ApiClientOptions::new(server.uri()); + options.max_backoff = Duration::from_millis(10); + Arc::new(ApiClient::new(options, auth).unwrap()) + } + + #[tokio::test] + async fn successful_registration_persists_session_uri_and_emits_event() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/traces/batch-register")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "registered_traces": [{ + "trace_id": "trace-1", + "upload_session_uris": {"JOINT_POSITIONS/arm0/trace.json": "https://upload/abc"} + }], + "failed_traces": [] + }))) + .expect(1) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + let recording_index = seed_written_trace(&store, "trace-1", Some("cloud-rec-1")).await; + let bus = EventBus::new(); + let mut subscriber = bus.subscribe(); + let api = client(&server); + + // Drive a single drain directly so the test does not depend on the + // ticker firing: register the batch, then run the promotion sweep that + // emits ReadyForUpload once a trace is 
both registered and written. + let store_arc = Arc::new(store.clone()); + let claimed = store + .claim_traces_for_registration(BATCH_SIZE, 0.0) + .await + .unwrap(); + submit_batch( + &store_arc, + &bus, + &api, + &org_rx(Some("org-1")), + claimed, + &mut HashMap::new(), + ) + .await; + publish_ready_traces(&store_arc, &bus).await; + + let trace = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!( + trace.registration_status, + TraceRegistrationStatus::Registered + ); + assert_eq!(trace.upload_status, TraceUploadStatus::Queued); + assert!(trace + .upload_session_uris + .as_ref() + .unwrap() + .contains("https://upload/abc")); + + // First two events on the bus are TraceRegistered + ReadyForUpload. + let mut saw_registered = false; + let mut saw_ready = false; + for _ in 0..2 { + match subscriber.recv().await.unwrap() { + DaemonEvent::TraceRegistered { + trace_id, + recording_index: event_index, + } => { + assert_eq!(trace_id, "trace-1"); + assert_eq!(event_index, recording_index); + saw_registered = true; + } + DaemonEvent::ReadyForUpload { + trace_id, + recording_index: event_index, + } => { + assert_eq!(trace_id, "trace-1"); + assert_eq!(event_index, recording_index); + saw_ready = true; + } + other => panic!("unexpected event: {other:?}"), + } + } + assert!(saw_registered); + assert!(saw_ready); + } + + #[tokio::test] + async fn failed_request_rolls_back_to_pending() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/traces/batch-register")) + .respond_with(ResponseTemplate::new(500)) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + seed_written_trace(&store, "trace-1", Some("cloud-rec-1")).await; + let bus = EventBus::new(); + let api = client(&server); + + let claimed = store + .claim_traces_for_registration(BATCH_SIZE, 0.0) + .await + .unwrap(); + submit_batch( + &Arc::new(store.clone()), + &bus, + &api, + &org_rx(Some("org-1")), + claimed, + &mut HashMap::new(), 
+ ) + .await; + + let trace = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!(trace.registration_status, TraceRegistrationStatus::Pending); + } + + #[tokio::test] + async fn missing_org_id_rolls_back_to_pending() { + let server = MockServer::start().await; + let (store, _dir) = open_store().await; + let recording_index = store + .create_recording(NewRecording { + robot_id: Some("robot-1"), + robot_instance: Some(0), + dataset_id: Some("ds-1"), + start_timestamp_ns: 1_700_000_000_000_000_000, + }) + .await + .unwrap() + .recording_index; + store + .create_trace( + recording_index, + "trace-1", + Some("JOINT_POSITIONS"), + Some("arm"), + ) + .await + .unwrap(); + store + .update_trace( + "trace-1", + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + let bus = EventBus::new(); + let api = client(&server); + + let claimed = store + .claim_traces_for_registration(BATCH_SIZE, 0.0) + .await + .unwrap(); + submit_batch( + &Arc::new(store.clone()), + &bus, + &api, + &org_rx(None), + claimed, + &mut HashMap::new(), + ) + .await; + + let trace = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!(trace.registration_status, TraceRegistrationStatus::Pending); + } + + #[tokio::test] + async fn defers_registration_when_recording_has_no_cloud_id() { + let server = MockServer::start().await; + // The recording has no cloud id yet, so registration must not POST. + Mock::given(method("POST")) + .and(path("/org/org-1/recording/traces/batch-register")) + .respond_with(ResponseTemplate::new(200)) + .expect(0) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + // No cloud id seeded: the start notifier hasn't populated one yet. 
+ let recording_index = seed_written_trace(&store, "trace-1", None).await; + assert_eq!( + store + .get_recording(recording_index) + .await + .unwrap() + .unwrap() + .recording_id, + None, + "recording starts with no cloud id" + ); + let bus = EventBus::new(); + let api = client(&server); + + let claimed = store + .claim_traces_for_registration(BATCH_SIZE, 0.0) + .await + .unwrap(); + submit_batch( + &Arc::new(store.clone()), + &bus, + &api, + &org_rx(Some("org-1")), + claimed, + &mut HashMap::new(), + ) + .await; + + // The recording still has no cloud id β€” none is minted locally. + let row = store.get_recording(recording_index).await.unwrap().unwrap(); + assert_eq!( + row.recording_id, None, + "registration must not mint a cloud id" + ); + // The trace is rolled back to pending for a later retry. + let trace = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!(trace.registration_status, TraceRegistrationStatus::Pending); + } + + #[tokio::test] + async fn backend_rejection_retries_then_fails_after_budget() { + // A backend that rejects a trace (returns it in `failed_traces`) is + // treated as transient: the trace is rolled back to `pending` and + // retried up to MAX_REGISTRATION_ATTEMPTS, then marked terminally + // `failed`. Terminally failing on the first rejection would permanently + // wedge the recording (the regression a staging burst-error exposed). 
+ let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/traces/batch-register")) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "registered_traces": [], + "failed_traces": [{ + "trace_id": "trace-1", + "error": "Unexpected error during registration" + }] + }))) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + seed_written_trace(&store, "trace-1", Some("cloud-rec-1")).await; + let bus = EventBus::new(); + let api = client(&server); + let store_arc = Arc::new(store.clone()); + let mut attempts = HashMap::new(); + + // Each of the first MAX-1 rejections rolls the trace back to pending so + // the next tick re-claims and retries it. + for attempt in 1..MAX_REGISTRATION_ATTEMPTS { + let claimed = store + .claim_traces_for_registration(BATCH_SIZE, 0.0) + .await + .unwrap(); + assert_eq!(claimed.len(), 1, "the pending trace is re-claimable"); + submit_batch( + &store_arc, + &bus, + &api, + &org_rx(Some("org-1")), + claimed, + &mut attempts, + ) + .await; + let trace = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!( + trace.registration_status, + TraceRegistrationStatus::Pending, + "attempt {attempt} (< budget) must retry, not terminate" + ); + } + + // The final rejection exhausts the budget β†’ terminal failure. 
+ let claimed = store + .claim_traces_for_registration(BATCH_SIZE, 0.0) + .await + .unwrap(); + submit_batch( + &store_arc, + &bus, + &api, + &org_rx(Some("org-1")), + claimed, + &mut attempts, + ) + .await; + let trace = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!( + trace.registration_status, + TraceRegistrationStatus::Failed, + "an exhausted retry budget terminates the trace" + ); + assert_eq!( + trace.error_message.as_deref(), + Some("Unexpected error during registration") + ); + } +} diff --git a/rust/data_daemon/src/cloud/coordinators/status.rs b/rust/data_daemon/src/cloud/coordinators/status.rs new file mode 100644 index 000000000..d52f06127 --- /dev/null +++ b/rust/data_daemon/src/cloud/coordinators/status.rs @@ -0,0 +1,421 @@ +//! Debounced trace status updater. +//! +//! The uploader pushes [`StatusUpdate`] entries onto an unbounded mpsc; the +//! updater coalesces them into per-recording batches and flushes when one of +//! the following becomes true: +//! +//! - `MAX_BATCH_SIZE` (50) traces are queued. +//! - `IN_PROGRESS_MAX_WAIT` (4 s) elapsed since the batch opened. +//! - A completed-trace entry is in the batch and `COMPLETION_MAX_WAIT` +//! (0.2 s) has elapsed. + +use std::collections::HashMap; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use tokio::sync::broadcast; +use tokio::sync::mpsc; +use tokio::task::{JoinHandle, JoinSet}; +use tokio::time::{interval, MissedTickBehavior}; + +use crate::api::models::{TraceStatusUpdate, TraceStatusValue}; +use crate::api::ApiClient; +use crate::cloud::OrgIdRx; +use crate::lifecycle::shutdown::ShutdownSignal; +use crate::state::{RecordingRow, SqliteStateStore, StateStore}; + +/// Maximum number of traces to coalesce before flushing. +pub const MAX_BATCH_SIZE: usize = 50; +/// Maximum age of an in-progress batch before flushing. +pub const IN_PROGRESS_MAX_WAIT: Duration = Duration::from_secs(4); +/// Maximum age of a batch containing a completed trace. 
+pub const COMPLETION_MAX_WAIT: Duration = Duration::from_millis(200); +/// How long to wait before re-attempting a flush when no current `org_id` is +/// configured yet, or the recording's cloud id hasn't been assigned. Picked +/// larger than the `MAX_WAIT` triggers above so a perpetually-missing org +/// doesn't spin the executor while waiting for login / org selection. +const ORG_RESOLVE_RETRY_BACKOFF: Duration = Duration::from_secs(2); + +/// Update emitted by the uploader for the status coordinator to forward to +/// the backend. +#[derive(Debug, Clone)] +pub struct StatusUpdate { + /// Recording the trace belongs to (local `recording_index`). + pub recording_index: i64, + /// Trace identifier. + pub trace_id: String, + /// Bytes uploaded so far. + pub uploaded_bytes: i64, + /// `true` when this update represents an `UPLOAD_COMPLETE` transition. + pub completed: bool, + /// Total bytes once finalised; required when `completed` is `true`. + pub total_bytes: Option, +} + +impl StatusUpdate { + /// Build an in-progress (bytes-only) status update. + pub fn in_progress(recording_index: i64, trace_id: String, uploaded_bytes: i64) -> Self { + Self { + recording_index, + trace_id, + uploaded_bytes, + completed: false, + total_bytes: None, + } + } + + /// Build a completion update (status=UPLOAD_COMPLETE). + pub fn completed(recording_index: i64, trace_id: String, total_bytes: i64) -> Self { + Self { + recording_index, + trace_id, + uploaded_bytes: total_bytes, + completed: true, + total_bytes: Some(total_bytes), + } + } +} + +/// Handle returned by [`spawn_status_updater`]. +pub struct StatusUpdaterHandle { + join: JoinHandle<()>, +} + +impl StatusUpdaterHandle { + /// Wait for the status updater to exit. + pub async fn join(self) { + if let Err(error) = self.join.await { + tracing::warn!(?error, "status updater join failed"); + } + } +} + +/// Spawn the status updater. Returns the mpsc sender used by the uploader. 
+pub fn spawn_status_updater( + store: SqliteStateStore, + client: Arc, + org_rx: OrgIdRx, + inbox: mpsc::UnboundedReceiver, + shutdown_rx: broadcast::Receiver, +) -> StatusUpdaterHandle { + let store = Arc::new(store); + let join = tokio::spawn(async move { + run(store, client, org_rx, inbox, shutdown_rx).await; + }); + StatusUpdaterHandle { join } +} + +async fn run( + store: Arc, + client: Arc, + org_rx: OrgIdRx, + mut inbox: mpsc::UnboundedReceiver, + mut shutdown_rx: broadcast::Receiver, +) { + // Per-recording pending batches keyed by recording_index; preserves the + // last-seen update per trace (later updates supersede earlier ones). + let mut pending: HashMap = HashMap::new(); + // Flush tasks running in the background β€” spawned by flush_due and the + // max-batch path so the select loop never blocks on HTTP round-trips. + let mut background_flushes: JoinSet> = JoinSet::new(); + // Periodic flush ticker β€” fires on the STATUS_FLUSH cadence regardless of inbox load. + let mut flush_ticker = interval(crate::intervals::STATUS_FLUSH); + flush_ticker.set_missed_tick_behavior(MissedTickBehavior::Skip); + loop { + tokio::select! { + biased; + signal = shutdown_rx.recv() => { + tracing::debug!(?signal, "status updater shutting down"); + // Let in-flight flushes finish; re-queue any deferred batches + // so flush_all gets a chance to send them. + while let Some(flush_result) = background_flushes.join_next().await { + if let Ok(Some(deferred_batch)) = flush_result { + pending.insert(deferred_batch.recording_index, deferred_batch); + } + } + flush_all(&store, &client, &org_rx, &mut pending).await; + break; + } + // Drain completed background flush tasks without blocking the loop. 
+ Some(flush_result) = background_flushes.join_next(), + if !background_flushes.is_empty() => + { + match flush_result { + Ok(Some(deferred_batch)) => { + pending.insert(deferred_batch.recording_index, deferred_batch); + } + Ok(None) => {} + Err(panic_err) => { + tracing::warn!(?panic_err, "flush_batch task panicked"); + } + } + } + _ = flush_ticker.tick() => { + flush_due(&store, &client, &org_rx, &mut pending, &mut background_flushes); + } + maybe_update = inbox.recv() => { + let Some(update) = maybe_update else { break }; + let recording_index = update.recording_index; + let batch = pending + .entry(recording_index) + .or_insert_with(|| RecordingBatch::new(recording_index)); + batch.add(update); + if batch.size() >= MAX_BATCH_SIZE { + if let Some(batch) = pending.remove(&recording_index) { + background_flushes.spawn(flush_batch( + Arc::clone(&store), + Arc::clone(&client), + org_rx.clone(), + batch, + )); + } + } + } + } + } +} + +/// Spawn a background task for every batch whose deadline has passed. +/// Synchronous β€” never blocks the select loop on HTTP I/O. 
+fn flush_due( + store: &Arc, + client: &Arc, + org_rx: &OrgIdRx, + pending: &mut HashMap, + background_flushes: &mut JoinSet>, +) { + let now = Instant::now(); + let due_ids: Vec = pending + .iter() + .filter(|(_, batch)| now >= batch.deadline()) + .map(|(recording_index, _)| *recording_index) + .collect(); + for recording_index in &due_ids { + if let Some(batch) = pending.remove(recording_index) { + background_flushes.spawn(flush_batch( + Arc::clone(store), + Arc::clone(client), + org_rx.clone(), + batch, + )); + } + } +} + +async fn flush_all( + store: &Arc, + client: &Arc, + org_rx: &OrgIdRx, + pending: &mut HashMap, +) { + let mut tasks: JoinSet> = JoinSet::new(); + for (_, batch) in pending.drain() { + tasks.spawn(flush_batch( + Arc::clone(store), + Arc::clone(client), + org_rx.clone(), + batch, + )); + } + // Deferred batches (org_id / cloud id not yet known) can't be sent and are + // dropped on shutdown. The persisted trace rows and the final reclaim are + // the source of truth that recovers state; the live per-trace progress in + // these dropped batches is forfeited on shutdown. Count them so a + // surprising number is visible rather than silent. + let mut dropped = 0usize; + while let Some(result) = tasks.join_next().await { + match result { + Ok(Some(_deferred_batch)) => dropped += 1, + Ok(None) => {} + Err(panic_err) => { + tracing::warn!(?panic_err, "flush_batch task panicked on shutdown"); + } + } + } + if dropped > 0 { + tracing::info!( + dropped, + "dropped deferred status batches on shutdown (no org/cloud id yet; \ + persisted rows remain source-of-truth)" + ); + } +} + +/// Flush a single recording's batch. Returns the batch back if the recording's +/// `org_id` / cloud `recording_id` isn't available yet (caller should re-insert +/// with deferred deadline), or `None` when the flush was sent (or the batch was +/// empty). 
+async fn flush_batch(
+    store: Arc,
+    client: Arc,
+    org_rx: OrgIdRx,
+    mut batch: RecordingBatch,
+) -> Option {
+    // NOTE(review): generic arguments look stripped in this copy of the source
+    // (`Arc`, `-> Option`, `HashMap`); confirm the real types against the
+    // repository.
+    let recording_index = batch.recording_index;
+    let row = match resolve_recording(&store, recording_index).await {
+        Some(row) => row,
+        None => {
+            // Re-queue with a fresh `opened_at` pushed
+            // `ORG_RESOLVE_RETRY_BACKOFF` into the future so the next
+            // `flush_due` skips this batch until the start notifier has
+            // populated the cloud id. Without this, a missing field pins
+            // `deadline()` permanently in the past and the select loop becomes
+            // a busy-wait until the row is ready.
+            batch.defer(ORG_RESOLVE_RETRY_BACKOFF);
+            return Some(batch);
+        }
+    };
+    // Both the current org id and the recording's cloud id are required to
+    // address the update; if either is missing, hand the batch back deferred.
+    let (Some(org_id), Some(recording_id)) = (org_rx.borrow().clone(), row.recording_id) else {
+        batch.defer(ORG_RESOLVE_RETRY_BACKOFF);
+        return Some(batch);
+    };
+    // From here on the batch is consumed: whatever the HTTP outcome, the
+    // updates are not handed back to the caller.
+    let updates = batch.into_updates();
+    if updates.is_empty() {
+        return None;
+    }
+    let updates_payload: HashMap = updates.into_iter().collect();
+    match client
+        .batch_update_traces(&org_id, &recording_id, &updates_payload)
+        .await
+    {
+        Ok(()) => {
+            tracing::debug!(
+                recording_index,
+                recording_id,
+                count = updates_payload.len(),
+                "flushed status updates"
+            );
+        }
+        Err(error) => {
+            // Best effort: a failed send is only logged, never retried here.
+            tracing::warn!(%error, recording_index, recording_id, count = updates_payload.len(), "status batch update failed");
+        }
+    }
+    None
+}
+
+/// Load the recording row for `recording_index`.
+///
+/// Both "no such row" and "store read failed" collapse to `None`; a read
+/// failure is additionally logged at warn level.
+async fn resolve_recording(
+    store: &Arc,
+    recording_index: i64,
+) -> Option {
+    match store.get_recording(recording_index).await {
+        Ok(Some(row)) => Some(row),
+        Ok(None) => None,
+        Err(error) => {
+            tracing::warn!(%error, recording_index, "status updater could not read recording row");
+            None
+        }
+    }
+}
+
+/// Status updates accumulated for one recording while they wait to be flushed.
+#[derive(Debug)]
+struct RecordingBatch {
+    // Recording this batch belongs to.
+    recording_index: i64,
+    // Base instant that `deadline()` adds the wait policy to; re-based by
+    // `defer()`.
+    opened_at: Instant,
+    // Set once any added update carried `completed`; selects the completion
+    // wait in `deadline()`.
+    has_completion: bool,
+    // Latest merged update per trace, keyed by `trace_id`.
+    updates: HashMap,
+}
+
+impl RecordingBatch {
+    /// Create an empty batch for `recording_index`, opened now.
+    fn new(recording_index: i64) -> Self {
+        Self {
+            recording_index,
+            opened_at: Instant::now(),
+            has_completion: false,
+            updates: HashMap::new(),
+        }
+    }
+
+    /// Merge `update` into the entry for its trace.
+    ///
+    /// `uploaded_bytes` is always overwritten with the incoming value. A
+    /// completed update additionally sets the status to `UploadComplete`, takes
+    /// the incoming `total_bytes` when present (otherwise keeps the stored one),
+    /// and marks the batch as containing a completion.
+    fn add(&mut self, update: StatusUpdate) {
+        let entry = self.updates.entry(update.trace_id).or_default();
+        entry.uploaded_bytes = Some(update.uploaded_bytes);
+        if update.completed {
+            entry.status = Some(TraceStatusValue::UploadComplete);
+            entry.total_bytes = update.total_bytes.or(entry.total_bytes);
+            self.has_completion = true;
+        }
+    }
+
+    /// Number of distinct traces with a pending update.
+    fn size(&self) -> usize {
+        self.updates.len()
+    }
+
+    /// Instant at which this batch becomes due: `opened_at` plus
+    /// `COMPLETION_MAX_WAIT` when it holds a completion, otherwise plus
+    /// `IN_PROGRESS_MAX_WAIT`.
+    fn deadline(&self) -> Instant {
+        if self.has_completion {
+            self.opened_at + COMPLETION_MAX_WAIT
+        } else {
+            self.opened_at + IN_PROGRESS_MAX_WAIT
+        }
+    }
+
+    /// Re-base `opened_at` so that `deadline()` lands `delay` from now. Used by
+    /// the org-id retry path to space out flush attempts when the recording's
+    /// org isn't yet stamped.
+    fn defer(&mut self, delay: Duration) {
+        // Pin the new `opened_at` so that whatever the current deadline
+        // policy returns is at least `delay` from now.
+        let target = Instant::now() + delay;
+        let policy_wait = if self.has_completion {
+            COMPLETION_MAX_WAIT
+        } else {
+            IN_PROGRESS_MAX_WAIT
+        };
+        // If the subtraction is not representable, fall back to `target`
+        // itself; the deadline then lands later than requested, never earlier.
+        self.opened_at = target.checked_sub(policy_wait).unwrap_or(target);
+    }
+
+    /// Consume the batch and return its `(trace_id, update)` pairs.
+    fn into_updates(self) -> Vec<(String, TraceStatusUpdate)> {
+        self.updates.into_iter().collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn batch_records_completion_flag() {
+        let mut batch = RecordingBatch::new(1);
+        batch.add(StatusUpdate::in_progress(1, "t1".to_string(), 1));
+        assert!(!batch.has_completion);
+        batch.add(StatusUpdate::completed(1, "t1".to_string(), 100));
+        assert!(batch.has_completion);
+        // The latest update for the same trace_id overrides `uploaded_bytes`.
+        let entry = batch.updates.get("t1").unwrap();
+        assert_eq!(entry.uploaded_bytes, Some(100));
+        assert!(matches!(
+            entry.status,
+            Some(TraceStatusValue::UploadComplete)
+        ));
+    }
+
+    #[test]
+    fn defer_slides_deadline_forward_into_future() {
+        // The defer path is invoked when no current org_id is configured
+        // yet. Without it the batch's deadline stays in the past
+        // and the select loop spins; with it the next deadline is at
+        // least `delay` from now.
+        let mut batch = RecordingBatch::new(1);
+        batch.add(StatusUpdate::in_progress(1, "t".to_string(), 1));
+        // Force the batch's apparent deadline well into the past.
+        batch.opened_at = Instant::now() - Duration::from_secs(60);
+        assert!(batch.deadline() < Instant::now());
+
+        let delay = Duration::from_secs(2);
+        let before = Instant::now();
+        batch.defer(delay);
+        let deadline = batch.deadline();
+        // `deadline` should be at least `delay` from `before` (timing
+        // slop ~50ms is generous for CI). The exact value is `before +
+        // delay` because the batch is in-progress (IN_PROGRESS_MAX_WAIT
+        // is subtracted then re-added by deadline()).
+        assert!(deadline >= before + delay - Duration::from_millis(50));
+    }
+
+    #[test]
+    fn completion_deadline_is_shorter() {
+        // NOTE(review): relies on COMPLETION_MAX_WAIT < IN_PROGRESS_MAX_WAIT;
+        // both constants are defined outside this view.
+        let mut batch = RecordingBatch::new(1);
+        let baseline = batch.opened_at + IN_PROGRESS_MAX_WAIT;
+        assert!(batch.deadline() <= baseline);
+        batch.add(StatusUpdate::completed(1, "t".to_string(), 1));
+        assert!(batch.deadline() < baseline);
+    }
+}
diff --git a/rust/data_daemon/src/cloud/coordinators/upload_transfer.rs b/rust/data_daemon/src/cloud/coordinators/upload_transfer.rs
new file mode 100644
index 000000000..8260ffd80
--- /dev/null
+++ b/rust/data_daemon/src/cloud/coordinators/upload_transfer.rs
@@ -0,0 +1,624 @@
+//! Wire-level resumable-upload transfer mechanics.
+//!
+//! The per-file and per-chunk PUT machinery the upload coordinator
+//! ([`super::uploader`]) drives: [`upload_one_file`] streams a single on-disk
+//! artefact as 16 MiB chunks to the GCS resumable session URI, handling the
+//! 308-continue, 410-session-expired, and 401-auth-refresh transitions, and
+//! verifies the server-side CRC32C checksum on completion.
+
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+use base64::engine::general_purpose::STANDARD as BASE64;
+use base64::Engine;
+use bytes::Bytes;
+use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, CONTENT_LENGTH, CONTENT_RANGE};
+use reqwest::StatusCode;
+use tokio::fs::File;
+use tokio::io::{AsyncReadExt, AsyncSeekExt};
+use tokio::time::{sleep, timeout};
+
+use super::status::StatusUpdate;
+use crate::api::ApiClient;
+use crate::cloud::OrgIdRx;
+use crate::state::{DaemonEvent, EventBus, TraceRecord, TraceWriteHandle};
+
+/// Chunk size used for resumable uploads.
+///
+/// Must be a multiple of 256 KiB (the GCS resumable-upload requirement for
+/// every non-final chunk); 16 MiB = 64 × 256 KiB. Larger chunks raise peak
+/// throughput on fast links (fewer sequential PUTs) at the cost of coarser
+/// upload-progress granularity, higher per-upload memory, and a higher minimum
+/// sustained speed: a chunk must transfer within `CHUNK_UPLOAD_TIMEOUT` (200 s),
+/// so the minimum sustained speed is `CHUNK_SIZE / CHUNK_UPLOAD_TIMEOUT`
+/// (16 MiB / 200 s ≈ 0.67 Mbit/s).
+pub const CHUNK_SIZE: usize = 16 * 1024 * 1024;
+/// Persist `bytes_uploaded` to SQLite only every Nth chunk (plus once when the
+/// file finishes), instead of every chunk. The per-chunk write took the store's
+/// single `write_guard` once per 16 MiB and, at `MAX_CONCURRENT_UPLOADS`
+/// in-flight files, serialised all uploads against each other and against the
+/// notifiers/progress reporter — eroding the stop-recording SLA. Resume
+/// correctness does not depend on this value: the 308-continue path
+/// (`parse_resume_offset`) re-derives the committed offset from the server on
+/// restart, so a stale DB offset only costs re-sending at most this many chunks.
+const PROGRESS_PERSIST_EVERY_CHUNKS: u32 = 4; +/// Cap on the exponential backoff for transient upload failures. +const MAX_BACKOFF: Duration = Duration::from_secs(300); +/// Maximum retries for a single chunk. +const MAX_RETRIES: u32 = 5; +/// Hard deadline for a single chunk PUT. Belt-and-braces over the reqwest +/// client-level timeout, which can silently fail to fire for direct GCS +/// resumable session URI uploads. +const CHUNK_UPLOAD_TIMEOUT: Duration = Duration::from_secs(200); + +/// Outcome of [`upload_one_file`]. Carries a refreshed `session_uri` when the +/// server expired the original one mid-upload so the caller can persist it +/// for restart-resume. +pub(crate) struct UploadFileOutcome { + pub(crate) bytes_uploaded: i64, + pub(crate) final_session_uri: Option, +} + +#[allow(clippy::too_many_arguments)] +pub(crate) async fn upload_one_file( + client: &Arc, + trace_writer: &TraceWriteHandle, + bus: &EventBus, + org_rx: &OrgIdRx, + status_tx: &tokio::sync::mpsc::UnboundedSender, + trace: &TraceRecord, + recording_id: &str, + local_path: &std::path::Path, + cloud_filepath: &str, + content_type: &str, + session_uri: String, +) -> Result { + let metadata = tokio::fs::metadata(local_path) + .await + .map_err(|error| format!("stat {} failed: {error}", local_path.display()))?; + let total_bytes = metadata.len(); + let original_uri = session_uri.clone(); + if total_bytes == 0 { + return Ok(UploadFileOutcome { + bytes_uploaded: 0, + final_session_uri: None, + }); + } + + let mut file = File::open(local_path) + .await + .map_err(|error| format!("open {} failed: {error}", local_path.display()))?; + let mut offset: u64 = 0; + let mut crc: u32 = 0; + let mut server_crc: Option = None; + let mut session_uri = session_uri; + let recording_index = trace.recording_index; + let trace_id = trace.trace_id.clone(); + let Some(org_id) = org_rx.borrow().clone() else { + return Err("no current org_id configured; cannot refresh session URI".to_string()); + }; + + 
tracing::info!( + trace_id, + path = %local_path.display(), + bytes = total_bytes, + "starting file upload" + ); + let upload_started = Instant::now(); + let mut chunks_since_persist: u32 = 0; + let mut last_persisted_offset: u64 = 0; + // Bound consecutive iterations that make no forward progress (e.g. a peer + // returning repeated zero-advance 308s) so a misbehaving server cannot wedge + // this upload task β€” and its concurrency permit β€” in an infinite loop. + let mut stalled_iterations: u32 = 0; + while offset < total_bytes { + let offset_before = offset; + let chunk_end = (offset + CHUNK_SIZE as u64).min(total_bytes) - 1; + let chunk_len = (chunk_end - offset + 1) as usize; + let mut buffer = vec![0u8; chunk_len]; + file.seek(std::io::SeekFrom::Start(offset)) + .await + .map_err(|error| format!("seek failed: {error}"))?; + file.read_exact(&mut buffer) + .await + .map_err(|error| format!("read failed: {error}"))?; + let chunk = Bytes::from(buffer); + let is_final = chunk_end + 1 == total_bytes; + + let outcome = put_chunk( + client, + &session_uri, + chunk.clone(), + offset, + chunk_end, + total_bytes, + is_final, + ) + .await?; + match outcome { + PutChunkOutcome::Accepted { headers, body } => { + crc = crc32c::crc32c_append(crc, &chunk); + if is_final { + server_crc = extract_server_crc32c(&headers, &body); + } + offset += chunk_len as u64; + } + PutChunkOutcome::Incomplete { headers } => { + // 308 β€” the server reports how much it actually committed via + // the Range header. GCS commits in 256 KiB units, so it can + // accept only a *prefix* of a 16 MiB chunk. We must hash exactly + // the committed prefix: hashing the whole chunk then resuming at + // `server_offset` re-reads β€” and re-hashes β€” the uncommitted + // tail on the next iteration, double-counting it into the local + // checksum and failing the final compare with a spurious mismatch. 
+ let server_offset = parse_resume_offset(&headers).unwrap_or(offset); + match resume_decision(offset, chunk_len, server_offset) { + ResumeDecision::Behind => { + return Err(format!( + "server resume offset {server_offset} is behind local offset \ + {offset}; refusing to corrupt {}", + local_path.display() + )); + } + ResumeDecision::Ahead { new_offset } => { + // Server has bytes we didn't send this session (e.g. a + // prior session) and can't re-hash β€” accept its view but + // flag the local checksum untrustworthy. + tracing::warn!( + server_offset, + local_offset = offset + chunk_len as u64, + path = %local_path.display(), + "server resume offset is ahead of local; skipping local checksum" + ); + crc = crc32c::crc32c_append(crc, &chunk); + server_crc = None; + offset = new_offset; + } + ResumeDecision::Committed { + hash_len, + new_offset, + } => { + // Fold in only the committed prefix; the tail is re-sent + // (and hashed) on the next read, so every byte is hashed + // exactly once. + crc = crc32c::crc32c_append(crc, &chunk[..hash_len]); + offset = new_offset; + } + } + } + PutChunkOutcome::SessionExpired => { + tracing::info!( + trace_id, + path = %local_path.display(), + "upload session expired; requesting fresh URI" + ); + match client + .fetch_resumable_upload_url(&org_id, recording_id, cloud_filepath, content_type) + .await + { + Ok(new_uri) => { + session_uri = new_uri; + // A new session means the server has zero bytes for + // this file; restart from offset 0 and rehash. 
+ offset = 0; + crc = 0; + server_crc = None; + continue; + } + Err(error) => { + return Err(format!("failed to fetch fresh session URI: {error}")); + } + } + } + PutChunkOutcome::Failed { status, body } => { + return Err(format!( + "upload failed with HTTP {status} for {}: {body}", + local_path.display() + )); + } + } + + if offset > offset_before { + stalled_iterations = 0; + } else { + stalled_iterations += 1; + if stalled_iterations >= MAX_RETRIES { + return Err(format!( + "upload of {} stalled at offset {offset}: server reported no \ + progress after {MAX_RETRIES} consecutive attempts", + local_path.display() + )); + } + } + + bus.publish(DaemonEvent::UploadProgress { + trace_id: trace_id.clone(), + recording_index, + bytes_uploaded: offset as i64, + total_bytes: Some(total_bytes as i64), + }); + let _ = status_tx.send(StatusUpdate::in_progress( + recording_index, + trace_id.clone(), + offset as i64, + )); + // Persist the rolling progress on a coarse cadence (not every chunk): + // the in-memory bus/status updates above are debounced downstream, and + // only the SQLite write contends on the shared write_guard. Resume + // correctness comes from the server's 308 offset, not this row. + chunks_since_persist += 1; + if chunks_since_persist >= PROGRESS_PERSIST_EVERY_CHUNKS { + persist_upload_offset(trace_writer, &trace_id, offset); + chunks_since_persist = 0; + last_persisted_offset = offset; + } + } + + // Persist the final offset once so the DB row reflects the completed bytes + // even if the last persisted checkpoint was several chunks back. 
+ if offset != last_persisted_offset { + persist_upload_offset(trace_writer, &trace_id, offset); + } + + tracing::info!( + trace_id, + path = %local_path.display(), + bytes = total_bytes, + elapsed_ms = upload_started.elapsed().as_millis(), + "file upload complete" + ); + if let Some(expected) = server_crc { + if expected != crc { + return Err(format!( + "crc32c mismatch for {}: local={crc:#010x} server={expected:#010x}", + local_path.display() + )); + } + } + let final_session_uri = (session_uri != original_uri).then_some(session_uri); + Ok(UploadFileOutcome { + bytes_uploaded: total_bytes as i64, + final_session_uri, + }) +} + +/// Persist the rolling `bytes_uploaded` checkpoint for a trace via the +/// coalescing write-behind β€” fire-and-forget, so a burst of concurrent uploads +/// collapses to one batched row write per flush tick instead of a synchronous +/// transaction each. A missed checkpoint only costs re-sending a few chunks on +/// restart, never correctness (resume uses the server's 308 offset). +fn persist_upload_offset(trace_writer: &TraceWriteHandle, trace_id: &str, offset: u64) { + trace_writer.upload_progress(trace_id, offset as i64); +} + +/// Outcome of a single PUT to the resumable session URI. Returned by +/// [`put_chunk`] so [`upload_one_file`] can dispatch on it without re-parsing +/// status codes. +enum PutChunkOutcome { + /// 2xx β€” chunk accepted. Headers/body carry the final response on the + /// last chunk (the server-side CRC32C lives here). + Accepted { headers: HeaderMap, body: String }, + /// 308 β€” chunk accepted but the server wants more bytes. The Range + /// header tells us where it is. + Incomplete { headers: HeaderMap }, + /// 410/404 β€” the resumable session is gone. The caller must call + /// `/resumable_upload_url` to obtain a fresh one. + SessionExpired, + /// Any other non-retryable status; the caller surfaces it as a hard + /// error and lets the upload coordinator roll the trace to `retrying`. 
+ Failed { status: StatusCode, body: String }, +} + +async fn put_chunk( + client: &Arc, + session_uri: &str, + chunk: Bytes, + chunk_start: u64, + chunk_end: u64, + total_bytes: u64, + is_final: bool, +) -> Result { + let mut headers = HeaderMap::new(); + let content_range = if is_final { + format!("bytes {chunk_start}-{chunk_end}/{total_bytes}") + } else { + format!("bytes {chunk_start}-{chunk_end}/*") + }; + headers.insert( + CONTENT_RANGE, + HeaderValue::from_str(&content_range).unwrap(), + ); + headers.insert(CONTENT_LENGTH, HeaderValue::from(chunk.len() as u64)); + + let mut attempt: u32 = 0; + let mut refreshed_auth = false; + loop { + let bearer = match client.auth().bearer_token().await { + Ok(token) => token, + Err(error) => { + tracing::warn!(%error, "uploader could not read auth token"); + return Err(format!("auth load failed: {error}")); + } + }; + let mut request_headers = headers.clone(); + request_headers.insert( + AUTHORIZATION, + HeaderValue::from_str(&format!("Bearer {bearer}")) + .map_err(|error| format!("auth header invalid: {error}"))?, + ); + + // `Bytes` is cheaply cloneable (Arc-backed), so re-sending the + // same chunk on retry is a refcount bump, not a 16 MiB copy. 
+ let request = client + .raw_client() + .put(session_uri) + .headers(request_headers) + .body(chunk.clone()) + .build() + .map_err(|error| format!("failed to build request: {error}"))?; + tracing::debug!( + attempt, + bytes = chunk.len(), + chunk_start, + chunk_end, + "sending upload chunk" + ); + let chunk_started = Instant::now(); + let response = + match timeout(CHUNK_UPLOAD_TIMEOUT, client.raw_client().execute(request)).await { + Ok(Ok(response)) => response, + Ok(Err(error)) => { + if attempt + 1 >= MAX_RETRIES { + return Err(format!("transport error: {error}")); + } + attempt += 1; + tracing::warn!(%error, attempt, "upload chunk transport error; retrying"); + sleep(backoff(attempt)).await; + continue; + } + Err(_elapsed) => { + tracing::warn!( + attempt, + timeout_secs = CHUNK_UPLOAD_TIMEOUT.as_secs(), + bytes = chunk.len(), + "upload chunk PUT timed out; retrying" + ); + if attempt + 1 >= MAX_RETRIES { + return Err(format!( + "chunk PUT timed out after {}s ({MAX_RETRIES} attempts exhausted)", + CHUNK_UPLOAD_TIMEOUT.as_secs() + )); + } + attempt += 1; + sleep(backoff(attempt)).await; + continue; + } + }; + tracing::debug!( + elapsed_ms = chunk_started.elapsed().as_millis(), + bytes = chunk.len(), + status = response.status().as_u16(), + "upload chunk response received" + ); + + let status = response.status(); + let response_headers = response.headers().clone(); + let body = response.text().await.unwrap_or_default(); + + if status == StatusCode::UNAUTHORIZED && !refreshed_auth { + if let Err(error) = client.auth().reload().await { + return Err(format!("auth reload failed: {error}")); + } + refreshed_auth = true; + continue; + } + if status.is_success() { + return Ok(PutChunkOutcome::Accepted { + headers: response_headers, + body, + }); + } + if status.as_u16() == 308 { + return Ok(PutChunkOutcome::Incomplete { + headers: response_headers, + }); + } + if matches!(status.as_u16(), 410 | 404) { + return Ok(PutChunkOutcome::SessionExpired); + } + if 
matches!(status.as_u16(), 429 | 500 | 502 | 503 | 504) && attempt + 1 < MAX_RETRIES { + attempt += 1; + tracing::warn!(%status, attempt, "retrying upload chunk after transient failure"); + sleep(backoff(attempt)).await; + continue; + } + return Ok(PutChunkOutcome::Failed { status, body }); + } +} + +fn backoff(attempt: u32) -> Duration { + let secs = 2u64.saturating_pow(attempt.saturating_sub(1)); + Duration::from_secs(secs.min(MAX_BACKOFF.as_secs())) +} + +fn parse_resume_offset(headers: &HeaderMap) -> Option { + let value = headers.get("range")?.to_str().ok()?; + let last = value.split('-').nth(1)?; + let last_byte: u64 = last.parse().ok()?; + Some(last_byte + 1) +} + +/// How a 308's committed `server_offset` reconciles against the just-sent chunk +/// `[offset, offset + chunk_len)`. +#[derive(Debug, PartialEq, Eq)] +enum ResumeDecision { + /// Server is behind our local offset β€” would corrupt the object; abort. + Behind, + /// Server is ahead of anything we sent this session (bytes we can't + /// re-hash); accept its offset but treat the local checksum as unusable. + Ahead { new_offset: u64 }, + /// Server committed `hash_len` bytes of this chunk; fold exactly that prefix + /// into the running checksum and resume from `new_offset`. + Committed { hash_len: usize, new_offset: u64 }, +} + +/// Decide how many bytes of the just-sent chunk the running checksum should absorb +/// after a 308, given the server's committed `server_offset`. Hashing only the +/// committed prefix is what keeps every byte hashed exactly once across a +/// partial (sub-chunk) commit and the resend of its tail. 
+fn resume_decision(offset: u64, chunk_len: usize, server_offset: u64) -> ResumeDecision { + if server_offset < offset { + ResumeDecision::Behind + } else if server_offset > offset + chunk_len as u64 { + ResumeDecision::Ahead { + new_offset: server_offset, + } + } else { + ResumeDecision::Committed { + hash_len: (server_offset - offset) as usize, + new_offset: server_offset, + } + } +} + +/// Extract the server's CRC32C for the completed object as a `u32`. +/// +/// GCS reports CRC32C as base64 of the 4-byte big-endian checksum, via the +/// `x-goog-hash` header (`crc32c=…,md5=…`, components in arbitrary order) on a +/// resumable PUT and via the `crc32c` field of the JSON object resource. Unlike +/// `md5Hash`, CRC32C is present on every object β€” including composite objects β€” +/// so the completion check can never be silently skipped. +fn extract_server_crc32c(headers: &HeaderMap, body: &str) -> Option { + let decode = |b64: &str| -> Option { + let bytes = BASE64.decode(b64).ok()?; + Some(u32::from_be_bytes( + <[u8; 4]>::try_from(bytes.as_slice()).ok()?, + )) + }; + if let Some(text) = headers + .get("x-goog-hash") + .and_then(|value| value.to_str().ok()) + { + for part in text.split(',') { + if let Some(b64) = part.trim().strip_prefix("crc32c=") { + return decode(b64); + } + } + } + if let Ok(json) = serde_json::from_str::(body) { + if let Some(b64) = json.get("crc32c").and_then(|value| value.as_str()) { + return decode(b64); + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_resume_offset_uses_last_byte_plus_one() { + // 308 carries a `Range: bytes=0-` header; the offset is + // ` + 1`. The 308 commit path keys off this so anyone + // refactoring it later sees an explicit covering test. 
+ let mut headers = HeaderMap::new(); + headers.insert("range", HeaderValue::from_static("bytes=0-99")); + assert_eq!(parse_resume_offset(&headers), Some(100)); + let mut empty = HeaderMap::new(); + empty.insert("range", HeaderValue::from_static("bytes=*")); + assert_eq!(parse_resume_offset(&empty), None); + } + + #[test] + fn resume_full_chunk_commit_hashes_whole_chunk() { + // Server committed the entire chunk β†’ hash all of it, advance fully. + assert_eq!( + resume_decision(0, CHUNK_SIZE, CHUNK_SIZE as u64), + ResumeDecision::Committed { + hash_len: CHUNK_SIZE, + new_offset: CHUNK_SIZE as u64, + } + ); + } + + #[test] + fn resume_partial_commit_hashes_only_committed_prefix() { + // M7 regression: GCS commits in 256 KiB units, so a 16 MiB chunk can be + // committed only up to, say, 16 MiB βˆ’ 256 KiB. We must hash exactly that + // committed prefix β€” NOT the whole chunk β€” or the re-sent tail is hashed + // twice and the final checksum spuriously mismatches. + let committed = (CHUNK_SIZE - 256 * 1024) as u64; + assert_eq!( + resume_decision(0, CHUNK_SIZE, committed), + ResumeDecision::Committed { + hash_len: committed as usize, + new_offset: committed, + } + ); + } + + #[test] + fn resume_zero_advance_hashes_nothing() { + // Server has nothing yet (missing/zero Range) β†’ hash nothing, retry the + // same offset; otherwise the whole chunk would be double-hashed. 
+ assert_eq!( + resume_decision(100, CHUNK_SIZE, 100), + ResumeDecision::Committed { + hash_len: 0, + new_offset: 100, + } + ); + } + + #[test] + fn resume_ahead_marks_checksum_untrustworthy() { + assert_eq!( + resume_decision(0, CHUNK_SIZE, CHUNK_SIZE as u64 + 1), + ResumeDecision::Ahead { + new_offset: CHUNK_SIZE as u64 + 1, + } + ); + } + + #[test] + fn resume_behind_is_a_corruption_guard() { + assert_eq!(resume_decision(100, CHUNK_SIZE, 50), ResumeDecision::Behind); + } + + #[test] + fn crc32c_matches_known_vector() { + // Castagnoli CRC32C of the standard check string; guards against a + // future swap to the wrong (zlib/ISO) polynomial, which would compile + // fine but never match a GCS-reported checksum. + assert_eq!(crc32c::crc32c(b"123456789"), 0xE306_9283); + } + + #[test] + fn extract_server_crc32c_reads_x_goog_hash_in_any_order() { + let expected = crc32c::crc32c(b"hello world"); + let b64 = BASE64.encode(expected.to_be_bytes()); + let mut headers = HeaderMap::new(); + // md5 first, crc32c second β€” component order is arbitrary, md5 ignored. + let value = format!("md5=ignored, crc32c={b64}"); + headers.insert("x-goog-hash", HeaderValue::from_str(&value).unwrap()); + assert_eq!(extract_server_crc32c(&headers, ""), Some(expected)); + } + + #[test] + fn extract_server_crc32c_falls_back_to_json_body() { + let expected = crc32c::crc32c(b"resumable-payload"); + let b64 = BASE64.encode(expected.to_be_bytes()); + let body = format!(r#"{{"crc32c":"{b64}","md5Hash":"ignored"}}"#); + assert_eq!( + extract_server_crc32c(&HeaderMap::new(), &body), + Some(expected) + ); + } + + #[test] + fn extract_server_crc32c_absent_is_none() { + // No crc32c anywhere β†’ None β†’ completion check is skipped, not failed. 
+ assert_eq!(extract_server_crc32c(&HeaderMap::new(), ""), None); + } +} diff --git a/rust/data_daemon/src/cloud/coordinators/uploader.rs b/rust/data_daemon/src/cloud/coordinators/uploader.rs new file mode 100644 index 000000000..6cf13048a --- /dev/null +++ b/rust/data_daemon/src/cloud/coordinators/uploader.rs @@ -0,0 +1,938 @@ +//! Resumable file uploader coordinator. +//! +//! Subscribes to [`DaemonEvent::ReadyForUpload`] (and re-scans the +//! state store on startup for any traces already in the registered/queued +//! state). For each on-disk artefact the coordinator PUTs `CHUNK_SIZE` (16 MiB) +//! chunks to the GCS resumable session URI persisted by the registration coordinator, +//! handling 308-continue, 410-session-expired, and 401-auth-refresh +//! transitions. On completion the trace is marked `Uploaded` and the upload +//! sub-system publishes `UploadComplete` for the status updater. + +use std::collections::{HashMap, HashSet}; +use std::path::PathBuf; +use std::sync::Arc; + +use tokio::sync::{broadcast, Semaphore}; +use tokio::task::{JoinHandle, JoinSet}; +use tokio::time::{interval, MissedTickBehavior}; + +use super::status::StatusUpdate; +use super::upload_transfer::{upload_one_file, UploadFileOutcome}; +use crate::api::ApiClient; +use crate::cloud::cloud_files::content_type_for_filename; +use crate::cloud::OrgIdRx; +use crate::lifecycle::shutdown::ShutdownSignal; +use crate::state::store::TraceUpdate; +use crate::state::{ + ConnectionState, DaemonEvent, EventBus, SqliteStateStore, StateStore, TraceRecord, + TraceUploadStatus, TraceWriteHandle, +}; +use crate::storage::paths::TracePath; + +/// Maximum number of traces uploading concurrently. With 8 parallel contexts +/// each queuing ~128 traces simultaneously (1024 total), 32 slots serialise +/// into ~32 rounds Γ— 300 ms β‰ˆ 9.6 s. 128 slots cuts that to ~8 rounds Γ— +/// 300 ms β‰ˆ 2.4 s, giving ~6 s headroom against the 9 s stop-recording SLA. 
+pub const MAX_CONCURRENT_UPLOADS: usize = 128; + +/// Handle returned by [`spawn_uploader`]. +pub struct UploaderHandle { + join: JoinHandle<()>, +} + +impl UploaderHandle { + /// Wait for the uploader task to exit. + pub async fn join(self) { + if let Err(error) = self.join.await { + tracing::warn!(?error, "uploader join failed"); + } + } +} + +/// Spawn the uploader task on the current Tokio runtime. +#[allow(clippy::too_many_arguments)] +pub fn spawn_uploader( + store: SqliteStateStore, + trace_writer: TraceWriteHandle, + bus: EventBus, + client: Arc, + recordings_root: Arc, + org_rx: OrgIdRx, + status_tx: tokio::sync::mpsc::UnboundedSender, + mut shutdown_rx: broadcast::Receiver, +) -> UploaderHandle { + let mut subscriber = bus.subscribe(); + let store = Arc::new(store); + let join = tokio::spawn(async move { + let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_UPLOADS)); + let mut in_flight: JoinSet = JoinSet::new(); + // Tracks dispatched trace IDs to prevent a drain triggered by + // join_next from re-queuing a trace whose task hasn't yet run the + // DB update to mark itself Uploading. + let mut in_flight_ids: HashSet = HashSet::new(); + // Safety-net rescan: catch any traces that were skipped when the + // semaphore was full during a drain, without relying on bus events. + let mut rescan_tick = interval(crate::intervals::UPLOAD_RESCAN); + rescan_tick.set_missed_tick_behavior(MissedTickBehavior::Skip); + let mut connected = false; + loop { + tokio::select! { + biased; + signal = shutdown_rx.recv() => { + tracing::debug!(?signal, "uploader shutting down"); + break; + } + // Reap a completed task immediately and chain the next drain + // so a finishing upload starts the next one without waiting + // for a bus event or the rescan tick. 
+ Some(join_result) = in_flight.join_next(), if !in_flight.is_empty() => { + match join_result { + Ok(completed_trace_id) => { in_flight_ids.remove(&completed_trace_id); } + Err(panic_err) => { tracing::warn!(?panic_err, "upload task panicked"); } + } + if connected { + drain_ready_traces( + &store, + &trace_writer, + &bus, + &client, + &recordings_root, + &org_rx, + &status_tx, + &semaphore, + &mut in_flight, + &mut in_flight_ids, + ) + .await; + } + } + event = subscriber.recv() => { + match event { + Ok(DaemonEvent::ConnectionStateChanged(state)) => { + connected = matches!(state, ConnectionState::Up); + if connected { + drain_ready_traces( + &store, + &trace_writer, + &bus, + &client, + &recordings_root, + &org_rx, + &status_tx, + &semaphore, + &mut in_flight, + &mut in_flight_ids, + ) + .await; + } + } + Ok(DaemonEvent::ReadyForUpload { trace_id, .. }) => { + if !connected { + tracing::debug!(trace_id, "deferring upload until connection up"); + continue; + } + spawn_upload_task( + &store, + &trace_writer, + &bus, + &client, + &recordings_root, + &org_rx, + &status_tx, + &semaphore, + &mut in_flight, + &mut in_flight_ids, + trace_id, + ); + } + Ok(_) => {} + Err(broadcast::error::RecvError::Lagged(skipped)) => { + tracing::warn!(skipped, "uploader missed bus events; rescanning"); + if connected { + drain_ready_traces( + &store, + &trace_writer, + &bus, + &client, + &recordings_root, + &org_rx, + &status_tx, + &semaphore, + &mut in_flight, + &mut in_flight_ids, + ) + .await; + } + } + Err(broadcast::error::RecvError::Closed) => break, + } + } + _ = rescan_tick.tick() => { + if connected { + drain_ready_traces( + &store, + &trace_writer, + &bus, + &client, + &recordings_root, + &org_rx, + &status_tx, + &semaphore, + &mut in_flight, + &mut in_flight_ids, + ) + .await; + } + } + } + } + in_flight.shutdown().await; + }); + UploaderHandle { join } +} + +#[allow(clippy::too_many_arguments)] +async fn drain_ready_traces( + store: &Arc, + trace_writer: 
&TraceWriteHandle, + bus: &EventBus, + client: &Arc, + recordings_root: &Arc, + org_rx: &OrgIdRx, + status_tx: &tokio::sync::mpsc::UnboundedSender, + semaphore: &Arc, + in_flight: &mut JoinSet, + in_flight_ids: &mut HashSet, +) { + // Server-side filter for `queued`/`retrying` traces (uses + // `idx_traces_upload_status`) instead of walking every recording's full + // trace set on each completed upload β€” the old N+1 scan was quadratic under + // the burst this loop runs after every `join_next`. + let trace_ids = match store.traces_ready_for_upload().await { + Ok(ids) => ids, + Err(error) => { + tracing::warn!(%error, "uploader could not query traces ready for upload"); + return; + } + }; + for trace_id in trace_ids { + spawn_upload_task( + store, + trace_writer, + bus, + client, + recordings_root, + org_rx, + status_tx, + semaphore, + in_flight, + in_flight_ids, + trace_id, + ); + } +} + +#[allow(clippy::too_many_arguments)] +fn spawn_upload_task( + store: &Arc, + trace_writer: &TraceWriteHandle, + bus: &EventBus, + client: &Arc, + recordings_root: &Arc, + org_rx: &OrgIdRx, + status_tx: &tokio::sync::mpsc::UnboundedSender, + semaphore: &Arc, + in_flight: &mut JoinSet, + in_flight_ids: &mut HashSet, + trace_id: String, +) { + if in_flight_ids.contains(&trace_id) { + tracing::debug!(trace_id, "trace already dispatched; skipping duplicate"); + return; + } + let Ok(permit) = Arc::clone(semaphore).try_acquire_owned() else { + tracing::debug!(trace_id, "upload semaphore full; will retry on next drain"); + return; + }; + in_flight_ids.insert(trace_id.clone()); + let store = Arc::clone(store); + let trace_writer = trace_writer.clone(); + let bus = bus.clone(); + let client = Arc::clone(client); + let recordings_root = Arc::clone(recordings_root); + let org_rx = org_rx.clone(); + let status_tx = status_tx.clone(); + in_flight.spawn(async move { + upload_single( + &store, + &trace_writer, + &bus, + &client, + &recordings_root, + &org_rx, + &status_tx, + &trace_id, + ) + 
.await; + drop(permit); + trace_id + }); +} + +#[allow(clippy::too_many_arguments)] +async fn upload_single( + store: &Arc, + trace_writer: &TraceWriteHandle, + bus: &EventBus, + client: &Arc, + recordings_root: &Arc, + org_rx: &OrgIdRx, + status_tx: &tokio::sync::mpsc::UnboundedSender, + trace_id: &str, +) { + let trace = match store.get_trace(trace_id).await { + Ok(Some(trace)) => trace, + Ok(None) => { + tracing::warn!(trace_id, "uploader could not find trace row"); + return; + } + Err(error) => { + tracing::warn!(%error, trace_id, "uploader failed to load trace row"); + return; + } + }; + let session_uris = match parse_session_uris(&trace) { + Some(uris) => uris, + None => return, + }; + if session_uris.is_empty() { + // Nothing to upload β€” mark uploaded immediately so downstream + // accounting matches a registered-but-empty trace. + finalise_upload(store, bus, status_tx, &trace, 0).await; + return; + } + + // Resolve the cloud `recording_id` (needed for the resumable-upload-url + // refresh) before we touch the trace's upload state. A None here means + // registration hasn't minted the cloud id yet β€” leave the trace in its + // queued/retrying state and skip; a later drain re-enters once it lands. + let Some(recording_id) = recording_cloud_id(store, trace.recording_index).await else { + tracing::warn!( + trace_id, + recording_index = trace.recording_index, + "recording has no cloud recording_id yet; deferring upload" + ); + return; + }; + + // Mark the trace as uploading so the next bus tick doesn't repeat the + // attempt (the registration path is one-shot, but the periodic rescan + // could re-enter on a long-running upload). 
+ let _ = store + .update_trace( + trace_id, + TraceUpdate { + upload_status: Some(TraceUploadStatus::Uploading), + ..TraceUpdate::default() + }, + ) + .await; + + tracing::info!(trace_id, data_type = ?trace.data_type, "starting trace upload"); + let Some(data_type) = trace.data_type.as_deref() else { + // No data_type means we never saw a `StartTrace` for this row, so we + // can't locate the on-disk artefact. Surface the failure both to the + // status updater and on the event bus so the recording's progress + // reporter (which gates on every trace having settled) doesn't wait + // for an upload that can never happen. + tracing::warn!(trace_id, "trace row missing data_type; marking failed"); + mark_failed_and_emit(store, bus, status_tx, &trace, "trace missing data_type").await; + return; + }; + // On-disk artefacts are keyed by the local `recording_index`, matching the + // directory the dispatcher / trace actors wrote to. + let trace_dir = TracePath::new( + trace.recording_index.to_string(), + data_type, + trace_id.to_string(), + ) + .directory(recordings_root.as_path()); + + // Upload each on-disk artefact under its session URI and persist the + // refreshed URI back into the same slot (by index) for resume on retry. + let mut total_uploaded: i64 = 0; + let mut session_uris = session_uris; + for index in 0..session_uris.len() { + let (filename, session_uri) = session_uris[index].clone(); + let local_path = trace_dir.join(file_basename(&filename)); + if !local_path.exists() { + tracing::warn!( + trace_id, + path = %local_path.display(), + "expected upload artefact missing; marking trace failed" + ); + mark_failed_and_emit( + store, + bus, + status_tx, + &trace, + &format!("missing artefact {filename}"), + ) + .await; + return; + } + + // `content_type` here drives the GCS-side metadata when we re-acquire a + // session URI on 410. 
Use the same filenameβ†’type mapping registration + // used (`cloud_files::content_type_for_filename`) so the refresh can't + // disagree with what was originally registered. + let content_type = content_type_for_filename(&filename); + let outcome = upload_one_file( + client, + trace_writer, + bus, + org_rx, + status_tx, + &trace, + &recording_id, + &local_path, + &filename, + content_type, + session_uri, + ) + .await; + match outcome { + Ok(UploadFileOutcome { + bytes_uploaded, + final_session_uri, + }) => { + total_uploaded = total_uploaded.saturating_add(bytes_uploaded); + // Persist the (possibly refreshed) URI so a subsequent + // restart resumes from the right session, even if the + // refresh path fired mid-stream. + if let Some(new_uri) = final_session_uri { + session_uris[index].1 = new_uri; + persist_session_uris(store, trace_id, &session_uris).await; + } + } + Err(error) => { + tracing::warn!(%error, trace_id, "upload failed; rolling back to retrying"); + let update = TraceUpdate { + upload_status: Some(TraceUploadStatus::Retrying), + error_message: Some(Some(error)), + ..TraceUpdate::default() + }; + if let Err(error) = store.update_trace(trace_id, update).await { + tracing::warn!(%error, trace_id, "failed to mark trace as retrying"); + } + return; + } + } + } + + finalise_upload(store, bus, status_tx, &trace, total_uploaded).await; +} + +async fn finalise_upload( + store: &Arc, + bus: &EventBus, + status_tx: &tokio::sync::mpsc::UnboundedSender, + trace: &TraceRecord, + total_uploaded: i64, +) { + let update = TraceUpdate { + upload_status: Some(TraceUploadStatus::Uploaded), + bytes_uploaded: Some(total_uploaded), + total_bytes: Some(total_uploaded.max(trace.total_bytes)), + ..TraceUpdate::default() + }; + if let Err(error) = store.update_trace(&trace.trace_id, update).await { + tracing::warn!(%error, trace_id = trace.trace_id, "failed to mark trace uploaded"); + } + bus.publish(DaemonEvent::UploadComplete { + trace_id: trace.trace_id.clone(), + 
recording_index: trace.recording_index, + }); + let _ = status_tx.send(StatusUpdate::completed( + trace.recording_index, + trace.trace_id.clone(), + total_uploaded.max(trace.total_bytes), + )); +} + +async fn mark_failed_and_emit( + store: &Arc, + bus: &EventBus, + status_tx: &tokio::sync::mpsc::UnboundedSender, + trace: &TraceRecord, + message: &str, +) { + let update = TraceUpdate { + upload_status: Some(TraceUploadStatus::Failed), + error_message: Some(Some(message.to_string())), + ..TraceUpdate::default() + }; + if let Err(error) = store.update_trace(&trace.trace_id, update).await { + tracing::warn!(%error, trace_id = trace.trace_id, "failed to mark trace as failed"); + } + // Publishing on the upload-complete topic lets the progress reporter and + // status updater treat the trace as terminal β€” without this signal a + // single bad trace would block the recording's progress report forever. + bus.publish(DaemonEvent::UploadComplete { + trace_id: trace.trace_id.clone(), + recording_index: trace.recording_index, + }); + let _ = status_tx.send(StatusUpdate::completed( + trace.recording_index, + trace.trace_id.clone(), + trace.total_bytes.max(0), + )); +} + +async fn persist_session_uris( + store: &Arc, + trace_id: &str, + uris: &[(String, String)], +) { + let map: HashMap<&str, &str> = uris + .iter() + .map(|(filename, uri)| (filename.as_str(), uri.as_str())) + .collect(); + let serialised = match serde_json::to_string(&map) { + Ok(serialised) => serialised, + Err(error) => { + tracing::warn!(%error, trace_id, "failed to serialise refreshed session URIs"); + return; + } + }; + let update = TraceUpdate { + upload_session_uris: Some(serialised), + ..TraceUpdate::default() + }; + if let Err(error) = store.update_trace(trace_id, update).await { + tracing::warn!(%error, trace_id, "failed to persist refreshed session URIs"); + } +} + +fn parse_session_uris(trace: &TraceRecord) -> Option> { + let Some(serialised) = &trace.upload_session_uris else { + tracing::warn!( + 
/// Return the final `/`-separated component of `path`.
///
/// Trailing slashes are ignored, so `"a/b/"` yields `"b"` rather than an
/// empty string. An empty basename would make `dir.join(basename)` resolve to
/// `dir` itself, which exists, so a malformed key would be treated as a
/// present artefact. A path without any `/` is returned unchanged; an empty
/// or all-slash path yields `""`.
fn file_basename(path: &str) -> &str {
    let trimmed = path.trim_end_matches('/');
    match trimmed.rsplit_once('/') {
        Some((_, tail)) => tail,
        None => trimmed,
    }
}
+ fn org_rx(org: Option<&str>) -> OrgIdRx { + let (org_tx, org_rx) = tokio::sync::watch::channel(org.map(str::to_string)); + Box::leak(Box::new(org_tx)); + org_rx + } + + async fn open_store() -> (SqliteStateStore, TempDir) { + let dir = TempDir::new().unwrap(); + let store = SqliteStateStore::open(&dir.path().join("state.db")) + .await + .unwrap(); + (store, dir) + } + + fn client(server: &MockServer) -> Arc { + let auth = Arc::new(StaticAuthProvider::new("t")); + let mut options = ApiClientOptions::new(server.uri()); + options.max_backoff = Duration::from_millis(10); + Arc::new(ApiClient::new(options, auth).unwrap()) + } + + #[allow(clippy::too_many_arguments)] + async fn seed_ready_trace( + store: &SqliteStateStore, + recordings_root: &std::path::Path, + cloud_recording_id: &str, + trace_id: &str, + data_type: &str, + data_type_name: &str, + session_uri: &str, + contents: &[u8], + ) -> (i64, std::path::PathBuf) { + let recording = store + .create_recording(NewRecording::default()) + .await + .unwrap(); + let recording_index = recording.recording_index; + // Stamp the cloud `recording_id` so the uploader's cloud-id resolution + // and the resumable-upload-url refresh see the same id the wiremock + // expectations assert on. + store + .mark_recording_start_notified(recording_index, cloud_recording_id) + .await + .unwrap(); + store + .create_trace( + recording_index, + trace_id, + Some(data_type), + Some(data_type_name), + ) + .await + .unwrap(); + // On-disk artefacts are keyed by the local `recording_index`. 
+ let dir = TracePath::new(recording_index.to_string(), data_type, trace_id.to_string()) + .directory(recordings_root); + std::fs::create_dir_all(&dir).unwrap(); + let local = dir.join(TRACE_JSON_FILENAME); + std::fs::write(&local, contents).unwrap(); + let mut uris = HashMap::new(); + uris.insert( + format!("{data_type}/{data_type_name}/{TRACE_JSON_FILENAME}"), + session_uri.to_string(), + ); + store + .update_trace( + trace_id, + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + upload_status: Some(TraceUploadStatus::Queued), + upload_session_uris: Some(serde_json::to_string(&uris).unwrap()), + total_bytes: Some(contents.len() as i64), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + (recording_index, local) + } + + #[tokio::test] + async fn bad_server_checksum_marks_trace_retrying() { + // Server returns a deliberately wrong CRC32C (0) β€” the real payload's + // checksum is non-zero, so the guard must reject the upload and roll + // the trace back to `retrying` + // (the registration coordinator's recovery sweep takes it from + // there). The doc-claimed happy path is covered by + // `uploader_marks_uploaded_when_checksum_matches` below. 
+ let server = MockServer::start().await; + Mock::given(method("PUT")) + .and(path("/upload/abc")) + .respond_with(|_req: &Request| { + ResponseTemplate::new(200).insert_header("X-Goog-Hash", "crc32c=AAAAAA==") + }) + .expect(1) + .mount(&server) + .await; + + let (store, tempdir) = open_store().await; + let recordings_root = tempdir.path().join("recordings"); + let payload = b"some-bytes"; + let (_recording_index, _) = seed_ready_trace( + &store, + &recordings_root, + "rec-1", + "trace-1", + "JOINT_POSITIONS", + "arm", + &format!("{}/upload/abc", server.uri()), + payload, + ) + .await; + + let api = client(&server); + let bus = EventBus::new(); + let (status_tx, mut status_rx) = mpsc::unbounded_channel::(); + + let store_arc = Arc::new(store.clone()); + let (trace_writer, _trace_writer_owner) = + crate::state::trace_event_database_writer::spawn(store_arc.clone()); + let recordings_root = Arc::new(recordings_root); + upload_single( + &store_arc, + &trace_writer, + &bus, + &api, + &recordings_root, + &org_rx(Some("org-1")), + &status_tx, + "trace-1", + ) + .await; + + let trace = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!(trace.upload_status, TraceUploadStatus::Retrying); + // Status updates are sent regardless. + let _ = status_rx.try_recv(); + } + + #[tokio::test] + async fn uploader_marks_uploaded_when_checksum_matches() { + let server = MockServer::start().await; + // Use the CRC32C of the payload below. 
+ let payload = b"hello world"; + let b64 = BASE64.encode(crc32c::crc32c(payload).to_be_bytes()); + let header_value = format!("crc32c={b64}"); + let header_value_clone = header_value.clone(); + Mock::given(method("PUT")) + .and(path("/upload/abc")) + .respond_with(move |_req: &Request| { + ResponseTemplate::new(200).insert_header("X-Goog-Hash", header_value_clone.as_str()) + }) + .expect(1) + .mount(&server) + .await; + + let (store, tempdir) = open_store().await; + let recordings_root = tempdir.path().join("recordings"); + let (_recording_index, _) = seed_ready_trace( + &store, + &recordings_root, + "rec-1", + "trace-1", + "JOINT_POSITIONS", + "arm", + &format!("{}/upload/abc", server.uri()), + payload, + ) + .await; + + let api = client(&server); + let bus = EventBus::new(); + let (status_tx, mut status_rx) = mpsc::unbounded_channel::(); + + let store_arc = Arc::new(store.clone()); + let (trace_writer, _trace_writer_owner) = + crate::state::trace_event_database_writer::spawn(store_arc.clone()); + let recordings_root = Arc::new(recordings_root); + upload_single( + &store_arc, + &trace_writer, + &bus, + &api, + &recordings_root, + &org_rx(Some("org-1")), + &status_tx, + "trace-1", + ) + .await; + + let trace = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!(trace.upload_status, TraceUploadStatus::Uploaded); + assert_eq!(trace.bytes_uploaded, payload.len() as i64); + // At least one in-progress + one final status update should have + // been queued. + let mut count = 0; + while status_rx.try_recv().is_ok() { + count += 1; + } + assert!(count >= 1); + } + + #[tokio::test] + async fn session_expired_410_fetches_fresh_uri_and_restarts() { + // First PUT to /upload/dead returns 410 (expired session). + // GET resumable_upload_url returns a fresh /upload/live URI. + // Subsequent PUT to /upload/live succeeds with a valid checksum. 
+ let server = MockServer::start().await; + let payload = b"resumable-payload"; + let b64 = BASE64.encode(crc32c::crc32c(payload).to_be_bytes()); + let header_value = format!("crc32c={b64}"); + + Mock::given(method("PUT")) + .and(path("/upload/dead")) + .respond_with(ResponseTemplate::new(410)) + .expect(1) + .mount(&server) + .await; + let live_uri = format!("{}/upload/live", server.uri()); + let fresh_response = serde_json::json!({"url": live_uri}); + Mock::given(method("GET")) + .and(path("/org/org-1/recording/rec-1/resumable_upload_url")) + .respond_with(ResponseTemplate::new(200).set_body_json(fresh_response)) + .expect(1) + .mount(&server) + .await; + let header_value_clone = header_value.clone(); + Mock::given(method("PUT")) + .and(path("/upload/live")) + .respond_with(move |_req: &Request| { + ResponseTemplate::new(200).insert_header("X-Goog-Hash", header_value_clone.as_str()) + }) + .expect(1) + .mount(&server) + .await; + + let (store, tempdir) = open_store().await; + let recordings_root = tempdir.path().join("recordings"); + let dead_uri = format!("{}/upload/dead", server.uri()); + let (_recording_index, _) = seed_ready_trace( + &store, + &recordings_root, + "rec-1", + "trace-1", + "JOINT_POSITIONS", + "arm", + &dead_uri, + payload, + ) + .await; + + let api = client(&server); + let bus = EventBus::new(); + let (status_tx, _status_rx) = mpsc::unbounded_channel::(); + + let store_arc = Arc::new(store.clone()); + let (trace_writer, _trace_writer_owner) = + crate::state::trace_event_database_writer::spawn(store_arc.clone()); + let recordings_root = Arc::new(recordings_root); + upload_single( + &store_arc, + &trace_writer, + &bus, + &api, + &recordings_root, + &org_rx(Some("org-1")), + &status_tx, + "trace-1", + ) + .await; + + let trace = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!(trace.upload_status, TraceUploadStatus::Uploaded); + // Persisted URI must be the refreshed one so a restart resumes + // against the live session, not the 
dead one. + let serialised = trace.upload_session_uris.as_ref().expect("uris stored"); + assert!( + serialised.contains("/upload/live"), + "refreshed URI not persisted: {serialised}" + ); + assert!( + !serialised.contains("/upload/dead"), + "dead URI still present: {serialised}" + ); + } + + #[tokio::test] + async fn missing_data_type_emits_terminal_failure_and_unblocks_progress() { + // A trace registered without a data_type cannot be located on + // disk. The uploader must mark it Failed *and* emit an + // UploadComplete so the progress reporter's "all settled" gate + // moves on β€” otherwise the recording sits as `pending` forever. + let server = MockServer::start().await; + let (store, tempdir) = open_store().await; + let recordings_root = tempdir.path().join("recordings"); + + let recording = store + .create_recording(NewRecording::default()) + .await + .unwrap(); + let recording_index = recording.recording_index; + // Stamp a cloud id so the uploader's cloud-id resolution passes and it + // reaches the missing-data-type branch (not the deferral path). + store + .mark_recording_start_notified(recording_index, "rec-1") + .await + .unwrap(); + // Insert directly with NULL data_type so the uploader hits the + // missing-data-type branch. 
+ store + .create_trace(recording_index, "trace-1", None, None) + .await + .unwrap(); + let mut uris = HashMap::new(); + uris.insert("dummy".to_string(), "https://upload/abc".to_string()); + store + .update_trace( + "trace-1", + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + upload_status: Some(TraceUploadStatus::Queued), + upload_session_uris: Some(serde_json::to_string(&uris).unwrap()), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + + let api = client(&server); + let bus = EventBus::new(); + let mut subscriber = bus.subscribe(); + let (status_tx, mut status_rx) = mpsc::unbounded_channel::(); + + let store_arc = Arc::new(store.clone()); + let (trace_writer, _trace_writer_owner) = + crate::state::trace_event_database_writer::spawn(store_arc.clone()); + let recordings_root = Arc::new(recordings_root); + upload_single( + &store_arc, + &trace_writer, + &bus, + &api, + &recordings_root, + &org_rx(Some("org-1")), + &status_tx, + "trace-1", + ) + .await; + + let trace = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!(trace.upload_status, TraceUploadStatus::Failed); + // UploadComplete fires so the recording's progress report can + // proceed β€” otherwise a stray no-data-type trace would deadlock + // the whole recording. + match subscriber.try_recv() { + Ok(DaemonEvent::UploadComplete { trace_id, .. }) => { + assert_eq!(trace_id, "trace-1"); + } + other => panic!("expected UploadComplete event, got {other:?}"), + } + // Status updater also gets a terminal entry. + let update = status_rx.try_recv().expect("status update enqueued"); + assert!(update.completed); + } +} diff --git a/rust/data_daemon/src/cloud/mod.rs b/rust/data_daemon/src/cloud/mod.rs new file mode 100644 index 000000000..ee57bbc55 --- /dev/null +++ b/rust/data_daemon/src/cloud/mod.rs @@ -0,0 +1,85 @@ +//! Cloud-side coordinators: batch registration, resumable uploads, +//! debounced status updates, and the periodic progress reporter. +//! +//! 
Every coordinator is spawned by the daemon launch routine and subscribes +//! to the broadcast event bus. The flow is: +//! +//! 1. Per-trace actor finishes writing β†’ emits `TraceWritten`. +//! 2. Registration coordinator claims up to `registration::BATCH_SIZE` traces +//! (or whatever is ready after `registration::MAX_WAIT`), batch-registers, +//! persists the resumable session URIs, emits `ReadyForUpload`. +//! 3. Upload coordinator opens each on-disk artefact and PUTs it resumably, +//! emitting `UploadProgress` and `UploadComplete` events as it goes. +//! 4. Status updater debounces `UploadProgress` / `UploadComplete` into +//! batched backend updates. +//! 5. Progress reporter ticks every `intervals::PROGRESS_TICK` and posts a +//! per-recording total_bytes (upload-denominator) snapshot until every trace +//! lands in `progress_reported`. +//! +//! Each sub-module exposes a single `spawn_*` entry point so the launch +//! routine can drive ordered shutdown by dropping the handle. + +pub mod cloud_files; +pub mod coordinators; +pub mod notifiers; +pub mod watchers; + +#[allow(unused_imports)] +pub use cloud_files::{cloud_file_list, content_type_for_filename}; +pub use coordinators::progress::{spawn_progress_reporter, ProgressReporterHandle}; +pub use coordinators::registration::{spawn_registration, RegistrationHandle}; +pub use coordinators::status::{spawn_status_updater, StatusUpdate, StatusUpdaterHandle}; +pub use coordinators::uploader::{spawn_uploader, UploaderHandle}; +pub use notifiers::notifier::NotifierHandle; +pub use notifiers::recording_cancel_notifier::spawn_recording_cancel_notifier; +pub use notifiers::recording_start_notifier::spawn_recording_start_notifier; +pub use notifiers::recording_stop_notifier::spawn_recording_stop_notifier; +pub use watchers::org_watcher::{spawn_org_watcher, OrgIdRx, OrgWatcherHandle}; +pub use watchers::recording_reaper::spawn_recording_reaper; + +use std::path::Path; + +use serde::Deserialize; + +/// Read the 
`current_org_id` field from `~/.neuracore/config.json` (blocking). +/// +/// Used for the one-shot resolutions at launch/spawn. Returns `None` when the +/// file is missing, malformed, or the field is unset. The daemon falls back to +/// `NCD_CURRENT_ORG_ID` (via the `DaemonConfig` resolved at launch) when this +/// returns `None`. +pub fn read_org_id_from_config(path: &Path) -> Option { + match std::fs::read(path) { + Ok(bytes) => parse_org_id(&bytes, path), + // Absent / unreadable β€” fall back silently (the org simply isn't set + // yet). A *present but corrupt* file is surfaced by `parse_org_id`. + Err(_) => None, + } +} + +/// Async counterpart of [`read_org_id_from_config`] for the org watcher's +/// periodic poll, so the re-read + parse runs off the runtime worker rather +/// than blocking it once per second for the daemon's whole life. +pub async fn read_org_id_from_config_async(path: &Path) -> Option { + match tokio::fs::read(path).await { + Ok(bytes) => parse_org_id(&bytes, path), + Err(_) => None, + } +} + +/// Parse `current_org_id` out of config bytes. A parse failure on a file that +/// *exists* is logged (rather than silently mapped to `None`) so a corrupt +/// config the user expects to be live doesn't disappear without a trace. +fn parse_org_id(bytes: &[u8], path: &Path) -> Option { + #[derive(Deserialize)] + struct ConfigShape { + #[serde(default)] + current_org_id: Option, + } + match serde_json::from_slice::(bytes) { + Ok(parsed) => parsed.current_org_id, + Err(error) => { + tracing::warn!(%error, path = %path.display(), "failed to parse config.json; ignoring org_id until it is valid"); + None + } + } +} diff --git a/rust/data_daemon/src/cloud/notifiers/mod.rs b/rust/data_daemon/src/cloud/notifiers/mod.rs new file mode 100644 index 000000000..fe4ea4c0d --- /dev/null +++ b/rust/data_daemon/src/cloud/notifiers/mod.rs @@ -0,0 +1,8 @@ +//! Backend recording-lifecycle notifiers (start / stop / cancel), each built on +//! 
the shared notifier framework in [`notifier`] that subscribes to the event +//! bus and POSTs the matching `/recording/*` endpoint. + +pub mod notifier; +pub mod recording_cancel_notifier; +pub mod recording_start_notifier; +pub mod recording_stop_notifier; diff --git a/rust/data_daemon/src/cloud/notifiers/notifier.rs b/rust/data_daemon/src/cloud/notifiers/notifier.rs new file mode 100644 index 000000000..bd515596c --- /dev/null +++ b/rust/data_daemon/src/cloud/notifiers/notifier.rs @@ -0,0 +1,327 @@ +//! Shared skeleton for the backend recording-lifecycle notifiers. +//! +//! The start / stop / cancel notifiers each POST a different backend endpoint, +//! but their machinery is identical: subscribe to the event bus, sweep any +//! recordings whose notification is pending from a previous (offline) session, +//! then POST whenever the relevant lifecycle event fires β€” retrying via a +//! startup sweep and on broadcast lag. This module owns that machinery once; a +//! notifier supplies only the three things that actually differ via +//! [`RecordingNotifier`]: which event(s) trigger it, which "pending" query +//! drives its recovery sweep, and the per-recording POST itself. +//! +//! Events are processed sequentially: each POST is awaited inline before the +//! next event is read, so a slow or retrying POST delays later events on the +//! same notifier and can push the broadcast channel into `Lagged`. That is +//! handled by re-running the recovery sweep (the POSTs are idempotent), which +//! is the recovery mechanism rather than a failure. +//! +//! Each `recording_*_notifier` module defines a small unit struct implementing +//! the trait plus a thin `spawn_recording_*_notifier` wrapper, so the call +//! sites (and their tests) are unchanged. 
+ +use std::sync::Arc; + +use async_trait::async_trait; +use tokio::sync::broadcast; +use tokio::task::JoinHandle; + +use crate::api::ApiClient; +use crate::cloud::OrgIdRx; +use crate::lifecycle::shutdown::ShutdownSignal; +use crate::state::{ + DaemonEvent, EventBus, RecordingRow, SqliteStateStore, StateStore, StateStoreError, +}; + +/// Handle returned by every recording notifier's `spawn_*` wrapper. +pub struct NotifierHandle { + join: JoinHandle<()>, + label: &'static str, +} + +impl NotifierHandle { + /// Wait for the notifier task to exit. + pub async fn join(self) { + if let Err(error) = self.join.await { + tracing::warn!( + ?error, + notifier = self.label, + "recording notifier join failed" + ); + } + } +} + +/// Shared dependencies handed to a notifier's `notify`. +pub struct NotifierCtx { + /// State store (already `Arc`-wrapped for the spawned task). + pub store: Arc, + /// Backend HTTP client. + pub client: Arc, + /// Event bus β€” the start notifier publishes `RecordingCloudIdAssigned` on it. + pub bus: EventBus, + /// Live current-org receiver, read at POST time. + pub org_rx: OrgIdRx, +} + +/// One backend recording-lifecycle notifier (start / stop / cancel). +/// +/// Everything common β€” the spawn loop, the offline-recovery sweep, the +/// shutdown/lag handling β€” lives in [`spawn_notifier`]; an implementor supplies +/// only what differs. +#[async_trait] +pub trait RecordingNotifier: Send + Sync + 'static { + /// Short label used in this notifier's log lines. + fn label(&self) -> &'static str; + + /// The recording index to notify for `event`, or `None` to ignore it. + fn triggered_by(&self, event: &DaemonEvent) -> Option; + + /// Recordings whose notification is still pending β€” the offline-recovery + /// sweep set, run on startup and after a broadcast lag. + async fn pending( + &self, + store: &Arc, + ) -> Result, StateStoreError>; + + /// Fire the backend POST for one recording. 
Idempotent and self-logging: + /// the shared loop never inspects the result. + async fn notify(&self, ctx: &NotifierCtx, recording_index: i64); +} + +/// Spawn a notifier task driven by `notifier` on the current Tokio runtime. +/// +/// Sweeps pending notifications first (so recordings that finished while the +/// daemon was offline recover), then serves live bus events until shutdown. +pub fn spawn_notifier( + notifier: N, + store: SqliteStateStore, + bus: EventBus, + client: Arc, + org_rx: OrgIdRx, + mut shutdown_rx: broadcast::Receiver, +) -> NotifierHandle { + let label = notifier.label(); + let mut subscriber = bus.subscribe(); + let ctx = NotifierCtx { + store: Arc::new(store), + client, + bus, + org_rx, + }; + let join = tokio::spawn(async move { + // Recover pending notifications before serving live events. Run inside a + // `select!` against shutdown so a long sweep cannot hold up exit. + tokio::select! { + biased; + signal = shutdown_rx.recv() => { + tracing::debug!(?signal, notifier = label, "recording notifier shutting down before sweep"); + return; + } + _ = sweep(¬ifier, &ctx) => {} + } + loop { + tokio::select! { + biased; + signal = shutdown_rx.recv() => { + tracing::debug!(?signal, notifier = label, "recording notifier shutting down"); + break; + } + event = subscriber.recv() => { + match event { + Ok(event) => { + if let Some(recording_index) = notifier.triggered_by(&event) { + notifier.notify(&ctx, recording_index).await; + } + } + Err(broadcast::error::RecvError::Lagged(skipped)) => { + tracing::warn!( + skipped, + notifier = label, + "recording notifier missed bus events; re-sweeping pending notifications", + ); + sweep(¬ifier, &ctx).await; + } + Err(broadcast::error::RecvError::Closed) => { + tracing::debug!(notifier = label, "event bus closed; recording notifier exiting"); + break; + } + } + } + } + } + }); + NotifierHandle { join, label } +} + +/// Notify every recording the notifier reports as pending. 
+async fn sweep<N: RecordingNotifier>(notifier: &N, ctx: &NotifierCtx) {
+    // A failed query is logged and the sweep abandoned; nothing is retried here.
+    let pending = match notifier.pending(&ctx.store).await {
+        Ok(rows) => rows,
+        Err(error) => {
+            tracing::warn!(%error, notifier = notifier.label(), "failed to query recordings pending notify");
+            return;
+        }
+    };
+    if pending.is_empty() {
+        return;
+    }
+    tracing::info!(
+        count = pending.len(),
+        notifier = notifier.label(),
+        "sweeping recordings with pending backend notify",
+    );
+    // Sequential on purpose: each notify is awaited before the next begins.
+    for row in pending {
+        notifier.notify(ctx, row.recording_index).await;
+    }
+}
+
+/// Which `/recording/*` endpoint a lifecycle notify targets. The stop and
+/// cancel notifiers run the *same* guard chain (row fetch → already-notified
+/// guard → stopped guard (stop only) → cloud-id guard → org guard →
+/// `stop_timestamp_ns` guard → POST → 404-as-success → mark-notified); only
+/// these per-kind bits differ.
+#[derive(Clone, Copy, Debug)]
+pub enum LifecycleKind {
+    /// Targets `/recording/stop`; tracked by `backend_stop_notified_at`.
+    Stop,
+    /// Targets `/recording/cancel`; tracked by `backend_cancel_notified_at`.
+    Cancel,
+}
+
+impl LifecycleKind {
+    /// Word used in this notifier's log lines ("stop" / "cancel").
+    fn action(self) -> &'static str {
+        match self {
+            LifecycleKind::Stop => "stop",
+            LifecycleKind::Cancel => "cancel",
+        }
+    }
+
+    /// Whether this recording's notification has already been persisted.
+    fn already_notified(self, row: &RecordingRow) -> bool {
+        match self {
+            LifecycleKind::Stop => row.backend_stop_notified_at.is_some(),
+            LifecycleKind::Cancel => row.backend_cancel_notified_at.is_some(),
+        }
+    }
+}
+
+/// Run the shared stop/cancel backend-notify flow for one recording.
+///
+/// Idempotent and self-logging (the spawn loop never inspects the result): a
+/// 404 is treated as success (the start notifier's prior-pending resolution
+/// already closed the recording server-side), and a persist failure after a
+/// successful POST is left for the next sweep since the POST is idempotent.
+pub async fn notify_recording_lifecycle( + kind: LifecycleKind, + store: &Arc, + client: &Arc, + org_rx: &OrgIdRx, + recording_index: i64, +) { + let action = kind.action(); + let row = match store.get_recording(recording_index).await { + Ok(Some(row)) => row, + Ok(None) => { + tracing::warn!( + recording_index, + "recording row missing on {action}; skipping backend notify" + ); + return; + } + Err(error) => { + tracing::warn!(%error, recording_index, "failed to look up recording for {action} notify"); + return; + } + }; + + if kind.already_notified(&row) { + // Another path (sweep or earlier event) already notified. + return; + } + // Stop is also triggered by `RecordingCloudIdAssigned`, which can fire for a + // still-running recording; hold the POST until it has actually stopped. + // (A cancel only ever reaches here once `cancelled_at` is stamped.) + if matches!(kind, LifecycleKind::Stop) && row.stopped_at.is_none() { + return; + } + let Some(recording_id) = row.recording_id else { + // No cloud id β†’ nothing exists server-side to act on. The sweep + // re-fires once the start notifier mints the id. + tracing::debug!( + recording_index, + "recording has no cloud id at {action} time; deferring backend notify" + ); + return; + }; + let Some(org_id) = org_rx.borrow().clone() else { + tracing::warn!( + recording_index, + recording_id, + "no current org_id configured at {action} time; skipping backend notify" + ); + return; + }; + let Some(stop_timestamp_ns) = row.stop_timestamp_ns else { + tracing::warn!( + recording_index, + recording_id, + "recording has no stop_timestamp_ns at {action} time; skipping backend notify" + ); + return; + }; + // The producer captured this as the recording window's real upper bound; + // the backend requires it (seconds) and derives the reported duration from + // it, so a late notify still reports correctly. 
+ let end_time = stop_timestamp_ns as f64 / 1_000_000_000.0; + + let post_result = match kind { + LifecycleKind::Stop => { + client + .recording_stop(&org_id, &recording_id, end_time) + .await + } + LifecycleKind::Cancel => { + client + .recording_cancel(&org_id, &recording_id, end_time) + .await + } + }; + + let mark_result = match &post_result { + Ok(()) => mark_notified(kind, store, recording_index).await, + // 404 means the backend no longer has this recording open β€” the + // start-notifier's `resolve_prior_pending` already closed it. That is + // the post-condition we wanted, so record it rather than re-sweeping. + Err(error) if error.is_not_found() => mark_notified(kind, store, recording_index).await, + Err(error) => { + tracing::warn!(%error, recording_index, recording_id, "failed to notify backend of recording {action}"); + return; + } + }; + if let Err(error) = mark_result { + tracing::warn!( + %error, + recording_index, + recording_id, + "POST succeeded but persisting backend_{action}_notified_at failed; \ + the next sweep will re-post (the backend POST is idempotent)", + ); + } else { + tracing::info!( + recording_index, + recording_id, + "backend notified of recording {action}" + ); + } +} + +/// Persist the "notified" timestamp for the given lifecycle kind. +async fn mark_notified( + kind: LifecycleKind, + store: &Arc, + recording_index: i64, +) -> Result<(), StateStoreError> { + match kind { + LifecycleKind::Stop => store.mark_recording_stop_notified(recording_index).await, + LifecycleKind::Cancel => store.mark_recording_cancel_notified(recording_index).await, + } + .map(|_| ()) +} diff --git a/rust/data_daemon/src/cloud/notifiers/recording_cancel_notifier.rs b/rust/data_daemon/src/cloud/notifiers/recording_cancel_notifier.rs new file mode 100644 index 000000000..a5d49a1a2 --- /dev/null +++ b/rust/data_daemon/src/cloud/notifiers/recording_cancel_notifier.rs @@ -0,0 +1,347 @@ +//! Backend recording-cancel notifier. +//! +//! 
Subscribes to [`DaemonEvent::RecordingCancelled`] and POSTs
+//! `/org/{org}/recording/cancel` (JSON body `{recording_id, end_time}`) to the
+//! backend. The Python
+//! SDK used to make this call inline from `nc.cancel_recording`, but that
+//! required the SDK to know the cloud `recording_id` — which the thin-shipper
+//! model removes. The notifier picks up the responsibility: once the local
+//! cancel is stamped and the cloud id is known, it fires the POST in the
+//! background with the daemon's standard retry policy.
+//!
+//! Recordings cancelled before `/recording/start` was ever notified (i.e.
+//! `recording_id IS NULL`) have no cloud representation, so there is nothing
+//! to cancel server-side; the notifier skips them without issuing a POST.
+
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use tokio::sync::broadcast;
+
+use super::notifier::{
+    notify_recording_lifecycle, spawn_notifier, LifecycleKind, NotifierCtx, NotifierHandle,
+    RecordingNotifier,
+};
+use crate::api::ApiClient;
+use crate::cloud::OrgIdRx;
+use crate::lifecycle::shutdown::ShutdownSignal;
+use crate::state::{
+    DaemonEvent, EventBus, RecordingRow, SqliteStateStore, StateStore, StateStoreError,
+};
+
+/// Notifier that POSTs `/recording/cancel` once a recording is cancelled and
+/// its cloud id is known. Recordings cancelled before `/recording/start` ever
+/// landed have no cloud representation, so `notify_recording_lifecycle` skips
+/// them.
+struct CancelNotifier; + +#[async_trait] +impl RecordingNotifier for CancelNotifier { + fn label(&self) -> &'static str { + "recording-cancel" + } + + fn triggered_by(&self, event: &DaemonEvent) -> Option { + match event { + DaemonEvent::RecordingCancelled { recording_index } => Some(*recording_index), + _ => None, + } + } + + async fn pending( + &self, + store: &Arc, + ) -> Result, StateStoreError> { + store.recordings_pending_cancel_notify().await + } + + async fn notify(&self, ctx: &NotifierCtx, recording_index: i64) { + notify_recording_lifecycle( + LifecycleKind::Cancel, + &ctx.store, + &ctx.client, + &ctx.org_rx, + recording_index, + ) + .await; + } +} + +/// Spawn the recording-cancel notifier on the current Tokio runtime. +pub fn spawn_recording_cancel_notifier( + store: SqliteStateStore, + bus: EventBus, + client: Arc, + org_rx: OrgIdRx, + shutdown_rx: broadcast::Receiver, +) -> NotifierHandle { + spawn_notifier(CancelNotifier, store, bus, client, org_rx, shutdown_rx) +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::time::Duration; + + use tempfile::TempDir; + use tokio::sync::broadcast; + use tokio::time::{sleep, timeout}; + use wiremock::matchers::{body_partial_json, method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + use crate::api::auth::StaticAuthProvider; + use crate::api::{ApiClient, ApiClientOptions}; + use crate::state::{DaemonEvent, EventBus, NewRecording, SqliteStateStore, StateStore}; + + async fn open_store() -> (SqliteStateStore, TempDir) { + let dir = TempDir::new().expect("tempdir"); + let store = SqliteStateStore::open(&dir.path().join("state.db")) + .await + .expect("open store"); + (store, dir) + } + + fn options(base_url: String) -> ApiClientOptions { + ApiClientOptions { + base_url, + timeout: Duration::from_secs(5), + max_retries: 1, + max_backoff: Duration::from_secs(1), + } + } + + async fn seed_cancelled_recording_with_cloud_id( + store: &SqliteStateStore, + cloud_id: &str, + ) -> i64 { + let row = 
store + .create_recording(NewRecording { + robot_id: Some("robot-1"), + robot_instance: Some(0), + start_timestamp_ns: 0, + ..NewRecording::default() + }) + .await + .expect("create_recording"); + let index = row.recording_index; + store + .mark_recording_start_notified(index, cloud_id) + .await + .expect("mark start notified"); + store + .cancel_recording(index, 5_000_000_000) + .await + .expect("cancel"); + index + } + + /// A live-org receiver fixed at `org`. The sender is leaked so the channel + /// stays open for the test's duration. + fn org_rx(org: Option<&str>) -> OrgIdRx { + let (org_tx, org_rx) = tokio::sync::watch::channel(org.map(str::to_string)); + Box::leak(Box::new(org_tx)); + org_rx + } + + #[tokio::test] + async fn posts_backend_cancel_on_recording_cancelled_event() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/cancel")) + .and(body_partial_json( + serde_json::json!({ "recording_id": "rec-cancel-1" }), + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!("ok"))) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + seed_cancelled_recording_with_cloud_id(&store, "rec-cancel-1").await; + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_cancel_notifier( + store.clone(), + bus.clone(), + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + bus.publish(DaemonEvent::RecordingCancelled { recording_index: 1 }); + + timeout(Duration::from_secs(3), async { + loop { + let received = server.received_requests().await.unwrap_or_default(); + if !received.is_empty() { + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("expected one POST within 3s"); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + 
handle.join().await; + } + + #[tokio::test] + async fn startup_sweep_recovers_recordings_cancelled_while_offline() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/cancel")) + .and(body_partial_json( + serde_json::json!({ "recording_id": "rec-offline-cancel" }), + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!("ok"))) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + let index = seed_cancelled_recording_with_cloud_id(&store, "rec-offline-cancel").await; + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_cancel_notifier( + store.clone(), + bus, + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + timeout(Duration::from_secs(3), async { + loop { + let received = server.received_requests().await.unwrap_or_default(); + if !received.is_empty() { + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("sweep must POST within 3s"); + + timeout(Duration::from_secs(3), async { + loop { + let row = store + .get_recording(index) + .await + .expect("get") + .expect("exists"); + if row.backend_cancel_notified_at.is_some() { + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("backend_cancel_notified_at must be stamped within 3s"); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn treats_backend_404_as_already_cancelled() { + // The start-notifier's `resolve_prior_pending` may have closed this + // recording on the backend first (cancel-then-start with no gap), so a + // 404 here is the desired post-condition, not a failure: the row must + // still be marked notified so the sweep stops re-posting. 
+ let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/cancel")) + .respond_with( + ResponseTemplate::new(404) + .set_body_json(serde_json::json!({ "detail": "Recording not found." })), + ) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + let index = seed_cancelled_recording_with_cloud_id(&store, "rec-already-gone").await; + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_cancel_notifier( + store.clone(), + bus, + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + timeout(Duration::from_secs(3), async { + loop { + let row = store + .get_recording(index) + .await + .expect("get") + .expect("exists"); + if row.backend_cancel_notified_at.is_some() { + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("a 404 must still stamp backend_cancel_notified_at within 3s"); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn skips_notify_when_recording_has_no_cloud_id() { + let server = MockServer::start().await; + let (store, _dir) = open_store().await; + + // A recording that was cancelled before /start was ever notified. 
+ let row = store + .create_recording(NewRecording { + robot_id: Some("robot-1"), + robot_instance: Some(0), + start_timestamp_ns: 0, + ..NewRecording::default() + }) + .await + .unwrap(); + store + .cancel_recording(row.recording_index, 5_000_000_000) + .await + .unwrap(); + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_cancel_notifier( + store, + bus.clone(), + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + bus.publish(DaemonEvent::RecordingCancelled { + recording_index: row.recording_index, + }); + + sleep(Duration::from_millis(150)).await; + let received = server.received_requests().await.unwrap_or_default(); + assert!( + received.is_empty(), + "no backend POST expected when recording has no cloud id" + ); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } +} diff --git a/rust/data_daemon/src/cloud/notifiers/recording_start_notifier.rs b/rust/data_daemon/src/cloud/notifiers/recording_start_notifier.rs new file mode 100644 index 000000000..20b0d6486 --- /dev/null +++ b/rust/data_daemon/src/cloud/notifiers/recording_start_notifier.rs @@ -0,0 +1,550 @@ +//! Backend recording-start notifier. +//! +//! Subscribes to [`DaemonEvent::RecordingStarted`] and POSTs +//! `/org/{org}/recording/start` to the backend, persisting the cloud +//! `recording_id` the backend mints in response. The Python SDK used to make +//! this call inline from `nc.start_recording`, but the staging POST has a fat +//! upper tail. Doing it here means the SDK call returns as soon as the +//! producer publishes the `StartRecording` envelope, and the cloud-id mint +//! rides the daemon's standard retry policy in the background. +//! +//! The shared loop/sweep/lag semantics live in +//! 
[`notifier`](super::notifier); see there for how events are processed. What +//! is start-specific: the cloud `recording_id` is minted and persisted here, +//! and any prior pending recording is closed before the next start. Every +//! downstream coordinator (registration, progress, upload) waits for this id, +//! so an offline recording simply stays pending until the daemon is online and +//! `/recording/start` lands. + +use std::sync::Arc; + +use async_trait::async_trait; +use tokio::sync::broadcast; + +use super::notifier::{spawn_notifier, NotifierCtx, NotifierHandle, RecordingNotifier}; +use crate::api::ApiClient; +use crate::cloud::OrgIdRx; +use crate::lifecycle::shutdown::ShutdownSignal; +use crate::state::{ + DaemonEvent, EventBus, RecordingRow, SqliteStateStore, StateStore, StateStoreError, +}; + +/// Notifier that POSTs `/recording/start` and persists the cloud `recording_id` +/// the backend mints. The cloud id is always minted here β€” every downstream +/// coordinator waits on it β€” so an offline recording stays pending until the +/// daemon is online and the start POST lands. Before opening the new recording +/// it closes any earlier still-pending recording for the same source (see +/// [`resolve_prior_pending`]). +struct StartNotifier; + +#[async_trait] +impl RecordingNotifier for StartNotifier { + fn label(&self) -> &'static str { + "recording-start" + } + + fn triggered_by(&self, event: &DaemonEvent) -> Option { + match event { + DaemonEvent::RecordingStarted { recording_index } => Some(*recording_index), + _ => None, + } + } + + async fn pending( + &self, + store: &Arc, + ) -> Result, StateStoreError> { + store.recordings_pending_start_notify().await + } + + async fn notify(&self, ctx: &NotifierCtx, recording_index: i64) { + notify_backend( + &ctx.store, + &ctx.client, + &ctx.bus, + &ctx.org_rx, + recording_index, + ) + .await; + } +} + +/// Spawn the recording-start notifier on the current Tokio runtime. 
+pub fn spawn_recording_start_notifier( + store: SqliteStateStore, + bus: EventBus, + client: Arc, + org_rx: OrgIdRx, + shutdown_rx: broadcast::Receiver, +) -> NotifierHandle { + spawn_notifier(StartNotifier, store, bus, client, org_rx, shutdown_rx) +} + +async fn notify_backend( + store: &Arc, + client: &Arc, + bus: &EventBus, + org_rx: &OrgIdRx, + recording_index: i64, +) { + let row = match store.get_recording(recording_index).await { + Ok(Some(row)) => row, + Ok(None) => { + tracing::warn!( + recording_index, + "recording row missing on start; skipping backend notify", + ); + return; + } + Err(error) => { + tracing::warn!( + %error, + recording_index, + "failed to look up recording for start notify", + ); + return; + } + }; + if row.recording_id.is_some() || row.backend_start_notified_at.is_some() { + // Already notified β€” another path handled it. + return; + } + + let Some(org_id) = org_rx.borrow().clone() else { + // No current org configured yet (not logged in / org not selected). + // Without it we can't address the POST; the next sweep retries once + // the config watcher picks up a current org. 
+ tracing::warn!( + recording_index, + "no current org_id configured at start time; skipping backend notify", + ); + return; + }; + let Some(robot_id) = row.robot_id else { + tracing::warn!( + recording_index, + "recording has no robot_id at start time; skipping backend notify", + ); + return; + }; + let Some(dataset_id) = row.dataset_id else { + tracing::warn!( + recording_index, + "recording has no dataset_id at start time; skipping backend notify", + ); + return; + }; + let instance = row.robot_instance.unwrap_or(0); + let Some(start_timestamp_ns) = row.start_timestamp_ns else { + tracing::warn!( + recording_index, + "recording has no start_timestamp_ns at start time; skipping backend notify", + ); + return; + }; + // The producer captured this as the recording window's real lower bound; + // the backend requires it (seconds) and derives the reported duration from + // it, so a late notify (e.g. after reconnecting) still reports correctly. + let start_time = start_timestamp_ns as f64 / 1_000_000_000.0; + + // Before opening this recording server-side, close any earlier recording for + // the same source that finished locally (cancel/stop) but whose backend + // notification has not landed yet. The backend dedupes pending recordings + // per robot instance β€” it returns the existing pending recording instead of + // minting a new one β€” so a still-pending prior recording would otherwise + // hand its cloud id to this one, collapsing both into one backend recording + // (e.g. cancel-then-start with no gap). The start notifier processes + // `RecordingStarted` events in order, so the prior recording's cloud id is + // already on its row by the time we reach here. 
+ resolve_prior_pending(store, client, &org_id, &robot_id, instance, recording_index).await; + + match client + .recording_start(&org_id, &robot_id, instance, &dataset_id, start_time) + .await + { + Ok(recording_id) => { + if let Err(error) = store + .mark_recording_start_notified(recording_index, &recording_id) + .await + { + tracing::warn!( + %error, + recording_index, + recording_id, + "POST succeeded but persisting the cloud recording_id failed; \ + the next sweep will re-post (the start notify is idempotent)", + ); + } else { + tracing::info!( + recording_index, + recording_id, + "backend notified of recording start", + ); + // The cloud id is now available. Wake any coordinator that was + // waiting on it β€” notably the stop notifier, for a recording + // that was stopped while offline before its start was notified. + bus.publish(DaemonEvent::RecordingCloudIdAssigned { recording_index }); + } + } + Err(error) => { + // The producer-side iceoryx2 publish has already succeeded by + // the time we get here; logging is the only available recourse + // until the next sweep retries. + tracing::warn!( + %error, + recording_index, + "failed to notify backend of recording start", + ); + } + } +} + +/// Close, on the backend, any earlier recording for `(robot_id, instance)` that +/// finished locally (cancelled or stopped) but is still pending server-side, so +/// the backend does not hand its cloud id to the next `/recording/start` for +/// this instance. See +/// [`StateStore::recordings_pending_backend_resolution_for_source`]. 
+async fn resolve_prior_pending( + store: &Arc, + client: &Arc, + org_id: &str, + robot_id: &str, + instance: i64, + before_index: i64, +) { + let prior = match store + .recordings_pending_backend_resolution_for_source(robot_id, instance, before_index) + .await + { + Ok(rows) => rows, + Err(error) => { + tracing::warn!( + %error, + before_index, + "failed to query prior pending recordings for source; next start may reuse a cloud id", + ); + return; + } + }; + for row in prior { + let index = row.recording_index; + let is_cancelled = row.cancelled_at.is_some(); + // Cancel and stop both report the recording's captured stop time as + // `end_time` (a cancel is a stop that discards data). Compute it before + // `recording_id` is moved out of `row`. + let end_time = row.stop_timestamp_ns.map(|ns| ns as f64 / 1_000_000_000.0); + // Defensive against the query contract: the pending-resolution query + // only returns cloud-id-assigned, stopped/cancelled rows (which always + // carry a stop timestamp), so these guards should never skip in + // practice β€” they just keep the extraction total. + let Some(recording_id) = row.recording_id else { + continue; + }; + let Some(end_time) = end_time else { + continue; + }; + if is_cancelled { + match client + .recording_cancel(org_id, &recording_id, end_time) + .await + { + Ok(()) => { + let _ = store.mark_recording_cancel_notified(index).await; + tracing::info!( + recording_index = index, + recording_id, + next_recording_index = before_index, + "cancelled prior pending recording on the backend before opening the next", + ); + } + Err(error) if error.is_not_found() => { + // Already closed β€” the cancel-notifier sweep won the race. + // The prior recording is not pending on the backend, so the + // next start cannot reuse its id; mark it notified so the + // sweep stops re-posting too. 
+ let _ = store.mark_recording_cancel_notified(index).await; + tracing::debug!( + recording_index = index, + recording_id, + next_recording_index = before_index, + "prior pending recording already cancelled on backend (404)", + ); + } + Err(error) => { + tracing::warn!( + %error, + recording_index = index, + recording_id, + "failed to cancel prior pending recording before next start; \ + the next start may reuse its cloud id", + ); + } + } + } else { + match client.recording_stop(org_id, &recording_id, end_time).await { + Ok(()) => { + let _ = store.mark_recording_stop_notified(index).await; + tracing::info!( + recording_index = index, + recording_id, + next_recording_index = before_index, + "stopped prior pending recording on the backend before opening the next", + ); + } + Err(error) if error.is_not_found() => { + // Already closed β€” the stop-notifier sweep won the race. Mark + // it notified so the sweep stops re-posting too. + let _ = store.mark_recording_stop_notified(index).await; + tracing::debug!( + recording_index = index, + recording_id, + next_recording_index = before_index, + "prior pending recording already stopped on backend (404)", + ); + } + Err(error) => { + tracing::warn!( + %error, + recording_index = index, + recording_id, + "failed to stop prior pending recording before next start", + ); + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::time::Duration; + + use tempfile::TempDir; + use tokio::sync::broadcast; + use tokio::time::{sleep, timeout}; + use wiremock::matchers::{body_partial_json, method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + use crate::api::auth::StaticAuthProvider; + use crate::api::{ApiClient, ApiClientOptions}; + use crate::lifecycle::shutdown::ShutdownSignal; + use crate::state::{DaemonEvent, EventBus, NewRecording, SqliteStateStore, StateStore}; + + async fn open_store() -> (SqliteStateStore, TempDir) { + let dir = TempDir::new().expect("tempdir"); + let store = 
SqliteStateStore::open(&dir.path().join("state.db")) + .await + .expect("open store"); + (store, dir) + } + + fn options(base_url: String) -> ApiClientOptions { + ApiClientOptions { + base_url, + timeout: Duration::from_secs(5), + max_retries: 1, + max_backoff: Duration::from_secs(1), + } + } + + /// Insert a fresh recording (no cloud id yet) and return its local index. + async fn seed_recording(store: &SqliteStateStore) -> i64 { + store + .create_recording(NewRecording { + robot_id: Some("robot-1"), + robot_instance: Some(7), + dataset_id: Some("ds-1"), + start_timestamp_ns: 1_700_000_000_000_000_000, + }) + .await + .expect("create recording") + .recording_index + } + + /// A live-org receiver fixed at `org`. The sender is leaked so the channel + /// stays open for the test's duration. + fn org_rx(org: Option<&str>) -> OrgIdRx { + let (org_tx, org_rx) = tokio::sync::watch::channel(org.map(str::to_string)); + Box::leak(Box::new(org_tx)); + org_rx + } + + fn start_ok_mock(recording_id: &'static str) -> wiremock::Mock { + Mock::given(method("POST")) + .and(path("/org/org-1/recording/start")) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({ "id": recording_id })), + ) + } + + #[tokio::test] + async fn posts_backend_start_on_recording_started_event() { + let server = MockServer::start().await; + start_ok_mock("cloud-rec-1").mount(&server).await; + + let (store, _dir) = open_store().await; + let index = seed_recording(&store).await; + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_start_notifier( + store.clone(), + bus.clone(), + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + bus.publish(DaemonEvent::RecordingStarted { + recording_index: index, + }); + + // The cloud id lands on the row once the POST 
round-trips. + timeout(Duration::from_secs(3), async { + loop { + let row = store + .get_recording(index) + .await + .expect("get") + .expect("exists"); + if row.recording_id.is_some() { + assert_eq!(row.recording_id.as_deref(), Some("cloud-rec-1")); + assert!(row.backend_start_notified_at.is_some()); + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("cloud recording_id must be persisted within 3s"); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn cancels_prior_pending_recording_before_opening_the_next() { + // Cancel-then-start (no gap) for one source: the prior recording was + // cancelled before its cloud id was notified, so it is still pending on + // the backend. Opening the next recording must cancel it FIRST, so the + // backend mints a fresh id instead of handing back the cancelled one + // (which would collapse both recordings into one cloud recording). + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/cancel")) + .and(body_partial_json( + serde_json::json!({ "recording_id": "cloud-cancelled-A" }), + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!("ok"))) + .mount(&server) + .await; + start_ok_mock("cloud-fresh-B").mount(&server).await; + + let (store, _dir) = open_store().await; + // Prior recording A (same source): start-notified, then cancelled, with + // its backend cancel still pending. + let prior = seed_recording(&store).await; + store + .mark_recording_start_notified(prior, "cloud-cancelled-A") + .await + .expect("mark start notified"); + store + .cancel_recording(prior, 5_000_000_000) + .await + .expect("cancel"); + // The next recording B for the same source. 
+ let next = seed_recording(&store).await; + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_start_notifier( + store.clone(), + bus.clone(), + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + bus.publish(DaemonEvent::RecordingStarted { + recording_index: next, + }); + + timeout(Duration::from_secs(3), async { + loop { + let prior_row = store + .get_recording(prior) + .await + .expect("get") + .expect("exists"); + let next_row = store + .get_recording(next) + .await + .expect("get") + .expect("exists"); + if prior_row.backend_cancel_notified_at.is_some() && next_row.recording_id.is_some() + { + // Prior cancelled server-side; next opened with a FRESH id. + assert_eq!(next_row.recording_id.as_deref(), Some("cloud-fresh-B")); + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("prior recording must be cancelled and next opened fresh within 3s"); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn startup_sweep_notifies_recordings_opened_while_offline() { + // A recording opened during a previous offline session: no cloud id, + // no start-notify/failed stamps. The pre-loop sweep must POST and + // persist the minted cloud id. 
+ let server = MockServer::start().await; + start_ok_mock("cloud-rec-offline").mount(&server).await; + + let (store, _dir) = open_store().await; + let index = seed_recording(&store).await; + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_start_notifier( + store.clone(), + bus, + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + timeout(Duration::from_secs(3), async { + loop { + let row = store + .get_recording(index) + .await + .expect("get") + .expect("exists"); + if row.recording_id.as_deref() == Some("cloud-rec-offline") { + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("sweep must persist the minted cloud id within 3s"); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } +} diff --git a/rust/data_daemon/src/cloud/notifiers/recording_stop_notifier.rs b/rust/data_daemon/src/cloud/notifiers/recording_stop_notifier.rs new file mode 100644 index 000000000..569815c74 --- /dev/null +++ b/rust/data_daemon/src/cloud/notifiers/recording_stop_notifier.rs @@ -0,0 +1,452 @@ +//! Backend recording-stop notifier. +//! +//! Subscribes to [`DaemonEvent::RecordingStopped`] and POSTs +//! `/org/{org}/recording/stop` (JSON body `{recording_id, end_time}`) to the +//! backend. The Python SDK +//! used to make this call inline from `nc.stop_recording`, but the staging +//! POST has a fat upper tail (occasional 1-2 s spikes on otherwise +//! sub-second calls). Doing it here means the SDK call returns as soon as +//! the producer publishes the `StopRecording` envelope, and the staging +//! notification rides the daemon's standard retry policy in the background. +//! +//! A single long-lived task processes lifecycle events sequentially, awaiting +//! 
//! each POST inline (a pre-loop sweep and a broadcast-lag sweep recover any
//! recordings whose notification is pending from an offline session — see
//! [`notifier`](super::notifier) for the shared loop). Failures are logged
//! with the recording index but never surfaced to the SDK — by the time we
//! reach this notifier the SDK is long gone and the producer's iceoryx2
//! publish already succeeded.

use std::sync::Arc;

use async_trait::async_trait;
use tokio::sync::broadcast;

use super::notifier::{
    notify_recording_lifecycle, spawn_notifier, LifecycleKind, NotifierCtx, NotifierHandle,
    RecordingNotifier,
};
use crate::api::ApiClient;
use crate::cloud::OrgIdRx;
use crate::lifecycle::shutdown::ShutdownSignal;
use crate::state::{
    DaemonEvent, EventBus, RecordingRow, SqliteStateStore, StateStore, StateStoreError,
};

/// Notifier that POSTs `/recording/stop` once a recording stops and its cloud
/// id is known. Triggered by `RecordingStopped` (the live path) and by
/// `RecordingCloudIdAssigned` (offline recovery: a recording stopped while
/// offline already fired `RecordingStopped` before any coordinator could see
/// it, so the POST is unblocked only when the start notifier later mints the
/// cloud id — `notify_backend` no-ops for a not-yet-stopped recording).
+struct StopNotifier; + +#[async_trait] +impl RecordingNotifier for StopNotifier { + fn label(&self) -> &'static str { + "recording-stop" + } + + fn triggered_by(&self, event: &DaemonEvent) -> Option { + match event { + DaemonEvent::RecordingStopped { recording_index } + | DaemonEvent::RecordingCloudIdAssigned { recording_index } => Some(*recording_index), + _ => None, + } + } + + async fn pending( + &self, + store: &Arc, + ) -> Result, StateStoreError> { + store.recordings_pending_stop_notify().await + } + + async fn notify(&self, ctx: &NotifierCtx, recording_index: i64) { + notify_recording_lifecycle( + LifecycleKind::Stop, + &ctx.store, + &ctx.client, + &ctx.org_rx, + recording_index, + ) + .await; + } +} + +/// Spawn the recording-stop notifier on the current Tokio runtime. +pub fn spawn_recording_stop_notifier( + store: SqliteStateStore, + bus: EventBus, + client: Arc, + org_rx: OrgIdRx, + shutdown_rx: broadcast::Receiver, +) -> NotifierHandle { + spawn_notifier(StopNotifier, store, bus, client, org_rx, shutdown_rx) +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::time::Duration; + + use tempfile::TempDir; + use tokio::sync::broadcast; + use tokio::time::{sleep, timeout}; + use wiremock::matchers::{body_partial_json, method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + use crate::api::auth::StaticAuthProvider; + use crate::api::{ApiClient, ApiClientOptions}; + use crate::lifecycle::shutdown::ShutdownSignal; + use crate::state::{DaemonEvent, EventBus, NewRecording, SqliteStateStore, StateStore}; + + async fn open_store() -> (SqliteStateStore, TempDir) { + let dir = TempDir::new().expect("tempdir"); + let store = SqliteStateStore::open(&dir.path().join("state.db")) + .await + .expect("open store"); + (store, dir) + } + + fn options(base_url: String) -> ApiClientOptions { + ApiClientOptions { + base_url, + timeout: Duration::from_secs(5), + max_retries: 1, + max_backoff: Duration::from_secs(1), + } + } + + /// Insert a recording, 
stamp its cloud id (as if `/start` was notified), + /// and return its local index. + async fn seed_notified_recording(store: &SqliteStateStore, recording_id: &str) -> i64 { + let index = store + .create_recording(NewRecording { + robot_id: Some("robot-1"), + robot_instance: Some(0), + dataset_id: Some("ds-1"), + start_timestamp_ns: 1_700_000_000_000_000_000, + }) + .await + .expect("create recording") + .recording_index; + store + .mark_recording_start_notified(index, recording_id) + .await + .expect("mark start notified"); + index + } + + /// A live-org receiver fixed at `org`. The sender is leaked so the channel + /// stays open for the test's duration. + fn org_rx(org: Option<&str>) -> OrgIdRx { + let (org_tx, org_rx) = tokio::sync::watch::channel(org.map(str::to_string)); + Box::leak(Box::new(org_tx)); + org_rx + } + + #[tokio::test] + async fn posts_backend_stop_on_recording_stopped_event() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/stop")) + .and(body_partial_json( + serde_json::json!({ "recording_id": "rec-stop-1" }), + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!("ok"))) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + let index = seed_notified_recording(&store, "rec-stop-1").await; + store + .mark_recording_stopped(index, 1) + .await + .expect("mark stopped"); + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_stop_notifier( + store.clone(), + bus.clone(), + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + bus.publish(DaemonEvent::RecordingStopped { + recording_index: index, + }); + + // Give the notifier task a moment to drain the event and call wiremock. 
+ timeout(Duration::from_secs(3), async { + loop { + let received = server.received_requests().await.unwrap_or_default(); + if !received.is_empty() { + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("expected one POST within 3s"); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn startup_sweep_recovers_recordings_stopped_while_offline() { + // Simulate a daemon coming online with a recording that was + // stopped during a previous offline session: `stopped_at` is + // already set, `backend_stop_notified_at` is still NULL. The + // notifier's pre-loop sweep must POST and mark the row notified. + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/stop")) + .and(body_partial_json( + serde_json::json!({ "recording_id": "rec-offline-1" }), + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!("ok"))) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + let index = seed_notified_recording(&store, "rec-offline-1").await; + store + .mark_recording_stopped(index, 1) + .await + .expect("mark stopped"); + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_stop_notifier( + store.clone(), + bus, + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + timeout(Duration::from_secs(3), async { + loop { + let received = server.received_requests().await.unwrap_or_default(); + if !received.is_empty() { + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("sweep must POST within 3s"); + + // Give the notifier a beat to persist the success column. 
+ timeout(Duration::from_secs(3), async { + loop { + let row = store + .get_recording(index) + .await + .expect("get") + .expect("exists"); + if row.backend_stop_notified_at.is_some() { + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("backend_stop_notified_at must be stamped within 3s"); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn skips_notify_when_recording_row_missing() { + let server = MockServer::start().await; + let (store, _dir) = open_store().await; + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_stop_notifier( + store, + bus.clone(), + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + bus.publish(DaemonEvent::RecordingStopped { + recording_index: 9_999, + }); + + // Yield enough for the notifier to process the event and bail. We + // assert *absence* of an HTTP request: wiremock has no mocks armed, + // so any incoming request would have already failed the test. A + // short sleep is the cheapest way to observe quiescence. + sleep(Duration::from_millis(150)).await; + let received = server.received_requests().await.unwrap_or_default(); + assert!( + received.is_empty(), + "no backend POST expected when recording row is missing" + ); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn skips_notify_when_cloud_id_absent() { + // A stopped recording without a cloud id has nothing to stop + // server-side; the notifier must defer (no POST) until the start + // notifier fills the id. 
+ let server = MockServer::start().await; + let (store, _dir) = open_store().await; + let index = store + .create_recording(NewRecording { + robot_id: Some("robot-1"), + robot_instance: Some(0), + start_timestamp_ns: 1_700_000_000_000_000_000, + ..NewRecording::default() + }) + .await + .expect("create recording") + .recording_index; + store + .mark_recording_stopped(index, 1) + .await + .expect("mark stopped"); + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_stop_notifier( + store, + bus.clone(), + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + bus.publish(DaemonEvent::RecordingStopped { + recording_index: index, + }); + + sleep(Duration::from_millis(150)).await; + let received = server.received_requests().await.unwrap_or_default(); + assert!( + received.is_empty(), + "no backend POST expected when the cloud recording_id is absent" + ); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn cloud_id_assigned_event_notifies_a_recording_stopped_while_offline() { + // Offline recovery: a recording stopped while offline already fired its + // `RecordingStopped` (which no coordinator saw). Once the start notifier + // assigns the cloud id and publishes `RecordingCloudIdAssigned`, the + // stop notifier must POST `/recording/stop`. 
+ let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/org/org-1/recording/stop")) + .and(body_partial_json( + serde_json::json!({ "recording_id": "rec-recovered-1" }), + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!("ok"))) + .mount(&server) + .await; + + let (store, _dir) = open_store().await; + let index = seed_notified_recording(&store, "rec-recovered-1").await; + store + .mark_recording_stopped(index, 1) + .await + .expect("mark stopped"); + + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_stop_notifier( + store.clone(), + bus.clone(), + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + // The cloud-id-assigned event β€” not RecordingStopped β€” drives the POST. + bus.publish(DaemonEvent::RecordingCloudIdAssigned { + recording_index: index, + }); + + timeout(Duration::from_secs(3), async { + loop { + let received = server.received_requests().await.unwrap_or_default(); + if !received.is_empty() { + break; + } + sleep(Duration::from_millis(20)).await; + } + }) + .await + .expect("cloud-id-assigned event must POST within 3s"); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn cloud_id_assigned_event_ignores_a_running_recording() { + // A recording that just got its cloud id but has not stopped yet must + // not be stop-notified β€” the `stopped_at` guard holds the POST until the + // recording actually stops. + let server = MockServer::start().await; + let (store, _dir) = open_store().await; + let index = seed_notified_recording(&store, "rec-running-1").await; + // Deliberately NOT stopped. 
+ + let auth = Arc::new(StaticAuthProvider::new("token-1")); + let client = Arc::new(ApiClient::new(options(server.uri()), auth).expect("client")); + + let bus = EventBus::new(); + let (shutdown_tx, _) = broadcast::channel::(8); + let handle = spawn_recording_stop_notifier( + store, + bus.clone(), + client, + org_rx(Some("org-1")), + shutdown_tx.subscribe(), + ); + + bus.publish(DaemonEvent::RecordingCloudIdAssigned { + recording_index: index, + }); + + sleep(Duration::from_millis(150)).await; + let received = server.received_requests().await.unwrap_or_default(); + assert!( + received.is_empty(), + "no backend POST expected for a recording that has not stopped" + ); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } +} diff --git a/rust/data_daemon/src/cloud/watchers/mod.rs b/rust/data_daemon/src/cloud/watchers/mod.rs new file mode 100644 index 000000000..f57463aed --- /dev/null +++ b/rust/data_daemon/src/cloud/watchers/mod.rs @@ -0,0 +1,6 @@ +//! Periodic watchers: the org-id config poller that publishes the live org for +//! every coordinator, and the recording reaper that reclaims durably-settled +//! recordings. + +pub mod org_watcher; +pub mod recording_reaper; diff --git a/rust/data_daemon/src/cloud/watchers/org_watcher.rs b/rust/data_daemon/src/cloud/watchers/org_watcher.rs new file mode 100644 index 000000000..e90bd61d8 --- /dev/null +++ b/rust/data_daemon/src/cloud/watchers/org_watcher.rs @@ -0,0 +1,183 @@ +//! Live `org_id` resolution. +//! +//! The organisation that owns a recording is no longer frozen onto the +//! recording row at creation time. Instead this module watches the +//! SDK-managed `~/.neuracore/config.json` and publishes the *current* +//! `current_org_id` into a [`watch::channel`] that every cloud coordinator +//! reads at the moment it issues a backend POST. A recording opened before +//! the org was selected therefore picks the org up as soon as it lands in +//! 
config β€” no daemon restart, and no per-recording backfill. + +use std::path::PathBuf; + +use tokio::sync::{broadcast, watch}; +use tokio::task::JoinHandle; +use tokio::time::{interval, MissedTickBehavior}; + +use crate::cloud::{read_org_id_from_config, read_org_id_from_config_async}; +use crate::lifecycle::shutdown::ShutdownSignal; + +/// Shared read handle for the current `org_id`. Cheap to clone; read the +/// current value with `org_rx.borrow().clone()`. +pub type OrgIdRx = tokio::sync::watch::Receiver>; + +/// Handle for the config-file watcher task. +pub struct OrgWatcherHandle { + join: JoinHandle<()>, +} + +impl OrgWatcherHandle { + /// Wait for the watcher task to exit. + pub async fn join(self) { + if let Err(error) = self.join.await { + tracing::warn!(?error, "org watcher join failed"); + } + } +} + +/// Spawn the config-file watcher. +/// +/// Returns a [`OrgIdRx`] seeded with the org resolved at spawn time and the +/// task handle. `fallback` is the daemon-profile override (`NCD_CURRENT_ORG_ID` +/// / YAML profile) used whenever the config file has no `current_org_id`, +/// matching the launch-time resolution order. +pub fn spawn_org_watcher( + config_path: PathBuf, + fallback: Option, + mut shutdown_rx: broadcast::Receiver, +) -> (OrgIdRx, OrgWatcherHandle) { + // One-shot blocking seed is fine β€” it runs once before the task spawns. + let initial = read_org_id_from_config(&config_path).or_else(|| fallback.clone()); + let (org_tx, org_rx) = watch::channel(initial); + + let join = tokio::spawn(async move { + let mut ticker = interval(crate::intervals::ORG_CONFIG_POLL); + ticker.set_missed_tick_behavior(MissedTickBehavior::Delay); + + loop { + tokio::select! 
{ + biased; + signal = shutdown_rx.recv() => { + tracing::debug!(?signal, "org watcher shutting down"); + break; + } + _ = ticker.tick() => { + let current = read_org_id_from_config_async(&config_path) + .await + .or_else(|| fallback.clone()); + org_tx.send_if_modified(|existing| { + if *existing == current { + false + } else { + tracing::info!( + org_id = ?current, + "config change picked up; updating current org_id" + ); + *existing = current; + true + } + }); + } + } + } + }); + + (org_rx, OrgWatcherHandle { join }) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use std::time::Duration; + + use tempfile::TempDir; + use tokio::time::timeout; + + fn write_config(path: &std::path::Path, org_id: Option<&str>) { + let body = match org_id { + Some(org) => format!(r#"{{"current_org_id": "{org}"}}"#), + None => "{}".to_string(), + }; + let mut file = std::fs::File::create(path).expect("write config"); + file.write_all(body.as_bytes()).expect("write body"); + } + + #[tokio::test] + async fn seeds_initial_value_from_config() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("config.json"); + write_config(&path, Some("org-initial")); + + let (shutdown_tx, _) = broadcast::channel::(8); + let (org_rx, handle) = spawn_org_watcher(path, None, shutdown_tx.subscribe()); + assert_eq!(org_rx.borrow().as_deref(), Some("org-initial")); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn falls_back_when_config_has_no_org() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("config.json"); + write_config(&path, None); + + let (shutdown_tx, _) = broadcast::channel::(8); + let (org_rx, handle) = spawn_org_watcher( + path, + Some("profile-org".to_string()), + shutdown_tx.subscribe(), + ); + assert_eq!(org_rx.borrow().as_deref(), Some("profile-org")); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn 
corrupt_config_falls_back_without_crashing() { + // M15: a present-but-corrupt config must not crash the watcher or wipe + // the fallback org β€” it logs and is treated as "no org in config". + let dir = TempDir::new().unwrap(); + let path = dir.path().join("config.json"); + std::fs::write(&path, b"{ this is not valid json ").unwrap(); + + let (shutdown_tx, _) = broadcast::channel::(8); + let (org_rx, handle) = spawn_org_watcher( + path, + Some("profile-org".to_string()), + shutdown_tx.subscribe(), + ); + assert_eq!(org_rx.borrow().as_deref(), Some("profile-org")); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } + + #[tokio::test] + async fn picks_up_org_written_after_launch() { + // The recording-blocking case: the daemon comes up before any org is + // selected, then the SDK writes one. The watcher must publish it + // without a restart. + let dir = TempDir::new().unwrap(); + let path = dir.path().join("config.json"); + write_config(&path, None); + + let (shutdown_tx, _) = broadcast::channel::(8); + let (mut org_rx, handle) = spawn_org_watcher(path.clone(), None, shutdown_tx.subscribe()); + assert_eq!(org_rx.borrow().as_deref(), None, "starts org-less"); + + // Select an org after launch. + write_config(&path, Some("org-late")); + + timeout(Duration::from_secs(5), org_rx.changed()) + .await + .expect("watcher must observe the config change within 5s") + .expect("sender alive"); + assert_eq!(org_rx.borrow().as_deref(), Some("org-late")); + + let _ = shutdown_tx.send(ShutdownSignal::Sigterm); + handle.join().await; + } +} diff --git a/rust/data_daemon/src/cloud/watchers/recording_reaper.rs b/rust/data_daemon/src/cloud/watchers/recording_reaper.rs new file mode 100644 index 000000000..87b9fa123 --- /dev/null +++ b/rust/data_daemon/src/cloud/watchers/recording_reaper.rs @@ -0,0 +1,139 @@ +//! Periodic recording reaper. +//! +//! Reclaims recordings whose local copy is redundant β€” the daemon owns no other +//! 
cleanup for a recording that reaches a settled terminal state, so without +//! this task both their files and DB rows leak forever. Two shapes qualify: +//! +//! * **Stopped + fully uploaded** β€” every declared trace uploaded and the +//! backend fully notified (stop POSTed, expected-trace-count + per-trace +//! progress reported). The cloud holds everything. +//! * **Cancelled** β€” the data was discarded; once the backend cancel has been +//! notified (`backend_cancel_notified_at`) nothing local needs keeping. +//! +//! For both, the reaper deletes the on-disk recording directory and then the +//! `recordings` / `traces` rows, keeping local disk and the state DB bounded +//! over a long-running daemon's lifetime. It is the single owner of +//! cancelled-recording file removal β€” the cancel path no longer unlinks files. +//! +//! The uploaded gate reads the authoritative per-trace `upload_status` rows; a +//! recording with a permanently `failed` trace never satisfies it, so data that +//! did not upload is intentionally retained. The startup sweep still handles +//! partial (mid-write) recordings separately. + +use std::path::PathBuf; +use std::sync::Arc; + +use tokio::sync::broadcast; +use tokio::task::JoinHandle; +use tokio::time::{interval, MissedTickBehavior}; + +use crate::lifecycle::shutdown::ShutdownSignal; +use crate::state::{RecordingRow, SqliteStateStore, StateStore}; +use crate::storage::paths::recording_dir; + +/// Handle returned by [`spawn_recording_reaper`]. +pub struct RecordingReaperHandle { + join: JoinHandle<()>, +} + +impl RecordingReaperHandle { + /// Wait for the reaper task to exit. + pub async fn join(self) { + if let Err(error) = self.join.await { + tracing::warn!(?error, "recording reaper join failed"); + } + } +} + +/// Spawn the recording reaper task on the current Tokio runtime. 
+pub fn spawn_recording_reaper( + store: SqliteStateStore, + recordings_root: Arc, + mut shutdown_rx: broadcast::Receiver, +) -> RecordingReaperHandle { + let store = Arc::new(store); + let join = tokio::spawn(async move { + let mut ticker = interval(crate::intervals::RECORDING_RECLAIM); + ticker.set_missed_tick_behavior(MissedTickBehavior::Delay); + + loop { + tokio::select! { + biased; + signal = shutdown_rx.recv() => { + tracing::debug!(?signal, "recording reaper shutting down"); + break; + } + _ = ticker.tick() => { + sweep_once(&store, &recordings_root).await; + } + } + } + }); + RecordingReaperHandle { join } +} + +/// Run one reclamation pass: list the durably-settled recordings the server-side +/// filter reports as reclaimable and reclaim each (deletes the on-disk artefacts +/// and drops the row). Invoked once per reclaim tick by the spawned reaper task. +async fn sweep_once(store: &Arc, recordings_root: &Arc) { + // Server-side filter returns *only* durably-settled, reclaimable + // recordings (cancel-notified, or stopped + fully uploaded with the + // expected trace count met). This walks neither every recording nor the + // traces of a recording wedged on a permanently-failed upload β€” both of + // which the old `list_recordings` + per-row trace fetch re-scanned every + // sweep, forever. + let recordings = match store.recordings_pending_reclaim().await { + Ok(rows) => rows, + Err(error) => { + tracing::warn!(%error, "recording reaper could not list reclaimable recordings"); + return; + } + }; + for recording in recordings { + reclaim(store, recordings_root, &recording).await; + } +} + +/// Remove the recording's on-disk directory, then its DB rows. Files are +/// deleted first: if the unlink fails the rows are left in place so the next +/// sweep retries rather than orphaning files with no row pointing at them. 
+async fn reclaim( + store: &Arc, + recordings_root: &Arc, + recording: &RecordingRow, +) { + let dir = recording_dir(recordings_root, recording.recording_index); + // `tokio::fs` so a large directory tree doesn't block a runtime worker + // (the sweep runs on the async reaper task). + match tokio::fs::remove_dir_all(&dir).await { + Ok(()) => {} + // Already gone (e.g. reclaimed on a prior sweep that crashed before the + // row delete committed) β€” fall through and finish removing the rows. + Err(error) if error.kind() == std::io::ErrorKind::NotFound => {} + Err(error) => { + tracing::warn!( + %error, + recording_index = recording.recording_index, + path = %dir.display(), + "recording reaper could not remove recording directory; retrying next sweep" + ); + return; + } + } + + match store + .delete_recording_cascade(recording.recording_index) + .await + { + Ok(traces_deleted) => tracing::info!( + recording_index = recording.recording_index, + traces_deleted, + "reclaimed fully-uploaded recording" + ), + Err(error) => tracing::warn!( + %error, + recording_index = recording.recording_index, + "recording reaper removed files but could not delete rows" + ), + } +} diff --git a/rust/data_daemon/src/connection/mod.rs b/rust/data_daemon/src/connection/mod.rs new file mode 100644 index 000000000..d25b63b2b --- /dev/null +++ b/rust/data_daemon/src/connection/mod.rs @@ -0,0 +1,13 @@ +//! Connection-state monitoring for the daemon. +//! +//! Runs a 10 s `HEAD /status/health` tick and publishes +//! [`crate::state::DaemonEvent::ConnectionStateChanged`] transitions to the broadcast bus so +//! the upload coordinator can pause / resume on persistent network failures. 
+ +pub mod monitor; +pub mod wakelock; + +#[allow(unused_imports)] +pub use monitor::{spawn_connection_monitor, ConnectionState, MonitorHandle}; +#[allow(unused_imports)] +pub use wakelock::{spawn_wakelock, WakelockHandle}; diff --git a/rust/data_daemon/src/connection/monitor.rs b/rust/data_daemon/src/connection/monitor.rs new file mode 100644 index 000000000..090a46900 --- /dev/null +++ b/rust/data_daemon/src/connection/monitor.rs @@ -0,0 +1,171 @@ +//! Periodic `HEAD /status/health` probe. +//! +//! Mirrors `connection_manager.py::ConnectionManager`. Runs on the Tokio +//! runtime as a single supervised task. Each transition is broadcast to the +//! daemon event bus as [`DaemonEvent::ConnectionStateChanged`]; subscribers +//! (currently the upload coordinator) pause when the connection drops and +//! resume on recovery. + +use std::sync::Arc; + +use tokio::sync::broadcast; +use tokio::task::JoinHandle; +use tokio::time::{interval, MissedTickBehavior}; + +use crate::api::ApiClient; +use crate::lifecycle::shutdown::ShutdownSignal; +pub use crate::state::ConnectionState; +use crate::state::{DaemonEvent, EventBus}; + +/// Handle to the spawned connection monitor task. +pub struct MonitorHandle { + join: JoinHandle<()>, +} + +impl MonitorHandle { + /// Wait for the monitor task to exit (used during ordered shutdown). + pub async fn join(self) { + if let Err(error) = self.join.await { + tracing::warn!(?error, "connection monitor join failed"); + } + } +} + +/// Spawn the monitor task on the current Tokio runtime. +/// +/// The spawned task publishes `ConnectionState::Down` as its first action so +/// subscribers (currently the upload coordinator) see a definite initial state +/// regardless of subscription ordering β€” the first successful probe then +/// flips them to `Up`. 
+pub fn spawn_connection_monitor(
+    client: Arc<ApiClient>,
+    bus: EventBus,
+    mut shutdown_rx: broadcast::Receiver<ShutdownSignal>,
+) -> MonitorHandle {
+    let join = tokio::spawn(async move {
+        // Publish the initial Down state from inside the task so that any
+        // task calling `bus.subscribe()` between launch and the next yield
+        // point sees the seed event before the first probe runs.
+        let mut state = ConnectionState::Down;
+        bus.publish(DaemonEvent::ConnectionStateChanged(state));
+        let mut ticker = interval(crate::intervals::CONNECTION_HEALTH_CHECK);
+        // Don't try to "catch up" missed ticks during long pauses — one
+        // probe per real-time interval is enough and avoids storming the
+        // backend after a daemon stall.
+        ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
+
+        loop {
+            tokio::select! {
+                biased;
+                signal = shutdown_rx.recv() => {
+                    tracing::debug!(?signal, "connection monitor shutting down");
+                    break;
+                }
+                _ = ticker.tick() => {
+                    match client.health_check().await {
+                        Ok(true) => {
+                            if state != ConnectionState::Up {
+                                state = ConnectionState::Up;
+                                tracing::info!("backend connection restored");
+                                bus.publish(DaemonEvent::ConnectionStateChanged(state));
+                            }
+                        }
+                        Ok(false) | Err(_) => {
+                            if state != ConnectionState::Down {
+                                state = ConnectionState::Down;
+                                tracing::warn!("backend connection lost");
+                                bus.publish(DaemonEvent::ConnectionStateChanged(state));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    });
+    MonitorHandle { join }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::Duration;
+
+    use crate::api::auth::StaticAuthProvider;
+    use crate::api::client::ApiClientOptions;
+    use wiremock::matchers::{method, path};
+    use wiremock::{Mock, MockServer, ResponseTemplate};
+
+    async fn build_client(server: &MockServer) -> Arc<ApiClient> {
+        let auth = Arc::new(StaticAuthProvider::new("test"));
+        let mut options = ApiClientOptions::new(server.uri());
+        options.timeout = Duration::from_secs(2);
+        Arc::new(ApiClient::new(options, auth).expect("client"))
+    }
+
+    #[tokio::test]
+    async fn publishes_initial_down_state() {
+        let server = MockServer::start().await;
+        Mock::given(method("HEAD"))
+            .and(path("/status/health"))
+            .respond_with(ResponseTemplate::new(503))
+            .mount(&server)
+            .await;
+        let client = build_client(&server).await;
+        let bus = EventBus::new();
+        let mut subscriber = bus.subscribe();
+        let (tx, rx) = broadcast::channel::<ShutdownSignal>(1);
+
+        let handle = spawn_connection_monitor(client, bus, rx);
+        // The very first event after subscription is the explicit Down
+        // publish from `spawn_connection_monitor`.
+        let event = subscriber.recv().await.unwrap();
+        assert!(matches!(
+            event,
+            DaemonEvent::ConnectionStateChanged(ConnectionState::Down)
+        ));
+
+        // Tear the monitor down so the test exits.
+        let _ = tx.send(ShutdownSignal::Sigterm);
+        handle.join.abort();
+    }
+
+    #[tokio::test]
+    async fn transitions_to_up_when_health_check_passes() {
+        // The first probe returns 200 so the monitor flips Down -> Up.
+        // The probe runs on a tokio `interval` whose first tick fires
+        // immediately, so we only need to wait a couple of polls.
+        let server = MockServer::start().await;
+        Mock::given(method("HEAD"))
+            .and(path("/status/health"))
+            .respond_with(ResponseTemplate::new(200))
+            .mount(&server)
+            .await;
+        let client = build_client(&server).await;
+        let bus = EventBus::new();
+        let mut subscriber = bus.subscribe();
+        let (tx, rx) = broadcast::channel::<ShutdownSignal>(1);
+
+        let handle = spawn_connection_monitor(client, bus, rx);
+
+        // Initial Down, then Up once the probe succeeds.
+        let first = tokio::time::timeout(Duration::from_secs(2), subscriber.recv())
+            .await
+            .expect("initial event")
+            .unwrap();
+        assert!(matches!(
+            first,
+            DaemonEvent::ConnectionStateChanged(ConnectionState::Down)
+        ));
+        let second = tokio::time::timeout(Duration::from_secs(5), subscriber.recv())
+            .await
+            .expect("up event")
+            .unwrap();
+        assert!(matches!(
+            second,
+            DaemonEvent::ConnectionStateChanged(ConnectionState::Up)
+        ));
+
+        let _ = tx.send(ShutdownSignal::Sigterm);
+        handle.join.abort();
+    }
+}
diff --git a/rust/data_daemon/src/connection/wakelock.rs b/rust/data_daemon/src/connection/wakelock.rs
new file mode 100644
index 000000000..185e9bbd8
--- /dev/null
+++ b/rust/data_daemon/src/connection/wakelock.rs
@@ -0,0 +1,334 @@
+//! Hold a wakelock while uploads are in flight.
+//!
+//! When `NCD_KEEP_WAKELOCK_WHILE_UPLOAD=1` the daemon prevents the host from
+//! idling into suspend or triggering a session-idle action while a recording
+//! upload is still going. On Linux this is best
+//! expressed through `systemd-inhibit`, which holds the inhibitor for as
+//! long as its child process keeps running. The wakelock task:
+//!
+//! 1. Subscribes to the daemon event bus.
+//! 2. Tracks each in-flight trace (by `trace_id`, with its `recording_index`):
+//!    a trace is added on [`ReadyForUpload`](DaemonEvent::ReadyForUpload) and
+//!    removed on [`UploadComplete`](DaemonEvent::UploadComplete);
+//!    [`RecordingCancelled`](DaemonEvent::RecordingCancelled) drops only that
+//!    recording's traces.
+//! 3. Spawns `systemd-inhibit --what=idle:sleep --mode=block sleep infinity`
+//!    on the empty→non-empty transition and kills it on the
+//!    non-empty→empty transition.
+//!
+//! Hosts without `systemd-inhibit` on `$PATH` log a single warning and
+//! degrade to a no-op — the upload path itself is untouched. macOS / BSDs
+//! would need a per-platform shim (the macOS-equivalent stay-awake CLI);
+//! this feature is Linux-only.
+
+use std::collections::HashMap;
+use std::process::{Child, Command, Stdio};
+
+use tokio::sync::broadcast;
+use tokio::task::JoinHandle;
+
+use crate::lifecycle::shutdown::ShutdownSignal;
+use crate::state::{DaemonEvent, EventBus};
+
+/// Handle returned by [`spawn_wakelock`]; signal shutdown, then `join` it to await exit.
+pub struct WakelockHandle {
+    join: JoinHandle<()>,
+}
+
+impl WakelockHandle {
+    /// Wait for the wakelock task to exit. No extra cleanup is needed: the task
+    /// itself releases its inhibitor on shutdown.
+    pub async fn join(self) {
+        if let Err(error) = self.join.await {
+            tracing::warn!(?error, "wakelock task join failed");
+        }
+    }
+}
+
+/// Spawn the wakelock task on the current Tokio runtime.
+///
+/// Subscribes to `bus` for [`ReadyForUpload`](DaemonEvent::ReadyForUpload),
+/// [`UploadComplete`](DaemonEvent::UploadComplete), and
+/// [`RecordingCancelled`](DaemonEvent::RecordingCancelled). Holds a
+/// `systemd-inhibit` child while at least one trace is in flight.
+pub fn spawn_wakelock(
+    bus: EventBus,
+    mut shutdown_rx: broadcast::Receiver<ShutdownSignal>,
+) -> WakelockHandle {
+    // Subscribe synchronously at spawn time (matching the monitor's deliberate
+    // ordering) so an event published between this call returning and the task
+    // first being polled is not missed.
+    let mut subscriber = bus.subscribe();
+    let join = tokio::spawn(async move {
+        let mut active = ActiveUploads::default();
+        let mut inhibitor = InhibitorChild::new();
+
+        loop {
+            tokio::select! {
+                biased;
+                signal = shutdown_rx.recv() => {
+                    tracing::debug!(?signal, "wakelock task shutting down");
+                    break;
+                }
+                event = subscriber.recv() => {
+                    match event {
+                        Ok(DaemonEvent::ReadyForUpload { trace_id, recording_index }) => {
+                            if active.add(trace_id, recording_index) {
+                                inhibitor.ensure_held();
+                            }
+                        }
+                        Ok(DaemonEvent::UploadComplete { trace_id, .. }) => {
+                            if active.complete(&trace_id) {
+                                inhibitor.release();
+                            }
+                        }
+                        Ok(DaemonEvent::RecordingCancelled { recording_index }) => {
+                            // Drop only the cancelled recording's in-flight
+                            // traces (whose actors were torn down, so their
+                            // `UploadComplete` will never arrive) — releasing
+                            // the inhibitor only if nothing else is uploading.
+                            // Clearing *all* traces here would drop the
+                            // inhibitor another recording's still-running
+                            // upload needs.
+                            let released = active.cancel_recording(recording_index);
+                            tracing::debug!(
+                                recording_index,
+                                remaining = active.len(),
+                                "wakelock handling recording cancel"
+                            );
+                            if released {
+                                inhibitor.release();
+                            }
+                        }
+                        Ok(_) => {}
+                        Err(broadcast::error::RecvError::Lagged(skipped)) => {
+                            // We may have dropped `UploadComplete`s, which would
+                            // otherwise pin phantom trace-ids in `active` and
+                            // leave the inhibitor held forever. Resync
+                            // conservatively: clear the bookkeeping and release.
+                            // The next `ReadyForUpload` re-acquires — a brief
+                            // inhibitor gap beats an inhibitor stuck on.
+                            tracing::warn!(
+                                skipped,
+                                pending = active.len(),
+                                "wakelock missed bus events; resyncing (clearing pending + releasing)"
+                            );
+                            active.clear();
+                            inhibitor.release();
+                        }
+                        Err(broadcast::error::RecvError::Closed) => break,
+                    }
+                }
+            }
+        }
+        inhibitor.release();
+    });
+    WakelockHandle { join }
+}
+
+/// In-flight upload bookkeeping for the wakelock: each pending trace mapped to
+/// the recording it belongs to, so a recording cancel releases only *its* own
+/// traces rather than every recording's.
+#[derive(Default)]
+struct ActiveUploads {
+    /// `trace_id → recording_index` for every trace currently uploading.
+    by_trace: HashMap<String, i64>,
+}
+
+impl ActiveUploads {
+    /// Record a trace as in-flight. Returns `true` on the 0→non-empty
+    /// transition, i.e. when the caller should acquire the inhibitor.
+    fn add(&mut self, trace_id: String, recording_index: i64) -> bool {
+        let was_empty = self.by_trace.is_empty();
+        self.by_trace.insert(trace_id, recording_index);
+        was_empty
+    }
+
+    /// Remove a finished trace. Returns `true` when the last in-flight trace
+    /// completed, i.e. when the caller should release the inhibitor.
+    fn complete(&mut self, trace_id: &str) -> bool {
+        self.by_trace.remove(trace_id).is_some() && self.by_trace.is_empty()
+    }
+
+    /// Drop every trace belonging to `recording_index` (its actors are gone, so
+    /// their `UploadComplete` will never arrive). Returns `true` when nothing
+    /// remains in flight, i.e. when the caller should release the inhibitor.
+    fn cancel_recording(&mut self, recording_index: i64) -> bool {
+        self.by_trace.retain(|_, index| *index != recording_index);
+        self.by_trace.is_empty()
+    }
+
+    /// Forget every in-flight trace (used on a lagged-bus resync).
+    fn clear(&mut self) {
+        self.by_trace.clear();
+    }
+
+    fn len(&self) -> usize {
+        self.by_trace.len()
+    }
+}
+
+/// Owns the optional `systemd-inhibit` child process. Each `ensure_held` is
+/// idempotent (re-entrant) and `release` is a no-op when nothing is held.
+struct InhibitorChild {
+    child: Option<Child>,
+    /// Logged once when the platform doesn't provide `systemd-inhibit`, so
+    /// the daemon doesn't repeatedly warn at every transition.
+    warned_unavailable: bool,
+}
+
+impl InhibitorChild {
+    fn new() -> Self {
+        Self {
+            child: None,
+            warned_unavailable: false,
+        }
+    }
+
+    fn ensure_held(&mut self) {
+        if self.child.is_some() {
+            return;
+        }
+        match spawn_systemd_inhibit() {
+            Ok(child) => {
+                tracing::info!("wakelock acquired (systemd-inhibit)");
+                self.child = Some(child);
+            }
+            Err(InhibitorError::NotInstalled) => {
+                if !self.warned_unavailable {
+                    self.warned_unavailable = true;
+                    tracing::warn!(
+                        "NCD_KEEP_WAKELOCK_WHILE_UPLOAD is set but systemd-inhibit \
+                         is not available; uploads will run without an inhibitor"
+                    );
+                }
+            }
+            Err(InhibitorError::Spawn(error)) => {
+                tracing::warn!(%error, "failed to spawn systemd-inhibit");
+            }
+        }
+    }
+
+    fn release(&mut self) {
+        let Some(mut child) = self.child.take() else {
+            return;
+        };
+        if let Err(error) = child.kill() {
+            tracing::warn!(%error, "failed to release wakelock (systemd-inhibit)");
+        }
+        // `wait` reaps the child so we don't leak a zombie process. We
+        // don't care about the exit status — the inhibitor we asked it to
+        // hold is released the moment the process exits.
+        let _ = child.wait();
+        tracing::info!("wakelock released");
+    }
+}
+
+impl Drop for InhibitorChild {
+    fn drop(&mut self) {
+        self.release();
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+enum InhibitorError {
+    #[error("systemd-inhibit is not installed on this host")]
+    NotInstalled,
+    #[error("failed to spawn systemd-inhibit: {0}")]
+    Spawn(#[from] std::io::Error),
+}
+
+fn spawn_systemd_inhibit() -> Result<Child, InhibitorError> {
+    let mut command = Command::new("systemd-inhibit");
+    command
+        .args([
+            "--what=idle:sleep",
+            "--who=neuracore-data-daemon",
+            "--why=Active recording upload in progress",
+            "--mode=block",
+            // `sleep infinity` is a no-op placeholder that keeps
+            // systemd-inhibit alive until we kill it.
+            "sleep",
+            "infinity",
+        ])
+        .stdin(Stdio::null())
+        .stdout(Stdio::null())
+        .stderr(Stdio::null());
+    match command.spawn() {
+        Ok(child) => Ok(child),
+        Err(error) if error.kind() == std::io::ErrorKind::NotFound => {
+            Err(InhibitorError::NotInstalled)
+        }
+        Err(error) => Err(InhibitorError::Spawn(error)),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn inhibitor_release_without_held_is_noop() {
+        // The `release` path must tolerate the no-child case — the wakelock
+        // task calls it on shutdown and lag-resync even when nothing is held.
+        let mut inhibitor = InhibitorChild::new();
+        inhibitor.release();
+    }
+
+    #[test]
+    fn add_signals_acquire_only_on_first_trace() {
+        let mut active = ActiveUploads::default();
+        assert!(
+            active.add("a".into(), 1),
+            "first trace acquires the inhibitor"
+        );
+        assert!(
+            !active.add("b".into(), 1),
+            "a later trace does not re-acquire"
+        );
+    }
+
+    #[test]
+    fn complete_signals_release_only_when_last_trace_finishes() {
+        let mut active = ActiveUploads::default();
+        active.add("a".into(), 1);
+        active.add("b".into(), 1);
+        assert!(!active.complete("a"), "one trace still in flight");
+        assert!(active.complete("b"), "last trace finishing releases");
+        // Completing an unknown trace never signals a release.
+        assert!(!active.complete("a"));
+    }
+
+    #[test]
+    fn cancel_releases_only_the_cancelled_recordings_traces() {
+        // M6 regression: cancelling recording A must NOT release the inhibitor
+        // while recording B still has an upload in flight.
+        let mut active = ActiveUploads::default();
+        active.add("a-trace".into(), 1);
+        active.add("b-trace".into(), 2);
+
+        let released = active.cancel_recording(1);
+        assert!(
+            !released,
+            "recording B is still uploading, so the inhibitor must stay held"
+        );
+        assert_eq!(active.len(), 1, "only A's trace was dropped");
+
+        // B finishing now releases.
+        assert!(
+            active.complete("b-trace"),
+            "B's completion releases the inhibitor"
+        );
+    }
+
+    #[test]
+    fn cancel_releases_when_it_empties_the_set() {
+        let mut active = ActiveUploads::default();
+        active.add("a-trace".into(), 1);
+        assert!(
+            active.cancel_recording(1),
+            "cancelling the only in-flight recording releases the inhibitor"
+        );
+        assert_eq!(active.len(), 0);
+    }
+}
diff --git a/rust/data_daemon/src/encoding/json_trace.rs b/rust/data_daemon/src/encoding/json_trace.rs
new file mode 100644
index 000000000..3938fc803
--- /dev/null
+++ b/rust/data_daemon/src/encoding/json_trace.rs
@@ -0,0 +1,279 @@
+//! Incremental JSON-array writer for scalar / sensor traces.
+//!
+//! The output file is created (and truncated) on [`JsonTraceWriter::open`]; the
+//! JSON content (the leading `[` and the entries) is buffered in memory and
+//! written lazily — first once the buffer reaches [`DEFAULT_FLUSH_BYTES`], then
+//! finally on `finish`. The on-disk byte layout is a single JSON array with one
+//! entry per frame, comma-separated, no whitespace.
+
+use std::fs::{File, OpenOptions};
+use std::io::{self, BufWriter, Write};
+use std::path::{Path, PathBuf};
+
+use serde::Serialize;
+
+use crate::storage::paths::TRACE_JSON_FILENAME;
+
+/// Default flush threshold: buffered entries are written to disk once they
+/// reach 4 MiB.
+pub const DEFAULT_FLUSH_BYTES: usize = 4 * 1024 * 1024;
+
+/// Errors raised by [`JsonTraceWriter`].
+#[derive(Debug, thiserror::Error)]
+pub enum JsonTraceError {
+    /// Failed to create the parent directory or open the output file.
+    #[error("failed to open trace file {path}: {source}")]
+    Open {
+        /// Path that failed to open.
+        path: PathBuf,
+        /// Underlying I/O error.
+        #[source]
+        source: io::Error,
+    },
+    /// Failed to serialize an entry to JSON.
+    #[error("failed to serialise trace entry: {0}")]
+    Serialize(#[source] serde_json::Error),
+    /// Failed to write buffered bytes to disk.
+    #[error("failed to write trace file {path}: {source}")]
+    Write {
+        /// Path being written to.
+        path: PathBuf,
+        /// Underlying I/O error.
+        #[source]
+        source: io::Error,
+    },
+}
+
+/// Incremental writer that streams a JSON array of entries to disk.
+///
+/// Entries are buffered in memory until either the buffer reaches
+/// `flush_threshold` bytes or [`finish`](Self::finish) is called. Each entry
+/// is rendered with compact `(",", ":")` separators, so the on-disk file
+/// carries no insignificant whitespace.
+pub struct JsonTraceWriter {
+    path: PathBuf,
+    writer: BufWriter<File>,
+    /// Pending JSON bytes not yet flushed to the file. Includes the leading
+    /// `[` once the first entry has been added.
+    buffer: Vec<u8>,
+    flush_threshold: usize,
+    /// Whether any entry has been added yet. The leading `[` is emitted with the
+    /// first entry; later entries are comma-prefixed.
+    has_entries: bool,
+    /// Bytes already flushed to disk (excludes the closing `]`, which is
+    /// only appended on `finish`).
+    bytes_on_disk: u64,
+}
+
+impl JsonTraceWriter {
+    /// Open a writer producing `{output_dir}/trace.json`.
+    pub fn open(output_dir: &Path) -> Result<Self, JsonTraceError> {
+        Self::open_with(output_dir, TRACE_JSON_FILENAME, DEFAULT_FLUSH_BYTES)
+    }
+
+    /// Open a writer with a custom filename and flush threshold.
+    pub fn open_with(
+        output_dir: &Path,
+        filename: &str,
+        flush_threshold: usize,
+    ) -> Result<Self, JsonTraceError> {
+        std::fs::create_dir_all(output_dir).map_err(|source| JsonTraceError::Open {
+            path: output_dir.to_path_buf(),
+            source,
+        })?;
+
+        let path = output_dir.join(filename);
+        let file = OpenOptions::new()
+            .create(true)
+            .write(true)
+            .truncate(true)
+            .open(&path)
+            .map_err(|source| JsonTraceError::Open {
+                path: path.clone(),
+                source,
+            })?;
+
+        Ok(Self {
+            path,
+            writer: BufWriter::new(file),
+            buffer: Vec::with_capacity(flush_threshold.min(64 * 1024)),
+            flush_threshold,
+            has_entries: false,
+            bytes_on_disk: 0,
+        })
+    }
+
+    /// Bytes flushed to disk so far (does not include the trailing `]` until
+    /// [`finish`](Self::finish) has run).
+    #[allow(dead_code)]
+    pub fn bytes_on_disk(&self) -> u64 {
+        self.bytes_on_disk
+    }
+
+    /// Append one entry to the JSON array.
+    ///
+    /// Generic over any `serde::Serialize` value so callers can hand the
+    /// writer either a `serde_json::Value` or a typed struct without an extra
+    /// `to_value` hop.
+    pub fn add_entry<T: Serialize>(&mut self, entry: &T) -> Result<(), JsonTraceError> {
+        self.begin_entry();
+        serde_json::to_writer(&mut self.buffer, entry).map_err(JsonTraceError::Serialize)?;
+        self.flush_if_full()
+    }
+
+    /// Append a slice of entries. A convenience loop over [`add_entry`](Self::add_entry)
+    /// that preserves the same per-entry, threshold-based flushing. Useful when
+    /// the caller already has a `Vec` to commit (e.g. a draining heartbeat).
+    #[allow(dead_code)]
+    pub fn add_entries<T: Serialize>(&mut self, entries: &[T]) -> Result<(), JsonTraceError> {
+        for entry in entries {
+            self.add_entry(entry)?;
+        }
+        Ok(())
+    }
+
+    /// Append an already-serialised JSON entry verbatim.
+    ///
+    /// Skips the `Value → bytes` round trip so a float supplied by the SDK
+    /// (e.g. `7/60 = 0.11666666666666667`) lands on disk with the exact same
+    /// textual representation it was logged with — required by the
+    /// integration test matrix, which compares `trace.json` floats with
+    /// `actual != expected` rather than approximate tolerance. The caller
+    /// must guarantee `entry_bytes` is a complete JSON value with no
+    /// trailing comma or whitespace.
+    pub fn add_raw_entry(&mut self, entry_bytes: &[u8]) -> Result<(), JsonTraceError> {
+        self.begin_entry();
+        self.buffer.extend_from_slice(entry_bytes);
+        self.flush_if_full()
+    }
+
+    /// Open the array on the first entry (push `[`) or separate it from the
+    /// previous one (push `,`).
+    fn begin_entry(&mut self) {
+        if self.has_entries {
+            self.buffer.push(b',');
+        } else {
+            self.buffer.push(b'[');
+            self.has_entries = true;
+        }
+    }
+
+    /// Flush the buffer to disk if it has grown past the flush threshold.
+    fn flush_if_full(&mut self) -> Result<(), JsonTraceError> {
+        if self.buffer.len() >= self.flush_threshold {
+            self.flush_buffer()?;
+        }
+        Ok(())
+    }
+
+    /// Finalise the file: append `]`, flush, and close the buffered writer.
+    ///
+    /// An empty trace (no `add_entry` calls) is still finalised as `[]` so
+    /// the file is always valid JSON.
+    pub fn finish(mut self) -> Result<u64, JsonTraceError> {
+        if !self.has_entries {
+            self.buffer.extend_from_slice(b"[]");
+        } else {
+            self.buffer.push(b']');
+        }
+        self.flush_buffer()?;
+        self.writer
+            .flush()
+            .map_err(|source| JsonTraceError::Write {
+                path: self.path.clone(),
+                source,
+            })?;
+        Ok(self.bytes_on_disk)
+    }
+
+    fn flush_buffer(&mut self) -> Result<(), JsonTraceError> {
+        if self.buffer.is_empty() {
+            return Ok(());
+        }
+        self.writer
+            .write_all(&self.buffer)
+            .map_err(|source| JsonTraceError::Write {
+                path: self.path.clone(),
+                source,
+            })?;
+        self.bytes_on_disk = self.bytes_on_disk.saturating_add(self.buffer.len() as u64);
+        self.buffer.clear();
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::{json, Value};
+    use tempfile::TempDir;
+
+    fn read_back(path: &Path) -> Value {
+        let bytes = std::fs::read(path).expect("read");
+        serde_json::from_slice(&bytes).expect("parse")
+    }
+
+    #[test]
+    fn empty_trace_produces_valid_json_array() {
+        let tempdir = TempDir::new().unwrap();
+        let writer = JsonTraceWriter::open(tempdir.path()).unwrap();
+        let bytes = writer.finish().unwrap();
+        assert_eq!(bytes, b"[]".len() as u64);
+
+        let parsed = read_back(&tempdir.path().join(TRACE_JSON_FILENAME));
+        assert_eq!(parsed, json!([]));
+    }
+
+    #[test]
+    fn entries_round_trip_through_serde_json() {
+        let tempdir = TempDir::new().unwrap();
+        let mut writer = JsonTraceWriter::open(tempdir.path()).unwrap();
+        writer
+            .add_entry(&json!({"frame": 0, "timestamp": 1.5}))
+            .unwrap();
+        writer
+            .add_entry(&json!({"frame": 1, "timestamp": 2.5}))
+            .unwrap();
+        let _bytes = writer.finish().unwrap();
+
+        let parsed = read_back(&tempdir.path().join(TRACE_JSON_FILENAME));
+        assert_eq!(
+            parsed,
+            json!([
+                {"frame": 0, "timestamp": 1.5},
+                {"frame": 1, "timestamp": 2.5}
+            ])
+        );
+    }
+
+    #[test]
+    fn buffer_flushes_when_threshold_reached() {
+        let tempdir = TempDir::new().unwrap();
+        // Tiny threshold so a single ~50-byte entry forces a flush. We then
+        // check that `bytes_on_disk` advances mid-stream, proving the writer
+        // doesn't buffer the whole file in memory.
+        let mut writer =
+            JsonTraceWriter::open_with(tempdir.path(), TRACE_JSON_FILENAME, 32).unwrap();
+        writer
+            .add_entry(&json!({"frame": 0, "padding": "xxxxxxxxxxxxxx"}))
+            .unwrap();
+        assert!(writer.bytes_on_disk() > 0, "writer must flush mid-stream");
+        let total = writer.finish().unwrap();
+        assert!(total > 0);
+
+        let parsed = read_back(&tempdir.path().join(TRACE_JSON_FILENAME));
+        assert_eq!(parsed[0]["frame"], 0);
+    }
+
+    #[test]
+    fn add_entries_writes_each_in_order() {
+        let tempdir = TempDir::new().unwrap();
+        let mut writer = JsonTraceWriter::open(tempdir.path()).unwrap();
+        let entries = vec![json!({"i": 0}), json!({"i": 1}), json!({"i": 2})];
+        writer.add_entries(&entries).unwrap();
+        writer.finish().unwrap();
+
+        let parsed = read_back(&tempdir.path().join(TRACE_JSON_FILENAME));
+        assert_eq!(parsed, json!([{"i": 0}, {"i": 1}, {"i": 2}]));
+    }
+}
diff --git a/rust/data_daemon/src/encoding/metadata.rs b/rust/data_daemon/src/encoding/metadata.rs
new file mode 100644
index 000000000..38d22887b
--- /dev/null
+++ b/rust/data_daemon/src/encoding/metadata.rs
@@ -0,0 +1,299 @@
+//! Sidecar `trace.json` for video traces.
+//!
+//! Each frame's metadata dictionary is captured as it arrives, and on finalize
+//! the writer flushes a compact JSON array alongside the mp4 outputs.
+//!
+//! On-disk byte layout:
+//!
+//! - One JSON array; `serde_json` default rendering, no whitespace.
+//! - On finish, each entry gets `"frame_idx": <index>` and `"frame": null`
+//!   added/overwritten — `frame_idx` is the 0-based position in the list and
+//!   `frame` is a base64 thumbnail slot the dashboard schema expects (always
+//!   null in the current pipeline, kept for forward compatibility).
+//! - Map insertion order is preserved (enabled by `preserve_order` on
+//!   `serde_json` in `data_daemon`'s `Cargo.toml`).
+
+use std::fs::OpenOptions;
+use std::io::{self, BufWriter, Write};
+use std::path::{Path, PathBuf};
+
+use serde_json::{Map, Value};
+
+use crate::storage::paths::TRACE_JSON_FILENAME;
+
+/// Errors raised by [`VideoMetadataAccumulator`].
+#[derive(Debug, thiserror::Error)]
+pub enum MetadataError {
+    /// Failed to create the parent directory or open the output file.
+    #[error("failed to open metadata file {path}: {source}")]
+    Open {
+        /// Path that failed to open.
+        path: PathBuf,
+        /// Underlying I/O error.
+        #[source]
+        source: io::Error,
+    },
+    /// Failed to serialise the accumulated metadata array.
+    #[error("failed to serialise video metadata: {0}")]
+    Serialize(#[source] serde_json::Error),
+    /// Failed to write buffered bytes to disk.
+    #[error("failed to write metadata file {path}: {source}")]
+    Write {
+        /// Path being written.
+        path: PathBuf,
+        /// Underlying I/O error.
+        #[source]
+        source: io::Error,
+    },
+}
+
+/// Accumulator for per-frame metadata dictionaries.
+///
+/// Construction is allocation-only; the file isn't touched until
+/// [`finish`](Self::finish) runs, which streams the sidecar entry-by-entry to a
+/// `BufWriter` (no whole-array serialised intermediate) after both mp4 encoders
+/// close. Buffering the entry maps in memory is acceptable here because video
+/// metadata is tiny relative to the raw frame payloads — a 30 minute capture at
+/// 30 fps caps out around 50 K entries with a handful of small numeric fields
+/// each.
+#[derive(Debug, Default)]
+pub struct VideoMetadataAccumulator {
+    entries: Vec<Map<String, Value>>,
+}
+
+impl VideoMetadataAccumulator {
+    /// Construct an empty accumulator.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Number of metadata entries currently buffered.
+    #[allow(dead_code)]
+    pub fn len(&self) -> usize {
+        self.entries.len()
+    }
+
+    /// True when no entries have been recorded yet.
+    #[allow(dead_code)]
+    pub fn is_empty(&self) -> bool {
+        self.entries.is_empty()
+    }
+
+    /// Record one frame's metadata. The supplied map is taken by value so the
+    /// caller cannot accidentally mutate it after recording, and so we avoid
+    /// an extra clone on the hot path. The `"frame"` slot is initialised to
+    /// `null` immediately; `finish` re-stamps it alongside `frame_idx`.
+    pub fn record_frame(&mut self, mut entry: Map<String, Value>) {
+        entry.insert("frame".to_string(), Value::Null);
+        self.entries.push(entry);
+    }
+
+    /// Convenience: record metadata provided as a `serde_json::Value`. Arrays are
+    /// flattened one level (each object element becomes its own frame); every
+    /// other non-object value (numbers, strings, null, ...) is dropped silently.
+    #[allow(dead_code)]
+    pub fn record_value(&mut self, value: Value) {
+        match value {
+            Value::Object(map) => self.record_frame(map),
+            Value::Array(items) => {
+                for item in items {
+                    match item {
+                        Value::Object(map) => self.record_frame(map),
+                        _ => tracing::trace!("ignoring non-object metadata list item"),
+                    }
+                }
+            }
+            _ => {
+                tracing::trace!("ignoring non-object metadata entry");
+            }
+        }
+    }
+
+    /// Flush the accumulated metadata to `{output_dir}/trace.json`.
+    ///
+    /// Returns the total bytes written. The directory is created if missing.
+    pub fn finish(self, output_dir: &Path) -> Result<u64, MetadataError> {
+        self.finish_with_filename(output_dir, TRACE_JSON_FILENAME)
+    }
+
+    /// Variant of [`finish`](Self::finish) that lets the caller override the
+    /// sidecar filename. Used by tests; the production code always uses
+    /// [`TRACE_JSON_FILENAME`].
+    pub fn finish_with_filename(
+        mut self,
+        output_dir: &Path,
+        filename: &str,
+    ) -> Result<u64, MetadataError> {
+        std::fs::create_dir_all(output_dir).map_err(|source| MetadataError::Open {
+            path: output_dir.to_path_buf(),
+            source,
+        })?;
+        let path = output_dir.join(filename);
+
+        let file = OpenOptions::new()
+            .create(true)
+            .write(true)
+            .truncate(true)
+            .open(&path)
+            .map_err(|source| MetadataError::Open {
+                path: path.clone(),
+                source,
+            })?;
+        let mut writer = BufWriter::new(file);
+
+        // Stream the array one entry at a time rather than building the whole
+        // serialised blob in a single `serde_json::to_vec` of every entry — for
+        // the doc's 50 K-entry figure that intermediate is multiple MB allocated
+        // just to be copied straight to the writer. Concatenating compact
+        // per-entry encodings with `,` separators inside `[` … `]` is byte-for-
+        // byte identical to serialising the array as a whole.
+        let mut written: u64 = 0;
+        let mut write = |bytes: &[u8]| -> Result<(), MetadataError> {
+            writer
+                .write_all(bytes)
+                .map_err(|source| MetadataError::Write {
+                    path: path.clone(),
+                    source,
+                })?;
+            written += bytes.len() as u64;
+            Ok(())
+        };
+
+        write(b"[")?;
+        for (index, entry) in self.entries.iter_mut().enumerate() {
+            // `frame_idx` is the 0-based position; `frame` is the base64
+            // thumbnail slot the dashboard schema expects but the pipeline
+            // does not populate. Stamping both here keeps the sidecar
+            // well-formed even when the producer omitted them upstream.
+            entry.insert("frame_idx".to_string(), Value::from(index as u64));
+            entry.insert("frame".to_string(), Value::Null);
+
+            if index > 0 {
+                write(b",")?;
+            }
+            let entry_bytes = serde_json::to_vec(entry).map_err(MetadataError::Serialize)?;
+            write(&entry_bytes)?;
+        }
+        write(b"]")?;
+
+        writer.flush().map_err(|source| MetadataError::Write {
+            path: path.clone(),
+            source,
+        })?;
+        Ok(written)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+    use tempfile::TempDir;
+
+    fn read_back(path: &Path) -> Vec<u8> {
+        std::fs::read(path).expect("read metadata file")
+    }
+
+    #[test]
+    fn empty_accumulator_writes_empty_array() {
+        let tempdir = TempDir::new().unwrap();
+        let accumulator = VideoMetadataAccumulator::new();
+        assert!(accumulator.is_empty());
+        let bytes = accumulator.finish(tempdir.path()).unwrap();
+        let written = read_back(&tempdir.path().join(TRACE_JSON_FILENAME));
+        assert_eq!(written, b"[]");
+        assert_eq!(bytes, written.len() as u64);
+    }
+
+    #[test]
+    fn fixture_matches_expected_video_trace_output() {
+        // Pins the exact sidecar byte layout:
+        // - compact JSON (no whitespace, non-ASCII left unescaped)
+        // - on every entry: `frame_idx` is the index, `frame` is null
+        // - object insertion order is preserved
+        //
+        // Inputs intentionally exercise: integer + float timestamps, string
+        // values, nested objects, and an entry whose `frame` key was already
+        // present (overwrite path).
+        let tempdir = TempDir::new().unwrap();
+        let mut accumulator = VideoMetadataAccumulator::new();
+
+        let mut entry_a = Map::new();
+        entry_a.insert("timestamp".to_string(), json!(1.5));
+        entry_a.insert("width".to_string(), json!(640));
+        entry_a.insert("height".to_string(), json!(480));
+        accumulator.record_frame(entry_a);
+
+        let mut entry_b = Map::new();
+        entry_b.insert("timestamp".to_string(), json!(2));
+        entry_b.insert("source".to_string(), json!("rgb-camera"));
+        entry_b.insert("extra".to_string(), json!({"sequence": 17, "flag": true}));
+        // Pre-existing `frame` payload — `finish` must overwrite it with null;
+        // this test confirms that.
+        entry_b.insert("frame".to_string(), json!("stale"));
+        accumulator.record_frame(entry_b);
+
+        let written_bytes = accumulator.finish(tempdir.path()).unwrap();
+        let actual = read_back(&tempdir.path().join(TRACE_JSON_FILENAME));
+
+        let expected = br#"[{"timestamp":1.5,"width":640,"height":480,"frame":null,"frame_idx":0},{"timestamp":2,"source":"rgb-camera","extra":{"sequence":17,"flag":true},"frame":null,"frame_idx":1}]"#.to_vec();
+        assert_eq!(
+            actual, expected,
+            "metadata sidecar bytes diverged from expected fixture"
+        );
+        assert_eq!(written_bytes, expected.len() as u64);
+    }
+
+    #[test]
+    fn frame_idx_starts_at_zero_and_is_contiguous() {
+        let tempdir = TempDir::new().unwrap();
+        let mut accumulator = VideoMetadataAccumulator::new();
+        for index in 0..5 {
+            let mut entry = Map::new();
+            entry.insert("timestamp".to_string(), json!(index as f64 * 0.033));
+            accumulator.record_frame(entry);
+        }
+        assert_eq!(accumulator.len(), 5);
+        accumulator.finish(tempdir.path()).unwrap();
+
+        let bytes = read_back(&tempdir.path().join(TRACE_JSON_FILENAME));
+        let parsed: Value = serde_json::from_slice(&bytes).unwrap();
+        let array = parsed.as_array().unwrap();
+        assert_eq!(array.len(), 5);
+        for (index, entry) in array.iter().enumerate() {
+            assert_eq!(entry["frame_idx"], json!(index as u64));
+            assert!(entry["frame"].is_null());
+        }
+    }
+
+    #[test]
+    fn record_value_flattens_array_payloads() {
+        // The producer is allowed to batch several frames into one envelope,
+        // so a list payload must be flattened: the accumulator records each
+        // contained dict as its own entry.
+        let tempdir = TempDir::new().unwrap();
+        let mut accumulator = VideoMetadataAccumulator::new();
+        accumulator.record_value(json!([
+            {"timestamp": 0.1},
+            {"timestamp": 0.2},
+            42, // non-object — dropped
+            {"timestamp": 0.3},
+        ]));
+        assert_eq!(accumulator.len(), 3);
+        accumulator.finish(tempdir.path()).unwrap();
+
+        let bytes = read_back(&tempdir.path().join(TRACE_JSON_FILENAME));
+        let parsed: Value = serde_json::from_slice(&bytes).unwrap();
+        assert_eq!(parsed.as_array().unwrap().len(), 3);
+        assert_eq!(parsed[2]["timestamp"], json!(0.3));
+    }
+
+    #[test]
+    fn record_value_ignores_scalar_payloads() {
+        let mut accumulator = VideoMetadataAccumulator::new();
+        accumulator.record_value(json!(42));
+        accumulator.record_value(json!("ignored"));
+        accumulator.record_value(json!(null));
+        assert!(accumulator.is_empty());
+    }
+}
diff --git a/rust/data_daemon/src/encoding/mod.rs b/rust/data_daemon/src/encoding/mod.rs
new file mode 100644
index 000000000..30c1caca4
--- /dev/null
+++ b/rust/data_daemon/src/encoding/mod.rs
@@ -0,0 +1,17 @@
+//! Per-trace on-disk encoders.
+//!
+//! - [`json_trace`] — incremental JSON-array writer used by scalar / sensor
+//!   traces and the video sidecar.
+//! - [`video_encoder`] — supervised `ffmpeg` subprocess that turns a NUT
+//!   chunk into a per-chunk MP4 pair, and stitches the chunk segments into
+//!   the final `lossy.mp4` / `lossless.mp4` on `EndTrace`.
+//! - [`metadata`] — accumulator that flushes the video-trace sidecar
+//!   `trace.json` alongside the mp4 outputs.
+//!
+//! A few writer methods are exercised only by unit tests; those carry a
+//! targeted `#[allow(dead_code)]` at their own definition rather than a
+//! module-wide allow, so genuinely-dead code elsewhere still surfaces.
+
+pub mod json_trace;
+pub mod metadata;
+pub mod video_encoder;
diff --git a/rust/data_daemon/src/encoding/video_encoder.rs b/rust/data_daemon/src/encoding/video_encoder.rs
new file mode 100644
index 000000000..065d18b47
--- /dev/null
+++ b/rust/data_daemon/src/encoding/video_encoder.rs
@@ -0,0 +1,1175 @@
+//! Per-chunk `ffmpeg` transcoder and segment concatenator.
+//!
+//! The producer spools video frames into a sequence of NUT chunk files
+//! beneath each trace's `chunks/` directory. As each chunk arrives the
+//! per-trace actor calls [`VideoEncoder::encode_chunk`] which shells out to
+//! ffmpeg to produce two MP4 segments:
+//!
+//! - `chunk_NNNN_lossy.mp4` — `libx264` `-pix_fmt yuv420p -preset ultrafast
+//!   -qp 23` for fast playback, downscaled to a preview resolution (see
+//!   [`LOSSY_PREVIEW_MAX_HEIGHT`]) since it is only a derivable proxy and the
+//!   full-resolution encode is the transcoder's dominant cost.
+//! - `chunk_NNNN_lossless.mp4` — `libx264rgb` `-pix_fmt rgb24 -preset
+//!   ultrafast -qp 0` for mathematically-lossless archival. Encoding the
+//!   captured rgb24 frames directly (rather than converting to a YUV format)
+//!   keeps the output bit-exact to the captured pixels, encodes ~2.5× faster
+//!   than a `yuv444p10le` pass, and matches the Python reference encoder.
+//!   `ffv1` would also be lossless but is incompatible with the `.mp4`
+//!   container the on-disk layout contract requires.
+//!
+//! On `EndTrace` the per-trace actor calls [`VideoEncoder::concat_segments`]
+//! which stream-copies the per-chunk segments into the final `lossy.mp4` /
+//! `lossless.mp4`. Stream-copy avoids a second decode/encode pass, so the
+//! tail of a recording finishes in seconds regardless of total length.
+//!
+//! Both outputs are verified non-empty before the caller is told the
+//! invocation succeeded; ffmpeg occasionally exits 0 but produces a
+//! zero-byte file when the requested codec is unavailable in the local
+//! build.
+
+use std::ffi::OsString;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::process::Stdio;
+
+use tokio::process::Command;
+
+/// Default ffmpeg binary name. Tests override via [`VideoEncoder::with_binary`]
+/// when they need to point at a specific build.
+pub const DEFAULT_FFMPEG_BINARY: &str = "ffmpeg";
+
+/// `nice` value applied to each transcode child via `setpriority` before exec.
+///
+/// Per-chunk transcoding is throughput-oriented background work; a robot's
+/// `nc.log_*` calls are latency-critical. On a small (2-vCPU) host an unniced
+/// ffmpeg child preempts the producer's logging threads at recording
+/// boundaries, so a joint publish that does ~3 ms of work spends ~20 ms
+/// descheduled. Renicing the encoder lets the kernel scheduler favour the
+/// foreground logging threads while ffmpeg still consumes otherwise-idle CPU.
+const ENCODER_NICENESS: libc::c_int = 10;
+
+/// libx264 frame-thread cap applied to *each* encode output stream.
+///
+/// libx264 defaults to roughly one frame-thread per core. With the transcode
+/// concurrency permit pool also scaling with the core count, the two multiply:
+/// a 14-core host ran ~14 ffmpeg children each spawning ~14 threads, ~200
+/// encode threads fighting over 14 cores. That thrashes the scheduler and
+/// steals cycles from the latency-critical `nc.log_*` threads — the exact
+/// path the renice above tries to protect. Capping each output's thread pool
+/// keeps the total encode-thread count near the core count instead. Measured
+/// sweet spot on a 14-core host: ~`cores / 2` concurrent children at 2 threads
+/// per output beat the uncapped default on both aggregate throughput and
+/// logging-thread jitter, so [`default_ffmpeg_concurrency`] divides by this.
+/// +/// [`default_ffmpeg_concurrency`]: crate::pipeline::trace_actor::default_ffmpeg_concurrency +pub const ENCODE_THREADS_PER_OUTPUT: usize = 2; + +/// Height (in lines) the lossy *preview* proxy is downscaled to. +/// +/// At 8-context 1080p60 the transcoder is CPU-bound, and the full-resolution +/// lossy pass is the long pole (~38% of the per-chunk encode work) β€” yet the +/// lossy output is only a fast-playback proxy, derivable from the lossless +/// archival copy. Encoding it at preview resolution instead cuts that pass' +/// cost roughly with the pixel-count reduction: measured ~+21% aggregate +/// transcode throughput at 8Γ—1080p60, which is what buys the spool real-time +/// headroom without touching the bit-exact lossless output (which stays at +/// native resolution). +/// +/// The downscale (see [`preview_scale_filter`]) caps *height* at this many +/// lines while preserving aspect ratio, never upscales a smaller source, and +/// rounds both dimensions to even (an H.264 `yuv420p` requirement) β€” so it is +/// correct for any input resolution or aspect ratio. 480 lines is ample for a +/// scrub/preview proxy. +const LOSSY_PREVIEW_MAX_HEIGHT: u32 = 480; + +/// Inputs to one per-chunk transcode invocation. +#[derive(Debug, Clone)] +pub struct ChunkEncodeRequest { + /// Source NUT chunk file produced by the producer. + pub raw_nut: PathBuf, + /// Destination for the per-chunk lossy mp4 segment. + pub lossy_out: PathBuf, + /// Destination for the per-chunk lossless mp4 segment. + pub lossless_out: PathBuf, +} + +/// Outcome of a successful per-chunk transcode. +#[derive(Debug, Clone, Copy)] +pub struct ChunkEncodeOutcome { + /// Bytes written to the lossy segment. + pub lossy_bytes: u64, + /// Bytes written to the lossless segment. + pub lossless_bytes: u64, +} + +/// Outcome of a successful concat invocation. +#[derive(Debug, Clone, Copy)] +pub struct ConcatOutcome { + /// Bytes written to the concatenated output. 
+ pub bytes: u64, +} + +/// Errors raised by [`VideoEncoder`] operations. +#[derive(Debug, thiserror::Error)] +pub enum VideoEncodeError { + /// `ffmpeg` could not be located or spawned (typically `ENOENT`). + #[error("failed to spawn `{}`: {source}", binary.to_string_lossy())] + Spawn { + /// Binary that failed to spawn. + binary: OsString, + /// Underlying OS error. + #[source] + source: std::io::Error, + }, + /// `ffmpeg` exited with a non-zero status. `stderr_tail` captures the last + /// few KiB of ffmpeg's stderr so the caller can surface a diagnostic + /// without trawling the daemon log. + #[error("`ffmpeg` exited with status {status}: {stderr_tail}")] + NonZeroExit { + /// Exit status reported by the child. + status: String, + /// Tail of the child's stderr (UTF-8 with replacements). + stderr_tail: String, + }, + /// One of the expected mp4 outputs was missing or empty after the encoder + /// claimed success β€” usually means the codec is not built into the local + /// ffmpeg binary. + #[error("expected output {path} is missing or empty after ffmpeg exit")] + OutputMissing { + /// Path that should have been written. + path: PathBuf, + }, + /// An I/O operation around the encode (file metadata, unlink, concat list + /// write) failed. + #[error("I/O failure during transcode for {path}: {source}")] + Io { + /// Path being inspected when the error occurred. + path: PathBuf, + /// Underlying I/O error. + #[source] + source: std::io::Error, + }, + /// `concat_segments` was called with no input segments β€” caller bug. + #[error("concat_segments called with empty segment list")] + EmptySegments, +} + +/// Failure modes of [`VideoEncoder::preflight`], surfaced at daemon startup so +/// an unusable ffmpeg is reported once, clearly, instead of failing every +/// video encode at recording time. +#[derive(Debug, thiserror::Error)] +pub enum FfmpegPreflightError { + /// The ffmpeg binary could not be executed at all β€” typically not + /// installed or not on `PATH`. 
+ #[error( + "ffmpeg not found: could not run `{}` ({source}). \ + Install ffmpeg (>= 4.0, built with libx264) and ensure it is on PATH.", + binary.to_string_lossy() + )] + NotFound { + /// Binary that could not be executed. + binary: OsString, + /// Underlying spawn error (e.g. `ENOENT`). + #[source] + source: std::io::Error, + }, + /// ffmpeg ran but rejected a capability the encoder depends on: the + /// `-vsync passthrough` frame-timing mode or the libx264 encoder. + #[error( + "ffmpeg at `{}` (version {version}) is incompatible: a required capability was \ + rejected. The daemon needs `-vsync passthrough` (drop-free, frame-accurate \ + encoding β€” note `-fps_mode passthrough` is ffmpeg >= 5.1 only) and the libx264 / \ + libx264rgb encoders. Install a compatible ffmpeg (>= 4.0 with libx264). ffmpeg \ + reported:\n{stderr_tail}", + binary.to_string_lossy() + )] + Incompatible { + /// Binary that was probed. + binary: OsString, + /// Detected ffmpeg version, or `"unknown"`. + version: String, + /// Tail of ffmpeg's stderr from the failed probe. + stderr_tail: String, + }, +} + +/// Builder for ffmpeg invocations. Keeps the ffmpeg binary path configurable +/// so unit tests can shim in a wrapper script if needed. +#[derive(Debug, Clone)] +pub struct VideoEncoder { + binary: OsString, +} + +impl Default for VideoEncoder { + fn default() -> Self { + Self { + binary: OsString::from(DEFAULT_FFMPEG_BINARY), + } + } +} + +impl VideoEncoder { + /// Construct an encoder that resolves `ffmpeg` from `PATH`. + pub fn new() -> Self { + Self::default() + } + + /// Override the ffmpeg binary location (test/diagnostic seam). + #[allow(dead_code)] + pub fn with_binary(mut self, binary: impl Into) -> Self { + self.binary = binary.into(); + self + } + + /// Verify the configured ffmpeg is present and supports the capabilities + /// [`encode_chunk`](Self::encode_chunk) depends on, returning the detected + /// version string on success. 
+ /// + /// Run once at daemon startup so an incompatible install fails fast with a + /// clear message instead of silently marking every video trace `failed` at + /// recording time. Two steps: `ffmpeg -version` confirms the binary runs + /// (and yields a version for diagnostics), then a one-frame synthetic + /// encode to the null muxer exercises the exact `-vsync passthrough` knob β€” + /// the option ffmpeg < 5.1 rejects when spelled `-fps_mode` β€” together with + /// the libx264 encoder. + pub fn preflight(&self) -> Result { + let version = self.detect_ffmpeg_version()?; + self.probe_passthrough_encode(&version)?; + Ok(version) + } + + /// Run `ffmpeg -version`, mapping a spawn failure to + /// [`FfmpegPreflightError::NotFound`] and parsing the reported version. + fn detect_ffmpeg_version(&self) -> Result { + let output = std::process::Command::new(&self.binary) + .arg("-hide_banner") + .arg("-version") + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output() + .map_err(|source| FfmpegPreflightError::NotFound { + binary: self.binary.clone(), + source, + })?; + Ok(parse_ffmpeg_version(&output.stdout)) + } + + /// Encode one synthetic frame to the null muxer through **both** output + /// configurations the real [`encode_chunk`](Self::encode_chunk) uses β€” the + /// `yuv420p libx264` lossy pass *and* the `rgb24 libx264rgb -qp 0` lossless + /// pass. A non-zero exit means the local ffmpeg lacks a capability the + /// encoder needs. The lossless `libx264rgb` path is the one that actually + /// varies between builds, so probing only the lossy pass (as before) let the + /// "fail fast at startup" check pass while every real lossless encode failed + /// at recording time. 
+ fn probe_passthrough_encode(&self, version: &str) -> Result<(), FfmpegPreflightError> { + // One 16x16 yuv420p frame (a 16x16 plane plus two 8x8 planes = 384 + // bytes) fed via the rawvideo demuxer on stdin β€” no lavfi/input-file + // dependency, so the probe works even on a minimal build. ffmpeg parses + // (and would reject) the options before reading stdin, so an unsupported + // `-vsync passthrough` or `libx264rgb` encode fails immediately rather + // than on a healthy input. The two `-map 0:v -c:v …` blocks exercise the + // same codec and pixel formats as `encode_chunk` (the build-dependent + // parts); the real lossy encode adds options the probe omits (e.g. + // `-qp 23` / `+genpts`), so the full option set is not identical. + const PROBE_FRAME_LEN: usize = 16 * 16 * 3 / 2; + let frame = vec![128u8; PROBE_FRAME_LEN]; + + let mut child = std::process::Command::new(&self.binary) + .arg("-hide_banner") + .arg("-loglevel") + .arg("error") + .arg("-f") + .arg("rawvideo") + .arg("-pix_fmt") + .arg("yuv420p") + .arg("-video_size") + .arg("16x16") + .arg("-i") + .arg("-") + // Lossy pass (matches encode_chunk's first output). + .arg("-map") + .arg("0:v") + .arg("-vsync") + .arg("passthrough") + .arg("-c:v") + .arg("libx264") + .arg("-pix_fmt") + .arg("yuv420p") + .arg("-preset") + .arg("ultrafast") + .arg("-f") + .arg("null") + .arg("-") + // Lossless pass (matches encode_chunk's second output) β€” the + // build-dependent `libx264rgb` rgb24 capability the encoder relies on. 
+ .arg("-map") + .arg("0:v") + .arg("-vsync") + .arg("passthrough") + .arg("-c:v") + .arg("libx264rgb") + .arg("-pix_fmt") + .arg("rgb24") + .arg("-qp") + .arg("0") + .arg("-preset") + .arg("ultrafast") + .arg("-f") + .arg("null") + .arg("-") + .stdin(Stdio::piped()) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|source| FfmpegPreflightError::NotFound { + binary: self.binary.clone(), + source, + })?; + + // The frame is far smaller than a pipe buffer, so writing then dropping + // stdin cannot deadlock against ffmpeg's reads. + if let Some(mut stdin) = child.stdin.take() { + let _ = stdin.write_all(&frame); + } + + let output = child + .wait_with_output() + .map_err(|source| FfmpegPreflightError::NotFound { + binary: self.binary.clone(), + source, + })?; + + if output.status.success() { + Ok(()) + } else { + Err(FfmpegPreflightError::Incompatible { + binary: self.binary.clone(), + version: version.to_string(), + stderr_tail: tail_stderr(&output.stderr), + }) + } + } + + /// Transcode one NUT chunk into the configured per-chunk mp4 outputs. + /// + /// The source `raw.nut` is left in place β€” the caller is responsible for + /// unlinking it after verifying both outputs landed (the per-trace actor + /// drops the source as part of its envelope handling so a partial encode + /// can be retried via the recovery sweep without needing to re-spool). + pub async fn encode_chunk( + &self, + request: &ChunkEncodeRequest, + ) -> Result { + ensure_parent_dirs(&request.lossy_out)?; + ensure_parent_dirs(&request.lossless_out)?; + + // `-y` overwrites existing outputs (resume safety: a previous failed + // run may have left a partial mp4). `-fflags +genpts` rebuilds the + // presentation timestamps from the NUT timing when the spool was + // truncated mid-frame. 
`-vsync passthrough` (applied per output) is + // the critical knob here: the NUT chunk uses `time_base = 1/1_000_000` + // so ffmpeg's demuxer reports `r_frame_rate = 1_000_000/1` (one + // million fps). With the default `cfr` policy the encoder would then + // synthesise an output frame at every microsecond slot between + // consecutive input PTS values β€” for a 10 s clip that is ~10 million + // duplicate output frames, and the encode effectively never completes. + // + // We must NOT use `vfr` here: vfr drops any frame whose PTS rounds to + // the same tick as its predecessor at the output stream timescale. + // Real-time capture has jitter, so closely-spaced frames (a few hundred + // Β΅s apart under threaded logging) collide and are silently dropped β€” + // the encoded video then has fewer frames than the per-frame timestamp + // sidecar (`trace.json`), and the downstream synced-recording reader + // dereferences a frame index the video never contained. `passthrough` + // emits every input frame exactly once at its original PTS and never + // drops, which is what real-time camera capture actually is. + // + // We spell this `-vsync passthrough` rather than the newer + // `-fps_mode passthrough`: the two select the identical passthrough + // mode, but `-fps_mode` is unrecognised by ffmpeg < 5.1 (e.g. the 4.4 + // build shipped on Ubuntu 22.04 / the integration host), where it aborts + // the encode with "Unrecognized option 'fps_mode'". `-vsync` is accepted + // on both (only deprecated, not removed, on 5.1+). Two `-map 0:v -c:v ...` + // blocks emit both outputs from a single demux pass. + // Bound each output's libx264 thread pool (see + // `ENCODE_THREADS_PER_OUTPUT`) so the transcode fleet doesn't + // oversubscribe the cores the logging threads need. + let encode_threads = ENCODE_THREADS_PER_OUTPUT.to_string(); + // Downscale the lossy preview proxy (only) to keep the dominant pass + // cheap at high resolution. The lossless output stays native. 
+ let preview_filter = preview_scale_filter(LOSSY_PREVIEW_MAX_HEIGHT); + let mut command = Command::new(&self.binary); + command + .arg("-y") + .arg("-hide_banner") + .arg("-nostdin") + .arg("-loglevel") + .arg("error") + .arg("-fflags") + .arg("+genpts") + .arg("-i") + .arg(&request.raw_nut) + .arg("-map") + .arg("0:v") + .arg("-vsync") + .arg("passthrough") + // Lossy preview proxy only: cap to preview resolution (see + // `preview_scale_filter`). vsync passthrough still emits every + // input frame, so the lossy frame count matches the lossless + // output and the per-frame timestamp sidecar. + .arg("-vf") + .arg(&preview_filter) + .arg("-c:v") + .arg("libx264") + .arg("-threads") + .arg(&encode_threads) + .arg("-pix_fmt") + .arg("yuv420p") + .arg("-preset") + .arg("ultrafast") + .arg("-qp") + .arg("23") + .arg(&request.lossy_out) + .arg("-map") + .arg("0:v") + .arg("-vsync") + .arg("passthrough") + // libx264rgb encodes the rgb24 frames directly: bit-exact to the + // captured pixels, ~2.5Γ— faster than a yuv444p10le pass, and the + // format the Python reference encoder writes. + .arg("-c:v") + .arg("libx264rgb") + .arg("-threads") + .arg(&encode_threads) + .arg("-pix_fmt") + .arg("rgb24") + .arg("-preset") + .arg("ultrafast") + .arg("-qp") + .arg("0") + .arg(&request.lossless_out) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + // ffmpeg keeps file descriptors open across `fork`/`exec`; the + // daemon's iceoryx2 sockets must NOT leak into the encoder, so we + // rely on Tokio's default `cloexec` behaviour and additionally + // request `kill_on_drop` to clean up if the supervising future is + // cancelled mid-flight. + .kill_on_drop(true); + // SAFETY: the closure runs in the forked child between `fork` and + // `exec`; `setpriority` is a single raw syscall that touches no + // userspace lock or allocator state, so it is safe to call here between + // fork and exec. 
A failed renice is non-fatal (ignored), so the encode + // still runs at default priority. + unsafe { + command.pre_exec(|| { + libc::setpriority(libc::PRIO_PROCESS, 0, ENCODER_NICENESS); + Ok(()) + }); + } + + let output = command + .output() + .await + .map_err(|source| VideoEncodeError::Spawn { + binary: self.binary.clone(), + source, + })?; + + if !output.status.success() { + let stderr_tail = tail_stderr(&output.stderr); + return Err(VideoEncodeError::NonZeroExit { + status: format!("{:?}", output.status), + stderr_tail, + }); + } + + let lossy_bytes = non_empty_file_size(&request.lossy_out)?; + let lossless_bytes = non_empty_file_size(&request.lossless_out)?; + + Ok(ChunkEncodeOutcome { + lossy_bytes, + lossless_bytes, + }) + } + + /// Stream-copy concatenate `segments` into `out`. + /// + /// Uses ffmpeg's `concat` demuxer with `-c copy`, so no transcode + /// happens β€” total cost is bounded by the read+write of the segment + /// bytes. Caller is responsible for unlinking the source segments after + /// the concat succeeds. + pub async fn concat_segments( + &self, + segments: &[PathBuf], + out: &Path, + ) -> Result { + if segments.is_empty() { + return Err(VideoEncodeError::EmptySegments); + } + ensure_parent_dirs(out)?; + + // The concat demuxer reads a list-file describing absolute segment + // paths. We write it next to the output so a future debugging pass + // can see exactly which segments were concatenated; the file is + // unlinked on the success path so it doesn't accumulate. + let list_path = list_file_for(out); + write_concat_list(&list_path, segments)?; + + let result = Command::new(&self.binary) + .arg("-y") + .arg("-hide_banner") + .arg("-nostdin") + .arg("-loglevel") + .arg("error") + .arg("-f") + .arg("concat") + // `-safe 0` permits absolute paths (and any non-portable chars) + // in the list file. Without it ffmpeg rejects paths that aren't + // simple relative names. 
/// Build the ffmpeg `-vf` expression that shrinks the lossy preview proxy to
/// at most `max_height` lines.
///
/// Both axes are multiplied by the same factor `min(1, max_height/ih)`, so the
/// aspect ratio is preserved and a source at or below the cap is not enlarged.
/// Each axis is then rounded down to an even pixel count with `trunc(x/2)*2`.
/// The comma inside `min(...)` is emitted as `\,` so the filtergraph parser
/// does not treat it as a filter separator.
fn preview_scale_filter(max_height: u32) -> String {
    // Shrink factor shared by width and height.
    let factor = format!("min(1\\,{max_height}/ih)");
    // Scale one input dimension (`iw` / `ih`) and round it down to even.
    let even_axis = |dimension: &str| format!("trunc({dimension}*{factor}/2)*2");
    format!("scale={}:{}", even_axis("iw"), even_axis("ih"))
}
/// Derive the path of the temporary concat list file for `out`.
///
/// The list is named after the output's final path component with
/// `.concat.txt` appended (`concat_list.concat.txt` when `out` has no such
/// component). It is placed in `out`'s parent directory, or returned as a
/// bare relative name when that parent is absent or empty.
fn list_file_for(out: &Path) -> PathBuf {
    let mut list_name = match out.file_name() {
        Some(final_component) => final_component.to_os_string(),
        None => OsString::from("concat_list"),
    };
    list_name.push(".concat.txt");

    // `Path::parent` yields `Some("")` for a bare file name; treat that the
    // same as having no parent at all.
    let directory = out.parent().filter(|dir| !dir.as_os_str().is_empty());
    if let Some(dir) = directory {
        dir.join(list_name)
    } else {
        PathBuf::from(list_name)
    }
}
/// Return the trailing portion of `stderr` as a lossy UTF-8 string, taken from
/// at most the last 4 KiB of raw bytes.
///
/// The cap keeps log lines bounded when ffmpeg's diagnostic output runs to
/// megabytes (e.g. one warning per frame). When truncation happens, the cut is
/// moved forward past up to three UTF-8 continuation bytes so the tail does
/// not begin in the middle of a multi-byte character. Without that, the
/// returned string would start with spurious U+FFFD replacement characters.
///
/// Bytes that are genuinely not UTF-8 are still replaced by U+FFFD, so for
/// such input the returned string can be longer than the bytes examined.
fn tail_stderr(stderr: &[u8]) -> String {
    const MAX_TAIL: usize = 4 * 1024;
    // A UTF-8 sequence has at most three continuation bytes (`10xxxxxx`).
    const MAX_CONTINUATION_BYTES: usize = 3;

    let mut start = stderr.len().saturating_sub(MAX_TAIL);
    if start > 0 {
        // Only adjust when something was actually cut off; untruncated input
        // is decoded exactly as given.
        start += stderr[start..]
            .iter()
            .take(MAX_CONTINUATION_BYTES)
            .take_while(|&&byte| byte & 0b1100_0000 == 0b1000_0000)
            .count();
    }
    String::from_utf8_lossy(&stderr[start..]).into_owned()
}
/// Extract the version token from `ffmpeg -version` stdout.
///
/// Only the first line is examined. It must start with `ffmpeg version `, and
/// the first whitespace-delimited word after that prefix is returned (e.g.
/// `4.4.2-0ubuntu0.22.04.1`). Any other shape — empty output, a different
/// banner, or nothing after the prefix — yields `"unknown"`.
fn parse_ffmpeg_version(stdout: &[u8]) -> String {
    const BANNER_PREFIX: &str = "ffmpeg version ";
    const UNKNOWN: &str = "unknown";

    let text = String::from_utf8_lossy(stdout);
    let Some(first_line) = text.lines().next() else {
        return UNKNOWN.to_string();
    };
    let Some(after_prefix) = first_line.strip_prefix(BANNER_PREFIX) else {
        return UNKNOWN.to_string();
    };
    match after_prefix.split_whitespace().next() {
        Some(token) => token.to_string(),
        None => UNKNOWN.to_string(),
    }
}
+ fn write_synthetic_nut_sized( + ffmpeg: &Path, + path: &Path, + frame_count: u64, + width: u32, + height: u32, + ) { + let duration = format!("{}", frame_count); // 1 fps testsrc β†’ frame_count seconds + let status = StdCommand::new(ffmpeg) + .args([ + "-y", + "-hide_banner", + "-loglevel", + "error", + "-f", + "lavfi", + "-i", + ]) + .arg(format!( + "testsrc=duration={duration}:size={width}x{height}:rate=1" + )) + .args(["-c:v", "rawvideo", "-pix_fmt", "rgb24", "-f", "nut"]) + .arg(path) + .status() + .expect("ffmpeg synth status"); + assert!(status.success(), "synthetic NUT generation failed"); + } + + #[test] + fn missing_outputs_classify_as_output_missing() { + let tempdir = TempDir::new().unwrap(); + let result = non_empty_file_size(&tempdir.path().join("absent.mp4")); + assert!(matches!( + result, + Err(VideoEncodeError::OutputMissing { .. }) + )); + } + + #[test] + fn empty_outputs_classify_as_output_missing() { + let tempdir = TempDir::new().unwrap(); + let path = tempdir.path().join("empty.mp4"); + std::fs::write(&path, []).unwrap(); + let result = non_empty_file_size(&path); + assert!(matches!( + result, + Err(VideoEncodeError::OutputMissing { .. }) + )); + } + + #[test] + fn tail_stderr_caps_excessive_output() { + let bytes = vec![b'x'; 16 * 1024]; + let tail = tail_stderr(&bytes); + assert_eq!(tail.len(), 4 * 1024); + } + + #[test] + fn preview_scale_filter_builds_expected_expression() { + // The comma inside `min(1, …)` MUST stay escaped (`\,`) β€” an unescaped + // comma would be parsed as a filter separator and ffmpeg would reject + // the graph. Both axes scale by the same `min(1, H/ih)` factor (AR + // preserved, no upscale) and round to even (`trunc(/2)*2`). + assert_eq!( + preview_scale_filter(480), + "scale=trunc(iw*min(1\\,480/ih)/2)*2:trunc(ih*min(1\\,480/ih)/2)*2" + ); + // The cap is interpolated, so a different target reshapes the filter. 
+ assert!(preview_scale_filter(720).contains("720/ih")); + } + + #[test] + fn parse_version_extracts_token_and_falls_back() { + assert_eq!( + parse_ffmpeg_version(b"ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000\n"), + "4.4.2-0ubuntu0.22.04.1" + ); + assert_eq!(parse_ffmpeg_version(b"ffmpeg version n6.1\n"), "n6.1"); + assert_eq!(parse_ffmpeg_version(b"some custom banner\n"), "unknown"); + assert_eq!(parse_ffmpeg_version(b""), "unknown"); + } + + #[test] + fn preflight_reports_not_found_for_missing_binary() { + let result = VideoEncoder::new() + .with_binary("nc-definitely-not-a-real-ffmpeg-binary") + .preflight(); + assert!( + matches!(result, Err(FfmpegPreflightError::NotFound { .. })), + "expected NotFound, got {result:?}" + ); + } + + #[test] + fn preflight_accepts_a_real_ffmpeg() { + // Skip where the toolchain is unavailable, matching the encode tests. + let Some(ffmpeg) = locate_binary("ffmpeg") else { + return; + }; + let version = VideoEncoder::new() + .with_binary(ffmpeg) + .preflight() + .expect("system ffmpeg should pass preflight"); + assert!(!version.is_empty(), "version string should be populated"); + } + + #[test] + fn concat_list_escapes_single_quotes() { + let tempdir = TempDir::new().unwrap(); + let list = tempdir.path().join("list.txt"); + let segments = vec![ + PathBuf::from("/var/data/recordings/rec/cam/trace/chunks/chunk_0000.nut"), + PathBuf::from("/var/data/rec'with quote/trace/chunks/chunk_0001.nut"), + ]; + write_concat_list(&list, &segments).expect("write list"); + let contents = std::fs::read_to_string(&list).unwrap(); + assert!( + contents.contains("file '/var/data/recordings/rec/cam/trace/chunks/chunk_0000.nut'") + ); + assert!( + contents.contains(r"file '/var/data/rec'\''with quote/trace/chunks/chunk_0001.nut'"), + "got: {contents}" + ); + } + + #[test] + fn concat_list_absolutises_relative_segment_paths() { + // ffmpeg's concat demuxer resolves entries against the list-file's + // directory, not the daemon's CWD. 
Relative segment paths must be + // joined against the current working directory before being written + // so the demuxer ends up at the same file the daemon meant to open. + let tempdir = TempDir::new().unwrap(); + let list = tempdir.path().join("list.txt"); + let cwd = std::env::current_dir().unwrap(); + let segments = vec![PathBuf::from("rel/chunk_0000.mp4")]; + write_concat_list(&list, &segments).expect("write list"); + let contents = std::fs::read_to_string(&list).unwrap(); + let expected = cwd.join("rel/chunk_0000.mp4"); + assert!( + contents.contains(&format!("file '{}'", expected.display())), + "got: {contents}" + ); + } + + #[test] + fn concat_segments_rejects_empty_input() { + let tempdir = TempDir::new().unwrap(); + let out = tempdir.path().join("out.mp4"); + // Sync wrapper so the test body isn't async for this trivial case. + let result = futures_block(VideoEncoder::new().concat_segments(&[], &out)); + assert!(matches!(result, Err(VideoEncodeError::EmptySegments))); + } + + /// Drive a future to completion on a single-threaded tokio runtime. + /// Used by the trivial unit tests that don't need `#[tokio::test]` + /// scaffolding. + fn futures_block(future: impl std::future::Future) -> T { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap() + .block_on(future) + } + + #[tokio::test] + async fn encode_chunk_emits_sealed_mp4_outputs() { + let ffmpeg = match locate_binary("ffmpeg") { + Some(path) => path, + None => { + eprintln!( + "ffmpeg not on PATH β€” skipping encode_chunk test. Install \ + `ffmpeg` to enable this test." 
+ ); + return; + } + }; + let ffprobe = match locate_binary("ffprobe") { + Some(path) => path, + None => { + eprintln!("ffprobe not on PATH β€” skipping encode_chunk test."); + return; + } + }; + + let tempdir = TempDir::new().unwrap(); + let raw = tempdir.path().join("chunk_0000.nut"); + let lossy = tempdir.path().join("chunk_0000_lossy.mp4"); + let lossless = tempdir.path().join("chunk_0000_lossless.mp4"); + + write_synthetic_nut(&ffmpeg, &raw, 8); + + let encoder = VideoEncoder::new(); + let request = ChunkEncodeRequest { + raw_nut: raw.clone(), + lossy_out: lossy.clone(), + lossless_out: lossless.clone(), + }; + let outcome = encoder.encode_chunk(&request).await.expect("transcode"); + + assert!(outcome.lossy_bytes > 0); + assert!(outcome.lossless_bytes > 0); + // The new encode_chunk leaves the source in place β€” the per-trace + // actor owns the unlink on its own success path so a partial + // post-encode failure can still be cleaned up by the recovery sweep. + assert!(raw.exists(), "encode_chunk must not unlink its source"); + + for path in [&lossy, &lossless] { + let status = StdCommand::new(&ffprobe) + .args(["-v", "error", "-show_streams", "-of", "json"]) + .arg(path) + .output() + .expect("spawn ffprobe"); + assert!( + status.status.success(), + "ffprobe rejected {}: stderr={}", + path.display(), + String::from_utf8_lossy(&status.stderr) + ); + let parsed: serde_json::Value = + serde_json::from_slice(&status.stdout).expect("ffprobe JSON"); + let streams = parsed["streams"].as_array().expect("streams array"); + assert_eq!( + streams.len(), + 1, + "{} should contain exactly one stream", + path.display() + ); + assert_eq!(streams[0]["codec_type"], "video"); + // 16x16 is far below the preview cap, so the lossy downscale is a + // no-op here β€” both outputs keep the source geometry (no upscale). 
+            assert_eq!(streams[0]["width"], 16);
+            assert_eq!(streams[0]["height"], 16);
+        }
+    }
+
+    #[tokio::test]
+    async fn encode_chunk_downscales_lossy_preview_keeps_lossless_native() {
+        let (Some(ffmpeg), Some(ffprobe)) = (locate_binary("ffmpeg"), locate_binary("ffprobe"))
+        else {
+            eprintln!("ffmpeg/ffprobe not on PATH — skipping preview-downscale test.");
+            return;
+        };
+
+        let tempdir = TempDir::new().unwrap();
+        let raw = tempdir.path().join("chunk_0000.nut");
+        let lossy = tempdir.path().join("chunk_0000_lossy.mp4");
+        let lossless = tempdir.path().join("chunk_0000_lossless.mp4");
+
+        // A 1280x720 source: above the 480-line preview cap, 16:9 aspect.
+        write_synthetic_nut_sized(&ffmpeg, &raw, 6, 1280, 720);
+
+        let encoder = VideoEncoder::new();
+        encoder
+            .encode_chunk(&ChunkEncodeRequest {
+                raw_nut: raw,
+                lossy_out: lossy.clone(),
+                lossless_out: lossless.clone(),
+            })
+            .await
+            .expect("transcode");
+
+        let dims = |path: &Path| -> (u64, u64, u64) {
+            let out = StdCommand::new(&ffprobe)
+                .args([
+                    "-v",
+                    "error",
+                    "-select_streams",
+                    "v:0",
+                    "-count_frames",
+                    "-show_entries",
+                    "stream=width,height,nb_read_frames",
+                    "-of",
+                    "json",
+                ])
+                .arg(path)
+                .output()
+                .expect("spawn ffprobe");
+            let parsed: serde_json::Value =
+                serde_json::from_slice(&out.stdout).expect("ffprobe JSON");
+            let stream = &parsed["streams"][0];
+            let field = |key: &str| -> u64 {
+                let value = &stream[key];
+                value
+                    .as_u64()
+                    .or_else(|| value.as_str().and_then(|s| s.parse().ok()))
+                    .unwrap_or_else(|| panic!("missing {key}: {stream}"))
+            };
+            (field("width"), field("height"), field("nb_read_frames"))
+        };
+
+        let (lossy_w, lossy_h, lossy_frames) = dims(&lossy);
+        let (lossless_w, lossless_h, lossless_frames) = dims(&lossless);
+
+        // Lossy is capped to 480 lines, aspect ratio preserved (1280x720 ->
+        // 852x480), and both axes are even (yuv420p requirement).
+        assert_eq!(
+            (lossy_w, lossy_h),
+            (852, 480),
+            "lossy should be 480p preview"
+        );
+        assert_eq!(lossy_w % 2, 0, "lossy width must be even");
+        // Lossless keeps the native geometry — it is the archival copy.
+        assert_eq!(
+            (lossless_w, lossless_h),
+            (1280, 720),
+            "lossless must stay native resolution"
+        );
+        // Both outputs carry every source frame, so the per-frame timestamp
+        // sidecar stays aligned with each video.
+        assert_eq!(
+            lossy_frames, lossless_frames,
+            "lossy and lossless must hold the same frame count"
+        );
+        assert_eq!(lossy_frames, 6, "all source frames must be encoded");
+    }
+
+    #[tokio::test]
+    async fn concat_segments_produces_single_mp4() {
+        let ffmpeg = match locate_binary("ffmpeg") {
+            Some(path) => path,
+            None => {
+                eprintln!("ffmpeg not on PATH — skipping concat_segments test.");
+                return;
+            }
+        };
+        let ffprobe = match locate_binary("ffprobe") {
+            Some(path) => path,
+            None => {
+                eprintln!("ffprobe not on PATH — skipping concat_segments test.");
+                return;
+            }
+        };
+
+        let tempdir = TempDir::new().unwrap();
+        let encoder = VideoEncoder::new();
+        let mut segments = Vec::new();
+        let total_frames: u64 = 4 * 3;
+        // Encode three synthetic 4-frame NUT chunks into per-chunk MP4s.
+        for chunk_index in 0..3u32 {
+            let raw = tempdir.path().join(format!("chunk_{chunk_index:04}.nut"));
+            let lossy = tempdir
+                .path()
+                .join(format!("chunk_{chunk_index:04}_lossy.mp4"));
+            let lossless = tempdir
+                .path()
+                .join(format!("chunk_{chunk_index:04}_lossless.mp4"));
+            write_synthetic_nut(&ffmpeg, &raw, 4);
+            encoder
+                .encode_chunk(&ChunkEncodeRequest {
+                    raw_nut: raw,
+                    lossy_out: lossy.clone(),
+                    lossless_out: lossless,
+                })
+                .await
+                .expect("transcode chunk");
+            segments.push(lossy);
+        }
+
+        let final_lossy = tempdir.path().join("lossy.mp4");
+        let outcome = encoder
+            .concat_segments(&segments, &final_lossy)
+            .await
+            .expect("concat");
+        assert!(outcome.bytes > 0);
+
+        // The concat list file lives next to the output during encoding; the
+        // success path unlinks it.
+        let list = list_file_for(&final_lossy);
+        assert!(!list.exists(), "concat list file should be cleaned up");
+
+        let probe = StdCommand::new(&ffprobe)
+            .args([
+                "-v",
+                "error",
+                "-select_streams",
+                "v:0",
+                "-count_frames",
+                "-show_entries",
+                "stream=nb_read_frames",
+                "-of",
+                "default=nokey=1:noprint_wrappers=1",
+            ])
+            .arg(&final_lossy)
+            .output()
+            .expect("spawn ffprobe");
+        assert!(probe.status.success());
+        let trimmed = String::from_utf8(probe.stdout).unwrap();
+        let nb_read_frames: u64 = trimmed.trim().parse().unwrap();
+        assert_eq!(
+            nb_read_frames, total_frames,
+            "concat output should contain all {total_frames} frames"
+        );
+    }
+
+    #[tokio::test]
+    async fn missing_input_yields_non_zero_exit() {
+        if locate_binary("ffmpeg").is_none() {
+            eprintln!("ffmpeg not on PATH — skipping non-zero-exit test.");
+            return;
+        }
+
+        let tempdir = TempDir::new().unwrap();
+        let request = ChunkEncodeRequest {
+            raw_nut: tempdir.path().join("does-not-exist.nut"),
+            lossy_out: tempdir.path().join("lossy.mp4"),
+            lossless_out: tempdir.path().join("lossless.mp4"),
+        };
+        let encoder = VideoEncoder::new();
+        let error = encoder
+            .encode_chunk(&request)
+            .await
+
.expect_err("ffmpeg should fail");
+        assert!(
+            matches!(error, VideoEncodeError::NonZeroExit { .. }),
+            "unexpected error variant: {error:?}"
+        );
+    }
+
+    #[tokio::test]
+    async fn spawn_failure_surfaces_binary_name() {
+        let tempdir = TempDir::new().unwrap();
+        let raw = tempdir.path().join("raw.nut");
+        std::fs::write(&raw, [0u8; 16]).unwrap();
+        let request = ChunkEncodeRequest {
+            raw_nut: raw,
+            lossy_out: tempdir.path().join("lossy.mp4"),
+            lossless_out: tempdir.path().join("lossless.mp4"),
+        };
+        let encoder =
+            VideoEncoder::new().with_binary("this-binary-definitely-does-not-exist-ffmpeg");
+        let error = encoder
+            .encode_chunk(&request)
+            .await
+            .expect_err("spawn should fail");
+        match error {
+            VideoEncodeError::Spawn { binary, .. } => {
+                assert_eq!(
+                    binary,
+                    OsString::from("this-binary-definitely-does-not-exist-ffmpeg")
+                );
+            }
+            other => panic!("expected Spawn error, got {other:?}"),
+        }
+    }
+}
diff --git a/rust/data_daemon/src/intervals.rs b/rust/data_daemon/src/intervals.rs
new file mode 100644
index 000000000..1d66fece1
--- /dev/null
+++ b/rust/data_daemon/src/intervals.rs
@@ -0,0 +1,46 @@
+//! Centralised poll / tick cadences for the daemon's recurring background loops.
+//!
+//! Every cloud coordinator, the watchers, and the connection monitor drive their
+//! work from a `tokio::time::interval` on one of these constants. Keeping them in
+//! one place makes the daemon's timing budget legible at a glance and avoids the
+//! per-module `POLL_INTERVAL` name collisions that arise when each loop defines
+//! its own.
+//!
+//! Each loop still picks its own [`tokio::time::MissedTickBehavior`] at the call
+//! site: `Delay` for the steady-state pollers (a missed tick simply slips), and
+//! `Skip` for the flush / rescan safety-nets (only the next deadline matters).
+//! Cadences internal to a single component — the dispatcher's interleaved
+//! housekeeping, the trace-DB write-behind flush, the IPC drain decay, and the
+//!
CLI stop-wait — stay local to those modules rather than living here.
+
+use std::time::Duration;
+
+/// Org-id config poll: re-reads `config.json` so every coordinator observes org
+/// changes (`login`, `set_organization`) within a second. The file is tiny and
+/// the read is async, so a coarse re-parse each tick is cheaper to reason about
+/// than mtime gating.
+pub const ORG_CONFIG_POLL: Duration = Duration::from_secs(1);
+
+/// Registration drain fallback: the coordinator is event-driven off the bus and
+/// only falls back to this poll when the bus is quiet.
+pub const REGISTRATION_POLL: Duration = Duration::from_millis(500);
+
+/// Progress-report sweep: kept short so a freshly-uploaded recording reports
+/// promptly; the sweep is cheap because settled recordings are filtered out
+/// server-side.
+pub const PROGRESS_TICK: Duration = Duration::from_secs(2);
+
+/// Status-update flush: coalesces upload progress / complete updates into
+/// batched backend writes, firing regardless of inbox load.
+pub const STATUS_FLUSH: Duration = Duration::from_millis(100);
+
+/// Uploader safety-net rescan: catches traces skipped while the upload semaphore
+/// was full during a drain, without relying on bus events.
+pub const UPLOAD_RESCAN: Duration = Duration::from_secs(5);
+
+/// Recording-reaper sweep: reclamation only frees space already replicated to
+/// the cloud, so a relaxed cadence keeps the scan off the hot path.
+pub const RECORDING_RECLAIM: Duration = Duration::from_secs(60);
+
+/// Connection health probe — matches the Python `connection_manager.py` cadence.
+pub const CONNECTION_HEALTH_CHECK: Duration = Duration::from_secs(10);
diff --git a/rust/data_daemon/src/ipc/listener.rs b/rust/data_daemon/src/ipc/listener.rs
new file mode 100644
index 000000000..77f9f6175
--- /dev/null
+++ b/rust/data_daemon/src/ipc/listener.rs
@@ -0,0 +1,251 @@
+//! Tokio task that drains the iceoryx2 `commands` subscriber.
+//!
+//!
iceoryx2 0.8 does not expose an `async`/`Notify`-style adaptor, so the
+//! listener polls the subscriber on a short tick. The cadence is fast enough
+//! to keep frame latency low and slow enough to leave idle daemons
+//! effectively quiescent.
+//!
+//! ## Borrow lifetime
+//!
+//! iceoryx2's [`Subscriber`] is `Send` but `!Sync`. The loop drains the
+//! subscriber synchronously into a local `Vec` before awaiting the
+//! dispatcher: that bounds how long the subscriber borrow is held and lets
+//! backpressure on the dispatcher send propagate to iceoryx2 without keeping
+//! the subscriber locked across the await. (`Send` is not actually required
+//! here — `run` is awaited inline under `block_on`, never `tokio::spawn`'d, and
+//! the later `serve_queries` borrow is itself held across an await.)
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use data_daemon_shared::{Envelope, RecordingIdQuery, RecordingIdReply};
+use iceoryx2::port::server::Server;
+use iceoryx2::port::subscriber::Subscriber;
+use iceoryx2::prelude::ipc;
+use tokio::select;
+use tokio::sync::{broadcast, mpsc};
+use tokio::time::sleep;
+
+use crate::ipc::node::IpcTransport;
+use crate::lifecycle::shutdown::ShutdownSignal;
+use crate::state::{SqliteStateStore, StateStore};
+
+/// Poll cadence while envelopes are actively flowing.
+///
+/// 200 µs bounds the worst-case producer-block time on a full subscriber
+/// buffer. At the integration matrix's heaviest fanout (8 multiprocess
+/// workers × ~4 producer threads each = ~32 publishers competing for
+/// LIFECYCLE_SUBSCRIBER_BUFFER_SIZE=64 slots), 10 ms left producer-side
+/// `log_*` calls blocked for ~1 s at a stretch on 2-vCPU hosts when the
+/// listener task was preempted off-CPU by ffmpeg / per-trace work.
+const POLL_INTERVAL: Duration = Duration::from_micros(200);
+
+/// Poll cadence once the subscriber has been empty for [`IDLE_POLL_AFTER_EMPTY`]
+/// consecutive drains.
iceoryx2 0.8 has no async waker, so the listener must
+/// poll — but a *fixed* 200 µs tick wakes 5000×/s on a daemon with no producer
+/// attached. Decaying to 25 ms when idle keeps an idle daemon near-quiescent
+/// while the first arriving sample snaps the cadence straight back to 200 µs, so
+/// active-load latency is unchanged.
+const IDLE_POLL_INTERVAL: Duration = Duration::from_millis(25);
+
+/// Number of consecutive empty drains before the poll cadence relaxes to
+/// [`IDLE_POLL_INTERVAL`]. A handful of empty ticks at 200 µs is a negligible
+/// cost and avoids relaxing during a brief lull mid-recording.
+const IDLE_POLL_AFTER_EMPTY: u32 = 64;
+
+/// Drain the iceoryx2 subscriber until a shutdown signal arrives.
+///
+/// Each successfully decoded envelope is forwarded to the dispatcher's
+/// [`mpsc::Sender`]. If the dispatcher's queue fills (the dispatcher has
+/// stalled), the listener blocks instead of dropping samples — backpressure
+/// propagates back to iceoryx2's publisher queue and the producer SDK.
+///
+/// The listener takes ownership of the [`IpcTransport`]; when this task
+/// returns the transport's destructor releases the iceoryx2 node back to the
+/// OS.
+pub async fn run(
+    transport: IpcTransport,
+    dispatcher_tx: mpsc::Sender<Envelope>,
+    store: Arc<SqliteStateStore>,
+    mut shutdown_rx: broadcast::Receiver<ShutdownSignal>,
+) {
+    tracing::info!(
+        commands = data_daemon_shared::service_name::COMMANDS,
+        queries = data_daemon_shared::service_name::QUERIES,
+        "ipc listener started"
+    );
+
+    let mut counters = LoopCounters::default();
+    let mut batch: Vec<Envelope> = Vec::with_capacity(64);
+    // Consecutive empty drains, used to relax the poll cadence on an idle
+    // daemon. Reset to 0 the moment any envelope arrives.
+    let mut empty_drains: u32 = 0;
+
+    loop {
+        // -- Synchronous drain --------------------------------------------------
+        // The subscriber borrow MUST stay inside this block (no `.await` in
+        // any of these calls).
The local `batch` is `Send`, so it can survive
+        // across the awaits below without infecting the task with !Send.
+        drain_subscriber(transport.commands_subscriber(), &mut batch, &mut counters);
+
+        empty_drains = if batch.is_empty() {
+            empty_drains.saturating_add(1)
+        } else {
+            0
+        };
+
+        // -- Async forward ------------------------------------------------------
+        for envelope in batch.drain(..) {
+            let kind = envelope.kind();
+            if dispatcher_tx.send(envelope).await.is_err() {
+                tracing::debug!(
+                    envelope = kind,
+                    "ipc listener stopping: dispatcher receiver dropped"
+                );
+                return;
+            }
+        }
+
+        // -- Answer recording-id queries ----------------------------------------
+        // Each request is resolved against the daemon's own store (a single
+        // `.await`) while holding the iceoryx2 `ActiveRequest` to reply on. That
+        // borrow makes this future `!Send`, which is fine: `run` is awaited
+        // inline under `block_on`, never `tokio::spawn`'d.
+        serve_queries(transport.queries_server(), &store).await;
+
+        // -- Yield / shutdown ---------------------------------------------------
+        // Poll fast while data is flowing; relax once the bus has been empty
+        // for a while so an idle daemon isn't woken 5000×/s.
+        let poll_interval = if empty_drains >= IDLE_POLL_AFTER_EMPTY {
+            IDLE_POLL_INTERVAL
+        } else {
+            POLL_INTERVAL
+        };
+        select! {
+            biased;
+            signal = shutdown_rx.recv() => {
+                tracing::debug!(?signal, "ipc listener shutting down");
+                return;
+            }
+            _ = sleep(poll_interval) => {}
+        }
+    }
+}
+
+/// Drain every pending recording-id query, answering each from the daemon's
+/// own store.
+///
+/// The SDK resolves a recording's cloud id by asking the daemon over the
+/// `queries` request-response service instead of reading the daemon's private
+/// SQLite DB directly. Requests are cheap and infrequent (one per
+/// `get_recording_id` poll), so a malformed request or store error is logged
+/// and the next request is served rather than aborting the loop.
+///
+/// The per-tick request volume is bounded rather than unbounded: each query
+/// client keeps at most one request in flight (it awaits the reply before
+/// sending the next), so a single drain serves at most one request per
+/// connected client (≤ `MAX_QUERY_CLIENTS_PER_SERVICE`) before `receive`
+/// returns `None` and the loop hands the next tick back to the commands drain.
+async fn serve_queries(
+    server: &Server<ipc::Service, [u8], (), [u8], ()>,
+    store: &Arc<SqliteStateStore>,
+) {
+    loop {
+        let active = match server.receive() {
+            Ok(Some(active)) => active,
+            Ok(None) => return,
+            Err(error) => {
+                tracing::warn!(%error, "queries server receive failed");
+                return;
+            }
+        };
+
+        let query = match RecordingIdQuery::decode(active.payload()) {
+            Ok(query) => query,
+            Err(error) => {
+                tracing::warn!(%error, "dropping malformed recording-id query");
+                continue;
+            }
+        };
+
+        let recording_id = match store
+            .resolve_recording_id_for_marker(
+                &query.robot_id,
+                query.robot_instance,
+                query.timestamp_ns,
+            )
+            .await
+        {
+            Ok(recording_id) => recording_id,
+            Err(error) => {
+                tracing::warn!(%error, robot_id = query.robot_id, "recording-id lookup failed");
+                None
+            }
+        };
+
+        let reply = RecordingIdReply { recording_id };
+        match reply.encode() {
+            Ok(bytes) => match active.loan_slice_uninit(bytes.len()) {
+                Ok(response) => {
+                    let response = response.write_from_slice(&bytes);
+                    if let Err(error) = response.send() {
+                        tracing::warn!(%error, "failed to send recording-id reply");
+                    }
+                }
+                Err(error) => tracing::warn!(%error, "failed to loan recording-id reply sample"),
+            },
+            Err(error) => tracing::warn!(%error, "failed to encode recording-id reply"),
+        }
+    }
+}
+
+/// Counters reported in the slow-path warning logs.
+#[derive(Default)]
+struct LoopCounters {
+    decode_failures: u64,
+    receive_failures: u64,
+}
+
+/// Synchronously drain every available sample on `subscriber`, appending
+/// decoded envelopes to `batch`.
+///
+/// Receive errors and decode failures are logged with a saturating counter
+/// rather than returned — both are recoverable (a malformed sample doesn't
+/// invalidate the next one) and the only path that escalates to a listener
+/// exit is the dispatcher-receiver-closed branch, which is handled in
+/// [`run`].
+fn drain_subscriber(
+    subscriber: &Subscriber<ipc::Service, [u8], ()>,
+    batch: &mut Vec<Envelope>,
+    counters: &mut LoopCounters,
+) {
+    loop {
+        match subscriber.receive() {
+            Ok(Some(sample)) => match Envelope::decode(sample.payload()) {
+                // Every envelope is forwarded whole. `BatchedData` is held and
+                // released as a single unit by the dispatcher (all its items
+                // share one timestamp, so they belong to one window) and
+                // expanded into per-sensor routes only at release time.
+                Ok(envelope) => batch.push(envelope),
+                Err(error) => {
+                    counters.decode_failures = counters.decode_failures.saturating_add(1);
+                    tracing::warn!(
+                        %error,
+                        decode_failures = counters.decode_failures,
+                        "ipc envelope decode failed; dropping sample"
+                    );
+                }
+            },
+            Ok(None) => return,
+            Err(error) => {
+                counters.receive_failures = counters.receive_failures.saturating_add(1);
+                tracing::warn!(
+                    error = %error,
+                    receive_failures = counters.receive_failures,
+                    "ipc subscriber receive failed"
+                );
+                return;
+            }
+        }
+    }
+}
diff --git a/rust/data_daemon/src/ipc/mod.rs b/rust/data_daemon/src/ipc/mod.rs
new file mode 100644
index 000000000..d34606a0f
--- /dev/null
+++ b/rust/data_daemon/src/ipc/mod.rs
@@ -0,0 +1,17 @@
+//! iceoryx2 transport bring-up and async listener.
+//!
+//! - [`node`]: creates the per-daemon iceoryx2 [`Node`](iceoryx2::node::Node)
+//!   and opens the `commands` and `queries` services defined in
+//!   [`data_daemon_shared::service_name`].
+//! - [`listener`]: a tokio task that drains the single `commands` subscriber
+//!   and answers `queries` requests, forwarding decoded
+//!   [`Envelope`](data_daemon_shared::Envelope)s to the per-trace dispatcher via
+//!   an `mpsc::Sender`.
+//!
+//!
iceoryx2 0.8 does not ship a `tokio::sync::Notify` adaptor, so the listener
+//! polls. The cadence decays from `POLL_INTERVAL` (200 µs) to
+//! `IDLE_POLL_INTERVAL` (25 ms) once the subscriber has been empty for a while,
+//! keeping active-load latency low while leaving idle daemons near-quiescent.
+
+pub mod listener;
+pub mod node;
diff --git a/rust/data_daemon/src/ipc/node.rs b/rust/data_daemon/src/ipc/node.rs
new file mode 100644
index 000000000..ed43a0cc0
--- /dev/null
+++ b/rust/data_daemon/src/ipc/node.rs
@@ -0,0 +1,235 @@
+//! iceoryx2 node and per-stream service bring-up.
+//!
+//! The daemon owns a single iceoryx2 [`Node`] for the duration of the process.
+//! At startup it opens one long-lived subscriber on the `commands` service:
+//! it carries lifecycle envelopes, non-video `Frame`s, and the
+//! [`Envelope::VideoChunkReady`] notifications the producer emits when a NUT
+//! chunk lands on disk. There is no dedicated video bus — pixel buffers are
+//! spooled to disk by the producer, so the IPC bus only ever carries
+//! metadata-sized payloads.
+//!
+//! [`Envelope::VideoChunkReady`]: data_daemon_shared::Envelope::VideoChunkReady
+
+use data_daemon_shared::service_name::{
+    COMMANDS, LIFECYCLE_SUBSCRIBER_BUFFER_SIZE, MAX_NODES_PER_SERVICE, MAX_PUBLISHERS_PER_SERVICE,
+    MAX_QUERY_CLIENTS_PER_SERVICE, MAX_QUERY_SERVERS_PER_SERVICE, MAX_SUBSCRIBERS_PER_SERVICE,
+    QUERIES, QUERIES_MAX_PAYLOAD_BYTES,
+};
+use iceoryx2::node::{Node, NodeBuilder};
+use iceoryx2::port::server::Server;
+use iceoryx2::port::subscriber::Subscriber;
+use iceoryx2::prelude::{ipc, NodeName};
+use iceoryx2::service::port_factory::publish_subscribe::PortFactory;
+use iceoryx2::service::port_factory::request_response::PortFactory as QueryPortFactory;
+use thiserror::Error;
+
+/// Errors raised while bringing up the daemon's iceoryx2 transport.
+///
+/// The inner cause fields are named `detail` rather than `source` so
+/// `thiserror` doesn't try to wrap them in `dyn StdError`; the iceoryx2 error
+/// types only implement `Display`, so we stringify at the boundary.
+#[derive(Debug, Error)]
+pub enum IpcSetupError {
+    /// Constructing the iceoryx2 node failed.
+    #[error("failed to create iceoryx2 node: {0}")]
+    NodeCreate(String),
+    /// The configured node name is not a valid iceoryx2 semantic string.
+    #[error("invalid node name '{name}': {detail}")]
+    InvalidNodeName {
+        /// Offending name.
+        name: String,
+        /// Underlying iceoryx2 error message.
+        detail: String,
+    },
+    /// The configured service name is not a valid iceoryx2 semantic string.
+    #[error("invalid service name '{name}': {detail}")]
+    InvalidServiceName {
+        /// Offending name.
+        name: String,
+        /// Underlying iceoryx2 error message.
+        detail: String,
+    },
+    /// Opening or creating an iceoryx2 service failed.
+    #[error("failed to open service '{name}': {detail}")]
+    ServiceOpen {
+        /// Offending service.
+        name: String,
+        /// Underlying iceoryx2 error message.
+        detail: String,
+    },
+    /// Building a subscriber port failed.
+    #[error("failed to create subscriber on '{name}': {detail}")]
+    SubscriberCreate {
+        /// Owning service.
+        name: String,
+        /// Underlying iceoryx2 error message.
+        detail: String,
+    },
+    /// Building a request-response server port failed.
+    #[error("failed to create server on '{name}': {detail}")]
+    ServerCreate {
+        /// Owning service.
+        name: String,
+        /// Underlying iceoryx2 error message.
+        detail: String,
+    },
+}
+
+/// Daemon-side iceoryx2 transport.
+///
+/// Holds the node and the long-lived `commands` subscriber. The struct is
+/// `Send` (so it can move into the tokio main task) but not `Sync` because
+/// iceoryx2's subscriber ports own shared-memory descriptors that must be
+/// advanced from a single thread.
+pub struct IpcTransport {
+    /// Backing iceoryx2 node.
Holding it alive keeps every service this
+    /// daemon created visible to discovery.
+    _node: Node<ipc::Service>,
+    /// Subscriber on `neuracore/data_daemon/commands`.
+    commands_subscriber: Subscriber<ipc::Service, [u8], ()>,
+    /// Service handle held alongside the subscriber so port discovery doesn't
+    /// race the service handle going out of scope.
+    _commands_service: PortFactory<ipc::Service, [u8], ()>,
+    /// Request-response server on `neuracore/data_daemon/queries` that answers
+    /// SDK recording-id lookups.
+    queries_server: Server<ipc::Service, [u8], (), [u8], ()>,
+    /// Service handle held alongside the server, as for the commands service.
+    _queries_service: QueryPortFactory<ipc::Service, [u8], (), [u8], ()>,
+}
+
+impl IpcTransport {
+    /// Bring up the daemon's iceoryx2 transport.
+    ///
+    /// Creates a node named after this daemon's PID
+    /// (`neuracore-data-daemon-{pid}`), opens the `commands` service, and
+    /// builds a subscriber on it.
+    pub fn bring_up() -> Result<Self, IpcSetupError> {
+        let node_name = format!("neuracore-data-daemon-{}", std::process::id());
+        let parsed_name =
+            NodeName::new(&node_name).map_err(|error| IpcSetupError::InvalidNodeName {
+                name: node_name.clone(),
+                detail: error.to_string(),
+            })?;
+        let node = NodeBuilder::new()
+            .name(&parsed_name)
+            .create::<ipc::Service>()
+            .map_err(|error| IpcSetupError::NodeCreate(error.to_string()))?;
+
+        let (commands_service, commands_subscriber) =
+            open_subscriber(&node, COMMANDS, LIFECYCLE_SUBSCRIBER_BUFFER_SIZE)?;
+
+        let (queries_service, queries_server) = open_query_server(&node, QUERIES)?;
+
+        Ok(IpcTransport {
+            _node: node,
+            commands_subscriber,
+            _commands_service: commands_service,
+            queries_server,
+            _queries_service: queries_service,
+        })
+    }
+
+    /// Borrow the `commands` subscriber port.
+    pub fn commands_subscriber(&self) -> &Subscriber<ipc::Service, [u8], ()> {
+        &self.commands_subscriber
+    }
+
+    /// Borrow the `queries` request-response server port.
+    pub fn queries_server(&self) -> &Server<ipc::Service, [u8], (), [u8], ()> {
+        &self.queries_server
+    }
+}
+
+/// Convenience alias for the `[u8]` pub/sub factory + subscriber pair
+/// [`open_subscriber`] returns.
+type ByteSliceFactory = PortFactory<ipc::Service, [u8], ()>;
+type ByteSliceSubscriber = Subscriber<ipc::Service, [u8], ()>;
+
+/// Open or attach to a `[u8]` pub/sub service and build a subscriber on it.
+///
+/// Centralised so the error annotations carry the offending service name in
+/// one place. Subscriber slice lengths are negotiated from the publisher's
+/// `initial_max_slice_len` — no per-service budget is applied here.
+fn open_subscriber(
+    node: &Node<ipc::Service>,
+    name: &str,
+    subscriber_buffer_size: usize,
+) -> Result<(ByteSliceFactory, ByteSliceSubscriber), IpcSetupError> {
+    let service_name = name
+        .try_into()
+        .map_err(|error| IpcSetupError::InvalidServiceName {
+            name: name.to_string(),
+            detail: format!("{error}"),
+        })?;
+    // `enable_safe_overflow(false)` is load-bearing: iceoryx2 defaults a
+    // service to safe-overflow *on*, where a full subscriber buffer silently
+    // evicts the oldest sample — which, for the `commands` service, is
+    // typically a `StartTrace`. With overflow disabled a full buffer instead
+    // makes the producer's `UnableToDeliverStrategy::Block` take effect, so
+    // delivery is lossless and in-order regardless of how shallow the buffer
+    // is. Dropping a lifecycle envelope strands the per-trace actor.
+    let service = node
+        .service_builder(&service_name)
+        .publish_subscribe::<[u8]>()
+        .enable_safe_overflow(false)
+        .subscriber_max_buffer_size(subscriber_buffer_size)
+        .max_publishers(MAX_PUBLISHERS_PER_SERVICE)
+        .max_subscribers(MAX_SUBSCRIBERS_PER_SERVICE)
+        .max_nodes(MAX_NODES_PER_SERVICE)
+        .open_or_create()
+        .map_err(|error| IpcSetupError::ServiceOpen {
+            name: name.to_string(),
+            detail: error.to_string(),
+        })?;
+    let subscriber =
+        service
+            .subscriber_builder()
+            .create()
+            .map_err(|error| IpcSetupError::SubscriberCreate {
+                name: name.to_string(),
+                detail: error.to_string(),
+            })?;
+    Ok((service, subscriber))
+}
+
+/// Convenience aliases for the `[u8]` request-response factory + server pair.
+type ByteSliceQueryFactory = QueryPortFactory<ipc::Service, [u8], (), [u8], ()>;
+type ByteSliceServer = Server<ipc::Service, [u8], (), [u8], ()>;
+
+/// Open or attach to the `[u8]` request-response `queries` service and build the
+/// daemon's single server on it.
+///
+/// The SDK opens client ports on the same service (one per OS thread, like the
+/// `commands` publisher), so the caps mirror the publisher topology. Requests
+/// and responses are both small postcard blobs ([`QUERIES_MAX_PAYLOAD_BYTES`]).
+fn open_query_server(
+    node: &Node<ipc::Service>,
+    name: &str,
+) -> Result<(ByteSliceQueryFactory, ByteSliceServer), IpcSetupError> {
+    let service_name = name
+        .try_into()
+        .map_err(|error| IpcSetupError::InvalidServiceName {
+            name: name.to_string(),
+            detail: format!("{error}"),
+        })?;
+    let service = node
+        .service_builder(&service_name)
+        .request_response::<[u8], [u8]>()
+        .max_clients(MAX_QUERY_CLIENTS_PER_SERVICE)
+        .max_servers(MAX_QUERY_SERVERS_PER_SERVICE)
+        .max_nodes(MAX_NODES_PER_SERVICE)
+        .open_or_create()
+        .map_err(|error| IpcSetupError::ServiceOpen {
+            name: name.to_string(),
+            detail: error.to_string(),
+        })?;
+    let server = service
+        .server_builder()
+        .initial_max_slice_len(QUERIES_MAX_PAYLOAD_BYTES)
+        .create()
+        .map_err(|error| IpcSetupError::ServerCreate {
+            name: name.to_string(),
+            detail: error.to_string(),
+        })?;
+    Ok((service, server))
+}
diff --git a/rust/data_daemon/src/lifecycle/daemonize.rs b/rust/data_daemon/src/lifecycle/daemonize.rs
new file mode 100644
index 000000000..b3c75fbb3
--- /dev/null
+++ b/rust/data_daemon/src/lifecycle/daemonize.rs
@@ -0,0 +1,263 @@
+//! Double-fork + `setsid` detachment for `launch --background`.
+//!
+//! The standard Unix recipe:
+//!
+//! 1. Parent forks. Parent blocks on a readiness pipe waiting for the
+//!    grandchild's startup status (see below); the intermediate child
+//!    continues.
+//! 2. Intermediate child calls `setsid` so it becomes the session leader of a
+//!    new session with no controlling terminal.
+//! 3.
Intermediate child forks again, then exits — orphaning the grandchild
+//!    to init so it can never reacquire a controlling terminal even on
+//!    accidental `open(...)` of a TTY.
+//! 4. Grandchild closes stdin/stdout/stderr (redirecting to `/dev/null`) and
+//!    returns to the caller as the long-lived daemon process.
+//!
+//! ### Startup readiness pipe
+//!
+//! A pipe is wired between the original caller and the grandchild so that
+//! early-startup failures (PID file already held, IO error binding the lock,
+//! tracing init failure) reach the user's terminal instead of being lost to
+//! the `/dev/null`-redirected stderr. The grandchild must explicitly call
+//! [`ReadinessReporter::ready`] or [`ReadinessReporter::fail`]; otherwise the
+//! reporter's `Drop` reports an "exited before reporting readiness" message
+//! so the launcher always observes some terminal status rather than blocking
+//! forever.
+//!
+//! Foreground mode (no `--background`) is a no-op: the caller stays in the
+//! current process group and keeps its terminal.
+
+use std::fs::{File, OpenOptions};
+use std::io::{self, Read, Write};
+use std::os::unix::io::{AsRawFd, OwnedFd};
+
+use nix::sys::stat::{umask, Mode};
+use nix::sys::wait::waitpid;
+use nix::unistd::{chdir, dup2, fork, pipe, setsid, ForkResult};
+
+/// Outcome of [`daemonize`].
+pub enum DaemonizeOutcome {
+    /// We are the original caller process. Call [`ReadinessReader::read`] to
+    /// block until the grandchild reports its startup status, then propagate
+    /// the result to the user's shell.
+    Parent(ReadinessReader),
+    /// We are the long-lived daemon process. Continue with daemon startup
+    /// and hand off to the reporter once the PID file is acquired (or any
+    /// earlier step fails) so the launcher unblocks.
+    Child(ReadinessReporter),
+}
+
+/// Startup status the grandchild reports back to the original caller.
+#[derive(Debug)] +pub enum Readiness { + /// Grandchild started successfully; payload is the daemon's PID as a + /// string so the launcher can echo it on stdout for the user. + Ready(String), + /// Grandchild reported a startup failure; payload is the human-readable + /// reason to surface on stderr. + Failed(String), + /// The pipe closed without any message — typically the grandchild was + /// signalled or aborted before it could report. Treated as failure. + Disconnected, +} + +/// Read end of the readiness pipe, held by the original caller. +pub struct ReadinessReader { + pipe: OwnedFd, +} + +impl ReadinessReader { + /// Block until the grandchild closes its end of the pipe, then classify + /// the message it sent. + pub fn read(self) -> io::Result { + let mut file = File::from(self.pipe); + let mut buffer = String::new(); + file.read_to_string(&mut buffer)?; + let trimmed = buffer.trim_end_matches('\n'); + if trimmed.is_empty() { + return Ok(Readiness::Disconnected); + } + if let Some(pid) = trimmed.strip_prefix("OK ") { + return Ok(Readiness::Ready(pid.to_string())); + } + if let Some(message) = trimmed.strip_prefix("ERR ") { + return Ok(Readiness::Failed(message.to_string())); + } + // Unrecognised payload — surface the raw text so the user has + // something to act on. + Ok(Readiness::Failed(trimmed.to_string())) + } +} + +/// Write end of the readiness pipe, held by the grandchild. +/// +/// `ready` and `fail` are consuming methods: sending a status closes the +/// underlying FD, which is what unblocks the original caller's +/// [`ReadinessReader::read`]. +pub struct ReadinessReporter { + pipe: Option, +} + +impl ReadinessReporter { + /// Report a successful startup, passing the daemon's PID. Tolerates a + /// closed pipe (caller may have died) so the daemon does not crash on + /// `EPIPE` after the launcher has been killed.
+ pub fn ready(mut self, pid: u32) -> io::Result<()> { + write_status(self.pipe.take(), &format!("OK {pid}")) + } + + /// Report a startup failure. Same broken-pipe tolerance as + /// [`ready`](Self::ready). + pub fn fail(mut self, message: impl AsRef) -> io::Result<()> { + write_status(self.pipe.take(), &format!("ERR {}", message.as_ref())) + } +} + +impl Drop for ReadinessReporter { + fn drop(&mut self) { + // Best-effort fallback so the launcher gets a deterministic failure + // message instead of a blank `Disconnected` when the reporter is + // dropped without an explicit ready/fail call (e.g. an early return). + // A signal or hard abort skips `Drop`, so that case still surfaces as + // `Disconnected`. + let _ = write_status( + self.pipe.take(), + "ERR daemon exited before reporting readiness", + ); + } +} + +fn write_status(pipe: Option, line: &str) -> io::Result<()> { + let Some(pipe) = pipe else { return Ok(()) }; + let mut file = File::from(pipe); + match writeln!(file, "{line}") { + Ok(()) => Ok(()), + Err(error) if error.kind() == io::ErrorKind::BrokenPipe => Ok(()), + Err(error) => Err(error), + } +} + +/// Detach the current process into a background daemon using the standard +/// double-fork + `setsid` recipe. +/// +/// # Safety +/// +/// `fork` after threads have been spawned is undefined behaviour. The caller +/// must invoke this *before* creating the Tokio runtime or spawning any +/// threads — `cli::launch::run` enforces that by daemonising at the top of the +/// command handler. +pub fn daemonize() -> io::Result { + let (read_fd, write_fd) = pipe().map_err(io::Error::from)?; + + // First fork: parent waits on the pipe for the grandchild's status. + // SAFETY: caller must not have spawned any threads yet; see fn docs. + match unsafe { fork() }.map_err(io::Error::from)? { + ForkResult::Parent { child } => { + // The parent never writes to the pipe; drop its write end so the + // only remaining writers are the intermediate child and the + // grandchild. Once both close, `read_to_string` returns.
+ drop(write_fd); + // Reap the intermediate child so it does not become a zombie. It + // exits immediately after the second fork below. + let _ = waitpid(child, None); + return Ok(DaemonizeOutcome::Parent(ReadinessReader { pipe: read_fd })); + } + ForkResult::Child => { + // Intermediate child does not need to read from the pipe. + drop(read_fd); + } + } + + // From here we are inside the intermediate child. Its stderr is still + // attached to the user's TTY, so we must not return Err — anyhow would + // print a stray error in addition to whatever the original launcher + // prints. Instead, push any failure down the readiness pipe (which the + // launcher already reads) and `exit(1)`. + match run_intermediate_child(write_fd) { + Ok(outcome) => Ok(outcome), + Err(IntermediateExit) => std::process::exit(1), + } +} + +/// Marker returned when the intermediate child failed and has already +/// reported the failure via the readiness pipe. +struct IntermediateExit; + +/// Runs the post-first-fork steps in the intermediate child. On any error, +/// writes an `ERR ...` line down the readiness pipe and returns +/// [`IntermediateExit`] so the caller can `process::exit(1)` — keeping all +/// failure output going through the launcher's already-attached pipe rather +/// than the intermediate child's still-live stderr. +fn run_intermediate_child(write_fd: OwnedFd) -> Result { + // `write_status` consumes the OwnedFd, so keep the original around for + // the success path and only build a fresh File handle on error via + // `try_clone`. + let report_failure = |error: &dyn std::fmt::Display, stage: &str| -> IntermediateExit { + // Best-effort: if `try_clone` fails we still exit — Drop on + // `write_fd` will close the pipe and the launcher will surface a + // `Disconnected` status.
+ if let Ok(clone) = write_fd.try_clone() { + let _ = write_status(Some(clone), &format!("ERR {stage}: {error}")); + } + IntermediateExit + }; + + // Become a session leader so we have no controlling terminal. + if let Err(error) = setsid() { + return Err(report_failure(&error, "setsid failed")); + } + + // Second fork: ensures the daemon is not a session leader, so it can never + // reacquire a controlling terminal. + // SAFETY: only this child is running at this point — still single-threaded. + let fork_result = match unsafe { fork() } { + Ok(result) => result, + Err(error) => return Err(report_failure(&error, "second fork failed")), + }; + match fork_result { + ForkResult::Parent { .. } => { + // The grandchild owns its own inherited copy of `write_fd`. We + // drop ours here so the original parent's `read_to_string` only + // waits for the grandchild's eventual write — otherwise our + // copy would keep the pipe open until this scope's `exit`. + drop(write_fd); + // Intermediate child exits immediately so the grandchild is + // re-parented to PID 1 (init). + std::process::exit(0); + } + ForkResult::Child => {} + } + + // Standard double-fork hygiene: detach from the launcher's CWD (so it + // can be unmounted) and reset umask to a predictable value so files we + // create later have stable permissions regardless of the launching + // shell's environment.
+ if let Err(error) = chdir("/") { + return Err(report_failure(&error, "chdir(\"/\") failed")); + } + // NOTE(review): a zero umask means any later file creation that does not + // pass an explicit mode (std's default is 0o666) ends up group- and + // world-writable, e.g. the PID file `pidfile::PidFile::acquire` creates + // without a mode. Confirm that is intended. + umask(Mode::empty()); + + if let Err(error) = redirect_standard_streams_to_devnull() { + return Err(report_failure(&error, "redirect std streams failed")); + } + + Ok(DaemonizeOutcome::Child(ReadinessReporter { + pipe: Some(write_fd), + })) +} + +fn redirect_standard_streams_to_devnull() -> io::Result<()> { + let devnull = OpenOptions::new() + .read(true) + .write(true) + .open("/dev/null")?; + let fd = devnull.as_raw_fd(); + // stdin + dup2(fd, 0).map_err(io::Error::from)?; + // stdout + dup2(fd, 1).map_err(io::Error::from)?; + // stderr — direct stderr writes (panics, libc messages) are lost after + // this point. `cli::launch::run_daemon` initialises tracing against a + // log file *after* this redirect for background mode, so structured + // logs survive. + dup2(fd, 2).map_err(io::Error::from)?; + Ok(()) +} diff --git a/rust/data_daemon/src/lifecycle/mod.rs b/rust/data_daemon/src/lifecycle/mod.rs new file mode 100644 index 000000000..df83c834b --- /dev/null +++ b/rust/data_daemon/src/lifecycle/mod.rs @@ -0,0 +1,7 @@ +//! Daemon lifecycle: PID file management, daemonization, signal handling, and +//! startup recovery from a previous unclean exit. + +pub mod daemonize; +pub mod pidfile; +pub mod recovery; +pub mod shutdown; diff --git a/rust/data_daemon/src/lifecycle/pidfile.rs b/rust/data_daemon/src/lifecycle/pidfile.rs new file mode 100644 index 000000000..534f25d70 --- /dev/null +++ b/rust/data_daemon/src/lifecycle/pidfile.rs @@ -0,0 +1,253 @@ +//! Single-instance enforcement via an `flock`'d PID file. +//! +//! The launching process opens the PID file with `O_CREAT|O_RDWR`, takes a +//! non-blocking exclusive `flock`, writes its own PID, and keeps the file +//! descriptor open for the rest of the daemon's life. When the [`PidFile`] +//! value is dropped — either explicitly on graceful shutdown or implicitly on +//!
process exit — the kernel releases the lock. The file itself is left in +//! place (unlinking an `flock`'d file races with a concurrent launcher); the +//! next launcher reuses it under the lock. +//! +//! The `flock` gives atomic single-instance semantics across crash, SIGKILL, +//! and parallel launches: a stale PID file from a SIGKILL'd daemon has no +//! active flock holder, so the next launcher's `flock` immediately succeeds +//! and the launcher overwrites the contents with its own PID. + +use std::fs::{File, OpenOptions}; +use std::io::{self, Read, Seek, SeekFrom, Write}; +use std::path::{Path, PathBuf}; + +use nix::fcntl::{Flock, FlockArg}; +use nix::unistd::Pid; +use thiserror::Error; + +/// Errors raised while acquiring or reading a PID file. +#[derive(Debug, Error)] +pub enum PidFileError { + /// Another daemon already holds the PID file's flock. Carries the PID we + /// found on disk (when readable) for the user-facing error message. + #[error("Daemon already running (pid={0})")] + AlreadyRunning(i32), + /// An I/O or `flock` failure prevented us from acquiring the file. + #[error(transparent)] + Io(#[from] io::Error), +} + +/// An exclusively `flock`'d PID file that releases the lock on drop. The file +/// is left in place (see [`PidFile::release`]); the next launcher reuses it. +pub struct PidFile { + lock: Option>, +} + +impl PidFile { + /// Acquire the PID file at `path`, writing the current process's PID into + /// it. Returns [`PidFileError::AlreadyRunning`] if another daemon + /// already holds the lock. + /// + /// Parent directories are created if missing.
+ pub fn acquire(path: impl Into) -> Result { + let path = path.into(); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(&path)?; + + let mut lock = match Flock::lock(file, FlockArg::LockExclusiveNonblock) { + Ok(lock) => lock, + Err((file, nix::errno::Errno::EWOULDBLOCK)) => { + let pid = read_pid_from_open_file(&file).unwrap_or(-1); + return Err(PidFileError::AlreadyRunning(pid)); + } + Err((_, err)) => return Err(io::Error::from(err).into()), + }; + + // The previous holder may have left a PID written from before its + // exit; truncate before writing ours so a partial read sees a clean + // value. `set_len(0)` does not move the file offset, so the explicit + // seek to byte 0 is what guarantees we overwrite from the start. + lock.set_len(0)?; + lock.seek(SeekFrom::Start(0))?; + writeln!(lock, "{}", Pid::this().as_raw())?; + lock.flush()?; + + Ok(PidFile { lock: Some(lock) }) + } + + /// Release the flock, relinquishing single-instance ownership. Idempotent. + /// + /// Deliberately does **not** unlink the file. Unlinking an `flock`'d PID + /// file races in *both* orderings: unlink-then-drop lets the exiting process + /// briefly co-hold with a starting one, and drop-then-unlink is worse — a + /// new launcher can grab the lock on the existing inode in the gap, after + /// which our `remove_file` deletes *its* live PID file, letting a third + /// launcher create a fresh inode and run a second daemon. The flock alone is + /// the single-instance authority, so we just release it and leave the file + /// for the next launcher to reuse in place — exactly the path `acquire` + /// already takes after a SIGKILL (lock, truncate, rewrite).
+ pub fn release(&mut self) { + if let Some(lock) = self.lock.take() { + drop(lock); + } + } +} + +impl Drop for PidFile { + fn drop(&mut self) { + self.release(); + } +} + +/// Read an integer PID from a file at `path`, returning `None` when the file +/// is missing, empty, or contains a non-integer or non-positive value. +/// +/// Mirrors `daemon_os_control.read_pid_from_file`. +pub fn read_pid_from_file(path: &Path) -> Option { + let text = std::fs::read_to_string(path).ok()?; + let trimmed = text.trim(); + if trimmed.is_empty() { + return None; + } + let pid = trimmed.parse::().ok()?; + if pid > 0 { + Some(pid) + } else { + None + } +} + +fn read_pid_from_open_file(file: &File) -> Option { + let mut clone = file.try_clone().ok()?; + clone.seek(SeekFrom::Start(0)).ok()?; + let mut buffer = String::new(); + clone.read_to_string(&mut buffer).ok()?; + let trimmed = buffer.trim(); + if trimmed.is_empty() { + return None; + } + trimmed.parse::().ok().filter(|pid| *pid > 0) +} + +/// Return `true` when `pid` names a live process: either one the current user +/// can signal that is not a zombie, or one that exists but rejects the probe +/// with `EPERM`. +/// +/// `kill(pid, 0)` probes existence, and (on Linux) `/proc/<pid>/stat` is +/// consulted to exclude zombies. On non-Linux targets the zombie filter is a +/// no-op — the daemon is Linux-first. +pub fn pid_is_running(pid: i32) -> bool { + match nix::sys::signal::kill(Pid::from_raw(pid), None) { + Ok(()) => !is_zombie(pid), + Err(nix::errno::Errno::EPERM) => true, + Err(_) => false, + } +} + +#[cfg(target_os = "linux")] +fn is_zombie(pid: i32) -> bool { + let stat_path = PathBuf::from(format!("/proc/{pid}/stat")); + let Ok(contents) = std::fs::read_to_string(stat_path) else { + return false; + }; + // Field 3 of /proc/<pid>/stat is the process state, immediately after the + // closing paren of the comm field.
+ let Some(after_comm) = contents.rsplit(')').next() else { + return false; + }; + after_comm.split_whitespace().next() == Some("Z") +} + +#[cfg(not(target_os = "linux"))] +fn is_zombie(_pid: i32) -> bool { + false +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn acquire_writes_current_pid_and_release_frees_the_lock() { + let dir = tempdir().expect("tempdir"); + let path = dir.path().join("daemon.pid"); + + let mut pid_file = PidFile::acquire(&path).expect("acquire"); + assert!(path.exists()); + assert_eq!(read_pid_from_file(&path), Some(std::process::id() as i32)); + + pid_file.release(); + // The file is intentionally left in place (unlinking an flock'd file + // races a concurrent launcher); releasing only frees the lock, so a + // fresh acquire on the same path must succeed. + assert!( + path.exists(), + "release leaves the pid file for in-place reuse" + ); + let reacquired = PidFile::acquire(&path).expect("re-acquire after release"); + drop(reacquired); + } + + #[test] + fn second_acquire_in_same_process_fails_with_already_running() { + let dir = tempdir().expect("tempdir"); + let path = dir.path().join("daemon.pid"); + + let _guard = PidFile::acquire(&path).expect("first acquire"); + match PidFile::acquire(&path) { + Err(PidFileError::AlreadyRunning(pid)) => { + assert_eq!(pid, std::process::id() as i32); + } + Err(other) => panic!("expected AlreadyRunning, got error: {other}"), + Ok(_) => panic!("expected AlreadyRunning, got Ok"), + } + } + + #[test] + fn acquire_after_release_succeeds_and_overwrites_pid() { + let dir = tempdir().expect("tempdir"); + let path = dir.path().join("daemon.pid"); + + { + let _guard = PidFile::acquire(&path).expect("first acquire"); + } + // release() leaves the file in place (only the flock is dropped). Seed a + // stale PID to prove acquire re-locks and overwrites an existing file. 
+ std::fs::write(&path, "99999\n").expect("seed stale pid"); + let pid_file = PidFile::acquire(&path).expect("acquire after stale"); + assert_eq!(read_pid_from_file(&path), Some(std::process::id() as i32)); + drop(pid_file); + } + + #[test] + fn read_pid_from_file_handles_missing_and_garbage() { + let dir = tempdir().expect("tempdir"); + let path = dir.path().join("daemon.pid"); + + assert_eq!(read_pid_from_file(&path), None); + std::fs::write(&path, "").unwrap(); + assert_eq!(read_pid_from_file(&path), None); + std::fs::write(&path, "abc\n").unwrap(); + assert_eq!(read_pid_from_file(&path), None); + std::fs::write(&path, "0\n").unwrap(); + assert_eq!(read_pid_from_file(&path), None); + std::fs::write(&path, " 4321 ").unwrap(); + assert_eq!(read_pid_from_file(&path), Some(4321)); + } + + #[test] + fn pid_is_running_true_for_self_and_false_for_unused_pid() { + let our_pid = std::process::id() as i32; + assert!(pid_is_running(our_pid)); + // Use a PID guaranteed to be above the kernel's `pid_max` (Linux's + // hard cap is 2^22 ≈ 4M, well below `i32::MAX` ≈ 2.1B) so the + // probe is guaranteed to refer to no process. `read_pid_from_file` + // already rejects 0/negative values, so we do not need to guard + // against `kill(0, 0)`'s broadcast-to-process-group semantics here. + assert!(!pid_is_running(i32::MAX)); + } +} diff --git a/rust/data_daemon/src/lifecycle/recovery.rs b/rust/data_daemon/src/lifecycle/recovery.rs new file mode 100644 index 000000000..f5c84c9b2 --- /dev/null +++ b/rust/data_daemon/src/lifecycle/recovery.rs @@ -0,0 +1,429 @@ +//! Startup recovery from a previous unclean exit. +//! +//! After SIGKILL or a host crash, on-disk artefacts from the previous daemon +//! run can be left behind: a stale PID file containing a PID that is no longer +//! running, partially-written recordings, and iceoryx2 dead-node files. This +//! module exposes the small surface needed by +//! `cli::launch` to bring the host into a consistent state before the new +//!
daemon starts. + +use std::path::Path; + +use chrono::Utc; +use iceoryx2::config::Config; +use iceoryx2::node::Node; +use iceoryx2::prelude::ipc; + +use crate::lifecycle::pidfile::{pid_is_running, read_pid_from_file}; +use crate::state::{SqliteStateStore, StateStore, StateStoreError, TraceWriteStatus}; + +/// `last_updated` age (in seconds) below which a `writing` / +/// `pending_metadata` / `initializing` trace row is left alone by the +/// startup-time sweep. Comfortably larger than the trace_actor's debounce +/// flush interval, so a row a current daemon has just begun writing isn't +/// caught. +const STALE_WRITE_THRESHOLD_SECS: i64 = 30; + +/// Run the startup recovery sweeps over the state store. +/// +/// Re-arms rows stuck in transient pipeline states, burns trace rows left +/// mid-write, and purges partial recordings left behind by a previous daemon +/// run. Each sweep logs its outcome and is best-effort: a failure is logged +/// and startup continues. +pub async fn run_startup_sweeps(store: &SqliteStateStore, recordings_root: &Path) { + // The claim/drain queries that drive the coordinators only scan + // terminal-or-pending rows, so re-arming the transient `registering` / + // `uploading` rows is what stops a SIGKILL mid-upload from leaking traces. + match store.reset_stale_pipeline_states().await { + Ok(0) => {} + Ok(count) => tracing::info!(count, "re-armed stale pipeline rows from prior run"), + Err(error) => { + tracing::warn!(%error, "failed to reset stale pipeline states (continuing)") + } + } + // Burn trace rows the previous daemon left mid-write. + // Those rows can never reach `written` (the actor that owned them + // is gone) and would otherwise pin their recording in the + // "all traces written" gate the progress reporter waits on. The + // 30 s threshold gives a future daemon launched on top of an + // earlier orderly-shutdown's tail a chance to recover; in + // practice every row caught by this sweep is hours stale. 
+ match store + .mark_stale_writing_traces_failed(STALE_WRITE_THRESHOLD_SECS) + .await + { + Ok(0) => {} + Ok(count) => tracing::info!(count, "marked stale writing traces as failed"), + Err(error) => { + tracing::warn!(%error, "failed to mark stale writing traces failed (continuing)") + } + } + // Purge any recording the prior daemon left mid-write. Producer-side + // chunk spooling means we may have stranded NUT chunks, half-encoded + // segments, and partial concat outputs on disk; mid-encode resume is + // intentionally out of scope so anything not in the `written` + // terminal state at startup is removed and the recording marked + // cancelled. + match sweep_partial_recordings(store, recordings_root).await { + Ok(report) if report == Default::default() => {} + Ok(report) => tracing::info!( + purged = report.recordings_purged, + preserved = report.recordings_preserved, + "partial-recording sweep completed", + ), + Err(error) => { + tracing::warn!(%error, "partial-recording sweep failed (continuing)") + } + } +} + +/// Outcome of [`reclaim_stale_pid_file`], surfaced for logging. +#[derive(Debug, PartialEq, Eq)] +pub enum PidReclaim { + /// No PID file was present. + Absent, + /// A PID file was present and its PID is still alive — the next acquire + /// attempt will (correctly) report "already running". + StillRunning(i32), + /// A stale PID file (PID dead or unparseable) was removed. + RemovedStale(Option), +} + +/// Remove a PID file left by a previous SIGKILL'd daemon when its PID is no +/// longer running. +/// +/// The new launcher's `PidFile::acquire` would itself recover via `flock` +/// alone, but eagerly clearing a stale file makes the `status` command and +/// concurrent diagnostics report accurate state instead of a misleading +/// "daemon running (pid=…)" pointed at a dead PID.
+pub fn reclaim_stale_pid_file(pid_path: &Path) -> std::io::Result { + if !pid_path.exists() { + return Ok(PidReclaim::Absent); + } + + let pid = read_pid_from_file(pid_path); + if let Some(pid_value) = pid { + if pid_is_running(pid_value) { + return Ok(PidReclaim::StillRunning(pid_value)); + } + } + + // NOTE(review): this unlink happens without holding the file's `flock`, so + // it can race a launcher that has just locked the file but not yet written + // its PID (the file then reads as stale) and delete that launcher's live + // PID file. `PidFile::release` deliberately avoids unlinking for this + // reason; consider probing the lock instead of removing the path. + match std::fs::remove_file(pid_path) { + Ok(()) => Ok(PidReclaim::RemovedStale(pid)), + Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(PidReclaim::Absent), + Err(error) => Err(error), + } +} + +/// Outcome counters for [`sweep_partial_recordings`], surfaced for logging. +#[derive(Debug, Default, PartialEq, Eq)] +pub struct PartialSweepReport { + /// Number of recordings whose on-disk artefacts were removed because at + /// least one trace had not reached the `written` terminal state (or the + /// recording had no trace rows at all). + pub recordings_purged: usize, + /// Number of recordings inspected and left untouched (either every trace + /// was `written`, or the recording was already cancelled and has no + /// further state to clean up). + pub recordings_preserved: usize, +} + +/// Sweep partial recordings left behind by a previous daemon run. +/// +/// Producer-side chunk spooling means a SIGKILL between two +/// `VideoChunkReady` envelopes can leave the recording with on-disk +/// artefacts (NUT chunks, half-encoded segments, partial concat outputs) +/// that no current actor will pick up. Mid-encode resume is intentionally +/// out of scope — keeping the lifecycle simple is the point of the +/// per-chunk design — so anything not in the `written` terminal state at +/// startup is purged. +/// +/// For each recording: +/// - Already-cancelled recordings are skipped; the dispatcher's cancel +/// handler removed their on-disk state when the cancel originally fired. +/// - Recordings where every trace is `written` are left alone — the upload +/// path picks them up via the existing `TraceWritten` / pending-upload +/// gate.
+/// - Anything else: the recording's directory is recursively removed and +/// the recording row is `cancel_recording`'d so the registration / +/// upload / progress coordinators ignore it (and so the in-flight trace +/// rows are burned to terminal `failed`). +async fn sweep_partial_recordings( + store: &SqliteStateStore, + recordings_root: &Path, +) -> Result { + let mut report = PartialSweepReport::default(); + // Reclaim the producer video spool up front: any recording in flight at + // restart is corrupt, so the spooled NUT chunks staged under the spool dir + // are reclaimed wholesale rather than resumed. `tokio::fs` keeps the + // possibly-large tree removal off the runtime worker. A failure here is worth + // surfacing — a surviving spool can let a stale chunk relink into the next + // recording (cf. the `video_chunk_spans_recording` history). + let spool_root = crate::storage::paths::spool_root(recordings_root); + match tokio::fs::remove_dir_all(&spool_root).await { + Ok(()) => {} + Err(error) if error.kind() == std::io::ErrorKind::NotFound => {} + Err(error) => { + tracing::warn!( + %error, + path = %spool_root.display(), + "failed to purge producer video spool at recovery; stale chunks may relink" + ); + } + } + let recordings = store.list_recordings().await?; + for recording in recordings { + if recording.cancelled_at.is_some() { + // The original dispatcher cancel handler removed the on-disk + // state when the cancel fired; if anything is left behind on + // disk it was the cancel handler that failed, not a partial + // write — and re-cancelling would be a no-op. Leave it. + report.recordings_preserved += 1; + continue; + } + let traces = store + .list_traces_for_recording(recording.recording_index) + .await?; + let any_non_written = traces + .iter() + .any(|trace| trace.write_status != TraceWriteStatus::Written); + // Only a recording whose every trace finished writing is preserved.
A + // recording with no trace rows yet (created by `/recording/start` but + // killed before the producer wrote anything) fails the `!is_empty()` + // guard and is treated as partial — purged below. + if !any_non_written && !traces.is_empty() { + report.recordings_preserved += 1; + continue; + } + + let dir = recordings_root.join(recording.recording_index.to_string()); + match tokio::fs::remove_dir_all(&dir).await { + Ok(()) => {} + Err(error) if error.kind() == std::io::ErrorKind::NotFound => {} + Err(error) => { + tracing::warn!( + %error, + recording_index = recording.recording_index, + path = %dir.display(), + "failed to purge partial recording directory; continuing" + ); + } + } + + // Recovery has no producer cancel timestamp; use the recovery wall + // clock as the discarded recording's stop time (→ backend end_time). + let cancel_stop_ns = Utc::now().timestamp_nanos_opt().unwrap_or_default(); + if let Err(error) = store + .cancel_recording(recording.recording_index, cancel_stop_ns) + .await + { + tracing::warn!( + %error, + recording_index = recording.recording_index, + "failed to mark partial recording cancelled in state store; continuing" + ); + } + + report.recordings_purged += 1; + } + Ok(report) +} + +/// Reap stale iceoryx2 node files left by a SIGKILL'd daemon. +/// +/// After SIGKILL, iceoryx2's per-node discovery files survive on the +/// filesystem (typically `/tmp/iceoryx2/...`) and prevent a fresh daemon from +/// cleanly attaching to its own services if the OS reuses the killed PID. +/// `Node::cleanup_dead_nodes` walks the global discovery registry, classifies +/// each entry, and removes the artefacts of nodes whose owning process is +/// gone. +/// +/// Returns the number of dead nodes successfully reclaimed.
The call itself +/// is infallible from our perspective — failed cleanups are logged here as an +/// aggregate count (they typically indicate the current process lacks +/// permission to touch another user's resources, which is expected when +/// iceoryx2 is shared system-wide) and never block daemon startup. +/// +/// `NodeBuilder::create` *also* sweeps dead nodes on construction (controlled +/// by `cleanup_dead_nodes_on_creation`), but doing it eagerly here keeps the +/// `status` command's view of the system consistent before the new daemon +/// races to create its own node. +pub fn cleanup_stale_ipc() -> usize { + let report = Node::::cleanup_dead_nodes(Config::global_config()); + if report.failed_cleanups > 0 { + tracing::warn!( + failed = report.failed_cleanups, + "iceoryx2 dead-node sweep left {} artefacts behind (likely permission-denied; continuing)", + report.failed_cleanups + ); + } + report.cleanups +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn reclaim_returns_absent_when_no_pid_file() { + let dir = tempdir().unwrap(); + let path = dir.path().join("daemon.pid"); + assert_eq!(reclaim_stale_pid_file(&path).unwrap(), PidReclaim::Absent); + } + + #[test] + fn reclaim_removes_stale_pid_file_with_dead_pid() { + let dir = tempdir().unwrap(); + let path = dir.path().join("daemon.pid"); + // `i32::MAX` is always above the kernel's `pid_max` (default 32768 + // on most distros, 2^22 on tuned hosts) and so is guaranteed not to + // refer to a running process. Mirrors the trick used by + // `pid_is_running_true_for_self_and_false_for_unused_pid` in + // `pidfile::tests`.
+ std::fs::write(&path, format!("{}\n", i32::MAX)).unwrap(); + let outcome = reclaim_stale_pid_file(&path).unwrap(); + assert_eq!(outcome, PidReclaim::RemovedStale(Some(i32::MAX))); + assert!(!path.exists()); + } + + #[test] + fn reclaim_removes_stale_pid_file_with_garbage_contents() { + let dir = tempdir().unwrap(); + let path = dir.path().join("daemon.pid"); + std::fs::write(&path, "not-a-pid\n").unwrap(); + let outcome = reclaim_stale_pid_file(&path).unwrap(); + assert_eq!(outcome, PidReclaim::RemovedStale(None)); + assert!(!path.exists()); + } + + #[test] + fn reclaim_leaves_running_pid_file_in_place() { + let dir = tempdir().unwrap(); + let path = dir.path().join("daemon.pid"); + let our_pid = std::process::id() as i32; + std::fs::write(&path, format!("{our_pid}\n")).unwrap(); + let outcome = reclaim_stale_pid_file(&path).unwrap(); + assert_eq!(outcome, PidReclaim::StillRunning(our_pid)); + assert!(path.exists()); + } + + use crate::state::store::TraceUpdate; + use crate::state::NewRecording; + use crate::storage::paths::TracePath; + + #[tokio::test] + async fn partial_recording_is_removed_on_startup() { + let dir = tempdir().unwrap(); + let store = SqliteStateStore::open(&dir.path().join("state.db")) + .await + .expect("open store"); + let recordings_root = dir.path().join("recordings"); + + let recording_index = store + .create_recording(NewRecording { + robot_id: Some("robot-1"), + robot_instance: Some(0), + ..Default::default() + }) + .await + .unwrap() + .recording_index; + store + .create_trace(recording_index, "trace-1", Some("RGB"), None) + .await + .unwrap(); + store + .update_trace( + "trace-1", + TraceUpdate { + write_status: Some(TraceWriteStatus::Writing), + ..Default::default() + }, + ) + .await + .unwrap(); + + // Synthesise leftover on-disk state the previous daemon would have + // produced — a chunks/ dir and a half-encoded segment.
+ let trace_dir = TracePath::new(recording_index.to_string(), "RGB", "trace-1") + .directory(&recordings_root); + let chunks_dir = trace_dir.join("chunks"); + std::fs::create_dir_all(&chunks_dir).unwrap(); + std::fs::write(chunks_dir.join("chunk_0000.nut"), b"stale-bytes").unwrap(); + std::fs::write(trace_dir.join("chunk_0000_lossy.mp4"), b"halfway").unwrap(); + + let report = sweep_partial_recordings(&store, &recordings_root) + .await + .expect("sweep"); + assert_eq!(report.recordings_purged, 1); + assert_eq!(report.recordings_preserved, 0); + assert!(!recordings_root.join(recording_index.to_string()).exists()); + + let recording = store.get_recording(recording_index).await.unwrap().unwrap(); + assert!(recording.cancelled_at.is_some()); + } + + #[tokio::test] + async fn completed_recording_is_preserved_for_upload() { + let dir = tempdir().unwrap(); + let store = SqliteStateStore::open(&dir.path().join("state.db")) + .await + .expect("open store"); + let recordings_root = dir.path().join("recordings"); + + let recording_index = store + .create_recording(NewRecording { + robot_id: Some("robot-2"), + robot_instance: Some(0), + ..Default::default() + }) + .await + .unwrap() + .recording_index; + store + .create_trace(recording_index, "trace-2", Some("RGB"), None) + .await + .unwrap(); + store + .update_trace( + "trace-2", + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + total_bytes: Some(1024), + ..Default::default() + }, + ) + .await + .unwrap(); + + let trace_dir = TracePath::new(recording_index.to_string(), "RGB", "trace-2") + .directory(&recordings_root); + std::fs::create_dir_all(&trace_dir).unwrap(); + std::fs::write(trace_dir.join("lossy.mp4"), b"keep-me").unwrap(); + std::fs::write(trace_dir.join("lossless.mp4"), b"keep-me-too").unwrap(); + + let report = sweep_partial_recordings(&store, &recordings_root) + .await + .expect("sweep"); + assert_eq!(report.recordings_purged, 0); + assert_eq!(report.recordings_preserved, 1); + 
assert!(trace_dir.join("lossy.mp4").exists()); + assert!(trace_dir.join("lossless.mp4").exists()); + + let recording = store.get_recording(recording_index).await.unwrap().unwrap(); + assert!(recording.cancelled_at.is_none()); + } + + #[test] + fn cleanup_stale_ipc_is_safe_on_a_clean_host() { + // Smoke test: the call must return even when there are no dead + // nodes to reclaim. The real reclamation path is exercised by the + // end-to-end signal-cleanup integration test; reproducing a SIGKILL'd + // iceoryx2 node from inside a cargo test would require spawning a + // child binary, which is out of scope here. + // + // We can't assert the exact count because a parallel cargo test + // process could be creating nodes; we just check the call returned. + let _ = cleanup_stale_ipc(); + } +} diff --git a/rust/data_daemon/src/lifecycle/shutdown.rs b/rust/data_daemon/src/lifecycle/shutdown.rs new file mode 100644 index 000000000..41e5ddcca --- /dev/null +++ b/rust/data_daemon/src/lifecycle/shutdown.rs @@ -0,0 +1,110 @@ +//! Async SIGTERM / SIGINT handling, fanned out to subscribers over a +//! `tokio::sync::broadcast`. +//! +//! Both signals trigger a graceful shutdown: the broadcast channel is the +//! notification the daemon's main loop awaits. SIGHUP is intentionally not +//! handled. + +use tokio::signal::unix::{signal, SignalKind}; +use tokio::sync::broadcast; + +/// Source of a graceful-shutdown notification, useful for log messages. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ShutdownSignal { + /// `SIGTERM` (default `kill` signal, CLI `stop` command). + Sigterm, + /// `SIGINT` (Ctrl-C from a controlling terminal). + Sigint, +} + +/// Broadcasts the shutdown notification to subscribers: call +/// [`subscribe`](Self::subscribe) for each task that needs to wait for shutdown. +#[derive(Clone)] +pub struct ShutdownBroadcaster { + sender: broadcast::Sender, +} + +impl ShutdownBroadcaster { + /// Subscribe to receive a single shutdown notification. 
+ pub fn subscribe(&self) -> broadcast::Receiver { + self.sender.subscribe() + } + + /// Fire an explicit shutdown (used by `SystemExit`-style flows and + /// tests). Returns the number of receivers notified. + #[allow(dead_code)] + pub fn signal(&self, kind: ShutdownSignal) -> usize { + self.sender.send(kind).unwrap_or(0) + } +} + +/// Install async SIGTERM and SIGINT handlers, returning a +/// [`ShutdownBroadcaster`] and the *primary* shutdown receiver that the caller +/// must await. +/// +/// Returning the primary receiver alongside the handle closes a race that +/// would otherwise exist between the supervisor task's first `send` and the +/// caller's first `subscribe()`: `broadcast::Sender::send` returns +/// `SendError` when there are zero receivers, and `broadcast` does not +/// replay messages for receivers that subscribe later. By constructing the +/// primary receiver up-front via `broadcast::channel`, we guarantee at least +/// one receiver exists from the moment the supervisor task starts. +pub fn install_shutdown_handler( +) -> std::io::Result<(ShutdownBroadcaster, broadcast::Receiver)> { + let (sender, primary_receiver) = broadcast::channel(8); + let supervisor_sender = sender.clone(); + + let mut sigterm = signal(SignalKind::terminate())?; + let mut sigint = signal(SignalKind::interrupt())?; + + tokio::spawn(async move { + loop { + let received = tokio::select! { + Some(()) = sigterm.recv() => ShutdownSignal::Sigterm, + Some(()) = sigint.recv() => ShutdownSignal::Sigint, + else => return, + }; + tracing::info!(signal = ?received, "shutdown signal received"); + // The primary receiver returned from this function keeps the + // channel populated with at least one receiver; further sends + // only fail if every subscriber has been dropped (typically + // during shutdown), which we ignore. 
+ let _ = supervisor_sender.send(received); + } + }); + + Ok((ShutdownBroadcaster { sender }, primary_receiver)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn explicit_signal_reaches_subscriber() { + // Construct the channel directly (we cannot install signal handlers + // in tests because they're a process-global resource) and exercise + // the `signal` / `subscribe` plumbing the supervisor task relies on. + let (sender, primary_receiver) = broadcast::channel(8); + let handle = ShutdownBroadcaster { sender }; + // Drop the primary so we can test that the explicit subscribe path + // also works for additional listeners. + drop(primary_receiver); + let mut subscriber = handle.subscribe(); + + let notified = handle.signal(ShutdownSignal::Sigterm); + assert_eq!(notified, 1); + let received = subscriber.recv().await.expect("recv"); + assert_eq!(received, ShutdownSignal::Sigterm); + } + + #[tokio::test] + async fn signal_with_no_subscribers_returns_zero() { + let (sender, primary_receiver) = broadcast::channel(8); + let handle = ShutdownBroadcaster { sender }; + drop(primary_receiver); + // No live receivers β€” `send` returns Err, surfaced as `0` from our + // `signal` wrapper. + assert_eq!(handle.signal(ShutdownSignal::Sigint), 0); + } +} diff --git a/rust/data_daemon/src/main.rs b/rust/data_daemon/src/main.rs new file mode 100644 index 000000000..6d69d6173 --- /dev/null +++ b/rust/data_daemon/src/main.rs @@ -0,0 +1,33 @@ +//! Neuracore data daemon. +//! +//! Entry point: parses the CLI synchronously and dispatches to a command +//! handler. The handler decides whether to spin up the Tokio runtime β€” most +//! subcommands (`status`, `stop`, `profile *`) do not need it, and the +//! `launch --background` path must `fork` *before* a multi-threaded runtime is +//! created (forking after spawning worker threads is undefined behaviour). 
+ +// The cloud subsystem and HTTP client expose a wide surface that the daemon +// binary consumes via the launch routine β€” `cargo check` would otherwise +// flag the not-yet-reachable items as dead code. +#[allow(dead_code)] +mod api; +mod cli; +#[allow(dead_code)] +mod cloud; +pub use data_daemon_shared::config; +#[allow(dead_code)] +mod connection; +mod encoding; +#[allow(dead_code)] +mod intervals; +mod ipc; +mod lifecycle; +mod pipeline; +mod state; +mod storage; + +use anyhow::Result; + +fn main() -> Result<()> { + cli::run() +} diff --git a/rust/data_daemon/src/pipeline/dispatcher.rs b/rust/data_daemon/src/pipeline/dispatcher.rs new file mode 100644 index 000000000..51f9e4cc7 --- /dev/null +++ b/rust/data_daemon/src/pipeline/dispatcher.rs @@ -0,0 +1,1449 @@ +//! Routes source/sensor-tagged data into recording windows and on to per-trace +//! actors. +//! +//! The producer is a thin shipper: it publishes lifecycle events +//! (`StartRecording` / `StopRecording` / `CancelRecording`) and +//! source/sensor/timestamp-tagged data, knowing nothing about recordings. This +//! single-owner dispatcher task decides which recording each datum belongs to: +//! +//! - **Lifecycle events are applied immediately**, mutating the per-source +//! active-window map. `StartRecording` allocates a `recording_index` and +//! opens a window; `StopRecording` closes it (begins the drain); `Cancel` +//! tears it down. +//! - **Data is held for a fixed holdback** in a per-source-ordered queue, then +//! routed by its `publish_timestamp_ns` (a wall-clock instant stamped by the +//! producer at publish, on the same clock as the lifecycle bounds) into the +//! window whose `[started_at_ns, stopped_at_ns)` contains it. The holdback +//! absorbs the cross-publisher arrival skew that the old per-frame +//! `sequence_number` machinery used to reconcile. +//! +//! Membership is decided by the *publish timestamp*, never arrival time, and is +//! 
decoupled from the data's own capture clock β€” so cross-publisher reorder +//! cannot change which recording a datum belongs to, only when it is observed, +//! which the holdback + a closing-window retention of `2Β·HOLDBACK` absorb. A +//! just-closed window stays resolvable until every legitimately-held datum has +//! been released; finalisation is then a single `WindowClosing` signal to each +//! actor (no sequence counting). +//! +//! Everything here is owned by one tokio task, so the window map and holdback +//! queue need no locks β€” total ordering through the `select!` loop is what +//! makes the routing decisions provable. + +use std::collections::{HashMap, VecDeque}; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use chrono::Utc; +use data_daemon_shared::{BatchedDataItem, Envelope}; +use tokio::sync::{broadcast, mpsc}; +use tokio::task::JoinHandle; +use tokio::time::sleep; +use uuid::Uuid; + +use crate::lifecycle::shutdown::ShutdownSignal; +use crate::pipeline::trace_actor::{ + self, TraceActorContext, TraceActorMessage, TraceIdentity, TraceKey, +}; +use crate::state::{DaemonEvent, NewRecording, SqliteStateStore, StateStore}; +use crate::storage::paths; + +/// Default holdback: each data envelope waits this long after daemon receipt +/// before it is routed. Tunable via `NCD_HOLDBACK_MS`. A generous default is +/// safe β€” joint/scalar data is sparse, so even a 1 s holdback retains only a +/// few thousand small envelopes per source. Completeness (catching a datum +/// whose publisher was preempted between capture and publish) scales directly +/// with this value. +const DEFAULT_HOLDBACK_MS: u64 = 500; + +/// Environment override for the holdback, in milliseconds. +const HOLDBACK_ENV: &str = "NCD_HOLDBACK_MS"; + +/// A source silent (no data, no lifecycle) for this long has its open window +/// force-closed as a crash backstop, so a producer that died without a Stop +/// still finalises (or is swept). 
Distinct from the restart sweep, which +/// handles a daemon that itself died. +const IDLE_REAP: Duration = Duration::from_secs(30); + +/// How long an active source is polled for due releases / evictions. A fully +/// idle daemon (no held data, no closing windows) sleeps [`IDLE_REAP`] instead. +/// The coarse cadence adds at most this much jitter to a release deadline, +/// negligible against the holdback. +const HOUSEKEEP_INTERVAL: Duration = Duration::from_millis(25); + +/// Bounded per-trace queue size. A smaller cap acts as a forced flush throttle; +/// 256 absorbs the high-dimensionality burst at the cost of ~10 KiB of message +/// headers per trace. +const TRACE_QUEUE_CAPACITY: usize = 256; + +/// Bounded listener β†’ dispatcher channel. +const DISPATCHER_INBOX_CAPACITY: usize = 1024; + +/// Source identity: `(robot_id, robot_instance)`. +type Source = (String, i64); + +/// Resolve the configured holdback, honouring the `NCD_HOLDBACK_MS` override. +fn configured_holdback() -> Duration { + let millis = std::env::var(HOLDBACK_ENV) + .ok() + .and_then(|raw| raw.trim().parse::().ok()) + .unwrap_or(DEFAULT_HOLDBACK_MS); + Duration::from_millis(millis) +} + +/// Handle owned by the daemon main loop. Drop it on shutdown to close every +/// per-trace actor. +pub struct DispatcherHandle { + join: JoinHandle<()>, +} + +impl DispatcherHandle { + /// Wait for the dispatcher to finish processing in-flight messages and the + /// per-trace actors to terminate. + pub async fn shutdown(self) { + if let Err(error) = self.join.await { + tracing::warn!(?error, "dispatcher task join failed during shutdown"); + } + } +} + +/// Optional runtime context passed to the dispatcher. +#[derive(Clone, Default)] +pub struct DispatcherContext { + /// Daemon event bus, used to publish recording/trace lifecycle events. + pub event_bus: Option, +} + +/// Spawn the dispatcher task and return its inbound `mpsc::Sender`. 
+/// +/// Test-only convenience over [`spawn_with_context`] with a default context. +#[cfg(test)] +pub fn spawn( + store: SqliteStateStore, + actor_context: Arc, + shutdown_rx: broadcast::Receiver, +) -> (mpsc::Sender, DispatcherHandle) { + spawn_with_context( + store, + actor_context, + DispatcherContext::default(), + shutdown_rx, + ) +} + +/// Spawn the dispatcher with an explicit [`DispatcherContext`]. +pub fn spawn_with_context( + store: SqliteStateStore, + actor_context: Arc, + context: DispatcherContext, + shutdown_rx: broadcast::Receiver, +) -> (mpsc::Sender, DispatcherHandle) { + let (tx, rx) = mpsc::channel::(DISPATCHER_INBOX_CAPACITY); + let join = tokio::spawn(async move { + let mut dispatcher = Dispatcher::new(store, actor_context, context); + dispatcher.run(rx, shutdown_rx).await; + }); + (tx, DispatcherHandle { join }) +} + +/// A per-trace actor's routing handle, stored inside its window. +struct TraceHandle { + sender: mpsc::Sender, + /// Daemon-assigned, per-trace monotonic video chunk index. + next_video_chunk: u32, +} + +/// One recording window for a source. +/// +/// Membership is decided by the producer **publish-clock** boundaries +/// `[started_at_ns, stopped_at_ns)`. Every data envelope carries a +/// `publish_timestamp_ns` stamped at publish on the same wall clock the +/// lifecycle `started_at_ns` / `stopped_at_ns` use, so routing never depends +/// on the data's own (possibly custom) capture clock β€” it depends only on when +/// the producer published, which is exactly "which recording was active then". +struct ActiveWindow { + recording_index: i64, + /// Inclusive lower bound β€” the lifecycle publish time of the start. + started_at_ns: i64, + /// Exclusive upper bound β€” the lifecycle publish time of the stop. `None` + /// while live (open above). + stopped_at_ns: Option, + /// Daemon clock at which the window closed β€” drives the eviction deadline. + stop_recv_at: Option, + /// Per-trace actors spawned within this window. 
+ traces: HashMap, +} + +impl ActiveWindow { + /// Does this window's `[started_at_ns, stopped_at_ns)` contain `ts`? + fn contains(&self, ts: i64) -> bool { + ts >= self.started_at_ns && self.stopped_at_ns.is_none_or(|stop| ts < stop) + } +} + +/// All windows currently tracked for one source: at most one live, plus +/// recently-closed windows retained until their late data has drained. +#[derive(Default)] +struct WindowsForSource { + live: Option, + closing: Vec, + /// Daemon clock of the last envelope seen for this source β€” drives the + /// idle reaper. + last_seen: Option, +} + +/// One held data envelope awaiting its holdback release. +struct Held { + source: Source, + release_at: Instant, + /// Producer publish time β€” the window-membership key, decided at release. + publish_timestamp_ns: i64, + payload: HeldPayload, +} + +/// The data carried by a held envelope. `timestamp_ns` / `timestamp_s` here are +/// the data's *own* capture clock (content), never routing. +enum HeldPayload { + Data { + data_type: String, + sensor_name: Option, + timestamp_ns: i64, + timestamp_s: Option, + payload: Vec, + }, + Batch { + data_type: String, + timestamp_ns: i64, + timestamp_s: Option, + items: Vec, + }, + Video { + data_type: String, + sensor_name: Option, + thread_id: i64, + width: u32, + height: u32, + byte_count: u64, + frame_count: u32, + frame_timestamps_s: Vec, + }, +} + +/// The dispatcher's task-local state. +struct Dispatcher { + store: SqliteStateStore, + actor_context: Arc, + context: DispatcherContext, + holdback: Duration, + /// Per-source window map. + windows: HashMap, + /// Holdback queue, monotonic in `release_at` (fixed offset + arrival + /// order). + held: VecDeque, + /// Join handles for every spawned actor, awaited on shutdown. + actor_handles: Vec>, + /// Rate-limited orphan-drop counter (data outside any window). + orphan_drops: u64, + /// When the eviction + idle-reap scans last ran. 
Those scans are throttled + /// to [`HOUSEKEEP_INTERVAL`] so a data stream arriving faster than that + /// doesn't re-run the two full window scans (and their `Vec` allocations) + /// on every inbound envelope β€” only the cheap holdback release does. + last_housekeep: Instant, +} + +impl Dispatcher { + fn new( + store: SqliteStateStore, + actor_context: Arc, + context: DispatcherContext, + ) -> Self { + Self { + store, + actor_context, + context, + holdback: configured_holdback(), + windows: HashMap::new(), + held: VecDeque::new(), + actor_handles: Vec::new(), + orphan_drops: 0, + last_housekeep: Instant::now(), + } + } + + async fn run( + &mut self, + mut rx: mpsc::Receiver, + mut shutdown_rx: broadcast::Receiver, + ) { + tracing::info!( + holdback_ms = self.holdback.as_millis(), + "dispatcher started" + ); + + loop { + // When there is in-flight work, poll frequently for due releases / + // evictions; otherwise sleep until the next idle-reap horizon. + let housekeep_after = if self.held.is_empty() && !self.any_closing() { + IDLE_REAP + } else { + HOUSEKEEP_INTERVAL + }; + + tokio::select! { + biased; + signal = shutdown_rx.recv() => { + tracing::debug!(?signal, "dispatcher shutting down"); + break; + } + envelope = rx.recv() => { + let Some(envelope) = envelope else { + tracing::debug!("dispatcher inbox closed; exiting"); + break; + }; + self.handle_inbound(envelope, Instant::now()).await; + } + _ = sleep(housekeep_after) => {} + } + + // Holdback releases run on every wake-up; the housekeeping scans + // are throttled to HOUSEKEEP_INTERVAL (see `last_housekeep`). + let now = Instant::now(); + self.release_due_holdback(now).await; + if now.duration_since(self.last_housekeep) >= HOUSEKEEP_INTERVAL { + self.housekeep(now).await; + self.last_housekeep = now; + } + } + + self.shutdown().await; + } + + /// Apply one inbound envelope. Lifecycle events take effect immediately; + /// data envelopes enter the holdback queue. 
+ async fn handle_inbound(&mut self, envelope: Envelope, recv_at: Instant) { + match envelope { + Envelope::StartRecording { + robot_id, + robot_instance, + dataset_id, + publish_timestamp_ns, + timestamp_ns, + .. + } => { + self.handle_start( + (robot_id, robot_instance), + dataset_id, + publish_timestamp_ns, + timestamp_ns, + recv_at, + ) + .await; + } + Envelope::StopRecording { + robot_id, + robot_instance, + publish_timestamp_ns, + timestamp_ns, + } => { + self.handle_stop( + (robot_id, robot_instance), + publish_timestamp_ns, + timestamp_ns, + recv_at, + ) + .await; + } + Envelope::CancelRecording { + robot_id, + robot_instance, + timestamp_ns, + } => { + self.handle_cancel((robot_id, robot_instance), timestamp_ns) + .await; + } + Envelope::Data { + robot_id, + robot_instance, + data_type, + sensor_name, + publish_timestamp_ns, + timestamp_ns, + timestamp_s, + payload, + } => { + let source = (robot_id, robot_instance); + self.touch_source(&source, recv_at); + self.held.push_back(Held { + source, + release_at: recv_at + self.holdback, + publish_timestamp_ns, + payload: HeldPayload::Data { + data_type, + sensor_name, + timestamp_ns, + timestamp_s, + payload, + }, + }); + } + Envelope::BatchedData { + robot_id, + robot_instance, + data_type, + publish_timestamp_ns, + timestamp_ns, + timestamp_s, + items, + } => { + let source = (robot_id, robot_instance); + self.touch_source(&source, recv_at); + self.held.push_back(Held { + source, + release_at: recv_at + self.holdback, + publish_timestamp_ns, + payload: HeldPayload::Batch { + data_type, + timestamp_ns, + timestamp_s, + items, + }, + }); + } + Envelope::VideoChunkReady { + robot_id, + robot_instance, + data_type, + sensor_name, + publish_timestamp_ns, + thread_id, + width, + height, + byte_count, + frame_count, + frame_timestamps_ns, + frame_timestamps_s, + } => { + let source = (robot_id, robot_instance); + self.touch_source(&source, recv_at); + let _ = frame_timestamps_ns; // capture-clock content, not 
routing + self.held.push_back(Held { + source, + release_at: recv_at + self.holdback, + publish_timestamp_ns, + payload: HeldPayload::Video { + data_type, + sensor_name, + thread_id, + width, + height, + byte_count, + frame_count, + frame_timestamps_s, + }, + }); + } + } + } + + fn touch_source(&mut self, source: &Source, recv_at: Instant) { + // Hot path: every inbound `Data` / `BatchedData` / `VideoChunkReady` + // envelope touches its source. The common case is an existing window, so + // probe with `get_mut` (no allocation) and only clone the `(String, i64)` + // key on the rare first-insert. + if let Some(window) = self.windows.get_mut(source) { + window.last_seen = Some(recv_at); + } else { + self.windows.entry(source.clone()).or_default().last_seen = Some(recv_at); + } + } + + #[allow(clippy::too_many_arguments)] + async fn handle_start( + &mut self, + source: Source, + dataset_id: Option, + publish_timestamp_ns: i64, + timestamp_ns: i64, + recv_at: Instant, + ) { + // Insert the recording row synchronously: cloud notifiers react to the + // `RecordingStarted` event by reading this row, and `cancel_recording` + // burns it by index, so the row must exist before either runs. After the + // create_trace burst was folded into the write-behind (the actors no + // longer create rows here), this is a single uncontended write. + // + // The row's `start_timestamp_ns` is the caller's *capture* time (β†’ + // backend `start_time`); the window opens on the *publish* clock below. 
+ let new = NewRecording { + robot_id: Some(&source.0), + robot_instance: Some(source.1), + dataset_id: dataset_id.as_deref(), + start_timestamp_ns: timestamp_ns, + }; + let recording_index = match self.store.create_recording(new).await { + Ok(row) => row.recording_index, + Err(error) => { + tracing::warn!(%error, robot_id = source.0, "failed to create recording row"); + return; + } + }; + tracing::info!(recording_index, robot_id = source.0, "recording started"); + + let entry = self.windows.entry(source).or_default(); + entry.last_seen = Some(recv_at); + // An idle-reaped window sits in `closing` with an open upper bound + // (`i64::MAX`) to catch stragglers; clamp any such window to this new + // start so a restarted recording's data cannot be mis-routed into it + // (`window_for_mut` checks `closing` before the live window). + for closing in entry.closing.iter_mut() { + let open_past_start = closing + .stopped_at_ns + .is_none_or(|stop| stop >= publish_timestamp_ns); + if open_past_start { + closing.stopped_at_ns = Some(publish_timestamp_ns); + if closing.stop_recv_at.is_none() { + closing.stop_recv_at = Some(recv_at); + } + } + } + // A well-behaved producer stops before starting; if a live window is + // somehow still open, retire it to `closing` bounded at the new start's + // publish time so it stops catching data published after this point. 
+ if let Some(mut previous) = entry.live.take() { + if previous.stopped_at_ns.is_none() { + previous.stopped_at_ns = Some(publish_timestamp_ns); + previous.stop_recv_at = Some(recv_at); + } + entry.closing.push(previous); + } + entry.live = Some(ActiveWindow { + recording_index, + started_at_ns: publish_timestamp_ns, + stopped_at_ns: None, + stop_recv_at: None, + traces: HashMap::new(), + }); + + if let Some(bus) = self.context.event_bus.as_ref() { + bus.publish(DaemonEvent::RecordingStarted { recording_index }); + } + } + + async fn handle_stop( + &mut self, + source: Source, + publish_timestamp_ns: i64, + timestamp_ns: i64, + recv_at: Instant, + ) { + let Some(entry) = self.windows.get_mut(&source) else { + tracing::debug!(robot_id = source.0, "stop for unknown source; ignoring"); + return; + }; + entry.last_seen = Some(recv_at); + let Some(mut window) = entry.live.take() else { + tracing::debug!( + robot_id = source.0, + "stop with no active recording; ignoring" + ); + return; + }; + // The window closes on the publish clock; the row's `stop_timestamp_ns` + // (β†’ backend `end_time`) is the caller's capture time. + window.stopped_at_ns = Some(publish_timestamp_ns); + window.stop_recv_at = Some(recv_at); + let recording_index = window.recording_index; + entry.closing.push(window); + + // Persist `stopped_at` before publishing the event: the cloud + // stop-notifier reads this row on `RecordingStopped`, so the timestamp + // must be on disk first. 
+ if let Err(error) = self + .store + .mark_recording_stopped(recording_index, timestamp_ns) + .await + { + tracing::warn!(%error, recording_index, "failed to mark recording stopped"); + } else { + tracing::info!(recording_index, "recording stopped"); + if let Some(bus) = self.context.event_bus.as_ref() { + bus.publish(DaemonEvent::RecordingStopped { recording_index }); + } + } + } + + async fn handle_cancel(&mut self, source: Source, timestamp_ns: i64) { + let Some(mut entry) = self.windows.remove(&source) else { + return; + }; + // Drop any held data for this source β€” a cancelled recording's data + // must never reach an actor. + self.held.retain(|held| held.source != source); + + let mut windows: Vec = Vec::new(); + if let Some(live) = entry.live.take() { + windows.push(live); + } + windows.append(&mut entry.closing); + + for window in windows { + let recording_index = window.recording_index; + for (_, handle) in window.traces { + let _ = handle.sender.send(TraceActorMessage::Cancel).await; + } + // Purge any not-yet-flushed trace creates for this recording before + // burning its rows, so a late batch can't insert an orphan row for + // a recording that's already cancelled. + self.actor_context + .trace_writer + .drop_recording(recording_index) + .await; + // The cancel's capture timestamp becomes the row's + // `stop_timestamp_ns` (β†’ backend `end_time`), exactly as a stop. + match self + .store + .cancel_recording(recording_index, timestamp_ns) + .await + { + Ok((_, touched)) => { + tracing::info!( + recording_index, + trace_rows_touched = touched, + "recording cancelled" + ); + if let Some(bus) = self.context.event_bus.as_ref() { + bus.publish(DaemonEvent::RecordingCancelled { recording_index }); + } + } + Err(error) => { + tracing::warn!(%error, recording_index, "failed to mark recording cancelled"); + } + } + } + } + + /// True when any source has a retained closing window. 
+ fn any_closing(&self) -> bool { + self.windows.values().any(|entry| !entry.closing.is_empty()) + } + + /// Release every held envelope whose hold has elapsed. Cheap β€” pops only + /// what is due β€” and runs on every dispatcher wake-up. Kept strictly ahead + /// of [`housekeep`](Self::housekeep)'s evictions so a datum releasing in + /// this tick still finds its (possibly closing) window. + async fn release_due_holdback(&mut self, now: Instant) { + while self.held.front().is_some_and(|held| held.release_at <= now) { + let held = self.held.pop_front().expect("front checked"); + self.route(held).await; + } + } + + /// Evict windows past their retention and force-close idle sources. Two full + /// window scans, so throttled to [`HOUSEKEEP_INTERVAL`] by the caller rather + /// than run per inbound envelope. + async fn housekeep(&mut self, now: Instant) { + // 2. Window evictions: a closing window is retained for 2Β·HOLDBACK + // after its stop, by which point all in-window data has released. + let retention = self.holdback * 2; + let mut closing_actors: Vec = Vec::new(); + let mut empty_sources: Vec = Vec::new(); + for (source, entry) in self.windows.iter_mut() { + entry.closing.retain_mut(|window| { + let evict = window + .stop_recv_at + .is_some_and(|at| now.duration_since(at) >= retention); + if evict { + for (_, handle) in window.traces.drain() { + closing_actors.push(handle); + } + false + } else { + true + } + }); + if entry.live.is_none() + && entry.closing.is_empty() + && entry + .last_seen + .is_none_or(|at| now.duration_since(at) >= IDLE_REAP) + { + empty_sources.push(source.clone()); + } + } + // Send WindowClosing to every actor of an evicted window. Their senders + // drop after, so each actor finalises and exits. + for handle in closing_actors { + let _ = handle.sender.send(TraceActorMessage::WindowClosing).await; + } + for source in empty_sources { + self.windows.remove(&source); + } + + // 3. 
Idle reaper: force-close a live window whose source has gone + // silent (producer crashed without a Stop). + self.reap_idle(now).await; + } + + /// Force-close any live window whose source has been silent past + /// [`IDLE_REAP`], giving it an open upper bound (`i64::MAX`) so any + /// straggler data still routes to it before eviction; the row's capture + /// stop time is the reap moment, so the recording reaches a terminal, + /// notifiable state. + async fn reap_idle(&mut self, now: Instant) { + let stale: Vec = self + .windows + .iter() + .filter(|(_, entry)| { + entry.live.is_some() + && entry + .last_seen + .is_some_and(|at| now.duration_since(at) >= IDLE_REAP) + }) + .map(|(source, _)| source.clone()) + .collect(); + for source in stale { + tracing::warn!( + robot_id = source.0, + "source idle past reap horizon; force-closing window" + ); + let Some(entry) = self.windows.get_mut(&source) else { + continue; + }; + let Some(mut window) = entry.live.take() else { + continue; + }; + // The producer crashed without a Stop, so there is no next + // recording to partition against β€” keep the window's publish upper + // bound open (`i64::MAX`) to catch any straggler data before + // eviction. The row's capture stop time (β†’ backend `end_time`) is + // the reap moment, so the backend reports a finite end rather than + // the year-2262 the `i64::MAX` window sentinel would imply. 
+ window.stopped_at_ns = Some(i64::MAX); + window.stop_recv_at = Some(now); + let recording_index = window.recording_index; + entry.closing.push(window); + let stop_capture_ns = Utc::now().timestamp_nanos_opt().unwrap_or(i64::MAX); + if let Err(error) = self + .store + .mark_recording_stopped(recording_index, stop_capture_ns) + .await + { + tracing::warn!(%error, recording_index, "failed to mark idle recording stopped"); + } else if let Some(bus) = self.context.event_bus.as_ref() { + bus.publish(DaemonEvent::RecordingStopped { recording_index }); + } + } + } + + /// Route one released held envelope into its window's actors, using its + /// `publish_timestamp_ns` as the membership key. + async fn route(&mut self, held: Held) { + let publish_ts = held.publish_timestamp_ns; + match held.payload { + HeldPayload::Data { + data_type, + sensor_name, + timestamp_ns, + timestamp_s, + payload, + } => { + self.route_data( + &held.source, + publish_ts, + data_type, + sensor_name, + timestamp_ns, + timestamp_s, + payload, + ) + .await; + } + HeldPayload::Batch { + data_type, + timestamp_ns, + timestamp_s, + items, + } => { + for item in items { + self.route_data( + &held.source, + publish_ts, + data_type.clone(), + item.sensor_name, + timestamp_ns, + timestamp_s, + item.payload, + ) + .await; + } + } + HeldPayload::Video { + data_type, + sensor_name, + thread_id, + width, + height, + byte_count, + frame_count, + frame_timestamps_s, + } => { + self.route_video( + &held.source, + publish_ts, + data_type, + sensor_name, + thread_id, + width, + height, + byte_count, + frame_count, + frame_timestamps_s, + ) + .await; + } + } + } + + /// Find the window for `source` containing `ts`. Closing windows are + /// bounded on both sides and are checked first (newest-first); the live + /// window is an unbounded-above catch-all, so it must be the last resort or + /// it would steal data belonging to a just-closed window. 
+ fn window_for_mut(entry: &mut WindowsForSource, ts: i64) -> Option<&mut ActiveWindow> { + if let Some(pos) = entry.closing.iter().rposition(|window| window.contains(ts)) { + return entry.closing.get_mut(pos); + } + if entry + .live + .as_ref() + .is_some_and(|window| window.contains(ts)) + { + return entry.live.as_mut(); + } + None + } + + #[allow(clippy::too_many_arguments)] + async fn route_data( + &mut self, + source: &Source, + publish_ts: i64, + data_type: String, + sensor_name: Option, + timestamp_ns: i64, + timestamp_s: Option, + payload: Vec, + ) { + let Some(entry) = self.windows.get_mut(source) else { + self.note_orphan(); + return; + }; + let Some(window) = Self::window_for_mut(entry, publish_ts) else { + self.note_orphan(); + return; + }; + let sender = Self::ensure_actor( + window, + &self.actor_context, + data_type, + sensor_name, + &mut self.actor_handles, + ) + .sender + .clone(); + if sender + .send(TraceActorMessage::Data { + timestamp_ns, + timestamp_s, + payload, + }) + .await + .is_err() + { + tracing::warn!("trace actor inbox closed; dropping data"); + } + } + + #[allow(clippy::too_many_arguments)] + async fn route_video( + &mut self, + source: &Source, + publish_ts: i64, + data_type: String, + sensor_name: Option, + thread_id: i64, + width: u32, + height: u32, + byte_count: u64, + frame_count: u32, + frame_timestamps_s: Vec, + ) { + let recordings_root = self.actor_context.recordings_root.clone(); + // The chunk's `publish_timestamp_ns` (its open time) keys both the + // spool filename and the window routing below. + let spool_nut = paths::spool_chunk_path( + recordings_root.as_path(), + &source.0, + source.1, + &data_type, + sensor_name.as_deref(), + publish_ts, + thread_id, + ); + + // The whole chunk routes by its open (publish) time, which lies inside + // exactly one recording window β€” so the tail chunk of a recording is + // routed by a timestamp strictly before the window's stop boundary, + // never on it. 
+ let Some(entry) = self.windows.get_mut(source) else { + remove_spool_nut(&spool_nut); + self.note_orphan(); + return; + }; + let Some(window) = Self::window_for_mut(entry, publish_ts) else { + remove_spool_nut(&spool_nut); + self.note_orphan(); + return; + }; + + let recording_index = window.recording_index; + let handle = Self::ensure_actor( + window, + &self.actor_context, + data_type.clone(), + sensor_name.clone(), + &mut self.actor_handles, + ); + let chunk_index = handle.next_video_chunk; + handle.next_video_chunk = handle.next_video_chunk.saturating_add(1); + let sender = handle.sender.clone(); + + // The actor relinks the spooled NUT into the recording itself β€” on a + // blocking thread inside its background encode task β€” so the rename's + // possible journal-commit stall never lands on this routing path. The + // dispatcher only hands over the source spool path. + if sender + .send(TraceActorMessage::Video { + chunk_index, + spool_nut: spool_nut.clone(), + width, + height, + byte_count, + frame_count, + frame_timestamps_s, + }) + .await + .is_err() + { + tracing::warn!( + recording_index, + "video trace actor inbox closed; dropping chunk" + ); + remove_spool_nut(&spool_nut); + } + } + + /// Look up or spawn the per-trace actor for `(window, data_type, + /// sensor_name)`, returning its routing handle. 
+ fn ensure_actor<'a>( + window: &'a mut ActiveWindow, + actor_context: &Arc, + data_type: String, + sensor_name: Option, + actor_handles: &mut Vec>, + ) -> &'a mut TraceHandle { + let key = TraceKey { + recording_index: window.recording_index, + data_type, + sensor_name, + }; + window.traces.entry(key.clone()).or_insert_with(|| { + let identity = TraceIdentity { + trace_id: Uuid::new_v4().to_string(), + key, + }; + let (tx, actor_rx) = mpsc::channel(TRACE_QUEUE_CAPACITY); + let actor_context = Arc::clone(actor_context); + let join = tokio::spawn(async move { + trace_actor::run(actor_context, identity, actor_rx).await; + }); + actor_handles.push(join); + TraceHandle { + sender: tx, + next_video_chunk: 0, + } + }) + } + + fn note_orphan(&mut self) { + self.orphan_drops = self.orphan_drops.saturating_add(1); + if self.orphan_drops == 1 || self.orphan_drops.is_multiple_of(1024) { + tracing::warn!( + dropped = self.orphan_drops, + "dropped datum outside any recording window" + ); + } + } + + /// Clean shutdown: flush every held datum against the current windows, then + /// signal `WindowClosing` to every actor so in-flight recordings finalise. + async fn shutdown(&mut self) { + let held: Vec = self.held.drain(..).collect(); + for item in held { + self.route(item).await; + } + let windows = std::mem::take(&mut self.windows); + for (_, mut entry) in windows { + let mut all: Vec = Vec::new(); + if let Some(live) = entry.live.take() { + all.push(live); + } + all.append(&mut entry.closing); + for window in all { + for (_, handle) in window.traces { + let _ = handle.sender.send(TraceActorMessage::WindowClosing).await; + } + } + } + let handles = std::mem::take(&mut self.actor_handles); + for handle in handles { + if let Err(error) = handle.await { + tracing::warn!(?error, "trace actor join failed during shutdown"); + } + } + // Every actor has exited, so all their finalise/failed writes are now + // queued in the write-behind. 
Flush it here so that by the time + // `DispatcherHandle::shutdown` returns the trace rows are durable β€” + // callers (and tests) can read final state without a separate barrier. + self.actor_context.trace_writer.flush().await; + tracing::info!("dispatcher stopped"); + } +} + +fn remove_spool_nut(path: &std::path::Path) { + if let Err(error) = std::fs::remove_file(path) { + if error.kind() != std::io::ErrorKind::NotFound { + tracing::debug!(%error, path = %path.display(), "failed to remove orphan spool NUT"); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::encoding::video_encoder::VideoEncoder; + use crate::state::{SqliteStateStore, TraceWriteStatus}; + use crate::storage::budget::{StorageBudget, StoragePolicy}; + use crate::storage::paths::TracePath; + use std::path::PathBuf; + use tempfile::TempDir; + use tokio::sync::broadcast; + use tokio::time::{timeout, Duration}; + + async fn open_store() -> (SqliteStateStore, TempDir) { + let dir = TempDir::new().expect("tempdir"); + let store = SqliteStateStore::open(&dir.path().join("state.db")) + .await + .expect("open store"); + (store, dir) + } + + fn test_context(recordings_root: PathBuf, store: SqliteStateStore) -> Arc { + let policy = StoragePolicy { + storage_limit_bytes: None, + min_free_disk_bytes: 0, + refresh_interval: Duration::from_secs(60), + }; + let budget = Arc::new(StorageBudget::new(&recordings_root, policy)); + // The writer owner is dropped: the spawned task lives while the handle + // inside the context does. The dispatcher flushes it on shutdown, so + // tests see durable trace state after `handle.shutdown().await`. 
+ let (trace_writer, _writer_owner) = + crate::state::trace_event_database_writer::spawn(Arc::new(store)); + let (json_writer, _json_owner) = crate::pipeline::json_writer::spawn(); + Arc::new(TraceActorContext::new( + recordings_root, + budget, + VideoEncoder::new(), + trace_writer, + json_writer, + )) + } + + // Tests exercise window membership, which is keyed on the publish clock, so + // the helper sets the capture `timestamp_ns` to the same value. + fn start(robot: &str, publish_timestamp_ns: i64) -> Envelope { + Envelope::StartRecording { + robot_id: robot.into(), + robot_instance: 0, + robot_name: None, + dataset_id: None, + dataset_name: None, + publish_timestamp_ns, + timestamp_ns: publish_timestamp_ns, + } + } + + fn stop(robot: &str, publish_timestamp_ns: i64) -> Envelope { + Envelope::StopRecording { + robot_id: robot.into(), + robot_instance: 0, + publish_timestamp_ns, + timestamp_ns: publish_timestamp_ns, + } + } + + /// A datum published at `publish_ts` with `content_ts` as its own + /// (decoupled) capture timestamp. + fn datum_full(robot: &str, publish_ts: i64, content_ts: i64, value: i64) -> Envelope { + Envelope::Data { + robot_id: robot.into(), + robot_instance: 0, + data_type: "joints".into(), + sensor_name: Some("waist".into()), + publish_timestamp_ns: publish_ts, + timestamp_ns: content_ts, + timestamp_s: None, + payload: serde_json::to_vec(&serde_json::json!({ "i": value })).unwrap(), + } + } + + /// A datum whose publish time and capture time coincide. + fn datum(robot: &str, publish_ts: i64, value: i64) -> Envelope { + datum_full(robot, publish_ts, publish_ts, value) + } + + /// A short holdback keeps the tests fast. 
+ fn fast_holdback() { + std::env::set_var(HOLDBACK_ENV, "60"); + } + + #[tokio::test] + async fn routes_data_into_its_window_by_timestamp() { + fast_holdback(); + let (store, dir) = open_store().await; + let context = test_context(dir.path().join("recordings"), store.clone()); + let (_shutdown_tx, shutdown_rx) = broadcast::channel(8); + let bus = crate::state::EventBus::new(); + let dispatcher_context = DispatcherContext { + event_bus: Some(bus.clone()), + }; + let (tx, handle) = spawn_with_context( + store.clone(), + context.clone(), + dispatcher_context, + shutdown_rx, + ); + + tx.send(start("robot-1", 100)).await.unwrap(); + for index in 0..3i64 { + tx.send(datum("robot-1", 100 + index, index)).await.unwrap(); + } + tx.send(stop("robot-1", 200)).await.unwrap(); + + drop(tx); + timeout(Duration::from_secs(5), handle.shutdown()) + .await + .expect("dispatcher shut down in time"); + + // Exactly one recording (index 1) with one written trace. + let recordings = store.recordings_for_source("robot-1", 0).await.unwrap(); + assert_eq!(recordings.len(), 1); + let recording_index = recordings[0].recording_index; + let traces = store + .list_traces_for_recording(recording_index) + .await + .unwrap(); + assert_eq!(traces.len(), 1); + assert_eq!(traces[0].write_status, TraceWriteStatus::Written); + + let trace_dir = TracePath::new( + recording_index.to_string(), + "joints", + traces[0].trace_id.clone(), + ) + .directory(context.recordings_root.as_path()); + let bytes = std::fs::read(trace_dir.join("trace.json")).unwrap(); + let parsed: serde_json::Value = serde_json::from_slice(&bytes).unwrap(); + assert_eq!(parsed, serde_json::json!([{"i": 0}, {"i": 1}, {"i": 2}])); + } + + #[tokio::test] + async fn back_to_back_recordings_route_by_publish_timestamp() { + fast_holdback(); + let (store, dir) = open_store().await; + let context = test_context(dir.path().join("recordings"), store.clone()); + let (_shutdown_tx, shutdown_rx) = broadcast::channel(8); + let (tx, handle) = 
spawn(store.clone(), context.clone(), shutdown_rx); + + // Recording A: [100, 200). Recording B: [200, 300). + tx.send(start("robot-1", 100)).await.unwrap(); + tx.send(stop("robot-1", 200)).await.unwrap(); + tx.send(start("robot-1", 200)).await.unwrap(); + tx.send(stop("robot-1", 300)).await.unwrap(); + // A datum published inside A's window but delivered after B opened + // still lands in A by its publish timestamp. + tx.send(datum("robot-1", 150, 1)).await.unwrap(); + tx.send(datum("robot-1", 250, 2)).await.unwrap(); + + drop(tx); + timeout(Duration::from_secs(5), handle.shutdown()) + .await + .expect("dispatcher shut down in time"); + + let recordings = store.recordings_for_source("robot-1", 0).await.unwrap(); + assert_eq!(recordings.len(), 2); + let first = recordings[0].recording_index; + let second = recordings[1].recording_index; + + let first_traces = store.list_traces_for_recording(first).await.unwrap(); + let second_traces = store.list_traces_for_recording(second).await.unwrap(); + assert_eq!(first_traces.len(), 1, "ts=150 routes to recording A"); + assert_eq!(second_traces.len(), 1, "ts=250 routes to recording B"); + + let a_dir = TracePath::new( + first.to_string(), + "joints", + first_traces[0].trace_id.clone(), + ) + .directory(context.recordings_root.as_path()); + let a: serde_json::Value = + serde_json::from_slice(&std::fs::read(a_dir.join("trace.json")).unwrap()).unwrap(); + assert_eq!(a, serde_json::json!([{"i": 1}])); + } + + /// Announce a finished video chunk whose open time is `publish_ts`. The + /// caller must have spooled the matching NUT under the spool dir first. 
+ fn video_chunk(robot: &str, publish_ts: i64, thread_id: i64) -> Envelope { + Envelope::VideoChunkReady { + robot_id: robot.into(), + robot_instance: 0, + data_type: "RGB_IMAGES".into(), + sensor_name: Some("camera_0".into()), + publish_timestamp_ns: publish_ts, + thread_id, + width: 64, + height: 64, + byte_count: 9, + frame_count: 1, + frame_timestamps_ns: vec![publish_ts], + frame_timestamps_s: vec![publish_ts as f64 / 1e9], + } + } + + /// Spool a placeholder NUT at the path the producer would have written, so + /// the dispatcher's relink has a file to move. + fn spool_placeholder_nut(recordings_root: &std::path::Path, publish_ts: i64, thread_id: i64) { + let path = paths::spool_chunk_path( + recordings_root, + "robot-1", + 0, + "RGB_IMAGES", + Some("camera_0"), + publish_ts, + thread_id, + ); + std::fs::create_dir_all(path.parent().unwrap()).unwrap(); + std::fs::write(&path, b"nut-bytes").unwrap(); + } + + #[tokio::test] + async fn video_chunk_routes_by_open_time_into_its_window() { + // A video chunk's `publish_timestamp_ns` is its *open* time β€” strictly + // inside the recording β€” so a recording's tail chunk (announced just + // before the stop) routes by a timestamp before the stop boundary and + // lands in the recording rather than being dropped at the boundary. + fast_holdback(); + let (store, dir) = open_store().await; + let recordings_root = dir.path().join("recordings"); + let context = test_context(recordings_root.clone(), store.clone()); + let (_shutdown_tx, shutdown_rx) = broadcast::channel(8); + let (tx, handle) = spawn(store.clone(), context.clone(), shutdown_rx); + + let (publish_ts, thread_id) = (150, 7); + spool_placeholder_nut(&recordings_root, publish_ts, thread_id); + + // Window [100, 200); the chunk (open ts 150) is announced before stop. 
+ tx.send(start("robot-1", 100)).await.unwrap(); + tx.send(video_chunk("robot-1", publish_ts, thread_id)) + .await + .unwrap(); + tx.send(stop("robot-1", 200)).await.unwrap(); + + drop(tx); + timeout(Duration::from_secs(10), handle.shutdown()) + .await + .expect("dispatcher shut down in time"); + + let recordings = store.recordings_for_source("robot-1", 0).await.unwrap(); + assert_eq!(recordings.len(), 1); + let traces = store + .list_traces_for_recording(recordings[0].recording_index) + .await + .unwrap(); + assert!( + traces + .iter() + .any(|trace| trace.data_type.as_deref() == Some("RGB_IMAGES")), + "the in-window video chunk must route to a video trace, not be dropped" + ); + let spool_path = paths::spool_chunk_path( + &recordings_root, + "robot-1", + 0, + "RGB_IMAGES", + Some("camera_0"), + publish_ts, + thread_id, + ); + assert!( + !spool_path.exists(), + "the spooled NUT must be relinked out of the spool dir" + ); + } + + #[tokio::test] + async fn video_chunk_published_after_stop_is_dropped() { + // A chunk whose open time falls after the window closed belongs to no + // window and is dropped β€” the contrast that proves routing is by the + // chunk's own timestamp, not by arrival order. 
+ fast_holdback(); + let (store, dir) = open_store().await; + let recordings_root = dir.path().join("recordings"); + let context = test_context(recordings_root.clone(), store.clone()); + let (_shutdown_tx, shutdown_rx) = broadcast::channel(8); + let (tx, handle) = spawn(store.clone(), context.clone(), shutdown_rx); + + let (publish_ts, thread_id) = (250, 7); // after the window's stop + spool_placeholder_nut(&recordings_root, publish_ts, thread_id); + + tx.send(start("robot-1", 100)).await.unwrap(); + tx.send(stop("robot-1", 200)).await.unwrap(); + tx.send(video_chunk("robot-1", publish_ts, thread_id)) + .await + .unwrap(); + + drop(tx); + timeout(Duration::from_secs(10), handle.shutdown()) + .await + .expect("dispatcher shut down in time"); + + let recordings = store.recordings_for_source("robot-1", 0).await.unwrap(); + assert_eq!(recordings.len(), 1); + let traces = store + .list_traces_for_recording(recordings[0].recording_index) + .await + .unwrap(); + assert!( + !traces + .iter() + .any(|trace| trace.data_type.as_deref() == Some("RGB_IMAGES")), + "a chunk published after the window closed has no window and is dropped" + ); + } + + #[tokio::test] + async fn routing_is_decoupled_from_the_provided_timestamp() { + // The integration matrix's manual timestamp mode logs data with + // 0-based capture timestamps, NOT wall clock. Routing uses the + // publish timestamp (wall clock, in the window), so the data lands + // correctly while its own 0-based timestamp is preserved as content. + fast_holdback(); + let (store, dir) = open_store().await; + let context = test_context(dir.path().join("recordings"), store.clone()); + let (_shutdown_tx, shutdown_rx) = broadcast::channel(8); + let (tx, handle) = spawn(store.clone(), context.clone(), shutdown_rx); + + let base = 1_700_000_000_000_000_000i64; // wall-clock publish window + tx.send(start("robot-1", base)).await.unwrap(); + for index in 0..3i64 { + // publish ts in-window; content ts 0-based. 
+ tx.send(datum_full("robot-1", base + index, index, index)) + .await + .unwrap(); + } + tx.send(stop("robot-1", base + 1000)).await.unwrap(); + + drop(tx); + timeout(Duration::from_secs(5), handle.shutdown()) + .await + .expect("dispatcher shut down in time"); + + let recordings = store.recordings_for_source("robot-1", 0).await.unwrap(); + assert_eq!(recordings.len(), 1); + let traces = store + .list_traces_for_recording(recordings[0].recording_index) + .await + .unwrap(); + assert_eq!( + traces.len(), + 1, + "0-based-content data must route into the window" + ); + assert_eq!(traces[0].write_status, TraceWriteStatus::Written); + } + + #[tokio::test] + async fn data_outside_any_window_is_dropped() { + fast_holdback(); + let (store, dir) = open_store().await; + let context = test_context(dir.path().join("recordings"), store.clone()); + let (_shutdown_tx, shutdown_rx) = broadcast::channel(8); + let (tx, handle) = spawn(store.clone(), context.clone(), shutdown_rx); + + // No StartRecording β€” the datum belongs to no window. 
+ tx.send(datum("robot-1", 100, 1)).await.unwrap(); + + drop(tx); + timeout(Duration::from_secs(5), handle.shutdown()) + .await + .expect("dispatcher shut down in time"); + + let recordings = store.recordings_for_source("robot-1", 0).await.unwrap(); + assert!(recordings.is_empty(), "no recording should be created"); + } + + #[tokio::test] + async fn cancel_purges_held_data_and_marks_cancelled() { + fast_holdback(); + let (store, dir) = open_store().await; + let context = test_context(dir.path().join("recordings"), store.clone()); + let (_shutdown_tx, shutdown_rx) = broadcast::channel(8); + let bus = crate::state::EventBus::new(); + let mut sub = bus.subscribe(); + let dispatcher_context = DispatcherContext { + event_bus: Some(bus.clone()), + }; + let (tx, handle) = spawn_with_context( + store.clone(), + context.clone(), + dispatcher_context, + shutdown_rx, + ); + + tx.send(start("robot-1", 100)).await.unwrap(); + tx.send(datum("robot-1", 110, 1)).await.unwrap(); + tx.send(Envelope::CancelRecording { + robot_id: "robot-1".into(), + robot_instance: 0, + timestamp_ns: 120, + }) + .await + .unwrap(); + + drop(tx); + timeout(Duration::from_secs(5), handle.shutdown()) + .await + .expect("dispatcher shut down in time"); + + let recordings = store.recordings_for_source("robot-1", 0).await.unwrap(); + assert_eq!(recordings.len(), 1); + assert!(recordings[0].cancelled_at.is_some()); + + let mut saw_cancel = false; + while let Ok(event) = sub.try_recv() { + if matches!(event, DaemonEvent::RecordingCancelled { .. }) { + saw_cancel = true; + } + } + assert!(saw_cancel, "RecordingCancelled must be published"); + } +} diff --git a/rust/data_daemon/src/pipeline/json_writer.rs b/rust/data_daemon/src/pipeline/json_writer.rs new file mode 100644 index 000000000..41ee884d0 --- /dev/null +++ b/rust/data_daemon/src/pipeline/json_writer.rs @@ -0,0 +1,224 @@ +//! Write-behind for per-trace `trace.json` files. +//! +//! Scalar / sensor trace actors append JSON entries on their hot path. 
Writing +//! each entry inline blocks the actor's tokio task on a `write()` which, on the +//! spool's shared ext4 (`data=ordered`, `discard`), periodically stalls for +//! hundreds of ms behind a journal commit. Because the actor sits on the +//! dispatcher → IPC-listener drain path, that stall back-pressures all the way +//! out to the producer's next `log_*` publish — the joint-logging latency +//! spikes we chased. +//! +//! This dedicated OS thread owns every open [`JsonTraceWriter`] and performs the +//! blocking appends / finishes off that path. Actors only *enqueue* (open, +//! append and cancel are fire-and-forget; finalise awaits a one-shot ack), so a +//! disk stall blocks this one thread instead of the IPC drain. It mirrors the +//! daemon's SQLite write-behind ([`crate::state::trace_event_database_writer`]); a single +//! thread is ample because trace JSON is tiny (the integration matrix's whole +//! workload is ~MB/s) and the stalls are intermittent, not a sustained +//! slowdown, so the thread always catches up between journal commits. +//! +//! Per-trace ordering is preserved: each `(open, append…, finish)` sequence for +//! one `trace_id` arrives on a single FIFO channel and is applied in order, and +//! different traces never share a writer (they key by `trace_id`). + +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::mpsc::{self, Receiver, Sender}; +use std::thread::JoinHandle; + +use serde_json::Value; +use tokio::sync::oneshot; + +use crate::encoding::json_trace::{JsonTraceError, JsonTraceWriter}; + +/// Work item for the JSON write-behind thread. +enum JsonWriteMsg { + /// Open (create) the `trace.json` for a trace. Errors are deferred to the + /// matching [`JsonWriteMsg::Finish`] so the hot path never blocks on open. + Open { + trace_id: String, + trace_dir: PathBuf, + }, + /// Append one entry.
`payload` is forwarded verbatim when it is itself valid + /// JSON, else wrapped in a small fallback object stamped with `timestamp_ns`. + Append { + trace_id: String, + timestamp_ns: i64, + payload: Vec, + }, + /// Finalise the trace (append `]`, flush, close) and report the on-disk byte + /// total β€” or any deferred open/append error β€” back over `ack`. + Finish { + trace_id: String, + ack: oneshot::Sender>, + }, + /// Discard an open writer without finalising it (cancel). No-op if the trace + /// never opened a JSON writer (e.g. a video trace). + Drop { trace_id: String }, +} + +/// Cloneable handle the per-trace actors use to drive the JSON write-behind +/// thread. Every method is non-blocking except [`finish`](Self::finish), which +/// awaits the thread's acknowledgement (off the actor's hot path, at finalise). +#[derive(Clone)] +pub struct JsonWriteHandle { + tx: Sender, +} + +impl JsonWriteHandle { + /// Open the trace's `trace.json` (fire-and-forget). + pub fn open(&self, trace_id: &str, trace_dir: PathBuf) { + let _ = self.tx.send(JsonWriteMsg::Open { + trace_id: trace_id.to_string(), + trace_dir, + }); + } + + /// Append one entry (fire-and-forget). Takes ownership of `payload` so the + /// caller's frame buffer is freed immediately. + pub fn append(&self, trace_id: &str, timestamp_ns: i64, payload: Vec) { + let _ = self.tx.send(JsonWriteMsg::Append { + trace_id: trace_id.to_string(), + timestamp_ns, + payload, + }); + } + + /// Finalise the trace and return its final on-disk byte count, surfacing any + /// deferred open/append error. Awaited at finalise time, never on the hot + /// path. 
+ pub async fn finish(&self, trace_id: &str) -> Result { + let (ack_tx, ack_rx) = oneshot::channel(); + if self + .tx + .send(JsonWriteMsg::Finish { + trace_id: trace_id.to_string(), + ack: ack_tx, + }) + .is_err() + { + return Err(writer_gone()); + } + ack_rx.await.unwrap_or_else(|_| Err(writer_gone())) + } + + /// Discard the trace's open writer without finalising (fire-and-forget). + pub fn drop_trace(&self, trace_id: &str) { + let _ = self.tx.send(JsonWriteMsg::Drop { + trace_id: trace_id.to_string(), + }); + } +} + +/// Spawn the JSON write-behind thread, returning a cloneable handle plus its +/// join handle. The thread exits when the last handle is dropped (the channel +/// closes), draining nothing further β€” finalise acks already in flight resolve +/// to [`writer_gone`]. +pub fn spawn() -> (JsonWriteHandle, JoinHandle<()>) { + let (tx, rx) = mpsc::channel(); + let join = std::thread::Builder::new() + .name("json-trace-writer".to_string()) + .spawn(move || writer_loop(rx)) + .expect("spawn json-trace-writer thread"); + (JsonWriteHandle { tx }, join) +} + +fn writer_loop(rx: Receiver) { + // Open writers, plus a per-trace "first error" that a later `Finish` + // surfaces β€” once a trace errors we stop touching its (possibly broken) + // file and report failure at finalise, matching the old inline behaviour. 
+ let mut writers: HashMap = HashMap::new(); + let mut errored: HashMap = HashMap::new(); + + while let Ok(msg) = rx.recv() { + match msg { + JsonWriteMsg::Open { + trace_id, + trace_dir, + } => match JsonTraceWriter::open(&trace_dir) { + Ok(writer) => { + writers.insert(trace_id, writer); + } + Err(error) => { + errored.insert(trace_id, error); + } + }, + JsonWriteMsg::Append { + trace_id, + timestamp_ns, + payload, + } => { + if errored.contains_key(&trace_id) { + continue; + } + if let Some(writer) = writers.get_mut(&trace_id) { + if let Err(error) = append_entry(writer, timestamp_ns, &payload) { + errored.insert(trace_id, error); + } + } + } + JsonWriteMsg::Finish { trace_id, ack } => { + let result = if let Some(error) = errored.remove(&trace_id) { + writers.remove(&trace_id); + Err(error) + } else if let Some(writer) = writers.remove(&trace_id) { + writer.finish() + } else { + // No writer for this trace_id; nothing to finalise. + Ok(0) + }; + let _ = ack.send(result); + } + JsonWriteMsg::Drop { trace_id } => { + writers.remove(&trace_id); + errored.remove(&trace_id); + } + } + } +} + +/// Append `payload` to `writer`, writing it verbatim when it parses as JSON and +/// wrapping it in a fallback object otherwise. A single parse decides the +/// branch, preserving the producer's bit-exact float formatting on the common +/// (already-JSON) path. +fn append_entry( + writer: &mut JsonTraceWriter, + timestamp_ns: i64, + payload: &[u8], +) -> Result<(), JsonTraceError> { + match serde_json::from_slice::(payload) { + Ok(_) => writer.add_raw_entry(payload), + Err(_) => { + // A non-JSON payload should never reach here (the SDK always ships + // JSON), so this is a defensive fallback. Surface it rather than + // dropping the bytes silently: only the length is retained on disk; + // the raw payload bytes are intentionally discarded. 
+ tracing::warn!( + timestamp_ns, + payload_len = payload.len(), + "non-JSON scalar payload; storing length only (raw bytes discarded)" + ); + writer.add_entry(&scalar_fallback_entry(timestamp_ns, payload)) + } + } +} + +/// Wrap a non-JSON scalar payload in a minimal object so the on-disk +/// `trace.json` array stays parseable. Only the payload length is recorded; the +/// raw bytes are intentionally discarded. Only reached after a structural JSON +/// parse has already failed, so it never re-parses the bytes. +pub(crate) fn scalar_fallback_entry(timestamp_ns: i64, payload: &[u8]) -> Value { + let mut map = serde_json::Map::new(); + map.insert("timestamp_ns".to_string(), Value::from(timestamp_ns)); + map.insert("payload_len".to_string(), Value::from(payload.len() as u64)); + Value::Object(map) +} + +/// The error reported when the write-behind thread has already exited (process +/// shutdown). Reuses [`JsonTraceError::Write`] so callers need no new variant. +fn writer_gone() -> JsonTraceError { + JsonTraceError::Write { + path: PathBuf::from(""), + source: std::io::Error::other("json trace writer thread stopped"), + } +} diff --git a/rust/data_daemon/src/pipeline/mod.rs b/rust/data_daemon/src/pipeline/mod.rs new file mode 100644 index 000000000..113df7c0c --- /dev/null +++ b/rust/data_daemon/src/pipeline/mod.rs @@ -0,0 +1,12 @@ +//! Per-trace dispatcher and trace-actor pipeline. +//! +//! - [`dispatcher`] is a single tokio task that owns a lock-free HashMap of +//! per-source recording windows, routes held data into windows by publish +//! timestamp, and spawns a per-trace actor (keyed by recording_index, +//! data_type, sensor_name) on first datum. +//! - [`trace_actor`] is the per-trace task: it serialises envelopes for one +//! trace, updates the SQLite state store, and drives the JSON / NUT writers. 
+ +pub mod dispatcher; +pub mod json_writer; +pub mod trace_actor; diff --git a/rust/data_daemon/src/pipeline/trace_actor.rs b/rust/data_daemon/src/pipeline/trace_actor.rs new file mode 100644 index 000000000..aae5474c8 --- /dev/null +++ b/rust/data_daemon/src/pipeline/trace_actor.rs @@ -0,0 +1,1259 @@ +//! Per-trace actor task. +//! +//! Owns the SQLite lifecycle and the on-disk encoders for one trace. The +//! daemon owns trace identity: a trace is `(recording_index, data_type, +//! sensor_name)` and the dispatcher mints its `trace_id` (a UUID, the DB +//! primary key and on-disk directory name) when it first routes data for that +//! key. The actor therefore knows its full identity at spawn time — there is +//! no `StartTrace` and no pre-`StartTrace` buffering. +//! +//! Scalar / sensor traces stream into a [`crate::encoding::json_trace::JsonTraceWriter`]; video traces +//! consume [`TraceActorMessage::Video`] notifications that hand off +//! daemon-relinked NUT chunks for ffmpeg-side transcoding into per-chunk MP4 +//! segments, then on finalise stitch the segments into the final `lossy.mp4` / +//! `lossless.mp4` and flush the [`VideoMetadataAccumulator`] sidecar. +//! +//! Finalisation is driven by a single [`TraceActorMessage::WindowClosing`] +//! signal: the dispatcher sends every routed datum to the actor's FIFO inbox +//! *before* `WindowClosing`, so by the time the actor sees it every frame has +//! been applied — completeness without counting sequence numbers. +//! +//! Database writes never touch the store's single write mutex on the actor's +//! hot path: the row creation *and* every subsequent progress / status / +//! finalise / failed update are fired into the coalescing write-behind +//! ([`crate::state::trace_event_database_writer`]) and never awaited — the actor's first write +//! carries the create fields, which are enqueued before any update; the +//! batcher's coalescing plus the `ON CONFLICT DO NOTHING` insert keep the row +//!
/// Routing key identifying one per-trace actor.
///
/// `Data` and `VideoChunkReady` envelopes carry their source + sensor on the
/// wire; the dispatcher resolves the source's active window to a
/// `recording_index` and routes by this key. Two recordings of the same sensor
/// get distinct actors automatically because `recording_index` differs.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TraceKey {
    /// Parent recording's local index.
    pub recording_index: i64,
    /// Wire data-type label (e.g. `"JOINT_POSITIONS"`, `"RGB_IMAGES"`).
    pub data_type: String,
    /// Per-stream sensor label (joint name, camera id), absent when the
    /// stream has none. Persisted to the trace row's `data_type_name` column.
    pub sensor_name: Option<String>,
}
+#[derive(Debug, Clone)] +pub struct TraceIdentity { + /// Daemon-minted UUID β€” DB primary key and on-disk directory name. + pub trace_id: String, + /// Routing key (`recording_index`, `data_type`, `sensor_name`). + pub key: TraceKey, +} + +/// Flush `bytes_written` to the DB every N frames instead of every frame. +/// +/// At 30 fps video and 200 Hz scalars this keeps the SQLite write rate well +/// under 10 Hz per trace, which the WAL handles comfortably while still giving +/// the upload coordinator a recent enough byte count for its progress reports. +/// A finalise always issues a fresh UPDATE so the terminal row is exact. +const BYTES_WRITTEN_DEBOUNCE_FRAMES: u64 = 32; + +/// Cap on concurrent ffmpeg transcodes. +/// +/// Each per-chunk encode bounds its libx264 thread pool to +/// [`ENCODE_THREADS_PER_OUTPUT`] per output stream, so to keep the encode fleet +/// near β€” not far past β€” the host's core count we run roughly +/// `cores / threads_per_output` invocations at once. Letting the permit count +/// *and* each child's thread pool both scale with the core count (as an earlier +/// revision did, with libx264 defaulting to one frame-thread per core) +/// oversubscribed a 14-core host to ~200 encode threads, which thrashed the +/// scheduler and stole cycles from the latency-critical `nc.log_*` threads. +/// Dividing here holds the total encode-thread count near the core count while +/// still letting bigger hosts transcode multi-camera 8-context workloads in +/// parallel. +/// +/// Floor at 2 so single-core hosts still get a useful permit pool. +pub(crate) fn default_ffmpeg_concurrency() -> usize { + std::thread::available_parallelism() + .map(|n| (n.get() / ENCODE_THREADS_PER_OUTPUT).max(2)) + .unwrap_or(2) +} + +/// Shared context passed to every per-trace actor. +/// +/// Cheap to clone (everything inside is an `Arc` or `Copy`-like config), so +/// the dispatcher hands each actor its own handle without contention. 
The +/// storage budget is shared across actors so reservations accumulate. +#[derive(Clone)] +pub struct TraceActorContext { + /// Filesystem root under which trace artefacts are written. + pub recordings_root: Arc, + /// Shared storage-budget tracker. Reserved here so the budget can refuse + /// frames when the configured quota is exhausted. + pub storage_budget: Arc, + /// Encoder used to transcode per-chunk NUT files into MP4 segments and + /// to stream-copy concatenate the segments into the final outputs on + /// finalise. Cloning a [`VideoEncoder`] is cheap (it carries only the + /// configured ffmpeg binary path). + pub video_encoder: VideoEncoder, + /// Bounds concurrent ffmpeg children. Shared across actors so the + /// integration matrix's parallel encode storms don't fork-bomb the + /// transcoder. + pub ffmpeg_permits: Arc, + /// Optional daemon event bus. When present, the trace actor publishes a + /// [`crate::state::DaemonEvent::TraceWritten`] on finalise so the + /// registration coordinator can wake immediately. Optional so unit tests + /// can exercise the actor without standing up a bus. + pub event_bus: Option, + /// Write-behind handle for this actor's create / progress / status / + /// finalise updates. Routing these through the coalescing batcher keeps the + /// actor's hot path β€” including row creation β€” off the store's single write + /// mutex entirely (see [`crate::state::trace_event_database_writer`]). + pub trace_writer: TraceWriteHandle, + /// Write-behind handle for this actor's `trace.json` appends. Keeps the + /// blocking JSON `write()` β€” which periodically stalls behind an ext4 + /// journal commit on the shared spool β€” off the actor's hot path, so a disk + /// stall can't back-pressure the dispatcher / IPC listener (see + /// [`crate::pipeline::json_writer`]). + pub json_writer: JsonWriteHandle, +} + +impl TraceActorContext { + /// Build a context with the default ffmpeg concurrency cap. 
Suitable for + /// production wiring; tests that need a deterministic transcode order may + /// prefer [`TraceActorContext::with_ffmpeg_permits`]. + pub fn new( + recordings_root: impl Into, + storage_budget: Arc, + video_encoder: VideoEncoder, + trace_writer: TraceWriteHandle, + json_writer: JsonWriteHandle, + ) -> Self { + Self::with_ffmpeg_permits( + recordings_root, + storage_budget, + video_encoder, + Arc::new(Semaphore::new(default_ffmpeg_concurrency())), + trace_writer, + json_writer, + ) + } + + /// Build a context with an externally-provided ffmpeg permit pool. + pub fn with_ffmpeg_permits( + recordings_root: impl Into, + storage_budget: Arc, + video_encoder: VideoEncoder, + ffmpeg_permits: Arc, + trace_writer: TraceWriteHandle, + json_writer: JsonWriteHandle, + ) -> Self { + Self { + recordings_root: Arc::new(recordings_root.into()), + storage_budget, + video_encoder, + ffmpeg_permits, + event_bus: None, + trace_writer, + json_writer, + } + } + + /// Attach a daemon event bus to this context. Returns `self` so it + /// composes cleanly with [`TraceActorContext::new`] / + /// [`TraceActorContext::with_ffmpeg_permits`]. + pub fn with_event_bus(mut self, bus: crate::state::EventBus) -> Self { + self.event_bus = Some(bus); + self + } +} + +/// Message accepted by a per-trace actor. +#[derive(Debug)] +pub enum TraceActorMessage { + /// One sensor sample routed to this trace after its holdback elapsed. + Data { + /// Caller-supplied capture time in nanoseconds since the Unix epoch. + timestamp_ns: i64, + /// Optional caller-supplied capture time in seconds. + timestamp_s: Option, + /// Opaque per-sample bytes. + payload: Vec, + }, + /// One finished NUT chunk. The actor relinks it from the producer spool + /// into this trace's `chunks/chunk_NNNN.nut` (on a blocking thread, inside + /// the background encode task) so the rename's possible journal-commit stall + /// stays off the dispatcher's routing path. 
+ Video { + /// Daemon-assigned, per-trace monotonic chunk index. + chunk_index: u32, + /// Producer-spooled source NUT to relink into this trace's chunks dir. + spool_nut: PathBuf, + /// Frame width in pixels (constant across a trace). + width: u32, + /// Frame height in pixels. + height: u32, + /// Size of the spooled NUT file in bytes. + byte_count: u64, + /// Number of frames in the chunk. + frame_count: u32, + /// Per-frame `timestamp_s` for the metadata sidecar, in capture order. + frame_timestamps_s: Vec, + }, + /// The recording window has closed and its holdback has drained: finalise + /// the trace. Every routed datum has already been delivered ahead of this + /// message by the single-owner dispatcher. + WindowClosing, + /// Drop the in-flight writer and delete the on-disk artefacts. Sent by + /// the dispatcher when the parent recording is cancelled. + Cancel, +} + +/// Internal state of a per-trace actor. +/// +/// Encoders are opened lazily: a scalar trace doesn't need a `trace.json` file +/// until the first frame arrives, and a video trace's segment / metadata +/// state is allocated when the first `Video` message lands. +enum TraceWriterKind { + /// No frames yet observed; the writer is decided on the first frame or + /// chunk message. + Pending, + /// Scalar trace streaming into a single `trace.json` array. The actual + /// [`crate::encoding::json_trace::JsonTraceWriter`] lives on the write-behind thread + /// ([`crate::pipeline::json_writer`]); the actor only holds this marker and + /// drives it by `trace_id` through [`TraceActorContext::json_writer`]. + Json, + /// Video trace whose chunk encodes run concurrently as background tasks. + Video { + /// Frame width in pixels (recorded from the first chunk message). + width: u32, + /// Frame height in pixels. + height: u32, + /// Encodes completed so far, keyed by `chunk_index` so the finalise + /// concat can iterate in order regardless of completion order. 
+ completed_chunks: BTreeMap, + /// Spawned chunk-encode tasks still running. + pending_encodes: JoinSet, + }, +} + +/// One successfully encoded chunk, ready to feed into the finalise concat. +struct CompletedChunk { + /// `chunk_NNNN_lossy.mp4` segment path. + lossy_segment: PathBuf, + /// `chunk_NNNN_lossless.mp4` segment path. + lossless_segment: PathBuf, + /// Sum of both segments' on-disk byte counts. + bytes: u64, + /// Per-frame `timestamp_s` values from the chunk message, applied to the + /// metadata accumulator at finalise in chunk-index order. + frame_timestamps_s: Vec, + /// Frame count carried by the chunk message. + frame_count: u32, +} + +/// Outcome of one background chunk-encode task. +struct ChunkEncodeJobResult { + chunk_index: u32, + /// `Ok(CompletedChunk)` on success; `Err` is logged and the trace marked + /// failed by the polling path. + outcome: Result, +} + +/// Run the per-trace actor until the dispatcher closes the inbox or sends a +/// terminal message (`WindowClosing` / `Cancel`). +pub async fn run( + context: Arc, + identity: TraceIdentity, + mut inbox: mpsc::Receiver, +) { + let mut state = ActorState::new(identity); + // Fire-and-forget the row creation as the actor's first write. The batcher + // inserts it on its next flush β€” so the boundary's spawn burst is one + // batched insert, and the actor starts draining its inbox immediately + // instead of blocking on a synchronous `create_trace`. 
    state.send_create(&context);

    // Drain the inbox in arrival order. `Data` and `Video` keep the loop
    // running; `WindowClosing` and `Cancel` are terminal and return directly.
    while let Some(message) = inbox.recv().await {
        match message {
            TraceActorMessage::Data {
                timestamp_ns,
                timestamp_s,
                payload,
            } => {
                state
                    .handle_data(&context, timestamp_ns, timestamp_s, payload)
                    .await;
            }
            TraceActorMessage::Video {
                chunk_index,
                spool_nut,
                width,
                height,
                byte_count,
                frame_count,
                frame_timestamps_s,
            } => {
                state
                    .handle_video(
                        &context,
                        chunk_index,
                        spool_nut,
                        width,
                        height,
                        byte_count,
                        frame_count,
                        frame_timestamps_s,
                    )
                    .await;
            }
            TraceActorMessage::WindowClosing => {
                state.finalise_trace(&context).await;
                return;
            }
            TraceActorMessage::Cancel => {
                tracing::info!(
                    trace_id = state.identity.trace_id,
                    "cancel received by actor"
                );
                state.handle_cancel(&context).await;
                return;
            }
        }
    }

    // Inbox closed without a WindowClosing or a Cancel — typically a daemon
    // shutdown. Mark the trace failed so its lifecycle is observable from the
    // DB and the registration coordinator doesn't pick it up.
    state.handle_shutdown_without_end(&context).await;
}

/// Per-actor mutable bookkeeping. Pulled out of `run` so the message handlers
/// can be tested with synthetic messages against a clean state object.
struct ActorState {
    /// Trace id plus routing key this actor was spawned for.
    identity: TraceIdentity,
    /// Which writer (if any) has been opened for this trace so far.
    writer: TraceWriterKind,
    /// Frames accepted so far: one per appended `Data` sample, plus the frame
    /// counts of completed video chunks.
    frame_count: u64,
    /// Running byte total for this trace; an estimate until finalise replaces
    /// it with the total reported by the writer.
    bytes_on_disk: u64,
    /// Last `bytes_written` value flushed to the DB. Used by the debouncer to
    /// avoid issuing a no-op UPDATE when the writer's on-disk size hasn't
    /// changed since the last flush.
    last_db_bytes: i64,
    /// Running count of frames the storage budget refused. Logged
    /// periodically so a runaway producer with no disk left doesn't drown
    /// the daemon log in identical warnings.
+ dropped_over_budget: u64, +} + +impl ActorState { + fn new(identity: TraceIdentity) -> Self { + Self { + identity, + writer: TraceWriterKind::Pending, + frame_count: 0, + bytes_on_disk: 0, + last_db_bytes: 0, + dropped_over_budget: 0, + } + } + + /// Enqueue the trace's row creation through the write-behind. Idempotent on + /// `trace_id` (the batched insert is `ON CONFLICT DO NOTHING`). + fn send_create(&self, context: &Arc) { + let key = &self.identity.key; + context.trace_writer.create( + &self.identity.trace_id, + key.recording_index, + Some(&key.data_type), + key.sensor_name.as_deref(), + ); + } + + async fn handle_data( + &mut self, + context: &Arc, + timestamp_ns: i64, + _timestamp_s: Option, + payload: Vec, + ) { + if !self.budget_allows_frame(&context.storage_budget, payload.len()) { + return; + } + + self.ensure_writer_open(context); + + // Try to mark `writing` exactly once. Subsequent frames don't need an + // UPDATE for this field; the bytes-written debouncer covers the rest. + let bumped_status = self.frame_count == 0; + + if let Err(error) = self.append_frame(context, timestamp_ns, payload) { + tracing::warn!( + %error, + trace_id = self.identity.trace_id, + "failed to append frame; marking trace failed" + ); + self.mark_failed(context); + return; + } + + self.frame_count = self.frame_count.saturating_add(1); + + let bytes_changed = self.bytes_on_disk as i64 != self.last_db_bytes; + let debounce_due = self + .frame_count + .is_multiple_of(BYTES_WRITTEN_DEBOUNCE_FRAMES); + // Fire-and-forget into the coalescing write-behind (see module docs): + // the first frame bumps `writing` and the debounced byte count rides + // along. 
+ if bumped_status || (debounce_due && bytes_changed) { + if bumped_status { + context.trace_writer.mark_writing(&self.identity.trace_id); + } + if bytes_changed { + context + .trace_writer + .progress(&self.identity.trace_id, self.bytes_on_disk as i64); + self.last_db_bytes = self.bytes_on_disk as i64; + } + } + } + + /// Reserve `payload_len` bytes against the storage budget before the frame + /// is written. Uses `reserve` (not `check`) so the in-tree usage estimate is + /// actually incremented on the write path β€” otherwise the cap only ever + /// moved via the periodic rescan and `release`, letting the estimate drift + /// low between scans (see `StorageBudget` docs). `reserve` only increments + /// when the result is `Available`, so a refused frame doesn't over-count. + fn budget_allows_frame(&mut self, budget: &Arc, payload_len: usize) -> bool { + match budget.reserve(payload_len as u64) { + Ok(check) if check.is_available() => true, + Ok(check) => { + self.dropped_over_budget = self.dropped_over_budget.saturating_add(1); + if self.dropped_over_budget == 1 || self.dropped_over_budget.is_multiple_of(256) { + tracing::warn!( + trace_id = self.identity.trace_id, + dropped = self.dropped_over_budget, + ?check, + "storage budget refused frame; dropping" + ); + } + false + } + Err(error) => { + tracing::warn!( + %error, + trace_id = self.identity.trace_id, + "storage budget query failed; allowing frame through" + ); + true + } + } + } + + /// Lazily open the JSON writer for scalar traces. Video traces do not open + /// a writer on the data path β€” they wait for the first `Video` message to + /// allocate the video writer. + /// + /// The open is dispatched to the write-behind thread fire-and-forget; an + /// open failure (e.g. disk full) is surfaced when the trace finalises, + /// keeping this hot-path call non-blocking. 
+ fn ensure_writer_open(&mut self, context: &Arc) { + if !matches!(self.writer, TraceWriterKind::Pending) { + return; + } + + let trace_dir = self.trace_directory(context); + context.json_writer.open(&self.identity.trace_id, trace_dir); + self.bytes_on_disk = 0; + self.writer = TraceWriterKind::Json; + } + + fn append_frame( + &mut self, + context: &Arc, + timestamp_ns: i64, + payload: Vec, + ) -> Result<(), FrameAppendError> { + match &self.writer { + TraceWriterKind::Pending => Err(FrameAppendError::WriterNotOpen), + TraceWriterKind::Json => { + // Hand the entry to the write-behind thread, which preserves the + // producer's bit-exact JSON formatting on the verbatim path and + // wraps non-JSON payloads in a fallback object. Any write error + // is deferred to finalise. `bytes_on_disk` is tracked as a + // running estimate from the raw payload sizes β€” exact only at + // finalise (the thread returns the true total there) β€” which is + // ample for the debounced progress reports. + self.bytes_on_disk = self.bytes_on_disk.saturating_add(payload.len() as u64); + context + .json_writer + .append(&self.identity.trace_id, timestamp_ns, payload); + Ok(()) + } + TraceWriterKind::Video { .. } => { + // Video traces no longer receive standalone data samples β€” + // pixel data flows via `Video` messages. A stray sample for a + // video trace is a producer bug; log it and ignore. + tracing::warn!( + trace_id = self.identity.trace_id, + "video trace received standalone Data; ignoring" + ); + Ok(()) + } + } + } + + /// Handle one finished NUT chunk: transcode it to per-chunk MP4 segments, + /// append the segment paths to the pending list for the finalise concat, + /// and unlink the source NUT. 
+ #[allow(clippy::too_many_arguments)] + async fn handle_video( + &mut self, + context: &Arc, + chunk_index: u32, + spool_nut: PathBuf, + width: u32, + height: u32, + byte_count: u64, + frame_count: u32, + frame_timestamps_s: Vec, + ) { + let trace_dir = self.trace_directory(context); + let chunks_dir = trace_dir.join(paths::CHUNKS_DIRNAME); + let raw_nut = chunks_dir.join(paths::chunk_filename(chunk_index)); + let lossy_segment = trace_dir.join(paths::chunk_lossy_filename(chunk_index)); + let lossless_segment = trace_dir.join(paths::chunk_lossless_filename(chunk_index)); + + // Allocate the video writer on the first chunk and mark the trace + // `writing` so the registration coordinator can observe lifecycle + // progress. The mark happens once per trace. + let bumped_status = matches!(self.writer, TraceWriterKind::Pending); + if bumped_status { + self.writer = TraceWriterKind::Video { + width, + height, + completed_chunks: BTreeMap::new(), + pending_encodes: JoinSet::new(), + }; + } + + // Drain any background encodes that finished while we were idle. + if self.drain_completed_encodes(context) { + // A previous chunk's encode failed; mark_failed already ran, no + // point spawning more work. + return; + } + + // Sanity-warn on resolution drift β€” the on-disk sidecar uses the + // first-chunk values, so a producer bug shipping a different + // resolution mid-trace would lose pixels silently. + if let TraceWriterKind::Video { + width: stored_width, + height: stored_height, + .. + } = &self.writer + { + if (*stored_width, *stored_height) != (width, height) { + tracing::warn!( + trace_id = self.identity.trace_id, + chunk_index, + stored = ?(*stored_width, *stored_height), + arrived = ?(width, height), + "video chunk resolution disagrees with first-chunk resolution" + ); + } + } + + let TraceWriterKind::Video { + pending_encodes, .. + } = &mut self.writer + else { + // Should be unreachable β€” we just allocated the writer above. 
+ return; + }; + + // Spawn the encode as a background task. The actor returns to the + // inbox immediately so a slow ffmpeg invocation cannot back-pressure + // unrelated joint / scalar publishers sharing the commands service. + let permits = context.ffmpeg_permits.clone(); + let encoder = context.video_encoder.clone(); + let trace_id = self.identity.trace_id.clone(); + let chunks_dir_for_task = chunks_dir.clone(); + let request = ChunkEncodeRequest { + raw_nut: raw_nut.clone(), + lossy_out: lossy_segment.clone(), + lossless_out: lossless_segment.clone(), + }; + pending_encodes.spawn(async move { + // Acquire a permit, then encode. The permit lives only inside + // the task β€” dropping it releases the slot, even on panic. + let permit = match permits.acquire_owned().await { + Ok(permit) => permit, + Err(_) => { + return ChunkEncodeJobResult { + chunk_index, + outcome: Err(VideoEncodeError::Spawn { + binary: std::ffi::OsString::from("ffmpeg"), + source: std::io::Error::other("ffmpeg permit pool closed"), + }), + }; + } + }; + // Relink the producer-spooled NUT into the recording's chunks dir + // here rather than on the dispatcher's routing path. The `rename` + // (and `mkdir`) are filesystem metadata ops that can stall behind an + // ext4 journal commit on the shared spool, so we run them on a + // blocking thread β€” off both the dispatcher and the runtime workers. 
+ let relink = { + let spool = spool_nut.clone(); + let dest = request.raw_nut.clone(); + let chunks = chunks_dir_for_task.clone(); + tokio::task::spawn_blocking(move || relink_nut(&spool, &chunks, &dest)).await + }; + match relink { + Ok(Ok(())) => {} + Ok(Err(source)) => { + return ChunkEncodeJobResult { + chunk_index, + outcome: Err(VideoEncodeError::Io { + path: spool_nut.clone(), + source, + }), + }; + } + Err(join_error) => { + return ChunkEncodeJobResult { + chunk_index, + outcome: Err(VideoEncodeError::Spawn { + binary: std::ffi::OsString::from("relink"), + source: std::io::Error::other(format!( + "relink task join failed: {join_error}" + )), + }), + }; + } + } + let outcome = encoder.encode_chunk(&request).await; + drop(permit); + match outcome { + Ok(encode) => { + // Drop the source NUT chunk now that both segments are + // sealed. Failure to unlink leaves the file for the + // recovery sweep to collect. + if let Err(error) = std::fs::remove_file(&request.raw_nut) { + if error.kind() != std::io::ErrorKind::NotFound { + tracing::warn!( + %error, + trace_id = %trace_id, + chunk_index, + path = %request.raw_nut.display(), + "failed to remove source NUT chunk after encode" + ); + } + } + let segment_bytes = encode.lossy_bytes.saturating_add(encode.lossless_bytes); + tracing::debug!( + trace_id = %trace_id, + chunk_index, + frame_count, + byte_count, + lossy_bytes = encode.lossy_bytes, + lossless_bytes = encode.lossless_bytes, + "video chunk encoded" + ); + ChunkEncodeJobResult { + chunk_index, + outcome: Ok(CompletedChunk { + lossy_segment: request.lossy_out, + lossless_segment: request.lossless_out, + bytes: segment_bytes, + frame_timestamps_s, + frame_count, + }), + } + } + Err(error) => ChunkEncodeJobResult { + chunk_index, + outcome: Err(error), + }, + } + }); + + // Stamp `writing` on the first chunk so the registration coordinator + // sees the trace's lifecycle moving forward without waiting for the + // first encode to complete. 
+ if bumped_status { + context.trace_writer.mark_writing(&self.identity.trace_id); + } + } + + /// Drain every background encode that has already finished. On encode + /// failure marks the trace failed and returns `true`; otherwise returns + /// `false`. Caller-side use: gate further work on the return value. + fn drain_completed_encodes(&mut self, context: &Arc) -> bool { + let TraceWriterKind::Video { + completed_chunks, + pending_encodes, + .. + } = &mut self.writer + else { + return false; + }; + let mut any_failure = false; + let mut new_bytes: u64 = 0; + let mut new_frames: u64 = 0; + while let Some(joined) = pending_encodes.try_join_next() { + match joined { + Ok(result) => match result.outcome { + Ok(completed) => { + new_bytes = new_bytes.saturating_add(completed.bytes); + new_frames = new_frames.saturating_add(completed.frame_count as u64); + completed_chunks.insert(result.chunk_index, completed); + } + Err(error) => { + tracing::warn!( + %error, + trace_id = self.identity.trace_id, + chunk_index = result.chunk_index, + "failed to encode video chunk" + ); + any_failure = true; + } + }, + Err(join_error) => { + tracing::warn!( + %join_error, + trace_id = self.identity.trace_id, + "video encode task join failed" + ); + any_failure = true; + } + } + } + if new_bytes > 0 || new_frames > 0 { + self.bytes_on_disk = self.bytes_on_disk.saturating_add(new_bytes); + self.frame_count = self.frame_count.saturating_add(new_frames); + let bytes_changed = self.bytes_on_disk as i64 != self.last_db_bytes; + if bytes_changed { + context + .trace_writer + .progress(&self.identity.trace_id, self.bytes_on_disk as i64); + self.last_db_bytes = self.bytes_on_disk as i64; + } + } + if any_failure { + self.mark_failed(context); + } + any_failure + } + + async fn finalise_trace(&mut self, context: &Arc) { + let writer = std::mem::replace(&mut self.writer, TraceWriterKind::Pending); + let finalise = self.finalise_writer(writer, context).await; + match finalise { + Ok(total_bytes) 
=> { + self.bytes_on_disk = total_bytes; + context + .trace_writer + .finalise(&self.identity.trace_id, total_bytes as i64); + tracing::info!( + trace_id = self.identity.trace_id, + recording_index = self.identity.key.recording_index, + frame_count = self.frame_count, + dropped_over_budget = self.dropped_over_budget, + total_bytes, + "trace finalised" + ); + if let Some(bus) = context.event_bus.as_ref() { + bus.publish(crate::state::DaemonEvent::TraceWritten { + trace_id: self.identity.trace_id.clone(), + recording_index: self.identity.key.recording_index, + }); + } + } + Err(error) => { + tracing::warn!( + %error, + trace_id = self.identity.trace_id, + "failed to finalise trace artefacts" + ); + self.mark_failed(context); + } + } + } + + async fn finalise_writer( + &self, + writer: TraceWriterKind, + context: &Arc, + ) -> Result { + match writer { + TraceWriterKind::Pending => { + // Empty trace β€” no encoder was ever opened. Leave a single + // empty `trace.json` behind so the artefact set is complete: + // open it on the write-behind thread then finalise it. + let trace_dir = self.trace_directory(context); + context.json_writer.open(&self.identity.trace_id, trace_dir); + Ok(context.json_writer.finish(&self.identity.trace_id).await?) + } + TraceWriterKind::Json => { + Ok(context.json_writer.finish(&self.identity.trace_id).await?) + } + TraceWriterKind::Video { + width, + height, + mut completed_chunks, + mut pending_encodes, + } => { + // Drain every still-running encode. A failure here is + // terminal β€” without a complete chunk set the concat would + // produce a video with a missing range, which is worse than + // marking the trace failed. 
+ while let Some(joined) = pending_encodes.join_next().await { + let result = match joined { + Ok(result) => result, + Err(join_error) => { + return Err(FrameAppendError::VideoEncode(VideoEncodeError::Spawn { + binary: std::ffi::OsString::from("ffmpeg"), + source: std::io::Error::other(format!( + "video encode task join failed: {join_error}" + )), + })) + } + }; + let completed = result.outcome?; + completed_chunks.insert(result.chunk_index, completed); + } + + if completed_chunks.is_empty() { + // The trace allocated a Video writer but every chunk + // failed (or none ever landed) β€” fall back to the empty + // trace.json path so the artefact set isn't missing a + // sidecar entirely. + let trace_dir = self.trace_directory(context); + context.json_writer.open(&self.identity.trace_id, trace_dir); + return Ok(context.json_writer.finish(&self.identity.trace_id).await?); + } + + let trace_dir = self.trace_directory(context); + let lossy_out = trace_dir.join(paths::LOSSY_VIDEO_FILENAME); + let lossless_out = trace_dir.join(paths::LOSSLESS_VIDEO_FILENAME); + + // BTreeMap iteration is sorted by chunk_index, so the concat + // segment lists are guaranteed in producer-arrival order + // regardless of encode completion order. + let lossy_segments: Vec = completed_chunks + .values() + .map(|chunk| chunk.lossy_segment.clone()) + .collect(); + let lossless_segments: Vec = completed_chunks + .values() + .map(|chunk| chunk.lossless_segment.clone()) + .collect(); + + // Build the metadata accumulator in the same chunk-index + // order so per-frame entries appear in capture order. 
+ let mut metadata = VideoMetadataAccumulator::new(); + for chunk in completed_chunks.values() { + for timestamp_s in &chunk.frame_timestamps_s { + let mut entry = serde_json::Map::new(); + entry.insert("timestamp".to_string(), Value::from(*timestamp_s)); + entry.insert("width".to_string(), Value::from(width as u64)); + entry.insert("height".to_string(), Value::from(height as u64)); + metadata.record_frame(entry); + } + } + + // Concat is stream-copy: cheap relative to encode but still + // bounded by an ffmpeg permit so a tail-stitch storm + // doesn't fork-bomb the host. + let permit = context + .ffmpeg_permits + .clone() + .acquire_owned() + .await + .map_err(|_| FrameAppendError::FfmpegPermits)?; + let lossy_outcome = context + .video_encoder + .concat_segments(&lossy_segments, &lossy_out) + .await?; + let lossless_outcome = context + .video_encoder + .concat_segments(&lossless_segments, &lossless_out) + .await?; + drop(permit); + + // Unlink per-chunk segments now that the final outputs are + // sealed. Best-effort: a leftover segment is wasted disk + // space, not a correctness problem. + for segment in lossy_segments.iter().chain(lossless_segments.iter()) { + if let Err(error) = std::fs::remove_file(segment) { + if error.kind() != std::io::ErrorKind::NotFound { + tracing::warn!( + %error, + trace_id = self.identity.trace_id, + path = %segment.display(), + "failed to remove encoded chunk segment after concat" + ); + } + } + } + + // Sidecar metadata is the *last* thing on disk so a partial + // transcode failure leaves a recognisable "no sidecar" + // signature for the recovery sweep. 
+ let metadata_bytes = flush_metadata_blocking(metadata, trace_dir.clone()).await?; + + tracing::debug!( + trace_id = self.identity.trace_id, + chunks_encoded = completed_chunks.len(), + "video trace concatenated" + ); + + Ok(lossy_outcome + .bytes + .saturating_add(lossless_outcome.bytes) + .saturating_add(metadata_bytes)) + } + } + } + + async fn handle_shutdown_without_end(&mut self, context: &Arc) { + self.mark_failed(context); + } + + /// Enqueue a `failed` write for this trace, preserving the latest byte + /// count. Fire-and-forget through the coalescing batcher; the terminal + /// guard in `apply_trace_writes` keeps it from clobbering an + /// already-`written` row. + fn mark_failed(&mut self, context: &Arc) { + context + .trace_writer + .fail(&self.identity.trace_id, self.bytes_on_disk as i64); + } + + /// Tear down the writer and release the trace's disk budget. + /// + /// Called when the parent recording is cancelled. The on-disk artefacts are + /// *not* removed here: the recording reaper deletes the whole recording + /// directory (and the DB rows) together once the cancel has been durably + /// notified to the backend, so it is the single owner of cancelled-recording + /// file removal. The DB row's `write_status` is left untouched here β€” the + /// dispatcher issues a single `cancel_recording` transaction once every + /// actor has exited. + async fn handle_cancel(&mut self, context: &Arc) { + // Discard any open JSON write-behind writer (no-op for video / unopened + // traces) so its file handle is released without finalising, then drop + // the actor-side writer marker. + // Only scalar/JSON traces reserve against the budget on the write path + // (via `budget_allows_frame`). A video trace's `bytes_on_disk` is encoder + // output that was never reserved, so releasing it here would drive the + // estimate below true usage; release only what a JSON trace reserved. 
+ let reserved_json_bytes = matches!(self.writer, TraceWriterKind::Json); + context.json_writer.drop_trace(&self.identity.trace_id); + self.writer = TraceWriterKind::Pending; + if reserved_json_bytes && self.bytes_on_disk > 0 { + context.storage_budget.release(self.bytes_on_disk); + } + self.bytes_on_disk = 0; + self.last_db_bytes = 0; + } + + /// Build the on-disk directory for this trace: + /// `{recordings_root}/{recording_index}/{data_type}/{trace_id}/`. + fn trace_directory(&self, context: &Arc) -> std::path::PathBuf { + TracePath::new( + self.identity.key.recording_index.to_string(), + self.identity.key.data_type.clone(), + self.identity.trace_id.clone(), + ) + .directory(context.recordings_root.as_path()) + } +} + +/// Errors that can surface while appending or finalising a frame. The variants +/// are unified so `handle_data` / `finalise_trace` can log + mark-failed in +/// one place regardless of which writer raised. +#[derive(Debug, thiserror::Error)] +enum FrameAppendError { + #[error("trace writer not open")] + WriterNotOpen, + #[error("ffmpeg permit pool closed before transcode could start")] + FfmpegPermits, + #[error(transparent)] + Json(#[from] JsonTraceError), + #[error(transparent)] + VideoEncode(#[from] VideoEncodeError), + #[error(transparent)] + Metadata(#[from] MetadataError), +} + +/// Flush the in-memory metadata accumulator to `trace.json` on a blocking +/// thread. +async fn flush_metadata_blocking( + metadata: VideoMetadataAccumulator, + output_dir: std::path::PathBuf, +) -> Result { + let path_for_error = output_dir.clone(); + let handle = task::spawn_blocking(move || metadata.finish(&output_dir)); + match handle.await { + Ok(result) => Ok(result?), + Err(join_error) => Err(FrameAppendError::Metadata(MetadataError::Write { + path: path_for_error, + source: std::io::Error::other(format!("metadata flush join failed: {join_error}")), + })), + } +} + +/// Relink a producer-spooled NUT into the recording's chunks directory. 
+/// Prefers an atomic rename (same filesystem); falls back to copy + remove. +/// Blocking β€” the actor runs it via `spawn_blocking` inside the background +/// encode task so the rename can't stall the dispatcher or a runtime worker. +fn relink_nut( + src: &std::path::Path, + chunks_dir: &std::path::Path, + dest: &std::path::Path, +) -> std::io::Result<()> { + std::fs::create_dir_all(chunks_dir)?; + match std::fs::rename(src, dest) { + Ok(()) => Ok(()), + Err(_) => { + std::fs::copy(src, dest)?; + let _ = std::fs::remove_file(src); + Ok(()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::state::{SqliteStateStore, StateStore, TraceWriteStatus}; + use crate::storage::budget::StoragePolicy; + use serde_json::json; + use std::time::Duration; + use tempfile::TempDir; + + /// Build an actor context whose write-behind flushes into `store`. The + /// [`TraceEventDatabaseWriter`] owner is dropped β€” the spawned task stays alive while the + /// handle in the returned context lives (dropping its `JoinHandle` detaches, + /// not cancels). Tests call `context.trace_writer.flush().await` before + /// asserting on the DB, since actor writes are now fire-and-forget. 
+    fn test_context(
+        root: &std::path::Path,
+        store: Arc<SqliteStateStore>,
+    ) -> Arc<TraceActorContext> {
+        let policy = StoragePolicy {
+            storage_limit_bytes: None,
+            min_free_disk_bytes: 0,
+            refresh_interval: Duration::from_secs(60),
+        };
+        let budget = Arc::new(StorageBudget::new(root, policy));
+        let (trace_writer, _writer_owner) = crate::state::trace_event_database_writer::spawn(store);
+        let (json_writer, _json_owner) = crate::pipeline::json_writer::spawn();
+        Arc::new(TraceActorContext::new(
+            root.to_path_buf(),
+            budget,
+            VideoEncoder::new(),
+            trace_writer,
+            json_writer,
+        ))
+    }
+
+    fn identity(recording_index: i64, trace_id: &str, data_type: &str) -> TraceIdentity {
+        TraceIdentity {
+            trace_id: trace_id.to_string(),
+            key: TraceKey {
+                recording_index,
+                data_type: data_type.to_string(),
+                sensor_name: None,
+            },
+        }
+    }
+
+    fn ffmpeg_available() -> bool {
+        std::process::Command::new("ffmpeg")
+            .arg("-version")
+            .stdout(std::process::Stdio::null())
+            .stderr(std::process::Stdio::null())
+            .status()
+            .map(|status| status.success())
+            .unwrap_or(false)
+    }
+
+    #[test]
+    fn scalar_fallback_entry_wraps_non_json_payload() {
+        let entry = crate::pipeline::json_writer::scalar_fallback_entry(123, &[0xFF, 0xFE]);
+        assert_eq!(entry, json!({"timestamp_ns": 123, "payload_len": 2}));
+    }
+
+    #[tokio::test]
+    async fn json_trace_writes_array_on_finalise() {
+        let tempdir = TempDir::new().unwrap();
+        let store = SqliteStateStore::open(&tempdir.path().join("state.db"))
+            .await
+            .expect("open store");
+        let store_arc = Arc::new(store.clone());
+        let context = test_context(&tempdir.path().join("recordings"), store_arc.clone());
+
+        let mut state = ActorState::new(identity(7, "trace-1", "joints"));
+        state.send_create(&context);
+        for index in 0..3i64 {
+            let payload = serde_json::to_vec(&json!({"i": index})).unwrap();
+            state
+                .handle_data(&context, index * 1_000_000, None, payload)
+                .await;
+        }
+        state.finalise_trace(&context).await;
+        context.trace_writer.flush().await;
+
+        let trace_dir =
+            TracePath::new("7", "joints", "trace-1").directory(context.recordings_root.as_path());
+        let bytes = std::fs::read(trace_dir.join("trace.json")).unwrap();
+        let parsed: Value = serde_json::from_slice(&bytes).unwrap();
+        assert_eq!(parsed, json!([{"i": 0}, {"i": 1}, {"i": 2}]));
+
+        let trace = store
+            .get_trace("trace-1")
+            .await
+            .expect("get trace")
+            .expect("trace exists");
+        assert_eq!(trace.write_status, TraceWriteStatus::Written);
+        assert_eq!(trace.recording_index, 7);
+        assert_eq!(trace.total_bytes as u64, bytes.len() as u64);
+    }
+
+    #[tokio::test]
+    async fn empty_trace_still_produces_valid_json_array() {
+        let tempdir = TempDir::new().unwrap();
+        let store = SqliteStateStore::open(&tempdir.path().join("state.db"))
+            .await
+            .expect("open store");
+        let store_arc = Arc::new(store.clone());
+        let context = test_context(&tempdir.path().join("recordings"), store_arc.clone());
+
+        let mut state = ActorState::new(identity(1, "trace-1", "joints"));
+        state.send_create(&context);
+        state.finalise_trace(&context).await;
+        context.trace_writer.flush().await;
+
+        let trace_dir =
+            TracePath::new("1", "joints", "trace-1").directory(context.recordings_root.as_path());
+        let bytes = std::fs::read(trace_dir.join("trace.json")).unwrap();
+        assert_eq!(bytes, b"[]");
+
+        let trace = store
+            .get_trace("trace-1")
+            .await
+            .expect("get trace")
+            .expect("trace exists");
+        assert_eq!(trace.write_status, TraceWriteStatus::Written);
+    }
+
+    #[tokio::test]
+    async fn video_chunks_concat_on_finalise() {
+        if !ffmpeg_available() {
+            eprintln!("ffmpeg not on PATH — skipping video trace_actor test.");
+            return;
+        }
+
+        let tempdir = TempDir::new().unwrap();
+        let store = SqliteStateStore::open(&tempdir.path().join("state.db"))
+            .await
+            .expect("open store");
+        let store_arc = Arc::new(store.clone());
+        let context = test_context(&tempdir.path().join("recordings"), store_arc.clone());
+
+        let mut state = ActorState::new(identity(1, "trace-vid", "RGB"));
+        state.send_create(&context);
+
+        // Build two NUT chunks via ffmpeg testsrc in a spool location; the actor
+        // relinks each into the recording's chunks dir before transcoding, just
+        // as it does for a producer-spooled chunk in production.
+        let trace_dir =
+            TracePath::new("1", "RGB", "trace-vid").directory(context.recordings_root.as_path());
+        let chunks_dir = trace_dir.join(paths::CHUNKS_DIRNAME);
+        let spool_dir = tempdir.path().join("spool");
+        std::fs::create_dir_all(&spool_dir).unwrap();
+
+        for chunk_index in 0..2u32 {
+            let spool_nut = spool_dir.join(format!("chunk_{chunk_index}.nut"));
+            let status = std::process::Command::new("ffmpeg")
+                .args([
+                    "-y",
+                    "-hide_banner",
+                    "-loglevel",
+                    "error",
+                    "-f",
+                    "lavfi",
+                    "-i",
+                ])
+                .arg("testsrc=duration=4:size=16x16:rate=1")
+                .args(["-c:v", "rawvideo", "-pix_fmt", "rgb24", "-f", "nut"])
+                .arg(&spool_nut)
+                .status()
+                .expect("synth status");
+            assert!(status.success(), "synth NUT failed");
+
+            let byte_count = spool_nut.metadata().unwrap().len();
+            let frame_timestamps_s: Vec<f64> =
+                (0..4u32).map(|i| (chunk_index * 4 + i) as f64).collect();
+            state
+                .handle_video(
+                    &context,
+                    chunk_index,
+                    spool_nut,
+                    16,
+                    16,
+                    byte_count,
+                    4,
+                    frame_timestamps_s,
+                )
+                .await;
+        }
+
+        state.finalise_trace(&context).await;
+        context.trace_writer.flush().await;
+
+        assert!(trace_dir.join(paths::LOSSY_VIDEO_FILENAME).exists());
+        assert!(trace_dir.join(paths::LOSSLESS_VIDEO_FILENAME).exists());
+        assert!(trace_dir.join(paths::TRACE_JSON_FILENAME).exists());
+        for chunk_index in 0..2u32 {
+            assert!(!chunks_dir.join(paths::chunk_filename(chunk_index)).exists());
+        }
+
+        let trace = store
+            .get_trace("trace-vid")
+            .await
+            .expect("get trace")
+            .expect("trace exists");
+        assert_eq!(trace.write_status, TraceWriteStatus::Written);
+        assert!(trace.total_bytes > 0);
+    }
+}
diff --git a/rust/data_daemon/src/state/events.rs b/rust/data_daemon/src/state/events.rs
new file mode 100644
index 000000000..ab881b23a
--- 
/dev/null
+++ b/rust/data_daemon/src/state/events.rs
@@ -0,0 +1,166 @@
+//! Broadcast event bus driving cross-actor coordination.
+//!
+//! A `tokio::sync::broadcast::channel(256)` whose subscribers are the
+//! dispatcher, registration coordinator, upload coordinator, status updater,
+//! and progress reporter.
+
+use tokio::sync::broadcast;
+
+/// Default capacity of the daemon event channel.
+///
+/// Capacity chosen so bursts of cross-actor events fit without lagging slow
+/// subscribers; a lagging subscriber gets [`broadcast::error::RecvError::Lagged`]
+/// and re-reads state on its next tick.
+pub const EVENT_BUS_CAPACITY: usize = 256;
+
+/// Connection state reported by the network monitor.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ConnectionState {
+    /// Backend reachable.
+    Up,
+    /// Backend unreachable; uploaders pause until the next `Up` transition.
+    Down,
+}
+
+/// Events the daemon's coordinator tasks react to.
+///
+/// All payloads are owned `String`/`Copy` types so events can be cloned cheaply
+/// across `broadcast` receivers without holding any backing buffer alive.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum DaemonEvent {
+    /// A recording was opened by the producer. The recording row (and its
+    /// `recording_index`) already exists by the time this fires; the
+    /// recording-start notifier reacts by POSTing `/recording/start`.
+    RecordingStarted {
+        /// Local recording index the event applies to.
+        recording_index: i64,
+    },
+    /// The recording-start notifier persisted the backend cloud `recording_id`
+    /// (via `/recording/start`). Fires once per recording, the moment its cloud
+    /// id becomes available. Coordinators that were waiting on the id — notably
+    /// the stop notifier, when a recording was stopped while offline before its
+    /// start had been notified — react to it without polling.
+    RecordingCloudIdAssigned {
+        /// Local recording index the event applies to.
+        recording_index: i64,
+    },
+    /// A trace finished writing to local disk and is ready for registration.
+    TraceWritten {
+        /// Trace identifier the event applies to.
+        trace_id: String,
+        /// Parent recording's local index.
+        recording_index: i64,
+    },
+    /// A trace was successfully registered with the backend.
+    TraceRegistered {
+        /// Trace identifier the event applies to.
+        trace_id: String,
+        /// Parent recording's local index.
+        recording_index: i64,
+    },
+    /// Registration completed and the trace is queued for upload.
+    ReadyForUpload {
+        /// Trace identifier the event applies to.
+        trace_id: String,
+        /// Parent recording's local index.
+        recording_index: i64,
+    },
+    /// A trace has finished uploading.
+    UploadComplete {
+        /// Trace identifier the event applies to.
+        trace_id: String,
+        /// Parent recording's local index.
+        recording_index: i64,
+    },
+    /// A trace's upload progressed by some number of bytes (used to drive the
+    /// debounced status updater).
+    UploadProgress {
+        /// Trace identifier the event applies to.
+        trace_id: String,
+        /// Parent recording's local index.
+        recording_index: i64,
+        /// Bytes uploaded so far.
+        bytes_uploaded: i64,
+        /// Total bytes once finalised; reported when known.
+        total_bytes: Option<i64>,
+    },
+    /// A recording was stopped by the producer.
+    RecordingStopped {
+        /// Local recording index the event applies to.
+        recording_index: i64,
+    },
+    /// A recording was cancelled by the producer. The dispatcher publishes
+    /// this after every per-trace actor for the recording has been torn
+    /// down, the on-disk artefacts have been deleted, and the recording's
+    /// `cancelled_at` has been stamped.
+    RecordingCancelled {
+        /// Local recording index the event applies to.
+        recording_index: i64,
+    },
+    /// Connection state to the backend changed.
+    ConnectionStateChanged(ConnectionState),
+}
+
+/// Owns the sender end of the broadcast channel and hands out subscribers.
+///
+/// Clone the bus to share the sender across tasks; clone the receiver via
+/// [`subscribe`](Self::subscribe).
+#[derive(Clone)]
+pub struct EventBus {
+    sender: broadcast::Sender<DaemonEvent>,
+}
+
+impl EventBus {
+    /// Create a new bus with the default [`EVENT_BUS_CAPACITY`].
+    pub fn new() -> Self {
+        let (sender, _) = broadcast::channel(EVENT_BUS_CAPACITY);
+        Self { sender }
+    }
+
+    /// Subscribe to events. The returned receiver only sees events published
+    /// *after* it was created — replay is intentionally not supported.
+    pub fn subscribe(&self) -> broadcast::Receiver<DaemonEvent> {
+        self.sender.subscribe()
+    }
+
+    /// Publish an event. Returns the number of active receivers reached, or
+    /// zero when no task is currently subscribed.
+    pub fn publish(&self, event: DaemonEvent) -> usize {
+        self.sender.send(event).unwrap_or(0)
+    }
+}
+
+impl Default for EventBus {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn publish_reaches_each_subscriber() {
+        let bus = EventBus::new();
+        let mut first = bus.subscribe();
+        let mut second = bus.subscribe();
+
+        let event = DaemonEvent::TraceWritten {
+            trace_id: "trace-1".to_string(),
+            recording_index: 1,
+        };
+        let delivered = bus.publish(event.clone());
+        assert_eq!(delivered, 2);
+
+        assert_eq!(first.recv().await.unwrap(), event);
+        assert_eq!(second.recv().await.unwrap(), event);
+    }
+
+    #[test]
+    fn publish_with_no_subscribers_is_zero() {
+        let bus = EventBus::new();
+        let delivered = bus.publish(DaemonEvent::RecordingStopped { recording_index: 2 });
+        assert_eq!(delivered, 0);
+    }
+}
diff --git a/rust/data_daemon/src/state/mod.rs b/rust/data_daemon/src/state/mod.rs
new file mode 100644
index 000000000..fd3b6fbae
--- /dev/null
+++ b/rust/data_daemon/src/state/mod.rs
@@ -0,0 +1,24 @@
+//! Daemon state management: SQLite persistence and the broadcast event bus.
+//!
+//! Defines the [`StateStore`] trait, its [`SqliteStateStore`] implementation,
+//! 
and the [`DaemonEvent`] broadcast bus that the dispatcher, trace actors,
+//! and cloud coordinators subscribe to.
+
+pub mod events;
+pub mod schema;
+pub mod store;
+pub mod trace_event_database_writer;
+
+#[allow(unused_imports)]
+pub use events::{ConnectionState, DaemonEvent, EventBus};
+#[allow(unused_imports)]
+pub use schema::{
+    ProgressReportStatus, RecordingRow, TraceErrorCode, TraceRecord, TraceRegistrationStatus,
+    TraceUploadStatus, TraceWriteStatus,
+};
+#[allow(unused_imports)]
+pub use store::{
+    CoalescedTraceWrite, NewRecording, SqliteStateStore, StateStore, StateStoreError, TraceUpdate,
+};
+#[allow(unused_imports)]
+pub use trace_event_database_writer::{TraceEventDatabaseWriter, TraceWriteHandle};
diff --git a/rust/data_daemon/src/state/schema.rs b/rust/data_daemon/src/state/schema.rs
new file mode 100644
index 000000000..6d118cafb
--- /dev/null
+++ b/rust/data_daemon/src/state/schema.rs
@@ -0,0 +1,346 @@
+//! Strongly-typed rows and lifecycle enums for the daemon's SQLite tables.
+//!
+//! The enum string values are part of the integration-test contract: the
+//! constants in `tests/integration/platform/data_daemon/shared/db_constants.py`
+//! pin the string columns, so the stored spellings must stay stable.
+
+use std::str::FromStr;
+
+use chrono::NaiveDateTime;
+use serde::{Deserialize, Serialize};
+use sqlx::Row;
+use thiserror::Error;
+
+macro_rules! string_enum {
+    (
+        $(#[$meta:meta])*
+        $vis:vis enum $name:ident {
+            $(
+                $(#[$variant_meta:meta])*
+                $variant:ident => $value:literal,
+            )+
+        }
+    ) => {
+        $(#[$meta])*
+        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+        $vis enum $name {
+            $(
+                $(#[$variant_meta])*
+                $variant,
+            )+
+        }
+
+        impl $name {
+            /// Wire-format string used in the SQLite column.
+            pub fn as_str(self) -> &'static str {
+                match self {
+                    $(
+                        Self::$variant => $value,
+                    )+
+                }
+            }
+        }
+
+        impl std::fmt::Display for $name {
+            fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                formatter.write_str(self.as_str())
+            }
+        }
+
+        impl FromStr for $name {
+            type Err = ParseEnumError;
+            fn from_str(value: &str) -> Result<Self, Self::Err> {
+                match value {
+                    $(
+                        $value => Ok(Self::$variant),
+                    )+
+                    other => Err(ParseEnumError {
+                        kind: stringify!($name),
+                        value: other.to_string(),
+                    }),
+                }
+            }
+        }
+    };
+}
+
+/// Returned when a status string read from SQLite does not match any known
+/// variant. The row decoders wrap it in `sqlx::Error::ColumnDecode`, which
+/// converts into `StateStoreError::Sqlx`.
+#[derive(Debug, Clone, Error)]
+#[error("invalid {kind} value: '{value}'")]
+pub struct ParseEnumError {
+    /// Enum type name that failed to parse.
+    pub kind: &'static str,
+    /// Offending column value.
+    pub value: String,
+}
+
+string_enum! {
+    /// Write/persistence lifecycle for a trace.
+    ///
+    /// Matches `TraceWriteStatus` in `neuracore/data_daemon/models.py`.
+    pub enum TraceWriteStatus {
+        Pending => "pending",
+        Initializing => "initializing",
+        Writing => "writing",
+        PendingMetadata => "pending_metadata",
+        Written => "written",
+        Failed => "failed",
+    }
+}
+
+string_enum! {
+    /// Backend registration lifecycle for a trace.
+    pub enum TraceRegistrationStatus {
+        Pending => "pending",
+        Registering => "registering",
+        Registered => "registered",
+        Retrying => "retrying",
+        Failed => "failed",
+    }
+}
+
+string_enum! {
+    /// Upload lifecycle for a trace.
+    pub enum TraceUploadStatus {
+        Pending => "pending",
+        Queued => "queued",
+        Uploading => "uploading",
+        Paused => "paused",
+        Uploaded => "uploaded",
+        Retrying => "retrying",
+        Failed => "failed",
+    }
+}
+
+string_enum! {
+    /// Standardised error codes for trace failures.
+    pub enum TraceErrorCode {
+        Unknown => "unknown",
+        WriteFailed => "write_failed",
+        EncodeFailed => "encode_failed",
+        UploadFailed => "upload_failed",
+        DiskFull => "disk_full",
+        NetworkError => "network_error",
+        ProgressReportError => "progress_report_error",
+        RecordingCancelled => "recording_cancelled",
+    }
+}
+
+string_enum! {
+    /// Status of progress report for a recording.
+    pub enum ProgressReportStatus {
+        Pending => "pending",
+        Reporting => "reporting",
+        Reported => "reported",
+    }
+}
+
+/// A row from the `recordings` table.
+///
+/// The daemon owns recording identity: `recording_index` is the local primary
+/// key (AUTOINCREMENT), allocated when the `StartRecording` envelope is first
+/// seen; `recording_id` is the cloud handle, filled asynchronously by the
+/// recording-start notifier once `/recording/start` lands. The two are
+/// independent — never aliased.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RecordingRow {
+    /// Local primary key (AUTOINCREMENT). The daemon keys every internal
+    /// structure and the `traces` foreign key on this.
+    pub recording_index: i64,
+    /// Cloud handle. `None` until `/recording/start` is notified. Every cloud
+    /// URL reads this straight from the row; downstream coordinators wait for
+    /// it, so an offline recording stays pending until the daemon is online.
+    pub recording_id: Option<String>,
+    /// Robot identifier — first half of the source key.
+    pub robot_id: Option<String>,
+    /// Robot instance — second half of the source key.
+    pub robot_instance: Option<i64>,
+    /// Dataset identifier, when supplied.
+    pub dataset_id: Option<String>,
+    /// Producer capture-clock window lower bound (Unix nanoseconds).
+    pub start_timestamp_ns: Option<i64>,
+    /// Producer capture-clock window upper bound (Unix nanoseconds).
+    pub stop_timestamp_ns: Option<i64>,
+    /// Expected number of traces, set when the producer declares it.
+    pub expected_trace_count: Option<i64>,
+    /// Non-zero once the count was reported to the backend; holds the count sent.
+    pub expected_trace_count_reported: i64,
+    /// Progress-report lifecycle for this recording.
+    pub progress_reported: ProgressReportStatus,
+    /// Set when the producer issues a stop command.
+    pub stopped_at: Option<NaiveDateTime>,
+    /// Set when the producer issues a cancel command. Cancelled recordings
+    /// are ignored by the cloud coordinators and skipped by the progress
+    /// reporter.
+    pub cancelled_at: Option<NaiveDateTime>,
+    /// Set when the recording-start notifier successfully POSTed
+    /// `/recording/start` and persisted the cloud `recording_id`.
+    pub backend_start_notified_at: Option<NaiveDateTime>,
+    /// Set when the recording-stop notifier successfully POSTed
+    /// `/recording/stop` to the backend. `None` means the backend has not
+    /// yet been notified — typically a recording stopped while the daemon
+    /// was offline; the notifier sweeps these on startup.
+    pub backend_stop_notified_at: Option<NaiveDateTime>,
+    /// Set when the recording-cancel notifier successfully POSTed
+    /// `/recording/cancel` to the backend. `None` means either the recording
+    /// was never cancelled, or cancellation has not yet been notified.
+    pub backend_cancel_notified_at: Option<NaiveDateTime>,
+    /// First-seen timestamp.
+    pub created_at: NaiveDateTime,
+    /// Last write timestamp; bumped on every row mutation.
+    pub last_updated: NaiveDateTime,
+}
+
+impl RecordingRow {
+    /// Decode a SQLite row into a [`RecordingRow`].
+    pub(crate) fn from_row(row: &sqlx::sqlite::SqliteRow) -> Result<Self, sqlx::Error> {
+        let progress_reported = parse_column::<ProgressReportStatus>(row, "progress_reported")?;
+        Ok(RecordingRow {
+            recording_index: row.try_get("recording_index")?,
+            recording_id: row.try_get("recording_id")?,
+            robot_id: row.try_get("robot_id")?,
+            robot_instance: row.try_get("robot_instance")?,
+            dataset_id: row.try_get("dataset_id")?,
+            start_timestamp_ns: row.try_get("start_timestamp_ns")?,
+            stop_timestamp_ns: row.try_get("stop_timestamp_ns")?,
+            expected_trace_count: row.try_get("expected_trace_count")?,
+            expected_trace_count_reported: row.try_get("expected_trace_count_reported")?,
+            progress_reported,
+            stopped_at: row.try_get("stopped_at")?,
+            cancelled_at: row.try_get("cancelled_at")?,
+            backend_start_notified_at: row.try_get("backend_start_notified_at")?,
+            backend_stop_notified_at: row.try_get("backend_stop_notified_at")?,
+            backend_cancel_notified_at: row.try_get("backend_cancel_notified_at")?,
+            created_at: row.try_get("created_at")?,
+            last_updated: row.try_get("last_updated")?,
+        })
+    }
+}
+
+/// A row from the `traces` table.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct TraceRecord {
+    /// Primary key (daemon-minted UUID).
+    pub trace_id: String,
+    /// Parent recording (local `recording_index`).
+    pub recording_index: i64,
+    /// Write lifecycle.
+    pub write_status: TraceWriteStatus,
+    /// Registration lifecycle.
+    pub registration_status: TraceRegistrationStatus,
+    /// Upload lifecycle.
+    pub upload_status: TraceUploadStatus,
+    /// Wire data-type label (e.g. `"video"`); free-form string carried verbatim.
+    pub data_type: Option<String>,
+    /// Producer-supplied data-type name.
+    pub data_type_name: Option<String>,
+    /// Filesystem path to the on-disk artefact.
+    pub path: Option<String>,
+    /// Bytes written so far.
+    pub bytes_written: i64,
+    /// Total bytes once finalised (`0` while in progress).
+    pub total_bytes: i64,
+    /// Bytes uploaded so far.
+    pub bytes_uploaded: i64,
+    /// Latest error code, if any.
+    pub error_code: Option<TraceErrorCode>,
+    /// Latest error message, if any.
+    pub error_message: Option<String>,
+    /// JSON-encoded `{filepath: session_uri}` map populated by the
+    /// registration coordinator. The uploader reads this back on
+    /// `ReadyForUpload` and dispatches one resumable upload per entry.
+    pub upload_session_uris: Option<String>,
+    /// First-seen timestamp.
+    pub created_at: NaiveDateTime,
+    /// Last write timestamp; bumped on every row mutation.
+    pub last_updated: NaiveDateTime,
+}
+
+impl TraceRecord {
+    /// Decode a SQLite row into a [`TraceRecord`].
+    pub(crate) fn from_row(row: &sqlx::sqlite::SqliteRow) -> Result<Self, sqlx::Error> {
+        let write_status = parse_column::<TraceWriteStatus>(row, "write_status")?;
+        let registration_status =
+            parse_column::<TraceRegistrationStatus>(row, "registration_status")?;
+        let upload_status = parse_column::<TraceUploadStatus>(row, "upload_status")?;
+        let error_code = row
+            .try_get::<Option<String>, _>("error_code")?
+            .map(|raw| {
+                TraceErrorCode::from_str(&raw).map_err(|error| decode_error("error_code", error))
+            })
+            .transpose()?;
+        Ok(TraceRecord {
+            trace_id: row.try_get("trace_id")?,
+            recording_index: row.try_get("recording_index")?,
+            write_status,
+            registration_status,
+            upload_status,
+            data_type: row.try_get("data_type")?,
+            data_type_name: row.try_get("data_type_name")?,
+            path: row.try_get("path")?,
+            bytes_written: row.try_get("bytes_written")?,
+            total_bytes: row.try_get("total_bytes")?,
+            bytes_uploaded: row.try_get("bytes_uploaded")?,
+            error_code,
+            error_message: row.try_get("error_message")?,
+            upload_session_uris: row.try_get("upload_session_uris")?,
+            created_at: row.try_get("created_at")?,
+            last_updated: row.try_get("last_updated")?,
+        })
+    }
+}
+
+fn parse_column<T>(row: &sqlx::sqlite::SqliteRow, column: &'static str) -> Result<T, sqlx::Error>
+where
+    T: FromStr<Err = ParseEnumError>,
+{
+    let raw: String = row.try_get(column)?;
+    T::from_str(&raw).map_err(|error| decode_error(column, error))
+}
+
+fn decode_error(column: &'static str, error: ParseEnumError) -> sqlx::Error {
+    sqlx::Error::ColumnDecode {
+        index: column.to_string(),
+        source: Box::new(error),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn enum_round_trip_through_str() {
+        for status in [
+            TraceWriteStatus::Pending,
+            TraceWriteStatus::Initializing,
+            TraceWriteStatus::Writing,
+            TraceWriteStatus::PendingMetadata,
+            TraceWriteStatus::Written,
+            TraceWriteStatus::Failed,
+        ] {
+            assert_eq!(TraceWriteStatus::from_str(status.as_str()).unwrap(), status);
+        }
+    }
+
+    #[test]
+    fn enum_string_values_match_python() {
+        // Spot-check a few values that integration tests assert on.
+        assert_eq!(
+            TraceWriteStatus::PendingMetadata.as_str(),
+            "pending_metadata"
+        );
+        assert_eq!(TraceUploadStatus::Uploaded.as_str(), "uploaded");
+        assert_eq!(ProgressReportStatus::Reported.as_str(), "reported");
+        assert_eq!(TraceErrorCode::DiskFull.as_str(), "disk_full");
+    }
+
+    #[test]
+    fn parse_rejects_unknown_value() {
+        let error = TraceUploadStatus::from_str("bogus").unwrap_err();
+        assert_eq!(error.value, "bogus");
+        assert_eq!(error.kind, "TraceUploadStatus");
+    }
+}
diff --git a/rust/data_daemon/src/state/store.rs b/rust/data_daemon/src/state/store.rs
new file mode 100644
index 000000000..4ea9275c4
--- /dev/null
+++ b/rust/data_daemon/src/state/store.rs
@@ -0,0 +1,2312 @@
+//! SQLite-backed implementation of the daemon's [`StateStore`].
+//!
+//! The persistence layer: schema migration via `sqlx`, WAL pragmas on every
+//! connection, and the CRUD operations the per-trace actors and registration
+//! coordinator rely on.
+
+use std::path::{Path, PathBuf};
+
+use async_trait::async_trait;
+use chrono::Utc;
+use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteSynchronous};
+use sqlx::{ConnectOptions, SqliteConnection, SqlitePool};
+use thiserror::Error;
+
+use crate::state::schema::{
+    ProgressReportStatus, RecordingRow, TraceErrorCode, TraceRecord, TraceRegistrationStatus,
+    TraceUploadStatus, TraceWriteStatus,
+};
+
+/// Embedded migrations, applied on every [`SqliteStateStore::open`].
+static MIGRATOR: sqlx::migrate::Migrator = sqlx::migrate!("./migrations");
+
+/// Busy timeout (`PRAGMA busy_timeout`) applied to every connection.
+///
+/// With the read/write pool split only one connection ever writes, but a
+/// background WAL checkpoint or an external reader can still briefly hold the
+/// database lock; 5 s comfortably absorbs those transient holds without masking
+/// a genuine deadlock.
+const BUSY_TIMEOUT_MS: u32 = 5000;
+
+/// Connections in the read pool. Only the cloud coordinators (a handful of
+/// periodic tasks) read concurrently, so a small pool is ample; the writer
+/// lives on its own single-connection pool and never competes for these.
+const READ_POOL_CONNECTIONS: u32 = 4;
+
+/// Errors surfaced by [`StateStore`] operations.
+#[derive(Debug, Error)]
+pub enum StateStoreError {
+    /// Wrapped `sqlx` error from a query or migration.
+    #[error(transparent)]
+    Sqlx(#[from] sqlx::Error),
+    /// Wrapped migration error.
+    #[error(transparent)]
+    Migration(#[from] sqlx::migrate::MigrateError),
+    /// Failed to create the SQLite parent directory.
+    #[error("failed to prepare state directory {path}: {source}")]
+    Io {
+        /// Directory whose creation failed.
+        path: PathBuf,
+        /// Underlying I/O error.
+        #[source]
+        source: std::io::Error,
+    },
+}
+
+/// Parameters for inserting a new recording row.
+///
+/// The daemon supplies the source identity and metadata from the
+/// `StartRecording` envelope; the store allocates the `recording_index`.
+#[derive(Debug, Clone, Default)]
+pub struct NewRecording<'a> {
+    /// Robot identifier — first half of the source key.
+    pub robot_id: Option<&'a str>,
+    /// Robot instance — second half of the source key.
+    pub robot_instance: Option<i64>,
+    /// Dataset identifier.
+    pub dataset_id: Option<&'a str>,
+    /// Producer capture-clock window lower bound (Unix nanoseconds).
+    pub start_timestamp_ns: i64,
+}
+
+/// Persistence interface for daemon state.
+/// +/// Covers the operations the dispatcher, per-trace actors, and cloud +/// coordinators need: recording / trace lifecycle transitions, upload +/// bookkeeping, and reconciliation queries. Recordings are keyed by the local +/// `recording_index` the store allocates; the cloud `recording_id` is a +/// separate, nullable column filled asynchronously. +#[async_trait] +pub trait StateStore: Send + Sync { + /// Insert a new recording row, allocating its `recording_index`, and + /// return it. Each `StartRecording` envelope opens a distinct recording, + /// so this always inserts (never upserts). + async fn create_recording( + &self, + new: NewRecording<'_>, + ) -> Result; + + /// Fetch a recording by its local index, returning `None` when absent. + async fn get_recording( + &self, + recording_index: i64, + ) -> Result, StateStoreError>; + + /// Return the most recently created recordings for a source, ordered by + /// `recording_index` ascending. Used by the recovery sweep and the + /// integration tests to correlate a recorded session to its DB rows. + #[allow(dead_code)] + async fn recordings_for_source( + &self, + robot_id: &str, + robot_instance: i64, + ) -> Result, StateStoreError>; + + /// Stamp the cloud `recording_id` **and** `backend_start_notified_at` + /// after the recording-start notifier successfully POSTed + /// `/recording/start`. Idempotent. + async fn mark_recording_start_notified( + &self, + recording_index: i64, + recording_id: &str, + ) -> Result, StateStoreError>; + + /// List recordings whose `/recording/start` POST has not yet succeeded: + /// `recording_id IS NULL`, `backend_start_notified_at IS NULL`, and the + /// recording is not cancelled. The start notifier's startup sweep. + async fn recordings_pending_start_notify(&self) -> Result, StateStoreError>; + + /// Insert a trace row in the [`TraceWriteStatus::Initializing`] state under + /// an existing recording. Idempotent on `trace_id`. 
+ #[allow(dead_code)] + async fn create_trace( + &self, + recording_index: i64, + trace_id: &str, + data_type: Option<&str>, + data_type_name: Option<&str>, + ) -> Result; + + /// Apply a partial update to an existing trace. + /// + /// Only set fields are written; unset fields preserve their existing value. + /// Returns `Ok(())` whether or not a row matched (a missing `trace_id` is a + /// no-op `UPDATE`). + async fn update_trace( + &self, + trace_id: &str, + update: TraceUpdate, + ) -> Result<(), StateStoreError>; + + /// Fetch a trace by ID, returning `None` when absent. + async fn get_trace(&self, trace_id: &str) -> Result, StateStoreError>; + + /// Return all traces for the given recording, ordered by `created_at`. + async fn list_traces_for_recording( + &self, + recording_index: i64, + ) -> Result, StateStoreError>; + + /// Claim up to `limit` traces in [`TraceWriteStatus::Written`] / + /// [`TraceRegistrationStatus::Pending`] for registration. + /// + /// Traces are eligible immediately when at least `limit` are ready (size + /// trigger) or when their `last_updated` is older than `max_wait_secs` + /// (age trigger) β€” the registration coordinator's debounce policy. + /// + /// Claimed rows are transitioned to + /// [`TraceRegistrationStatus::Registering`] atomically inside a single + /// transaction so two coordinators cannot double-claim. + async fn claim_traces_for_registration( + &self, + limit: usize, + max_wait_secs: f64, + ) -> Result, StateStoreError>; + + /// Mark a recording as stopped, setting `stopped_at` (wall clock) and + /// `stop_timestamp_ns` (producer capture clock). + /// + /// Idempotent: re-stopping a recording that already has a `stopped_at` + /// leaves the existing timestamps untouched so a duplicate `StopRecording` + /// envelope does not slide the window forward. 
+ async fn mark_recording_stopped( + &self, + recording_index: i64, + stop_timestamp_ns: i64, + ) -> Result; + + /// Stamp `backend_stop_notified_at = now` after the recording-stop + /// notifier successfully POSTed `/recording/stop`. Idempotent: a second + /// call leaves the existing timestamp untouched. + async fn mark_recording_stop_notified( + &self, + recording_index: i64, + ) -> Result, StateStoreError>; + + /// Stamp `backend_cancel_notified_at = now` after the recording-cancel + /// notifier successfully POSTed `/recording/cancel`. Idempotent. + async fn mark_recording_cancel_notified( + &self, + recording_index: i64, + ) -> Result, StateStoreError>; + + /// List recordings that have a cloud `recording_id` AND `cancelled_at IS + /// NOT NULL` but whose backend cancel notification has not yet been + /// delivered. Used by the recording-cancel notifier's startup sweep. + async fn recordings_pending_cancel_notify(&self) -> Result, StateStoreError>; + + /// List recordings that have been stopped, have a cloud `recording_id`, + /// but whose backend `/recording/stop` notification has not yet been + /// delivered. Skips cancelled recordings and recordings whose `/start` was + /// never notified (a NULL `recording_id` means there is nothing to stop + /// server-side; the start notifier fills it first, then this sweep fires). + async fn recordings_pending_stop_notify(&self) -> Result, StateStoreError>; + + /// List earlier recordings for `(robot_id, robot_instance)` that are still + /// *pending* on the backend: they have a cloud `recording_id` (so they were + /// opened server-side), are resolved locally (`cancelled_at` or `stopped_at` + /// is set), but have not yet had their backend cancel/stop notification + /// delivered. 
The backend dedupes pending recordings per robot instance β€” + /// it returns the existing pending recording for an instance instead of + /// minting a new one β€” so these must be closed server-side before the next + /// recording's `/recording/start`, or that start reuses their cloud id. + /// Restricted to `recording_index < before_index`, ordered oldest first. + async fn recordings_pending_backend_resolution_for_source( + &self, + robot_id: &str, + robot_instance: i64, + before_index: i64, + ) -> Result, StateStoreError>; + + /// List every recording row currently in the DB. + /// + /// Used by the progress reporter to discover stopped recordings whose + /// traces have all finished uploading. Returned in `created_at` order. + async fn list_recordings(&self) -> Result, StateStoreError>; + + /// List the `trace_id`s of every trace currently ready to upload + /// (`upload_status` is `queued` or `retrying`). + /// + /// Server-side filtered via `idx_traces_upload_status` so the uploader's + /// post-completion rescan does not walk every recording's full trace set + /// (an O(recordings Γ— traces) N+1 scan after each completed upload). + async fn traces_ready_for_upload(&self) -> Result, StateStoreError>; + + /// Promote every trace that is now both `registered` and `written` (and not + /// already queued/uploading/uploaded) to `upload_status = queued`, returning + /// the `(trace_id, recording_index)` of each one this call transitioned. + /// + /// Idempotent (each trace promotes at most once) and meant to be run on + /// every registration drain: with pre-registration, registration can land + /// before the trace's bytes are written, and `TraceWritten` is published + /// before the write-behind batcher commits `write_status = written`. Running + /// this on the periodic tick is the safety net that promotes such a trace + /// once the write finally commits. 
+ async fn promote_ready_traces_to_queued(&self) -> Result, StateStoreError>; + + /// List recordings the progress reporter still has work for: stopped, not + /// cancelled, with a cloud id, and not yet fully reported (either the + /// progress report or the expected-trace-count PUT is still outstanding). + /// + /// Server-side filtered so fully-settled recordings drop out of the + /// reporter's periodic sweep instead of being scanned (and their traces + /// re-fetched) forever. Returned in `created_at` order. + async fn recordings_pending_progress(&self) -> Result, StateStoreError>; + + /// List recordings the reaper can reclaim *now*: either cancelled with the + /// backend cancel notified, or stopped + stop-notified + progress-reported + /// with every declared trace uploaded (expected count met, none non- + /// `uploaded`). The per-trace "all uploaded" test is folded into the query + /// so the reaper neither walks every recording nor re-fetches the traces of + /// a recording stuck on a permanently-failed upload on every 60 s sweep. + /// Returned in `created_at` order. + async fn recordings_pending_reclaim(&self) -> Result, StateStoreError>; + + /// Resolve the cloud `recording_id` for the recording identified by + /// `(robot_id, robot_instance, start_timestamp_ns)`. + /// + /// Backs the `queries` IPC service: the SDK asks the daemon for the id + /// instead of reading this DB directly. Matches `start_timestamp_ns` + /// exactly (the producer's capture marker, stored verbatim) and excludes + /// cancelled recordings, mirroring the previous client-side query. Returns + /// `None` when no such recording exists or its cloud id has not been minted + /// yet. + async fn resolve_recording_id_for_marker( + &self, + robot_id: &str, + robot_instance: i64, + start_timestamp_ns: i64, + ) -> Result, StateStoreError>; + + /// Atomically transition `progress_reported` for the recording at `recording_index`. 
+ /// + /// `expected` is the status the caller observed before the request — if + /// the row no longer matches (e.g. another tick already advanced it) the + /// update is a no-op and the current row is returned. Returns `None` when + /// the recording is not present. + async fn set_progress_report_status( + &self, + recording_index: i64, + expected: ProgressReportStatus, + next: ProgressReportStatus, + ) -> Result, StateStoreError>; + + /// Stamp the recording's `expected_trace_count` once the producer-side + /// trace set is known to be final. Idempotent: the value is only written + /// when currently NULL so two reporters cannot race each other into + /// inconsistent state. + async fn set_expected_trace_count( + &self, + recording_index: i64, + expected_trace_count: i64, + ) -> Result, StateStoreError>; + + /// Stamp the recording's `expected_trace_count_reported` to `count` once + /// the backend has acknowledged the `expected-trace-count` PUT. Stored as + /// a non-zero integer so the reporter can use a single column to mean + /// both "reported" (non-zero) and "what we told the backend". + async fn mark_expected_trace_count_reported( + &self, + recording_index: i64, + count: i64, + ) -> Result, StateStoreError>; + + /// Re-arm pipeline rows that were mid-flight when the daemon last + /// stopped. Mirrors `reset_retrying_to_written` in + /// `state_store_sqlite.py`. Called on startup so a SIGKILL or panic does + /// not leave traces wedged in transient `registering` / `uploading` + /// states the coordinators no longer scan. + /// + /// Returns the number of trace rows rewritten. + async fn reset_stale_pipeline_states(&self) -> Result; + + /// Mark trace rows whose writer-side state is stale (`writing` / + /// `initializing` / `pending_metadata`) and whose `last_updated` is older + /// than `stale_threshold_secs` as `failed`. 
+ /// + /// On startup these rows belong to a previous daemon process — by + /// definition no current actor is touching them — and leaving them in a + /// transient state would forever block their parent recording from + /// reaching the "all traces written" gate the progress reporter waits on. + /// The age threshold is a defence against accidentally clobbering a row + /// that the current daemon has just begun writing (the row's + /// `last_updated` is touched on creation, so a fresh row will not be + /// caught by the sweep). + /// + /// Returns the number of trace rows rewritten. + async fn mark_stale_writing_traces_failed( + &self, + stale_threshold_secs: i64, + ) -> Result; + + /// Atomically mark a recording as cancelled and burn every non-terminal + /// trace it owns to terminal states the cloud coordinators ignore + /// (`write_status = failed`, `upload_status = failed`, + /// `registration_status = failed` if not already `registered`). + /// + /// A cancel is a recording stop that discards data, so it also stamps + /// `stop_timestamp_ns` (the cancel's capture time, → backend `end_time`) + /// just like [`mark_recording_stopped`](Self::mark_recording_stopped). + /// + /// Idempotent: re-cancelling a recording that already has a `cancelled_at` + /// leaves both timestamps untouched. Returns the recording row after the + /// update and the number of write-phase trace rows transitioned to `failed` + /// (the later upload/registration burns can touch further rows that this + /// count does not include). + async fn cancel_recording( + &self, + recording_index: i64, + stop_timestamp_ns: i64, + ) -> Result<(RecordingRow, u64), StateStoreError>; + + /// Delete a recording and all of its trace rows in a single transaction. + /// + /// Called by the recording reaper once a recording is fully settled (every + /// trace uploaded and the backend notified), after its on-disk artefacts + /// have been removed. Returns the number of trace rows deleted. 
+ async fn delete_recording_cascade(&self, recording_index: i64) -> Result; +} + +/// Optional fields to update on a trace row. +/// +/// Fields left as `None` are not written. Use `Default::default()` and set +/// only the fields the caller intends to change. +#[derive(Debug, Clone, Default)] +pub struct TraceUpdate { + /// New write lifecycle state. + pub write_status: Option, + /// New registration lifecycle state. + pub registration_status: Option, + /// New upload lifecycle state. + pub upload_status: Option, + /// On-disk artefact path. + pub path: Option, + /// Bytes written so far. + pub bytes_written: Option, + /// Final byte total (set on finalise). + pub total_bytes: Option, + /// Bytes uploaded so far. + pub bytes_uploaded: Option, + /// JSON-encoded `{filepath: session_uri}` map persisted by the + /// registration coordinator. + pub upload_session_uris: Option, + /// Set the latest error code (use `Some(None)` to clear, `None` to leave + /// untouched). + pub error_code: Option>, + /// Set the latest error message (use `Some(None)` to clear, `None` to + /// leave untouched). + pub error_message: Option>, +} + +impl TraceUpdate { + /// True when every field is unset and no SQL write is needed. + fn is_empty(&self) -> bool { + self.write_status.is_none() + && self.registration_status.is_none() + && self.upload_status.is_none() + && self.path.is_none() + && self.bytes_written.is_none() + && self.total_bytes.is_none() + && self.bytes_uploaded.is_none() + && self.upload_session_uris.is_none() + && self.error_code.is_none() + && self.error_message.is_none() + } +} + +/// Row-creation fields for a trace, carried on the *first* coalesced write so +/// the batcher can insert the row instead of the actor blocking on a +/// synchronous `create_trace`. A trace is created the moment its actor spawns — +/// which can happen at any point in a recording (a sensor that starts logging +/// midway), not only at the boundary — so this rides every actor's first write. 
+#[derive(Debug, Clone)] +pub struct TraceCreate { + /// Parent recording's local index. + pub recording_index: i64, + /// Wire data-type label. + pub data_type: Option, + /// Per-stream sensor label (persisted to `data_type_name`). + pub data_type_name: Option, +} + +/// One coalesced set of actor-owned column updates for a single trace — the +/// unit the [`crate::state::trace_event_database_writer::TraceEventDatabaseWriter`] batcher flushes. +/// +/// The per-trace actor owns the write-phase columns (`write_status`, the +/// on-disk byte counter, `total_bytes`, the write-phase error). The uploader's +/// rolling `bytes_uploaded` checkpoint is also coalesced here — it is the one +/// cloud-owned column that is hot (one write per 64 MiB per concurrent upload), +/// purely advisory (resume correctness comes from the server's 308 offset, not +/// this row), and last-writer-wins, so it fits the write-behind exactly. The +/// remaining cloud columns (`upload_status`, `registration_status`, +/// `upload_session_uris`, `path`) stay on the synchronous write path: they gate +/// reads (e.g. `traces_ready_for_upload`) and must be ordered against the bus +/// events the coordinators publish, so deferring them would reintroduce +/// read-after-write races. `None` fields are left untouched +/// (`COALESCE`-preserved). `create` is set only on the trace's first write and +/// triggers an idempotent row insert. +#[derive(Debug, Clone, Default)] +pub struct CoalescedTraceWrite { + /// Target trace row. + pub trace_id: String, + /// Row-creation fields, present only on the trace's first coalesced write. + pub create: Option, + /// New write-lifecycle state, if it changed. + pub write_status: Option, + /// Latest absolute on-disk byte count. + pub bytes_written: Option, + /// Final byte total (set on finalise). + pub total_bytes: Option, + /// Latest rolling upload offset (advisory progress checkpoint). + pub bytes_uploaded: Option, + /// Write-phase error code. 
+ pub error_code: Option, + /// Write-phase error message. + pub error_message: Option, +} + +impl CoalescedTraceWrite { + /// True when a write-phase column changed and the guarded write-phase + /// UPDATE must run. A create-only write (and an upload-progress-only write) + /// skips it; `bytes_uploaded` is applied by its own statement because it + /// updates legitimately *after* the row has reached the terminal + /// `written` state, which the write-phase guard forbids. + fn has_write_column_update(&self) -> bool { + self.write_status.is_some() + || self.bytes_written.is_some() + || self.total_bytes.is_some() + || self.error_code.is_some() + || self.error_message.is_some() + } +} + +/// SQLite-backed [`StateStore`]. +/// +/// Writers serialise on a dedicated single-connection [`write_pool`], so the +/// daemon needs no app-level write mutex: only one connection ever writes, so +/// WAL never returns `SQLITE_BUSY` between the daemon's own writers and nothing +/// is held across a transaction's `.await`s. The earlier `Arc>` +/// guard (mirroring `state_store_sqlite.py`'s `asyncio.Semaphore(1)`) was held +/// across `begin..commit` and dominated the DB-layer profile (~96% of time was +/// spent waiting on it under load); letting the single write connection +/// serialise writers removes that wait entirely. +/// +/// Reads run on a separate multi-connection [`read_pool`] so they never queue +/// behind the writer — under WAL a reader observes a consistent snapshot while +/// the writer commits. +/// +/// [`write_pool`]: SqliteStateStore::write_pool +/// [`read_pool`]: SqliteStateStore::read_pool +#[derive(Clone)] +pub struct SqliteStateStore { + read_pool: SqlitePool, + write_pool: SqlitePool, +} + +impl SqliteStateStore { + /// Open the SQLite database at `db_path`, creating it (and the parent + /// directory) if missing, applying WAL pragmas, and running pending + /// migrations. 
+ pub async fn open(db_path: &Path) -> Result { + if let Some(parent) = db_path.parent() { + if !parent.as_os_str().is_empty() { + std::fs::create_dir_all(parent).map_err(|source| StateStoreError::Io { + path: parent.to_path_buf(), + source, + })?; + } + } + + let options = SqliteConnectOptions::new() + .filename(db_path) + .create_if_missing(true) + .journal_mode(SqliteJournalMode::Wal) + .synchronous(SqliteSynchronous::Normal) + .busy_timeout(std::time::Duration::from_millis(BUSY_TIMEOUT_MS as u64)) + // sqlx prints every statement at INFO by default; quiet that down so + // the daemon's tracing output isn't drowned out by the same SQL on + // every trace write. + .log_statements(tracing::log::LevelFilter::Debug); + + // Single writer: serialises commits without an app-level mutex and makes + // SQLITE_BUSY between the daemon's own writers impossible. + let write_pool = SqlitePoolOptions::new() + .max_connections(1) + .connect_with(options.clone()) + .await?; + // Reads run concurrently on their own pool so they never wait behind the + // writer. + let read_pool = SqlitePoolOptions::new() + .max_connections(READ_POOL_CONNECTIONS) + .connect_with(options) + .await?; + + // Migrations are writes — run them on the write connection. + MIGRATOR.run(&write_pool).await?; + + Ok(SqliteStateStore { + read_pool, + write_pool, + }) + } + + /// Borrow the read pool, e.g. for diagnostics in tests. + #[allow(dead_code)] + pub fn pool(&self) -> &SqlitePool { + &self.read_pool + } + + /// Borrow the write pool. Test-only: production code reaches the write + /// connection exclusively through the typed write methods. + #[cfg(test)] + pub(crate) fn write_pool(&self) -> &SqlitePool { + &self.write_pool + } + + /// Close both pools, draining outstanding connections. + pub async fn close(self) { + self.write_pool.close().await; + self.read_pool.close().await; + } + + /// Apply a batch of coalesced per-trace writes in a single transaction. 
+ /// + /// The [`crate::state::trace_event_database_writer::TraceEventDatabaseWriter`] batcher coalesces many + /// actors' per-frame progress / status / finalise updates (last-writer-wins + /// per trace) and flushes them together here, so the per-transaction + /// (begin/commit) cost is amortised across the whole batch instead of paid + /// per row. Each SET clause is a fixed `COALESCE(?, col)` + /// form (only supplied columns change; statement stays prepared-cache + /// friendly) and there is no read-back `SELECT`. + /// + /// The write-phase `WHERE` guard keys off the *target* lifecycle state so a + /// flush is monotonic w.r.t. terminal states — a late coalesced progress + /// write can never resurrect a row a concurrent + /// [`StateStore::cancel_recording`] already burned to `failed`, nor + /// un-finish a `written` row. That invariant is what lets the batcher run + /// without tight coordination with the cancel path. The three guard + /// variants are fixed `const` statements (not `format!`-ed per row) so they + /// stay friendly to sqlx's prepared-statement cache. + pub async fn apply_trace_writes( + &self, + writes: &[CoalescedTraceWrite], + ) -> Result<(), StateStoreError> { + // The write-phase UPDATE, one fixed statement per terminal guard so + // sqlx's prepared-statement cache sees the same SQL every flush. Bind + // order is identical across the three: ?1 write_status, ?2 + // bytes_written, ?3 total_bytes, ?4 error_code, ?5 error_message, ?6 + // last_updated, ?7 trace_id; only the trailing guard differs. + // + // Finalise: apply unless cancel already burned the row. 
+ const SQL_WRITE_FINALISE: &str = "UPDATE traces SET \ + write_status = COALESCE(?1, write_status), \ + bytes_written = COALESCE(?2, bytes_written), \ + total_bytes = COALESCE(?3, total_bytes), \ + error_code = COALESCE(?4, error_code), \ + error_message = COALESCE(?5, error_message), \ + last_updated = ?6 \ + WHERE trace_id = ?7 AND write_status != 'failed'"; + // Fail: apply unless the trace already finished writing. + const SQL_WRITE_FAIL: &str = "UPDATE traces SET \ + write_status = COALESCE(?1, write_status), \ + bytes_written = COALESCE(?2, bytes_written), \ + total_bytes = COALESCE(?3, total_bytes), \ + error_code = COALESCE(?4, error_code), \ + error_message = COALESCE(?5, error_message), \ + last_updated = ?6 \ + WHERE trace_id = ?7 AND write_status != 'written'"; + // Progress / Writing / byte-only: live (non-terminal) rows only. + const SQL_WRITE_PROGRESS: &str = "UPDATE traces SET \ + write_status = COALESCE(?1, write_status), \ + bytes_written = COALESCE(?2, bytes_written), \ + total_bytes = COALESCE(?3, total_bytes), \ + error_code = COALESCE(?4, error_code), \ + error_message = COALESCE(?5, error_message), \ + last_updated = ?6 \ + WHERE trace_id = ?7 AND write_status NOT IN ('written', 'failed')"; + // The advisory upload checkpoint. Updates while the upload is live; + // skipped once it has settled (`uploaded`/`failed`) so a late flush + // can't touch a terminal row. Bind order: ?1 bytes_uploaded, + // ?2 last_updated, ?3 trace_id. + const SQL_UPLOAD_PROGRESS: &str = "UPDATE traces SET \ + bytes_uploaded = ?1, last_updated = ?2 \ + WHERE trace_id = ?3 AND upload_status NOT IN ('uploaded', 'failed')"; + + if writes.is_empty() { + return Ok(()); + } + let mut tx = self.write_pool.begin().await?; + let now = Utc::now().naive_utc(); + for write in writes { + // First write for a trace carries its create fields: insert the row + // (idempotent) before applying any column update. 
This folds the + // per-actor `create_trace` into the batch, so the boundary's spawn + // burst becomes one batched transaction instead of N. + if let Some(create) = &write.create { + sqlx::query( + "INSERT INTO traces (trace_id, recording_index, write_status, \ + data_type, data_type_name, created_at, last_updated) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?6) \ + ON CONFLICT(trace_id) DO NOTHING", + ) + .bind(&write.trace_id) + .bind(create.recording_index) + .bind(TraceWriteStatus::Initializing.as_str()) + .bind(create.data_type.as_deref()) + .bind(create.data_type_name.as_deref()) + .bind(now) + .execute(&mut *tx) + .await?; + } + + if write.has_write_column_update() { + let sql = match write.write_status { + Some(TraceWriteStatus::Written) => SQL_WRITE_FINALISE, + Some(TraceWriteStatus::Failed) => SQL_WRITE_FAIL, + _ => SQL_WRITE_PROGRESS, + }; + sqlx::query(sql) + .bind(write.write_status.as_ref().map(|status| status.as_str())) + .bind(write.bytes_written) + .bind(write.total_bytes) + .bind(write.error_code.as_ref().map(|code| code.as_str())) + .bind(write.error_message.as_deref()) + .bind(now) + .bind(&write.trace_id) + .execute(&mut *tx) + .await?; + } + + if let Some(bytes_uploaded) = write.bytes_uploaded { + sqlx::query(SQL_UPLOAD_PROGRESS) + .bind(bytes_uploaded) + .bind(now) + .bind(&write.trace_id) + .execute(&mut *tx) + .await?; + } + } + tx.commit().await?; + Ok(()) + } + + /// Fetch a recording row by its local index inside an open connection. + async fn fetch_recording_locked( + conn: &mut SqliteConnection, + recording_index: i64, + ) -> Result, sqlx::Error> { + let row = sqlx::query("SELECT * FROM recordings WHERE recording_index = ?1") + .bind(recording_index) + .fetch_optional(&mut *conn) + .await?; + match row { + Some(row) => Ok(Some(RecordingRow::from_row(&row)?)), + None => Ok(None), + } + } +} + +/// `StateStore` for [`SqliteStateStore`] β€” see the struct for the read/write +/// connection-pool split. 
The per-frame hot path never calls these methods +/// directly: actor updates funnel through the coalescing write-behind batcher +/// ([`crate::state::trace_event_database_writer::TraceEventDatabaseWriter`]) and +/// are applied here as one batched transaction. +#[async_trait] +impl StateStore for SqliteStateStore { + async fn create_recording( + &self, + new: NewRecording<'_>, + ) -> Result { + let mut tx = self.write_pool.begin().await?; + let now = Utc::now().naive_utc(); + let result = sqlx::query( + "INSERT INTO recordings ( \ + robot_id, robot_instance, dataset_id, \ + start_timestamp_ns, created_at, last_updated \ + ) VALUES (?1, ?2, ?3, ?4, ?5, ?5)", + ) + .bind(new.robot_id) + .bind(new.robot_instance) + .bind(new.dataset_id) + .bind(new.start_timestamp_ns) + .bind(now) + .execute(&mut *tx) + .await?; + + let recording_index = result.last_insert_rowid(); + // NOTE(review): `mark_recording_stopped` spells this same missing-row mapping with the eager `.ok_or(sqlx::Error::RowNotFound)`; the closure form below is equivalent but inconsistent. + let row = Self::fetch_recording_locked(&mut tx, recording_index) + .await? + .ok_or_else(|| sqlx::Error::RowNotFound)?; + tx.commit().await?; + Ok(row) + } + + async fn get_recording( + &self, + recording_index: i64, + ) -> Result, StateStoreError> { + let row = sqlx::query("SELECT * FROM recordings WHERE recording_index = ?1") + .bind(recording_index) + .fetch_optional(&self.read_pool) + .await?; + Ok(match row { + Some(row) => Some(RecordingRow::from_row(&row)?), + None => None, + }) + } + + async fn recordings_for_source( + &self, + robot_id: &str, + robot_instance: i64, + ) -> Result, StateStoreError> { + let rows = sqlx::query( + "SELECT * FROM recordings \ + WHERE robot_id = ?1 AND robot_instance = ?2 \ + ORDER BY recording_index ASC", + ) + .bind(robot_id) + .bind(robot_instance) + .fetch_all(&self.read_pool) + .await?; + rows.iter() + .map(RecordingRow::from_row) + .collect::, _>>() + .map_err(Into::into) + } + + async fn mark_recording_start_notified( + &self, + recording_index: i64, + recording_id: &str, + ) -> Result, StateStoreError> { + let mut tx = self.write_pool.begin().await?; + let now 
= Utc::now().naive_utc(); + sqlx::query( + "UPDATE recordings \ + SET recording_id = COALESCE(recording_id, ?1), \ + backend_start_notified_at = COALESCE(backend_start_notified_at, ?2), \ + last_updated = ?2 \ + WHERE recording_index = ?3", + ) + .bind(recording_id) + .bind(now) + .bind(recording_index) + .execute(&mut *tx) + .await?; + let row = Self::fetch_recording_locked(&mut tx, recording_index).await?; + tx.commit().await?; + Ok(row) + } + + async fn recordings_pending_start_notify(&self) -> Result, StateStoreError> { + let rows = sqlx::query( + "SELECT * FROM recordings \ + WHERE recording_id IS NULL \ + AND backend_start_notified_at IS NULL \ + AND cancelled_at IS NULL \ + ORDER BY recording_index ASC", + ) + .fetch_all(&self.read_pool) + .await?; + rows.iter() + .map(RecordingRow::from_row) + .collect::, _>>() + .map_err(Into::into) + } + + async fn create_trace( + &self, + recording_index: i64, + trace_id: &str, + data_type: Option<&str>, + data_type_name: Option<&str>, + ) -> Result { + let mut tx = self.write_pool.begin().await?; + + let now = Utc::now().naive_utc(); + sqlx::query( + "INSERT INTO traces (trace_id, recording_index, write_status, data_type, \ + data_type_name, created_at, last_updated) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?6) \ + ON CONFLICT(trace_id) DO NOTHING", + ) + .bind(trace_id) + .bind(recording_index) + .bind(TraceWriteStatus::Initializing.as_str()) + .bind(data_type) + .bind(data_type_name) + .bind(now) + .execute(&mut *tx) + .await?; + + let row = sqlx::query("SELECT * FROM traces WHERE trace_id = ?1") + .bind(trace_id) + .fetch_one(&mut *tx) + .await?; + let record = TraceRecord::from_row(&row)?; + + tx.commit().await?; + Ok(record) + } + + async fn update_trace( + &self, + trace_id: &str, + update: TraceUpdate, + ) -> Result<(), StateStoreError> { + if update.is_empty() { + return Ok(()); + } + + let mut tx = self.write_pool.begin().await?; + + // Build the UPDATE dynamically so we only touch fields the caller set. 
+ // Always bump `last_updated` so the registration coordinator's + // age-based claim policy sees fresh timestamps. + let now = Utc::now().naive_utc(); + let mut assignments: Vec<&'static str> = Vec::new(); + if update.write_status.is_some() { + assignments.push("write_status = ?"); + } + if update.registration_status.is_some() { + assignments.push("registration_status = ?"); + } + if update.upload_status.is_some() { + assignments.push("upload_status = ?"); + } + if update.path.is_some() { + assignments.push("path = ?"); + } + if update.bytes_written.is_some() { + assignments.push("bytes_written = ?"); + } + if update.total_bytes.is_some() { + assignments.push("total_bytes = ?"); + } + if update.bytes_uploaded.is_some() { + assignments.push("bytes_uploaded = ?"); + } + if update.upload_session_uris.is_some() { + assignments.push("upload_session_uris = ?"); + } + if update.error_code.is_some() { + assignments.push("error_code = ?"); + } + if update.error_message.is_some() { + assignments.push("error_message = ?"); + } + assignments.push("last_updated = ?"); + + let sql = format!( + "UPDATE traces SET {} WHERE trace_id = ?", + assignments.join(", ") + ); + let mut query = sqlx::query(&sql); + if let Some(status) = update.write_status { + query = query.bind(status.as_str()); + } + if let Some(status) = update.registration_status { + query = query.bind(status.as_str()); + } + if let Some(status) = update.upload_status { + query = query.bind(status.as_str()); + } + if let Some(path) = update.path { + query = query.bind(path); + } + if let Some(bytes) = update.bytes_written { + query = query.bind(bytes); + } + if let Some(bytes) = update.total_bytes { + query = query.bind(bytes); + } + if let Some(bytes) = update.bytes_uploaded { + query = query.bind(bytes); + } + if let Some(uris) = update.upload_session_uris { + query = query.bind(uris); + } + if let Some(code) = update.error_code { + query = query.bind(code.map(|value| value.as_str().to_string())); + } + if let 
Some(message) = update.error_message { + query = query.bind(message); + } + query = query.bind(now).bind(trace_id); + + // No read-back: a non-matching `trace_id` is a harmless no-op UPDATE + // (0 rows affected) and no caller distinguishes it from a hit. + query.execute(&mut *tx).await?; + tx.commit().await?; + Ok(()) + } + + async fn get_trace(&self, trace_id: &str) -> Result, StateStoreError> { + let row = sqlx::query("SELECT * FROM traces WHERE trace_id = ?1") + .bind(trace_id) + .fetch_optional(&self.read_pool) + .await?; + Ok(match row { + Some(row) => Some(TraceRecord::from_row(&row)?), + None => None, + }) + } + + async fn list_traces_for_recording( + &self, + recording_index: i64, + ) -> Result, StateStoreError> { + let rows = sqlx::query( + "SELECT * FROM traces WHERE recording_index = ?1 ORDER BY created_at ASC, trace_id ASC", + ) + .bind(recording_index) + .fetch_all(&self.read_pool) + .await?; + rows.iter() + .map(TraceRecord::from_row) + .collect::, _>>() + .map_err(Into::into) + } + + async fn claim_traces_for_registration( + &self, + limit: usize, + max_wait_secs: f64, + ) -> Result, StateStoreError> { + let limit_i64 = limit as i64; + if limit_i64 <= 0 { + return Ok(Vec::new()); + } + let mut tx = self.write_pool.begin().await?; + + // Count ready traces first so the size-vs-age policy stays explicit. + // SQLite's transactional snapshot means this count is stable across the + // subsequent claim. A trace is eligible for registration as soon as its + // row exists (any write_status except `failed`) β€” registration only + // needs the trace's identity, not its bytes, so it can run while the + // recording is still writing (see the module docs on pre-registration). 
+ let ready_count: i64 = sqlx::query_scalar( + "SELECT COUNT(*) FROM traces \ + WHERE write_status != ?1 AND registration_status = ?2", + ) + .bind(TraceWriteStatus::Failed.as_str()) + .bind(TraceRegistrationStatus::Pending.as_str()) + .fetch_one(&mut *tx) + .await?; + + // Select the eligible trace_ids (oldest first, capped at `limit`) and + // flip them to `registering` in a single `UPDATE … RETURNING`, so two + // coordinators can't double-claim and there's neither a separate SELECT + // round-trip nor a per-row UPDATE loop. The `IN (subquery)` form is used + // because SQLite's stock build doesn't support `LIMIT` directly on + // `UPDATE`; the subquery materialises before the UPDATE runs, so the + // claim sees a stable candidate set. + let now = Utc::now().naive_utc(); + let rows = if ready_count >= limit_i64 { + // Size trigger: enough are ready — claim the oldest `limit` + // regardless of age. + sqlx::query( + "UPDATE traces SET registration_status = ?1, last_updated = ?2 \ + WHERE trace_id IN ( \ + SELECT trace_id FROM traces \ + WHERE write_status != ?3 AND registration_status = ?4 \ + ORDER BY last_updated ASC LIMIT ?5 \ + ) \ + RETURNING *", + ) + .bind(TraceRegistrationStatus::Registering.as_str()) + .bind(now) + .bind(TraceWriteStatus::Failed.as_str()) + .bind(TraceRegistrationStatus::Pending.as_str()) + .bind(limit_i64) + .fetch_all(&mut *tx) + .await? + } else { + // Age trigger: claim only those whose `last_updated` is older than + // the debounce cutoff. 
+ let cutoff = now - chrono::Duration::milliseconds((max_wait_secs * 1000.0) as i64); + sqlx::query( + "UPDATE traces SET registration_status = ?1, last_updated = ?2 \ + WHERE trace_id IN ( \ + SELECT trace_id FROM traces \ + WHERE write_status != ?3 AND registration_status = ?4 \ + AND last_updated <= ?5 \ + ORDER BY last_updated ASC LIMIT ?6 \ + ) \ + RETURNING *", + ) + .bind(TraceRegistrationStatus::Registering.as_str()) + .bind(now) + .bind(TraceWriteStatus::Failed.as_str()) + .bind(TraceRegistrationStatus::Pending.as_str()) + .bind(cutoff) + .bind(limit_i64) + .fetch_all(&mut *tx) + .await? + }; + + tx.commit().await?; + rows.iter() + .map(TraceRecord::from_row) + .collect::, _>>() + .map_err(Into::into) + } + + async fn mark_recording_stopped( + &self, + recording_index: i64, + stop_timestamp_ns: i64, + ) -> Result { + let mut tx = self.write_pool.begin().await?; + + let now = Utc::now().naive_utc(); + sqlx::query( + "UPDATE recordings \ + SET stopped_at = COALESCE(stopped_at, ?2), \ + stop_timestamp_ns = COALESCE(stop_timestamp_ns, ?3), \ + last_updated = ?2 \ + WHERE recording_index = ?1", + ) + .bind(recording_index) + .bind(now) + .bind(stop_timestamp_ns) + .execute(&mut *tx) + .await?; + + let record = Self::fetch_recording_locked(&mut tx, recording_index) + .await? 
+ .ok_or(sqlx::Error::RowNotFound)?; + + tx.commit().await?; + Ok(record) + } + + async fn list_recordings(&self) -> Result, StateStoreError> { + let rows = sqlx::query("SELECT * FROM recordings ORDER BY created_at ASC") + .fetch_all(&self.read_pool) + .await?; + rows.iter() + .map(RecordingRow::from_row) + .collect::, _>>() + .map_err(Into::into) + } + + async fn traces_ready_for_upload(&self) -> Result, StateStoreError> { + let ids = sqlx::query_scalar::<_, String>( + "SELECT trace_id FROM traces WHERE upload_status IN ('queued', 'retrying')", + ) + .fetch_all(&self.read_pool) + .await?; + Ok(ids) + } + + async fn promote_ready_traces_to_queued(&self) -> Result, StateStoreError> { + // The `upload_status NOT IN (...)` guard makes each promotion fire at + // most once. This is the ONLY path that lets a trace reach the uploader, + // so it must require BOTH a session URI (registered) and finalised bytes + // (written) — `traces_ready_for_upload` only checks `upload_status`. + // NOTE(review): the guard excludes only `queued` / `uploading` / `uploaded`, so a registered + written row whose `upload_status` is `retrying` or `failed` also matches and is reset to `queued` — confirm that re-queue is intended. + let now = Utc::now().naive_utc(); + let rows = sqlx::query_as::<_, (String, i64)>( + "UPDATE traces SET upload_status = ?1, last_updated = ?2 \ + WHERE registration_status = ?3 \ + AND write_status = ?4 \ + AND upload_status NOT IN ('queued', 'uploading', 'uploaded') \ + RETURNING trace_id, recording_index", + ) + .bind(TraceUploadStatus::Queued.as_str()) + .bind(now) + .bind(TraceRegistrationStatus::Registered.as_str()) + .bind(TraceWriteStatus::Written.as_str()) + .fetch_all(&self.write_pool) + .await?; + Ok(rows) + } + + async fn recordings_pending_progress(&self) -> Result, StateStoreError> { + let rows = sqlx::query( + "SELECT * FROM recordings \ + WHERE stopped_at IS NOT NULL \ + AND cancelled_at IS NULL \ + AND recording_id IS NOT NULL \ + AND (progress_reported != 'reported' OR expected_trace_count_reported = 0) \ + ORDER BY created_at ASC", + ) + .fetch_all(&self.read_pool) + .await?; + rows.iter() + .map(RecordingRow::from_row) + .collect::, _>>() + .map_err(Into::into) + } + + 
async fn recordings_pending_reclaim(&self) -> Result, StateStoreError> { + let rows = sqlx::query( + "SELECT r.* FROM recordings r \ + WHERE (r.cancelled_at IS NOT NULL AND r.backend_cancel_notified_at IS NOT NULL) \ + OR ( \ + r.cancelled_at IS NULL \ + AND r.stopped_at IS NOT NULL \ + AND r.backend_stop_notified_at IS NOT NULL \ + AND r.progress_reported = 'reported' \ + AND r.expected_trace_count IS NOT NULL \ + AND r.expected_trace_count = \ + (SELECT COUNT(*) FROM traces t WHERE t.recording_index = r.recording_index) \ + AND EXISTS \ + (SELECT 1 FROM traces t WHERE t.recording_index = r.recording_index) \ + AND NOT EXISTS \ + (SELECT 1 FROM traces t \ + WHERE t.recording_index = r.recording_index \ + AND t.upload_status != 'uploaded') \ + ) \ + ORDER BY r.created_at ASC", + ) + .fetch_all(&self.read_pool) + .await?; + rows.iter() + .map(RecordingRow::from_row) + .collect::, _>>() + .map_err(Into::into) + } + + async fn resolve_recording_id_for_marker( + &self, + robot_id: &str, + robot_instance: i64, + start_timestamp_ns: i64, + ) -> Result, StateStoreError> { + let recording_id = sqlx::query_scalar::<_, Option>( + "SELECT recording_id FROM recordings \ + WHERE robot_id = ?1 AND robot_instance = ?2 AND start_timestamp_ns = ?3 \ + AND cancelled_at IS NULL \ + ORDER BY recording_index DESC LIMIT 1", + ) + .bind(robot_id) + .bind(robot_instance) + .bind(start_timestamp_ns) + .fetch_optional(&self.read_pool) + .await?; + // Outer `Option` = row present; inner = the nullable column. A matching + // row whose cloud id has not been minted yet flattens to `None`. 
+ Ok(recording_id.flatten()) + } + + async fn mark_recording_stop_notified( + &self, + recording_index: i64, + ) -> Result, StateStoreError> { + let mut tx = self.write_pool.begin().await?; + let now = Utc::now().naive_utc(); + sqlx::query( + "UPDATE recordings \ + SET backend_stop_notified_at = COALESCE(backend_stop_notified_at, ?2), \ + last_updated = ?2 \ + WHERE recording_index = ?1", + ) + .bind(recording_index) + .bind(now) + .execute(&mut *tx) + .await?; + + let record = Self::fetch_recording_locked(&mut tx, recording_index).await?; + tx.commit().await?; + Ok(record) + } + + async fn mark_recording_cancel_notified( + &self, + recording_index: i64, + ) -> Result, StateStoreError> { + let mut tx = self.write_pool.begin().await?; + let now = Utc::now().naive_utc(); + sqlx::query( + "UPDATE recordings \ + SET backend_cancel_notified_at = COALESCE(backend_cancel_notified_at, ?2), \ + last_updated = ?2 \ + WHERE recording_index = ?1", + ) + .bind(recording_index) + .bind(now) + .execute(&mut *tx) + .await?; + let record = Self::fetch_recording_locked(&mut tx, recording_index).await?; + tx.commit().await?; + Ok(record) + } + + async fn recordings_pending_cancel_notify(&self) -> Result, StateStoreError> { + let rows = sqlx::query( + "SELECT * FROM recordings \ + WHERE cancelled_at IS NOT NULL \ + AND recording_id IS NOT NULL \ + AND backend_cancel_notified_at IS NULL \ + ORDER BY cancelled_at ASC", + ) + .fetch_all(&self.read_pool) + .await?; + rows.iter() + .map(RecordingRow::from_row) + .collect::, _>>() + .map_err(Into::into) + } + + async fn recordings_pending_stop_notify(&self) -> Result, StateStoreError> { + let rows = sqlx::query( + "SELECT * FROM recordings \ + WHERE stopped_at IS NOT NULL \ + AND recording_id IS NOT NULL \ + AND backend_stop_notified_at IS NULL \ + AND cancelled_at IS NULL \ + ORDER BY stopped_at ASC", + ) + .fetch_all(&self.read_pool) + .await?; + rows.iter() + .map(RecordingRow::from_row) + .collect::, _>>() + .map_err(Into::into) + } + + 
async fn recordings_pending_backend_resolution_for_source( + &self, + robot_id: &str, + robot_instance: i64, + before_index: i64, + ) -> Result, StateStoreError> { + let rows = sqlx::query( + "SELECT * FROM recordings \ + WHERE robot_id = ? \ + AND robot_instance = ? \ + AND recording_index < ? \ + AND recording_id IS NOT NULL \ + AND backend_stop_notified_at IS NULL \ + AND backend_cancel_notified_at IS NULL \ + AND (cancelled_at IS NOT NULL OR stopped_at IS NOT NULL) \ + ORDER BY recording_index ASC", + ) + .bind(robot_id) + .bind(robot_instance) + .bind(before_index) + .fetch_all(&self.read_pool) + .await?; + rows.iter() + .map(RecordingRow::from_row) + .collect::, _>>() + .map_err(Into::into) + } + + async fn set_progress_report_status( + &self, + recording_index: i64, + expected: ProgressReportStatus, + next: ProgressReportStatus, + ) -> Result, StateStoreError> { + let mut tx = self.write_pool.begin().await?; + let now = Utc::now().naive_utc(); + sqlx::query( + "UPDATE recordings \ + SET progress_reported = ?1, last_updated = ?2 \ + WHERE recording_index = ?3 AND progress_reported = ?4", + ) + .bind(next.as_str()) + .bind(now) + .bind(recording_index) + .bind(expected.as_str()) + .execute(&mut *tx) + .await?; + + let record = Self::fetch_recording_locked(&mut tx, recording_index).await?; + tx.commit().await?; + Ok(record) + } + + async fn set_expected_trace_count( + &self, + recording_index: i64, + expected_trace_count: i64, + ) -> Result, StateStoreError> { + let mut tx = self.write_pool.begin().await?; + let now = Utc::now().naive_utc(); + sqlx::query( + "UPDATE recordings \ + SET expected_trace_count = COALESCE(expected_trace_count, ?1), \ + last_updated = ?2 \ + WHERE recording_index = ?3", + ) + .bind(expected_trace_count) + .bind(now) + .bind(recording_index) + .execute(&mut *tx) + .await?; + let record = Self::fetch_recording_locked(&mut tx, recording_index).await?; + tx.commit().await?; + Ok(record) + } + + async fn mark_expected_trace_count_reported( 
+ &self, + recording_index: i64, + count: i64, + ) -> Result, StateStoreError> { + let mut tx = self.write_pool.begin().await?; + let now = Utc::now().naive_utc(); + sqlx::query( + "UPDATE recordings \ + SET expected_trace_count_reported = ?1, \ + last_updated = ?2 \ + WHERE recording_index = ?3", + ) + .bind(count) + .bind(now) + .bind(recording_index) + .execute(&mut *tx) + .await?; + let record = Self::fetch_recording_locked(&mut tx, recording_index).await?; + tx.commit().await?; + Ok(record) + } + + async fn reset_stale_pipeline_states(&self) -> Result { + let mut tx = self.write_pool.begin().await?; + let now = Utc::now().naive_utc(); + // `registering` β†’ `pending` so the registration coordinator's claim + // query sees the row again on the next tick. + let reg_result = sqlx::query( + "UPDATE traces \ + SET registration_status = ?1, last_updated = ?2 \ + WHERE registration_status = ?3", + ) + .bind(TraceRegistrationStatus::Pending.as_str()) + .bind(now) + .bind(TraceRegistrationStatus::Registering.as_str()) + .execute(&mut *tx) + .await?; + // `uploading` β†’ `retrying` so the uploader's drain (which filters on + // `Queued | Retrying`) re-picks it up. Stays inside the registered + // half of the pipeline because the session URI is still valid + // (`registration_status` is preserved by definition). 
+ let upload_result = sqlx::query( + "UPDATE traces \ + SET upload_status = ?1, last_updated = ?2 \ + WHERE upload_status = ?3", + ) + .bind(TraceUploadStatus::Retrying.as_str()) + .bind(now) + .bind(TraceUploadStatus::Uploading.as_str()) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(reg_result.rows_affected() + upload_result.rows_affected()) + } + + async fn mark_stale_writing_traces_failed( + &self, + stale_threshold_secs: i64, + ) -> Result { + let mut tx = self.write_pool.begin().await?; + let now = Utc::now().naive_utc(); + let cutoff = now - chrono::Duration::seconds(stale_threshold_secs); + let result = sqlx::query( + "UPDATE traces \ + SET write_status = ?1, \ + error_code = COALESCE(error_code, ?2), \ + error_message = COALESCE(error_message, ?3), \ + last_updated = ?4 \ + WHERE write_status IN (?5, ?6, ?7) \ + AND last_updated <= ?8", + ) + .bind(TraceWriteStatus::Failed.as_str()) + .bind(TraceErrorCode::WriteFailed.as_str()) + .bind("daemon exited before encoding finished") + .bind(now) + .bind(TraceWriteStatus::Writing.as_str()) + .bind(TraceWriteStatus::Initializing.as_str()) + .bind(TraceWriteStatus::PendingMetadata.as_str()) + .bind(cutoff) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(result.rows_affected()) + } + + async fn cancel_recording( + &self, + recording_index: i64, + stop_timestamp_ns: i64, + ) -> Result<(RecordingRow, u64), StateStoreError> { + let mut tx = self.write_pool.begin().await?; + + let now = Utc::now().naive_utc(); + sqlx::query( + "UPDATE recordings \ + SET cancelled_at = COALESCE(cancelled_at, ?2), \ + stop_timestamp_ns = COALESCE(stop_timestamp_ns, ?4), \ + progress_reported = ?3, \ + last_updated = ?2 \ + WHERE recording_index = ?1", + ) + .bind(recording_index) + .bind(now) + .bind(ProgressReportStatus::Reported.as_str()) + .bind(stop_timestamp_ns) + .execute(&mut *tx) + .await?; + + // Burn every non-terminal trace so the registration / upload / + // progress coordinators ignore them. 
`failed` is the existing + // terminal label for all three pipelines; tagging with the + // recording-cancelled error code lets operators distinguish a + // user-cancel from an actual write or upload failure. + let write_result = sqlx::query( + "UPDATE traces \ + SET write_status = ?1, \ + error_code = ?2, \ + error_message = COALESCE(error_message, ?3), \ + last_updated = ?4 \ + WHERE recording_index = ?5 \ + AND write_status NOT IN (?6, ?1)", + ) + .bind(TraceWriteStatus::Failed.as_str()) + .bind(TraceErrorCode::RecordingCancelled.as_str()) + .bind("recording cancelled by producer") + .bind(now) + .bind(recording_index) + .bind(TraceWriteStatus::Written.as_str()) + .execute(&mut *tx) + .await?; + sqlx::query( + "UPDATE traces \ + SET upload_status = ?1, last_updated = ?2 \ + WHERE recording_index = ?3 \ + AND upload_status NOT IN (?1, ?4)", + ) + .bind(TraceUploadStatus::Failed.as_str()) + .bind(now) + .bind(recording_index) + .bind(TraceUploadStatus::Uploaded.as_str()) + .execute(&mut *tx) + .await?; + sqlx::query( + "UPDATE traces \ + SET registration_status = ?1, last_updated = ?2 \ + WHERE recording_index = ?3 \ + AND registration_status NOT IN (?1, ?4)", + ) + .bind(TraceRegistrationStatus::Failed.as_str()) + .bind(now) + .bind(recording_index) + .bind(TraceRegistrationStatus::Registered.as_str()) + .execute(&mut *tx) + .await?; + + let record = Self::fetch_recording_locked(&mut tx, recording_index) + .await? + .ok_or(sqlx::Error::RowNotFound)?; + + tx.commit().await?; + Ok((record, write_result.rows_affected())) + } + + async fn delete_recording_cascade(&self, recording_index: i64) -> Result { + let mut tx = self.write_pool.begin().await?; + let traces_deleted = sqlx::query("DELETE FROM traces WHERE recording_index = ?1") + .bind(recording_index) + .execute(&mut *tx) + .await? 
+ .rows_affected(); + sqlx::query("DELETE FROM recordings WHERE recording_index = ?1") + .bind(recording_index) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(traces_deleted) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + async fn open_store() -> (SqliteStateStore, TempDir) { + let tempdir = TempDir::new().expect("tempdir"); + let path = tempdir.path().join("state.db"); + let store = SqliteStateStore::open(&path).await.expect("open store"); + (store, tempdir) + } + + /// Insert a recording for `(robot-1, instance)` and return its index. + async fn seed_recording(store: &SqliteStateStore, instance: i64) -> i64 { + store + .create_recording(NewRecording { + robot_id: Some("robot-1"), + robot_instance: Some(instance), + dataset_id: Some("ds-1"), + start_timestamp_ns: 1_700_000_000_000_000_000, + }) + .await + .expect("create_recording") + .recording_index + } + + #[tokio::test] + async fn promote_ready_traces_to_queued_gates_on_registered_and_written() { + let (store, _tempdir) = open_store().await; + let recording_index = seed_recording(&store, 0).await; + store + .create_trace(recording_index, "t1", None, None) + .await + .expect("create_trace"); + + // Freshly created (initializing, unregistered) -> nothing promoted. + assert!(store + .promote_ready_traces_to_queued() + .await + .expect("sweep") + .is_empty()); + + // Pre-registered while the bytes are still being written -> still not + // promoted, and crucially NOT queued (the uploader would otherwise PUT + // an unwritten file). 
+ store + .update_trace( + "t1", + TraceUpdate { + registration_status: Some(TraceRegistrationStatus::Registered), + ..TraceUpdate::default() + }, + ) + .await + .expect("update_trace"); + assert!(store + .promote_ready_traces_to_queued() + .await + .expect("sweep") + .is_empty()); + let trace = store.get_trace("t1").await.expect("get").expect("exists"); + assert_ne!(trace.upload_status, TraceUploadStatus::Queued); + + // Bytes commit -> promoted exactly once, carrying its recording_index. + store + .update_trace( + "t1", + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + ..TraceUpdate::default() + }, + ) + .await + .expect("update_trace"); + let promoted = store.promote_ready_traces_to_queued().await.expect("sweep"); + assert_eq!(promoted, vec![("t1".to_string(), recording_index)]); + let trace = store.get_trace("t1").await.expect("get").expect("exists"); + assert_eq!(trace.upload_status, TraceUploadStatus::Queued); + + // Idempotent: a subsequent sweep promotes nothing. + assert!(store + .promote_ready_traces_to_queued() + .await + .expect("sweep") + .is_empty()); + } + + #[tokio::test] + async fn create_recording_allocates_increasing_indices() { + let (store, _tempdir) = open_store().await; + let first = seed_recording(&store, 0).await; + let second = seed_recording(&store, 1).await; + assert!( + second > first, + "recording_index must increase: {first} {second}" + ); + + let row = store.get_recording(first).await.unwrap().unwrap(); + assert_eq!(row.recording_index, first); + assert_eq!(row.recording_id, None, "cloud id starts NULL"); + assert_eq!(row.robot_id.as_deref(), Some("robot-1")); + assert_eq!(row.robot_instance, Some(0)); + } + + #[tokio::test] + async fn recordings_for_source_orders_by_index() { + let (store, _tempdir) = open_store().await; + let first = seed_recording(&store, 0).await; + let second = seed_recording(&store, 0).await; + // A different instance must not be returned. 
+ seed_recording(&store, 9).await; + + let rows = store.recordings_for_source("robot-1", 0).await.unwrap(); + let indices: Vec = rows.iter().map(|row| row.recording_index).collect(); + assert_eq!(indices, vec![first, second]); + } + + #[tokio::test] + async fn cloud_id_and_start_notify_lifecycle() { + let (store, _tempdir) = open_store().await; + let index = seed_recording(&store, 0).await; + + // Pending until notified/failed. + let pending = store.recordings_pending_start_notify().await.unwrap(); + assert_eq!(pending.len(), 1); + assert_eq!(pending[0].recording_index, index); + + // Start notify stamps both the cloud id and the notified timestamp, + // and is idempotent: a second call cannot clobber the persisted id. + let row = store + .mark_recording_start_notified(index, "cloud-rec-1") + .await + .unwrap() + .unwrap(); + assert_eq!(row.recording_id.as_deref(), Some("cloud-rec-1")); + assert!(row.backend_start_notified_at.is_some()); + assert!(store + .recordings_pending_start_notify() + .await + .unwrap() + .is_empty()); + + let row = store + .mark_recording_start_notified(index, "other-id") + .await + .unwrap() + .unwrap(); + assert_eq!(row.recording_id.as_deref(), Some("cloud-rec-1")); + } + + #[tokio::test] + async fn stop_notify_sweep_requires_a_cloud_id() { + let (store, _tempdir) = open_store().await; + let index = seed_recording(&store, 0).await; + store.mark_recording_stopped(index, 2).await.unwrap(); + // Stopped but no cloud id yet β†’ not eligible for the stop sweep. 
+ assert!(store + .recordings_pending_stop_notify() + .await + .unwrap() + .is_empty()); + + store + .mark_recording_start_notified(index, "cloud-rec-1") + .await + .unwrap(); + let pending = store.recordings_pending_stop_notify().await.unwrap(); + assert_eq!(pending.len(), 1); + assert_eq!(pending[0].recording_index, index); + } + + #[tokio::test] + async fn open_creates_schema_and_applies_wal() { + let (store, _tempdir) = open_store().await; + + let journal_mode: String = sqlx::query_scalar("PRAGMA journal_mode") + .fetch_one(store.pool()) + .await + .expect("journal_mode"); + assert_eq!(journal_mode.to_lowercase(), "wal"); + + // `synchronous=NORMAL` is the numeric `1` from SQLite's PRAGMA result. + let synchronous: i64 = sqlx::query_scalar("PRAGMA synchronous") + .fetch_one(store.pool()) + .await + .expect("synchronous"); + assert_eq!(synchronous, 1); + + let tables: Vec = + sqlx::query_scalar("SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name") + .fetch_all(store.pool()) + .await + .expect("tables"); + assert!(tables.contains(&"recordings".to_string())); + assert!(tables.contains(&"traces".to_string())); + } + + #[tokio::test] + async fn create_trace_inserts_trace_rows() { + let (store, _tempdir) = open_store().await; + let index = seed_recording(&store, 0).await; + + let trace = store + .create_trace(index, "trace-1", Some("video"), None) + .await + .expect("create_trace"); + assert_eq!(trace.trace_id, "trace-1"); + assert_eq!(trace.recording_index, index); + assert_eq!(trace.write_status, TraceWriteStatus::Initializing); + assert_eq!(trace.data_type.as_deref(), Some("video")); + + // Creating the same trace twice is a no-op (write_status preserved). 
+ let again = store + .create_trace(index, "trace-1", Some("video"), None) + .await + .expect("idempotent create_trace"); + assert_eq!(again.trace_id, "trace-1"); + let traces = store + .list_traces_for_recording(index) + .await + .expect("list_traces"); + assert_eq!(traces.len(), 1); + } + + #[tokio::test] + async fn update_trace_overwrites_only_set_fields() { + let (store, _tempdir) = open_store().await; + let index = seed_recording(&store, 0).await; + store + .create_trace(index, "trace-1", None, None) + .await + .expect("create_trace"); + + store + .update_trace( + "trace-1", + TraceUpdate { + write_status: Some(TraceWriteStatus::Writing), + bytes_written: Some(2048), + ..TraceUpdate::default() + }, + ) + .await + .expect("update_trace"); + let updated = store.get_trace("trace-1").await.unwrap().unwrap(); + assert_eq!(updated.write_status, TraceWriteStatus::Writing); + assert_eq!(updated.bytes_written, 2048); + // Unset fields keep their prior values. + assert_eq!(updated.bytes_uploaded, 0); + assert_eq!(updated.upload_status, TraceUploadStatus::Pending); + + // Updating an unknown trace is a harmless no-op (no row created). 
+ store + .update_trace( + "unknown-trace", + TraceUpdate { + write_status: Some(TraceWriteStatus::Failed), + ..TraceUpdate::default() + }, + ) + .await + .expect("update_trace"); + assert!(store.get_trace("unknown-trace").await.unwrap().is_none()); + } + + #[tokio::test] + async fn claim_for_registration_respects_size_trigger() { + let (store, _tempdir) = open_store().await; + let recording_index = seed_recording(&store, 0).await; + for index in 0..5 { + let trace_id = format!("trace-{index}"); + store + .create_trace(recording_index, &trace_id, None, None) + .await + .expect("create_trace"); + store + .update_trace( + &trace_id, + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + ..TraceUpdate::default() + }, + ) + .await + .expect("update_trace"); + } + + // Size trigger: 5 ready β‰₯ limit of 3, so the oldest 3 are claimed even + // though last_updated is fresh. + let claimed = store + .claim_traces_for_registration(3, 60.0) + .await + .expect("claim_traces"); + assert_eq!(claimed.len(), 3); + for trace in &claimed { + assert_eq!( + trace.registration_status, + TraceRegistrationStatus::Registering + ); + } + + // The remaining two are too fresh for the age trigger so are not + // claimed on a second call with a smaller limit. + let second = store + .claim_traces_for_registration(50, 60.0) + .await + .expect("claim_traces"); + assert!( + second.is_empty(), + "expected no age-eligible traces, got {second:?}" + ); + } + + #[tokio::test] + async fn reset_stale_pipeline_states_rearms_registering_and_uploading() { + let (store, _tempdir) = open_store().await; + // Three recordings to make sure the sweep doesn't leak across rows. 
+ for (instance, trace_id, reg, upload) in [ + ( + 0, + "trace-reg", + TraceRegistrationStatus::Registering, + TraceUploadStatus::Pending, + ), + ( + 1, + "trace-up", + TraceRegistrationStatus::Registered, + TraceUploadStatus::Uploading, + ), + ( + 2, + "trace-clean", + TraceRegistrationStatus::Registered, + TraceUploadStatus::Queued, + ), + ] { + let recording_index = seed_recording(&store, instance).await; + store + .create_trace( + recording_index, + trace_id, + Some("JOINT_POSITIONS"), + Some("arm"), + ) + .await + .unwrap(); + store + .update_trace( + trace_id, + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + registration_status: Some(reg), + upload_status: Some(upload), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + } + + let touched = store.reset_stale_pipeline_states().await.unwrap(); + assert_eq!( + touched, 2, + "registering + uploading rows should be re-armed" + ); + + let reg = store.get_trace("trace-reg").await.unwrap().unwrap(); + assert_eq!(reg.registration_status, TraceRegistrationStatus::Pending); + let up = store.get_trace("trace-up").await.unwrap().unwrap(); + assert_eq!(up.upload_status, TraceUploadStatus::Retrying); + let clean = store.get_trace("trace-clean").await.unwrap().unwrap(); + // Untouched rows keep their state β€” the sweep is targeted. 
+ assert_eq!( + clean.registration_status, + TraceRegistrationStatus::Registered + ); + assert_eq!(clean.upload_status, TraceUploadStatus::Queued); + } + + #[tokio::test] + async fn mark_stale_writing_traces_failed_burns_old_rows_only() { + let (store, _tempdir) = open_store().await; + let recording_index = seed_recording(&store, 0).await; + for (trace_id, write_status) in [ + ("fresh-writing", TraceWriteStatus::Writing), + ("stale-writing", TraceWriteStatus::Writing), + ("stale-initializing", TraceWriteStatus::Initializing), + ("stale-pending-meta", TraceWriteStatus::PendingMetadata), + ("done", TraceWriteStatus::Written), + ("failed", TraceWriteStatus::Failed), + ] { + store + .create_trace( + recording_index, + trace_id, + Some("JOINT_POSITIONS"), + Some("arm"), + ) + .await + .unwrap(); + store + .update_trace( + trace_id, + TraceUpdate { + write_status: Some(write_status), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + } + // Backdate the three "stale-*" rows to ~5 minutes ago by stamping + // last_updated directly. SQLite stores `DATETIME` as ISO8601 text, + // which `NaiveDateTime` serialises automatically. + let stale_at = Utc::now().naive_utc() - chrono::Duration::seconds(300); + for trace_id in ["stale-writing", "stale-initializing", "stale-pending-meta"] { + sqlx::query("UPDATE traces SET last_updated = ?1 WHERE trace_id = ?2") + .bind(stale_at) + .bind(trace_id) + .execute(store.write_pool()) + .await + .unwrap(); + } + + let touched = store.mark_stale_writing_traces_failed(30).await.unwrap(); + assert_eq!(touched, 3, "only stale writing-side rows should be touched"); + + for trace_id in ["stale-writing", "stale-initializing", "stale-pending-meta"] { + let row = store.get_trace(trace_id).await.unwrap().unwrap(); + assert_eq!(row.write_status, TraceWriteStatus::Failed); + assert_eq!(row.error_code, Some(TraceErrorCode::WriteFailed)); + } + // Fresh + already-terminal rows are not touched. 
+ let fresh = store.get_trace("fresh-writing").await.unwrap().unwrap(); + assert_eq!(fresh.write_status, TraceWriteStatus::Writing); + let done = store.get_trace("done").await.unwrap().unwrap(); + assert_eq!(done.write_status, TraceWriteStatus::Written); + let failed = store.get_trace("failed").await.unwrap().unwrap(); + assert_eq!(failed.error_code, None, "pre-existing rows untouched"); + } + + #[tokio::test] + async fn cancel_recording_burns_traces_and_stamps_cancelled_at() { + let (store, _tempdir) = open_store().await; + let recording_index = seed_recording(&store, 0).await; + for (trace_id, write, upload, reg) in [ + ( + "in-flight", + TraceWriteStatus::Writing, + TraceUploadStatus::Pending, + TraceRegistrationStatus::Pending, + ), + ( + "registered-queued", + TraceWriteStatus::Written, + TraceUploadStatus::Queued, + TraceRegistrationStatus::Registered, + ), + ( + "already-uploaded", + TraceWriteStatus::Written, + TraceUploadStatus::Uploaded, + TraceRegistrationStatus::Registered, + ), + ] { + store + .create_trace(recording_index, trace_id, Some("JOINT_POSITIONS"), None) + .await + .unwrap(); + store + .update_trace( + trace_id, + TraceUpdate { + write_status: Some(write), + upload_status: Some(upload), + registration_status: Some(reg), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + } + // A trace belonging to another recording must not be touched. 
+ let other_index = seed_recording(&store, 9).await; + store + .create_trace(other_index, "untouched", Some("JOINT_POSITIONS"), None) + .await + .unwrap(); + + let (row, touched) = store + .cancel_recording(recording_index, 5_000_000_000) + .await + .unwrap(); + assert!(row.cancelled_at.is_some(), "cancelled_at must be stamped"); + assert_eq!( + row.stop_timestamp_ns, + Some(5_000_000_000), + "a cancel stamps stop_timestamp_ns like a stop" + ); + assert_eq!(row.progress_reported, ProgressReportStatus::Reported); + assert_eq!(touched, 1, "only the non-Written trace's write was touched"); + + let in_flight = store.get_trace("in-flight").await.unwrap().unwrap(); + assert_eq!(in_flight.write_status, TraceWriteStatus::Failed); + assert_eq!( + in_flight.error_code, + Some(TraceErrorCode::RecordingCancelled) + ); + assert_eq!(in_flight.upload_status, TraceUploadStatus::Failed); + + let queued = store.get_trace("registered-queued").await.unwrap().unwrap(); + assert_eq!(queued.upload_status, TraceUploadStatus::Failed); + assert_eq!(queued.write_status, TraceWriteStatus::Written); + + let uploaded = store.get_trace("already-uploaded").await.unwrap().unwrap(); + assert_eq!(uploaded.upload_status, TraceUploadStatus::Uploaded); + assert_eq!( + uploaded.registration_status, + TraceRegistrationStatus::Registered + ); + + let other = store.get_trace("untouched").await.unwrap().unwrap(); + assert_eq!(other.write_status, TraceWriteStatus::Initializing); + assert_eq!(other.upload_status, TraceUploadStatus::Pending); + } + + #[tokio::test] + async fn cancel_recording_is_idempotent() { + let (store, _tempdir) = open_store().await; + let recording_index = seed_recording(&store, 0).await; + store + .create_trace(recording_index, "trace-1", Some("JOINT_POSITIONS"), None) + .await + .unwrap(); + + let (first, _) = store + .cancel_recording(recording_index, 5_000_000_000) + .await + .unwrap(); + let first_at = first.cancelled_at.expect("cancelled_at set"); + // Sleep across a clock tick to 
make a date change observable. + std::thread::sleep(std::time::Duration::from_millis(10)); + let (second, _) = store + .cancel_recording(recording_index, 9_000_000_000) + .await + .unwrap(); + assert_eq!( + second.stop_timestamp_ns, + Some(5_000_000_000), + "subsequent cancels must not slide stop_timestamp_ns forward" + ); + assert_eq!( + second.cancelled_at, + Some(first_at), + "subsequent cancels must not slide cancelled_at forward" + ); + } + + #[tokio::test] + async fn claim_for_registration_respects_age_trigger() { + let (store, _tempdir) = open_store().await; + let recording_index = seed_recording(&store, 0).await; + store + .create_trace(recording_index, "trace-1", None, None) + .await + .expect("create_trace"); + store + .update_trace( + "trace-1", + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + ..TraceUpdate::default() + }, + ) + .await + .expect("update_trace"); + + // Age trigger: max_wait_secs = 0 β‡’ any row at or before "now" is + // eligible. We pass a generous limit so the size trigger doesn't fire. + let claimed = store + .claim_traces_for_registration(50, 0.0) + .await + .expect("claim_traces"); + assert_eq!(claimed.len(), 1); + assert_eq!(claimed[0].trace_id, "trace-1"); + assert_eq!( + claimed[0].registration_status, + TraceRegistrationStatus::Registering + ); + } + + #[tokio::test] + async fn apply_trace_writes_records_upload_progress_until_settled() { + let (store, _tempdir) = open_store().await; + let index = seed_recording(&store, 0).await; + store + .create_trace(index, "trace-up", Some("RGB"), None) + .await + .unwrap(); + // Write phase done, upload phase begins. 
+ store + .update_trace( + "trace-up", + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + upload_status: Some(TraceUploadStatus::Uploading), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + + // A rolling checkpoint applies while the upload is live β€” and it does so + // even though the row is already `written`, which the write-phase guard + // forbids (so it must travel on its own statement). + store + .apply_trace_writes(&[CoalescedTraceWrite { + trace_id: "trace-up".to_string(), + bytes_uploaded: Some(4_000_000), + ..Default::default() + }]) + .await + .unwrap(); + assert_eq!( + store + .get_trace("trace-up") + .await + .unwrap() + .unwrap() + .bytes_uploaded, + 4_000_000 + ); + + // Once the upload settles, a late/duplicate coalesced checkpoint must + // not touch the terminal row (it would otherwise rewind the final + // byte count the synchronous finalise wrote). + store + .update_trace( + "trace-up", + TraceUpdate { + upload_status: Some(TraceUploadStatus::Uploaded), + bytes_uploaded: Some(8_000_000), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + store + .apply_trace_writes(&[CoalescedTraceWrite { + trace_id: "trace-up".to_string(), + bytes_uploaded: Some(1), + ..Default::default() + }]) + .await + .unwrap(); + assert_eq!( + store + .get_trace("trace-up") + .await + .unwrap() + .unwrap() + .bytes_uploaded, + 8_000_000, + "a late checkpoint must not touch a settled upload" + ); + } + + #[tokio::test] + async fn recordings_pending_reclaim_only_returns_settled_recordings() { + let (store, _tempdir) = open_store().await; + + // Helper: drive a stopped recording with one trace at `upload` status + // through the full notify/progress gates. 
+ async fn stopped_with_trace( + store: &SqliteStateStore, + instance: i64, + trace_id: &str, + upload: TraceUploadStatus, + ) -> i64 { + let index = seed_recording(store, instance).await; + store + .mark_recording_start_notified(index, &format!("cloud-{instance}")) + .await + .unwrap(); + store + .create_trace(index, trace_id, Some("J"), None) + .await + .unwrap(); + store + .update_trace( + trace_id, + TraceUpdate { + write_status: Some(TraceWriteStatus::Written), + upload_status: Some(upload), + ..TraceUpdate::default() + }, + ) + .await + .unwrap(); + store.mark_recording_stopped(index, 1).await.unwrap(); + store.mark_recording_stop_notified(index).await.unwrap(); + store.set_expected_trace_count(index, 1).await.unwrap(); + store + .set_progress_report_status( + index, + ProgressReportStatus::Pending, + ProgressReportStatus::Reported, + ) + .await + .unwrap(); + index + } + + // A: stopped + fully uploaded β†’ reclaimable. + let uploaded = stopped_with_trace(&store, 0, "a1", TraceUploadStatus::Uploaded).await; + // B: stopped, settled at the recording level, but one trace failed to + // upload β†’ must NOT be reclaimable (and must not be re-scanned forever). + let failed = stopped_with_trace(&store, 1, "b1", TraceUploadStatus::Failed).await; + // C: cancelled + backend notified β†’ reclaimable with no trace scan. + let cancelled = seed_recording(&store, 2).await; + store + .mark_recording_start_notified(cancelled, "cloud-c") + .await + .unwrap(); + store.cancel_recording(cancelled, 1).await.unwrap(); + store + .mark_recording_cancel_notified(cancelled) + .await + .unwrap(); + // D: live (never stopped) β†’ not reclaimable. 
+ let live = seed_recording(&store, 3).await; + + let reclaimable: Vec = store + .recordings_pending_reclaim() + .await + .unwrap() + .iter() + .map(|row| row.recording_index) + .collect(); + assert!( + reclaimable.contains(&uploaded), + "fully-uploaded stopped recording reclaims" + ); + assert!( + reclaimable.contains(&cancelled), + "cancelled+notified recording reclaims" + ); + assert!( + !reclaimable.contains(&failed), + "a permanently-failed trace blocks reclaim (M10: no perpetual re-scan)" + ); + assert!( + !reclaimable.contains(&live), + "a live recording never reclaims" + ); + } + + #[tokio::test] + async fn delete_recording_cascade_removes_recording_and_its_traces_only() { + let (store, _tempdir) = open_store().await; + let index = seed_recording(&store, 0).await; + for trace_id in ["t-1", "t-2"] { + store + .create_trace(index, trace_id, Some("JOINT_POSITIONS"), None) + .await + .unwrap(); + } + // A sibling recording + trace must survive the cascade. + let other = seed_recording(&store, 9).await; + store + .create_trace(other, "keep", Some("JOINT_POSITIONS"), None) + .await + .unwrap(); + + let deleted = store.delete_recording_cascade(index).await.unwrap(); + assert_eq!(deleted, 2, "both traces of the recording are deleted"); + + assert!(store.get_recording(index).await.unwrap().is_none()); + assert!(store.get_trace("t-1").await.unwrap().is_none()); + assert!(store.get_trace("t-2").await.unwrap().is_none()); + + // Sibling untouched. + assert!(store.get_recording(other).await.unwrap().is_some()); + assert!(store.get_trace("keep").await.unwrap().is_some()); + } +} diff --git a/rust/data_daemon/src/state/trace_event_database_writer.rs b/rust/data_daemon/src/state/trace_event_database_writer.rs new file mode 100644 index 000000000..04fe558f7 --- /dev/null +++ b/rust/data_daemon/src/state/trace_event_database_writer.rs @@ -0,0 +1,558 @@ +//! Coalescing + batching write-behind for per-trace actor writes. +//! +//! 
Per-trace actors fire-and-forget partial column updates (a `writing` bump, a +//! debounced `bytes_written`, the finalise `written` + `total_bytes`, or a +//! `failed`) without ever awaiting a transaction. This task coalesces +//! consecutive ops for the same trace last-writer-wins per column (a burst of +//! `bytes_written` collapses to one row write) and flushes the pending set in a +//! single batched transaction ([`SqliteStateStore::apply_trace_writes`]) on a +//! short timer or once the pending set grows past a cap. +//! +//! Terminal-state monotonicity (a late progress write can't resurrect a +//! cancelled row) lives in `apply_trace_writes`'s `WHERE` guard, so the writer +//! needs no coordination with the cancel path. + +use std::collections::HashMap; +use std::sync::Arc; + +use tokio::sync::{mpsc, oneshot}; +use tokio::task::JoinHandle; +use tokio::time::{interval, Duration, MissedTickBehavior}; + +use crate::state::schema::{TraceErrorCode, TraceWriteStatus}; +use crate::state::store::{CoalescedTraceWrite, SqliteStateStore, TraceCreate}; + +/// How often pending writes are flushed. Short enough that finalised traces +/// become visible promptly, long enough that a burst of progress updates +/// coalesces into one row write per flush. +const FLUSH_INTERVAL: Duration = Duration::from_millis(25); + +/// Flush eagerly once this many distinct traces are pending, so a wide +/// fan-out (many traces updated within one interval) doesn't grow an +/// unbounded batch before the timer fires. +const MAX_PENDING_TRACES: usize = 512; + +/// Control + data messages accepted by the writer task. +enum Message { + /// A partial column update for one trace, merged into the pending set. + Write(CoalescedTraceWrite), + /// Discard every *pending create* for a recording, then acknowledge. Sent by + /// the dispatcher's cancel path before `cancel_recording` so a not-yet- + /// flushed trace can't be inserted as an orphan row after the recording is + /// burned. 
Pending update-only writes are left β€” the terminal-state guard in + /// `apply_trace_writes` already makes them no-ops against the failed row. + DropRecording { + recording_index: i64, + ack: oneshot::Sender<()>, + }, + /// Flush everything pending now and acknowledge (tests + shutdown). + Flush(oneshot::Sender<()>), + /// Drain, flush, acknowledge, and exit. + Shutdown(oneshot::Sender<()>), +} + +/// Cloneable handle the per-trace actors use to enqueue writes. Every method is +/// synchronous and non-blocking: the actor fires an update and moves on. +#[derive(Clone)] +pub struct TraceWriteHandle { + tx: mpsc::UnboundedSender, +} + +impl TraceWriteHandle { + /// Create the trace row (fire-and-forget). Sent once, as the actor's first + /// write, so the row is inserted by the next batched flush instead of the + /// actor blocking on a synchronous `create_trace`. Works at any point in a + /// recording, including a sensor that starts logging midway. + pub fn create( + &self, + trace_id: &str, + recording_index: i64, + data_type: Option<&str>, + data_type_name: Option<&str>, + ) { + self.enqueue(CoalescedTraceWrite { + trace_id: trace_id.to_string(), + create: Some(TraceCreate { + recording_index, + data_type: data_type.map(str::to_string), + data_type_name: data_type_name.map(str::to_string), + }), + ..Default::default() + }); + } + + /// Mark the trace `writing` (first frame / first video chunk). + pub fn mark_writing(&self, trace_id: &str) { + self.enqueue(CoalescedTraceWrite { + trace_id: trace_id.to_string(), + write_status: Some(TraceWriteStatus::Writing), + ..Default::default() + }); + } + + /// Record the latest absolute on-disk byte count. + pub fn progress(&self, trace_id: &str, bytes_written: i64) { + self.enqueue(CoalescedTraceWrite { + trace_id: trace_id.to_string(), + bytes_written: Some(bytes_written), + ..Default::default() + }); + } + + /// Record the latest rolling upload offset (advisory progress). 
Coalesced + /// like the write-phase progress so the uploader's per-64-MiB checkpoint + /// across many concurrent uploads collapses to one batched row write + /// instead of a synchronous transaction each. Resume correctness comes from + /// the server's 308 offset, not this row, so a coalesced/late value is + /// harmless; the store skips it once the upload has settled. + pub fn upload_progress(&self, trace_id: &str, bytes_uploaded: i64) { + self.enqueue(CoalescedTraceWrite { + trace_id: trace_id.to_string(), + bytes_uploaded: Some(bytes_uploaded), + ..Default::default() + }); + } + + /// Finalise the trace: `written`, with the final byte total. + pub fn finalise(&self, trace_id: &str, total_bytes: i64) { + self.enqueue(CoalescedTraceWrite { + trace_id: trace_id.to_string(), + write_status: Some(TraceWriteStatus::Written), + total_bytes: Some(total_bytes), + bytes_written: Some(total_bytes), + ..Default::default() + }); + } + + /// Mark the trace `failed`, preserving the latest byte count. + pub fn fail(&self, trace_id: &str, bytes_written: i64) { + self.enqueue(CoalescedTraceWrite { + trace_id: trace_id.to_string(), + write_status: Some(TraceWriteStatus::Failed), + bytes_written: Some(bytes_written), + ..Default::default() + }); + } + + /// Mark the trace `failed` with a write-phase error code + message. + #[allow(dead_code)] + pub fn fail_with( + &self, + trace_id: &str, + bytes_written: i64, + error_code: TraceErrorCode, + error_message: impl Into, + ) { + self.enqueue(CoalescedTraceWrite { + trace_id: trace_id.to_string(), + write_status: Some(TraceWriteStatus::Failed), + bytes_written: Some(bytes_written), + error_code: Some(error_code), + error_message: Some(error_message.into()), + ..Default::default() + }); + } + + /// Flush all pending writes and wait for the batch to commit. Used by + /// tests and by callers that need a happens-before with the DB. 
+ pub async fn flush(&self) { + let (ack, ack_rx) = oneshot::channel(); + if self.tx.send(Message::Flush(ack)).is_ok() { + let _ = ack_rx.await; + } + } + + /// Discard pending creates for a recording and wait for the purge to + /// complete. The dispatcher calls this before `cancel_recording` so a + /// not-yet-flushed trace of a cancelled recording can't land as an orphan + /// row after the cancel has burned the recording's existing traces. + pub async fn drop_recording(&self, recording_index: i64) { + let (ack, ack_rx) = oneshot::channel(); + if self + .tx + .send(Message::DropRecording { + recording_index, + ack, + }) + .is_ok() + { + let _ = ack_rx.await; + } + } + + fn enqueue(&self, write: CoalescedTraceWrite) { + // The channel only closes once the writer task has exited (daemon + // shutdown). A drop here means we're past the point where writes + // matter, so swallow it rather than propagate to the actor. + let _ = self.tx.send(Message::Write(write)); + } +} + +/// Owns the writer task's lifetime. Held by the daemon main loop; dropping it +/// does not stop the task (clones of the handle keep the channel open) β€” call +/// [`TraceEventDatabaseWriter::shutdown`] to drain, flush, and join. +pub struct TraceEventDatabaseWriter { + tx: mpsc::UnboundedSender, + join: JoinHandle<()>, +} + +impl TraceEventDatabaseWriter { + /// Drain every queued write, flush a final batch, and join the task. Call + /// after the dispatcher (and therefore every actor) has shut down, so no + /// further writes can be produced, and before the store is closed. 
pub async fn shutdown(self) {
    // A send error means the task has already exited; there is then nothing
    // left to acknowledge, so go straight to the join.
    let (ack, ack_rx) = oneshot::channel();
    if self.tx.send(Message::Shutdown(ack)).is_ok() {
        let _ = ack_rx.await;
    }
    if let Err(error) = self.join.await {
        tracing::warn!(?error, "trace-writer task join failed during shutdown");
    }
}
} // closes `impl TraceEventDatabaseWriter`

/// Spawn the writer task and return a cloneable [`TraceWriteHandle`] for the
/// actors plus the [`TraceEventDatabaseWriter`] owner for shutdown.
pub fn spawn(store: Arc<SqliteStateStore>) -> (TraceWriteHandle, TraceEventDatabaseWriter) {
    let (tx, rx) = mpsc::unbounded_channel();
    let join = tokio::spawn(run(store, rx));
    (
        TraceWriteHandle { tx: tx.clone() },
        TraceEventDatabaseWriter { tx, join },
    )
}

/// Merge one partial update into the pending set, last-writer-wins per column.
///
/// Columns the incoming write leaves as `None` keep whatever the pending entry
/// already holds.
fn merge(pending: &mut HashMap<String, CoalescedTraceWrite>, write: CoalescedTraceWrite) {
    let entry = pending
        .entry(write.trace_id.clone())
        .or_insert_with(|| CoalescedTraceWrite {
            trace_id: write.trace_id.clone(),
            ..Default::default()
        });
    // `create` is set-once — it arrives on the first write and is immutable
    // thereafter (the row identity never changes).
    if write.create.is_some() && entry.create.is_none() {
        entry.create = write.create;
    }
    if write.write_status.is_some() {
        entry.write_status = write.write_status;
    }
    if write.bytes_written.is_some() {
        entry.bytes_written = write.bytes_written;
    }
    if write.total_bytes.is_some() {
        entry.total_bytes = write.total_bytes;
    }
    if write.bytes_uploaded.is_some() {
        entry.bytes_uploaded = write.bytes_uploaded;
    }
    // `error_code`/`error_message` are only ever set by `fail_with`, which is
    // mutually exclusive with `finalise` (a trace either fails or finalises,
    // not both), so a `written` status never coalesces with a stale error in
    // the same entry.
    if write.error_code.is_some() {
        entry.error_code = write.error_code;
    }
    if write.error_message.is_some() {
        entry.error_message = write.error_message;
    }
}

/// Discard pending entries that would *insert* a row for `recording_index`.
/// Update-only entries (whose create already flushed) are left: the terminal
/// guard in `apply_trace_writes` makes them no-ops against the cancelled row.
fn drop_recording_creates(
    pending: &mut HashMap<String, CoalescedTraceWrite>,
    recording_index: i64,
) {
    pending.retain(|_, write| {
        write
            .create
            .as_ref()
            .is_none_or(|create| create.recording_index != recording_index)
    });
}

/// Flush the pending set in one batched transaction, clearing it only on a
/// successful commit.
///
/// On error the drained batch is **re-merged** into `pending` rather than
/// discarded: dropping it loses a `finalise`/`failed`, which wedges the trace
/// in `writing` and retains its parent recording forever. The re-merged
/// entries stay keyed by `trace_id`, so writes that arrive before the next
/// attempt coalesce on top of them and a persistent failure can't grow
/// `pending` past the live trace count.
///
/// Callers can tell the outcome apart afterwards: `pending` is empty after a
/// successful flush and non-empty after a failed one.
async fn flush(store: &SqliteStateStore, pending: &mut HashMap<String, CoalescedTraceWrite>) {
    if pending.is_empty() {
        return;
    }
    let batch: Vec<CoalescedTraceWrite> = pending.drain().map(|(_, write)| write).collect();
    if let Err(error) = store.apply_trace_writes(&batch).await {
        tracing::warn!(
            %error,
            rows = batch.len(),
            "trace-writer batch flush failed; re-queueing batch for retry"
        );
        for write in batch {
            merge(pending, write);
        }
    }
}

/// Last flush before the writer task exits. Nothing retries after this point,
/// so writes that still could not be persisted are reported as lost instead of
/// only producing the "re-queueing for retry" warning from [`flush`].
async fn flush_before_exit(
    store: &SqliteStateStore,
    pending: &mut HashMap<String, CoalescedTraceWrite>,
) {
    flush(store, pending).await;
    if !pending.is_empty() {
        tracing::error!(
            rows = pending.len(),
            "trace-writer exiting with unflushed writes; final batch flush failed"
        );
    }
}

/// Writer task body: merge incoming writes into `pending` and flush them on
/// the [`FLUSH_INTERVAL`] timer, on request, or once [`MAX_PENDING_TRACES`]
/// distinct traces are pending. Returns after a `Shutdown` message or once
/// every sender has been dropped.
async fn run(store: Arc<SqliteStateStore>, mut rx: mpsc::UnboundedReceiver<Message>) {
    let mut pending: HashMap<String, CoalescedTraceWrite> = HashMap::new();
    let mut ticker = interval(FLUSH_INTERVAL);
    // A flush that runs long must not fire a backlog of catch-up ticks.
    ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
    // Set after a cap-triggered flush fails. A failed flush re-merges its
    // batch, so `pending` stays at the cap; without this hold-off every
    // following write would retry the failing transaction immediately. The
    // timer arm keeps retrying at its normal cadence regardless.
    let mut eager_flush_not_before: Option<tokio::time::Instant> = None;

    loop {
        tokio::select! {
            message = rx.recv() => match message {
                Some(Message::Write(write)) => {
                    merge(&mut pending, write);
                    if pending.len() >= MAX_PENDING_TRACES {
                        let now = tokio::time::Instant::now();
                        if eager_flush_not_before.is_none_or(|not_before| now >= not_before) {
                            flush(&store, &mut pending).await;
                            eager_flush_not_before = if pending.is_empty() {
                                None
                            } else {
                                Some(now + FLUSH_INTERVAL)
                            };
                        }
                    }
                }
                Some(Message::DropRecording { recording_index, ack }) => {
                    drop_recording_creates(&mut pending, recording_index);
                    let _ = ack.send(());
                }
                Some(Message::Flush(ack)) => {
                    flush(&store, &mut pending).await;
                    let _ = ack.send(());
                }
                Some(Message::Shutdown(ack)) => {
                    // Drain anything already queued behind the Shutdown so no
                    // finalise is lost, then flush a last batch.
                    while let Ok(message) = rx.try_recv() {
                        match message {
                            Message::Write(write) => merge(&mut pending, write),
                            Message::DropRecording { recording_index, ack: inner } => {
                                drop_recording_creates(&mut pending, recording_index);
                                let _ = inner.send(());
                            }
                            Message::Flush(inner) => {
                                flush(&store, &mut pending).await;
                                let _ = inner.send(());
                            }
                            Message::Shutdown(inner) => {
                                let _ = inner.send(());
                            }
                        }
                    }
                    flush_before_exit(&store, &mut pending).await;
                    let _ = ack.send(());
                    return;
                }
                // All handles dropped without an explicit shutdown — flush
                // whatever's left so a finalise isn't lost on an abrupt exit.
                None => {
                    flush_before_exit(&store, &mut pending).await;
                    return;
                }
            },
            _ = ticker.tick() => {
                flush(&store, &mut pending).await;
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::state::schema::TraceWriteStatus;
    use crate::state::store::NewRecording;
    use crate::state::StateStore;
    use tempfile::TempDir;

    async fn store_with_trace() -> (Arc<SqliteStateStore>, TempDir, String) {
        let dir = TempDir::new().unwrap();
        let store = Arc::new(
            SqliteStateStore::open(&dir.path().join("state.db"))
                .await
                .unwrap(),
        );
        let rec = store
            .create_recording(NewRecording {
                robot_id: Some("r"),
                robot_instance: Some(0),
                start_timestamp_ns: 1,
                ..Default::default()
            })
            .await
            .unwrap()
            .recording_index;
        store
            .create_trace(rec, "t1", Some("J"), Some("j"))
            .await
            .unwrap();
        (store, dir, "t1".to_string())
    }

    #[tokio::test]
    async fn coalesces_progress_and_finalises() {
        let (store, _dir, trace_id) = store_with_trace().await;
        let (handle, writer) = spawn(store.clone());

        handle.mark_writing(&trace_id);
        for bytes in [10, 20, 30, 40] {
            handle.progress(&trace_id, bytes);
        }
        handle.finalise(&trace_id, 100);
        handle.flush().await;

        let trace = store.get_trace(&trace_id).await.unwrap().unwrap();
        assert_eq!(trace.write_status, TraceWriteStatus::Written);
        assert_eq!(trace.total_bytes, 100);
        assert_eq!(trace.bytes_written, 100);

        writer.shutdown().await;
    }

    #[tokio::test]
    async fn progress_does_not_resurrect_a_failed_row() {
        let (store, _dir, trace_id) = store_with_trace().await;
        let (handle, writer) = spawn(store.clone());

        // Simulate cancel burning the row to `failed` out of band.
        store
            .update_trace(
                &trace_id,
                crate::state::store::TraceUpdate {
                    write_status: Some(TraceWriteStatus::Failed),
                    ..Default::default()
                },
            )
            .await
            .unwrap();

        // A late coalesced progress write must NOT move it back to writing.
+ handle.progress(&trace_id, 999); + handle.mark_writing(&trace_id); + handle.flush().await; + + let trace = store.get_trace(&trace_id).await.unwrap().unwrap(); + assert_eq!(trace.write_status, TraceWriteStatus::Failed); + + writer.shutdown().await; + } + + #[tokio::test] + async fn shutdown_flushes_queued_writes() { + let (store, _dir, trace_id) = store_with_trace().await; + let (handle, writer) = spawn(store.clone()); + + handle.finalise(&trace_id, 42); + // No explicit flush β€” shutdown must drain and persist it. + writer.shutdown().await; + + let trace = store.get_trace(&trace_id).await.unwrap().unwrap(); + assert_eq!(trace.write_status, TraceWriteStatus::Written); + assert_eq!(trace.total_bytes, 42); + } + + #[tokio::test] + async fn flush_retains_batch_when_apply_fails() { + let (store, _dir, trace_id) = store_with_trace().await; + let mut pending = HashMap::new(); + merge( + &mut pending, + CoalescedTraceWrite { + trace_id: trace_id.clone(), + write_status: Some(TraceWriteStatus::Written), + total_bytes: Some(99), + ..Default::default() + }, + ); + + // Force apply_trace_writes to fail by closing the write connection. + store.write_pool().close().await; + flush(&store, &mut pending).await; + + // Regression guard for H2: a failed flush must NOT silently drop the + // batch β€” a lost `finalise` would wedge the trace in `writing` and + // retain its parent recording forever. 
+ assert_eq!(pending.len(), 1, "failed flush must retain the batch"); + let retained = pending.get(&trace_id).expect("batch retained for retry"); + assert_eq!(retained.write_status, Some(TraceWriteStatus::Written)); + assert_eq!(retained.total_bytes, Some(99)); + } + + async fn store_with_recording() -> (Arc, TempDir, i64) { + let dir = TempDir::new().unwrap(); + let store = Arc::new( + SqliteStateStore::open(&dir.path().join("state.db")) + .await + .unwrap(), + ); + let rec = store + .create_recording(NewRecording { + robot_id: Some("r"), + robot_instance: Some(0), + start_timestamp_ns: 1, + ..Default::default() + }) + .await + .unwrap() + .recording_index; + (store, dir, rec) + } + + #[tokio::test] + async fn batched_create_inserts_then_finalises() { + let (store, _dir, rec) = store_with_recording().await; + let (handle, writer) = spawn(store.clone()); + + // No synchronous create_trace β€” the row is born from the batch. + handle.create("t-new", rec, Some("J"), Some("j")); + handle.mark_writing("t-new"); + handle.progress("t-new", 64); + handle.finalise("t-new", 128); + handle.flush().await; + + let trace = store.get_trace("t-new").await.unwrap().unwrap(); + assert_eq!(trace.recording_index, rec); + assert_eq!(trace.data_type.as_deref(), Some("J")); + assert_eq!(trace.write_status, TraceWriteStatus::Written); + assert_eq!(trace.total_bytes, 128); + + writer.shutdown().await; + } + + #[tokio::test] + async fn create_only_write_inserts_initializing_row() { + let (store, _dir, rec) = store_with_recording().await; + let (handle, writer) = spawn(store.clone()); + + // A sensor that starts logging mid-recording: actor spawns, sends the + // create, but no data has been appended before the flush. 
+ handle.create("t-mid", rec, Some("RGB"), Some("cam")); + handle.flush().await; + + let trace = store.get_trace("t-mid").await.unwrap().unwrap(); + assert_eq!(trace.write_status, TraceWriteStatus::Initializing); + assert_eq!(trace.recording_index, rec); + + writer.shutdown().await; + } + + #[tokio::test] + async fn drop_recording_discards_unflushed_create() { + let (store, _dir, rec) = store_with_recording().await; + let (handle, writer) = spawn(store.clone()); + + // Create queued but NOT flushed, then the recording is cancelled. + handle.create("t-cancel", rec, Some("J"), Some("j")); + handle.mark_writing("t-cancel"); + handle.drop_recording(rec).await; + handle.flush().await; + + // The orphan row must never have been inserted. + assert!(store.get_trace("t-cancel").await.unwrap().is_none()); + + writer.shutdown().await; + } +} diff --git a/rust/data_daemon/src/storage/budget.rs b/rust/data_daemon/src/storage/budget.rs new file mode 100644 index 000000000..b13b2ed4a --- /dev/null +++ b/rust/data_daemon/src/storage/budget.rs @@ -0,0 +1,383 @@ +//! Storage-budget tracking for the per-trace writers. +//! +//! Two independent limits gate every write: +//! +//! - The configured `storage_limit_bytes` (from the active profile) caps how +//! much room the daemon may consume under `recordings_root`. The tracker +//! keeps an estimate that is refreshed by a full directory scan no more +//! often than `refresh_seconds`. +//! - `min_free_disk_bytes` is the safety margin the daemon keeps free on the +//! underlying filesystem. Defaults to `MIN_FREE_DISK_BYTES = 32 MiB`. + +use std::path::{Path, PathBuf}; +use std::sync::Mutex; +use std::time::{Duration, Instant}; + +use super::paths::directory_bytes; + +/// Free-disk safety margin the daemon keeps available at all times. +pub const MIN_FREE_DISK_BYTES: u64 = 32 * 1024 * 1024; + +/// Minimum interval between full directory rescans of the used-bytes estimate. 
+pub const STORAGE_REFRESH_SECONDS: f64 = 5.0; + +/// Storage-budget configuration. +/// +/// `storage_limit_bytes = None` disables the in-tree usage cap (matching +/// today's behaviour when the operator clears `storage_limit` in the +/// profile). The free-disk safety margin always applies. +#[derive(Debug, Clone, Copy)] +pub struct StoragePolicy { + /// Maximum bytes the daemon may consume under the recordings root. + pub storage_limit_bytes: Option, + /// Minimum bytes that must remain free on the underlying filesystem. + pub min_free_disk_bytes: u64, + /// Maximum age of the cached used-bytes estimate before a rescan. + pub refresh_interval: Duration, +} + +impl Default for StoragePolicy { + fn default() -> Self { + Self { + storage_limit_bytes: None, + min_free_disk_bytes: MIN_FREE_DISK_BYTES, + refresh_interval: Duration::from_secs_f64(STORAGE_REFRESH_SECONDS), + } + } +} + +/// Outcome of a budget check. +/// +/// A binary "may I write" decision that also folds in the reason, so the +/// per-trace actor can emit a useful tracing log line and the upload +/// coordinator can pick the right backpressure response. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BudgetCheck { + /// The write is within both the storage limit and the free-disk margin. + Available, + /// The configured `storage_limit_bytes` would be exceeded. + StorageLimitExceeded { + /// Bytes the writer asked for. + requested: u64, + /// Current used-bytes estimate. + used: u64, + /// Configured cap. + limit: u64, + }, + /// The filesystem free-byte safety margin would be breached. + FilesystemFull { + /// Bytes the writer asked for. + requested: u64, + /// Free bytes reported by `statvfs`. + free: u64, + /// Safety margin from the policy. + min_free: u64, + }, +} + +impl BudgetCheck { + /// True when the writer is cleared to proceed. + pub fn is_available(self) -> bool { + matches!(self, BudgetCheck::Available) + } +} + +/// Errors raised when interrogating the underlying filesystem. 
+#[derive(Debug, thiserror::Error)] +pub enum BudgetError { + /// `statvfs` failed on the recordings root or one of its ancestors. + #[error("failed to query filesystem at {path}: {source}")] + Statvfs { + /// Path passed to `statvfs`. + path: PathBuf, + /// Underlying I/O error. + #[source] + source: std::io::Error, + }, +} + +/// Storage-budget tracker. +/// +/// Each method is thread-safe; internal state lives behind a `Mutex` so the +/// per-trace actors can reserve from a single shared instance without an +/// async hop. The estimate is updated optimistically on `reserve` and +/// reconciled by [`refresh_if_stale`](Self::refresh_if_stale). +pub struct StorageBudget { + recordings_root: PathBuf, + policy: StoragePolicy, + state: Mutex, +} + +struct BudgetState { + used_bytes: u64, + /// Cached free-space estimate, refreshed off the hot path; see + /// [`StorageBudget::refresh`]. + free_bytes: u64, + last_refresh: Instant, +} + +impl StorageBudget { + /// Open a budget tracker rooted at `recordings_root`. + /// + /// Runs an initial directory scan so the first `reserve` call sees an + /// accurate baseline. The recordings root does not need to exist yet β€” + /// the scan returns zero in that case. + pub fn new(recordings_root: impl Into, policy: StoragePolicy) -> Self { + let recordings_root = recordings_root.into(); + let used_bytes = directory_bytes(&recordings_root); + // Seed the free-space estimate; if `statvfs` is unavailable yet, assume + // ample space so writes aren't blocked until the first refresh succeeds. + let free_bytes = free_disk_bytes(&recordings_root).unwrap_or(u64::MAX); + Self { + recordings_root, + policy, + state: Mutex::new(BudgetState { + used_bytes, + free_bytes, + last_refresh: Instant::now(), + }), + } + } + + /// Borrow the recordings root used to seed this budget tracker. + #[allow(dead_code)] + pub fn recordings_root(&self) -> &Path { + &self.recordings_root + } + + /// Borrow the active policy. 
+ pub fn policy(&self) -> &StoragePolicy { + &self.policy + } + + /// Current used-bytes estimate (may be stale; call + /// [`refresh_if_stale`](Self::refresh_if_stale) for an accurate read). + pub fn used_bytes(&self) -> u64 { + self.state.lock().expect("budget state").used_bytes + } + + /// Rescan the recordings tree if the estimate is older than + /// `refresh_interval`. This is a test/best-effort convenience wrapper for a + /// synchronous refresh; production drives the live refresh from a background + /// interval task instead (see [`refresh`](Self::refresh)) so the blocking + /// I/O never lands on a trace actor's hot path. + #[allow(dead_code)] + pub fn refresh_if_stale(&self) { + let refresh_interval = self.policy.refresh_interval; + if refresh_interval.is_zero() { + return; + } + let needs_refresh = { + let state = self.state.lock().expect("budget state"); + state.last_refresh.elapsed() >= refresh_interval + }; + if needs_refresh { + self.refresh(); + } + } + + /// Reconcile the cached estimates against the filesystem now. Performs the + /// (potentially blocking) directory scan and `statvfs`, so it MUST be called + /// off any latency-critical path β€” the daemon runs it on a dedicated + /// interval task. The tree scan only matters when a storage limit is + /// configured, so it is skipped otherwise; the free-space `statvfs` always + /// runs since the free-disk margin always applies. + pub fn refresh(&self) { + let scanned = self + .policy + .storage_limit_bytes + .map(|_| directory_bytes(&self.recordings_root)); + let free = free_disk_bytes(&self.recordings_root); + let mut state = self.state.lock().expect("budget state"); + if let Some(scanned) = scanned { + state.used_bytes = scanned; + } + match free { + Ok(free) => state.free_bytes = free, + // Keep the last good reading on a transient `statvfs` error rather + // than blocking writes; a persistent failure simply means the margin + // check uses a slightly stale free-space value. 
+ Err(error) => { + tracing::warn!(%error, "statvfs refresh failed; keeping cached free space") + } + } + state.last_refresh = Instant::now(); + } + + /// Check (without committing) whether `bytes_to_write` would fit. + /// + /// Reads only the cached estimates (no filesystem I/O), so it is safe on the + /// per-frame hot path; the estimates are kept fresh by [`refresh`](Self::refresh). + pub fn check(&self, bytes_to_write: u64) -> Result { + let free = self.state.lock().expect("budget state").free_bytes; + if free < bytes_to_write.saturating_add(self.policy.min_free_disk_bytes) { + return Ok(BudgetCheck::FilesystemFull { + requested: bytes_to_write, + free, + min_free: self.policy.min_free_disk_bytes, + }); + } + + if let Some(limit) = self.policy.storage_limit_bytes { + let used = self.used_bytes(); + if used.saturating_add(bytes_to_write) > limit { + return Ok(BudgetCheck::StorageLimitExceeded { + requested: bytes_to_write, + used, + limit, + }); + } + } + + Ok(BudgetCheck::Available) + } + + /// Reserve `bytes_to_write` against the in-tree usage cap. + /// + /// Returns the same enum as [`check`](Self::check), but mutates the + /// internal estimate when the result is [`BudgetCheck::Available`] so + /// repeated calls add up across writers. The filesystem free-byte check + /// is best-effort: when it fails (e.g. `statvfs` reports a transient + /// error) the reservation fails closed β€” it is denied as if the disk were + /// full. + pub fn reserve(&self, bytes_to_write: u64) -> Result { + let check = self.check(bytes_to_write)?; + if let BudgetCheck::Available = check { + let mut state = self.state.lock().expect("budget state"); + state.used_bytes = state.used_bytes.saturating_add(bytes_to_write); + } + Ok(check) + } + + /// Release `bytes_to_release` from the in-tree usage estimate, e.g. after + /// a recording is deleted post-upload. 
pub fn release(&self, bytes_to_release: u64) {
    // Saturating: releasing more than the estimate holds clamps at zero.
    let mut state = self.state.lock().expect("budget state");
    state.used_bytes = state.used_bytes.saturating_sub(bytes_to_release);
}
} // closes `impl StorageBudget`

/// Free bytes available on the filesystem holding `path`.
///
/// Walks up the directory tree until it finds an existing ancestor, so the
/// probe succeeds even before the recordings directory has been created
/// (without itself creating any directories). For a relative `path` the walk
/// continues into the current directory (`.`) once the named components are
/// exhausted, because `Path::parent` reports the level above the first
/// component as the empty path, which `statvfs` cannot resolve.
///
/// # Errors
///
/// Returns [`BudgetError::Statvfs`] carrying the original `path` and the last
/// `statvfs` error when no ancestor could be queried.
fn free_disk_bytes(path: &Path) -> Result<u64, BudgetError> {
    let current_dir = Path::new(".");
    let mut probe = path;
    loop {
        let errno = match nix::sys::statvfs::statvfs(probe) {
            Ok(stats) => {
                let blocks_available: u64 = stats.blocks_available();
                let fragment_size: u64 = stats.fragment_size();
                return Ok(blocks_available.saturating_mul(fragment_size));
            }
            Err(errno) => errno,
        };
        probe = match probe.parent() {
            Some(parent) if !parent.as_os_str().is_empty() => parent,
            // Ran out of named components on a relative path: the next level
            // up is the current directory. Skipped when `.` itself was the
            // probe that just failed, so the walk always terminates.
            Some(_) if probe != current_dir => current_dir,
            _ => {
                return Err(BudgetError::Statvfs {
                    path: path.to_path_buf(),
                    source: std::io::Error::from(errno),
                });
            }
        };
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    fn policy_with_limit(limit: Option<u64>) -> StoragePolicy {
        StoragePolicy {
            storage_limit_bytes: limit,
            // Set the safety margin to zero so the test focuses on the
            // in-tree cap; the free-disk arm has its own test below.
+ min_free_disk_bytes: 0, + refresh_interval: Duration::from_secs(60), + } + } + + #[test] + fn reserve_accumulates_then_blocks_at_limit() { + let tempdir = TempDir::new().unwrap(); + let budget = StorageBudget::new(tempdir.path(), policy_with_limit(Some(4096))); + + assert_eq!(budget.reserve(1024).unwrap(), BudgetCheck::Available); + assert_eq!(budget.reserve(2048).unwrap(), BudgetCheck::Available); + assert_eq!(budget.used_bytes(), 3072); + + let blocked = budget.reserve(2048).unwrap(); + assert!( + matches!( + blocked, + BudgetCheck::StorageLimitExceeded { + requested: 2048, + used: 3072, + limit: 4096 + } + ), + "expected storage-limit exhaustion, got {blocked:?}" + ); + + budget.release(1024); + assert_eq!(budget.used_bytes(), 2048); + } + + #[test] + fn unlimited_policy_never_blocks_on_in_tree_usage() { + let tempdir = TempDir::new().unwrap(); + let budget = StorageBudget::new(tempdir.path(), policy_with_limit(None)); + // Request a non-trivial amount that should still comfortably fit on + // the test filesystem; with `storage_limit_bytes = None` the in-tree + // cap is disabled regardless. We deliberately stay well below disk + // capacity so the free-disk arm doesn't trip. + assert_eq!(budget.reserve(1024 * 1024).unwrap(), BudgetCheck::Available); + // Reserving repeatedly should keep returning Available without + // bookkeeping ever crossing a non-existent threshold. + for _ in 0..16 { + assert_eq!(budget.reserve(1024 * 1024).unwrap(), BudgetCheck::Available); + } + } + + #[test] + fn filesystem_full_when_safety_margin_exceeds_free_bytes() { + let tempdir = TempDir::new().unwrap(); + // A safety margin of u64::MAX is impossible to satisfy on any real + // filesystem, so the check must report `FilesystemFull` regardless of + // the in-tree usage estimate. 
+ let policy = StoragePolicy { + storage_limit_bytes: None, + min_free_disk_bytes: u64::MAX, + refresh_interval: Duration::from_secs(60), + }; + let budget = StorageBudget::new(tempdir.path(), policy); + let result = budget.check(1).unwrap(); + assert!( + matches!(result, BudgetCheck::FilesystemFull { .. }), + "expected filesystem-full, got {result:?}" + ); + } + + #[test] + fn refresh_picks_up_external_writes() { + let tempdir = TempDir::new().unwrap(); + let policy = StoragePolicy { + storage_limit_bytes: Some(8192), + min_free_disk_bytes: 0, + refresh_interval: Duration::from_millis(0).saturating_add(Duration::from_nanos(1)), + }; + let budget = StorageBudget::new(tempdir.path(), policy); + assert_eq!(budget.used_bytes(), 0); + + std::fs::write(tempdir.path().join("blob.bin"), vec![0u8; 4096]).unwrap(); + // Sleep just past the refresh interval so the rescan triggers. + std::thread::sleep(Duration::from_millis(2)); + budget.refresh_if_stale(); + assert_eq!(budget.used_bytes(), 4096); + } +} diff --git a/rust/data_daemon/src/storage/mod.rs b/rust/data_daemon/src/storage/mod.rs new file mode 100644 index 000000000..689bbf05e --- /dev/null +++ b/rust/data_daemon/src/storage/mod.rs @@ -0,0 +1,9 @@ +//! On-disk storage layout and budget tracking for trace artefacts. +//! +//! [`paths`] resolves the per-trace directory layout under +//! `recordings/{recording_id}/{data_type}/{trace_id}/`. [`budget`] guards the +//! encoder against filling the disk past the `MIN_FREE_DISK_BYTES` (32 MiB) +//! safety margin and against exceeding the configured storage limit. + +pub mod budget; +pub mod paths; diff --git a/rust/data_daemon/src/storage/paths.rs b/rust/data_daemon/src/storage/paths.rs new file mode 100644 index 000000000..5da54ab75 --- /dev/null +++ b/rust/data_daemon/src/storage/paths.rs @@ -0,0 +1,291 @@ +//! Resolve on-disk paths for a recording, trace, and its artefacts. +//! +//! Mirrors `recording_encoding_disk_manager/core/trace_filesystem.py`. The +//! 
layout is part of the hard external contract: tests and downstream tools +//! expect `{recordings_root}/{recording_id}/{data_type}/{trace_id}/` and the +//! encoders read/write specific filenames inside that directory. + +use std::path::{Path, PathBuf}; + +/// Filename for the JSON-array trace data, written by both scalar traces and +/// the video-trace sidecar. Matches `video_trace.py::TRACE_FILE` and the +/// scalar `JsonTrace` writer's default. +pub const TRACE_JSON_FILENAME: &str = "trace.json"; + +/// Filename for the H.264 lossy MP4. Matches `video_trace.py::LOSSY_VIDEO_NAME`. +pub const LOSSY_VIDEO_FILENAME: &str = "lossy.mp4"; + +/// Filename for the FFV1 lossless MP4. Matches `video_trace.py::LOSSLESS_VIDEO_NAME`. +pub const LOSSLESS_VIDEO_FILENAME: &str = "lossless.mp4"; + +/// Directory name (inside a video trace's directory) that holds the +/// producer-spooled NUT chunks awaiting daemon-side encoding. +pub const CHUNKS_DIRNAME: &str = "chunks"; + +/// Top-level directory (under `recordings_root`) the producer spools video NUT +/// chunks into before the daemon knows which recording they belong to. Keyed +/// by source + sensor because the producer cannot reference a recording. The +/// daemon relinks a chunk under its recording once routing resolves a window, +/// and reclaims the whole tree on startup (a daemon restart mid-recording +/// corrupts that recording). +pub const SPOOL_DIRNAME: &str = ".rgb_spool"; + +/// Resolve the producer's video spool directory for a `(source, sensor)` +/// stream: `{recordings_root}/.rgb_spool/{robot_id}/{instance}/{data_type}/{sensor_name}/`. +/// +/// Both producer and daemon agree on this layout so the daemon can find and +/// relink the producer's spooled NUTs. `sensor_name` is omitted from the path +/// when absent. 
+pub fn spool_dir( + recordings_root: &Path, + robot_id: &str, + robot_instance: i64, + data_type: &str, + sensor_name: Option<&str>, +) -> PathBuf { + let mut dir = recordings_root + .join(SPOOL_DIRNAME) + .join(robot_id) + .join(robot_instance.to_string()) + .join(data_type); + if let Some(sensor_name) = sensor_name { + dir = dir.join(sensor_name); + } + dir +} + +/// Build the spool filename for a chunk: +/// `chunk_{publish_ns}_{thread_id}.nut`. +/// +/// `publish_ns` (the chunk's `publish_timestamp_ns` β€” the wall-clock ns the +/// producer opened the chunk) and the producing thread's `thread_id` make the +/// name unique per `(source, sensor)` across recordings β€” a fresh recording no +/// longer reuses a previous one's filename, so the daemon's relink can never +/// collide with the next recording's spool. The daemon assigns its own +/// per-trace [`chunk_filename`] at relink time, so these values are never +/// otherwise interpreted. +pub fn spool_chunk_filename(publish_ns: i64, thread_id: i64) -> String { + format!("chunk_{publish_ns}_{thread_id}.nut") +} + +/// Resolve the full spool path for one spooled chunk. +pub fn spool_chunk_path( + recordings_root: &Path, + robot_id: &str, + robot_instance: i64, + data_type: &str, + sensor_name: Option<&str>, + publish_ns: i64, + thread_id: i64, +) -> PathBuf { + spool_dir( + recordings_root, + robot_id, + robot_instance, + data_type, + sensor_name, + ) + .join(spool_chunk_filename(publish_ns, thread_id)) +} + +/// Resolve the top-level spool directory, reclaimed wholesale on daemon start. +pub fn spool_root(recordings_root: &Path) -> PathBuf { + recordings_root.join(SPOOL_DIRNAME) +} + +/// Build the filename for a video chunk at `chunk_index` β€” `chunk_NNNN.nut`. 
+/// +/// The producer writes directly to this final path; no `.tmp` staging is +/// needed because the daemon only acts on a chunk once the producer has +/// published its [`Envelope::VideoChunkReady`], which happens after the NUT +/// writer has been finished and flushed. +/// +/// [`Envelope::VideoChunkReady`]: data_daemon_shared::Envelope::VideoChunkReady +pub fn chunk_filename(chunk_index: u32) -> String { + format!("chunk_{chunk_index:04}.nut") +} + +/// Build the filename for a per-chunk encoded lossy mp4 segment. +pub fn chunk_lossy_filename(chunk_index: u32) -> String { + format!("chunk_{chunk_index:04}_lossy.mp4") +} + +/// Build the filename for a per-chunk encoded lossless mp4 segment. +pub fn chunk_lossless_filename(chunk_index: u32) -> String { + format!("chunk_{chunk_index:04}_lossless.mp4") +} + +/// Resolve a recording's top-level directory: `{recordings_root}/{recording}`. +/// +/// `recording` is the daemon-local `recording_index` stringified β€” the same +/// value the per-trace [`TracePath`] uses as its first path segment, so this +/// directory contains every trace directory for the recording. Used by the +/// recording reaper to remove a fully-uploaded recording's artefacts in one go. +pub fn recording_dir(recordings_root: &Path, recording_index: i64) -> PathBuf { + recordings_root.join(recording_index.to_string()) +} + +/// Key for an on-disk trace directory. +/// +/// The three components map directly to the on-disk path segments: +/// `recording_id` and `trace_id` come from the producer, `data_type` is the +/// wire label carried in `StartTrace`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TracePath { + /// Recording the trace belongs to. + pub recording_id: String, + /// Wire data-type label (e.g. `"video"`, `"joints"`). + pub data_type: String, + /// Trace identifier supplied by the SDK. + pub trace_id: String, +} + +impl TracePath { + /// Compose a new key from owned strings. 
+ pub fn new( + recording_id: impl Into, + data_type: impl Into, + trace_id: impl Into, + ) -> Self { + Self { + recording_id: recording_id.into(), + data_type: data_type.into(), + trace_id: trace_id.into(), + } + } + + /// Resolve the trace directory beneath `recordings_root`. + pub fn directory(&self, recordings_root: &Path) -> PathBuf { + recordings_root + .join(&self.recording_id) + .join(&self.data_type) + .join(&self.trace_id) + } + + /// Resolve the `trace.json` path for this trace. + #[allow(dead_code)] + pub fn trace_json(&self, recordings_root: &Path) -> PathBuf { + self.directory(recordings_root).join(TRACE_JSON_FILENAME) + } + + /// Resolve the `lossy.mp4` path for this trace. + #[allow(dead_code)] + pub fn lossy_video(&self, recordings_root: &Path) -> PathBuf { + self.directory(recordings_root).join(LOSSY_VIDEO_FILENAME) + } + + /// Resolve the `lossless.mp4` path for this trace. + #[allow(dead_code)] + pub fn lossless_video(&self, recordings_root: &Path) -> PathBuf { + self.directory(recordings_root) + .join(LOSSLESS_VIDEO_FILENAME) + } + + /// Resolve the per-trace `chunks/` directory used by the producer to + /// spool NUT chunks before the daemon encodes them. Both producer and + /// daemon agree on the layout via this helper so a daemon recovery sweep + /// can find the producer's leftovers. + #[allow(dead_code)] + pub fn chunks_dir(&self, recordings_root: &Path) -> PathBuf { + self.directory(recordings_root).join(CHUNKS_DIRNAME) + } +} + +/// Sum the byte count of every regular file beneath `root`, used here by the +/// storage budget. The implementation lives in `data_daemon_shared` so the +/// producer's spool-backlog cap sums its inbox with byte-for-byte identical +/// semantics. 
+pub use data_daemon_shared::paths::directory_bytes; + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn directory_layout_matches_python_convention() { + let path = TracePath::new("rec-1", "joints", "trace-1"); + let root = Path::new("/var/data/recordings"); + assert_eq!( + path.directory(root), + PathBuf::from("/var/data/recordings/rec-1/joints/trace-1") + ); + assert_eq!( + path.trace_json(root), + PathBuf::from("/var/data/recordings/rec-1/joints/trace-1/trace.json") + ); + assert_eq!( + path.lossy_video(root), + PathBuf::from("/var/data/recordings/rec-1/joints/trace-1/lossy.mp4") + ); + assert_eq!( + path.lossless_video(root), + PathBuf::from("/var/data/recordings/rec-1/joints/trace-1/lossless.mp4") + ); + assert_eq!( + path.chunks_dir(root), + PathBuf::from("/var/data/recordings/rec-1/joints/trace-1/chunks") + ); + } + + #[test] + fn chunk_filenames_are_zero_padded() { + assert_eq!(chunk_filename(0), "chunk_0000.nut"); + assert_eq!(chunk_filename(7), "chunk_0007.nut"); + assert_eq!(chunk_filename(1234), "chunk_1234.nut"); + assert_eq!(chunk_lossy_filename(5), "chunk_0005_lossy.mp4"); + assert_eq!(chunk_lossless_filename(5), "chunk_0005_lossless.mp4"); + } + + #[test] + fn spool_chunk_filename_is_unique_per_publish_ts_and_thread() { + // The whole point of keying on `(publish_ns, thread_id)` is that two + // recordings on the same `(source, sensor)` never collide on a spool + // filename β€” distinct opens yield distinct names; identical inputs are + // stable so the daemon reconstructs exactly what the producer wrote. 
+ let first = spool_chunk_filename(1_700_000_000_000_000_000, 42); + let second = spool_chunk_filename(1_700_000_000_000_000_001, 42); + let other_thread = spool_chunk_filename(1_700_000_000_000_000_000, 43); + assert_eq!(first, "chunk_1700000000000000000_42.nut"); + assert_ne!(first, second, "a later open must not reuse the filename"); + assert_ne!(first, other_thread, "a different thread disambiguates"); + assert_eq!( + first, + spool_chunk_filename(1_700_000_000_000_000_000, 42), + "identical inputs must be stable" + ); + } + + #[test] + fn spool_chunk_path_lives_under_the_spool_dir() { + let root = Path::new("/var/data/recordings"); + let path = spool_chunk_path(root, "robot-1", 0, "RGB_IMAGES", Some("camera_0"), 150, 7); + assert_eq!( + path, + PathBuf::from( + "/var/data/recordings/.rgb_spool/robot-1/0/RGB_IMAGES/camera_0/chunk_150_7.nut" + ) + ); + assert_eq!( + spool_root(root), + PathBuf::from("/var/data/recordings/.rgb_spool") + ); + } + + #[test] + fn directory_bytes_sums_nested_files_and_ignores_missing_roots() { + let tempdir = TempDir::new().unwrap(); + let root = tempdir.path().join("recordings"); + + // Missing root: zero, no error. + assert_eq!(directory_bytes(&root), 0); + + let trace = TracePath::new("rec-1", "joints", "trace-1"); + let dir = trace.directory(&root); + std::fs::create_dir_all(&dir).unwrap(); + std::fs::write(dir.join("trace.json"), vec![0u8; 1024]).unwrap(); + std::fs::write(dir.join("extra.bin"), vec![0u8; 32]).unwrap(); + + assert_eq!(directory_bytes(&root), 1024 + 32); + } +} diff --git a/rust/data_daemon_producer/Cargo.toml b/rust/data_daemon_producer/Cargo.toml new file mode 100644 index 000000000..5b224278e --- /dev/null +++ b/rust/data_daemon_producer/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "data_daemon_producer" +version.workspace = true +edition.workspace = true +license.workspace = true +description = "Producer-side IPC client for the Neuracore data daemon, exposed to Python via PyO3." 
+ +[lib] +# `cdylib` produces the `.so` Python imports as `neuracore.data_daemon._native_producer`. +# `rlib` lets Rust integration tests still link against the library. +crate-type = ["cdylib", "rlib"] +path = "src/lib.rs" + +[dependencies] +data_daemon_shared = { path = "../data_daemon_shared" } +iceoryx2.workspace = true +libc.workspace = true +pyo3.workspace = true +# Used by the batched joint-data fast path to format each per-item +# `{"timestamp":...,"value":...}` payload. We deliberately do NOT hand-format +# via `write!`: Rust's `Display for f64` strips trailing zeros (1.0 β†’ "1"), +# which would land integer-valued joint values on disk as JSON integers +# instead of floats and break the cloud-side data-verification pass. +# serde_json uses ryu under the hood and always emits at least one fractional +# digit, matching Python's `json.dumps` shape. +serde.workspace = true +serde_json.workspace = true +thiserror.workspace = true +tracing.workspace = true + +[dev-dependencies] +tempfile = "3" diff --git a/rust/data_daemon_producer/src/lib.rs b/rust/data_daemon_producer/src/lib.rs new file mode 100644 index 000000000..2c4188859 --- /dev/null +++ b/rust/data_daemon_producer/src/lib.rs @@ -0,0 +1,447 @@ +// PyO3 0.22's `#[pyfunction]` expansion includes an `.into()` on the +// `PyResult` return value that fires clippy's `useless_conversion` lint +// when T resolves to `()`. The lint is correct about the generated code but +// the conversion lives in the macro expansion, not anything we wrote, so we +// silence it at the crate level rather than spraying allows over every +// `#[pyfunction]`. +#![allow(clippy::useless_conversion)] +// The crate-level docs link to the producer's private `#[pyfunction]` entry +// points and internal modules. CI builds docs with `--document-private-items`, +// so those links resolve; silence the lint that flags them as private. +#![allow(rustdoc::private_intra_doc_links)] + +//! 
PyO3 producer client for the Neuracore data daemon — a *thin shipper*. +//! +//! This crate ships as `neuracore.data_daemon._native_producer` inside the +//! Python wheel. It knows nothing about recordings: it publishes +//! source/sensor/timestamp-tagged data and three fire-and-forget lifecycle +//! events, and the daemon decides which recording (if any) each datum belongs +//! to. There is no trace registry, no per-frame sequence numbers, and no +//! recording identity on the wire. +//! +//! The surface the SDK's logging layer drives, all keyed by the **source** +//! `(robot_id, robot_instance)`: +//! +//! - [`start_recording`] / [`stop_recording`] / [`cancel_recording`] publish +//! one lifecycle envelope each, carrying the lifecycle wall-clock +//! `*_at_ns`. +//! - [`log_joints`] / [`log_json`] publish data envelopes tagged with the +//! sensor `(data_type, sensor_name)` and capture `timestamp_ns`. +//! - [`log_frame`] spools raw RGB into per-`(source, sensor)` NUT chunk files +//! under a recording-independent inbox and announces each finished chunk +//! with [`VideoChunkReady`](data_daemon_shared::Envelope::VideoChunkReady); the +//! daemon buckets the chunk into a recording by its frame timestamps, +//! relinks the NUT under that recording, and transcodes it. +//! +//! ## Module layout +//! +//! This file is a thin PyO3 façade: the `#[pyfunction]` wrappers do argument +//! validation, release the GIL, and delegate into the submodules. +//! +//! - [`paths`] — filesystem layout shared with the daemon (recordings root, +//! spool paths, `(source, sensor)` stream keys). +//! - [`publisher`] — per-thread iceoryx2 publisher state, fork safety, the +//! synchronous `publish`, and the background data-publisher thread. +//! - [`writer`] — the background video-writer thread, the in-progress +//! video-chunk registry, and chunk seal/announce/flush logic. +//! - [`query`] — recording-id resolution over the `queries` service. +//! 
- [`nut_writer`] β€” minimal NUT-container muxer for raw RGB video. + +pub mod nut_writer; + +mod paths; +mod publisher; +mod query; +mod writer; + +use data_daemon_shared::{Envelope, RecordingIdQuery}; +use pyo3::buffer::PyBuffer; +use pyo3::exceptions::{PyRuntimeError, PyValueError}; +use pyo3::prelude::*; + +use crate::publisher::{now_ns, publish, publisher_tx, ProducerError, PublishMsg}; +use crate::query::resolve_recording_id; +use crate::writer::{writer_queue, FrameJob, WriterMsg}; + +/// Announce that a recording has started for a source. Fire-and-forget: the +/// daemon opens a window and owns all recording identity. +/// +/// The producer stamps the window's lower bound on the publish clock +/// (`publish_timestamp_ns`, always wall-clock now) β€” that, never the caller's +/// timestamp, is what the daemon uses for window membership, so a synthetic +/// capture time can't shift the window or clip data. Separately, the recording's +/// *capture* timestamp (`timestamp_ns` when supplied, else the publish time) is +/// what the daemon stores as `start_timestamp_ns` and POSTs as the backend +/// `start_time`. The capture timestamp is returned so the caller can use it as +/// the marker that resolves the daemon-assigned cloud recording id +/// (`get_recording_id`) for this exact recording. +#[pyfunction] +#[pyo3(signature = (robot_id, robot_instance, robot_name = None, dataset_id = None, dataset_name = None, timestamp_ns = None))] +fn start_recording( + py: Python<'_>, + robot_id: &str, + robot_instance: i64, + robot_name: Option, + dataset_id: Option, + dataset_name: Option, + timestamp_ns: Option, +) -> PyResult { + if robot_id.is_empty() { + return Err(PyValueError::new_err("robot_id must not be empty")); + } + let robot_id = robot_id.to_string(); + py.allow_threads(|| -> PyResult { + let publish_timestamp_ns = now_ns(); + // Caller-supplied capture time, mirroring the `log_*` timestamp default + // (publish clock when omitted). 
Decoupled from the window boundary. + let capture_timestamp_ns = timestamp_ns.unwrap_or(publish_timestamp_ns); + publish(&Envelope::StartRecording { + robot_id, + robot_instance, + robot_name, + dataset_id, + dataset_name, + publish_timestamp_ns, + timestamp_ns: capture_timestamp_ns, + })?; + Ok(capture_timestamp_ns) + }) +} + +/// Log one scalar sample for each of several joints captured at the same +/// instant, packed into one `BatchedData` envelope. +/// +/// **Flattened transfer:** the joint names arrive as a single `\0`-joined +/// string and the values as one flat list, so the GIL-held cost is one string +/// copy plus one `Vec` extraction. The previous `Vec<(String, f64)>` +/// signature made PyO3 extract N `(name, value)` tuples β€” N allocations + N +/// downcasts under the GIL β€” which dominated this call at high joint counts +/// (~1000 joints β‰ˆ 2 ms). The names are split and zipped with the values on the +/// publisher thread, off this path. +#[pyfunction] +#[pyo3(signature = (robot_id, robot_instance, data_type, names, values, timestamp_ns, timestamp_s = None))] +#[allow(clippy::too_many_arguments)] +fn log_joints( + py: Python<'_>, + robot_id: &str, + robot_instance: i64, + data_type: &str, + names: &str, + values: Vec, + timestamp_ns: i64, + timestamp_s: Option, +) -> PyResult<()> { + if robot_id.is_empty() || data_type.is_empty() { + return Err(PyValueError::new_err( + "robot_id and data_type must not be empty", + )); + } + if values.is_empty() { + return Ok(()); + } + let robot_id = robot_id.to_string(); + let data_type = data_type.to_string(); + let joined_names = names.to_string(); + py.allow_threads(move || { + // Stamp the window-routing clock at enqueue (inside the recording + // window). The publisher thread splits the names, zips them with the + // values, serialises, and publishes the `BatchedData`, keeping that work + // β€” and the synchronous IPC publish, which can briefly block on a full + // commands buffer β€” off this call. 
+ let publish_timestamp_ns = now_ns(); + let _ = publisher_tx().send(PublishMsg::Joint { + robot_id, + robot_instance, + data_type, + joined_names, + values, + timestamp_ns, + timestamp_s, + publish_timestamp_ns, + }); + }); + Ok(()) +} + +/// Log one video frame for a camera. The frame is appended to the +/// `(source, sensor)` in-progress NUT chunk under the inbox; when the chunk +/// crosses the chunk-flush threshold a [`Envelope::VideoChunkReady`] is +/// published. +#[pyfunction] +#[pyo3(signature = (robot_id, robot_instance, data_type, name, width, height, payload, timestamp_ns, timestamp_s = None))] +#[allow(clippy::too_many_arguments)] +fn log_frame( + py: Python<'_>, + robot_id: &str, + robot_instance: i64, + data_type: &str, + name: &str, + width: u32, + height: u32, + payload: PyBuffer, + timestamp_ns: i64, + timestamp_s: Option, +) -> PyResult<()> { + if robot_id.is_empty() || data_type.is_empty() || name.is_empty() { + return Err(PyValueError::new_err( + "robot_id, data_type and name must not be empty", + )); + } + if width == 0 || height == 0 { + return Err(PyValueError::new_err("width and height must be non-zero")); + } + let expected_bytes = (width as usize) + .saturating_mul(height as usize) + .saturating_mul(3); + let actual_bytes = payload.item_count(); + if actual_bytes != expected_bytes { + return Err(PyValueError::new_err(format!( + "video frame buffer is {actual_bytes} bytes; expected width*height*3 = {expected_bytes}" + ))); + } + if !payload.is_c_contiguous() { + return Err(PyValueError::new_err( + "video frame buffer must be C-contiguous", + )); + } + // Resolve the recordings root *here*, on the GIL, before copying the frame + // or handing it to the writer thread. 
Video is the only path that needs the + // root (it spools NUT chunks under it), so a host with no `$HOME` and no + // `NEURACORE_DAEMON_RECORDINGS_ROOT` fails this call with a clear Python + // error rather than the writer thread spooling somewhere the daemon never + // looks (silent data loss) or panicking across the FFI boundary. + crate::paths::recordings_root() + .map_err(|message| PyRuntimeError::new_err(message.to_string()))?; + let resolved_timestamp_s = timestamp_s.unwrap_or_else(|| timestamp_ns as f64 / 1_000_000_000.0); + + // SAFETY: PyO3 holds the GIL here, the buffer is validated `u8` and + // C-contiguous, the length comes from `PyBuffer::item_count`, and we only + // read. The frame is owned by the caller's numpy array, which may be reused + // the instant this call returns, so we *copy* it into the job under the GIL + // (as the buffer protocol requires). + let data = unsafe { + std::slice::from_raw_parts(payload.buf_ptr() as *const u8, actual_bytes).to_vec() + }; + + let job = FrameJob { + robot_id: robot_id.to_string(), + robot_instance, + data_type: data_type.to_string(), + sensor_name: name.to_string(), + width, + height, + timestamp_ns, + timestamp_s: resolved_timestamp_s, + data, + }; + + // Hand off to the writer with the GIL released: enqueuing only blocks under + // sustained overload (the byte caps), and blocking there while holding the + // GIL would stall every Python thread in the process. A frame that cannot be + // admitted before the spool-stall window elapses surfaces as an error rather + // than being silently dropped, so the caller learns the daemon has stalled. + py.allow_threads(move || writer_queue().push(WriterMsg::Frame(job))) + .map_err(|_| { + PyRuntimeError::new_err( + "video logging stalled: the data daemon is not draining the spool \ + backlog (frame rejected after 1s of backpressure)", + ) + })?; + Ok(()) +} + +/// Log one JSON sample for any non-joint, non-video data type, delivered +/// verbatim as a `Data` envelope. 
+/// +/// `data_type` is an opaque wire label and `payload` is already-serialized +/// bytes, so this is the generic single-sample path: scalars, poses, gripper +/// amounts, language, point clouds and any future JSON type all flow through +/// here unchanged. The daemon classifies the label downstream +/// (see `content_type_for`); it imposes no allowlist. +#[pyfunction] +#[pyo3(signature = (robot_id, robot_instance, data_type, name, payload, timestamp_ns, timestamp_s = None))] +#[allow(clippy::too_many_arguments)] +fn log_json( + py: Python<'_>, + robot_id: &str, + robot_instance: i64, + data_type: &str, + name: &str, + payload: &[u8], + timestamp_ns: i64, + timestamp_s: Option, +) -> PyResult<()> { + if robot_id.is_empty() || data_type.is_empty() || name.is_empty() { + return Err(PyValueError::new_err( + "robot_id, data_type and name must not be empty", + )); + } + let robot_id = robot_id.to_string(); + let data_type = data_type.to_string(); + let name = name.to_string(); + let owned_payload = payload.to_vec(); + py.allow_threads(move || { + // Stamp the window-routing clock at enqueue; the publisher thread + // publishes the `Data` envelope off this call (see [`PublishMsg::Json`]). + let publish_timestamp_ns = now_ns(); + let _ = publisher_tx().send(PublishMsg::Json { + robot_id, + robot_instance, + data_type, + sensor_name: name, + payload: owned_payload, + timestamp_ns, + timestamp_s, + publish_timestamp_ns, + }); + }); + Ok(()) +} + +/// Flush any tail video chunks for the source, then publish one +/// `StopRecording`. The flush happens before the stop publish so the in-order +/// delivery contract on this thread's publisher delivers the chunk first. +/// +/// The producer stamps the window's upper bound on the publish clock here +/// (`publish_timestamp_ns`, always wall-clock now), so the whole publish clock +/// is owned by the producer (consistent with the data envelopes). 
Every video +/// chunk routes by its *open* time, which is strictly inside the recording, so +/// the exact value of this boundary no longer has to be reconciled with a tail +/// chunk. The recording's *capture* stop time (`timestamp_ns` when supplied, +/// else the publish time) is separate β€” it is stored as `stop_timestamp_ns` and +/// POSTed as the backend `end_time`, never used for window membership. +#[pyfunction] +#[pyo3(signature = (robot_id, robot_instance, timestamp_ns = None))] +fn stop_recording( + py: Python<'_>, + robot_id: &str, + robot_instance: i64, + timestamp_ns: Option, +) -> PyResult<()> { + if robot_id.is_empty() { + return Err(PyValueError::new_err("robot_id must not be empty")); + } + let robot_id = robot_id.to_string(); + py.allow_threads(|| -> PyResult<()> { + let publish_timestamp_ns = now_ns(); + // Caller-supplied capture time, mirroring the `log_*` timestamp default + // (publish clock when omitted). Decoupled from the window boundary. + let capture_timestamp_ns = timestamp_ns.unwrap_or(publish_timestamp_ns); + // Barrier on the writer: it drains every frame still queued for this + // source (FIFO), seals the tail chunks and announces them, then acks. + // Blocking here means the stop never returns until those chunks are + // durably spooled + announced, so a process exit right after + // `stop_recording` can't lose them. + let (ack_tx, ack_rx) = std::sync::mpsc::channel(); + // Control messages bypass the frame caps, so this never blocks or stalls. + let _ = writer_queue().push(WriterMsg::FlushSource { + robot_id: robot_id.clone(), + robot_instance, + ack: ack_tx, + }); + let _ = ack_rx.recv(); + // Publish `StopRecording` from THIS (the calling) thread's publisher β€” + // the same port as `StartRecording` β€” so consecutive recordings' start + // and stop boundaries stay strictly ordered for the daemon. 
The tail + // chunks were announced on the writer's port just above; the daemon's + // holdback + closing-window retention route them into this window even + // though they ride a different port. + publish(&Envelope::StopRecording { + robot_id, + robot_instance, + publish_timestamp_ns, + timestamp_ns: capture_timestamp_ns, + })?; + Ok(()) + }) +} + +/// Cancel a recording β€” drop the source's in-progress chunk state without +/// flushing (the daemon's cancel handler removes the relinked artefacts and +/// the recovery sweep reclaims any spooled NUTs). +/// +/// A cancel is a recording stop that discards data, so it carries the same +/// capture `timestamp_ns` as `stop_recording` (the caller's value, else the +/// publish clock); the daemon stores it as `stop_timestamp_ns` and POSTs it as +/// the backend `end_time`. +#[pyfunction] +#[pyo3(signature = (robot_id, robot_instance, timestamp_ns = None))] +fn cancel_recording( + py: Python<'_>, + robot_id: &str, + robot_instance: i64, + timestamp_ns: Option, +) -> PyResult<()> { + if robot_id.is_empty() { + return Err(PyValueError::new_err("robot_id must not be empty")); + } + let robot_id = robot_id.to_string(); + py.allow_threads(|| -> PyResult<()> { + let capture_timestamp_ns = timestamp_ns.unwrap_or_else(now_ns); + // Barrier on the writer: it drains any frames still queued for this + // source (FIFO) and drops the in-progress chunk state without announcing + // it, then acks. Block until acked so the cancel is ordered after those + // frames and no late chunk for this recording is announced. + let (ack_tx, ack_rx) = std::sync::mpsc::channel(); + // Control messages bypass the frame caps, so this never blocks or stalls. + let _ = writer_queue().push(WriterMsg::DropSource { + robot_id: robot_id.clone(), + robot_instance, + ack: ack_tx, + }); + let _ = ack_rx.recv(); + // Publish `CancelRecording` from THIS (the calling) thread's publisher, + // ordered with Start/Stop on the same port (see the writer module note). 
+ publish(&Envelope::CancelRecording { + robot_id, + robot_instance, + timestamp_ns: capture_timestamp_ns, + })?; + Ok(()) + }) +} + +/// Resolve the daemon-owned cloud `recording_id` for a recording, blocking with +/// the GIL released until the id is available or `timeout_s` elapses. +/// +/// The thin producer never mints recording identity β€” the daemon allocates the +/// cloud id asynchronously after `/recording/start`. This asks the daemon over +/// the `queries` request-response service (identifying the recording by its +/// source + capture `timestamp_ns` marker) and returns the id once minted, or +/// `None` on timeout / when no daemon is answering. Safe for +/// non-performance-critical paths only (tests, `stop_recording(wait=True)`). +#[pyfunction] +#[pyo3(signature = (robot_id, robot_instance, timestamp_ns, timeout_s))] +fn get_recording_id( + py: Python<'_>, + robot_id: &str, + robot_instance: i64, + timestamp_ns: i64, + timeout_s: f64, +) -> PyResult> { + if robot_id.is_empty() { + return Err(PyValueError::new_err("robot_id must not be empty")); + } + let query = RecordingIdQuery { + robot_id: robot_id.to_string(), + robot_instance, + timestamp_ns, + }; + let request_bytes = query.encode().map_err(ProducerError::from)?; + py.allow_threads(|| -> PyResult> { + Ok(resolve_recording_id(&request_bytes, timeout_s)?) + }) +} + +/// Python module entrypoint registered as `neuracore.data_daemon._native_producer`. 
+#[pymodule] +fn _native_producer(module: &Bound<'_, PyModule>) -> PyResult<()> { + module.add_function(wrap_pyfunction!(start_recording, module)?)?; + module.add_function(wrap_pyfunction!(log_joints, module)?)?; + module.add_function(wrap_pyfunction!(log_frame, module)?)?; + module.add_function(wrap_pyfunction!(log_json, module)?)?; + module.add_function(wrap_pyfunction!(stop_recording, module)?)?; + module.add_function(wrap_pyfunction!(cancel_recording, module)?)?; + module.add_function(wrap_pyfunction!(get_recording_id, module)?)?; + Ok(()) +} diff --git a/rust/data_daemon_producer/src/nut_writer.rs b/rust/data_daemon_producer/src/nut_writer.rs new file mode 100644 index 000000000..c7dd8940b --- /dev/null +++ b/rust/data_daemon_producer/src/nut_writer.rs @@ -0,0 +1,929 @@ +//! Minimal NUT-container muxer for a single raw-RGB24 video stream. +//! +//! The video trace actor spools captured frames into a `raw.nut` file with +//! this writer; the file is then handed off to an `ffmpeg` transcoder. +//! +//! The output is intentionally the bare minimum NUT spec elements needed for +//! `ffprobe` to report the stream geometry: file id string, main header, +//! stream header, one syncpoint, and one frame packet per captured RGB +//! buffer. We deliberately skip the optional index packet so the file stays +//! crash-safe β€” a truncated tail still demuxes up to the last complete frame. +//! +//! See `https://ffmpeg.org/~michael/nut.txt` for the authoritative spec. The +//! bit-level layout is non-obvious in several places (frame-code table +//! run-length encoding, `coded_pts` lsb/msb form, CRC-32/MPEG-2 with +//! MSB-first polynomial 0x04C11DB7) so the helpers below carry inline +//! commentary explaining *why* each magic value is what it is. + +use std::fs::{File, OpenOptions}; +use std::io::{self, BufWriter, Write}; +use std::os::unix::io::AsRawFd; +use std::path::{Path, PathBuf}; +use std::sync::OnceLock; + +/// Fixed 25-byte file identifier mandated by the NUT spec. 
The trailing NUL +/// is part of the signature. +const FILE_ID_STRING: &[u8] = b"nut/multimedia container\0"; + +/// 64-bit big-endian startcodes. All begin with the ASCII byte `'N'` (0x4E) +/// so demuxers can resync by scanning for that byte and then matching the +/// remaining 56 bits. +const MAIN_STARTCODE: u64 = 0x4E4D_7A56_1F5F_04AD; +const STREAM_STARTCODE: u64 = 0x4E53_1140_5BF2_F9DB; +const SYNCPOINT_STARTCODE: u64 = 0x4E4B_E4AD_EECA_4569; + +/// NUT bitstream version we emit. Version 3 is the long-stable spec; version +/// 4 introduced extra stream-header fields we do not need. +const NUT_VERSION: u64 = 3; + +/// `msb_pts_shift` advertised in the stream header. Conventional value of 7 +/// means the short-form `coded_pts` representation occupies the low 7 bits. +/// We always write the full-pts form (encoded as `pts + (1 << 7)`), but the +/// stream header still has to advertise a value so demuxers can compute the +/// short form when they encounter it. +const MSB_PTS_SHIFT: u64 = 7; + +/// Maximum distance (bytes) the spec allows between consecutive startcodes. +/// 65536 disables the policy in practice for our use case while still being a +/// legal value (the spec clamps values above 65536 back down to 65536). +const MAX_DISTANCE: u64 = 65536; + +/// `data_size_mul` used in the single populated frame-code table entry. With +/// `mul = 1` and `lsb = 0`, the frame's `data_size_msb` carries the entire +/// frame byte count, which is exactly what we want for variable-size raw RGB +/// frames. +const TABLE_MUL: u64 = 1; + +// Frame-flag bit positions defined by the NUT spec (see `nut.txt` Β§"flags"). +// These are *bit positions*, not packed values, so the actual flag is `1 << +// bit`. We keep them as constants for readability in the frame-emit path. 
+const FLAG_KEY: u64 = 1 << 0; // bit 0 +const FLAG_CODED_PTS: u64 = 1 << 3; // bit 3 +const FLAG_STREAM_ID: u64 = 1 << 4; // bit 4 +const FLAG_SIZE_MSB: u64 = 1 << 5; // bit 5 +const FLAG_CHECKSUM: u64 = 1 << 6; // bit 6 +const FLAG_CODED: u64 = 1 << 12; // bit 12 β€” "read coded_flags from the stream" +const FLAG_INVALID: u64 = 1 << 13; // bit 13 β€” entry is unusable + +/// Frame code byte used for every frame we emit. The spec earmarks 0xFF as +/// an "all explicit" entry by convention; combined with `FLAG_CODED` in the +/// table, this means every frame carries its own flags inline. +const FRAME_CODE_ALL_EXPLICIT: u8 = 0xFF; + +/// Configuration captured at writer-creation time. The writer is single +/// stream and assumes packed RGB24 (3 bytes per pixel, no padding). +#[derive(Debug, Clone, Copy)] +pub struct NutVideoConfig { + /// Frame width in pixels. Must be non-zero. + pub width: u32, + /// Frame height in pixels. Must be non-zero. + pub height: u32, + /// Time-base numerator. For 30 fps capture use `1`. + pub time_base_num: u32, + /// Time-base denominator. For 30 fps capture use `30`. + pub time_base_den: u32, +} + +/// Errors raised by [`NutWriter`]. +#[derive(Debug, thiserror::Error)] +pub enum NutError { + /// Configuration values violated NUT spec invariants (e.g. zero width). + #[error("invalid NUT configuration: {0}")] + InvalidConfig(&'static str), + /// Frame buffer size did not match `width * height * 3`. + #[error("frame buffer size mismatch: expected {expected} bytes, got {actual}")] + FrameSize { + /// Expected number of bytes for one packed RGB24 frame. + expected: usize, + /// Actual buffer length supplied by the caller. + actual: usize, + }, + /// Failed to create the parent directory or open the output file. + #[error("failed to open NUT file {path}: {source}")] + Open { + /// Path that failed to open. + path: PathBuf, + /// Underlying I/O error. + #[source] + source: io::Error, + }, + /// Failed to write buffered bytes to disk. 
+ #[error("failed to write NUT file {path}: {source}")] + Write { + /// Path being written to. + path: PathBuf, + /// Underlying I/O error. + #[source] + source: io::Error, + }, +} + +/// Append-only NUT muxer. +/// +/// `create` writes the header packets (file id, main, stream, syncpoint) and +/// flushes them so the file is parseable from the very first frame onwards. +/// Each [`write_frame`](Self::write_frame) call appends one frame packet +/// **without** an explicit per-frame `flush` β€” the [`BufWriter`] holds up to +/// [`BUF_WRITER_CAPACITY_BYTES`] of pending bytes and drains either when the +/// next write would overflow it or when [`finish`](Self::finish) is called +/// at chunk close. Per-frame flushing was previously used as a crash-safety +/// belt-and-braces, but the chunk-based design now relies on the daemon's +/// startup recovery sweep to delete any partial chunk anyway, so the +/// in-buffer tail is going to be discarded if the producer dies mid-chunk. +/// In exchange we amortise small writes (syncpoint + frame header, ~30 +/// bytes) into the next big frame's syscall, cutting tail latency for +/// every frame size. +pub struct NutWriter { + path: PathBuf, + writer: BufWriter, + config: NutVideoConfig, + /// Bytes physically written to disk so far (header + flushed frames). + /// Tracked by us rather than queried from the file because `BufWriter` + /// doesn't expose a cheap byte counter. + bytes_written: u64, + /// Number of bytes a well-formed RGB24 frame must occupy. Cached so the + /// per-frame size check stays in a single `usize`. + expected_frame_bytes: usize, + /// File offset of the most recently written syncpoint packet. Drives the + /// bytes-since-last-syncpoint check that triggers the next periodic + /// syncpoint. + last_syncpoint_offset: u64, + /// File offset up to which an async writeback hint has been issued. The + /// next hint covers `[last_writeback_hint, bytes_written)`. 
+ last_writeback_hint: u64, +} + +/// Emit a new syncpoint when the bytes-since-last-syncpoint would exceed +/// this threshold once the next frame is appended. We pick well below +/// [`MAX_DISTANCE`] (65536) so even a worst-case oversized header keeps the +/// distance within spec; ffmpeg's NUT demuxer rejects the file with +/// `Last frame must have been damaged X > 100 + max_distance` once that +/// budget is blown. +const SYNCPOINT_INTERVAL_BYTES: u64 = 32_768; + +/// [`BufWriter`] capacity. Picked from a matrix benchmark as the size +/// that's never meaningfully slower than the alternatives across the +/// 64Γ—64 (12 KiB) β†’ 1920Γ—1920 (10 MiB) frame range we expect, and +/// strictly better than the 8 KiB default for very small frames (where +/// many frames coalesce per syscall) and for very large frames (where the +/// buffer absorbs short writes the kernel returns under writeback +/// pressure). The caveat: in the 0.5-3 MiB frame zone this buffer can cause +/// occasional ~7 ms flush spikes when a frame tips it past capacity and the +/// whole 8 MiB drains in one `write` β€” a known tail-latency cost, absorbed by +/// the writer thread rather than a `log_*` caller. +const BUF_WRITER_CAPACITY_BYTES: usize = 8 * 1024 * 1024; + +/// Issue an async writeback hint at least this often, measured by bytes +/// appended since the last hint. +/// +/// A chunk is 256 MiB ([`CHUNK_FLUSH_BYTES`](crate)) and the kernel's +/// `balance_dirty_pages` throttle fires on the *system-wide* dirty-page count β€” +/// so at 1080p@60 across multiple cameras hundreds of MiB of dirty pages can +/// pile up before a chunk closes, then a single `write()` hard-stalls for +/// hundreds of ms. Hinting writeback every 16 MiB starts the kernel draining +/// those pages continuously, keeping the dirty footprint bounded (β‰ˆ interval Γ— +/// active streams) so the throttle never reaches a hard stall. 
It's only a +/// handful of cheap syscalls per chunk β€” far coarser than per-frame, far finer +/// than per-chunk (which would arrive after the stall already happened). +/// +/// We only hint writeback (`SYNC_FILE_RANGE_WRITE`); we do **not** drop the +/// pages (`fadvise(DONTNEED)`), because the daemon re-reads each NUT to +/// transcode it β€” cleaning the pages keeps them cache-warm for that read while +/// still relieving write pressure. +const WRITEBACK_HINT_INTERVAL_BYTES: u64 = 16 * 1024 * 1024; + +impl NutWriter { + /// Create a NUT file at `path` and emit the four mandatory header + /// elements. The parent directory is created if missing. + pub fn create(path: &Path, config: NutVideoConfig) -> Result { + if config.width == 0 || config.height == 0 { + return Err(NutError::InvalidConfig("width and height must be non-zero")); + } + if config.time_base_num == 0 || config.time_base_den == 0 { + return Err(NutError::InvalidConfig( + "time_base_num and time_base_den must be non-zero", + )); + } + + if let Some(parent) = path.parent() { + if !parent.as_os_str().is_empty() { + std::fs::create_dir_all(parent).map_err(|source| NutError::Open { + path: parent.to_path_buf(), + source, + })?; + } + } + + let file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(path) + .map_err(|source| NutError::Open { + path: path.to_path_buf(), + source, + })?; + + let expected_frame_bytes = (config.width as usize) + .checked_mul(config.height as usize) + .and_then(|pixels| pixels.checked_mul(3)) + .ok_or(NutError::InvalidConfig( + "width * height * 3 overflows usize", + ))?; + + let mut writer = NutWriter { + path: path.to_path_buf(), + writer: BufWriter::with_capacity(BUF_WRITER_CAPACITY_BYTES, file), + config, + bytes_written: 0, + expected_frame_bytes, + last_syncpoint_offset: 0, + last_writeback_hint: 0, + }; + writer.write_headers()?; + Ok(writer) + } + + /// Path being written to. 
+ pub fn path(&self) -> &Path { + &self.path + } + + /// Total bytes appended to the chunk so far. Counts bytes handed to the + /// `BufWriter` (which may still be buffered in memory, not yet `fsync`'d), + /// so it is the logical chunk size β€” what the flush threshold keys off β€” not + /// a durability guarantee. + pub fn bytes_written(&self) -> u64 { + self.bytes_written + } + + /// Append one raw-RGB24 frame at the supplied PTS (frame index in + /// time-base ticks). The supplied slice must be exactly + /// `width * height * 3` bytes long. + pub fn write_frame(&mut self, pts: u64, rgb_bytes: &[u8]) -> Result<(), NutError> { + if rgb_bytes.len() != self.expected_frame_bytes { + return Err(NutError::FrameSize { + expected: self.expected_frame_bytes, + actual: rgb_bytes.len(), + }); + } + + // Emit a periodic syncpoint before appending the frame so the gap + // between consecutive syncpoints stays within the demuxer's reach. + // Without this, files containing more than a few frames of >16 KiB + // each are rejected as "Last frame must have been damaged". + let bytes_since_last = self + .bytes_written + .saturating_sub(self.last_syncpoint_offset); + let projected_frame_bytes = rgb_bytes.len() as u64 + 32; + if bytes_since_last.saturating_add(projected_frame_bytes) > SYNCPOINT_INTERVAL_BYTES { + self.write_syncpoint(pts)?; + } + + // Frame packets are the *only* NUT packets without a startcode. The + // demuxer differentiates them by inspecting the next byte: any value + // other than 'N' (0x4E) is treated as a `frame_code`. 0xFF is our + // "all explicit" entry, so we follow it with the inline coded_flags, + // stream_id, coded_pts, data_size_msb, and a checksum over that + // header. + let mut header = Vec::with_capacity(16); + header.push(FRAME_CODE_ALL_EXPLICIT); + + // coded_flags XORed against the table entry's FLAG_CODED produces the + // actual frame flags. 
Table entry starts as FLAG_CODED only, and we + // want KEY|STREAM_ID|CODED_PTS|SIZE_MSB|CHECKSUM, so: + // coded_flags = FLAG_CODED ^ (KEY|STREAM_ID|CODED_PTS|SIZE_MSB|CHECKSUM) + let target_flags = + FLAG_KEY | FLAG_STREAM_ID | FLAG_CODED_PTS | FLAG_SIZE_MSB | FLAG_CHECKSUM; + let coded_flags = FLAG_CODED ^ target_flags; + vencode(&mut header, coded_flags); + + // stream_id β€” always 0, single-stream file. + vencode(&mut header, 0); + + // coded_pts: the "full pts" form is `pts + (1 << msb_pts_shift)`. + // Decoder side: any value >= (1 << msb_pts_shift) is treated as full + // pts and the offset is subtracted back off. This keeps us correct + // for arbitrarily large pts values without worrying about the + // lsb/last_pts reconstruction path. + let coded_pts = pts + .checked_add(1u64 << MSB_PTS_SHIFT) + .ok_or(NutError::InvalidConfig("pts overflow when encoding"))?; + vencode(&mut header, coded_pts); + + // data_size = data_size_lsb (0) + data_size_msb * data_size_mul (1) + // = data_size_msb, so we encode the raw frame length here. + vencode(&mut header, rgb_bytes.len() as u64); + + // Frame header checksum: CRC32/MPEG-2 over framecode + all header + // bytes up to (but not including) the checksum itself. + let checksum = crc32_nut(&header); + header.extend_from_slice(&checksum.to_be_bytes()); + + self.write_all(&header)?; + self.write_all(rgb_bytes)?; + + // Once enough has accumulated, kick off async writeback for it so dirty + // pages drain continuously instead of piling up to the throttle's hard + // stall. Best-effort: never fails a frame. + if self.bytes_written.saturating_sub(self.last_writeback_hint) + >= WRITEBACK_HINT_INTERVAL_BYTES + { + self.hint_writeback(); + } + Ok(()) + } + + /// Ask the kernel to start writing back the bytes appended since the last + /// hint, *without waiting* (`SYNC_FILE_RANGE_WRITE`). This bounds the + /// producer's dirty-page footprint so a later `write()` doesn't hard-stall + /// under `balance_dirty_pages` throttling. 
The pages are cleaned but not + /// evicted, so the daemon's subsequent transcode read still hits cache. + /// + /// Best-effort: the buffered tail is flushed into the page cache first so + /// the whole range is eligible, then the marker advances regardless of the + /// syscall result β€” a writeback hint is a pure optimisation and must never + /// drop a frame or surface an error. + fn hint_writeback(&mut self) { + // Push the BufWriter's contents into the page cache so the full range + // is writeback-eligible. A real write error here is surfaced by the + // next `write_all`; here we just skip the hint. + if self.flush().is_err() { + return; + } + let offset = self.last_writeback_hint; + let nbytes = self.bytes_written.saturating_sub(offset); + self.last_writeback_hint = self.bytes_written; + if nbytes == 0 { + return; + } + let fd = self.writer.get_ref().as_raw_fd(); + // SAFETY: `fd` is the open NUT file's descriptor (valid for the + // lifetime of `self.writer`), offset/nbytes are non-negative, and + // `SYNC_FILE_RANGE_WRITE` only *queues* writeback β€” it does not block, + // mutate user memory, or take ownership of the fd. + let result = unsafe { + libc::sync_file_range( + fd, + offset as libc::off64_t, + nbytes as libc::off64_t, + libc::SYNC_FILE_RANGE_WRITE, + ) + }; + if result != 0 { + tracing::debug!( + errno = %io::Error::last_os_error(), + path = %self.path.display(), + "sync_file_range writeback hint failed (ignored)", + ); + } + } + + /// Flush any remaining buffered bytes and return the total bytes + /// written. + pub fn finish(mut self) -> Result { + self.flush()?; + Ok(self.bytes_written) + } + + fn write_headers(&mut self) -> Result<(), NutError> { + // file_id_string is the only part of the file that is *not* wrapped + // in a packet; everything after it is a sequence of packet_header / + // payload / packet_footer triples (or, for frames, a bare + // frame_code-based packet). 
+ self.write_all(FILE_ID_STRING)?; + + let main_payload = build_main_header_payload(self.config); + let main_packet = wrap_packet(MAIN_STARTCODE, &main_payload); + self.write_all(&main_packet)?; + + let stream_payload = build_stream_header_payload(self.config); + let stream_packet = wrap_packet(STREAM_STARTCODE, &stream_payload); + self.write_all(&stream_packet)?; + + // First syncpoint: global_key_pts = 0, back_ptr_div16 = 0 (no prior + // syncpoint to chain back to). Record its on-disk offset so the + // periodic re-emit in `write_frame` can chain `back_ptr_div16`. + let syncpoint_offset = self.bytes_written; + let syncpoint_payload = build_syncpoint_payload(0, 0); + let syncpoint_packet = wrap_packet(SYNCPOINT_STARTCODE, &syncpoint_payload); + self.write_all(&syncpoint_packet)?; + self.last_syncpoint_offset = syncpoint_offset; + + Ok(()) + } + + /// Emit a fresh syncpoint with `global_key_pts = pts`. `back_ptr_div16` + /// is always 0 β€” the field is a *seek* hint and the spec requires the + /// real distance to be 16-byte-aligned, which we cannot guarantee + /// without padding every packet. Setting 0 advertises "no usable back + /// chain"; ffmpeg's NUT demuxer falls back to linear scanning, which + /// is exactly what the on-demand transcode pass needs. 
+ fn write_syncpoint(&mut self, pts: u64) -> Result<(), NutError> { + let new_offset = self.bytes_written; + let payload = build_syncpoint_payload(pts, 0); + let packet = wrap_packet(SYNCPOINT_STARTCODE, &payload); + self.write_all(&packet)?; + self.last_syncpoint_offset = new_offset; + Ok(()) + } + + fn write_all(&mut self, bytes: &[u8]) -> Result<(), NutError> { + self.writer + .write_all(bytes) + .map_err(|source| NutError::Write { + path: self.path.clone(), + source, + })?; + self.bytes_written = self.bytes_written.saturating_add(bytes.len() as u64); + Ok(()) + } + + fn flush(&mut self) -> Result<(), NutError> { + self.writer.flush().map_err(|source| NutError::Write { + path: self.path.clone(), + source, + }) + } +} + +/// Build the main-header payload (everything between `forward_ptr` and the +/// trailing packet checksum). Kept separate so the packet-framing helper +/// can compute lengths without re-deriving the payload. +fn build_main_header_payload(config: NutVideoConfig) -> Vec { + let mut payload = Vec::with_capacity(64); + + vencode(&mut payload, NUT_VERSION); + vencode(&mut payload, 1); // stream_count + vencode(&mut payload, MAX_DISTANCE); + vencode(&mut payload, 1); // time_base_count + vencode(&mut payload, config.time_base_num as u64); + vencode(&mut payload, config.time_base_den as u64); + + // Frame-code table. The decode loop walks i from 0..256, consuming + // count entries per "row". The slot i == 'N' (78) is auto-marked + // INVALID without consuming a count, so a run of 254 entries starting + // at i=0 lands the cursor on i=255 even though 254 + 1 (the 'N' freebie) + // = 255 increments. The second row of count=1 then fills entry 0xFF + // with the all-explicit FLAG_CODED behaviour we use for every frame. + // + // tmp_fields = 6 means the row carries tmp_pts, tmp_mul, tmp_stream, + // tmp_size, tmp_res, and count (in that order). Everything past that + // (tmp_match, tmp_head_idx, etc.) keeps its prior value. 
+ // Row 1: 254 INVALID entries covering i = 0..='N'-1, 'N'+1..=0xFE. + vencode(&mut payload, FLAG_INVALID); // tmp_flag + vencode(&mut payload, 6); // tmp_fields + sencode(&mut payload, 0); // tmp_pts + vencode(&mut payload, TABLE_MUL); // tmp_mul + vencode(&mut payload, 0); // tmp_stream + vencode(&mut payload, 0); // tmp_size + vencode(&mut payload, 0); // tmp_res + vencode(&mut payload, 254); // count + + // Row 2: single FLAG_CODED entry that lands on i = 0xFF. + vencode(&mut payload, FLAG_CODED); // tmp_flag + vencode(&mut payload, 6); // tmp_fields + sencode(&mut payload, 0); // tmp_pts (unused for FLAG_CODED frames) + vencode(&mut payload, TABLE_MUL); // tmp_mul (= 1 so data_size_msb = data_size) + vencode(&mut payload, 0); // tmp_stream + vencode(&mut payload, 0); // tmp_size (lsb = 0) + vencode(&mut payload, 0); // tmp_res + vencode(&mut payload, 1); // count + + // Version >= 3 main header tail. + vencode(&mut payload, 0); // header_count_minus1 β€” no elision headers + vencode(&mut payload, 0); // main_flags β€” no BROADCAST_MODE + + payload +} + +/// Build the video stream-header payload. +fn build_stream_header_payload(config: NutVideoConfig) -> Vec { + let mut payload = Vec::with_capacity(48); + + vencode(&mut payload, 0); // stream_id + vencode(&mut payload, 0); // stream_class β€” 0 = video + + // fourcc as a `vb`: length-prefixed bytes. "RGB\x18" advertises packed + // RGB24 (8 bits per channel, 24 bpp). FFmpeg's libavformat maps this + // fourcc to `AV_CODEC_ID_RAWVIDEO` with pix_fmt = `AV_PIX_FMT_RGB24`. + let fourcc: &[u8] = b"RGB\x18"; + vencode(&mut payload, fourcc.len() as u64); + payload.extend_from_slice(fourcc); + + vencode(&mut payload, 0); // time_base_id + vencode(&mut payload, MSB_PTS_SHIFT); + vencode(&mut payload, 1); // max_pts_distance β€” we always include FLAG_CHECKSUM anyway + vencode(&mut payload, 0); // decode_delay β€” no B-frames in raw video + // stream_flags = 0. 
We deliberately do *not* set FLAG_FIXED_FPS: our + // time_base is microsecond ticks (1/1_000_000), and FLAG_FIXED_FPS would + // tell downstream demuxers the stream runs at exactly 1/time_base fps + // i.e. one million fps. ffmpeg honours that on transcode by inflating the + // output to ~10 million frames per 10 s clip (duplicating every real + // input frame across all 1-Β΅s slots), which makes the encode effectively + // never complete. Real camera capture is variable-rate; an honest VFR + // stream is what we want. + vencode(&mut payload, 0); // stream_flags + vencode(&mut payload, 0); // codec_specific_data length + + // Video-class tail. + vencode(&mut payload, config.width as u64); + vencode(&mut payload, config.height as u64); + vencode(&mut payload, 1); // sample_width β€” square pixels + vencode(&mut payload, 1); // sample_height + vencode(&mut payload, 0); // colorspace_type β€” unknown + + payload +} + +/// Build the syncpoint payload. The writer emits these periodically (see +/// `write_frame`) to keep the inter-syncpoint distance within the demuxer's +/// reach. +fn build_syncpoint_payload(global_key_pts: u64, back_ptr_div16: u64) -> Vec { + let mut payload = Vec::with_capacity(8); + + // global_key_pts (t): tmp = pts * time_base_count + time_base_id. With + // time_base_count = 1 and time_base_id = 0 this is just the supplied + // pts value, which is the PTS of the first frame after this syncpoint. + vencode(&mut payload, global_key_pts); + // back_ptr_div16 β€” distance back to the previous syncpoint, in 16-byte + // units. This writer always passes 0 (no back-chain); demuxers fall back to + // a linear scan when seeking. + vencode(&mut payload, back_ptr_div16); + payload +} + +/// Wrap a payload into a complete NUT packet: startcode, forward_ptr, +/// optional header_checksum, payload, trailing CRC. +/// +/// `forward_ptr` is defined by the spec as the distance from the first byte +/// after the `packet_header` (i.e. 
the start of the payload) to the first +/// byte of the *next* packet. That distance equals `payload.len() + 4` (the +/// trailing checksum). If `forward_ptr > 4096` the spec requires an extra +/// `header_checksum u32` between `forward_ptr` and the payload β€” we'd never +/// hit that for our header packets in practice, but we honour it so the +/// helper is reusable. +fn wrap_packet(startcode: u64, payload: &[u8]) -> Vec { + let forward_ptr = payload.len() as u64 + 4; // +4 for the trailing checksum + let needs_header_checksum = forward_ptr > 4096; + + let mut packet = Vec::with_capacity(8 + 9 + 4 + payload.len() + 4); + packet.extend_from_slice(&startcode.to_be_bytes()); + vencode(&mut packet, forward_ptr); + if needs_header_checksum { + // header_checksum covers startcode + forward_ptr bytes only. + let header_checksum = crc32_nut(&packet); + packet.extend_from_slice(&header_checksum.to_be_bytes()); + } + + // The packet checksum covers everything between the packet_header and + // the checksum itself β€” i.e. the payload bytes only. Snapshot the + // position so the slice we hash is unambiguous. + let payload_start = packet.len(); + packet.extend_from_slice(payload); + let checksum = crc32_nut(&packet[payload_start..]); + packet.extend_from_slice(&checksum.to_be_bytes()); + packet +} + +/// Append `value` as a NUT variable-length unsigned integer. +/// +/// Encoding is big-endian: every byte except the last has its high bit set +/// to mean "more bytes follow"; the last byte has its high bit clear. The +/// low 7 bits of each byte carry value bits, with the most-significant 7 +/// bits of `value` emitted first. +fn vencode(out: &mut Vec, value: u64) { + // Count how many 7-bit groups are needed. At least one (so 0 encodes as + // a single 0x00 byte). + let mut bits_needed = 7; + while bits_needed < 64 && (value >> bits_needed) != 0 { + bits_needed += 7; + } + + // Emit groups MSB first, setting the continuation bit on all but the + // last byte. 
+ let mut shift = bits_needed - 7; + loop { + let chunk = ((value >> shift) & 0x7F) as u8; + if shift == 0 { + out.push(chunk); + return; + } + out.push(chunk | 0x80); + shift -= 7; + } +} + +/// Append `value` as a NUT signed variable-length integer. +/// +/// Zig-zag encoding: positive `x` maps to `2x`, negative `x` to `2|x| - 1`. +/// This keeps small-magnitude values short regardless of sign. +fn sencode(out: &mut Vec, value: i64) { + let encoded = if value >= 0 { + (value as u64).wrapping_mul(2) + } else { + // Widen through i128 so unsigned_abs() is correct even for i64::MIN. + let magnitude = (value as i128).unsigned_abs() as u64; + magnitude.wrapping_mul(2).wrapping_sub(1) + }; + vencode(out, encoded); +} + +/// Decode a NUT variable-length unsigned integer starting at `offset`. +/// Returns `(value, bytes_consumed)`. Used only by the unit tests, but kept +/// in the main module to keep encoding/decoding side by side. +#[cfg(test)] +fn vdecode(bytes: &[u8], offset: usize) -> (u64, usize) { + let mut value: u64 = 0; + let mut consumed = 0; + loop { + let byte = bytes[offset + consumed]; + consumed += 1; + value = (value << 7) | u64::from(byte & 0x7F); + if byte & 0x80 == 0 { + return (value, consumed); + } + } +} + +/// Decode a NUT signed variable-length integer (inverse of [`sencode`]). +#[cfg(test)] +fn sdecode(bytes: &[u8], offset: usize) -> (i64, usize) { + let (raw, consumed) = vdecode(bytes, offset); + let value = if raw & 1 == 1 { + -(raw.div_ceil(2) as i64) + } else { + (raw / 2) as i64 + }; + (value, consumed) +} + +/// CRC32/MPEG-2: polynomial 0x04C11DB7, init 0, MSB-first, no final XOR. +/// +/// NUT spec Β§"crc32 checksum" specifies this exact variant; using the more +/// common IEEE 802.3 reversed CRC silently produces unparseable files. 
+fn crc32_nut(bytes: &[u8]) -> u32 { + static TABLE: OnceLock<[u32; 256]> = OnceLock::new(); + let table = TABLE.get_or_init(|| { + let mut table = [0u32; 256]; + for (index, slot) in table.iter_mut().enumerate() { + // Build the lookup entry for one input byte by shifting the + // byte into the high end of the register and reducing eight + // times by the polynomial whenever the top bit is set. + let mut value = (index as u32) << 24; + for _ in 0..8 { + if value & 0x8000_0000 != 0 { + value = (value << 1) ^ 0x04C1_1DB7; + } else { + value <<= 1; + } + } + *slot = value; + } + table + }); + + let mut crc: u32 = 0; + for &byte in bytes { + let index = ((crc >> 24) as u8 ^ byte) as usize; + crc = (crc << 8) ^ table[index]; + } + crc +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + use std::process::Command; + use tempfile::TempDir; + + #[test] + fn vencode_round_trip() { + // Boundary values from the spec: single-byte cap (127), 14-bit cap + // (16383), and a representative wide value. + for value in [ + 0u64, + 1, + 126, + 127, + 128, + 255, + 16_383, + 16_384, + 1_000_000, + u64::MAX, + ] { + let mut buffer = Vec::new(); + vencode(&mut buffer, value); + let (decoded, consumed) = vdecode(&buffer, 0); + assert_eq!(decoded, value, "vencode/vdecode disagreed for {value}"); + assert_eq!(consumed, buffer.len(), "extra bytes left over"); + } + } + + #[test] + fn vencode_known_short_forms() { + // Spot-check the literal byte sequences called out in the spec so a + // regression in the encoder is caught even if the decoder is also + // broken in the same way. 
+ let mut buffer = Vec::new(); + vencode(&mut buffer, 0); + assert_eq!(buffer, vec![0x00]); + buffer.clear(); + vencode(&mut buffer, 127); + assert_eq!(buffer, vec![0x7F]); + buffer.clear(); + vencode(&mut buffer, 128); + assert_eq!(buffer, vec![0x81, 0x00]); + buffer.clear(); + vencode(&mut buffer, 16_384); + assert_eq!(buffer, vec![0x81, 0x80, 0x00]); + } + + #[test] + fn sencode_round_trip() { + for value in [ + 0i64, + 1, + -1, + 63, + -63, + 64, + -64, + 8_192, + -8_192, + i64::from(i32::MAX), + i64::from(i32::MIN), + ] { + let mut buffer = Vec::new(); + sencode(&mut buffer, value); + let (decoded, consumed) = sdecode(&buffer, 0); + assert_eq!(decoded, value, "sencode/sdecode disagreed for {value}"); + assert_eq!(consumed, buffer.len(), "extra bytes left over"); + } + } + + #[test] + fn crc32_known_vector() { + // NUT uses polynomial 0x04C11DB7, init = 0, MSB-first, no final + // XOR. Cross-checked against an independent reference implementation + // of the same parameters for the standard "123456789" input. (The + // more famous CRC-32/MPEG-2 check constant 0x0376E6E7 corresponds to + // init = 0xFFFFFFFF, which NUT does *not* use.) 
+ assert_eq!(crc32_nut(b"123456789"), 0x89A1_897F); + } + + #[test] + fn file_starts_with_nut_id() { + let tempdir = TempDir::new().unwrap(); + let path = tempdir.path().join("raw.nut"); + let writer = NutWriter::create( + &path, + NutVideoConfig { + width: 4, + height: 4, + time_base_num: 1, + time_base_den: 30, + }, + ) + .unwrap(); + writer.finish().unwrap(); + + let bytes = std::fs::read(&path).unwrap(); + assert!(bytes.len() >= FILE_ID_STRING.len()); + assert_eq!(&bytes[..FILE_ID_STRING.len()], FILE_ID_STRING); + } + + #[test] + fn rejects_wrong_frame_size() { + let tempdir = TempDir::new().unwrap(); + let path = tempdir.path().join("raw.nut"); + let mut writer = NutWriter::create( + &path, + NutVideoConfig { + width: 4, + height: 4, + time_base_num: 1, + time_base_den: 30, + }, + ) + .unwrap(); + let too_small = vec![0u8; 10]; + let err = writer.write_frame(0, &too_small).unwrap_err(); + assert!(matches!( + err, + NutError::FrameSize { + expected: 48, + actual: 10 + } + )); + } + + /// Locate `ffprobe`. Returns `None` (with a logged note) if it is not on + /// PATH so the test can skip cleanly rather than fail in sandboxes that + /// lack the FFmpeg suite. + fn locate_ffprobe() -> Option { + // `which ffprobe` is the most portable check across the Linux + // distributions we run CI on. Falling back to a literal "ffprobe" + // string lets the eventual `Command::new` produce a clear error if + // the binary disappears between this lookup and execution. + let output = Command::new("which").arg("ffprobe").output().ok()?; + if !output.status.success() { + return None; + } + let path = String::from_utf8(output.stdout).ok()?; + let trimmed = path.trim(); + if trimmed.is_empty() { + None + } else { + Some(PathBuf::from(trimmed)) + } + } + + #[test] + fn ffprobe_recognises_stream_metadata() { + let ffprobe = match locate_ffprobe() { + Some(path) => path, + None => { + eprintln!( + "ffprobe not on PATH β€” skipping NUT metadata validation. 
\ + Install `ffmpeg` to enable this test." + ); + return; + } + }; + + let tempdir = TempDir::new().unwrap(); + let path = tempdir.path().join("raw.nut"); + let config = NutVideoConfig { + width: 16, + height: 16, + time_base_num: 1, + time_base_den: 30, + }; + let mut writer = NutWriter::create(&path, config).unwrap(); + + // Distinct pixel values per frame so a future regression that + // duplicates frame data is also caught by inspection. + let frame_count = 4u64; + for index in 0..frame_count { + let mut buffer = vec![0u8; 16 * 16 * 3]; + for (pixel_index, chunk) in buffer.chunks_mut(3).enumerate() { + chunk[0] = ((pixel_index + index as usize) & 0xFF) as u8; + chunk[1] = ((pixel_index * 3 + index as usize) & 0xFF) as u8; + chunk[2] = ((pixel_index * 5 + index as usize) & 0xFF) as u8; + } + writer.write_frame(index, &buffer).unwrap(); + } + writer.finish().unwrap(); + + let output = Command::new(&ffprobe) + .args([ + "-v", + "error", + "-print_format", + "json", + "-show_streams", + "-count_frames", + ]) + .arg(&path) + .output() + .expect("spawn ffprobe"); + + assert!( + output.status.success(), + "ffprobe exited with {:?}: stderr={}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); + + let parsed: serde_json::Value = + serde_json::from_slice(&output.stdout).expect("ffprobe should emit valid JSON"); + let streams = parsed["streams"].as_array().expect("streams array present"); + assert_eq!( + streams.len(), + 1, + "expected exactly one stream, got {streams:?}" + ); + + let stream = &streams[0]; + assert_eq!(stream["codec_type"], "video"); + assert_eq!(stream["width"], 16); + assert_eq!(stream["height"], 16); + + // `-count_frames` populates `nb_read_frames`; fall back to + // `nb_frames` if the build of ffprobe in question prefers it. 
+ let frame_field = stream + .get("nb_read_frames") + .or_else(|| stream.get("nb_frames")) + .and_then(|value| value.as_str()) + .and_then(|s| s.parse::().ok()); + if let Some(reported) = frame_field { + assert_eq!( + reported, frame_count, + "ffprobe reported {reported} frames, expected {frame_count}" + ); + } + } +} diff --git a/rust/data_daemon_producer/src/paths.rs b/rust/data_daemon_producer/src/paths.rs new file mode 100644 index 000000000..2a68b87b5 --- /dev/null +++ b/rust/data_daemon_producer/src/paths.rs @@ -0,0 +1,103 @@ +//! Filesystem layout shared with the daemon. +//! +//! The producer spools NUT chunks into a recording-independent inbox under the +//! daemon's recordings root. The path helpers here mirror the daemon's +//! `storage::paths` / `config::env` byte-for-byte so the daemon finds exactly +//! what the producer wrote, and the `stream_key` helpers key the in-progress +//! video-chunk registry by `(source, sensor)`. + +use std::path::{Path, PathBuf}; +use std::sync::LazyLock; + +/// Spool directory name β€” must match `storage::paths::SPOOL_DIRNAME` on the +/// daemon side. +const SPOOL_DIRNAME: &str = ".rgb_spool"; + +/// Recordings root, resolved once per process via the shared resolver +/// ([`data_daemon_shared::paths::recordings_root`]) so the producer and daemon +/// always compute the same root from the same inputs. +/// +/// The `Err` case β€” no `$HOME` and no `NEURACORE_DAEMON_RECORDINGS_ROOT` +/// override β€” is surfaced to Python as a `PyErr` at the `log_frame` boundary +/// (see [`recordings_root`]). It is never allowed to panic across the FFI +/// boundary, nor to silently fall back to a scratch dir the daemon would never +/// read (which would lose the user's video). +static RECORDINGS_ROOT: LazyLock> = LazyLock::new(|| { + data_daemon_shared::paths::recordings_root().map_err(|error| error.to_string()) +}); + +/// The resolved recordings root, or the resolution error message. 
/// Composite registry key for one `(source, sensor)` video stream, laid out as
/// `robot_id\0robot_instance\0data_type\0sensor_name`.
///
/// NOTE(review): the join is only unambiguous while no component contains a
/// NUL byte; nothing in this function enforces that — confirm against callers.
pub(crate) fn stream_key(
    robot_id: &str,
    robot_instance: i64,
    data_type: &str,
    sensor_name: &str,
) -> String {
    let instance = robot_instance.to_string();
    [robot_id, instance.as_str(), data_type, sensor_name].join("\u{0}")
}

/// Prefix shared by every stream key of one source
/// (`robot_id\0robot_instance\0`), used to match all of a source's streams.
pub(crate) fn source_prefix(robot_id: &str, robot_instance: i64) -> String {
    let instance = robot_instance.to_string();
    // The trailing empty field makes `join` emit the closing separator.
    [robot_id, instance.as_str(), ""].join("\u{0}")
}

/// Recover `(data_type, sensor_name)` from a [`stream_key`]; the leading
/// `robot_id` and instance fields are discarded. A key with fewer than four
/// fields yields empty strings for whatever is missing.
pub(crate) fn split_stream_key(key: &str) -> (String, String) {
    // At most four fields, so any further NUL stays inside `sensor_name`.
    let mut fields = key.splitn(4, '\u{0}').skip(2).map(str::to_owned);
    let data_type = fields.next().unwrap_or_default();
    let sensor_name = fields.next().unwrap_or_default();
    (data_type, sensor_name)
}
+/// +/// The root is validated at the `log_frame` boundary before any frame is +/// enqueued, so on the writer thread this is effectively infallible β€” but it +/// returns `Option` rather than `.expect()`-ing, because a writer-thread panic +/// would silently kill the thread (no more frames, no error surfaced) instead +/// of being logged and recovered from. +pub(crate) fn spool_dir( + robot_id: &str, + robot_instance: i64, + data_type: &str, + sensor_name: &str, +) -> Option { + recordings_root().ok().map(|root| { + root.join(SPOOL_DIRNAME) + .join(robot_id) + .join(robot_instance.to_string()) + .join(data_type) + .join(sensor_name) + }) +} + +/// The video spool inbox root (`{recordings_root}/.rgb_spool`). The producer's +/// entire on-disk video backlog lives under here; the writer thread sums it to +/// enforce the spool-backlog cap. `None` when the recordings root is unresolved +/// (the same condition `log_frame` already rejects on the GIL). +pub(crate) fn spool_root() -> Option { + recordings_root().ok().map(|root| root.join(SPOOL_DIRNAME)) +} + +/// Spool chunk filename β€” must match `storage::paths::spool_chunk_filename`. +pub(crate) fn spool_chunk_filename(publish_ns: i64, thread_id: i64) -> String { + format!("chunk_{publish_ns}_{thread_id}.nut") +} diff --git a/rust/data_daemon_producer/src/publisher.rs b/rust/data_daemon_producer/src/publisher.rs new file mode 100644 index 000000000..3f72ac132 --- /dev/null +++ b/rust/data_daemon_producer/src/publisher.rs @@ -0,0 +1,526 @@ +//! IPC plumbing: per-thread iceoryx2 publisher state and the background data +//! publisher thread. +//! +//! ## Threading +//! +//! iceoryx2's [`Publisher`] is neither `Send` nor `Sync`, so it is parked in a +//! [`thread_local`]: each Python thread that calls in lazily builds its own +//! iceoryx2 [`Node`] and a publisher on the commands service. +//! +//! ## Fork safety +//! +//! A one-shot `pthread_atfork` child handler clears the forking thread's +//! 
`PRODUCER` slot so the next publish rebuilds. +//! +//! ## Background data publisher +//! +//! Synchronous IPC publishes (`BatchedData` joints, `Data` JSON, and the +//! `VideoChunkReady` chunk announcements) can briefly block on a full commands +//! buffer when the daemon's listener is preempted off-CPU under heavy +//! (multi-context) load. Routing them through a dedicated per-process publisher +//! thread keeps that block off BOTH the caller's `log_*` thread AND the disk +//! writer thread — crucially, the writer's stop/cancel *barrier* then waits only +//! for the durable on-disk seal, never for an IPC publish. +//! +//! All three are held-back *data* on the daemon side (routed by +//! `publish_timestamp_ns` within the holdback + closing-window retention), so — +//! unlike lifecycle envelopes (`Start/Stop/CancelRecording`, which stay on the +//! caller's publisher for strict ordering) — reordering them onto this thread's +//! publisher is safe. The queue is unbounded: messages are small/infrequent and +//! the thread keeps up in steady state; a transient daemon stall buffers only a +//! few hundred small items. 
+ +use std::cell::RefCell; +use std::sync::mpsc::{Receiver, Sender}; +use std::sync::{LazyLock, Mutex, Once}; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; + +use data_daemon_shared::service_name::{ + COMMANDS, COMMANDS_MAX_PAYLOAD_BYTES, LIFECYCLE_SUBSCRIBER_BUFFER_SIZE, MAX_NODES_PER_SERVICE, + MAX_PUBLISHERS_PER_SERVICE, MAX_QUERY_CLIENTS_PER_SERVICE, MAX_QUERY_SERVERS_PER_SERVICE, + MAX_SUBSCRIBERS_PER_SERVICE, QUERIES, QUERIES_MAX_PAYLOAD_BYTES, +}; +use data_daemon_shared::{BatchedDataItem, Envelope}; +use iceoryx2::node::{Node, NodeBuilder}; +use iceoryx2::port::client::Client; +use iceoryx2::port::publisher::Publisher; +use iceoryx2::prelude::{ipc, UnableToDeliverStrategy}; +use iceoryx2::service::port_factory::publish_subscribe::PortFactory; +use iceoryx2::service::port_factory::request_response::PortFactory as QueryPortFactory; +use pyo3::exceptions::PyRuntimeError; +use pyo3::PyErr; +use thiserror::Error; + +/// Errors raised while publishing envelopes to the daemon. +#[derive(Debug, Error)] +pub(crate) enum ProducerError { + /// Failed to build the iceoryx2 node. + #[error("failed to create iceoryx2 node: {0}")] + NodeCreate(String), + /// Failed to open or create an iceoryx2 service. + #[error("failed to open service: {0}")] + ServiceOpen(String), + /// Failed to build the publisher port. + #[error("failed to create publisher: {0}")] + PublisherCreate(String), + /// Failed to loan a slice sample. + #[error("failed to loan sample: {0}")] + Loan(String), + /// Failed to send the loaned sample. + #[error("failed to send sample: {0}")] + Send(String), + /// Failed to encode the envelope. + #[error(transparent)] + Encode(#[from] data_daemon_shared::EnvelopeCodecError), + /// Payload too large for the configured iceoryx2 max slice length. + #[error("envelope payload {actual} bytes exceeds limit {limit} bytes")] + PayloadTooLarge { + /// Actual encoded envelope size. + actual: usize, + /// Maximum slice length the publisher was built with. 
+ limit: usize, + }, +} + +impl From for PyErr { + fn from(error: ProducerError) -> Self { + PyRuntimeError::new_err(error.to_string()) + } +} + +/// Per-thread iceoryx2 state. +pub(crate) struct ProducerState { + _node: Node, + _commands_service: PortFactory, + commands_publisher: Publisher, + /// Service handle held alongside the query client so port discovery doesn't + /// race the handle going out of scope. + _queries_service: QueryPortFactory, + /// Request-response client used by `get_recording_id` to ask the daemon + /// for a recording's cloud id. + pub(crate) queries_client: Client, +} + +/// Work item for the publisher thread. +pub(crate) enum PublishMsg { + /// A batch of joint `(name, value)` samples to serialise + publish as one + /// `BatchedData`. Serialisation happens on the publisher thread, off the + /// caller. + Joint { + robot_id: String, + robot_instance: i64, + data_type: String, + /// Joint names joined by `\0` (split + zipped with `values` here, off + /// the caller's GIL-held path). + joined_names: String, + values: Vec, + timestamp_ns: i64, + timestamp_s: Option, + publish_timestamp_ns: i64, + }, + /// One JSON sample to publish as a `Data` envelope. + Json { + robot_id: String, + robot_instance: i64, + data_type: String, + sensor_name: String, + payload: Vec, + timestamp_ns: i64, + timestamp_s: Option, + publish_timestamp_ns: i64, + }, + /// A pre-built `VideoChunkReady` envelope to announce (built by the writer + /// thread once the chunk is sealed on disk). + Announce(Envelope), +} + +/// Process-wide publisher handle, healed across `fork` via `owner_pid` (mirrors +/// `VIDEO_WRITER`). +struct PublisherRegistry { + owner_pid: u32, + tx: Option>, +} + +static PUBLISHER: LazyLock> = LazyLock::new(|| { + Mutex::new(PublisherRegistry { + owner_pid: 0, + tx: None, + }) +}); + +thread_local! { + /// Per-thread cache of the process publisher channel. 
The hot `log_*` path + /// hits this slot β€” a plain TLS load with no global `Mutex` and no + /// `getpid()` syscall (glibc removed the pid cache, so `process::id()` is a + /// real syscall on every call). Const-initialised and cleared by + /// `on_fork_in_child` alongside `PRODUCER`, so a forked child rebuilds. + static PUBLISHER_TX: RefCell>> = const { RefCell::new(None) }; +} + +/// Return this process's publisher channel, spawning the publisher thread on +/// first use and re-spawning after a fork. +/// +/// Fast path: the thread-local cache (no lock, no syscall). Slow path (first +/// call on a thread, or first call after a fork cleared the slot): heal/spawn +/// the process publisher under the global lock and cache the channel. +pub(crate) fn publisher_tx() -> Sender { + if let Some(tx) = PUBLISHER_TX.with(|cell| cell.borrow().clone()) { + return tx; + } + let (tx, healthy) = publisher_tx_global(); + // Only cache a channel whose receiver is alive. On a spawn failure the + // receiver was dropped, so caching the dead `Sender` would make every later + // `log_*` on this thread fail silently with no re-spawn; skip the cache so + // the slow path re-attempts the spawn on the next call. + if healthy { + PUBLISHER_TX.with(|cell| *cell.borrow_mut() = Some(tx.clone())); + } + tx +} + +/// Heal/spawn the process-wide publisher thread under the global lock and +/// return its channel. Keyed by `owner_pid` so a post-fork child re-spawns. The +/// returned flag is `false` when the spawn failed (the channel's receiver is +/// gone), so the caller knows not to cache it. 
+fn publisher_tx_global() -> (Sender, bool) { + let mut reg = PUBLISHER.lock().unwrap_or_else(|p| p.into_inner()); + let pid = std::process::id(); + if reg.owner_pid == pid { + if let Some(tx) = reg.tx.as_ref() { + return (tx.clone(), true); + } + } + let (tx, rx) = std::sync::mpsc::channel(); + match std::thread::Builder::new() + .name("nc-data-publisher".to_string()) + .spawn(move || publish_loop(rx)) + { + Ok(_handle) => { + reg.owner_pid = pid; + reg.tx = Some(tx.clone()); + (tx, true) + } + Err(error) => { + tracing::error!(%error, "failed to spawn data publisher thread; dropping sample"); + (tx, false) + } + } +} + +/// The publisher thread's run loop: publish every queued data envelope. Exits +/// when the last [`Sender`] is dropped (the channel closes). +fn publish_loop(rx: Receiver) { + while let Ok(msg) = rx.recv() { + let result = match msg { + PublishMsg::Joint { + robot_id, + robot_instance, + data_type, + joined_names, + values, + timestamp_ns, + timestamp_s, + publish_timestamp_ns, + } => { + let timestamp_for_json = + timestamp_s.unwrap_or_else(|| timestamp_ns as f64 / 1_000_000_000.0); + let mut batch_items = Vec::with_capacity(values.len()); + // Split the `\0`-joined names and pair each with its value β€” all + // on the publisher thread, off the caller's GIL-held path. The + // caller guarantees name/value counts match; `zip` is a safety + // net if they ever don't (it stops at the shorter). 
+ for (name, value) in joined_names.split('\u{0}').zip(values) { + match serde_json::to_vec(&ScalarFrameEntry { + timestamp: timestamp_for_json, + value, + }) { + Ok(payload) => batch_items.push(BatchedDataItem { + sensor_name: Some(name.to_string()), + payload, + }), + Err(error) => { + tracing::warn!(%error, "failed to encode joint frame JSON; dropping item") + } + } + } + publish(&Envelope::BatchedData { + robot_id, + robot_instance, + data_type, + publish_timestamp_ns, + timestamp_ns, + timestamp_s, + items: batch_items, + }) + } + PublishMsg::Json { + robot_id, + robot_instance, + data_type, + sensor_name, + payload, + timestamp_ns, + timestamp_s, + publish_timestamp_ns, + } => publish(&Envelope::Data { + robot_id, + robot_instance, + data_type, + sensor_name: Some(sensor_name), + publish_timestamp_ns, + timestamp_ns, + timestamp_s, + payload, + }), + PublishMsg::Announce(envelope) => publish(&envelope), + }; + if let Err(error) = result { + tracing::warn!(%error, "failed to publish data envelope"); + } + } +} + +thread_local! { + /// One iceoryx2 publisher set per OS thread. Const-initialised so the slot + /// is a plain TLS load β€” required for the `pthread_atfork` child handler to + /// access it without invoking a lazy initializer in a post-fork context. + static PRODUCER: RefCell> = const { RefCell::new(None) }; +} + +/// Run `f` against this thread's producer state, lazily building it on first +/// use. +pub(crate) fn with_producer( + operation: impl FnOnce(&ProducerState) -> Result, +) -> Result { + PRODUCER.with(|cell| { + // Single `borrow_mut` for the whole operation: lazily build the + // producer state on first use, then run the operation against it. 
+ let mut slot = cell.borrow_mut(); + if slot.is_none() { + *slot = Some(build_producer_state()?); + } + operation( + slot.as_ref() + .expect("producer state populated immediately above"), + ) + }) +} + +fn build_producer_state() -> Result { + ensure_fork_handler_registered(); + + let node = NodeBuilder::new() + .create::() + .map_err(|error| ProducerError::NodeCreate(error.to_string()))?; + + let (commands_service, commands_publisher) = open_publisher( + &node, + COMMANDS, + LIFECYCLE_SUBSCRIBER_BUFFER_SIZE, + COMMANDS_MAX_PAYLOAD_BYTES, + )?; + + let (queries_service, queries_client) = open_query_client(&node, QUERIES)?; + + Ok(ProducerState { + _node: node, + _commands_service: commands_service, + commands_publisher, + _queries_service: queries_service, + queries_client, + }) +} + +/// Open (or attach to) the `[u8]` request-response `queries` service off `node` +/// and build a client on it. Config mirrors the daemon's `open_query_server` +/// so `open_or_create` reconciles to the same service attributes regardless of +/// which side comes up first. +#[allow(clippy::type_complexity)] +fn open_query_client( + node: &Node, + service_name: &str, +) -> Result< + ( + QueryPortFactory, + Client, + ), + ProducerError, +> { + let parsed_name = service_name + .try_into() + .map_err(|error| ProducerError::ServiceOpen(format!("invalid service name: {error}")))?; + let service = node + .service_builder(&parsed_name) + .request_response::<[u8], [u8]>() + .max_clients(MAX_QUERY_CLIENTS_PER_SERVICE) + .max_servers(MAX_QUERY_SERVERS_PER_SERVICE) + .max_nodes(MAX_NODES_PER_SERVICE) + .open_or_create() + .map_err(|error| ProducerError::ServiceOpen(error.to_string()))?; + let client = service + .client_builder() + .initial_max_slice_len(QUERIES_MAX_PAYLOAD_BYTES) + .create() + .map_err(|error| ProducerError::PublisherCreate(error.to_string()))?; + Ok((service, client)) +} + +/// Open (or attach to) one `[u8]` pub/sub service off `node` and build a +/// publisher on it. 
+#[allow(clippy::type_complexity)] +fn open_publisher( + node: &Node, + service_name: &str, + subscriber_buffer_size: usize, + max_slice_len: usize, +) -> Result< + ( + PortFactory, + Publisher, + ), + ProducerError, +> { + let parsed_name = service_name + .try_into() + .map_err(|error| ProducerError::ServiceOpen(format!("invalid service name: {error}")))?; + let service = node + .service_builder(&parsed_name) + .publish_subscribe::<[u8]>() + // Disable iceoryx2's default safe-overflow so a full subscriber buffer + // makes `Block` take effect rather than silently evicting the oldest + // sample. Must match the daemon's `open_subscriber`. + .enable_safe_overflow(false) + .subscriber_max_buffer_size(subscriber_buffer_size) + .max_publishers(MAX_PUBLISHERS_PER_SERVICE) + .max_subscribers(MAX_SUBSCRIBERS_PER_SERVICE) + .max_nodes(MAX_NODES_PER_SERVICE) + .open_or_create() + .map_err(|error| ProducerError::ServiceOpen(error.to_string()))?; + let publisher = service + .publisher_builder() + .initial_max_slice_len(max_slice_len) + .unable_to_deliver_strategy(UnableToDeliverStrategy::Block) + .create() + .map_err(|error| ProducerError::PublisherCreate(error.to_string()))?; + Ok((service, publisher)) +} + +/// Install the `pthread_atfork` child handler exactly once per process. +fn ensure_fork_handler_registered() { + static REGISTER: Once = Once::new(); + REGISTER.call_once(|| { + // SAFETY: `pthread_atfork` is the standard libc primitive for + // registering fork callbacks. `on_fork_in_child` is `extern "C"`, + // touches only a const-initialised TLS slot, and the only "work" it + // does is `mem::forget`. 
+ let result = unsafe { libc::pthread_atfork(None, None, Some(on_fork_in_child)) }; + if result != 0 { + tracing::warn!( + errno = result, + "pthread_atfork registration failed; fork-safety relies on caller-managed cleanup", + ); + } + }); +} + +/// `pthread_atfork` child callback: clears the surviving thread's `PRODUCER` +/// slot so the next [`with_producer`] rebuilds fresh iceoryx2 publishers. The +/// inherited state is `mem::forget`'d on purpose (running its `Drop` would +/// touch the parent's bookkeeping). The video chunk registry self-heals via the +/// `owner_pid` check. +extern "C" fn on_fork_in_child() { + PRODUCER.with(|cell| { + if let Some(stale) = cell.borrow_mut().take() { + std::mem::forget(stale); + } + }); + // Drop the cached publisher channel so the next `publisher_tx` rebuilds + // through the global (pid-keyed) heal path. The `Sender` is a plain mpsc + // handle, so a normal drop is safe here (no parent-side bookkeeping to + // corrupt, unlike `PRODUCER`'s iceoryx2 ports). + PUBLISHER_TX.with(|cell| { + cell.borrow_mut().take(); + }); +} + +/// Producer wall-clock time in nanoseconds since the Unix epoch, stamped onto +/// every published data envelope as its `publish_timestamp_ns`. This is the +/// daemon's sole window-membership key, decoupled from whatever clock the +/// caller timestamps data with. The lifecycle `StartRecording` / `StopRecording` +/// envelopes carry the same publish clock as their `publish_timestamp_ns`, so +/// window boundaries and data are directly comparable. +pub(crate) fn now_ns() -> i64 { + match SystemTime::now().duration_since(UNIX_EPOCH) { + Ok(elapsed) => elapsed.as_nanos() as i64, + // The system clock is set before the Unix epoch (a mis-set RTC). A 0 + // here is the worst possible value: it routes the datum before *every* + // window β†’ silent orphan-drop, indistinguishable from a real sample. 
/// Positive, strictly increasing fallback for [`now_ns`] when the wall clock is
/// unusable (set before the Unix epoch).
///
/// The value is a fixed epoch base plus the time elapsed since a monotonic
/// anchor captured on first use. A monotonic clock alone is only
/// non-decreasing, so two calls inside the same clock tick could return the
/// same number. A process-wide high-water mark therefore bumps each result to
/// at least one nanosecond past the previous one, across all threads.
fn clock_fallback_ns() -> i64 {
    use std::sync::atomic::{AtomicI64, Ordering};

    /// Monotonic anchor captured on first use.
    static ANCHOR: LazyLock<Instant> = LazyLock::new(Instant::now);
    /// Largest value handed out so far (0 until the first call).
    static LAST_NS: AtomicI64 = AtomicI64::new(0);
    /// 2024-01-01T00:00:00Z in epoch-ns — an arbitrary but sane positive base.
    const BASE_NS: i64 = 1_704_067_200_000_000_000;

    // Saturate instead of truncating the u128 nanosecond count.
    let elapsed_ns = i64::try_from(ANCHOR.elapsed().as_nanos()).unwrap_or(i64::MAX);
    let candidate = BASE_NS.saturating_add(elapsed_ns);

    // Atomically advance the high-water mark to max(candidate, last + 1) and
    // return that same value. The closure never declines, so both arms of the
    // result carry the previous mark.
    let previous = LAST_NS
        .fetch_update(Ordering::AcqRel, Ordering::Acquire, |last| {
            Some(candidate.max(last.saturating_add(1)))
        })
        .unwrap_or_else(|unchanged| unchanged);
    candidate.max(previous.saturating_add(1))
}
The cloud-side data verification compares this text + /// exactly, so a silent change here would break it (see this crate's + /// Cargo.toml note on why we serialise via serde_json, not `write!`). + #[test] + fn scalar_frame_entry_float_repr_matches_python_json_dumps() { + let cases = [ + (1.0_f64, 0.5_f64, r#"{"timestamp":1.0,"value":0.5}"#), + (2.0_f64, -0.25_f64, r#"{"timestamp":2.0,"value":-0.25}"#), + (0.0_f64, 1.0_f64, r#"{"timestamp":0.0,"value":1.0}"#), + ]; + for (timestamp, value, expected) in cases { + let bytes = serde_json::to_vec(&ScalarFrameEntry { timestamp, value }).expect("encode"); + assert_eq!(String::from_utf8(bytes).unwrap(), expected); + } + } +} diff --git a/rust/data_daemon_producer/src/query.rs b/rust/data_daemon_producer/src/query.rs new file mode 100644 index 000000000..fdc9f18ac --- /dev/null +++ b/rust/data_daemon_producer/src/query.rs @@ -0,0 +1,91 @@ +//! Recording-id resolution over the `queries` request-response service. +//! +//! The thin producer never mints recording identity β€” the daemon allocates the +//! cloud id asynchronously after `/recording/start`. These helpers ask the +//! daemon (identifying the recording by its source + capture `timestamp_ns` +//! marker) and return the id once minted. + +use std::time::{Duration, Instant}; + +use data_daemon_shared::RecordingIdReply; + +use crate::publisher::{with_producer, ProducerError, ProducerState}; + +/// Interval between successive recording-id requests to the daemon. +const RECORDING_ID_POLL_INTERVAL: Duration = Duration::from_millis(50); +/// How long a single request waits for the daemon's reply before re-asking. +const RECORDING_ID_RESPONSE_WAIT: Duration = Duration::from_millis(40); +/// Poll cadence while waiting for a single request's reply. 
+const RECORDING_ID_RECEIVE_POLL: Duration = Duration::from_millis(2); + +/// Block (with the GIL released by the caller) until the daemon-owned cloud +/// `recording_id` is available or `timeout_s` elapses, re-asking on each poll +/// interval. Returns `None` on timeout / when no daemon is answering. +pub(crate) fn resolve_recording_id( + request_bytes: &[u8], + timeout_s: f64, +) -> Result, ProducerError> { + // Clamp before converting: `Duration::from_secs_f64` panics on a non-finite + // or huge value, and `timeout_s` is caller-controlled across the FFI + // boundary (e.g. `float('inf')` / `float('nan')`). `f64::clamp` propagates + // NaN, so guard it explicitly (β†’ 0); +inf clamps to a day, well past any + // sane wait. + let bounded_timeout_s = if timeout_s.is_nan() { + 0.0 + } else { + timeout_s.clamp(0.0, 86_400.0) + }; + let deadline = Instant::now() + Duration::from_secs_f64(bounded_timeout_s); + loop { + let resolved = with_producer(|state| resolve_recording_id_once(state, request_bytes))?; + if resolved.is_some() { + return Ok(resolved); + } + if Instant::now() >= deadline { + return Ok(None); + } + std::thread::sleep(RECORDING_ID_POLL_INTERVAL); + } +} + +/// Send one recording-id request and wait briefly for the daemon's reply. +/// +/// Returns `Ok(Some(id))` once the daemon has minted the cloud id, or `Ok(None)` +/// when it replied "not yet" or did not reply within the per-request window +/// (e.g. no daemon is up). The caller re-asks until its overall timeout. 
+fn resolve_recording_id_once( + state: &ProducerState, + request_bytes: &[u8], +) -> Result, ProducerError> { + let request = state + .queries_client + .loan_slice_uninit(request_bytes.len()) + .map_err(|error| ProducerError::Loan(error.to_string()))?; + let request = request.write_from_slice(request_bytes); + let pending = request + .send() + .map_err(|error| ProducerError::Send(error.to_string()))?; + + let response_deadline = Instant::now() + RECORDING_ID_RESPONSE_WAIT; + loop { + match pending.receive() { + Ok(Some(response)) => { + let reply = RecordingIdReply::decode(response.payload())?; + return Ok(reply.recording_id); + } + Ok(None) => {} + Err(error) => { + // A transient receive error is not fatal: log it and report "no + // reply" so the caller's outer loop re-asks until its real + // deadline, matching this function's documented `Ok(None)` + // contract (it is a receive failure, not a send failure). + tracing::debug!(%error, "recording-id receive failed; treating as no reply"); + return Ok(None); + } + } + if Instant::now() >= response_deadline { + return Ok(None); + } + std::thread::sleep(RECORDING_ID_RECEIVE_POLL); + } +} diff --git a/rust/data_daemon_producer/src/writer.rs b/rust/data_daemon_producer/src/writer.rs new file mode 100644 index 000000000..9dae6b547 --- /dev/null +++ b/rust/data_daemon_producer/src/writer.rs @@ -0,0 +1,1183 @@ +//! Background video-writer thread and the in-progress video-chunk registry. +//! +//! `log_frame` must never block the caller on disk I/O. The producer spool lives +//! on the same filesystem as the daemon's SQLite WAL and the ffmpeg transcode +//! outputs, so on ext4 (`data=ordered`) a frame `write()` can stall for hundreds +//! of ms behind an unrelated `fsync`/journal commit β€” and because `log_frame` +//! holds the GIL across that write, the stall freezes the whole producer. To +//! keep the robot-facing ingest path latency-bounded, `log_frame` copies the +//! 
frame and hands it to a dedicated per-process writer thread, returning at +//! once. The writer owns *every* NUT write and chunk seal, and queues each +//! sealed chunk's `VideoChunkReady` announcement for the data-publisher thread, +//! so a disk stall blocks only the writer — never a `log_*` caller. +//! +//! Lifecycle envelopes (`StartRecording` / `StopRecording` / `CancelRecording`) +//! are emphatically NOT routed through the writer: they stay on the *calling* +//! thread's publisher, the same port `StartRecording` uses, so consecutive +//! recordings' start/stop boundaries keep their strict in-order delivery. (Were +//! `StopRecording` published from the writer's port instead, it could be +//! reordered against the next recording's `StartRecording` on the main port — +//! the daemon then sees a start while the prior window is still live and drops +//! the overlapping window's data.) The stop/cancel paths only *barrier* on the +//! writer — seal + announce (or drop) the source's tail chunks, ack — and then +//! publish the lifecycle envelope themselves. Chunk-before-stop ordering is not +//! a same-port guarantee here but is safe anyway: the daemon holds every +//! `VideoChunkReady` back (`NCD_HOLDBACK_MS`, default 500 ms) and retains a +//! just-closed window, so a tail chunk announced just before the stop still +//! routes into the (closing) window by its in-window open timestamp. +//! +//! ## Fork safety +//! +//! The process-wide [`VIDEO_CHUNKS`] registry stores the owning PID and wipes +//! itself on the first access from a process whose PID no longer matches, so a +//! forked `multiprocessing` worker never inherits stale parent state. The +//! [`VIDEO_WRITER`] handle heals the same way: the parent's writer thread does +//! not survive into a forked child, so the child re-spawns one on first use. 
+ +use std::cell::RefCell; +use std::collections::{HashMap, VecDeque}; +use std::path::PathBuf; +use std::sync::mpsc::Sender; +use std::sync::{Arc, Condvar, LazyLock, Mutex, Once}; +use std::time::{Duration, Instant}; + +use data_daemon_shared::service_name::MAX_VIDEO_CHUNK_FRAMES; +use data_daemon_shared::Envelope; + +use crate::nut_writer::{NutVideoConfig, NutWriter}; +use crate::paths::{source_prefix, split_stream_key, spool_chunk_filename, spool_dir, stream_key}; +use crate::publisher::{now_ns, publisher_tx, ProducerError, PublishMsg}; + +/// Bytes after which the producer rotates to a fresh NUT chunk file. +/// +/// Each chunk pays a fixed per-encode cost on the daemon side (~100-200 ms of +/// ffmpeg fork+exec + libx264 init for two output codecs). 256 MiB keeps that +/// fixed cost a small fraction of the per-chunk wall time. The threshold is +/// checked *after* each frame, so the on-disk file can exceed it by at most +/// one frame. A chunk is also rolled at every lifecycle event so a single NUT +/// only ever holds frames from one recording window. +/// +/// This byte threshold has a companion frame-count cap +/// ([`MAX_VIDEO_CHUNK_FRAMES`]): a chunk is sealed at whichever bound is hit +/// first (see [`should_flush_chunk`]). Small frames never reach 256 MiB +/// mid-recording, so the frame cap is what bounds the chunk's announcement +/// envelope to one commands slice. +const CHUNK_FLUSH_BYTES: u64 = 256 * 1024 * 1024; + +/// Backpressure cap for the writer's frame queue. A transient disk stall is +/// absorbed by buffering frames up to this many bytes before `log_frame` +/// blocks; only a *sustained* overload (the writer genuinely can't keep up) +/// propagates backpressure to the caller. 64 MiB holds ~a second of a +/// multi-camera 256Γ—256@30 workload while staying small next to a worker's RSS. 
+const WRITER_QUEUE_MAX_BYTES: usize = 64 * 1024 * 1024; + +/// How often the writer rescans its spool inbox to refresh the on-disk backlog +/// estimate and release frame-admission backpressure. Also bounds how long a +/// producer stays blocked after the daemon drains a chunk (≀ this interval). +const SPOOL_SCAN_INTERVAL: Duration = Duration::from_millis(250); + +/// How long a video frame may wait for spool-backlog headroom before +/// `log_frame` gives up and raises. The spool drains only as the *daemon* +/// transcodes chunks, so a dead or wedged daemon must surface as a logging +/// error rather than block the caller's thread forever. One second is far +/// longer than any healthy transcode stall, yet short enough that the caller +/// learns promptly instead of silently losing frames. +const FRAME_ADMISSION_TIMEOUT: Duration = Duration::from_secs(1); + +/// Resolve the producer's spool-backlog cap (bytes) from the daemon profile +/// config (`spool_limit`: `NCD_SPOOL_LIMIT` β†’ active profile β†’ default). +fn resolved_spool_max_bytes() -> u64 { + floor_spool_max(data_daemon_shared::config::resolve_spool_limit_bytes()) +} + +/// Apply the cap's safety floor. A configured value of `0` (or any non-positive) +/// disables the bound; any positive value is floored to two chunk sizes so there +/// is always room for the in-progress chunk plus a sealed one β€” a cap below the +/// chunk size would wedge the writer (the open chunk alone exceeds it, so every +/// frame blocks and the chunk never seals). +fn floor_spool_max(configured: i64) -> u64 { + if configured <= 0 { + return 0; + } + (configured as u64).max(2 * CHUNK_FLUSH_BYTES) +} + +/// Rescan the spool inbox and publish the fresh backlog estimate to `queue`, +/// releasing any producer blocked on the spool cap. No-op when the bound is +/// disabled, so a disabled bound never pays for a directory walk. 
+fn refresh_spool_backlog(queue: &FrameQueue) { + if queue.spool_max == 0 { + return; + } + let scanned = crate::paths::spool_root() + .map(|root| data_daemon_shared::paths::directory_bytes(&root)) + .unwrap_or(0); + queue.set_spool_bytes(scanned); +} + +/// In-progress video chunk state for one `(source, sensor)` stream. +/// +/// The producer does not know which recording the frames belong to, so chunks +/// are spooled into a recording-independent inbox keyed by source + sensor. +/// The daemon relinks them under a recording once routing resolves a window. +struct VideoChunkState { + /// Frame width in pixels (constant across a stream's chunks). + width: u32, + /// Frame height in pixels (constant across a stream's chunks). + height: u32, + /// `{recordings_root}/.rgb_spool/{robot_id}/{instance}/{data_type}/{sensor_name}/`. + spool_dir: PathBuf, + /// Active NUT writer for the in-progress chunk. `None` between chunks. + nut_writer: Option, + /// `publish_timestamp_ns` of the in-progress chunk β€” captured with + /// `chunk_thread_id` when the chunk opened (its first frame). Keys both the + /// spool filename `chunk_{publish_ns}_{thread_id}.nut` and the window + /// routing on the announcement, so the daemon can reconstruct the spool + /// path. Re-stamped on every chunk open, so each chunk is named uniquely + /// and no two recordings collide on a filename. `0` between chunks. + chunk_publish_ns: i64, + /// OS thread id (`gettid`) of the thread that opened the in-progress chunk. + chunk_thread_id: i64, + /// Frames already written into the in-progress chunk. + frame_count: u32, + /// Per-stream PTS origin, microseconds since the Unix epoch. + pts_origin_us: Option, + /// Last PTS written to any chunk for the stream; enforces monotonicity. + last_pts_us: Option, + /// Per-frame capture time in ns for the in-progress chunk β€” drained into + /// the announcement so the daemon can bucket frames into a window. 
+ frame_timestamps_ns: Vec, + /// Per-frame `timestamp_s` accumulator for the in-progress chunk. + frame_timestamps_s: Vec, +} + +/// Process-wide registry of in-progress per-`(source, sensor)` video chunk +/// state. Per-stream state lives behind its own [`Arc>`] +/// so a multi-megabyte NUT write for camera A does not block camera B. +type VideoChunkSlot = Arc>; + +struct VideoChunkRegistry { + owner_pid: u32, + streams: HashMap, +} + +static VIDEO_CHUNKS: LazyLock> = LazyLock::new(|| { + Mutex::new(VideoChunkRegistry { + owner_pid: 0, + streams: HashMap::new(), + }) +}); + +/// Lock the video chunk registry and run `operation` against its streams map. +/// +/// Heals on fork: when the stored `owner_pid` no longer matches the current +/// process the map was inherited from a pre-fork parent, so it is cleared +/// before use. +fn with_video_chunks(operation: impl FnOnce(&mut HashMap) -> R) -> R { + let mut registry = VIDEO_CHUNKS + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + let pid = std::process::id(); + if registry.owner_pid != pid { + registry.streams.clear(); + registry.owner_pid = pid; + } + operation(&mut registry.streams) +} + +/// One frame handed to the background writer. Owns its pixel bytes (copied out +/// of the caller's buffer under the GIL) so the caller can return immediately. +pub(crate) struct FrameJob { + pub(crate) robot_id: String, + pub(crate) robot_instance: i64, + pub(crate) data_type: String, + pub(crate) sensor_name: String, + pub(crate) width: u32, + pub(crate) height: u32, + pub(crate) timestamp_ns: i64, + pub(crate) timestamp_s: f64, + pub(crate) data: Vec, +} + +/// Work item for the writer thread. +pub(crate) enum WriterMsg { + /// Append one frame to its `(source, sensor)` in-progress chunk. + Frame(FrameJob), + /// Stop barrier: drain every frame queued ahead for the source (FIFO), seal + /// and announce its open chunks, then acknowledge. The caller publishes + /// `StopRecording` itself once acked. 
No lifecycle envelope is published + /// here β€” see the module note on lifecycle ordering. + FlushSource { + robot_id: String, + robot_instance: i64, + ack: Sender<()>, + }, + /// Cancel barrier: drop every open chunk for the source without announcing + /// it (the daemon's cancel + recovery sweep reclaim the spooled NUTs), then + /// acknowledge. The caller publishes `CancelRecording` itself once acked. + DropSource { + robot_id: String, + robot_instance: i64, + ack: Sender<()>, + }, +} + +impl WriterMsg { + /// Bytes this message contributes to the queue's backpressure budget. Only + /// frame payloads are throttled; control messages must always enqueue so a + /// stop/cancel can drain even a full queue. + fn queue_bytes(&self) -> usize { + match self { + WriterMsg::Frame(job) => job.data.len(), + _ => 0, + } + } +} + +/// Returned by [`FrameQueue::push`] when a video frame cannot be admitted +/// within [`FRAME_ADMISSION_TIMEOUT`] because the on-disk spool backlog is stuck +/// at its cap. The producer maps this to a Python exception so a stalled daemon +/// surfaces as a logging error instead of a silently dropped frame. +#[derive(Debug)] +pub(crate) struct LoggingStalled; + +/// Byte-bounded MPSC queue between the logging threads and the writer thread. +/// +/// Two independent backpressure limits gate *frame* admission (control +/// messages always pass so a stop/cancel can drain even a full queue): +/// +/// - [`WRITER_QUEUE_MAX_BYTES`] caps the in-memory frames awaiting a disk write. +/// - `spool_max` caps the producer's on-disk NUT backlog: when the daemon can't +/// transcode spooled chunks fast enough the inbox would otherwise grow +/// unbounded and fill the disk, stalling `stop_recording`'s tail-chunk flush +/// for seconds. The writer thread refreshes `spool_bytes` from a periodic +/// directory scan ([`refresh_spool_backlog`]). 
+pub(crate) struct FrameQueue { + inner: Mutex, + not_full: Condvar, + not_empty: Condvar, + /// In-memory frame-buffer cap. Drained by the local writer thread, which + /// always makes progress, so a frame blocked on it waits unbounded. + /// [`WRITER_QUEUE_MAX_BYTES`] in production; tests shrink it to exercise the + /// path with small frames. + memory_max: usize, + /// On-disk spool-backlog cap. Drained only as the daemon transcodes chunks, + /// so a frame blocked on it is time-limited (see `block_timeout`). `0` + /// disables the bound. + spool_max: u64, + /// How long a frame may wait on the **spool** cap before [`push`] rejects it + /// with [`LoggingStalled`]. Does not apply to the in-memory cap. Always + /// [`FRAME_ADMISSION_TIMEOUT`] in production; tests shorten it. + /// + /// [`push`]: FrameQueue::push + block_timeout: Duration, +} + +struct FrameQueueInner { + msgs: VecDeque, + bytes: usize, + spool_bytes: u64, +} + +impl FrameQueue { + fn new() -> Self { + Self::build( + WRITER_QUEUE_MAX_BYTES, + resolved_spool_max_bytes(), + FRAME_ADMISSION_TIMEOUT, + ) + } + + /// Assemble a queue from explicit caps β€” the one place the fields are + /// initialised, so the production and test constructors can't drift. + fn build(memory_max: usize, spool_max: u64, block_timeout: Duration) -> Self { + FrameQueue { + inner: Mutex::new(FrameQueueInner { + msgs: VecDeque::new(), + bytes: 0, + spool_bytes: 0, + }), + not_full: Condvar::new(), + not_empty: Condvar::new(), + memory_max, + spool_max, + block_timeout, + } + } + + /// Build a queue with an explicit spool cap and stall timeout (keeping the + /// production in-memory cap), bypassing the profile/env config read so + /// backpressure can be tested deterministically. 
+ #[cfg(test)] + fn with_caps(spool_max: u64, block_timeout: Duration) -> Self { + Self::build(WRITER_QUEUE_MAX_BYTES, spool_max, block_timeout) + } + + /// Build a queue with an explicit in-memory cap too, so the in-memory + /// backpressure path can be exercised with small frames. + #[cfg(test)] + fn with_memory_cap(memory_max: usize, spool_max: u64, block_timeout: Duration) -> Self { + Self::build(memory_max, spool_max, block_timeout) + } + + /// Enqueue a message, blocking the caller only while a *frame* would exceed + /// the in-memory cap **or** the on-disk spool backlog is at its cap (control + /// messages never block, so a stop/cancel drains even a full queue). A lone + /// frame larger than the in-memory cap is still admitted once the queue is + /// empty, so forward progress is always possible. + /// + /// The two caps wait differently because they drain differently: + /// + /// - The in-memory cap drains as the local writer thread writes frames to + /// disk, which always makes progress, so a frame blocked *only* on it + /// waits unbounded β€” exactly as before the spool cap existed. + /// - The spool cap drains only as the daemon transcodes chunks, so a frame + /// blocked on it gives up after [`FRAME_ADMISSION_TIMEOUT`] and returns + /// [`LoggingStalled`] rather than block a logging thread forever behind a + /// dead daemon. + pub(crate) fn push(&self, msg: WriterMsg) -> Result<(), LoggingStalled> { + let add = msg.queue_bytes(); + let mut inner = self.inner.lock().unwrap_or_else(|p| p.into_inner()); + if add > 0 { + // `spool_deadline` tracks *continuous* time the frame has spent + // blocked on the spool cap. It is armed when the spool is the + // blocker and cleared whenever the spool falls back below its cap, so + // a slow-but-draining daemon (which keeps dipping under the cap) + // never trips the timeout β€” only a daemon that is wedged at the cap + // does. A frame blocked solely on the in-memory cap waits unbounded. 
+ let mut spool_deadline: Option = None; + loop { + let over_spool = self.over_spool_cap(&inner); + if !over_spool && !self.over_memory_cap(&inner, add) { + break; + } + inner = if over_spool { + let deadline = + *spool_deadline.get_or_insert_with(|| Instant::now() + self.block_timeout); + let remaining = match deadline.checked_duration_since(Instant::now()) { + Some(remaining) if !remaining.is_zero() => remaining, + _ => return Err(LoggingStalled), + }; + self.not_full + .wait_timeout(inner, remaining) + .unwrap_or_else(|p| p.into_inner()) + .0 + } else { + spool_deadline = None; + self.not_full.wait(inner).unwrap_or_else(|p| p.into_inner()) + }; + } + } + inner.bytes += add; + inner.msgs.push_back(msg); + self.not_empty.notify_one(); + Ok(()) + } + + /// Whether admitting an `add`-byte frame would breach the in-memory cap. + /// Yields once the queue is empty, so an oversized lone frame still makes + /// progress. The local writer thread always drains this, so a frame blocked + /// here waits unbounded. + fn over_memory_cap(&self, inner: &FrameQueueInner, add: usize) -> bool { + inner.bytes > 0 && inner.bytes + add > self.memory_max + } + + /// Whether the on-disk spool backlog is at its cap. Disabled when + /// `spool_max == 0`. This clears only when the daemon drains the inbox, so a + /// frame blocked here is bounded by [`FRAME_ADMISSION_TIMEOUT`]. + fn over_spool_cap(&self, inner: &FrameQueueInner) -> bool { + self.spool_max > 0 && inner.spool_bytes >= self.spool_max + } + + /// Publish the latest scanned spool-backlog size and wake every producer + /// blocked on the spool cap so they re-evaluate admission. + fn set_spool_bytes(&self, scanned: u64) { + let mut inner = self.inner.lock().unwrap_or_else(|p| p.into_inner()); + inner.spool_bytes = scanned; + self.not_full.notify_all(); + } + + /// Block indefinitely until a message is available, then pop it (FIFO). + /// Used when the spool cap is disabled, where the writer has no reason to + /// wake on a timer. 
+ fn pop(&self) -> WriterMsg { + self.pop_inner(None) + .expect("an unbounded wait never times out") + } + + /// Pop the next message (FIFO), blocking up to `timeout`; `None` on timeout. + /// + /// The timeout lets the writer thread wake to rescan the spool even when + /// frame admission is fully blocked (no new frames arriving) β€” that rescan + /// is what releases the backpressure as the daemon drains the inbox, so the + /// spool bound can never deadlock. + fn pop_timeout(&self, timeout: Duration) -> Option { + self.pop_inner(Some(timeout)) + } + + /// Shared pop body: a `None` timeout waits forever, `Some` bounds each wait. + fn pop_inner(&self, timeout: Option) -> Option { + let mut inner = self.inner.lock().unwrap_or_else(|p| p.into_inner()); + loop { + if let Some(msg) = inner.msgs.pop_front() { + inner.bytes -= msg.queue_bytes(); + self.not_full.notify_one(); + return Some(msg); + } + match timeout { + None => { + inner = self + .not_empty + .wait(inner) + .unwrap_or_else(|p| p.into_inner()); + } + Some(timeout) => { + let (guard, result) = self + .not_empty + .wait_timeout(inner, timeout) + .unwrap_or_else(|poisoned| poisoned.into_inner()); + inner = guard; + if result.timed_out() && inner.msgs.is_empty() { + return None; + } + } + } + } + } +} + +/// Process-wide writer handle, healed across `fork` via `owner_pid` (mirrors +/// [`VIDEO_CHUNKS`]). The parent's writer thread does not survive into a forked +/// child, so the child re-spawns one on first use. +struct WriterRegistry { + owner_pid: u32, + queue: Option>, +} + +static VIDEO_WRITER: LazyLock> = LazyLock::new(|| { + Mutex::new(WriterRegistry { + owner_pid: 0, + queue: None, + }) +}); + +thread_local! { + /// Per-thread cache of the process video-writer queue. The hot `log_frame` + /// path hits this slot β€” a plain TLS load β€” instead of taking the global + /// `VIDEO_WRITER` mutex and a `getpid()` syscall on every frame. Cleared by + /// the fork child handler so a forked child rebuilds. 
+ static WRITER_QUEUE: RefCell>> = const { RefCell::new(None) }; +} + +/// Return this process's writer queue. Fast path: the thread-local cache (no +/// lock, no syscall). Slow path: heal/spawn under the global lock and cache. +pub(crate) fn writer_queue() -> Arc { + if let Some(queue) = WRITER_QUEUE.with(|cell| cell.borrow().clone()) { + return queue; + } + let queue = writer_queue_global(); + WRITER_QUEUE.with(|cell| *cell.borrow_mut() = Some(queue.clone())); + queue +} + +/// Heal/spawn the process writer thread under the global lock, returning its +/// queue. On the (near-impossible) spawn failure we log and return a detached +/// queue *without* recording it, so the next call retries rather than the caller +/// blocking forever on a consumer-less queue. +fn writer_queue_global() -> Arc { + ensure_writer_fork_handler_registered(); + let mut reg = VIDEO_WRITER.lock().unwrap_or_else(|p| p.into_inner()); + let pid = std::process::id(); + if reg.owner_pid == pid { + if let Some(queue) = reg.queue.as_ref() { + return queue.clone(); + } + } + + let queue = Arc::new(FrameQueue::new()); + let worker_queue = queue.clone(); + match std::thread::Builder::new() + .name("nc-video-writer".to_string()) + .spawn(move || writer_loop(&worker_queue)) + { + Ok(_handle) => { + reg.owner_pid = pid; + reg.queue = Some(queue.clone()); + } + Err(error) => { + // Leave the registry unset so the next call retries the spawn. The + // returned queue has no consumer; a single `push` of a frame far + // under the cap won't block, so the frame is simply dropped when the + // queue is freed rather than the producer hanging. + tracing::error!(%error, "failed to spawn video writer thread; dropping frame"); + } + } + queue +} + +/// Install a `pthread_atfork` child handler (once) that clears this thread's +/// cached [`WRITER_QUEUE`]. 
The global `VIDEO_WRITER` self-heals via its +/// `owner_pid`, but the per-thread cache would otherwise hand a forked child a +/// stale queue whose writer thread didn't survive the fork. +fn ensure_writer_fork_handler_registered() { + static REGISTER: Once = Once::new(); + REGISTER.call_once(|| { + // SAFETY: standard libc fork-callback registration. `clear_queue_cache` + // is `extern "C"` and only drops a const-initialised TLS `Arc`. + let result = unsafe { libc::pthread_atfork(None, None, Some(clear_queue_cache)) }; + if result != 0 { + tracing::warn!( + errno = result, + "pthread_atfork registration failed; video writer-queue cache relies on PID heal", + ); + } + }); +} + +/// `pthread_atfork` child callback: drop the surviving thread's cached writer +/// queue so the next [`writer_queue`] rebuilds through the PID-keyed heal path. +extern "C" fn clear_queue_cache() { + WRITER_QUEUE.with(|cell| { + cell.borrow_mut().take(); + }); +} + +/// The writer thread's run loop. Sole accessor of the in-progress chunk state +/// and sole publisher of video chunk + stop/cancel envelopes for this process. +fn writer_loop(queue: &FrameQueue) { + // With the spool cap disabled there is nothing to scan, so block on each + // message indefinitely rather than waking on a timer. + let bounded = queue.spool_max > 0; + if bounded { + // Prime the backlog estimate so the first frames see a real spool size. 
+ refresh_spool_backlog(queue); + } + let mut last_scan = Instant::now(); + loop { + let next = if bounded { + queue.pop_timeout(SPOOL_SCAN_INTERVAL) + } else { + Some(queue.pop()) + }; + match next { + Some(WriterMsg::Frame(job)) => { + if let Err(error) = record_video_frame( + &job.robot_id, + job.robot_instance, + &job.data_type, + &job.sensor_name, + job.width, + job.height, + &job.data, + job.timestamp_ns, + job.timestamp_s, + ) { + tracing::warn!(%error, sensor_name = job.sensor_name, "failed to spool video frame"); + } + } + Some(WriterMsg::FlushSource { + robot_id, + robot_instance, + ack, + }) => { + if let Err(error) = flush_source_chunks(&robot_id, robot_instance) { + tracing::warn!(%error, "failed to flush tail video chunks on stop"); + } + let _ = ack.send(()); + } + Some(WriterMsg::DropSource { + robot_id, + robot_instance, + ack, + }) => { + let prefix = source_prefix(&robot_id, robot_instance); + with_video_chunks(|streams| { + streams.retain(|key, _| !key.starts_with(&prefix)); + }); + let _ = ack.send(()); + } + // pop_timeout elapsed with no message: fall through to the rescan. + None => {} + } + // Refresh the backlog estimate on a coarse cadence so frame-admission + // backpressure tracks the daemon draining the spool inbox. + if bounded && last_scan.elapsed() >= SPOOL_SCAN_INTERVAL { + refresh_spool_backlog(queue); + last_scan = Instant::now(); + } + } +} + +/// A chunk-open timestamp that is strictly increasing within this process. +/// +/// The spool filename is `chunk_{publish_ns}_{thread_id}.nut`. All video chunks +/// are now opened by the single background writer thread, so they share one +/// `thread_id` and uniqueness rests entirely on `publish_ns`. `now_ns()` reads +/// `CLOCK_REALTIME`, whose granularity can repeat across two opens issued back +/// to back, which would collide two cameras' chunk files. 
Bumping past the last +/// value returned keeps every chunk's name distinct while staying within the +/// recording window (the window spans seconds; a few ns of skew is irrelevant +/// to membership). Only the writer thread calls this, but the atomic keeps it +/// correct regardless. +fn next_chunk_open_ns() -> i64 { + use std::sync::atomic::{AtomicI64, Ordering}; + static LAST: AtomicI64 = AtomicI64::new(0); + let mut candidate = now_ns(); + loop { + let last = LAST.load(Ordering::Relaxed); + if candidate <= last { + candidate = last + 1; + } + match LAST.compare_exchange_weak(last, candidate, Ordering::Relaxed, Ordering::Relaxed) { + Ok(_) => return candidate, + Err(_) => candidate = now_ns(), + } + } +} + +/// OS thread id of the calling thread (Linux `gettid`). Used to disambiguate a +/// video chunk's spool filename across producer threads and as a breadcrumb +/// when inspecting the spool directory. +fn current_thread_id() -> i64 { + // SAFETY: `gettid` takes no arguments and cannot fail. + unsafe { libc::gettid() as i64 } +} + +/// Whether the in-progress chunk should be sealed now, checked after each +/// appended frame. A chunk is rolled at the **lower** of two bounds: +/// +/// * [`CHUNK_FLUSH_BYTES`] β€” keeps the daemon's per-chunk encode cost amortised. +/// * [`MAX_VIDEO_CHUNK_FRAMES`] β€” keeps the chunk's `VideoChunkReady` +/// announcement within one `COMMANDS_MAX_PAYLOAD_BYTES` sample. Small frames +/// never reach the byte threshold mid-recording, so without the frame cap a +/// long recording accumulates one ever-growing chunk whose per-frame +/// timestamp vectors eventually overflow the commands slice β€” the +/// announcement then fails to publish and the recording's video is lost. 
+fn should_flush_chunk(chunk_bytes: u64, frame_count: u32) -> bool { + chunk_bytes >= CHUNK_FLUSH_BYTES || frame_count >= MAX_VIDEO_CHUNK_FRAMES +} + +/// Append one frame to the `(source, sensor)` in-progress NUT chunk, opening +/// the chunk lazily, enforcing PTS monotonicity, and flushing once the chunk +/// crosses [`CHUNK_FLUSH_BYTES`] or [`MAX_VIDEO_CHUNK_FRAMES`]. Best-effort: +/// NUT-write errors are logged and the frame dropped, never propagated to +/// Python. +#[allow(clippy::too_many_arguments)] +fn record_video_frame( + robot_id: &str, + robot_instance: i64, + data_type: &str, + sensor_name: &str, + width: u32, + height: u32, + payload: &[u8], + timestamp_ns: i64, + timestamp_s: f64, +) -> Result<(), ProducerError> { + let key = stream_key(robot_id, robot_instance, data_type, sensor_name); + // Resolve the slot, building the per-stream state (and its spool dir) only + // on the FIRST frame of a stream. The spool-dir path build is several + // allocations, so doing it per frame (as an earlier revision did) added + // allocation churn to the writer's hot path and backed up the frame queue + // at high frame rates. The recordings root is pre-validated on the GIL in + // `log_frame`, so `spool_dir` only returns `None` on a genuine + // misconfiguration β€” drop the frame (never panic on the writer thread). 
+ let slot: VideoChunkSlot = match with_video_chunks(|streams| { + if let Some(slot) = streams.get(&key) { + return Some(slot.clone()); + } + let spool = spool_dir(robot_id, robot_instance, data_type, sensor_name)?; + let slot = Arc::new(Mutex::new(VideoChunkState { + width, + height, + spool_dir: spool, + nut_writer: None, + chunk_publish_ns: 0, + chunk_thread_id: 0, + frame_count: 0, + pts_origin_us: None, + last_pts_us: None, + frame_timestamps_ns: Vec::new(), + frame_timestamps_s: Vec::new(), + })); + streams.insert(key.clone(), slot.clone()); + Some(slot) + }) { + Some(slot) => slot, + None => { + tracing::error!( + sensor_name, + "recordings root unresolved on writer thread; dropping video frame" + ); + return Ok(()); + } + }; + + // The announcements are built under the per-stream lock but published + // outside it β€” `publish()` blocks the calling thread when the daemon falls + // behind, and holding the mutex across that block would stall this + // camera's next frame. + let announcements = { + let mut state = slot.lock().unwrap_or_else(|poisoned| poisoned.into_inner()); + append_frame_locked( + &mut state, + robot_id, + robot_instance, + data_type, + sensor_name, + width, + height, + payload, + timestamp_ns, + timestamp_s, + ) + }; + + for envelope in announcements { + // Hand each sealed chunk's announcement to the publisher thread rather + // than publishing inline: this runs on the writer thread, which must + // never block on an IPC publish (the stop/cancel barrier waits on it). + let _ = publisher_tx().send(PublishMsg::Announce(envelope)); + } + Ok(()) +} + +/// Append one frame to the locked per-stream chunk `state`, returning every +/// chunk announcement produced this call: a geometry-change seal and/or a +/// size/frame-cap flush (so a single call can yield two). Pure with respect to +/// IPC β€” the caller publishes the returned envelopes outside the lock β€” which +/// also makes the open/seal/roll logic unit-testable without a live daemon. 
+/// Best-effort: a NUT open/write error logs and drops the frame. +#[allow(clippy::too_many_arguments)] +fn append_frame_locked( + state: &mut VideoChunkState, + robot_id: &str, + robot_instance: i64, + data_type: &str, + sensor_name: &str, + width: u32, + height: u32, + payload: &[u8], + timestamp_ns: i64, + timestamp_s: f64, +) -> Vec { + let mut announcements: Vec = Vec::new(); + + // A mid-stream resolution change can't share a chunk with the prior + // geometry: the NUT header advertises the opening frame's size, so a + // differently-sized frame fails the writer's size check and is silently + // dropped (or, on a coincidental `w*h*3` match, corrupts the encode). Seal + // the open chunk (announced below) and reopen a fresh one with the new + // geometry rather than dropping every later frame. + if state.nut_writer.is_some() && (state.width != width || state.height != height) { + tracing::warn!( + sensor_name, + old_width = state.width, + old_height = state.height, + new_width = width, + new_height = height, + "video frame geometry changed mid-stream; sealing chunk and reopening" + ); + if let Some(envelope) = + flush_chunk_locked(robot_id, robot_instance, data_type, sensor_name, state) + { + announcements.push(envelope); + } + } + state.width = width; + state.height = height; + + // Each fresh chunk opens with a header syncpoint at global_key_pts=0, so + // reset the PTS origin whenever a chunk is (re)opened β€” after a geometry + // seal above, or a size/frame-cap roll on the previous call. Every chunk's + // frames then start near PTS 0 rather than carrying the whole stream's + // elapsed time, keeping each chunk's frame PTS consistent with its header. 
+ if state.nut_writer.is_none() { + state.pts_origin_us = None; + state.last_pts_us = None; + } + let origin_us = *state.pts_origin_us.get_or_insert(timestamp_ns / 1_000); + let relative_us = (timestamp_ns / 1_000).saturating_sub(origin_us).max(0); + let mut pts = relative_us as u64; + if let Some(previous) = state.last_pts_us { + if pts <= previous { + pts = previous.saturating_add(1); + } + } + + if state.nut_writer.is_none() { + // Stamp the chunk's identity at open: its `publish_timestamp_ns` + // (this instant β€” inside the active recording window) plus the + // opening thread's id. These name the spool file and ride the + // announcement so the daemon can both route and locate the chunk. + state.chunk_publish_ns = next_chunk_open_ns(); + state.chunk_thread_id = current_thread_id(); + let chunk_path = state.spool_dir.join(spool_chunk_filename( + state.chunk_publish_ns, + state.chunk_thread_id, + )); + let config = NutVideoConfig { + width: state.width, + height: state.height, + time_base_num: 1, + time_base_den: 1_000_000, + }; + match NutWriter::create(&chunk_path, config) { + Ok(writer) => state.nut_writer = Some(writer), + Err(error) => { + tracing::warn!( + %error, + sensor_name, + path = %chunk_path.display(), + "failed to open NUT chunk; dropping frame" + ); + return announcements; + } + } + } + + let bytes_after_write = { + let writer = state.nut_writer.as_mut().expect("opened immediately above"); + if let Err(error) = writer.write_frame(pts, payload) { + tracing::warn!( + %error, + sensor_name, + "failed to write video frame to NUT chunk; dropping frame" + ); + return announcements; + } + writer.bytes_written() + }; + state.last_pts_us = Some(pts); + state.frame_count = state.frame_count.saturating_add(1); + state.frame_timestamps_ns.push(timestamp_ns); + state.frame_timestamps_s.push(timestamp_s); + + if should_flush_chunk(bytes_after_write, state.frame_count) { + if let Some(envelope) = + flush_chunk_locked(robot_id, robot_instance, data_type, 
sensor_name, state) + { + announcements.push(envelope); + } + } + announcements +} + +/// Seal the in-progress chunk and return the announcement envelope. The caller +/// must hold the per-stream lock. Returns `None` when there is no open chunk +/// writer (no frames since the last flush). +fn flush_chunk_locked( + robot_id: &str, + robot_instance: i64, + data_type: &str, + sensor_name: &str, + state: &mut VideoChunkState, +) -> Option { + let writer = state.nut_writer.take()?; + let byte_count = match writer.finish() { + Ok(bytes) => bytes, + Err(error) => { + tracing::warn!( + %error, + sensor_name, + "failed to finalise NUT chunk; dropping chunk" + ); + state.frame_count = 0; + state.frame_timestamps_ns.clear(); + state.frame_timestamps_s.clear(); + return None; + } + }; + // The chunk's open-time identity, stamped when its writer was created. + let publish_timestamp_ns = state.chunk_publish_ns; + let thread_id = state.chunk_thread_id; + let frame_count = state.frame_count; + let frame_timestamps_ns = std::mem::take(&mut state.frame_timestamps_ns); + let frame_timestamps_s = std::mem::take(&mut state.frame_timestamps_s); + + state.frame_count = 0; + + Some(Envelope::VideoChunkReady { + robot_id: robot_id.to_string(), + robot_instance, + data_type: data_type.to_string(), + sensor_name: Some(sensor_name.to_string()), + publish_timestamp_ns, + thread_id, + width: state.width, + height: state.height, + byte_count, + frame_count, + frame_timestamps_ns, + frame_timestamps_s, + }) +} + +/// Flush and remove every open video chunk for a source. Each flushed chunk is +/// announced so the daemon can route it before the `StopRecording` lands. 
+fn flush_source_chunks(robot_id: &str, robot_instance: i64) -> Result<(), ProducerError> { + let prefix = source_prefix(robot_id, robot_instance); + let slots: Vec<(String, VideoChunkSlot)> = with_video_chunks(|streams| { + let keys: Vec = streams + .keys() + .filter(|key| key.starts_with(&prefix)) + .cloned() + .collect(); + keys.into_iter() + .filter_map(|key| streams.remove(&key).map(|slot| (key, slot))) + .collect() + }); + for (key, slot) in slots { + let (data_type, sensor_name) = split_stream_key(&key); + let flush_envelope = { + let mut state = slot.lock().unwrap_or_else(|poisoned| poisoned.into_inner()); + flush_chunk_locked( + robot_id, + robot_instance, + &data_type, + &sensor_name, + &mut state, + ) + }; + if let Some(envelope) = flush_envelope { + // Announce via the publisher thread so the stop barrier (which awaits + // this flush) only ever waits on the on-disk seal, never an IPC send. + let _ = publisher_tx().send(PublishMsg::Announce(envelope)); + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn flushes_when_byte_threshold_reached() { + // The byte threshold seals the chunk regardless of frame count (large + // frames hit 256 MiB long before the frame cap). + assert!(should_flush_chunk(CHUNK_FLUSH_BYTES, 1)); + assert!(should_flush_chunk(CHUNK_FLUSH_BYTES + 1, 1)); + } + + #[test] + fn flushes_when_frame_cap_reached() { + // Small frames never reach the byte threshold, so the frame cap is what + // bounds the chunk β€” it seals at MAX_VIDEO_CHUNK_FRAMES even with a + // near-empty NUT file. 
+ assert!(should_flush_chunk(0, MAX_VIDEO_CHUNK_FRAMES)); + assert!(should_flush_chunk(1, MAX_VIDEO_CHUNK_FRAMES + 1)); + } + + #[test] + fn does_not_flush_below_both_bounds() { + assert!(!should_flush_chunk(0, 0)); + assert!(!should_flush_chunk( + CHUNK_FLUSH_BYTES - 1, + MAX_VIDEO_CHUNK_FRAMES - 1 + )); + } + + #[test] + fn flush_is_the_lower_of_the_two_bounds() { + // A chunk of tiny frames is sealed by the frame cap with bytes still far + // under the byte threshold β€” i.e. whichever bound is hit first wins. + assert!(should_flush_chunk(1, MAX_VIDEO_CHUNK_FRAMES)); + // ...and a byte-heavy chunk is sealed before the frame cap. + assert!(should_flush_chunk(CHUNK_FLUSH_BYTES, 1)); + } + + /// Build a fresh, empty per-stream chunk state rooted at `spool_dir`. + fn fresh_state(spool_dir: PathBuf, width: u32, height: u32) -> VideoChunkState { + VideoChunkState { + width, + height, + spool_dir, + nut_writer: None, + chunk_publish_ns: 0, + chunk_thread_id: 0, + frame_count: 0, + pts_origin_us: None, + last_pts_us: None, + frame_timestamps_ns: Vec::new(), + frame_timestamps_s: Vec::new(), + } + } + + #[test] + fn geometry_change_seals_chunk_and_reopens_at_new_size() { + // M11 regression: a mid-stream resolution change must seal the open + // chunk (so its frames aren't lost) and reopen at the new geometry, + // rather than silently dropping every later, differently-sized frame. + let dir = tempfile::tempdir().unwrap(); + let mut state = fresh_state(dir.path().to_path_buf(), 2, 2); + + // First frame at 2x2 (rgb24 = 2*2*3 bytes) just opens a chunk. + let frame_2x2 = vec![0u8; 2 * 2 * 3]; + let opened = append_frame_locked( + &mut state, "r", 0, "RGB", "cam", 2, 2, &frame_2x2, 1_000, 0.0, + ); + assert!( + opened.is_empty(), + "opening the first chunk emits no announcement" + ); + assert!(state.nut_writer.is_some()); + assert_eq!(state.frame_count, 1); + + // Second frame at 4x4 must seal the 2x2 chunk and reopen at 4x4. 
+ let frame_4x4 = vec![0u8; 4 * 4 * 3]; + let sealed = append_frame_locked( + &mut state, "r", 0, "RGB", "cam", 4, 4, &frame_4x4, 2_000, 0.001, + ); + assert_eq!(sealed.len(), 1, "the geometry change seals the prior chunk"); + match &sealed[0] { + Envelope::VideoChunkReady { + width, + height, + frame_count, + .. + } => { + assert_eq!( + (*width, *height), + (2, 2), + "the sealed chunk keeps the original geometry" + ); + assert_eq!(*frame_count, 1, "it carries the single 2x2 frame"); + } + other => panic!("expected VideoChunkReady, got {other:?}"), + } + assert_eq!( + (state.width, state.height), + (4, 4), + "state adopts the new geometry" + ); + assert!( + state.nut_writer.is_some(), + "a fresh chunk is reopened at the new geometry" + ); + assert_eq!(state.frame_count, 1, "the new chunk holds the 4x4 frame"); + } + + /// A minimal video-frame message carrying `bytes` of payload. + fn frame_msg(bytes: usize) -> WriterMsg { + WriterMsg::Frame(FrameJob { + robot_id: "robot".to_string(), + robot_instance: 0, + data_type: "RGB_IMAGES".to_string(), + sensor_name: "camera".to_string(), + width: 0, + height: 0, + timestamp_ns: 0, + timestamp_s: 0.0, + data: vec![0u8; bytes], + }) + } + + #[test] + fn floor_spool_max_disables_on_non_positive() { + assert_eq!(floor_spool_max(0), 0); + assert_eq!(floor_spool_max(-1), 0); + } + + #[test] + fn floor_spool_max_raises_sub_chunk_caps_to_two_chunks() { + // A sub-chunk cap would wedge the writer, so it is floored. + assert_eq!(floor_spool_max(1), 2 * CHUNK_FLUSH_BYTES); + // A comfortably large cap is honoured verbatim. 
+ let large = 8 * CHUNK_FLUSH_BYTES as i64; + assert_eq!(floor_spool_max(large), large as u64); + } + + #[test] + fn frame_admitted_when_spool_below_cap() { + let queue = FrameQueue::with_caps(4 * CHUNK_FLUSH_BYTES, FRAME_ADMISSION_TIMEOUT); + assert!(queue.push(frame_msg(1024)).is_ok()); + } + + #[test] + fn frame_rejected_when_spool_stuck_at_cap() { + let queue = FrameQueue::with_caps(CHUNK_FLUSH_BYTES, Duration::from_millis(50)); + // At the cap with nothing draining: a frame must time out and reject + // rather than block the caller forever. + queue.set_spool_bytes(CHUNK_FLUSH_BYTES); + let started = Instant::now(); + assert!(matches!(queue.push(frame_msg(1024)), Err(LoggingStalled))); + assert!( + started.elapsed() >= Duration::from_millis(50), + "it should wait out the stall window before rejecting" + ); + } + + #[test] + fn control_messages_bypass_a_full_spool() { + let queue = FrameQueue::with_caps(CHUNK_FLUSH_BYTES, Duration::from_millis(50)); + queue.set_spool_bytes(CHUNK_FLUSH_BYTES); + let (ack_tx, _ack_rx) = std::sync::mpsc::channel(); + // A flush/cancel must enqueue immediately even while frames are blocked. + assert!(queue + .push(WriterMsg::FlushSource { + robot_id: "robot".to_string(), + robot_instance: 0, + ack: ack_tx, + }) + .is_ok()); + } + + #[test] + fn disabled_cap_never_applies_spool_backpressure() { + let queue = FrameQueue::with_caps(0, Duration::from_millis(50)); + // Even a huge reported backlog cannot block when the bound is disabled. + queue.set_spool_bytes(u64::MAX); + assert!(queue.push(frame_msg(1024)).is_ok()); + } + + #[test] + fn draining_the_spool_unblocks_a_waiting_frame() { + // A long stall window so only the drain β€” not a timeout β€” can release it. 
+ let queue = Arc::new(FrameQueue::with_caps( + CHUNK_FLUSH_BYTES, + Duration::from_secs(10), + )); + queue.set_spool_bytes(CHUNK_FLUSH_BYTES); + let pusher = { + let queue = Arc::clone(&queue); + std::thread::spawn(move || queue.push(frame_msg(1024))) + }; + // Let the pusher reach its wait, then report the spool as drained. + std::thread::sleep(Duration::from_millis(50)); + queue.set_spool_bytes(0); + assert!(pusher.join().unwrap().is_ok()); + } + + #[test] + fn in_memory_backpressure_is_never_time_limited() { + // The stall timeout guards against a wedged *daemon* (the spool cap). A + // frame blocked purely on the in-memory cap β€” with the spool well below + // its cap β€” must wait for the local writer to drain it, never reject like + // a spool stall, even long past the (short) stall window. + let queue = Arc::new(FrameQueue::with_memory_cap( + 1024, + 4 * CHUNK_FLUSH_BYTES, + Duration::from_millis(50), + )); + // First frame is admitted despite exceeding the cap (queue was empty), + // leaving the buffer full so the next frame must block. + assert!(queue.push(frame_msg(2048)).is_ok()); + + let pusher = { + let queue = Arc::clone(&queue); + std::thread::spawn(move || queue.push(frame_msg(2048))) + }; + // Well beyond the stall window: a spool stall would have rejected by now, + // but the in-memory wait must still be blocking. + std::thread::sleep(Duration::from_millis(150)); + assert!( + !pusher.is_finished(), + "in-memory backpressure must not be subject to the spool stall timeout" + ); + + // Draining one frame frees headroom and admits the waiter. 
+ assert!(matches!(queue.pop(), WriterMsg::Frame(_))); + assert!(pusher.join().unwrap().is_ok()); + } +} diff --git a/rust/data_daemon_shared/Cargo.toml b/rust/data_daemon_shared/Cargo.toml new file mode 100644 index 000000000..692244bd8 --- /dev/null +++ b/rust/data_daemon_shared/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "data_daemon_shared" +version.workspace = true +edition.workspace = true +license.workspace = true +description = "Shared definitions for the Neuracore data daemon: IPC envelope/service-name wire format, configuration model, and filesystem-path resolution." + +[lib] +path = "src/lib.rs" + +[dependencies] +# Shared recordings-root resolution (`paths`) needs the user's home directory so +# the daemon and the producer compute the same default root. +dirs.workspace = true +postcard.workspace = true +serde.workspace = true +thiserror.workspace = true +nix.workspace = true +serde_yaml.workspace = true +tracing.workspace = true + +[dev-dependencies] +serde_json.workspace = true +tempfile = "3" diff --git a/rust/data_daemon_shared/src/config/env.rs b/rust/data_daemon_shared/src/config/env.rs new file mode 100644 index 000000000..9c2da19af --- /dev/null +++ b/rust/data_daemon_shared/src/config/env.rs @@ -0,0 +1,254 @@ +//! Environment-variable parsing: `NCD_*` configuration overrides and the +//! `NEURACORE_DAEMON_*` / `NDD_*` / `NEURACORE_*` runtime settings. +//! +//! Mirrors `config_manager/config.py`, `config_manager/helpers.py`, +//! `data_daemon/helpers.py`, and `data_daemon/const.py`. + +use std::path::PathBuf; + +use serde::{Deserialize, Deserializer}; + +use crate::config::DaemonConfig; + +/// Values treated as truthy for boolean environment variables, matching +/// `config.py::YES_CONFIRMATION`. +const YES_VALUES: [&str; 4] = ["1", "true", "yes", "y"]; + +/// Default backend API URL, from `const.py::API_URL`. +const DEFAULT_API_URL: &str = "https://api.neuracore.app/api"; + +/// Parse a byte quantity from an integer-or-unit-suffixed string. 
+/// +/// Mirrors `config_manager/helpers.py::parse_bytes`. Supported units +/// (case-insensitive): `b`, `k`, `kb`, `m`, `mb`, `g`, `gb`. Also usable +/// directly as a `clap` value parser for the `--storage-limit` / +/// `--bandwidth-limit` / `--spool-limit` options. +pub fn parse_bytes(value: &str) -> Result { + let normalized = value.trim().to_lowercase(); + + if !normalized.is_empty() + && normalized + .chars() + .all(|character| character.is_ascii_digit()) + { + return normalized + .parse::() + .map_err(|_| format!("Invalid byte value: '{value}'")); + } + + let numeric_part: String = normalized.chars().filter(|c| c.is_ascii_digit()).collect(); + let unit_suffix: String = normalized.chars().filter(|c| !c.is_ascii_digit()).collect(); + + if numeric_part.is_empty() || unit_suffix.is_empty() { + return Err(format!("Invalid byte value: '{value}'")); + } + + let base_value: i64 = numeric_part + .parse() + .map_err(|_| format!("Invalid byte value: '{value}'"))?; + + let multiplier: i64 = match unit_suffix.as_str() { + "b" => 1, + "k" | "kb" => 1024, + "m" | "mb" => 1024 * 1024, + "g" | "gb" => 1024 * 1024 * 1024, + _ => return Err(format!("Unknown byte unit in value: '{value}'")), + }; + + base_value + .checked_mul(multiplier) + .ok_or_else(|| format!("Byte value out of range: '{value}'")) +} + +/// Serde deserializer for byte-valued config fields that accepts either a +/// plain integer or a unit-suffixed string (e.g. `1G`). +/// +/// Applies to byte-valued profile fields such as `storage_limit`, +/// `bandwidth_limit`, and `spool_limit`. A malformed unit-suffixed string +/// surfaces the parse failure directly as a deserialization error. +pub fn deserialize_optional_bytes<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + #[derive(Deserialize)] + #[serde(untagged)] + enum IntOrString { + Int(i64), + Str(String), + } + + match Option::::deserialize(deserializer)? 
{ + None => Ok(None), + Some(IntOrString::Int(value)) => Ok(Some(value)), + Some(IntOrString::Str(text)) => parse_bytes(&text) + .map(Some) + .map_err(serde::de::Error::custom), + } +} + +/// Read an environment variable, returning `None` when it is unset, holds +/// non-UTF-8 bytes, **or is empty**. Every `NCD_*` / `NEURACORE_*` override +/// read through this helper treats an empty value as unset (so a shell that +/// exports an unset variable as the empty string falls through to the +/// configured profile value rather than clobbering it). This is most important +/// for the secret-bearing `NCD_API_KEY` / `NCD_CURRENT_ORG_ID`, but it applies +/// uniformly to the boolean and numeric overrides too β€” unlike Python's +/// `config.py`, which honours an empty string as a real override. +fn env_var(name: &str) -> Option { + std::env::var(name).ok().filter(|value| !value.is_empty()) +} + +/// Whether an environment value should be treated as truthy. +fn is_truthy(value: &str) -> bool { + YES_VALUES.contains(&value.to_lowercase().as_str()) +} + +/// Read `NCD_*` daemon-config overrides from the environment. +/// +/// Mirrors `config.py::ConfigManager._read_env_overrides`: unparseable +/// numeric values are skipped (leaving the field unset) rather than failing. 
+pub fn env_config_overrides() -> DaemonConfig { + let mut config = DaemonConfig::default(); + + if let Some(value) = env_var("NCD_STORAGE_LIMIT") { + if let Ok(parsed) = parse_bytes(&value) { + config.storage_limit = Some(parsed); + } + } + if let Some(value) = env_var("NCD_BANDWIDTH_LIMIT") { + if let Ok(parsed) = parse_bytes(&value) { + config.bandwidth_limit = Some(parsed); + } + } + if let Some(value) = env_var("NCD_SPOOL_LIMIT") { + if let Ok(parsed) = parse_bytes(&value) { + config.spool_limit = Some(parsed); + } + } + if let Some(value) = env_var("NCD_PATH_TO_STORE_RECORD") { + config.path_to_store_record = Some(value); + } + if let Some(value) = env_var("NCD_NUM_THREADS") { + if let Ok(parsed) = value.parse::() { + config.num_threads = Some(parsed); + } + } + if let Some(value) = env_var("NCD_KEEP_WAKELOCK_WHILE_UPLOAD") { + config.keep_wakelock_while_upload = Some(is_truthy(&value)); + } + if let Some(value) = env_var("NCD_OFFLINE") { + config.offline = Some(is_truthy(&value)); + } + if let Some(value) = env_var("NCD_API_KEY") { + config.api_key = Some(value); + } + if let Some(value) = env_var("NCD_CURRENT_ORG_ID") { + config.current_org_id = Some(value); + } + + config +} + +/// The active daemon profile name (`NEURACORE_DAEMON_PROFILE`), or `None` when +/// unset. The producer uses this to resolve profile-scoped settings (the spool +/// cap) without materialising the computed default profile. +pub fn active_profile_name() -> Option { + env_var("NEURACORE_DAEMON_PROFILE") +} + +/// Home directory, used as the root for `~/.neuracore` paths. +/// +/// Panics only if the home directory cannot be determined at all, in which +/// case the daemon cannot resolve any of its on-disk paths. +pub(crate) fn home_dir() -> PathBuf { + dirs::home_dir().expect("could not determine the user's home directory") +} + +/// Resolve the daemon PID file path. +/// +/// Mirrors `helpers.py::get_daemon_pid_path`: `NEURACORE_DAEMON_PID_PATH` or +/// `~/.neuracore/daemon.pid`. 
+pub fn pid_path() -> PathBuf { + match env_var("NEURACORE_DAEMON_PID_PATH") { + Some(value) => PathBuf::from(value), + None => home_dir().join(".neuracore").join("daemon.pid"), + } +} + +/// Resolve the daemon SQLite database path via the shared resolver, so the +/// daemon and the producer agree on its location. Panics with a clear message +/// when the home directory is required but unavailable β€” acceptable for the +/// daemon binary (it exits at startup before writing anything); the producer +/// surfaces the same condition as a Python error. +pub fn db_path() -> PathBuf { + crate::paths::db_path().expect("home directory required to resolve the daemon database path") +} + +/// Resolve the recordings root via the shared resolver (see [`db_path`]). +pub fn recordings_root_path() -> PathBuf { + crate::paths::recordings_root().expect("home directory required to resolve the recordings root") +} + +/// Resolved runtime environment: paths and flags read from +/// `NEURACORE_DAEMON_*`, `NDD_*`, and `NEURACORE_*` variables. +/// +/// Consolidates the helpers in `data_daemon/helpers.py` and the env-derived +/// constants in `data_daemon/const.py`. +#[derive(Debug, Clone)] +pub struct RuntimeEnv { + /// PID file path (`NEURACORE_DAEMON_PID_PATH`). + pub pid_path: PathBuf, + /// SQLite database path (`NEURACORE_DAEMON_DB_PATH`). + pub db_path: PathBuf, + /// Recordings root directory (`NEURACORE_DAEMON_RECORDINGS_ROOT`). + pub recordings_root: PathBuf, + /// Profile to launch with (`NEURACORE_DAEMON_PROFILE`). + pub profile: Option, + /// Debug logging flag (`NDD_DEBUG`). + pub debug: bool, + /// Backend API base URL (`NEURACORE_API_URL`). + pub api_url: String, +} + +impl RuntimeEnv { + /// Resolve the runtime environment from the current process environment. + pub fn from_env() -> Self { + RuntimeEnv { + pid_path: pid_path(), + db_path: db_path(), + recordings_root: recordings_root_path(), + profile: active_profile_name(), + // Mirrors `helpers.py::is_debug_mode`. 
+ debug: env_var("NDD_DEBUG") + .map(|value| value.to_lowercase() == "true") + .unwrap_or(false), + api_url: env_var("NEURACORE_API_URL").unwrap_or_else(|| DEFAULT_API_URL.to_string()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_bytes_handles_plain_integers_and_units() { + assert_eq!(parse_bytes("1024"), Ok(1024)); + assert_eq!(parse_bytes(" 2048 "), Ok(2048)); + assert_eq!(parse_bytes("1b"), Ok(1)); + assert_eq!(parse_bytes("1K"), Ok(1024)); + assert_eq!(parse_bytes("2kb"), Ok(2048)); + assert_eq!(parse_bytes("1M"), Ok(1024 * 1024)); + assert_eq!(parse_bytes("3mb"), Ok(3 * 1024 * 1024)); + assert_eq!(parse_bytes("1G"), Ok(1024 * 1024 * 1024)); + assert_eq!(parse_bytes("2gb"), Ok(2 * 1024 * 1024 * 1024)); + } + + #[test] + fn parse_bytes_rejects_invalid_values() { + assert!(parse_bytes("").is_err()); + assert!(parse_bytes("abc").is_err()); + assert!(parse_bytes("12tb").is_err()); + assert!(parse_bytes("1 g").is_err()); + } +} diff --git a/rust/data_daemon_shared/src/config/mod.rs b/rust/data_daemon_shared/src/config/mod.rs new file mode 100644 index 000000000..3aa1eab1c --- /dev/null +++ b/rust/data_daemon_shared/src/config/mod.rs @@ -0,0 +1,270 @@ +//! Daemon configuration: the `DaemonConfig` model, profile storage, and the +//! profile + environment + CLI override merge. +//! +//! This module lives in the shared `data_daemon_shared` crate (rather than the +//! daemon binary) so the daemon **and** the PyO3 producer resolve the same +//! effective settings from the same profile/env inputs. The producer needs the +//! spool-backlog cap ([`resolve_spool_limit_bytes`]); resolving it here means +//! the two processes never drift on what the active profile says. + +pub mod env; +pub mod profile; + +use std::path::Path; + +use serde::{Deserialize, Serialize}; + +use crate::config::profile::{ProfileError, ProfileManager}; + +/// Default profile name, matching `const.py::DEFAULT_PROFILE_NAME`. 
+pub const DEFAULT_PROFILE_NAME: &str = "default_profile"; + +// Defaults for a freshly built configuration, from `const.py`. +const DEFAULT_STORAGE_FREE_FRACTION: f64 = 0.5; +const DEFAULT_TARGET_DRAIN_HOURS: f64 = 12.0; +const DEFAULT_MIN_BANDWIDTH_MIB_S: f64 = 1.0; +const DEFAULT_MAX_BANDWIDTH_MIB_S: f64 = 20.0; +const SECONDS_PER_HOUR: f64 = 60.0 * 60.0; +const BYTES_PER_MIB: f64 = 1024.0 * 1024.0; + +/// Default cap on the producer's on-disk video spool backlog, in bytes. +/// +/// The producer spools raw-RGB NUT chunks to disk and the daemon transcodes +/// them; when the daemon can't keep up (small-CPU host, sustained 1080p video) +/// the un-encoded chunks would otherwise pile up unbounded β€” tens of GB β€” and +/// saturate a constrained disk, stalling `stop_recording`'s tail-chunk flush +/// for seconds. Bounding the spool backlog keeps the disk pressure flat. 2 GiB +/// is several 256 MiB chunks of headroom: large enough to absorb a transient +/// transcode stall without ever blocking the common case. +pub const DEFAULT_SPOOL_LIMIT_BYTES: i64 = 2 * 1024 * 1024 * 1024; + +/// Configuration options for a Neuracore data daemon instance. +/// +/// Every field is optional so partial profiles (e.g. a YAML file containing +/// only `offline: true`) and partial overrides round-trip cleanly. Field +/// order is fixed so that `profile get`'s JSON output is stable. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +pub struct DaemonConfig { + /// Maximum storage the daemon may use locally, in bytes. + #[serde(default, deserialize_with = "env::deserialize_optional_bytes")] + pub storage_limit: Option, + /// Maximum upload bandwidth, in bytes per second. + #[serde(default, deserialize_with = "env::deserialize_optional_bytes")] + pub bandwidth_limit: Option, + /// Cap on the producer's on-disk video spool backlog, in bytes. 
When the + /// un-encoded NUT backlog reaches this size the producer applies + /// backpressure to video frame logging rather than letting the spool grow + /// unbounded and fill the disk. See [`DEFAULT_SPOOL_LIMIT_BYTES`]. + #[serde(default, deserialize_with = "env::deserialize_optional_bytes")] + pub spool_limit: Option, + /// Directory where the daemon writes recording files. + pub path_to_store_record: Option, + /// Number of worker threads used by the daemon. + pub num_threads: Option, + /// Whether to keep a wakelock while uploading data. + pub keep_wakelock_while_upload: Option, + /// When true, disable uploads and only store data locally. + pub offline: Option, + /// Neuracore API key for authentication. + pub api_key: Option, + /// Organisation ID for the authenticated user. + pub current_org_id: Option, +} + +impl DaemonConfig { + /// Overlay `other`'s set fields on top of `self`. + /// + /// A field is overwritten only when `other` provides a value for it, so + /// `None` never clears an existing setting. + pub fn overlay(&mut self, other: &DaemonConfig) { + if other.storage_limit.is_some() { + self.storage_limit = other.storage_limit; + } + if other.bandwidth_limit.is_some() { + self.bandwidth_limit = other.bandwidth_limit; + } + if other.spool_limit.is_some() { + self.spool_limit = other.spool_limit; + } + if other.path_to_store_record.is_some() { + self.path_to_store_record = other.path_to_store_record.clone(); + } + if other.num_threads.is_some() { + self.num_threads = other.num_threads; + } + if other.keep_wakelock_while_upload.is_some() { + self.keep_wakelock_while_upload = other.keep_wakelock_while_upload; + } + if other.offline.is_some() { + self.offline = other.offline; + } + if other.api_key.is_some() { + self.api_key = other.api_key.clone(); + } + if other.current_org_id.is_some() { + self.current_org_id = other.current_org_id.clone(); + } + } +} + +/// Build a default daemon configuration based on local disk availability. 
+/// +/// Mirrors `config_manager/helpers.py::build_default_daemon_config`: the +/// recordings directory is created if missing, the storage limit is set to a +/// fraction of free disk space, and the bandwidth limit is derived from it and +/// clamped to a sane range. +pub fn build_default_daemon_config() -> std::io::Result { + let record_dir = env::recordings_root_path(); + std::fs::create_dir_all(&record_dir)?; + + let free_bytes = free_disk_bytes(&record_dir)?; + let storage_limit = (DEFAULT_STORAGE_FREE_FRACTION * free_bytes as f64) as i64; + + let raw_bandwidth = storage_limit as f64 / (DEFAULT_TARGET_DRAIN_HOURS * SECONDS_PER_HOUR); + let min_bandwidth = (DEFAULT_MIN_BANDWIDTH_MIB_S * BYTES_PER_MIB) as i64; + let max_bandwidth = (DEFAULT_MAX_BANDWIDTH_MIB_S * BYTES_PER_MIB) as i64; + let bandwidth_limit = (raw_bandwidth as i64).clamp(min_bandwidth, max_bandwidth); + + Ok(DaemonConfig { + storage_limit: Some(storage_limit), + bandwidth_limit: Some(bandwidth_limit), + spool_limit: Some(DEFAULT_SPOOL_LIMIT_BYTES), + path_to_store_record: Some(record_dir.to_string_lossy().into_owned()), + num_threads: Some(1), + keep_wakelock_while_upload: Some(false), + offline: Some(false), + api_key: None, + current_org_id: None, + }) +} + +/// Free bytes available to an unprivileged user on the filesystem holding +/// `path`. +fn free_disk_bytes(path: &Path) -> std::io::Result { + let stats = nix::sys::statvfs::statvfs(path).map_err(std::io::Error::from)?; + let blocks_available: u64 = stats.blocks_available(); + let fragment_size: u64 = stats.fragment_size(); + Ok(blocks_available * fragment_size) +} + +/// Resolve the effective daemon configuration from profile, environment, and +/// optional CLI overrides. +/// +/// Mirrors `config_manager/config.py::ConfigManager.resolve_effective_config`: +/// the named profile (or the computed default when `profile` is `None`) is the +/// base, `NCD_*` environment variables are layered on top, and CLI overrides +/// win last. 
+pub fn resolve_effective_config( + profiles: &ProfileManager, + profile: Option<&str>, + cli_overrides: Option<&DaemonConfig>, +) -> Result { + let mut config = profiles.get_profile(profile)?; + config.overlay(&env::env_config_overrides()); + if let Some(cli) = cli_overrides { + config.overlay(cli); + } + Ok(config) +} + +/// Resolve the effective spool-backlog cap (in bytes) for the **producer**, +/// which has no CLI args and must not trigger the directory-creating side +/// effects of the default-config build. +/// +/// Precedence mirrors [`resolve_effective_config`] minus the CLI layer: the +/// `NCD_SPOOL_LIMIT` env override wins, then the active named profile's +/// `spool_limit` (`NEURACORE_DAEMON_PROFILE`), then [`DEFAULT_SPOOL_LIMIT_BYTES`]. +/// A configured value of `0` is honoured verbatim and disables the bound. The +/// unnamed/default profile is deliberately *not* materialised here (that path +/// runs `build_default_daemon_config`, which creates the recordings dir and +/// stats the filesystem) β€” an unset profile simply falls through to the default. 
+pub fn resolve_spool_limit_bytes() -> i64 { + if let Some(value) = env::env_config_overrides().spool_limit { + return value; + } + if let Some(name) = env::active_profile_name() { + if let Ok(config) = ProfileManager::new().get_profile(Some(&name)) { + if let Some(value) = config.spool_limit { + return value; + } + } + } + DEFAULT_SPOOL_LIMIT_BYTES +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn overlay_only_overwrites_set_fields() { + let mut base = DaemonConfig { + storage_limit: Some(100), + offline: Some(false), + api_key: Some("base-key".to_string()), + ..DaemonConfig::default() + }; + let overrides = DaemonConfig { + storage_limit: Some(200), + offline: None, + api_key: None, + num_threads: Some(4), + ..DaemonConfig::default() + }; + + base.overlay(&overrides); + + assert_eq!(base.storage_limit, Some(200)); + assert_eq!(base.offline, Some(false)); + assert_eq!(base.api_key.as_deref(), Some("base-key")); + assert_eq!(base.num_threads, Some(4)); + } + + #[test] + fn overlay_sets_spool_limit_when_provided() { + let mut base = DaemonConfig::default(); + base.overlay(&DaemonConfig { + spool_limit: Some(1024), + ..DaemonConfig::default() + }); + assert_eq!(base.spool_limit, Some(1024)); + // A subsequent overlay without the field leaves it untouched. 
+ base.overlay(&DaemonConfig::default()); + assert_eq!(base.spool_limit, Some(1024)); + } + + #[test] + fn json_output_keeps_python_field_order() { + let config = DaemonConfig { + storage_limit: Some(1), + bandwidth_limit: Some(2), + spool_limit: Some(3), + path_to_store_record: Some("/tmp/x".to_string()), + num_threads: Some(1), + keep_wakelock_while_upload: Some(false), + offline: Some(false), + api_key: None, + current_org_id: None, + }; + let json = serde_json::to_string_pretty(&config).unwrap(); + let keys: Vec<&str> = json + .lines() + .filter_map(|line| line.trim().strip_prefix('"')) + .filter_map(|rest| rest.split('"').next()) + .collect(); + assert_eq!( + keys, + [ + "storage_limit", + "bandwidth_limit", + "spool_limit", + "path_to_store_record", + "num_threads", + "keep_wakelock_while_upload", + "offline", + "api_key", + "current_org_id", + ] + ); + } +} diff --git a/rust/data_daemon_shared/src/config/profile.rs b/rust/data_daemon_shared/src/config/profile.rs new file mode 100644 index 000000000..8c79f8575 --- /dev/null +++ b/rust/data_daemon_shared/src/config/profile.rs @@ -0,0 +1,296 @@ +//! Daemon profile storage: load, create, update, delete, and list the YAML +//! profile files under `~/.neuracore/data_daemon/profiles/`. +//! +//! Mirrors `config_manager/profiles.py::ProfileManager`. The on-disk format is +//! YAML to match the existing layout the integration tests write directly +//! (e.g. `shared/profiles.py::scoped_offline_profile`). + +use std::fs; +use std::io::{self, Write}; +use std::path::PathBuf; + +use thiserror::Error; + +use crate::config::{build_default_daemon_config, DaemonConfig}; + +/// Errors raised while managing profiles. The `Display` strings are surfaced +/// verbatim in CLI output, so their wording is part of the CLI contract. +#[derive(Debug, Error)] +pub enum ProfileError { + /// The requested profile file does not exist. 
+ #[error("Profile '{0}' not found.")] + NotFound(String), + /// A profile with the same name already exists. + #[error("Profile '{0}' already exists.")] + AlreadyExists(String), + /// An I/O error occurred while reading or writing a profile file. + #[error(transparent)] + Io(#[from] io::Error), + /// A profile file could not be parsed as a valid `DaemonConfig`. + #[error(transparent)] + Yaml(#[from] serde_yaml::Error), +} + +/// Manages daemon profiles stored on disk. +pub struct ProfileManager { + home: PathBuf, +} + +impl Default for ProfileManager { + fn default() -> Self { + Self::new() + } +} + +impl ProfileManager { + /// Create a `ProfileManager` rooted at the current user's home directory. + pub fn new() -> Self { + ProfileManager { + home: super::env::home_dir(), + } + } + + /// Create a `ProfileManager` rooted at an explicit home directory. + #[cfg(test)] + pub fn with_home(home: PathBuf) -> Self { + ProfileManager { home } + } + + /// Directory where daemon profiles are stored. + fn profiles_dir(&self) -> PathBuf { + self.home + .join(".neuracore") + .join("data_daemon") + .join("profiles") + } + + /// Ensure the profiles directory exists and return its path. + fn ensure_profiles_dir(&self) -> io::Result { + let profiles_dir = self.profiles_dir(); + fs::create_dir_all(&profiles_dir)?; + Ok(profiles_dir) + } + + /// Filesystem path for a named profile (creating the directory if needed). + fn profile_path(&self, profile: &str) -> io::Result { + Ok(self.ensure_profiles_dir()?.join(format!("{profile}.yaml"))) + } + + /// List available profile names, sorted, without the `.yaml` suffix. 
+ pub fn list_profiles(&self) -> Vec { + let profiles_dir = self.profiles_dir(); + let entries = match fs::read_dir(&profiles_dir) { + Ok(entries) => entries, + Err(error) if error.kind() == io::ErrorKind::NotFound => return Vec::new(), + Err(error) => { + tracing::warn!(%error, dir = %profiles_dir.display(), "failed to read profiles directory"); + return Vec::new(); + } + }; + + let mut names: Vec = entries + .flatten() + .filter_map(|entry| { + let path = entry.path(); + if path.is_file() && path.extension().is_some_and(|ext| ext == "yaml") { + path.file_stem() + .map(|stem| stem.to_string_lossy().into_owned()) + } else { + None + } + }) + .collect(); + names.sort(); + names + } + + /// Load a profile configuration from disk. + /// + /// When `profile` is `None`, returns the computed default configuration β€” + /// matching `ProfileManager.get_profile(None)`. + pub fn get_profile(&self, profile: Option<&str>) -> Result { + let Some(name) = profile else { + return Ok(build_default_daemon_config()?); + }; + + let profile_path = self.profile_path(name)?; + let contents = match fs::read_to_string(&profile_path) { + Ok(contents) => contents, + Err(error) if error.kind() == io::ErrorKind::NotFound => { + return Err(ProfileError::NotFound(name.to_string())); + } + Err(error) => return Err(error.into()), + }; + + // An empty file parses to an all-default config. + if contents.trim().is_empty() { + return Ok(DaemonConfig::default()); + } + Ok(serde_yaml::from_str(&contents)?) + } + + /// Create a new profile populated with default configuration values. 
+ pub fn create_profile(&self, profile: &str) -> Result<(), ProfileError> { + let profile_path = self.profile_path(profile)?; + let config = build_default_daemon_config()?; + let serialized = serde_yaml::to_string(&config)?; + + match fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&profile_path) + { + Ok(mut file) => { + file.write_all(serialized.as_bytes())?; + Ok(()) + } + Err(error) if error.kind() == io::ErrorKind::AlreadyExists => { + Err(ProfileError::AlreadyExists(profile.to_string())) + } + Err(error) => Err(error.into()), + } + } + + /// Update an existing profile by overlaying the provided field values. + /// + /// Returns the updated configuration. Fields left unset in `updates` keep + /// their existing values, matching pydantic's `model_copy(update=...)`. + pub fn update_profile( + &self, + profile: &str, + updates: &DaemonConfig, + ) -> Result { + let profile_path = self.profile_path(profile)?; + let mut config = self.get_profile(Some(profile))?; + config.overlay(updates); + + let serialized = serde_yaml::to_string(&config)?; + // Write to a sibling temp file then rename so a crash mid-write can't + // leave a truncated, unparseable profile behind (rename is atomic on the + // same filesystem). + let temp_path = profile_path.with_extension("yaml.tmp"); + fs::write(&temp_path, serialized)?; + fs::rename(&temp_path, &profile_path)?; + Ok(config) + } + + /// Delete an existing profile. + pub fn delete_profile(&self, profile: &str) -> Result<(), ProfileError> { + let profile_path = self.profile_path(profile)?; + match fs::remove_file(&profile_path) { + Ok(()) => Ok(()), + Err(error) if error.kind() == io::ErrorKind::NotFound => { + Err(ProfileError::NotFound(profile.to_string())) + } + Err(error) => Err(error.into()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// A `ProfileManager` rooted at a throwaway temp directory. 
+ fn temp_manager() -> (tempfile::TempDir, ProfileManager) { + let home = tempfile::tempdir().expect("create temp home"); + let manager = ProfileManager::with_home(home.path().to_path_buf()); + (home, manager) + } + + #[test] + fn list_is_empty_before_any_profile_exists() { + let (_home, manager) = temp_manager(); + assert!(manager.list_profiles().is_empty()); + } + + #[test] + fn create_then_get_and_list_round_trips() { + let (_home, manager) = temp_manager(); + + manager.create_profile("alpha").expect("create alpha"); + let config = manager.get_profile(Some("alpha")).expect("get alpha"); + + assert_eq!(config.offline, Some(false)); + assert_eq!(config.num_threads, Some(1)); + assert_eq!(manager.list_profiles(), vec!["alpha".to_string()]); + } + + #[test] + fn create_rejects_duplicate_profile() { + let (_home, manager) = temp_manager(); + manager.create_profile("alpha").expect("create alpha"); + + let error = manager.create_profile("alpha").expect_err("duplicate"); + assert!(matches!(error, ProfileError::AlreadyExists(name) if name == "alpha")); + } + + #[test] + fn update_overlays_fields_and_persists() { + let (_home, manager) = temp_manager(); + manager.create_profile("alpha").expect("create alpha"); + + let updates = DaemonConfig { + storage_limit: Some(4096), + offline: Some(true), + ..DaemonConfig::default() + }; + manager.update_profile("alpha", &updates).expect("update"); + + let reloaded = manager.get_profile(Some("alpha")).expect("reload"); + assert_eq!(reloaded.storage_limit, Some(4096)); + assert_eq!(reloaded.offline, Some(true)); + // Untouched fields survive the update. 
+ assert_eq!(reloaded.num_threads, Some(1)); + } + + #[test] + fn update_missing_profile_reports_not_found() { + let (_home, manager) = temp_manager(); + let error = manager + .update_profile("ghost", &DaemonConfig::default()) + .expect_err("missing"); + assert!(matches!(error, ProfileError::NotFound(name) if name == "ghost")); + } + + #[test] + fn get_missing_profile_reports_not_found() { + let (_home, manager) = temp_manager(); + let error = manager.get_profile(Some("ghost")).expect_err("missing"); + assert_eq!(error.to_string(), "Profile 'ghost' not found."); + } + + #[test] + fn delete_removes_profile_and_then_reports_not_found() { + let (_home, manager) = temp_manager(); + manager.create_profile("alpha").expect("create alpha"); + + manager.delete_profile("alpha").expect("delete alpha"); + assert!(manager.list_profiles().is_empty()); + + let error = manager.delete_profile("alpha").expect_err("second delete"); + assert!(matches!(error, ProfileError::NotFound(name) if name == "alpha")); + } + + #[test] + fn partial_yaml_profile_loads_with_defaults() { + let (_home, manager) = temp_manager(); + let profiles_dir = manager.ensure_profiles_dir().expect("profiles dir"); + // Matches what the integration tests write directly. 
+ fs::write(profiles_dir.join("partial.yaml"), "offline: true\n").expect("write"); + + let config = manager.get_profile(Some("partial")).expect("load partial"); + assert_eq!(config.offline, Some(true)); + assert_eq!(config.storage_limit, None); + } + + #[test] + fn yaml_profile_accepts_unit_suffixed_byte_values() { + let (_home, manager) = temp_manager(); + let profiles_dir = manager.ensure_profiles_dir().expect("profiles dir"); + fs::write(profiles_dir.join("units.yaml"), "storage_limit: 1G\n").expect("write"); + + let config = manager.get_profile(Some("units")).expect("load units"); + assert_eq!(config.storage_limit, Some(1024 * 1024 * 1024)); + } +} diff --git a/rust/data_daemon_shared/src/lib.rs b/rust/data_daemon_shared/src/lib.rs new file mode 100644 index 000000000..61bf211c7 --- /dev/null +++ b/rust/data_daemon_shared/src/lib.rs @@ -0,0 +1,771 @@ +//! Shared definitions for the Neuracore data daemon. +//! +//! Both the daemon binary and the PyO3 producer crate +//! (`data_daemon_producer`) depend on this crate so they agree on everything +//! that crosses the process boundary: +//! +//! - the iceoryx2 service-name conventions ([`service_name`]), +//! - the [`Envelope`] enum carried over the `commands` service, and +//! - the helpers to (de)serialize that envelope to/from the byte slice payload +//! iceoryx2 transports. +//! +//! It also owns the resolution the two processes must compute identically off +//! the same inputs: the daemon configuration model ([`config`]) and the +//! filesystem layout ([`paths`]). Keeping these here is what stops the daemon +//! and producer from drifting on, say, the spool-backlog cap or the recordings +//! root. +//! +//! Envelopes are encoded with [`postcard`], a compact length-prefixed binary +//! format. Payload bytes travel raw (length-prefix + bytes β€” no base64 or +//! `[u8]β†’[i32]` expansion that JSON would force), and `f64` fields round-trip +//! bit-exact because postcard writes the IEEE-754 byte pattern directly. 
The +//! schema is forward-compatible: postcard's enum representation tags variants +//! with a varint-encoded u32 discriminant (one byte for the first 128 +//! variants), so new envelope variants append cleanly. +//! +//! # The thin-shipper model +//! +//! The producer is a *thin shipper*: it knows nothing about recordings. Every +//! envelope is tagged only with its **source** (`robot_id`, `robot_instance`) +//! and β€” for data β€” its **sensor** (`data_type`, `sensor_name`) and capture +//! `timestamp_ns`. The producer publishes three fire-and-forget lifecycle +//! events ([`Envelope::StartRecording`] / [`Envelope::StopRecording`] / +//! [`Envelope::CancelRecording`]) carrying the lifecycle wall-clock timestamp, +//! and the daemon decides β€” from its per-source active-window map β€” which +//! recording (if any) each datum belongs to. There is **no** `recording_id`, +//! `recording_index`, `trace_id`, or `sequence_number` on the wire; the daemon +//! assigns and stores those after routing. +//! +//! All envelopes β€” lifecycle, joints/scalars, and the chunk-ready +//! notifications for video traces β€” travel over a single `commands` service. +//! Video pixel buffers themselves are *not* on the IPC bus: the producer +//! spools them to disk as NUT chunks and announces each finished chunk with an +//! [`Envelope::VideoChunkReady`] envelope. See [`service_name`]. + +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +pub mod config; +pub mod paths; + +/// iceoryx2 service-name conventions shared by daemon and producer. +pub mod service_name { + /// Pub/sub service carrying every IPC envelope: lifecycle + /// (`start_recording`, `stop_recording`, `cancel_recording`), non-video + /// `data` / `batched_data` envelopes (joints, scalars, custom streams), + /// and the [`crate::Envelope::VideoChunkReady`] notifications that hand off + /// disk-spooled video chunks to the daemon. 
+ /// + /// There is no longer a dedicated video service β€” the producer writes + /// pixel data straight to disk, so the IPC bus only ever carries + /// metadata-sized payloads. + pub const COMMANDS: &str = "neuracore/data_daemon/commands"; + + /// Maximum size of a single `commands`-service sample. + /// + /// All envelope payloads are now metadata-sized: non-video frames are + /// small JSON, the integration matrix's 1000-joint batch encodes to + /// ~90 KiB, and `VideoChunkReady`'s `frame_timestamps_s` vector is + /// ~30 KiB even for a 128 MiB 1080p chunk. 1 MiB leaves generous + /// headroom for the worst case. + pub const COMMANDS_MAX_PAYLOAD_BYTES: usize = 1024 * 1024; + + /// Worst-case postcard size of one frame's contribution to a + /// [`crate::Envelope::VideoChunkReady`] announcement: a `frame_timestamps_ns` + /// element is an `i64` zigzag varint (≀10 bytes for a full-range Unix-ns + /// value) and a `frame_timestamps_s` element is a fixed 8-byte `f64`. + pub const VIDEO_CHUNK_BYTES_PER_FRAME: usize = 10 + 8; + + /// Bytes held back from [`COMMANDS_MAX_PAYLOAD_BYTES`] for a + /// `VideoChunkReady` envelope's fixed fields β€” the enum tag, source ids, + /// dimensions, counts and the two vector length prefixes β€” so the frame cap + /// below is computed against only the room left for the per-frame vectors. + pub const VIDEO_CHUNK_HEADER_RESERVE: usize = 4 * 1024; + + /// Maximum number of frames a single video chunk may carry. + /// + /// The producer seals a chunk at the **lower** of its byte threshold and + /// this frame cap. The cap exists so a [`crate::Envelope::VideoChunkReady`] + /// announcement always fits one [`COMMANDS_MAX_PAYLOAD_BYTES`] sample: the + /// per-frame `frame_timestamps_{ns,s}` vectors are the only unbounded part + /// of the envelope, so a long recording of small frames β€” which never + /// reaches the byte threshold mid-recording β€” would otherwise accumulate + /// enough frames in a single chunk to overflow the slice. 
The announcement + /// then fails to publish and the whole recording's video is lost. Guarded + /// by `video_chunk_ready_at_frame_cap_fits_commands_slice`. + pub const MAX_VIDEO_CHUNK_FRAMES: u32 = ((COMMANDS_MAX_PAYLOAD_BYTES + - VIDEO_CHUNK_HEADER_RESERVE) + / VIDEO_CHUNK_BYTES_PER_FRAME) as u32; + + /// Subscriber buffer depth for the lifecycle service. + /// + /// Lossless, in-order delivery is *not* a function of this depth: the + /// service is opened with `enable_safe_overflow(false)`, so a full + /// buffer makes the producer's `Block` strategy wait rather than silently + /// evict the oldest sample. (Were overflow left at iceoryx2's default the + /// oldest sample would be dropped, stranding the daemon's per-source + /// routing.) The depth therefore only trades producer-blocking frequency + /// against memory. + /// + /// The depth is bounded from *above* by memory, not just throughput. + /// iceoryx2 sizes a publisher's data segment as + /// `max_subscribers Γ— (buffer + borrowed) Γ— initial_max_slice_len`, and + /// the resident footprint is `buffer Γ— actual_sample_size`. The largest + /// `commands` sample is a [`crate::Envelope::BatchedData`] envelope β€” the + /// integration matrix's 1000-joint worst case encodes to ~90 KiB β€” so a + /// 1024-deep buffer would retain ~94 MiB of pages per publisher and + /// exhaust the 64 MiB devcontainer `/dev/shm`. + /// + /// 64 keeps that worst case at ~6 MiB per publisher while staying deep + /// enough for steady state: the daemon drains every 1 ms and batched + /// joint logging emits one envelope per timestep, so the buffer never + /// fills under normal load. + pub const LIFECYCLE_SUBSCRIBER_BUFFER_SIZE: usize = 64; + + /// Maximum number of concurrent publishers per service. 
+ /// + /// iceoryx2's default cap of 2 is unworkable for the SDK's threading + /// model: the native producer parks its iceoryx2 publisher in a + /// `thread_local!` (publishers are `!Sync`), so each Python OS thread + /// that calls into the producer builds its own. The integration matrix + /// fans up to ~32 worker threads (`parallel_contexts=8` Γ— three joint + /// roles + one RGB role) and the orchestrator thread also publishes + /// lifecycle envelopes, comfortably exceeding the default. Hitting the + /// cap surfaces as + /// `PublisherCreateError::ExceedsMaxSupportedPublishers` from + /// `publisher_builder().create()` and the SDK can't drain the trace. + /// + /// Both sides agree on this constant via `open_or_create`, so the first + /// party in (the daemon at startup) seeds the service with the larger + /// cap and the producer's later open observes the same attribute set. + pub const MAX_PUBLISHERS_PER_SERVICE: usize = 128; + + /// Maximum number of concurrent subscribers per service. + /// + /// The daemon opens exactly one subscriber per service; producers never + /// subscribe. iceoryx2 sizes every publisher's data segment as + /// `max_subscribers Γ— (buffer + borrowed) Γ— slice`, so the default of 8 + /// inflates each segment 8Γ— for subscribers that never exist. Pinning + /// this to 1 keeps the segment proportional to the real topology. + pub const MAX_SUBSCRIBERS_PER_SERVICE: usize = 1; + + /// Maximum number of concurrent iceoryx2 nodes attached to any service. + /// + /// One node is built per **thread** (the `thread_local!` PRODUCER slot in + /// the native producer). The integration matrix fans to 8 parallel worker + /// subprocesses each running 5+ threads (main + RGB + joint roles), giving + /// 40+ nodes plus the daemon. 512 gives enough headroom that the cap is + /// never approached in any test configuration. 
+ /// + /// **Failure mode when the cap *is* reached** (a long-lived process that + /// churns through >512 distinct OS threads, each lazily building its own + /// node on first `log_*`): `open_or_create` on the service returns + /// `ExceedsMaxNumberOfNodes`. In the producer that surfaces as a + /// `ProducerError` the publish path swallows (the sample is dropped, logged + /// once) β€” data from new threads silently stops flowing; in the daemon a + /// failed attach is fatal to that service. The node count never shrinks + /// while the process lives (nodes are released only on process exit / fork), + /// so a thread-churning workload leaks toward the cap monotonically. + /// + /// The scalable fix (one node shared per process, not per thread) is tracked + /// separately and would reduce the live count to single digits and remove + /// the cliff entirely. + pub const MAX_NODES_PER_SERVICE: usize = 512; + + /// Request-response service the SDK uses to resolve a recording's + /// daemon-owned cloud `recording_id`. + /// + /// The cloud id is minted asynchronously by the start notifier, so the SDK + /// (`nc.start_recording(wait=True)`, tests) asks the daemon for it over this + /// service instead of reading the daemon's private SQLite DB directly β€” the + /// daemon answers authoritatively from its own state. A request carries the + /// source + the recording's capture marker; the reply carries the id once + /// minted (or "not yet"). See [`crate::RecordingIdQuery`] / [`crate::RecordingIdReply`]. + pub const QUERIES: &str = "neuracore/data_daemon/queries"; + + /// Maximum size of a single `queries`-service sample. Both the request and + /// the reply are a handful of UUID strings + integers; 4 KiB is generous. + pub const QUERIES_MAX_PAYLOAD_BYTES: usize = 4 * 1024; + + /// Maximum number of concurrent query clients. 
Mirrors + /// [`MAX_PUBLISHERS_PER_SERVICE`]: the native producer parks one client port + /// per OS thread (iceoryx2 ports are `!Sync`), so the cap must cover the + /// integration matrix's full thread fan-out. + pub const MAX_QUERY_CLIENTS_PER_SERVICE: usize = 128; + + /// Maximum number of concurrent query servers. The daemon opens exactly one. + pub const MAX_QUERY_SERVERS_PER_SERVICE: usize = 1; +} + +/// A single message exchanged between the producer and the daemon. +/// +/// Every variant is tagged with its **source** (`robot_id`, `robot_instance`). +/// Data variants additionally carry their **sensor** (`data_type`, +/// `sensor_name`) and capture `timestamp_ns`. No recording or trace identity +/// travels on the wire β€” the daemon owns it (see the crate-level docs). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum Envelope { + /// Producer announces that a recording has started for a source. + /// + /// The daemon opens an active window for `(robot_id, robot_instance)` at + /// `publish_timestamp_ns`, allocates the local `recording_index`, and + /// inserts the recording row. Processed immediately on arrival (bypasses + /// the holdback). + StartRecording { + robot_id: String, + robot_instance: i64, + /// Optional robot human-readable name. + robot_name: Option, + /// Optional dataset identifier. + dataset_id: Option, + /// Optional dataset human-readable name. + dataset_name: Option, + /// Producer wall-clock publish time (Unix nanoseconds) at which the + /// recording window opens β€” the inclusive lower bound of the window's + /// membership range, on the same publish clock as every `Data` + /// envelope. The **only** key used for window membership, so routing + /// never depends on the caller's capture clock. + publish_timestamp_ns: i64, + /// Caller-supplied capture time (Unix nanoseconds) for the recording's + /// start β€” the recording's *own* clock, or the publish time when the + /// caller supplied none. 
Stored as the row's `start_timestamp_ns` and + /// POSTed to the backend as `start_time`; never used for routing. + timestamp_ns: i64, + }, + /// Producer announces that the source's active recording has stopped. + /// + /// The daemon sets the window's exclusive upper bound and begins the + /// drain/finalise countdown. Processed immediately on arrival. + StopRecording { + robot_id: String, + robot_instance: i64, + /// Producer wall-clock publish time (Unix nanoseconds) at which the + /// recording window closes β€” the exclusive upper bound of the + /// membership range, on the same publish clock as the data envelopes. + publish_timestamp_ns: i64, + /// Caller-supplied capture time (Unix nanoseconds) for the recording's + /// stop β€” or the publish time when the caller supplied none. Stored as + /// the row's `stop_timestamp_ns` and POSTed to the backend as + /// `end_time`; never used for routing. + timestamp_ns: i64, + }, + /// Producer cancels the source's active recording β€” the daemon drops every + /// in-flight per-trace actor, deletes the on-disk artefacts, marks the + /// recording row cancelled, and uploads nothing. Processed immediately on + /// arrival; the daemon is idempotent. + CancelRecording { + robot_id: String, + robot_instance: i64, + /// Caller-supplied capture time (Unix nanoseconds) for the cancel β€” or + /// the publish time when the caller supplied none. A cancel is a + /// recording stop that discards data, so the daemon stores this as the + /// row's `stop_timestamp_ns` and POSTs it as the backend `end_time`, + /// exactly like `StopRecording`. No window-boundary `publish_timestamp_ns` + /// is carried because cancelling drops the window outright. + timestamp_ns: i64, + }, + /// Producer delivers one sensor sample. + /// + /// The payload is opaque to the IPC layer; the per-trace actor parses it + /// according to `data_type` and writes it through the JSON writer. 
The + /// daemon holds the datum for the configured holdback, then routes it into + /// the source's window whose `[started_at_ns, stopped_at_ns)` contains its + /// `publish_timestamp_ns` — never the capture-clock `timestamp_ns`. + /// + /// Video frames do *not* travel as `Data` envelopes — they are spooled to + /// disk by the producer and announced via [`Envelope::VideoChunkReady`] + /// instead. + Data { + robot_id: String, + robot_instance: i64, + /// Wire data-type label (e.g. `"JOINT_POSITIONS"`, `"RGB_IMAGES"`). + data_type: String, + /// Per-stream sensor label (joint name, camera id, …) — disambiguates + /// traces that share a `data_type`. Persisted to the trace row's + /// `data_type_name` column. + sensor_name: Option, + /// Producer wall-clock time (Unix nanoseconds) stamped at the moment + /// this envelope is published. This is the **only** key used for + /// window membership — it is decoupled from the data's own capture + /// time, so the daemon's routing never depends on what clock the + /// caller timestamps data with. Lifecycle events carry the same kind + /// of publish-clock timestamp, so a datum belongs to the window whose + /// `[started_at_ns, stopped_at_ns)` brackets its publish time. + publish_timestamp_ns: i64, + /// Caller-supplied capture time in nanoseconds since the Unix epoch — + /// the data's *own* clock, written into the trace content. Not used + /// for routing. + timestamp_ns: i64, + /// Optional caller-supplied capture time in seconds (f64). Postcard + /// writes this bit-exact. + timestamp_s: Option, + /// Opaque per-sample bytes. Postcard transports these as + /// length-prefix + raw bytes (no expansion). + payload: Vec, + }, + /// Producer delivers one sample for each of several sensors captured at the + /// same instant — used by scalar joint logging, where a robot's N joints + /// are sampled together.
+ /// + /// Collapsing N [`Envelope::Data`] envelopes into one IPC message cuts the + /// per-call iceoryx2 publish count (and the pressure on the lifecycle + /// buffer) by a factor of N. Because every item shares the batch's + /// `publish_timestamp_ns`, the whole batch belongs to one window — the daemon + /// holds and routes it as a single unit. + BatchedData { + robot_id: String, + robot_instance: i64, + /// Wire data-type label shared by every item (e.g. `"JOINT_POSITIONS"`). + /// A batch is one `log_*` call for a single sensor group, so the type + /// is constant across the batch — carried once here rather than + /// duplicated into every [`BatchedDataItem`] (which, for the 1000-joint + /// worst case, was ~16% of the envelope's wire size). + data_type: String, + /// Producer wall-clock publish time (Unix nanoseconds), shared by every + /// item. The sole key for window membership (see [`Envelope::Data`]). + publish_timestamp_ns: i64, + /// Caller-supplied capture time (ns), shared by every item — content, + /// not routing. + timestamp_ns: i64, + /// Optional caller-supplied capture time in seconds, shared by every + /// item. + timestamp_s: Option, + /// Per-sensor samples; each routes to one trace actor. + items: Vec, + }, + /// Producer announces a finished NUT chunk for a video trace. + /// + /// The producer spools captured RGB frames to disk as a sequence of NUT + /// chunks under a recording-independent spool dir keyed by source + sensor, + /// each named `chunk_{publish_timestamp_ns}_{thread_id}.nut` so two recordings on the + /// same source never collide on a filename. When a chunk crosses the flush + /// threshold (or a lifecycle event rolls it) the producer finishes the NUT + /// and publishes this envelope so the daemon can route the chunk into the + /// right recording window (by `publish_timestamp_ns`), relink the NUT under + /// the recording, and encode it to a sealed MP4 segment.
Per-frame `timestamp_s` values are + /// carried inline so the daemon-side `trace.json` sidecar matches the + /// bit-exact assertion. + VideoChunkReady { + robot_id: String, + robot_instance: i64, + /// Wire data-type label (e.g. `"RGB_IMAGES"`). + data_type: String, + /// Per-stream sensor label (camera id). + sensor_name: Option, + /// Producer wall-clock ns stamped when the chunk's NUT file was opened + /// (its first frame). Serves two purposes: it is the key that routes + /// the whole chunk into a recording window β€” the open moment lies + /// strictly inside the recording, so membership is unambiguous β€” and, + /// with `thread_id`, it forms the chunk's spool filename + /// `chunk_{publish_timestamp_ns}_{thread_id}.nut` so the daemon can + /// reconstruct the spool path. + publish_timestamp_ns: i64, + /// OS thread id (`gettid`) of the producer thread that spooled the + /// chunk. Disambiguates the spool filename across threads and is a + /// useful breadcrumb when inspecting the spool directory. + thread_id: i64, + /// Frame width in pixels (constant across a trace). + width: u32, + /// Frame height in pixels (constant across a trace). + height: u32, + /// Size of the NUT file in bytes. + byte_count: u64, + /// Number of frames packed into this chunk. + frame_count: u32, + /// Per-frame capture time in nanoseconds since the Unix epoch, in + /// arrival order. Length equals `frame_count`. Used to bucket the + /// chunk's frames against the source's active-window map. + frame_timestamps_ns: Vec, + /// Per-frame `timestamp_s` (Unix seconds, f64) in arrival order. + /// Length equals `frame_count`; values round-trip bit-exact through + /// postcard for the metadata sidecar. + frame_timestamps_s: Vec, + }, +} + +/// One sensor's sample inside an [`Envelope::BatchedData`] batch. 
+/// +/// Carries only the fields that differ between items β€” `data_type`, +/// `timestamp_ns` and `timestamp_s` are hoisted onto the parent envelope +/// because every sensor in a batch shares them (one `log_*` call, one sensor +/// group, one capture instant). Each item self-tags its `sensor_name` because +/// there is no pre-registered trace to look up. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct BatchedDataItem { + /// Per-stream sensor label (joint name, …). + pub sensor_name: Option, + /// Opaque per-sample bytes. Transported length-prefix + raw, exactly as + /// [`Envelope::Data`]'s `payload`. + pub payload: Vec, +} + +impl Envelope { + /// Variant name used in tracing/logging. + pub fn kind(&self) -> &'static str { + match self { + Envelope::StartRecording { .. } => "start_recording", + Envelope::StopRecording { .. } => "stop_recording", + Envelope::CancelRecording { .. } => "cancel_recording", + Envelope::Data { .. } => "data", + Envelope::BatchedData { .. } => "batched_data", + Envelope::VideoChunkReady { .. } => "video_chunk_ready", + } + } + + /// Encode the envelope as a postcard byte vector ready for an iceoryx2 + /// sample. + pub fn encode(&self) -> Result, EnvelopeCodecError> { + encode_postcard(self) + } + + /// Decode an envelope from the byte slice carried in an iceoryx2 sample. + pub fn decode(bytes: &[u8]) -> Result { + decode_postcard(bytes) + } +} + +/// Encode a wire type as a postcard byte vector. +fn encode_postcard(value: &T) -> Result, EnvelopeCodecError> { + postcard::to_allocvec(value).map_err(EnvelopeCodecError::Encode) +} + +/// Decode a wire type from a postcard byte slice. +fn decode_postcard(bytes: &[u8]) -> Result { + postcard::from_bytes(bytes).map_err(EnvelopeCodecError::Decode) +} + +/// Errors raised while encoding or decoding an [`Envelope`]. +#[derive(Debug, Error)] +pub enum EnvelopeCodecError { + /// Failed to serialize the envelope. 
+ #[error("failed to encode envelope: {0}")] + Encode(#[source] postcard::Error), + /// Failed to deserialize the envelope. + #[error("failed to decode envelope: {0}")] + Decode(#[source] postcard::Error), +} + +/// Request sent by the SDK on the [`service_name::QUERIES`] service to resolve a +/// recording's daemon-owned cloud `recording_id`. +/// +/// The recording is identified exactly the way the daemon stored it: the +/// `(robot_id, robot_instance)` source plus `timestamp_ns` β€” the producer's +/// capture marker returned by `start_recording`, persisted verbatim as the +/// recording row's `start_timestamp_ns`. Matching on the marker (not `<=`) +/// resolves precisely that recording, never an earlier one for the same source. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct RecordingIdQuery { + pub robot_id: String, + pub robot_instance: i64, + /// The recording's capture marker (Unix nanoseconds). + pub timestamp_ns: i64, +} + +/// Reply to a [`RecordingIdQuery`]. +/// +/// `recording_id` is `None` while the start notifier has not yet minted the +/// cloud id (or no matching, non-cancelled recording exists); the SDK re-asks +/// until it is `Some` or its own timeout elapses. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct RecordingIdReply { + /// The daemon-owned cloud recording id, once available. + pub recording_id: Option, +} + +impl RecordingIdQuery { + /// Encode as a postcard byte vector for a `queries`-service request sample. + pub fn encode(&self) -> Result, EnvelopeCodecError> { + encode_postcard(self) + } + + /// Decode from the byte slice carried in a `queries`-service request sample. + pub fn decode(bytes: &[u8]) -> Result { + decode_postcard(bytes) + } +} + +impl RecordingIdReply { + /// Encode as a postcard byte vector for a `queries`-service response sample. 
+ pub fn encode(&self) -> Result, EnvelopeCodecError> { + encode_postcard(self) + } + + /// Decode from the byte slice carried in a `queries`-service response sample. + pub fn decode(bytes: &[u8]) -> Result { + decode_postcard(bytes) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn start_recording_round_trips_through_postcard() { + let original = Envelope::StartRecording { + robot_id: "robot-1".into(), + robot_instance: 3, + robot_name: Some("arm".into()), + dataset_id: Some("ds-1".into()), + dataset_name: Some("warehouse".into()), + publish_timestamp_ns: 1_700_000_000_000_000_000, + timestamp_ns: 1_700_000_000_000_000_000, + }; + let bytes = original.encode().expect("encode"); + let decoded = Envelope::decode(&bytes).expect("decode"); + assert_eq!(original, decoded); + assert_eq!(original.kind(), "start_recording"); + } + + #[test] + fn data_envelope_preserves_payload_bytes() { + let original = Envelope::Data { + robot_id: "robot-1".into(), + robot_instance: 0, + data_type: "JOINT_POSITIONS".into(), + sensor_name: Some("waist".into()), + publish_timestamp_ns: 1_700_000_000_000_000_000, + timestamp_ns: 1_000_000, + timestamp_s: None, + payload: vec![1, 2, 3, 4, 5, 6], + }; + let bytes = original.encode().expect("encode"); + let decoded = Envelope::decode(&bytes).expect("decode"); + assert_eq!(original, decoded); + assert_eq!(original.kind(), "data"); + } + + #[test] + fn data_timestamp_s_is_bit_exact_over_postcard_wire() { + // Postcard writes `f64` as 8 raw IEEE-754 bytes, so values that + // would shift under a decimal parser (e.g. `7/60`) round-trip + // bit-identically β€” required for the integration matrix's + // exact-match assertion on the video sidecar timestamps. 
+ let original = Envelope::Data { + robot_id: "robot-1".into(), + robot_instance: 0, + data_type: "RGB_IMAGES".into(), + sensor_name: Some("camera_right".into()), + publish_timestamp_ns: 1_700_000_000_000_000_000, + timestamp_ns: 116_666_666, + timestamp_s: Some(7.0_f64 / 60.0_f64), + payload: vec![0xAA, 0xBB], + }; + let bytes = original.encode().expect("encode"); + let decoded = Envelope::decode(&bytes).expect("decode"); + assert_eq!(original, decoded); + if let Envelope::Data { timestamp_s, .. } = decoded { + assert_eq!( + timestamp_s.map(f64::to_bits), + Some((7.0_f64 / 60.0_f64).to_bits()), + ); + } else { + panic!("decoded envelope was not Data"); + } + } + + #[test] + fn data_payload_does_not_expand_under_postcard() { + // The whole point of moving off JSON is that `Vec` no longer + // expands ~3Γ— as a JSON array of integers. Encode a 1 MiB payload + // and check the wire form is within a small constant of the raw + // bytes (variant tag + length prefix + source/sensor + timestamps). 
+ const PAYLOAD_LEN: usize = 1024 * 1024; + let original = Envelope::Data { + robot_id: "robot-1".into(), + robot_instance: 0, + data_type: "RGB_IMAGES".into(), + sensor_name: None, + publish_timestamp_ns: 0, + timestamp_ns: 0, + timestamp_s: None, + payload: vec![0xAB; PAYLOAD_LEN], + }; + let bytes = original.encode().expect("encode"); + assert!( + bytes.len() <= PAYLOAD_LEN + 4096, + "postcard wire form ({} bytes) is too far from raw payload ({} bytes)", + bytes.len(), + PAYLOAD_LEN, + ); + assert!( + bytes.len() >= PAYLOAD_LEN, + "wire form must contain the raw bytes" + ); + } + + #[test] + fn batched_data_round_trips() { + let original = Envelope::BatchedData { + robot_id: "robot-1".into(), + robot_instance: 0, + data_type: "JOINT_POSITIONS".into(), + publish_timestamp_ns: 1_700_000_000_000_000_000, + timestamp_ns: 1_700_000_000_000_000_000, + timestamp_s: Some(1_700_000_000.5), + items: vec![ + BatchedDataItem { + sensor_name: Some("joint-0".into()), + payload: br#"{"timestamp":1.0,"value":0.5}"#.to_vec(), + }, + BatchedDataItem { + sensor_name: Some("joint-1".into()), + payload: br#"{"timestamp":1.0,"value":-0.25}"#.to_vec(), + }, + ], + }; + let bytes = original.encode().expect("encode"); + let decoded = Envelope::decode(&bytes).expect("decode"); + assert_eq!(original, decoded); + assert_eq!(original.kind(), "batched_data"); + } + + #[test] + fn batched_data_worst_case_fits_commands_slice() { + // The integration matrix's high-dimensionality case logs 1000 joints + // per call. Each joint payload is a small `{"timestamp":..,"value":..}` + // JSON object plus a data_type label and sensor name; the whole batch + // must fit inside a single `commands` sample so the producer can + // publish it in one go. 
+ let items: Vec = (0..1000) + .map(|index| BatchedDataItem { + sensor_name: Some(format!("vx300s_left_joint_{index:04}")), + payload: br#"{"timestamp":1747740000.1234567,"value":-1.234567890123}"#.to_vec(), + }) + .collect(); + let envelope = Envelope::BatchedData { + robot_id: "11111111-2222-3333-4444-555555555555".into(), + robot_instance: 0, + data_type: "JOINT_POSITIONS".into(), + publish_timestamp_ns: 1_747_740_000_123_456_700, + timestamp_ns: 1_747_740_000_123_456_700, + timestamp_s: Some(1_747_740_000.123_456_7), + items, + }; + let bytes = envelope.encode().expect("encode"); + assert!( + bytes.len() <= service_name::COMMANDS_MAX_PAYLOAD_BYTES, + "1000-joint batch ({} bytes) must fit the commands slice ({} bytes)", + bytes.len(), + service_name::COMMANDS_MAX_PAYLOAD_BYTES, + ); + } + + /// Stop and cancel envelopes each decode back to an equal value and report their kind tag. + #[test] + fn stop_and_cancel_round_trip() { + let stop = Envelope::StopRecording { + robot_id: "robot-1".into(), + robot_instance: 2, + publish_timestamp_ns: 1_700_000_000_000_000_000, + timestamp_ns: 1_700_000_000_000_000_000, + }; + let bytes = stop.encode().expect("encode"); + assert_eq!(stop, Envelope::decode(&bytes).expect("decode")); + assert_eq!(stop.kind(), "stop_recording"); + + let cancel = Envelope::CancelRecording { + robot_id: "robot-1".into(), + robot_instance: 2, + timestamp_ns: 1_700_000_000_000_000_000, + }; + let bytes = cancel.encode().expect("encode"); + assert_eq!(cancel, Envelope::decode(&bytes).expect("decode")); + assert_eq!(cancel.kind(), "cancel_recording"); + } + + /// A four-frame video-chunk announcement decodes back to an equal value and reports its kind tag. + #[test] + fn video_chunk_ready_round_trips() { + let original = Envelope::VideoChunkReady { + robot_id: "robot-1".into(), + robot_instance: 0, + data_type: "RGB_IMAGES".into(), + sensor_name: Some("camera_right".into()), + publish_timestamp_ns: 1_700_000_000_000_000_000, + thread_id: 4242, + width: 1920, + height: 1080, + byte_count: 128 * 1024 * 1024, + frame_count: 4, + frame_timestamps_ns: vec![ + 1_700_000_000_000_000_000, + 1_700_000_000_016_666_700, + 1_700_000_000_033_333_300, + 
1_700_000_000_050_000_000, + ], + frame_timestamps_s: vec![ + 1_700_000_000.0, + 1_700_000_000.016_666_7, + 1_700_000_000.033_333_3, + // NOTE(review): 7/60 does not mirror the last ns timestamp above; only round-trip equality is asserted here, so any f64 works. TODO confirm it is deliberate. + 7.0_f64 / 60.0_f64, + ], + }; + let bytes = original.encode().expect("encode"); + let decoded = Envelope::decode(&bytes).expect("decode"); + assert_eq!(original, decoded); + assert_eq!(original.kind(), "video_chunk_ready"); + } + + #[test] + fn video_chunk_ready_worst_case_fits_commands_slice() { + // A 128 MiB 1080p chunk holds ~3800 frames; carry two timestamps per + // frame (ns + s). Even at 10_000 frames the envelope is comfortably + // under COMMANDS_MAX_PAYLOAD_BYTES. + let frame_timestamps_ns: Vec = (0..10_000).map(|i| i as i64 * 1_000_000).collect(); + let frame_timestamps_s: Vec = (0..10_000).map(|i| i as f64 * 1e-3).collect(); + let envelope = Envelope::VideoChunkReady { + robot_id: "11111111-2222-3333-4444-555555555555".into(), + robot_instance: 0, + data_type: "RGB_IMAGES".into(), + sensor_name: Some("camera_right".into()), + publish_timestamp_ns: 1_700_000_000_000_000_000, + thread_id: 42, + width: 1920, + height: 1080, + byte_count: 128 * 1024 * 1024, + frame_count: frame_timestamps_ns.len() as u32, + frame_timestamps_ns, + frame_timestamps_s, + }; + let bytes = envelope.encode().expect("encode"); + assert!( + bytes.len() <= service_name::COMMANDS_MAX_PAYLOAD_BYTES, + "10k-frame chunk envelope ({} bytes) must fit the commands slice ({} bytes)", + bytes.len(), + service_name::COMMANDS_MAX_PAYLOAD_BYTES, + ); + } + + #[test] + fn video_chunk_ready_at_frame_cap_fits_commands_slice() { + // The producer caps a chunk at MAX_VIDEO_CHUNK_FRAMES frames so its + // announcement always fits one commands sample. Prove the cap holds at + // the absolute worst case: every per-frame ns timestamp a full-range + // i64 (10-byte postcard zigzag varint) and every fixed field maxed out. + // Without the cap a long recording of tiny frames overflows the slice + // and the whole recording's video announcement fails to publish. 
+ let count = service_name::MAX_VIDEO_CHUNK_FRAMES as usize; + let frame_timestamps_ns: Vec = (0..count).map(|i| i64::MAX - i as i64).collect(); + let frame_timestamps_s: Vec = (0..count).map(|i| i as f64).collect(); + let envelope = Envelope::VideoChunkReady { + robot_id: "11111111-2222-3333-4444-555555555555".into(), + robot_instance: i64::MAX, + data_type: "RGB_IMAGES".into(), + sensor_name: Some("camera_with_a_deliberately_long_sensor_label".into()), + publish_timestamp_ns: i64::MAX, + thread_id: i64::MAX, + width: u32::MAX, + height: u32::MAX, + byte_count: u64::MAX, + frame_count: count as u32, + frame_timestamps_ns, + frame_timestamps_s, + }; + let bytes = envelope.encode().expect("encode"); + assert!( + bytes.len() <= service_name::COMMANDS_MAX_PAYLOAD_BYTES, + "chunk at frame cap ({count} frames, {} bytes) must fit the commands slice ({} bytes)", + bytes.len(), + service_name::COMMANDS_MAX_PAYLOAD_BYTES, + ); + } +} diff --git a/rust/data_daemon_shared/src/paths.rs b/rust/data_daemon_shared/src/paths.rs new file mode 100644 index 000000000..77c36adcf --- /dev/null +++ b/rust/data_daemon_shared/src/paths.rs @@ -0,0 +1,181 @@ +//! Filesystem-path resolution shared by the daemon and the producer. +//! +//! The producer runs in a *separate* process from the daemon, yet both must +//! agree on where recordings live: the producer spools NUT chunks under the +//! recordings root and the daemon encodes them from the same place. Rather than +//! each maintaining its own copy of the "env override → db-sibling → +//! `~/.neuracore` default" precedence (which can silently drift), both call the +//! resolvers here — so the two processes are guaranteed to compute the same +//! paths from the same inputs. +//! +//! Resolution is fallible: when a path can only come from the home directory and +//! the home directory cannot be determined (e.g. a headless container with no +//! `$HOME`), the caller gets a [`HomeDirUnavailable`] error to surface +//! 
appropriately β€” the daemon exits at startup, the producer raises a Python +//! exception β€” instead of panicking or silently falling back to a scratch dir +//! the other process would never look in. + +use std::path::{Path, PathBuf}; + +use thiserror::Error; + +/// Env var overriding the recordings root (highest precedence). +pub const RECORDINGS_ROOT_ENV: &str = "NEURACORE_DAEMON_RECORDINGS_ROOT"; + +/// Env var overriding the SQLite DB path; the recordings root defaults to its +/// `recordings` sibling. +pub const DB_PATH_ENV: &str = "NEURACORE_DAEMON_DB_PATH"; + +/// Raised when a path can only be resolved from the home directory and the home +/// directory cannot be determined. +#[derive(Debug, Error)] +#[error( + "could not determine the user's home directory; \ + set {RECORDINGS_ROOT_ENV} (or {DB_PATH_ENV}) to an absolute path" +)] +pub struct HomeDirUnavailable; + +/// An env var's value, or `None` when unset or empty (an empty override is +/// treated as "unset" so a blank var doesn't resolve to an empty path). +fn non_empty_env(name: &str) -> Option { + std::env::var(name).ok().filter(|value| !value.is_empty()) +} + +/// The user's home directory, or [`HomeDirUnavailable`]. +fn home_dir() -> Result { + dirs::home_dir().ok_or(HomeDirUnavailable) +} + +/// Expand a leading `~` or `~/…` against the home directory; `~user` forms are +/// left unchanged. +fn expand_user(path: &str) -> Result { + if let Some(stripped) = path.strip_prefix("~/") { + return Ok(home_dir()?.join(stripped)); + } + if path == "~" { + return home_dir(); + } + Ok(PathBuf::from(path)) +} + +/// Resolve the daemon SQLite database path: [`DB_PATH_ENV`] (with `~` +/// expansion) or `~/.neuracore/data_daemon/state.db`. +pub fn db_path() -> Result { + match non_empty_env(DB_PATH_ENV) { + Some(value) => expand_user(&value), + None => Ok(home_dir()? 
+ .join(".neuracore") + .join("data_daemon") + .join("state.db")), + } +} + +/// Resolve the recordings root: [`RECORDINGS_ROOT_ENV`] if set, otherwise the +/// `recordings` sibling of [`db_path`]. Identical for the daemon and producer. +pub fn recordings_root() -> Result { + if let Some(value) = non_empty_env(RECORDINGS_ROOT_ENV) { + return Ok(PathBuf::from(value)); + } + let db_path = db_path()?; + Ok(db_path + .parent() + .unwrap_or_else(|| Path::new(".")) + .join("recordings")) +} + +/// Sum the byte count of every regular file beneath `root`, recursively. +/// +/// Returns 0 when `root` does not exist (the expected state before the +/// recordings tree is created) and silently skips entries it cannot `stat`. +/// Symlinks are neither followed nor counted, so the walk cannot cycle. +/// +/// Shared because two callers need the same number from the same tree: the +/// daemon's storage budget sums the recordings root, and the producer's writer +/// sums its spool inbox to enforce the backlog cap. +pub fn directory_bytes(root: &Path) -> u64 { + let mut total: u64 = 0; + let mut stack = vec![root.to_path_buf()]; + while let Some(dir) = stack.pop() { + let entries = match std::fs::read_dir(&dir) { + Ok(entries) => entries, + Err(_) => continue, + }; + for entry in entries.flatten() { + match entry.file_type() { + Ok(file_type) if file_type.is_dir() => stack.push(entry.path()), + Ok(file_type) if file_type.is_file() => { + if let Ok(metadata) = entry.metadata() { + total = total.saturating_add(metadata.len()); + } + } + _ => {} + } + } + } + total +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn directory_bytes_sums_nested_files_and_ignores_missing_roots() { + let tempdir = TempDir::new().unwrap(); + let root = tempdir.path().join("recordings"); + + // Missing root: zero, no error. 
+ assert_eq!(directory_bytes(&root), 0); + + let nested = root.join("source").join("sensor"); + std::fs::create_dir_all(&nested).unwrap(); + std::fs::write(nested.join("chunk.nut"), vec![0u8; 1024]).unwrap(); + std::fs::write(root.join("top.bin"), vec![0u8; 32]).unwrap(); + + assert_eq!(directory_bytes(&root), 1024 + 32); + } + + // These tests mutate process-wide env vars, so they must not run + // concurrently with each other; a single test drives the whole matrix. + #[test] + fn resolution_precedence() { + let saved_root = std::env::var_os(RECORDINGS_ROOT_ENV); + let saved_db = std::env::var_os(DB_PATH_ENV); + + // Explicit recordings-root override wins outright. + std::env::set_var(RECORDINGS_ROOT_ENV, "/data/records"); + std::env::set_var(DB_PATH_ENV, "/var/lib/ncd/state.db"); + assert_eq!(recordings_root().unwrap(), PathBuf::from("/data/records")); + + // Empty override is treated as unset β†’ falls through to the db sibling. + std::env::set_var(RECORDINGS_ROOT_ENV, ""); + assert_eq!( + recordings_root().unwrap(), + PathBuf::from("/var/lib/ncd/recordings") + ); + assert_eq!(db_path().unwrap(), PathBuf::from("/var/lib/ncd/state.db")); + + // Restore the environment for other tests. + match saved_root { + Some(value) => std::env::set_var(RECORDINGS_ROOT_ENV, value), + None => std::env::remove_var(RECORDINGS_ROOT_ENV), + } + match saved_db { + Some(value) => std::env::set_var(DB_PATH_ENV, value), + None => std::env::remove_var(DB_PATH_ENV), + } + } + + #[test] + fn expand_user_only_touches_leading_tilde() { + assert_eq!( + expand_user("/abs/path").unwrap(), + PathBuf::from("/abs/path") + ); + assert_eq!( + expand_user("rel/~/path").unwrap(), + PathBuf::from("rel/~/path") + ); + } +}