Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 53 additions & 20 deletions dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5327,7 +5327,7 @@ async fn cmd_db_instance_info(

let table = tabled::Table::new(vmms.iter().map(|vmm| {
let &Vmm {
id,
id: _,
sled_id,
propolis_ip: _,
propolis_port: _,
Expand All @@ -5336,15 +5336,12 @@ async fn cmd_db_instance_info(
time_created,
time_deleted,
time_state_updated: _,
generation,
state,
generation: _,
state: _,
failure_reason: _,
} = vmm;
VmmRow {
state: VmmStateRow {
id,
state,
generation: generation.0.into(),
},
state: VmmStateRow::from(vmm),
sled_id: sled_id.into(),
time_created,
time_deleted,
Expand All @@ -5365,10 +5362,23 @@ async fn cmd_db_instance_info(
// Table columns describing a single VMM's identity and state. The `tabled`
// attributes on the fields control how individual columns are rendered.
struct VmmStateRow {
// The VMM's ID.
id: Uuid,
// The VMM's state, as represented by the database model.
state: db::model::VmmState,
// The recorded failure reason, if any. Presumably rendered as an empty cell
// when `None` (via `display_option_blank`) -- confirm against that helper.
#[tabled(display_with = "display_option_blank")]
failure_reason: Option<db::model::VmmFailureReason>,
// The VMM's generation number; the column header is renamed to "GEN".
#[tabled(rename = "GEN")]
generation: u64,
}

impl From<&'_ db::model::Vmm> for VmmStateRow {
    /// Builds the state columns for a VMM by copying its ID, state, and
    /// failure reason out of the database record, and converting its
    /// generation number into a plain `u64`.
    fn from(vmm: &db::model::Vmm) -> Self {
        // Borrow only the fields this row displays; everything else in the
        // record is ignored here.
        let db::model::Vmm { id, state, failure_reason, generation, .. } = vmm;
        let generation: u64 = generation.0.into();
        Self {
            id: *id,
            state: *state,
            failure_reason: *failure_reason,
            generation,
        }
    }
}

/// Common fields extracted from an InstanceAndActiveVmm, shared by
/// both `CustomerInstanceRow` and `SledInstanceRow`.
struct InstanceFields {
Expand Down Expand Up @@ -7965,6 +7975,8 @@ fn prettyprint_vmm(
const CPU_PLATFORM: &'static str = "CPU platform";
const ADDRESS: &'static str = "propolis address";
const STATE: &'static str = "state";
const FAILURE_REASON: &'static str = " failure reason";
const FAILURE_NOTE: &'static str = " note";
const WIDTH: usize = const_max_len(&[
ID,
CREATED,
Expand All @@ -7976,6 +7988,8 @@ fn prettyprint_vmm(
CPU_PLATFORM,
STATE,
ADDRESS,
FAILURE_REASON,
FAILURE_NOTE,
]);

let width = std::cmp::max(width, Some(WIDTH)).unwrap_or(WIDTH);
Expand All @@ -7991,6 +8005,7 @@ fn prettyprint_vmm(
state,
generation,
time_state_updated,
failure_reason,
} = vmm;

println!("{indent}{ID:>width$}: {id}");
Expand All @@ -8002,6 +8017,31 @@ fn prettyprint_vmm(
println!("{indent}{DELETED:width$}: {deleted}");
}
println!("{indent}{STATE:>width$}: {state}");
if let Some(reason) = failure_reason {
println!("{indent}{FAILURE_REASON:>width$}: {reason}");

if state == &db::model::VmmState::Failed {
println!(
"{indent}{FAILURE_NOTE:>width$}: {}",
reason.description()
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this description going to be different from the to_string() impl above (on line 8021)?

Copy link
Copy Markdown
Member Author

@hawkw hawkw May 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

description() provides a human-readable sentence describing what this reason means, while the to_string() impl returns the same string used as the database enum value. I wanted to include both here, so that you can see both the actual value in the DB and a more helpful explanation of what that means.

);
} else {
println!(
"{:<width$}weird: VMMs should only have non-NULL failure \
reasons if they are in the failed state",
"/!\\",
width = indent.len(),
);
}
} else if state == &db::model::VmmState::Failed {
println!(
"{:<width$}weird: VMMs in the 'failed' state should have a \
non-NULL failure reason",
"/!\\",
width = indent.len(),
);
}

let g = u64::from(generation.0);
println!(
"{indent}{UPDATED:>width$}: {time_state_updated:?} (generation {g})"
Expand Down Expand Up @@ -8085,7 +8125,7 @@ async fn cmd_db_vmm_list(
impl<'a> From<&'a (Vmm, Option<Sled>)> for VmmRow<'a> {
fn from((vmm, sled): &'a (Vmm, Option<Sled>)) -> Self {
let &Vmm {
id,
id: _,
time_created: _,
time_deleted: _,
instance_id,
Expand All @@ -8094,8 +8134,9 @@ async fn cmd_db_vmm_list(
propolis_port: _,
cpu_platform: _,
time_state_updated: _,
generation,
state,
generation: _,
state: _,
failure_reason: _,
} = vmm;
let sled = match sled {
Some(sled) => sled.serial_number(),
Expand All @@ -8104,15 +8145,7 @@ async fn cmd_db_vmm_list(
"<unknown>"
}
};
VmmRow {
instance_id,
state: VmmStateRow {
id,
state,
generation: generation.0.into(),
},
sled,
}
VmmRow { instance_id, state: VmmStateRow::from(vmm), sled }
}
}

Expand Down
2 changes: 2 additions & 0 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ mod virtual_provisioning_collection;
mod virtual_provisioning_resource;
mod vmm;
mod vmm_cpu_platform;
mod vmm_failure_reason;
mod vni;
mod volume;
mod volume_repair;
Expand Down Expand Up @@ -386,6 +387,7 @@ pub use virtual_provisioning_collection::*;
pub use virtual_provisioning_resource::*;
pub use vmm::*;
pub use vmm_cpu_platform::*;
pub use vmm_failure_reason::*;
pub use vmm_state::*;
pub use vni::*;
pub use volume::*;
Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock};
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: Version = Version::new(258, 0, 0);
pub const SCHEMA_VERSION: Version = Version::new(259, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -28,6 +28,7 @@ pub static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(259, "vmm-failure-reason"),
KnownVersion::new(258, "lookup-unmarked-ereports-by-class"),
KnownVersion::new(257, "add-disk-adoption-requests"),
KnownVersion::new(256, "bgp-unnumbered-peer-cleanup"),
Expand Down
84 changes: 30 additions & 54 deletions nexus/db-model/src/vmm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

use super::{Generation, VmmState};
use crate::typed_uuid::DbTypedUuid;
use crate::{SqlU16, VmmCpuPlatform};
use crate::{SqlU16, VmmCpuPlatform, VmmFailureReason};
use chrono::{DateTime, Utc};
use nexus_db_schema::schema::vmm;
use omicron_uuid_kinds::*;
Expand Down Expand Up @@ -72,6 +72,10 @@ pub struct Vmm {
/// control plane if this VMM's instance didn't specify a required platform
/// when it was started.
pub cpu_platform: VmmCpuPlatform,

/// If this VMM is in the `Failed` state, this field describes why it
/// failed. This is `None` for VMMs that are not in the `Failed` state.
pub failure_reason: Option<VmmFailureReason>,
}

impl Vmm {
Expand Down Expand Up @@ -101,66 +105,38 @@ impl Vmm {
propolis_port: SqlU16(propolis_port),
state: VmmState::Creating,
cpu_platform,
failure_reason: None,
}
}

/// Returns the runtime state of this VMM.
pub fn runtime(&self) -> VmmRuntimeState {
VmmRuntimeState {
time_state_updated: self.time_state_updated,
generation: self.generation,
state: self.state,
pub fn runtime(&self) -> nexus_types::instance::VmmRuntimeState {
use nexus_types::instance as types;
let state = match (self.state, self.failure_reason) {
(VmmState::Failed, None) => {
// Weird and bad!
types::VmmState::Failed(types::VmmFailureReason::Prehistoric)
}
(VmmState::Failed, Some(reason)) => {
types::VmmState::Failed(reason.into())
}
(VmmState::Creating, _) => types::VmmState::Creating,
(VmmState::Starting, _) => types::VmmState::Starting,
(VmmState::Running, _) => types::VmmState::Running,
(VmmState::Stopping, _) => types::VmmState::Stopping,
(VmmState::Stopped, _) => types::VmmState::Stopped,
(VmmState::Rebooting, _) => types::VmmState::Rebooting,
(VmmState::Migrating, _) => types::VmmState::Migrating,
(VmmState::Destroyed, _) => types::VmmState::Destroyed,
(VmmState::SagaUnwound, _) => types::VmmState::SagaUnwound,
};
types::VmmRuntimeState {
state,
generation: self.generation.into(),
time_updated: self.time_state_updated,
}
}

pub fn sled_id(&self) -> SledUuid {
self.sled_id.into()
}
}

/// Runtime state for a VMM, owned by the sled where that VMM is running.
#[derive(
Clone,
Debug,
AsChangeset,
Selectable,
Insertable,
Queryable,
Serialize,
Deserialize,
PartialEq,
)]
#[diesel(table_name = vmm)]
pub struct VmmRuntimeState {
/// The time at which this state was most recently updated.
pub time_state_updated: DateTime<Utc>,

/// The generation number protecting this VMM's state and update time.
#[diesel(column_name = state_generation)]
#[serde(rename = "gen")]
pub generation: Generation,

/// The state of this VMM. If this VMM is the active VMM for a given
/// instance, this state is the instance's logical state.
pub state: VmmState,
}

impl From<sled_agent_types::instance::VmmRuntimeState> for VmmRuntimeState {
fn from(value: sled_agent_types::instance::VmmRuntimeState) -> Self {
Self {
state: value.state.into(),
time_state_updated: value.time_updated,
generation: value.generation.into(),
}
}
}

impl From<Vmm> for sled_agent_types::instance::VmmRuntimeState {
fn from(s: Vmm) -> Self {
Self {
generation: s.generation.into(),
state: s.state.into(),
time_updated: s.time_state_updated,
}
}
}
82 changes: 82 additions & 0 deletions nexus/db-model/src/vmm_failure_reason.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Describes why a VMM record is in the `Failed` state.

use super::impl_enum_type;
use nexus_types::instance as types;
use serde::{Deserialize, Serialize};
use std::fmt;

// Declares the `VmmFailureReason` enum via `impl_enum_type!`. Each variant is
// paired with a byte-string label -- presumably the value stored for the
// corresponding database enum (`VmmFailureReasonEnum`); confirm against the
// macro definition.
impl_enum_type!(
VmmFailureReasonEnum:

#[derive(
Copy,
Clone,
Debug,
PartialEq,
AsExpression,
FromSqlRow,
Serialize,
Deserialize,
)]
pub enum VmmFailureReason;

// The reason for this VMM's failure is unknown, because the VMM failed
// prior to the recording of failure reasons.
Prehistoric => b"prehistoric"
// The sled-agent reported that this VMM failed.
FromSledAgent => b"from_sled_agent"
// A request to the sled-agent received a response indicating that this
// VMM is no longer present on the sled.
NoSuchInstance => b"no_such_instance"
// The sled on which this VMM was running has been expunged.
SledExpunged => b"sled_expunged"
// The sled on which this VMM was running has powered off.
SledOff => b"sled_off"
);

impl From<types::VmmFailureReason> for VmmFailureReason {
fn from(reason: types::VmmFailureReason) -> Self {
match reason {
types::VmmFailureReason::Prehistoric => Self::Prehistoric,
types::VmmFailureReason::FromSledAgent => Self::FromSledAgent,
types::VmmFailureReason::NoSuchInstance => Self::NoSuchInstance,
types::VmmFailureReason::SledExpunged => Self::SledExpunged,
types::VmmFailureReason::SledOff => Self::SledOff,
}
}
}

impl From<VmmFailureReason> for types::VmmFailureReason {
fn from(reason: VmmFailureReason) -> Self {
match reason {
VmmFailureReason::Prehistoric => Self::Prehistoric,
VmmFailureReason::FromSledAgent => Self::FromSledAgent,
VmmFailureReason::NoSuchInstance => Self::NoSuchInstance,
VmmFailureReason::SledExpunged => Self::SledExpunged,
VmmFailureReason::SledOff => Self::SledOff,
}
}
}

impl VmmFailureReason {
    /// Extracts the failure reason carried by a `nexus_types` VMM state.
    ///
    /// Returns `Some` with the converted reason when `state` is
    /// `Failed(..)`, and `None` for every other state.
    pub fn from_vmm_state(state: types::VmmState) -> Option<Self> {
        if let types::VmmState::Failed(reason) = state {
            Some(reason.into())
        } else {
            None
        }
    }

    /// Returns the description string for this failure reason, as provided
    /// by the equivalent `nexus_types` failure reason.
    pub fn description(&self) -> &'static str {
        let api_reason = types::VmmFailureReason::from(*self);
        api_reason.description()
    }
}

impl fmt::Display for VmmFailureReason {
    /// Formats this failure reason by converting it to the equivalent
    /// `nexus_types` failure reason and delegating to that type's formatting.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let api_reason = types::VmmFailureReason::from(*self);
        api_reason.fmt(f)
    }
}
Loading
Loading