diff --git a/.envrc b/.envrc index 9731258f343..a6ab4f52174 100644 --- a/.envrc +++ b/.envrc @@ -5,6 +5,7 @@ PATH_add out/cockroachdb/bin PATH_add out/clickhouse PATH_add out/dendrite-stub/bin PATH_add out/mgd/root/opt/oxide/mgd/bin +PATH_add out/mg-ddm/root/opt/oxide/mg-ddm/bin if [ "$OMICRON_USE_FLAKE" = 1 ] && nix flake show &> /dev/null then diff --git a/Cargo.lock b/Cargo.lock index b5b6ea30509..b68ae45b6b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2525,10 +2525,10 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=7696ee48d5ee29a917dea459e281fe2e8ff20513#7696ee48d5ee29a917dea459e281fe2e8ff20513" +source = "git+https://github.com/oxidecomputer/maghemite?rev=974423895c17cc23711732f518e447b284425ccd#974423895c17cc23711732f518e447b284425ccd" dependencies = [ "oxnet", - "progenitor 0.13.0", + "progenitor 0.14.0", "reqwest 0.13.2", "serde", "slog", @@ -6509,11 +6509,11 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=7696ee48d5ee29a917dea459e281fe2e8ff20513#7696ee48d5ee29a917dea459e281fe2e8ff20513" +source = "git+https://github.com/oxidecomputer/maghemite?rev=974423895c17cc23711732f518e447b284425ccd#974423895c17cc23711732f518e447b284425ccd" dependencies = [ "chrono", "colored 3.1.1", - "progenitor 0.13.0", + "progenitor 0.14.0", "rdb-types", "reqwest 0.13.2", "schemars 0.8.22", @@ -10180,9 +10180,9 @@ dependencies = [ [[package]] name = "oxnet" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dc6fb07ecd6d2a17ff1431bc5b3ce11036c0b6dd93a3c4904db5b910817b162" +checksum = "057865b45bb202b17ed475d8f22f0416412de2c317c168fefecf9d207faf048d" dependencies = [ "ipnetwork", "schemars 0.8.22", @@ -11845,7 +11845,7 @@ dependencies = [ [[package]] name = "rdb-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=7696ee48d5ee29a917dea459e281fe2e8ff20513#7696ee48d5ee29a917dea459e281fe2e8ff20513" +source = "git+https://github.com/oxidecomputer/maghemite?rev=974423895c17cc23711732f518e447b284425ccd#974423895c17cc23711732f518e447b284425ccd" dependencies = [ "oxnet", "schemars 0.8.22", @@ -14106,7 +14106,7 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.117", diff --git a/Cargo.toml b/Cargo.toml index 3e5adc43e60..3f2059ad6a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -604,8 +604,8 @@ ntp-admin-api = { path = "ntp-admin/api" } ntp-admin-client = { path = "clients/ntp-admin-client" } ntp-admin-types = { path = "ntp-admin/types" } ntp-admin-types-versions = { path = "ntp-admin/types/versions" } -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "7696ee48d5ee29a917dea459e281fe2e8ff20513" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "7696ee48d5ee29a917dea459e281fe2e8ff20513" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "974423895c17cc23711732f518e447b284425ccd" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "974423895c17cc23711732f518e447b284425ccd" } multimap = "0.10.1" nexus-auth = { path = "nexus/auth" } nexus-background-task-interface = { path = "nexus/background-task-interface" } @@ -669,7 +669,7 @@ oxide-client = { path = "clients/oxide-client" } oxide-tokio-rt = "0.1.4" oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "bae0440c199b3908c12903a9532854936353433b", features = [ "api", "std" ] } oxlog = { path = "dev-tools/oxlog" } -oxnet = "0.1.4" +oxnet = "0.1.5" once_cell = "1.21.3" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.2.0" @@ -742,7 +742,7 @@ rats-corim = { git = "https://github.com/oxidecomputer/rats-corim.git", rev = "f raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid.git", rev = "a4cf01df76f35430ff5d39dc2fe470bcb953503b" } rayon = "1.10" rcgen = "0.12.1" -rdb-types = { git = "https://github.com/oxidecomputer/maghemite", rev = "7696ee48d5ee29a917dea459e281fe2e8ff20513" } +rdb-types = { git = "https://github.com/oxidecomputer/maghemite", rev = "974423895c17cc23711732f518e447b284425ccd" } reconfigurator-cli = { path = "dev-tools/reconfigurator-cli" } reedline = "0.40.0" ref-cast = "1.0" diff --git a/clients/ddm-admin-client/src/lib.rs b/clients/ddm-admin-client/src/lib.rs index 7a8b56d499d..466a8883918 100644 --- a/clients/ddm-admin-client/src/lib.rs +++ b/clients/ddm-admin-client/src/lib.rs @@ -13,6 +13,7 @@ pub use ddm_admin_client::types; use ddm_admin_client::Client as InnerClient; use either::Either; +use omicron_common::address::DDMD_PORT; use oxnet::Ipv6Net; use sled_hardware_types::underlay::BOOTSTRAP_MASK; use sled_hardware_types::underlay::BOOTSTRAP_PREFIX; @@ -26,9 +27,6 @@ use thiserror::Error; use crate::types::EnableStatsRequest; -// TODO-cleanup Is it okay to hardcode this port number here? -const DDMD_PORT: u16 = 8000; - #[derive(Debug, Error, SlogInlineError)] pub enum DdmError { #[error("Failed to construct an HTTP client:")] diff --git a/dev-tools/downloader/src/lib.rs b/dev-tools/downloader/src/lib.rs index 44fb340de28..a7e5a65683b 100644 --- a/dev-tools/downloader/src/lib.rs +++ b/dev-tools/downloader/src/lib.rs @@ -69,6 +69,9 @@ enum Target { /// Maghemite mgd binary MaghemiteMgd, + /// Maghemite ddmd binary + MaghemiteDdmd, + /// SoftNPU, an admin program (scadm) and a pre-compiled P4 program. Softnpu, @@ -137,6 +140,7 @@ pub async fn run_cmd(args: DownloadArgs) -> Result<()> { Target::Console => downloader.download_console().await, Target::DendriteStub => downloader.download_dendrite_stub().await, Target::MaghemiteMgd => downloader.download_maghemite_mgd().await, + Target::MaghemiteDdmd => downloader.download_maghemite_ddmd().await, Target::Softnpu => downloader.download_softnpu().await, Target::TransceiverControl => { downloader.download_transceiver_control().await @@ -946,6 +950,84 @@ impl Downloader<'_> { Ok(()) } + async fn download_maghemite_ddmd(&self) -> Result<()> { + let download_dir = self.output_dir.join("downloads"); + tokio::fs::create_dir_all(&download_dir).await?; + + let checksums_path = self.versions_dir.join("maghemite_mgd_checksums"); + let [mg_ddm_sha2, ddmd_linux_sha2] = get_values_from_file( + ["MG_DDM_SHA256", "DDMD_LINUX_SHA256"], + &checksums_path, + ) + .await?; + let commit_path = + self.versions_dir.join("maghemite_ddm_openapi_version"); + let [commit] = get_values_from_file(["COMMIT"], &commit_path).await?; + + let repo = "oxidecomputer/maghemite"; + let base_url = format!("{BUILDOMAT_URL}/{repo}/image/{commit}"); + + let filename = "mg-ddm.tar.gz"; + let tarball_path = download_dir.join(filename); + download_file_and_verify( + &self.log, + &tarball_path, + &format!("{base_url}/{filename}"), + ChecksumAlgorithm::Sha2, + &mg_ddm_sha2, + ) + .await?; + unpack_tarball(&self.log, &tarball_path, &download_dir).await?; + + let destination_dir = self.output_dir.join("mg-ddm"); + let _ = tokio::fs::remove_dir_all(&destination_dir).await; + tokio::fs::create_dir_all(&destination_dir).await?; + copy_dir_all( + &download_dir.join("root"), + &destination_dir.join("root"), + )?; + + let binary_dir = destination_dir.join("root/opt/oxide/mg-ddm/bin"); + + match os_name()? { + Os::Linux => { + let filename = "ddmd"; + let path = download_dir.join(filename); + download_file_and_verify( + &self.log, + &path, + &format!( + "{BUILDOMAT_URL}/{repo}/linux/{commit}/{filename}" + ), + ChecksumAlgorithm::Sha2, + &ddmd_linux_sha2, + ) + .await?; + set_permissions(&path, 0o755).await?; + tokio::fs::copy(path, binary_dir.join(filename)).await?; + } + Os::Mac => { + info!( + self.log, + "Building maghemite ddmd from source for macOS" + ); + + let binaries = [("ddmd", &["--no-default-features"][..])]; + + let built_binaries = self + .build_from_git("maghemite", &commit, &binaries) + .await?; + + let dest = binary_dir.join("ddmd"); + tokio::fs::copy(&built_binaries[0], &dest).await?; + set_permissions(&dest, 0o755).await?; + } + Os::Illumos => (), + } + + Ok(()) + } + async fn download_softnpu(&self) -> Result<()> { let destination_dir = self.output_dir.join("npuzone"); tokio::fs::create_dir_all(&destination_dir).await?; diff --git a/env.sh b/env.sh index 6a84c35902a..114b53f07ed 100644 --- a/env.sh +++ b/env.sh @@ -12,6 +12,7 @@ export PATH="$OMICRON_WS/out/cockroachdb/bin:$PATH" export PATH="$OMICRON_WS/out/clickhouse:$PATH" export PATH="$OMICRON_WS/out/dendrite-stub/bin:$PATH" export PATH="$OMICRON_WS/out/mgd/root/opt/oxide/mgd/bin:$PATH" +export PATH="$OMICRON_WS/out/mg-ddm/root/opt/oxide/mg-ddm/bin:$PATH" # if xtrace was set previously, do not unset it case $OLD_SHELL_OPTS in diff --git a/internal-dns/types/src/config.rs b/internal-dns/types/src/config.rs index d5bef144343..5b4f736e2c5 100644 --- a/internal-dns/types/src/config.rs +++ b/internal-dns/types/src/config.rs @@ -163,6 +163,20 @@ pub struct DnsConfigBuilder { service_instances_sleds: BTreeMap>, } +/// Ports for the per-switch services published in internal DNS by +/// [`DnsConfigBuilder::host_zone_switch`]. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct HostSwitchZonePorts { + /// Dendrite (`dpd`) admin API port. + pub dendrite: u16, + /// Management Gateway Service (`mgs`) port. + pub mgs: u16, + /// Maghemite `mgd` admin API port. + pub mgd: u16, + /// Maghemite `ddmd` admin API port. + pub ddm: u16, +} + /// Describes a host of type "sled" in the control plane DNS zone #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct Sled(SledUuid); @@ -396,10 +410,14 @@ impl DnsConfigBuilder { &mut self, sled_id: SledUuid, switch_zone_ip: Ipv6Addr, - dendrite_port: u16, - mgs_port: u16, - mgd_port: u16, + ports: HostSwitchZonePorts, ) -> anyhow::Result<()> { + let HostSwitchZonePorts { + dendrite: dendrite_port, + mgs: mgs_port, + mgd: mgd_port, + ddm: ddm_port, + } = ports; let zone = self.host_dendrite(sled_id, switch_zone_ip)?; self.service_backend_zone(ServiceName::Dendrite, &zone, dendrite_port)?; self.service_backend_zone( @@ -407,7 +425,8 @@ impl DnsConfigBuilder { &zone, mgs_port, )?; - self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port) + self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port)?; + self.service_backend_zone(ServiceName::Ddm, &zone, ddm_port) } /// Higher-level shorthand for adding a Nexus zone with both its internal @@ -731,7 +750,9 @@ impl DnsConfigBuilder { #[cfg(test)] mod test { - use super::{DnsConfigBuilder, Host, ServiceName}; + use super::{ + DnsConfigBuilder, DnsRecord, Host, HostSwitchZonePorts, ServiceName, + }; use crate::{config::Zone, names::DNS_ZONE}; use omicron_common::api::external::Generation; use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid}; @@ -779,6 +800,8 @@ mod test { "_oximeter-reader._tcp", ); assert_eq!(ServiceName::Dendrite.dns_name(), "_dendrite._tcp",); + assert_eq!(ServiceName::Mgd.dns_name(), "_mgd._tcp",); + assert_eq!(ServiceName::Ddm.dns_name(), "_ddm._tcp",); assert_eq!( ServiceName::CruciblePantry.dns_name(), "_crucible-pantry._tcp", @@ -796,6 +819,71 @@ mod test { ); } + #[test] + fn host_zone_switch_publishes_all_services() { + let sled_uuid: SledUuid = + "001de000-51ed-4000-8000-000000000001".parse().unwrap(); + let switch_zone_ip = Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1); + + // Use distinct port numbers so an arg-order swap in `host_zone_switch` + // surfaces as a port mismatch on the affected service. + let dendrite_port = 11; + let mgs_port = 13; + let mgd_port = 17; + let ddm_port = 19; + + let mut builder = DnsConfigBuilder::new(); + builder + .host_zone_switch( + sled_uuid, + switch_zone_ip, + HostSwitchZonePorts { + dendrite: dendrite_port, + mgs: mgs_port, + mgd: mgd_port, + ddm: ddm_port, + }, + ) + .unwrap(); + + let config = builder.build_full_config_for_initial_generation(); + + let mut by_name: BTreeMap<&str, &[DnsRecord]> = BTreeMap::new(); + for zone in &config.zones { + for (name, records) in &zone.records { + by_name.insert(name.as_str(), records.as_slice()); + } + } + + for (expected_name, expected_port) in [ + ("_dendrite._tcp", dendrite_port), + ("_mgs._tcp", mgs_port), + ("_mgd._tcp", mgd_port), + ("_ddm._tcp", ddm_port), + ] { + let records = by_name.get(expected_name).unwrap_or_else(|| { + panic!( + "expected {expected_name} in published switch-zone \ + services; got {by_name:?}" + ) + }); + let srv_port = records + .iter() + .find_map(|r| match r { + DnsRecord::Srv(s) => Some(s.port), + _ => None, + }) + .unwrap_or_else(|| { + panic!("no SRV record for {expected_name}: {records:?}") + }); + + assert_eq!( + srv_port, expected_port, + "wrong SRV port for {expected_name}" + ); + } + } + #[test] fn display_hosts() { let sled_uuid = SledUuid::nil(); diff --git a/internal-dns/types/src/names.rs b/internal-dns/types/src/names.rs index 73b2439e48e..105d0222f3c 100644 --- a/internal-dns/types/src/names.rs +++ b/internal-dns/types/src/names.rs @@ -75,6 +75,7 @@ pub enum ServiceName { BoundaryNtp, InternalNtp, Mgd, + Ddm, } impl ServiceName { @@ -116,6 +117,7 @@ impl ServiceName { ServiceName::BoundaryNtp => "boundary-ntp", ServiceName::InternalNtp => "internal-ntp", ServiceName::Mgd => "mgd", + ServiceName::Ddm => "ddm", } } @@ -144,7 +146,8 @@ impl ServiceName { | ServiceName::CruciblePantry | ServiceName::BoundaryNtp | ServiceName::InternalNtp - | ServiceName::Mgd => { + | ServiceName::Mgd + | ServiceName::Ddm => { format!("_{}._tcp", self.service_kind()) } ServiceName::SledAgent(id) => { diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 685c7c85e6f..0a85c4dd114 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -988,9 +988,8 @@ mod test { // the previous pass (i.e., that corresponds to an Omicron zone). // // There are some ServiceNames missing here because they are not part of - // our representative config (e.g., ClickhouseKeeper) or they don't - // currently have DNS record at all (e.g., SledAgent, Maghemite, Mgd, - // Tfport). + // our representative config (e.g., ClickhouseKeeper) or because they + // do not currently have a DNS record at all (e.g., SledAgent). let mut srv_kinds_expected = BTreeSet::from([ ServiceName::Clickhouse, ServiceName::ClickhouseNative, @@ -1001,6 +1000,8 @@ mod test { ServiceName::NexusLockstep, ServiceName::Oximeter, ServiceName::Dendrite, + ServiceName::Mgd, + ServiceName::Ddm, ServiceName::CruciblePantry, ServiceName::BoundaryNtp, ServiceName::InternalNtp, diff --git a/nexus/reconfigurator/execution/src/test_utils.rs b/nexus/reconfigurator/execution/src/test_utils.rs index cd46adacd0b..fdb17289225 100644 --- a/nexus/reconfigurator/execution/src/test_utils.rs +++ b/nexus/reconfigurator/execution/src/test_utils.rs @@ -113,10 +113,12 @@ pub fn overridables_for_test( let dendrite_port = cptestctx.dendrite.read().unwrap().get(&switch_slot).unwrap().port; let mgd_port = cptestctx.mgd.get(&switch_slot).unwrap().port; + let ddm_port = cptestctx.ddm.get(&switch_slot).unwrap().port; overrides.override_switch_zone_ip(sled_id, ip); overrides.override_dendrite_port(sled_id, dendrite_port); overrides.override_mgs_port(sled_id, mgs_port); overrides.override_mgd_port(sled_id, mgd_port); + overrides.override_ddm_port(sled_id, ddm_port); } overrides } diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index a1f865e2934..7dbbf3640dc 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -1854,7 +1854,8 @@ mod tests { | ServiceName::RepoDepot | ServiceName::ManagementGatewayService | ServiceName::Dendrite - | ServiceName::Mgd => { + | ServiceName::Mgd + | ServiceName::Ddm => { out.insert(service, Ok(())); } // InternalNtp is too large to fit in a single DNS packet and diff --git a/nexus/test-utils/src/nexus_test.rs b/nexus/test-utils/src/nexus_test.rs index 693aea88732..48c945e742b 100644 --- a/nexus/test-utils/src/nexus_test.rs +++ b/nexus/test-utils/src/nexus_test.rs @@ -117,6 +117,7 @@ pub struct ControlPlaneTestContext { /// Ports of stopped dendrite instances (for use by start_dendrite) pub stopped_dendrite_ports: RwLock>, pub mgd: HashMap, + pub ddm: HashMap, pub external_dns_zone_name: String, pub external_dns: TransientDnsServer, pub internal_dns: TransientDnsServer, @@ -320,6 +321,9 @@ impl ControlPlaneTestContext { for (_, mut mgd) in self.mgd { mgd.cleanup().await.unwrap(); } + for (_, mut ddm) in self.ddm { + ddm.cleanup().await.unwrap(); + } self.logctx.cleanup_successful(); } } diff --git a/nexus/test-utils/src/starter.rs b/nexus/test-utils/src/starter.rs index aa9c5cbd268..48257f8dab3 100644 --- a/nexus/test-utils/src/starter.rs +++ b/nexus/test-utils/src/starter.rs @@ -23,6 +23,7 @@ use futures::future::BoxFuture; use gateway_test_utils::setup::GatewayTestContext; use iddqd::IdOrdMap; use internal_dns_types::config::DnsConfigBuilder; +use internal_dns_types::config::HostSwitchZonePorts; use internal_dns_types::names::DNS_ZONE_EXTERNAL_TESTING; use internal_dns_types::names::ServiceName; use nexus_config::Database; @@ -146,6 +147,7 @@ pub struct ControlPlaneStarter<'a, N: NexusServer> { pub gateway: BTreeMap, pub dendrite: RwLock>, pub mgd: HashMap, + pub ddm: HashMap, // NOTE: Only exists after starting Nexus, until external Nexus is // initialized. @@ -203,6 +205,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { gateway: BTreeMap::new(), dendrite: RwLock::new(HashMap::new()), mgd: HashMap::new(), + ddm: HashMap::new(), nexus_internal: None, nexus_internal_addr: None, external_dns_zone_name: None, @@ -461,6 +464,17 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { self.config.pkg.mgd.insert(switch_slot, config); } + pub async fn start_ddm(&mut self, switch_slot: SwitchSlot) { + let log = &self.logctx.log; + debug!(log, "Starting DDM sim"; "switch_slot" => ?switch_slot); + + let ddm = dev::maghemite::DdmInstance::start().await.unwrap(); + let port = ddm.port; + self.ddm.insert(switch_slot, ddm); + + debug!(log, "DDM sim started"; "port" => port); + } + pub async fn record_switch_dns( &mut self, sled_id: SledUuid, @@ -479,9 +493,18 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { .host_zone_switch( sled_id, Ipv6Addr::LOCALHOST, - self.dendrite.read().unwrap().get(&switch_slot).unwrap().port, - self.gateway.get(&switch_slot).unwrap().port, - self.mgd.get(&switch_slot).unwrap().port, + HostSwitchZonePorts { + dendrite: self + .dendrite + .read() + .unwrap() + .get(&switch_slot) + .unwrap() + .port, + mgs: self.gateway.get(&switch_slot).unwrap().port, + mgd: self.mgd.get(&switch_slot).unwrap().port, + ddm: self.ddm.get(&switch_slot).unwrap().port, + }, ) .unwrap() } @@ -1250,6 +1273,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { dendrite: RwLock::new(self.dendrite.into_inner().unwrap()), stopped_dendrite_ports: RwLock::new(HashMap::new()), mgd: self.mgd, + ddm: self.ddm, external_dns_zone_name: self.external_dns_zone_name.unwrap(), external_dns: self.external_dns.unwrap(), internal_dns: self.internal_dns.unwrap(), @@ -1291,6 +1315,9 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { for (_, mut mgd) in self.mgd { mgd.cleanup().await.unwrap(); } + for (_, mut ddm) in self.ddm { + ddm.cleanup().await.unwrap(); + } self.logctx.cleanup_successful(); } @@ -1631,6 +1658,12 @@ pub(crate) async fn setup_with_config_impl( builder.start_mgd(SwitchSlot::Switch0).boxed() }), ), + ( + "start_ddm_switch0", + Box::new(|builder| { + builder.start_ddm(SwitchSlot::Switch0).boxed() + }), + ), ( "record_switch_dns", Box::new(|builder| { @@ -1675,6 +1708,12 @@ pub(crate) async fn setup_with_config_impl( builder.start_mgd(SwitchSlot::Switch1).boxed() }), ), + ( + "start_ddm_switch1", + Box::new(|builder| { + builder.start_ddm(SwitchSlot::Switch1).boxed() + }), + ), ( "record_switch_dns", Box::new(|builder| { diff --git a/nexus/tests/integration_tests/initialization.rs b/nexus/tests/integration_tests/initialization.rs index 350757cf1de..714880feb37 100644 --- a/nexus/tests/integration_tests/initialization.rs +++ b/nexus/tests/integration_tests/initialization.rs @@ -158,6 +158,11 @@ async fn test_nexus_boots_before_dendrite() { starter.start_mgd(SwitchSlot::Switch1).await; info!(log, "Started mgd"); + info!(log, "Starting ddm"); + starter.start_ddm(SwitchSlot::Switch0).await; + starter.start_ddm(SwitchSlot::Switch1).await; + info!(log, "Started ddm"); + info!(log, "Populating internal DNS records"); starter .record_switch_dns( @@ -197,6 +202,8 @@ async fn nexus_schema_test_setup( starter.start_dendrite(SwitchSlot::Switch1).await; starter.start_mgd(SwitchSlot::Switch0).await; starter.start_mgd(SwitchSlot::Switch1).await; + starter.start_ddm(SwitchSlot::Switch0).await; + starter.start_ddm(SwitchSlot::Switch1).await; starter.populate_internal_dns().await; } diff --git a/nexus/types/src/deployment/execution/dns.rs b/nexus/types/src/deployment/execution/dns.rs index 009377fd8d9..c901dcc92f7 100644 --- a/nexus/types/src/deployment/execution/dns.rs +++ b/nexus/types/src/deployment/execution/dns.rs @@ -155,9 +155,7 @@ pub fn blueprint_internal_dns_config( dns_builder.host_zone_switch( scrimlet.id(), switch_zone_ip, - overrides.dendrite_port(scrimlet.id()), - overrides.mgs_port(scrimlet.id()), - overrides.mgd_port(scrimlet.id()), + overrides.host_switch_zone_ports(scrimlet.id()), )?; } diff --git a/nexus/types/src/deployment/execution/overridables.rs b/nexus/types/src/deployment/execution/overridables.rs index 881a7c49bdd..bf46374d1dc 100644 --- a/nexus/types/src/deployment/execution/overridables.rs +++ b/nexus/types/src/deployment/execution/overridables.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use internal_dns_types::config::HostSwitchZonePorts; +use omicron_common::address::DDMD_PORT; use omicron_common::address::DENDRITE_PORT; use omicron_common::address::Ipv6Subnet; use omicron_common::address::MGD_PORT; @@ -29,6 +31,8 @@ pub struct Overridables { pub mgs_ports: BTreeMap, /// map: sled id -> TCP port on which that sled's MGD is listening pub mgd_ports: BTreeMap, + /// map: sled id -> TCP port on which that sled's DDM is listening + pub ddm_ports: BTreeMap, /// map: sled id -> IP address of the sled's switch zone pub switch_zone_ips: BTreeMap, } @@ -67,6 +71,32 @@ impl Overridables { self.mgd_ports.get(&sled_id).copied().unwrap_or(MGD_PORT) } + /// Specify the TCP port on which this sled's DDM is listening + pub fn override_ddm_port(&mut self, sled_id: SledUuid, port: u16) { + self.ddm_ports.insert(sled_id, port); + } + + /// Returns the TCP port on which this sled's DDM is listening + pub fn ddm_port(&self, sled_id: SledUuid) -> u16 { + self.ddm_ports.get(&sled_id).copied().unwrap_or(DDMD_PORT) + } + + /// Returns the per-switch-zone service ports for this sled. + /// + /// Bundles the four switch-zone admin ports into a single + /// [`HostSwitchZonePorts`] so callers cannot swap fields by accident. + pub fn host_switch_zone_ports( + &self, + sled_id: SledUuid, + ) -> HostSwitchZonePorts { + HostSwitchZonePorts { + dendrite: self.dendrite_port(sled_id), + mgs: self.mgs_port(sled_id), + mgd: self.mgd_port(sled_id), + ddm: self.ddm_port(sled_id), + } + } + /// Specify the IP address of this switch zone pub fn override_switch_zone_ip( &mut self, diff --git a/package-manifest.toml b/package-manifest.toml index d742f5b5338..aa2a0c297c1 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -683,10 +683,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "7696ee48d5ee29a917dea459e281fe2e8ff20513" +source.commit = "974423895c17cc23711732f518e447b284425ccd" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "ce52b9094adf0ed567bd3ed1e3ac48ac1c983cc7859adacf4f392e415a1189ad" +source.sha256 = "eed4c89343c29b42a74b16d74186c4e3c1a78701b0398ec2b81206122e4317d1" output.type = "tarball" [package.mg-ddm] @@ -699,10 +699,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "7696ee48d5ee29a917dea459e281fe2e8ff20513" +source.commit = "974423895c17cc23711732f518e447b284425ccd" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "23950a4e73a07fa7f087ba3312e4bc5a8981fd9ebad54af2350baaa86ad6bbf3" +source.sha256 = "ea97c636761cf7f622ddf0382ab365e68973604809eb6ebc93a0bbb94f758030" output.type = "zone" output.intermediate_only = true @@ -714,10 +714,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "7696ee48d5ee29a917dea459e281fe2e8ff20513" +source.commit = "974423895c17cc23711732f518e447b284425ccd" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "301d31ca481e4822f69484feacca31dd08a7c4aae87d96641d384bda3178d2f3" +source.sha256 = "69f6bc36806b799174897762f0b10885d600a747018d1f9dbf2caeae9c749841" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/rack-setup/src/plan/service.rs b/sled-agent/rack-setup/src/plan/service.rs index 2ef6d79489a..63702c1090c 100644 --- a/sled-agent/rack-setup/src/plan/service.rs +++ b/sled-agent/rack-setup/src/plan/service.rs @@ -13,7 +13,7 @@ use iddqd::errors::DuplicateItem; use iddqd::id_upcast; use illumos_utils::zpool::ZpoolName; use internal_dns_types::config::{ - DnsConfigBuilder, DnsConfigParams, Host, Zone, + DnsConfigBuilder, DnsConfigParams, Host, HostSwitchZonePorts, Zone, }; use internal_dns_types::names::ServiceName; use nexus_types::deployment::LastAllocatedSubnetIpOffset; @@ -29,10 +29,10 @@ use nexus_types::deployment::{ }; use nexus_types::external_api::sled::SledState; use omicron_common::address::{ - CP_SERVICES_RESERVED_ADDRESSES, DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, - Ipv6Subnet, MGD_PORT, MGS_PORT, NEXUS_INTERNAL_PORT, NEXUS_LOCKSTEP_PORT, - NTP_PORT, NUM_SOURCE_NAT_PORTS, REPO_DEPOT_PORT, ReservedRackSubnet, - SLED_PREFIX, SLED_RESERVED_ADDRESSES, get_sled_address, + CP_SERVICES_RESERVED_ADDRESSES, DDMD_PORT, DENDRITE_PORT, DNS_HTTP_PORT, + DNS_PORT, Ipv6Subnet, MGD_PORT, MGS_PORT, NEXUS_INTERNAL_PORT, + NEXUS_LOCKSTEP_PORT, NTP_PORT, NUM_SOURCE_NAT_PORTS, REPO_DEPOT_PORT, + ReservedRackSubnet, SLED_PREFIX, SLED_RESERVED_ADDRESSES, get_sled_address, get_switch_zone_address, }; use omicron_common::api::external::{Generation, MacAddr, Vni}; @@ -338,9 +338,12 @@ impl ServicePlan { .host_zone_switch( sled.sled_id, address, - DENDRITE_PORT, - MGS_PORT, - MGD_PORT, + HostSwitchZonePorts { + dendrite: DENDRITE_PORT, + mgs: MGS_PORT, + mgd: MGD_PORT, + ddm: DDMD_PORT, + }, ) .unwrap(); } diff --git a/test-utils/src/dev/maghemite.rs b/test-utils/src/dev/maghemite.rs index 4c2d85df3ee..00225c8737a 100644 --- a/test-utils/src/dev/maghemite.rs +++ b/test-utils/src/dev/maghemite.rs @@ -133,7 +133,7 @@ async fn discover_port(logfile: String) -> Result { let timeout = Instant::now() + MGD_TIMEOUT; tokio::time::timeout_at(timeout, find_mgd_port_in_log(logfile)) .await - .context("time out while discovering mgd port number")? + .context("time out while discovering port number")? } async fn find_mgd_port_in_log(logfile: String) -> Result { @@ -163,6 +163,108 @@ async fn find_mgd_port_in_log(logfile: String) -> Result { } } +/// Test fixture that spawns and supervises a legit `ddmd` subprocess. +/// +/// Owns a `tokio::process::Child` and a tempdir; discovers the bound admin +/// port by scraping dropshot's startup `local_addr` records; kills the child +/// on `cleanup`/`Drop`. Mirrors `MgdInstance`. +/// +/// `ddmd` runs in sled global zones and switch zones in production. Spawned +/// here with `--no-state-machine`, which serves only the admin API and skips +/// the discovery / exchange / routing daemons that need real network +/// interfaces and illumos-only kernel facilities. Only switch-zone instances +/// are registered in internal DNS as `ServiceName::Ddm`; sled-global-zone +/// instances are accessed locally by their own host (RSS, sled-agent's +/// prefix advertisement, etc.) and don't need DNS publication. +pub struct DdmInstance { + /// Port number the ddmd instance is listening on. + pub port: u16, + /// Arguments provided to the `ddmd` cli command. + pub args: Vec, + /// Child process spawned by running `ddmd`. + pub child: Option, + /// Temporary directory where logging output and other files generated by + /// `ddmd` are stored. + pub data_dir: Option, +} + +impl DdmInstance { + /// Start a `ddmd` instance with `--no-state-machine`, bound to an + /// auto-assigned admin port on localhost. + pub async fn start() -> Result { + let temp_dir = TempDir::new()?; + + let args = vec![ + "--admin-addr".to_string(), + "::1".into(), + "--admin-port".into(), + "0".into(), + "--no-state-machine".into(), + "--data-dir".into(), + temp_dir.path().display().to_string(), + ]; + + let child = tokio::process::Command::new("ddmd") + .args(&args) + .stdin(Stdio::null()) + .stdout(Stdio::from(redirect_file(temp_dir.path(), "ddmd_stdout")?)) + .stderr(Stdio::from(redirect_file(temp_dir.path(), "ddmd_stderr")?)) + .spawn() + .with_context(|| { + format!("failed to spawn `ddmd` (with args: {:?})", &args) + })?; + + let child = Some(child); + + let temp_dir = temp_dir.keep(); + let port = + discover_port(temp_dir.join("ddmd_stdout").display().to_string()) + .await + .with_context(|| { + format!( + "failed to discover ddmd port from files in {}", + temp_dir.display() + ) + })?; + + Ok(Self { port, args, child, data_dir: Some(temp_dir) }) + } + + pub async fn cleanup(&mut self) -> Result<(), anyhow::Error> { + if let Some(mut child) = self.child.take() { + child.start_kill().context("Sending SIGKILL to child")?; + child.wait().await.context("waiting for child")?; + } + if let Some(dir) = self.data_dir.take() { + std::fs::remove_dir_all(&dir).with_context(|| { + format!("cleaning up temporary directory {}", dir.display()) + })?; + } + Ok(()) + } +} + +impl Drop for DdmInstance { + fn drop(&mut self) { + if self.child.is_some() || self.data_dir.is_some() { + eprintln!( + "WARN: dropped DdmInstance without cleaning it up first \ + (there may still be a child process running and a \ + temporary directory leaked)" + ); + if let Some(child) = self.child.as_mut() { + let _ = child.start_kill(); + } + if let Some(path) = self.data_dir.take() { + eprintln!( + "WARN: ddmd temporary directory leaked: {}", + path.display() + ); + } + } + } +} + #[cfg(test)] mod tests { use super::find_mgd_port_in_log; @@ -183,6 +285,16 @@ mod tests { .expect("Cannot find 'mgd' on PATH. Refer to README.md for installation instructions"); } + #[tokio::test] + async fn test_ddmd_in_path() { + tokio::process::Command::new("ddmd") + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() + .expect("Cannot find 'ddmd' on PATH. Refer to README.md for installation instructions"); + } + #[tokio::test] async fn test_discover_local_listening_port() { // Write some data to a fake log file diff --git a/tools/ci_check_opte_ver.sh b/tools/ci_check_opte_ver.sh index 4f1e099d9ea..13ae25fd07d 100755 --- a/tools/ci_check_opte_ver.sh +++ b/tools/ci_check_opte_ver.sh @@ -68,9 +68,9 @@ fi # Also check that the buildomat deploy job is using the same version BUILDOMAT_DEPLOY_TARGET=$(cat .github/buildomat/jobs/deploy.sh | sed -n 's/#:[ ]*target[ ]*=[ ]*"\(.*\)"/\1/p') -if [ "lab-2.0-opte-0.$API_VER" != "$BUILDOMAT_DEPLOY_TARGET" ]; then +if [ "lab-3.0-opte-0.$API_VER" != "$BUILDOMAT_DEPLOY_TARGET" ]; then echo "OPTE version mismatch:" echo "Cargo.toml: $OPTE_REV ($OPTE_VER)" - echo "buildomat deploy job: $BUILDOMAT_DEPLOY_TARGET (expected lab-opte-0.$API_VER)" + echo "buildomat deploy job: $BUILDOMAT_DEPLOY_TARGET (expected lab-3.0-opte-0.$API_VER)" exit 1 fi diff --git a/tools/install_builder_prerequisites.sh b/tools/install_builder_prerequisites.sh index 0f1df7d2528..d79a923ca1f 100755 --- a/tools/install_builder_prerequisites.sh +++ b/tools/install_builder_prerequisites.sh @@ -230,6 +230,7 @@ retry xtask download \ console \ dendrite-stub \ maghemite-mgd \ + maghemite-ddmd \ transceiver-control # Validate the PATH: diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 060b3a13efb..cd2803a6591 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1 +1 @@ -COMMIT="7696ee48d5ee29a917dea459e281fe2e8ff20513" +COMMIT="974423895c17cc23711732f518e447b284425ccd" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 060b3a13efb..cd2803a6591 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1 +1 @@ -COMMIT="7696ee48d5ee29a917dea459e281fe2e8ff20513" +COMMIT="974423895c17cc23711732f518e447b284425ccd" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 470facaa671..69aae8a4144 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,4 @@ -CIDL_SHA256="301d31ca481e4822f69484feacca31dd08a7c4aae87d96641d384bda3178d2f3" -MGD_LINUX_SHA256="95f9759a5fde2784d148c81df2218d29adde1d27fb72d5dbcf534de6450f0f7c" \ No newline at end of file +CIDL_SHA256="69f6bc36806b799174897762f0b10885d600a747018d1f9dbf2caeae9c749841" +MGD_LINUX_SHA256="aa2d1cda4a4d75f403856921abdd6cdffcf9c379f013b767be682b5ee1f32cea" +MG_DDM_SHA256="ea97c636761cf7f622ddf0382ab365e68973604809eb6ebc93a0bbb94f758030" +DDMD_LINUX_SHA256="25bee3739280df195949c36d402c43a3d76c5a47f92395009e2c2b0e6413d671" \ No newline at end of file diff --git a/tools/update_maghemite.sh b/tools/update_maghemite.sh index 0051397b51d..0a482cb4440 100755 --- a/tools/update_maghemite.sh +++ b/tools/update_maghemite.sh @@ -54,6 +54,12 @@ function update_mgd { SHA_LINUX=$(get_sha "$REPO" "$TARGET_COMMIT" "mgd" "linux") OUTPUT_LINUX=$(printf "MGD_LINUX_SHA256=\"%s\"\n" "$SHA_LINUX") + SHA_MG_DDM=$(get_sha "$REPO" "$TARGET_COMMIT" "mg-ddm" "image") + OUTPUT_MG_DDM=$(printf "MG_DDM_SHA256=\"%s\"\n" "$SHA_MG_DDM") + + SHA_DDMD_LINUX=$(get_sha "$REPO" "$TARGET_COMMIT" "ddmd" "linux") + OUTPUT_DDMD_LINUX=$(printf "DDMD_LINUX_SHA256=\"%s\"\n" "$SHA_DDMD_LINUX") + if [ -n "$DRY_RUN" ]; then MGD_PATH="/dev/null" else @@ -61,7 +67,7 @@ function update_mgd { fi echo "Updating Maghemite mgd from: $TARGET_COMMIT" set -x - printf "$OUTPUT\n$OUTPUT_LINUX" > $MGD_PATH + printf "$OUTPUT\n$OUTPUT_LINUX\n$OUTPUT_MG_DDM\n$OUTPUT_DDMD_LINUX" > $MGD_PATH set +x }