Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions aptos-node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use aptos_build_info::build_information;
use aptos_config::config::{merge_node_config, NodeConfig, PersistableConfig};
use aptos_framework::ReleaseBundle;
use aptos_genesis::builder::GenesisConfiguration;
use aptos_inspection_service::server::InspectionServiceComponents;
use aptos_logger::{prelude::*, telemetry_log_writer::TelemetryLog, Level, LoggerFilterUpdater};
use aptos_state_sync_driver::driver_factory::StateSyncRuntime;
use aptos_types::{
Expand Down Expand Up @@ -701,6 +702,14 @@ pub fn setup_environment_and_start_node(
// Starts the admin service
let mut admin_service = services::start_admin_service(&node_config);

// Start the inspection service (port 9101) early — before RocksDB — so that
// Prometheus metrics are scrapeable from the very first moments of startup.
// Endpoints that require a fully-initialised node (e.g. `/peer_information`) will
// return 503 until `inspection_components.set(...)` is called below.
let inspection_components = Arc::new(InspectionServiceComponents::new());
let inspection_service_runtime =
services::start_node_inspection_service(&node_config, inspection_components.clone());

// Initialize transaction tracing from config
{
let tracing_cfg = &node_config.transaction_tracing;
Expand Down Expand Up @@ -805,12 +814,9 @@ pub fn setup_environment_and_start_node(
db_rw.clone(),
)?;

// Start the node inspection service
let inspection_service_runtime = services::start_node_inspection_service(
&node_config,
aptos_data_client,
peers_and_metadata.clone(),
);
// Inject the now-available components into the already-running inspection service.
// This unblocks /peer_information (and any other endpoints that need these values).
inspection_components.set(aptos_data_client, peers_and_metadata.clone());

// Bootstrap the API and transaction streaming services
let (
Expand Down
17 changes: 8 additions & 9 deletions aptos-node/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ use aptos_consensus::{
quorum_store::quorum_store_db::QuorumStoreDB,
};
use aptos_consensus_notifications::ConsensusNotifier;
use aptos_data_client::client::AptosDataClient;
use aptos_db_indexer::{db_indexer::InternalIndexerDB, indexer_reader::IndexerReaders};
use aptos_event_notifications::{DbBackedOnChainConfig, ReconfigNotificationListener};
use aptos_indexer_grpc_fullnode::runtime::bootstrap as bootstrap_indexer_grpc;
use aptos_indexer_grpc_table_info::runtime::{
bootstrap as bootstrap_indexer_table_info, bootstrap_internal_indexer_db,
};
use aptos_inspection_service::server::InspectionServiceComponents;
use aptos_logger::{debug, telemetry_log_writer::TelemetryLog, LoggerFilterUpdater};
use aptos_mempool::{
network::MempoolSyncMsg, MempoolClientRequest, MempoolClientSender, QuorumStoreRequest,
Expand Down Expand Up @@ -202,17 +202,16 @@ pub fn start_admin_service(node_config: &NodeConfig) -> AdminService {
AdminService::new(node_config)
}

/// Starts the node inspection service and returns the runtime
/// Starts the node inspection service and returns the runtime.
///
/// Pass an `Arc<InspectionServiceComponents>` whose `OnceLock` fields are initially
/// unset. After the rest of the node has initialised, call `components.set(...)` to
/// inject the live values, at which point all endpoints will become fully operational.
pub fn start_node_inspection_service(
node_config: &NodeConfig,
aptos_data_client: AptosDataClient,
peers_and_metadata: Arc<PeersAndMetadata>,
components: Arc<InspectionServiceComponents>,
) -> Runtime {
aptos_inspection_service::start_inspection_service(
node_config.clone(),
aptos_data_client,
peers_and_metadata,
)
aptos_inspection_service::start_inspection_service(node_config.clone(), components)
}

/// Starts the peer monitoring service and returns the runtime
Expand Down
54 changes: 41 additions & 13 deletions crates/aptos-inspection-service/src/server/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,37 @@ use hyper::{
use std::{
convert::Infallible,
net::{SocketAddr, ToSocketAddrs},
sync::Arc,
sync::{Arc, OnceLock},
};
use tokio::runtime::Runtime;

/// Late-bound dependencies of the inspection service.
///
/// The service can be started before these values exist: each field is a
/// write-once `OnceLock` that begins unset and is filled in by
/// [`InspectionServiceComponents::set`]. Readers observe an unset field as
/// `None` via `OnceLock::get`.
#[derive(Default)]
pub struct InspectionServiceComponents {
    // Write-once slot for the data client; unset until `set` is called.
    pub data_client: OnceLock<AptosDataClient>,
    // Write-once slot for the shared peers-and-metadata handle; unset until `set` is called.
    pub peers_and_metadata: OnceLock<Arc<PeersAndMetadata>>,
}

impl InspectionServiceComponents {
    /// Creates a container in which neither component has been injected yet.
    pub fn new() -> Self {
        // The derived `Default` builds every field as an empty `OnceLock`.
        Self::default()
    }

    /// Injects both components once they are available.
    ///
    /// The data client slot is filled first, then the peers-and-metadata slot.
    ///
    /// # Panics
    ///
    /// Panics if either slot has already been filled, i.e. if `set` is
    /// called more than once on the same instance.
    pub fn set(&self, data_client: AptosDataClient, peers_and_metadata: Arc<PeersAndMetadata>) {
        let Self {
            data_client: client_slot,
            peers_and_metadata: peers_slot,
        } = self;

        client_slot
            .set(data_client)
            .expect("data_client already set");
        peers_slot
            .set(peers_and_metadata)
            .expect("peers_and_metadata already set");
    }
}

mod configuration;
mod identity_information;
mod index;
Expand Down Expand Up @@ -48,10 +75,14 @@ pub const UNEXPECTED_ERROR_MESSAGE: &str = "An unexpected error was encountered!
/// Starts the inspection service that listens on the configured
/// address and handles various endpoint requests. Returns the
/// runtime so the caller can keep it alive.
///
/// `components` is an `Arc<InspectionServiceComponents>` whose `OnceLock` fields start
/// unset and are filled in via `components.set(...)` once the rest of the node
/// has finished initialising. Until then, endpoints that require those values
/// (e.g. `/peer_information`) will return 503.
pub fn start_inspection_service(
node_config: NodeConfig,
aptos_data_client: AptosDataClient,
peers_and_metadata: Arc<PeersAndMetadata>,
components: Arc<InspectionServiceComponents>,
) -> Runtime {
// Fetch the service port and address
let service_port = node_config.inspection_service.port;
Expand Down Expand Up @@ -80,16 +111,10 @@ pub fn start_inspection_service(
// Create the service function that handles the endpoint requests
let make_service = make_service_fn(move |_conn| {
let node_config = node_config.clone();
let aptos_data_client = aptos_data_client.clone();
let peers_and_metadata = peers_and_metadata.clone();
let components = components.clone();
async move {
Ok::<_, Infallible>(service_fn(move |request| {
serve_requests(
request,
node_config.clone(),
aptos_data_client.clone(),
peers_and_metadata.clone(),
)
serve_requests(request, node_config.clone(), components.clone())
}))
}
});
Expand All @@ -106,9 +131,12 @@ pub fn start_inspection_service(
async fn serve_requests(
req: Request<Body>,
node_config: NodeConfig,
aptos_data_client: AptosDataClient,
peers_and_metadata: Arc<PeersAndMetadata>,
components: Arc<InspectionServiceComponents>,
) -> Result<Response<Body>, hyper::Error> {
// Read the optional components (may be None during early startup)
let aptos_data_client = components.data_client.get().cloned();
let peers_and_metadata = components.peers_and_metadata.get().cloned();

// Process the request and get the response components
let (status_code, body, content_type) = match req.uri().path() {
CONFIGURATION_PATH => {
Expand Down
32 changes: 22 additions & 10 deletions crates/aptos-inspection-service/src/server/peer_information.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,33 @@ use std::{collections::BTreeMap, ops::Deref, sync::Arc};
pub const PEER_INFO_DISABLED_MESSAGE: &str =
"This endpoint is disabled! Enable it in the node config at inspection_service.expose_peer_information: true";

/// Handles a new peer information request
// The message to display while the node is still initializing
pub const PEER_INFO_INITIALIZING_MESSAGE: &str =
"Node is still initializing — peer information is not yet available";

/// Handles a new peer information request.
///
/// `aptos_data_client` and `peers_and_metadata` are `None` during early startup
/// (before the rest of the node has initialised). In that case the endpoint
/// returns 503 so callers know to retry rather than treating it as a hard error.
pub fn handle_peer_information_request(
node_config: &NodeConfig,
aptos_data_client: AptosDataClient,
peers_and_metadata: Arc<PeersAndMetadata>,
aptos_data_client: Option<AptosDataClient>,
peers_and_metadata: Option<Arc<PeersAndMetadata>>,
) -> (StatusCode, Body, String) {
// Only return peer information if the endpoint is enabled
let (status_code, body) = if node_config.inspection_service.expose_peer_information {
let peer_information = get_peer_information(aptos_data_client, peers_and_metadata);
(StatusCode::OK, Body::from(peer_information))
} else {
(
let (status_code, body) = match (aptos_data_client, peers_and_metadata) {
_ if !node_config.inspection_service.expose_peer_information => (
StatusCode::FORBIDDEN,
Body::from(PEER_INFO_DISABLED_MESSAGE),
)
),
(Some(data_client), Some(pam)) => {
let peer_information = get_peer_information(data_client, pam);
(StatusCode::OK, Body::from(peer_information))
},
_ => (
StatusCode::SERVICE_UNAVAILABLE,
Body::from(PEER_INFO_INITIALIZING_MESSAGE),
),
};

(status_code, body, CONTENT_TYPE_TEXT.into())
Expand Down
12 changes: 9 additions & 3 deletions crates/aptos-inspection-service/src/server/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::{
identity_information::IDENTITY_INFO_DISABLED_MESSAGE,
peer_information::PEER_INFO_DISABLED_MESSAGE, serve_requests,
system_information::SYS_INFO_DISABLED_MESSAGE, utils::get_all_metrics,
InspectionServiceComponents,
},
CONFIGURATION_PATH, FORGE_METRICS_PATH, IDENTITY_INFORMATION_PATH, INDEX_PATH,
JSON_METRICS_PATH, METRICS_PATH, PEER_INFORMATION_PATH, SYSTEM_INFORMATION_PATH,
Expand Down Expand Up @@ -280,7 +281,9 @@ fn test_publish_metrics() {
assert_approx_eq!(1.0, metrics.first().unwrap().get_counter().get_value());
}

// Exercise the serve_requests() handler with a GET request to the given path
// Exercise the serve_requests() handler with a GET request to the given path.
// Components are pre-populated with live values so all endpoints behave as they
// would on a fully-initialised node.
async fn send_get_request_to_path(config: &NodeConfig, endpoint: &str) -> Response<Body> {
// Build the URI
let uri = format!("http://127.0.0.1:9201{}", endpoint);
Expand All @@ -300,6 +303,10 @@ async fn send_get_request_to_path(config: &NodeConfig, endpoint: &str) -> Respon
None,
);

// Build fully-populated inspection components
let components = Arc::new(InspectionServiceComponents::new());
components.set(aptos_data_client, peers_and_metadata);

// Serve the request
serve_requests(
Request::builder()
Expand All @@ -308,8 +315,7 @@ async fn send_get_request_to_path(config: &NodeConfig, endpoint: &str) -> Respon
.body(Body::from(""))
.unwrap(),
config.clone(),
aptos_data_client,
peers_and_metadata,
components,
)
.await
.unwrap()
Expand Down
Loading