diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..6fbeb1e5 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,44 @@ +name: Build PBS Docker Image + +on: + push: + branches: + - "lt/otel-support" + +permissions: + contents: read + packages: write + +jobs: + build-and-push-pbs-docker: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: true + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push PBS Docker image + uses: docker/build-push-action@v6 + with: + context: . + push: true + platforms: linux/amd64,linux/arm64 + tags: ghcr.io/commit-boost/pbs:otel-support + cache-from: type=registry,ref=ghcr.io/commit-boost/pbs:buildcache + cache-to: type=registry,ref=ghcr.io/commit-boost/pbs:buildcache,mode=max + file: provisioning/pbs.Dockerfile diff --git a/Cargo.lock b/Cargo.lock index 5ebc811a..c4b2fe40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1494,6 +1494,9 @@ dependencies = [ "ethereum_ssz_derive", "eyre", "jsonwebtoken", + "opentelemetry", + "opentelemetry-otlp", + "opentelemetry_sdk", "pbkdf2 0.12.2", "rand 0.9.0", "reqwest", @@ -1508,6 +1511,7 @@ dependencies = [ "tonic", "tracing", "tracing-appender", + "tracing-opentelemetry", "tracing-subscriber", "tree_hash 0.9.1", "tree_hash_derive", @@ -3562,6 +3566,86 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "opentelemetry" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e87237e2775f74896f9ad219d26a2081751187eb7c9f5c58dde20a23b95d16c" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.12", + 
"tracing", +] + +[[package]] +name = "opentelemetry-http" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46d7ab32b827b5b495bd90fa95a6cb65ccc293555dcc3199ae2937d2d237c8ed" +dependencies = [ + "async-trait", + "bytes", + "http", + "opentelemetry", + "reqwest", + "tracing", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656" +dependencies = [ + "futures-core", + "http", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest", + "thiserror 2.0.12", + "tokio", + "tonic", + "tracing", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c40da242381435e18570d5b9d50aca2a4f4f4d8e146231adb4e7768023309b3" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afdefb21d1d47394abc1ba6c57363ab141be19e27cc70d0e422b7f303e4d290b" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "glob", + "opentelemetry", + "percent-encoding", + "rand 0.9.0", + "serde_json", + "thiserror 2.0.12", + "tokio", + "tokio-stream", + "tracing", +] + [[package]] name = "overload" version = "0.1.1" @@ -4072,6 +4156,7 @@ dependencies = [ "base64 0.22.1", "bytes", "encoding_rs", + "futures-channel", "futures-core", "futures-util", "h2", @@ -5302,6 +5387,24 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-opentelemetry" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd8e764bd6f5813fd8bebc3117875190c5b0415be8f7f8059bffb6ecd979c444" +dependencies = [ + "js-sys", + "once_cell", + 
"opentelemetry", + "opentelemetry_sdk", + "smallvec", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber", + "web-time", +] + [[package]] name = "tracing-serde" version = "0.2.0" @@ -5716,6 +5819,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webpki-roots" version = "0.26.8" diff --git a/Cargo.toml b/Cargo.toml index aef26a94..1afc2bdb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,11 @@ eyre = "0.6.12" futures = "0.3.30" headers = "0.4.0" indexmap = "2.2.6" +jsonwebtoken = { version = "9.3.1", default-features = false } lazy_static = "1.5.0" +opentelemetry = { version = "0.29", features = ["trace"] } +opentelemetry-otlp = { version = "0.29", features = ["grpc-tonic", "http-proto", "trace"] } +opentelemetry_sdk = { version = "0.29", features = ["rt-tokio", "trace"] } parking_lot = "0.12.3" pbkdf2 = "0.12.2" prometheus = "0.13.4" @@ -63,6 +67,7 @@ tonic = { version = "0.12.3", features = ["channel", "prost", "tls"] } tonic-build = "0.12.3" tracing = "0.1.40" tracing-appender = "0.2.3" +tracing-opentelemetry = "0.30" tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] } tree_hash = "0.9" tree_hash_derive = "0.9" @@ -70,4 +75,3 @@ typenum = "1.17.0" unicode-normalization = "0.1.24" url = { version = "2.5.0", features = ["serde"] } uuid = { version = "1.8.0", features = ["fast-rng", "serde", "v4"] } -jsonwebtoken = { version = "9.3.1", default-features = false } diff --git a/config.example.toml b/config.example.toml index ae69c3ff..5c29dc09 100644 --- a/config.example.toml +++ b/config.example.toml @@ -129,8 +129,8 @@ loader = "./tests/data/mux_keys.example.json" # loader = { url = "http://localhost:8000/keys" } # loader = { registry = "lido", node_operator_id 
= 8 } # loader = { registry = "ssv", node_operator_id = 8 } -timeout_get_header_ms = 900 late_in_slot_time_ms = 1500 +timeout_get_header_ms = 900 # For each mux, one or more [[mux.relays]] can be defined, which will be used for the matching validator pubkeys # Only the relays defined here will be used, and the relays defined in the main [[relays]] config will be ignored # The fields specified here are the same as in [[relays]] (headers, get_params, enable_timing_games, target_first_request_ms, frequency_get_header_ms) @@ -254,6 +254,12 @@ host = "127.0.0.1" # OPTIONAL, DEFAULT: 10000 start_port = 10000 +[logs] +# Whether to export OpenTelemetry traces. OTEL is configured via env variables, set those in the [env] section or in your docker compose. +# For more info see: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md +# OPTIONAL, DEFAULT: false +export_traces = false + # Configuration stdout logs # OPTIONAL, DEFAULT: enabled [logs.stdout] @@ -288,3 +294,8 @@ dir_path = "./logs" # Maximum number of log files to keep # OPTIONAL max_files = 30 + +# Env variables set for all services. 
+# OPTIONAL +[env] +A_COMMON_ENV = "a_common_value" diff --git a/crates/cli/src/docker_init.rs b/crates/cli/src/docker_init.rs index 4453f597..9460b7ff 100644 --- a/crates/cli/src/docker_init.rs +++ b/crates/cli/src/docker_init.rs @@ -72,6 +72,12 @@ pub async fn handle_docker_init(config_path: PathBuf, output_dir: PathBuf) -> Re // targets to pass to prometheus let mut targets = Vec::new(); + let mut service_common_envs = IndexMap::new(); + for (key, val) in cb_config.env { + let (key, val) = get_env_val(key.as_str(), val.as_str()); + service_common_envs.insert(key, val); + } + // address for signer API communication let signer_port = 20000; let signer_server = @@ -112,6 +118,7 @@ pub async fn handle_docker_init(config_path: PathBuf, output_dir: PathBuf) -> Re get_env_interp(MODULE_JWT_ENV, &jwt_name), get_env_val(SIGNER_URL_ENV, &signer_server), ]); + module_envs.extend(service_common_envs.clone()); // Pass on the env variables if let Some(envs) = module.env { @@ -198,6 +205,7 @@ pub async fn handle_docker_init(config_path: PathBuf, output_dir: PathBuf) -> Re get_env_val(CONFIG_ENV, CONFIG_DEFAULT), get_env_uval(BUILDER_PORT_ENV, builder_events_port), ]); + module_envs.extend(service_common_envs.clone()); if let Some((key, val)) = chain_spec_env.clone() { module_envs.insert(key, val); @@ -247,6 +255,7 @@ pub async fn handle_docker_init(config_path: PathBuf, output_dir: PathBuf) -> Re // setup pbs service let mut pbs_envs = IndexMap::from([get_env_val(CONFIG_ENV, CONFIG_DEFAULT)]); + pbs_envs.extend(service_common_envs.clone()); let mut pbs_volumes = vec![config_volume.clone()]; // ports @@ -336,6 +345,7 @@ pub async fn handle_docker_init(config_path: PathBuf, output_dir: PathBuf) -> Re get_env_same(JWTS_ENV), get_env_uval(SIGNER_PORT_ENV, signer_port as u64), ]); + signer_envs.extend(service_common_envs.clone()); let mut ports = vec![]; @@ -464,6 +474,7 @@ pub async fn handle_docker_init(config_path: PathBuf, output_dir: PathBuf) -> Re get_env_val(DIRK_KEY_ENV, 
DIRK_KEY_DEFAULT), get_env_val(DIRK_DIR_SECRETS_ENV, DIRK_DIR_SECRETS_DEFAULT), ]); + signer_envs.extend(service_common_envs.clone()); let mut ports = vec![]; diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index df78b046..0fdf8eda 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -21,6 +21,10 @@ ethereum_serde_utils.workspace = true ethereum_ssz.workspace = true ethereum_ssz_derive.workspace = true eyre.workspace = true +jsonwebtoken.workspace = true +opentelemetry.workspace = true +opentelemetry-otlp.workspace = true +opentelemetry_sdk.workspace = true pbkdf2.workspace = true rand.workspace = true reqwest.workspace = true @@ -35,9 +39,9 @@ toml.workspace = true tonic.workspace = true tracing.workspace = true tracing-appender.workspace = true +tracing-opentelemetry.workspace = true tracing-subscriber.workspace = true tree_hash.workspace = true tree_hash_derive.workspace = true unicode-normalization.workspace = true url.workspace = true -jsonwebtoken.workspace = true diff --git a/crates/common/src/config/log.rs b/crates/common/src/config/log.rs index 383d6fba..25b75e05 100644 --- a/crates/common/src/config/log.rs +++ b/crates/common/src/config/log.rs @@ -8,7 +8,12 @@ use crate::utils::default_bool; #[derive(Clone, Default, Debug, Deserialize, Serialize)] pub struct LogsSettings { + /// Whether to export OpenTelemetry traces + #[serde(default = "default_bool::")] + pub export_traces: bool, + #[serde(default)] pub stdout: StdoutLogSettings, + #[serde(default)] pub file: FileLogSettings, } diff --git a/crates/common/src/config/mod.rs b/crates/common/src/config/mod.rs index 75fd3c9d..4c76dc16 100644 --- a/crates/common/src/config/mod.rs +++ b/crates/common/src/config/mod.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::{collections::HashMap, path::PathBuf}; use eyre::Result; use serde::{Deserialize, Serialize}; @@ -35,6 +35,8 @@ pub struct CommitBoostConfig { pub metrics: Option, #[serde(default)] pub logs: LogsSettings, + 
#[serde(default)] + pub env: HashMap, } impl CommitBoostConfig { @@ -81,6 +83,7 @@ impl CommitBoostConfig { signer: helper_config.signer, metrics: helper_config.metrics, logs: helper_config.logs, + env: helper_config.env, }; Ok(config) @@ -120,4 +123,6 @@ struct HelperConfig { metrics: Option, #[serde(default)] logs: LogsSettings, + #[serde(default)] + env: HashMap, } diff --git a/crates/common/src/utils.rs b/crates/common/src/utils.rs index 37119580..71d3ffe7 100644 --- a/crates/common/src/utils.rs +++ b/crates/common/src/utils.rs @@ -9,6 +9,8 @@ use alloy::{ }; use axum::http::HeaderValue; use blst::min_pk::{PublicKey, Signature}; +use opentelemetry::trace::TracerProvider as _; +use opentelemetry_otlp::OTEL_EXPORTER_OTLP_PROTOCOL; use rand::{distr::Alphanumeric, Rng}; use reqwest::header::HeaderMap; use serde::{de::DeserializeOwned, Serialize}; @@ -16,6 +18,7 @@ use serde_json::Value; use ssz::{Decode, Encode}; use tracing::Level; use tracing_appender::{non_blocking::WorkerGuard, rolling::Rotation}; +use tracing_opentelemetry::OpenTelemetryLayer; use tracing_subscriber::{ fmt::{format::Format, Layer}, prelude::*, @@ -235,6 +238,32 @@ pub fn initialize_tracing_log( } }; + if settings.export_traces { + // grpc by default + let exporter = if let Ok(protocol) = std::env::var(OTEL_EXPORTER_OTLP_PROTOCOL) { + if protocol.contains("http") { + opentelemetry_otlp::SpanExporter::builder().with_http().build()? + } else { + opentelemetry_otlp::SpanExporter::builder().with_tonic().build()? + } + } else { + opentelemetry_otlp::SpanExporter::builder().with_tonic().build()? 
+ }; + + let tracer = opentelemetry_sdk::trace::SdkTracerProvider::builder() + .with_batch_exporter(exporter) + .build() + .tracer("commit_boost"); + + let layer = OpenTelemetryLayer::new(tracer) + .with_tracked_inactivity(false) + .with_threads(false) + .with_filter(format_crates_filter("info", "trace")) + .boxed(); + + layers.push(layer); + } + tracing_subscriber::registry().with(layers).init(); Ok((stdout_guard, file_guard)) diff --git a/crates/pbs/src/routes/router.rs b/crates/pbs/src/routes/router.rs index e5a28de5..d52922a3 100644 --- a/crates/pbs/src/routes/router.rs +++ b/crates/pbs/src/routes/router.rs @@ -42,7 +42,7 @@ pub fn create_app_router>(state: PbsStateGu } #[tracing::instrument( - name = "", + name = "request", skip_all, fields( method = %req.extensions().get::().map(|m| m.as_str()).unwrap_or("unknown"), diff --git a/docs/docs/get_started/configuration.md b/docs/docs/get_started/configuration.md index 4e642205..bb5967e4 100644 --- a/docs/docs/get_started/configuration.md +++ b/docs/docs/get_started/configuration.md @@ -372,3 +372,17 @@ docker compose -f cb.docker-compose.yml exec cb_signer curl -X POST http://local - If running in Docker containers, changes in `volumes` will not be applied, as it requires the container to be recreated. Be careful if changing a path to a local file as it may not be accessible from the container. - Custom PBS modules may override the default behaviour of the hot reload feature to parse extra configuration fields. Check the [examples](https://github.com/Commit-Boost/commit-boost-client/blob/main/examples/status_api/src/main.rs) for more details. - In case the reload fails (most likely because of some misconfigured option), the server will return a 500 error and the previous configuration will be kept. + +## OpenTelemetry +All modules support exporting OTEL traces to an external endpoint. 
To enable this, make sure to set +```toml +[logs] +export_traces = true +``` +and set the relevant OTEL environment variables, either via the `[env]` section, or directly in your docker compose / binary. As an example, you could set: +```toml +[env] +OTEL_EXPORTER_OTLP_ENDPOINT = "http://localhost:4317" +OTEL_SERVICE_NAME = "pbs" +``` +to export traces to a local instance via gRPC. For more information on what variables to use, check out the official [OTEL exporter reference](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md). \ No newline at end of file diff --git a/examples/configs/pbs_metrics.toml b/examples/configs/pbs_metrics.toml index 08d283cc..babd9203 100644 --- a/examples/configs/pbs_metrics.toml +++ b/examples/configs/pbs_metrics.toml @@ -1,4 +1,4 @@ -# PBS + metrics + logs to file +# PBS + metrics + export traces chain = "Holesky" @@ -13,4 +13,9 @@ url = "http://0xa1cec75a3f0661e99299274182938151e8433c61a19222347ea1313d839229cb enabled = true [logs] -log_dir_path = "./logs" +export_traces = true + + +[env] +OTEL_EXPORTER_OTLP_ENDPOINT = "http://localhost:4317" +OTEL_SERVICE_NAME = "pbs"