Skip to content
This repository has been archived by the owner on Oct 28, 2024. It is now read-only.

Create granular vector event logs #34

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
2 changes: 1 addition & 1 deletion src/sinks/aws_s3/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ impl S3SinkConfig {
// Returns back the same result so it continues to work downstream
.map_result(|result: Result<S3Response, _>| {
if let Ok(ref response) = result {
response.send_event_metadata.emit_upload_event();
response.event_log_metadata.emit_upload_event();
}
result
})
Expand Down
38 changes: 29 additions & 9 deletions src/sinks/aws_s3/sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ use crate::{
},
util::{
metadata::RequestMetadataBuilder, request_builder::EncodeResult, Compression,
RequestBuilder, vector_event::VectorSendEventMetadata, vector_event::extract_topic_name,
RequestBuilder,
vector_event::{VectorEventLogSendMetadata, extract_topic_name, generate_count_map},
},
},
};
Expand Down Expand Up @@ -61,11 +62,34 @@ impl RequestBuilder<(S3PartitionKey, Vec<Event>)> for S3RequestOptions {
let finalizers = events.take_finalizers();
let s3_key_prefix = partition_key.key_prefix.clone();

// Key prefix will already contain topic name since we're extracting it from path and
// not the actual file name
let topic_name = extract_topic_name(&s3_key_prefix);

// TODO: There's a good amount of overlapping code for event logs and we will continue to
// need to update more sinks with this functionality. Might be tricky but might be good to
// refactor/update the base request builder class to minimize this duplication

// Create event metadata here as this is where the list of events are available pre-encoding
// And we want to access this list to process the raw events to see specific field values
let event_log_metadata = VectorEventLogSendMetadata {
// Events are not encoded here yet, so byte size is not yet known
// Setting as 0 here and updating when it is set in build_request()
bytes: 0,
events_len: events.len(),
// Similarly the exact blob isn't determined here yet
blob: "".to_string(),
container: self.bucket.clone(),
topic: topic_name,
count_map: generate_count_map(&events),
};

let metadata = S3Metadata {
partition_key,
s3_key: s3_key_prefix,
count: events.len(),
finalizers,
event_log_metadata: event_log_metadata,
};

(metadata, builder, events)
Expand Down Expand Up @@ -105,15 +129,11 @@ impl RequestBuilder<(S3PartitionKey, Vec<Event>)> for S3RequestOptions {
s3metadata.s3_key = format_s3_key(&s3metadata.s3_key, &filename, &extension);

let body = payload.into_payload();
let topic_name = extract_topic_name(&s3metadata.s3_key);

VectorSendEventMetadata {
bytes: body.len(),
events_len: s3metadata.count,
blob: s3metadata.s3_key.clone(),
container: self.bucket.clone(),
topic: topic_name,
}.emit_sending_event();
// Update some components of the metadata since they've been computed now
s3metadata.event_log_metadata.bytes = body.len();
s3metadata.event_log_metadata.blob = s3metadata.s3_key.clone();
s3metadata.event_log_metadata.emit_sending_event();

S3Request {
body: body,
Expand Down
2 changes: 1 addition & 1 deletion src/sinks/azure_blob/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ impl AzureBlobSinkConfig {
// Returns back the same result so it continues to work downstream
.map_result(|result: StdResult<AzureBlobResponse, _>| {
if let Ok(ref response) = result {
response.send_event_metadata.emit_upload_event();
response.event_log_metadata.emit_upload_event();
}
result
})
Expand Down
33 changes: 24 additions & 9 deletions src/sinks/azure_blob/request_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use crate::{
azure_common::config::{AzureBlobMetadata, AzureBlobRequest},
util::{
metadata::RequestMetadataBuilder, request_builder::EncodeResult, Compression,
RequestBuilder, vector_event::VectorSendEventMetadata, vector_event::extract_topic_name,
RequestBuilder,
vector_event::{VectorEventLogSendMetadata, extract_topic_name, generate_count_map}
},
},
};
Expand Down Expand Up @@ -48,12 +49,30 @@ impl RequestBuilder<(String, Vec<Event>)> for AzureBlobRequestOptions {
) -> (Self::Metadata, RequestMetadataBuilder, Self::Events) {
let (partition_key, mut events) = input;
let finalizers = events.take_finalizers();

let topic_name = extract_topic_name(&partition_key);

// Create event metadata here as this is where the list of events are available pre-encoding
// And we want to access this list to process the raw events to see specific field values
let event_log_metadata = VectorEventLogSendMetadata {
// Events are not encoded here yet, so byte size is not yet known
// Setting as 0 here and updating when it is set in build_request()
bytes: 0,
events_len: events.len(),
// Similarly the exact blob isn't determined here yet
blob: "".to_string(),
container: self.container_name.clone(),
topic: topic_name,
count_map: generate_count_map(&events),
};

let azure_metadata = AzureBlobMetadata {
partition_key,
container_name: self.container_name.clone(),
count: events.len(),
byte_size: events.estimated_json_encoded_size_of(),
finalizers,
event_log_metadata: event_log_metadata,
};

let builder = RequestMetadataBuilder::from_events(&events);
Expand Down Expand Up @@ -82,15 +101,11 @@ impl RequestBuilder<(String, Vec<Event>)> for AzureBlobRequestOptions {
);

let blob_data = payload.into_payload();
let topic_name = extract_topic_name(&azure_metadata.partition_key);

VectorSendEventMetadata {
bytes: blob_data.len(),
events_len: azure_metadata.count,
blob: azure_metadata.partition_key.clone(),
container: self.container_name.clone(),
topic: topic_name,
}.emit_sending_event();
// Update some components of the metadata since they've been computed now
azure_metadata.event_log_metadata.bytes = blob_data.len();
azure_metadata.event_log_metadata.blob = azure_metadata.partition_key.clone();
azure_metadata.event_log_metadata.emit_sending_event();

AzureBlobRequest {
blob_data,
Expand Down
7 changes: 4 additions & 3 deletions src/sinks/azure_common/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use vector_lib::{
use crate::{
event::{EventFinalizers, EventStatus, Finalizable},
internal_events::{CheckRetryEvent},
sinks::{util::{retries::RetryLogic, vector_event::VectorSendEventMetadata}, Healthcheck},
sinks::{util::{retries::RetryLogic, vector_event::VectorEventLogSendMetadata}, Healthcheck},
};

#[derive(Debug, Clone)]
Expand Down Expand Up @@ -52,6 +52,7 @@ pub struct AzureBlobMetadata {
pub count: usize,
pub byte_size: JsonSize,
pub finalizers: EventFinalizers,
pub event_log_metadata: VectorEventLogSendMetadata,
}

#[derive(Debug, Clone)]
Expand Down Expand Up @@ -86,8 +87,8 @@ pub struct AzureBlobResponse {
pub inner: PutBlockBlobResponse,
pub events_byte_size: GroupedCountByteSize,
pub byte_size: usize,
// Extending S3 response with additional information relevant for vector send event logs
pub send_event_metadata: VectorSendEventMetadata,
// Extending Azure response with additional information relevant for vector send event logs
pub event_log_metadata: VectorEventLogSendMetadata,
}

impl DriverResponse for AzureBlobResponse {
Expand Down
13 changes: 1 addition & 12 deletions src/sinks/azure_common/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ use tracing::Instrument;

use crate::sinks::{
azure_common::config::{AzureBlobRequest, AzureBlobResponse},
util::vector_event::VectorSendEventMetadata,
util::vector_event::extract_topic_name,
};

#[derive(Clone)]
Expand Down Expand Up @@ -52,15 +50,6 @@ impl Service<AzureBlobRequest> for AzureBlobService {
Some(encoding) => blob.content_encoding(encoding),
None => blob,
};
let topic_name = extract_topic_name(&request.metadata.partition_key);

let send_event_metadata = VectorSendEventMetadata {
bytes: byte_size,
events_len: request.metadata.count,
blob: request.metadata.partition_key.clone(),
container: request.metadata.container_name.clone(),
topic: topic_name,
};

let result = blob
.into_future()
Expand All @@ -74,7 +63,7 @@ impl Service<AzureBlobRequest> for AzureBlobService {
.request_metadata
.into_events_estimated_json_encoded_byte_size(),
byte_size,
send_event_metadata,
event_log_metadata: request.metadata.event_log_metadata,
})
})
}
Expand Down
17 changes: 5 additions & 12 deletions src/sinks/s3_common/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ use super::config::S3Options;
use super::partitioner::S3PartitionKey;

use crate::sinks::{
util::vector_event::VectorSendEventMetadata,
util::vector_event::extract_topic_name,
util::vector_event::VectorEventLogSendMetadata
};

#[derive(Debug, Clone)]
Expand Down Expand Up @@ -55,13 +54,14 @@ pub struct S3Metadata {
pub s3_key: String,
pub count: usize,
pub finalizers: EventFinalizers,
pub event_log_metadata: VectorEventLogSendMetadata,
}

#[derive(Debug)]
pub struct S3Response {
pub events_byte_size: GroupedCountByteSize,
// Extending S3 response with additional information relevant for vector send event logs
pub send_event_metadata: VectorSendEventMetadata,
pub event_log_metadata: VectorEventLogSendMetadata,
}

impl DriverResponse for S3Response {
Expand Down Expand Up @@ -130,15 +130,8 @@ impl Service<S3Request> for S3Service {
let events_byte_size = request
.request_metadata
.into_events_estimated_json_encoded_byte_size();
let topic_name = extract_topic_name(&request.metadata.s3_key);

let send_event_metadata = VectorSendEventMetadata {
bytes: request.body.len(),
events_len: request.metadata.count,
blob: request.metadata.s3_key.clone(),
container: request.bucket.clone(),
topic: topic_name,
};
let event_log_metadata = request.metadata.event_log_metadata;

let client = self.client.clone();

Expand All @@ -165,7 +158,7 @@ impl Service<S3Request> for S3Service {

result.map(|_| S3Response {
events_byte_size,
send_event_metadata
event_log_metadata
})
})
}
Expand Down
Loading
Loading