From cf31badaada6568ef5ddd20ddceaa66e35e41741 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Fri, 19 Jan 2024 13:26:10 -0700 Subject: [PATCH 01/64] Initial pass at metrics --- src/include/aerospike/aerospike_stats.h | 19 +- src/include/aerospike/as_cluster.h | 46 ++- src/include/aerospike/as_command.h | 1 + src/include/aerospike/as_event_internal.h | 4 +- src/include/aerospike/as_metrics.h | 189 +++++++++ src/include/aerospike/as_node.h | 17 +- src/main/aerospike/aerospike_batch.c | 2 + src/main/aerospike/aerospike_key.c | 4 + src/main/aerospike/aerospike_query.c | 4 + src/main/aerospike/aerospike_scan.c | 2 + src/main/aerospike/aerospike_stats.c | 19 +- src/main/aerospike/as_cluster.c | 97 ++++- src/main/aerospike/as_command.c | 10 +- src/main/aerospike/as_event.c | 10 +- src/main/aerospike/as_metrics.c | 479 ++++++++++++++++++++++ src/main/aerospike/as_node.c | 23 +- src/main/aerospike/as_pipe.c | 4 +- vs/UpgradeLog.htm | 297 ++++++++++++++ vs/aerospike/aerospike.vcxproj | 2 + vs/aerospike/aerospike.vcxproj.filters | 6 + 20 files changed, 1205 insertions(+), 30 deletions(-) create mode 100644 src/include/aerospike/as_metrics.h create mode 100644 src/main/aerospike/as_metrics.c create mode 100644 vs/UpgradeLog.htm diff --git a/src/include/aerospike/aerospike_stats.h b/src/include/aerospike/aerospike_stats.h index 431f12266e..47b3ff1476 100644 --- a/src/include/aerospike/aerospike_stats.h +++ b/src/include/aerospike/aerospike_stats.h @@ -90,7 +90,7 @@ typedef struct as_node_stats_s { /** * Node error count within current window. */ - uint32_t error_count; + uint32_t error_rate_count; } as_node_stats; @@ -144,6 +144,11 @@ typedef struct as_cluster_stats_s { */ uint32_t thread_pool_queued_tasks; + /** + * Count of transaction retries since cluster was started. 
+ */ + uint64_t retry_count; + } as_cluster_stats; struct as_cluster_s; @@ -242,6 +247,18 @@ aerospike_event_loop_stats(as_event_loop* event_loop, as_event_loop_stats* stats AS_EXTERN char* aerospike_stats_to_string(as_cluster_stats* stats); +/** + * Enable extended periodic cluster and node latency metrics. + */ +AS_EXTERN void +aerospike_enable_metrics(aerospike* as, const struct as_policy_metrics_s* policy); + +/** + * Disable extended periodic cluster and node latency metrics. + */ +AS_EXTERN void +aerospike_disable_metrics(aerospike* as); + #ifdef __cplusplus } // end extern "C" #endif diff --git a/src/include/aerospike/as_cluster.h b/src/include/aerospike/as_cluster.h index b48b2568a8..9713c785d7 100644 --- a/src/include/aerospike/as_cluster.h +++ b/src/include/aerospike/as_cluster.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -379,6 +380,19 @@ typedef struct as_cluster_s { * Should continue to tend cluster. */ volatile bool valid; + + bool metrics_enabled; + + as_policy_metrics* metrics_policy; + + as_metrics_callbacks* metrics_callbacks; + + uint64_t retry_count; + + uint64_t tran_count; + + uint64_t delay_queue_timeout_count; + } as_cluster; /****************************************************************************** @@ -518,6 +532,18 @@ as_partition_shm_get_node( as_node* prev_node, as_policy_replica replica, uint8_t replica_size, uint8_t* replica_index ); +void +as_cluster_enable_metrics(as_cluster* cluster, as_policy_metrics* policy); + +void +as_cluster_disable_metrics(as_cluster* cluster); + +void +as_cluster_add_tran(as_cluster* cluster); + +uint64_t +as_cluster_get_tran_count(const as_cluster* cluster); + /** * @private * Get mapped node given partition and replica. This function does not reserve the node. @@ -544,10 +570,10 @@ as_partition_get_node( * Increment node's error count. 
*/ static inline void -as_node_incr_error_count(as_node* node) +as_node_incr_error_rate(as_node* node) { if (node->cluster->max_error_rate > 0) { - as_incr_uint32(&node->error_count); + as_incr_uint32(&node->error_rate_count); } } @@ -556,9 +582,9 @@ as_node_incr_error_count(as_node* node) * Reset node's error count. */ static inline void -as_node_reset_error_count(as_node* node) +as_node_reset_error_rate_count(as_node* node) { - as_store_uint32(&node->error_count, 0); + as_store_uint32(&node->error_rate_count, 0); } /** @@ -566,9 +592,9 @@ as_node_reset_error_count(as_node* node) * Get node's error count. */ static inline uint32_t -as_node_get_error_count(as_node* node) +as_node_get_error_rate(as_node* node) { - return as_load_uint32(&node->error_count); + return as_load_uint32(&node->error_rate_count); } /** @@ -576,10 +602,10 @@ as_node_get_error_count(as_node* node) * Validate node's error count. */ static inline bool -as_node_valid_error_count(as_node* node) +as_node_valid_error_rate(as_node* node) { uint32_t max = node->cluster->max_error_rate; - return max == 0 || max >= as_load_uint32(&node->error_count); + return max == 0 || max >= as_load_uint32(&node->error_rate_count); } /** @@ -590,7 +616,7 @@ static inline void as_node_close_conn_error(as_node* node, as_socket* sock, as_conn_pool* pool) { as_node_close_connection(node, sock, pool); - as_node_incr_error_count(node); + as_node_incr_error_rate(node); } /** @@ -601,7 +627,7 @@ static inline void as_node_put_conn_error(as_node* node, as_socket* sock) { as_node_put_connection(node, sock); - as_node_incr_error_count(node); + as_node_incr_error_rate(node); } #ifdef __cplusplus diff --git a/src/include/aerospike/as_command.h b/src/include/aerospike/as_command.h index c81806197c..7c6c30475e 100644 --- a/src/include/aerospike/as_command.h +++ b/src/include/aerospike/as_command.h @@ -190,6 +190,7 @@ typedef struct as_command_s { uint8_t replica_size; uint8_t replica_index; uint8_t replica_index_sc; // Used in 
batch only. + as_latency_type latency_type; } as_command; /** diff --git a/src/include/aerospike/as_event_internal.h b/src/include/aerospike/as_event_internal.h index 52b4fe7413..811dfba9f6 100644 --- a/src/include/aerospike/as_event_internal.h +++ b/src/include/aerospike/as_event_internal.h @@ -761,7 +761,7 @@ as_event_release_async_connection(as_event_command* cmd) { as_async_conn_pool* pool = &cmd->node->async_conn_pools[cmd->event_loop->index]; as_event_release_connection(cmd->conn, pool); - as_node_incr_error_count(cmd->node); + as_node_incr_error_rate(cmd->node); } static inline void @@ -783,7 +783,7 @@ as_event_connection_timeout(as_event_command* cmd, as_async_conn_pool* pool) if (conn->watching > 0) { as_event_stop_watcher(cmd, conn); as_event_release_connection(conn, pool); - as_node_incr_error_count(cmd->node); + as_node_incr_error_rate(cmd->node); } else { cf_free(conn); diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h new file mode 100644 index 0000000000..d5c36b6f80 --- /dev/null +++ b/src/include/aerospike/as_metrics.h @@ -0,0 +1,189 @@ +/* + * Copyright 2008-2023 Aerospike, Inc. + * + * Portions may be licensed to Aerospike, Inc. under one or more contributor + * license agreements. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +#if !defined(_MSC_VER) +#include +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** +* MACROS +*****************************************************************************/ + +#define NS_TO_MS 1000000 +#define MIN_FILE_SIZE 1000000 +#define UTC_STR_LEN 72 + +/****************************************************************************** + * TYPES + *****************************************************************************/ + +typedef enum as_latency_type_e { + AS_LATENCY_TYPE_CONN, //as_queue or as_async_conn_pool? + AS_LATENCY_TYPE_WRITE, + AS_LATENCY_TYPE_READ, + AS_LATENCY_TYPE_BATCH, + AS_LATENCY_TYPE_QUERY, + AS_LATENCY_TYPE_NONE +} as_latency_type; + +typedef struct as_latency_buckets_s { + int32_t latency_shift; + + int32_t latency_columns; + + uint64_t* buckets; +} as_latency_buckets; + +struct as_metrics_callbacks_s; + +/** +* Metrics Policy +*/ +typedef struct as_policy_metrics_s { + const char* report_directory; + + int64_t report_size_limit; // default 0 + + int32_t interval; // default 30 + + int32_t latency_columns; // default 7 + + int32_t latency_shift; // default 1 + + struct as_metrics_callbacks_s* metrics_callbacks; + + FILE* file; +} as_policy_metrics; + +struct as_cluster_s; +struct as_node_s; + +typedef void (*as_metrics_enable_callback)(struct as_policy_metrics_s* policy); + +typedef void (*as_metrics_snapshot_callback)(const struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster); + +typedef void (*as_metrics_node_close_callback)(const struct as_policy_metrics_s* policy, const struct as_node_s* node); + +typedef void (*as_metrics_disable_callback)(struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster); + +typedef struct as_metrics_callbacks_s { + as_metrics_enable_callback enable_callback; + as_metrics_snapshot_callback snapshot_callback; + 
as_metrics_node_close_callback node_close_callback; + as_metrics_disable_callback disable_callback; +} as_metrics_callbacks; + +typedef struct as_node_metrics_s { + as_latency_buckets* latency; +} as_node_metrics; + +const char* +utc_time_str(time_t t); + +void +as_metrics_policy_init(as_policy_metrics* policy); + +char* +as_latency_type_to_string(as_latency_type type); + +void +as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, int32_t latencyColumns, int32_t latencyShift); + +uint64_t +as_metrics_get_bucket(as_latency_buckets* buckets, uint32_t i); + +void +as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t elapsed); + +uint32_t +as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos); + +void +as_node_metrics_init(as_node_metrics* node_metrics, const as_policy_metrics* policy); + +void +as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed); + +void +as_metrics_callbacks_init(as_metrics_callbacks* callbacks); + +void +as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); + +void +as_metrics_write_cluster(struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster); + +void +as_metrics_write_node(as_string_builder* sb, struct as_node_stats_s* node_stats); + +void +as_metrics_write_conn(as_string_builder* sb, struct as_conn_stats_s* conn_stats); + +#if defined(__linux__) +void +as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); + +void +as_metrics_proc_stat_mem_cpu(double* vm_usage, double* resident_set, double* cpu_usage); +#endif + +#if defined(_MSC_VER) + +void +as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); + +static double +as_metrics_calculate_cpu_load(uint64_t idleTicks, uint64_t totalTicks); + +static uint64_t +as_metrics_file_time_to_uint_64(const FILETIME ft); + +double +as_metrics_process_cpu_load(); + +double +as_metrics_process_mem_usage(); + +#endif + +#if 
defined(__APPLE__) + +void +as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); + +double +as_metrics_process_mem_usage(); + +double +as_metrics_process_cpu_load(); +#endif + +#ifdef __cplusplus +} // end extern "C" +#endif \ No newline at end of file diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index 1bab9c76ab..743f385476 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -297,6 +298,8 @@ typedef struct as_node_s { */ as_racks* racks; + as_node_metrics* metrics; + /** * Socket used exclusively for cluster tend thread info requests. */ @@ -320,7 +323,11 @@ typedef struct as_node_s { /** * Error count for this node's error_rate_window. */ - uint32_t error_count; + uint32_t error_rate_count; + + uint64_t error_count; + + uint64_t timeout_count; /** * Server's generation count for peers. @@ -637,6 +644,14 @@ as_node_signal_login(as_node* node); bool as_node_has_rack(as_node* node, const char* ns, int rack_id); +typedef enum as_latency_type_e as_latency_type; + +void +as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapsed); + +void +as_node_enable_metrics(as_node* node, as_policy_metrics* policy); + /** * @private * Volatile read session pointer. diff --git a/src/main/aerospike/aerospike_batch.c b/src/main/aerospike/aerospike_batch.c index 5ea0355044..fa6059b325 100644 --- a/src/main/aerospike/aerospike_batch.c +++ b/src/main/aerospike/aerospike_batch.c @@ -1613,6 +1613,8 @@ as_batch_command_init( cmd->buf_size = size; cmd->partition_id = 0; // Not referenced when node set. cmd->replica = task->replica; + cmd->latency_type = AS_LATENCY_TYPE_BATCH; + as_cluster_add_tran(task->cluster); // Note: Do not set flags to AS_COMMAND_FLAGS_LINEARIZE because AP and SC replicas // are tracked separately for batch (cmd->master and cmd->master_sc). 
diff --git a/src/main/aerospike/aerospike_key.c b/src/main/aerospike/aerospike_key.c index 927f1c80b9..f8572ead5e 100644 --- a/src/main/aerospike/aerospike_key.c +++ b/src/main/aerospike/aerospike_key.c @@ -80,6 +80,8 @@ as_command_init_read( cmd->udata = udata; cmd->buf_size = size; cmd->partition_id = pi->partition_id; + cmd->latency_type = AS_LATENCY_TYPE_READ; + as_cluster_add_tran(cluster); if (pi->sc_mode) { switch (read_mode_sc) { @@ -143,6 +145,8 @@ as_command_init_write( cmd->replica = as_command_write_replica(replica); cmd->replica_size = pi->replica_size; cmd->replica_index = 0; + cmd->latency_type = AS_LATENCY_TYPE_WRITE; + as_cluster_add_tran(cluster); } static inline void diff --git a/src/main/aerospike/aerospike_query.c b/src/main/aerospike/aerospike_query.c index 348d84d3dd..d373ff997a 100644 --- a/src/main/aerospike/aerospike_query.c +++ b/src/main/aerospike/aerospike_query.c @@ -1132,6 +1132,8 @@ as_query_command_execute_old(as_query_task* task) cmd.flags = flags; cmd.replica_size = 1; cmd.replica_index = 0; + cmd.latency_type = AS_LATENCY_TYPE_QUERY; + as_cluster_add_tran(task->cluster); as_command_start_timer(&cmd); @@ -1233,6 +1235,8 @@ as_query_command_execute_new(as_query_task* task) cmd.flags = flags; cmd.replica_size = 1; cmd.replica_index = 0; + cmd.latency_type = AS_LATENCY_TYPE_QUERY; + as_cluster_add_tran(task->cluster); as_command_start_timer(&cmd); diff --git a/src/main/aerospike/aerospike_scan.c b/src/main/aerospike/aerospike_scan.c index b303819c96..884b502c2b 100644 --- a/src/main/aerospike/aerospike_scan.c +++ b/src/main/aerospike/aerospike_scan.c @@ -687,6 +687,8 @@ as_scan_command_execute(as_scan_task* task) cmd.flags = AS_COMMAND_FLAGS_READ; cmd.replica_size = 1; cmd.replica_index = 0; + cmd.latency_type = AS_LATENCY_TYPE_QUERY; + as_cluster_add_tran(task->cluster); as_command_start_timer(&cmd); diff --git a/src/main/aerospike/aerospike_stats.c b/src/main/aerospike/aerospike_stats.c index 1d48cf6e7e..34d8868275 100644 --- 
a/src/main/aerospike/aerospike_stats.c +++ b/src/main/aerospike/aerospike_stats.c @@ -110,6 +110,7 @@ aerospike_cluster_stats(as_cluster* cluster, as_cluster_stats* stats) // cf_queue applies locks, so we are safe here. stats->thread_pool_queued_tasks = cf_queue_sz(cluster->thread_pool.dispatch_queue); + stats->retry_count = as_load_uint64(&cluster->retry_count); } void @@ -133,7 +134,7 @@ aerospike_node_stats(as_node* node, as_node_stats* stats) { as_node_reserve(node); // Released in aerospike_node_stats_destroy() stats->node = node; - stats->error_count = as_node_get_error_count(node); + stats->error_rate_count = as_node_get_error_rate(node); as_sum_init(&stats->sync); as_sum_init(&stats->async); @@ -184,7 +185,7 @@ aerospike_stats_to_string(as_cluster_stats* stats) as_conn_stats_tostring(&sb, "pipeline", &node_stats->pipeline); as_string_builder_append_newline(&sb); as_string_builder_append(&sb, "error count: "); - as_string_builder_append_uint(&sb, node_stats->error_count); + as_string_builder_append_uint(&sb, node_stats->error_rate_count); as_string_builder_append_newline(&sb); } @@ -207,3 +208,17 @@ aerospike_stats_to_string(as_cluster_stats* stats) } return sb.data; } + +void +aerospike_enable_metrics(aerospike* as, const as_policy_metrics* policy) +{ + as_cluster* cluster = as->cluster; + as_cluster_enable_metrics(cluster, policy); +} + +void +aerospike_disable_metrics(aerospike* as) +{ + as_cluster* cluster = as->cluster; + as_cluster_disable_metrics(cluster); +} diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index cbb7e3cf21..cd2214d7f6 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -554,6 +554,86 @@ as_cluster_remove_nodes_copy(as_cluster* cluster, as_vector* /* */ no as_vector_append(cluster->gc, &item); } +void +as_cluster_enable_metrics(as_cluster* cluster, as_policy_metrics* policy) +{ + if (cluster->metrics_enabled) + { + cluster->metrics_callbacks->disable_callback(cluster->metrics_policy, cluster); // Shut down the policy that is currently active, not the incoming one. + } + + 
static as_metrics_callbacks default_callbacks; // Used when the policy supplies no callbacks. + cluster->metrics_callbacks = policy->metrics_callbacks; + if (cluster->metrics_callbacks == NULL) + { + as_metrics_callbacks_init(&default_callbacks); + cluster->metrics_callbacks = &default_callbacks; + } + cluster->metrics_policy = policy; + as_nodes* nodes = cluster->nodes; + for (uint32_t i = 0; i < nodes->size; i++) { + as_node* node = nodes->array[i]; + as_node_enable_metrics(node, policy); + } + cluster->metrics_callbacks->enable_callback(policy); + cluster->metrics_enabled = true; // Mark enabled only after callbacks and policy are in place. +} + +void +as_cluster_disable_metrics(as_cluster* cluster) +{ + if (cluster->metrics_enabled) + { + cluster->metrics_enabled = false; + cluster->metrics_callbacks->disable_callback(cluster->metrics_policy, cluster); + } +} + +void +as_cluster_add_tran(as_cluster* cluster) +{ + if (cluster->metrics_enabled) + { + as_incr_uint64(&cluster->tran_count); + } +} + +uint64_t +as_cluster_get_tran_count(const as_cluster* cluster) +{ + return as_load_uint64(&cluster->tran_count); +} + +void +as_cluster_add_retry(as_cluster* cluster) +{ + as_incr_uint64(&cluster->retry_count); +} + +void +as_cluster_add_retries(as_cluster* cluster, uint32_t count) +{ + as_faa_uint64(&cluster->retry_count, count); +} + +uint64_t +as_cluster_get_retry_count(const as_cluster* cluster) +{ + return as_load_uint64(&cluster->retry_count); +} + +void +as_cluster_add_delay_queue_timeout(as_cluster* cluster) +{ + as_incr_uint64(&cluster->delay_queue_timeout_count); +} + +uint64_t +as_cluster_get_delay_queue_timeout_count(const as_cluster* cluster) +{ + return as_load_uint64(&cluster->delay_queue_timeout_count); +} + static void as_cluster_remove_nodes(as_cluster* cluster, as_vector* /* */ nodes_to_remove) { @@ -565,6 +645,10 @@ as_cluster_remove_nodes(as_cluster* cluster, as_vector* /* */ nodes_t for (uint32_t i = 0; i < nodes_to_remove->size; i++) { as_node* node = as_vector_get_ptr(nodes_to_remove, i); as_node_deactivate(node); + + if (cluster->metrics_enabled) { + cluster->metrics_callbacks->node_close_callback(node->cluster->metrics_policy, node); + } } // Remove all nodes at once to 
avoid copying entire array multiple times. @@ -635,12 +719,12 @@ as_cluster_balance_connections(as_cluster* cluster) } static void -as_cluster_reset_error_count(as_cluster* cluster) +as_cluster_reset_error_rate(as_cluster* cluster) { as_nodes* nodes = cluster->nodes; for (uint32_t i = 0; i < nodes->size; i++) { - as_node_reset_error_count(nodes->array[i]); + as_node_reset_error_rate_count(nodes->array[i]); } } @@ -656,7 +740,7 @@ as_cluster_manage(as_cluster* cluster) // Reset connection error window for all nodes every error_rate_window tend iterations. if (cluster->max_error_rate > 0 && cluster->tend_count % cluster->error_rate_window == 0) { - as_cluster_reset_error_count(cluster); + as_cluster_reset_error_rate(cluster); } } @@ -841,7 +925,7 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) } } - cluster->invalid_node_count = as_peers_invalid_count(&peers); + cluster->invalid_node_count += as_peers_invalid_count(&peers); // Refresh partition map when necessary. for (uint32_t i = 0; i < nodes->size; i++) { @@ -882,6 +966,11 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) as_incr_uint32(&cluster->shm_info->cluster_shm->rebalance_gen); } + if (cluster->metrics_enabled && cluster->metrics_policy->interval > 0 && (cluster->tend_count % cluster->metrics_policy->interval) == 0) // Snapshot once every 'interval' tend iterations; the > 0 test also rules out modulo by zero. + { + cluster->metrics_callbacks->snapshot_callback(cluster->metrics_policy, cluster); + } + as_cluster_destroy_peers(&peers); as_cluster_manage(cluster); return AEROSPIKE_OK; diff --git a/src/main/aerospike/as_command.c b/src/main/aerospike/as_command.c index 5c80e014d6..a06d91727e 100644 --- a/src/main/aerospike/as_command.c +++ b/src/main/aerospike/as_command.c @@ -590,6 +590,8 @@ as_command_execute(as_command* cmd, as_error* err) as_node* node = NULL; as_status status; bool release_node; + as_latency_type latency_type = cmd->cluster->metrics_enabled ? 
cmd->latency_type : AS_LATENCY_TYPE_NONE; + uint64_t begin = cf_getns(); // Execute command until successful, timed out or maximum iterations have been reached. while (true) { @@ -615,7 +617,7 @@ as_command_execute(as_command* cmd, as_error* err) release_node = true; } - if (! as_node_valid_error_count(node)) { + if (! as_node_valid_error_rate(node)) { status = as_error_set_message(err, AEROSPIKE_MAX_ERROR_RATE, "Max error rate exceeded"); goto Retry; } @@ -705,6 +707,12 @@ as_command_execute(as_command* cmd, as_error* err) // Put connection back in pool. as_node_put_connection(node, &socket); + + if (latency_type != AS_LATENCY_TYPE_NONE) + { + uint64_t elapsed = cf_getns() - begin; + as_node_add_latency(node, latency_type, elapsed); + } // Release resources. if (release_node) { diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index 4ad1b27b91..b984b87f35 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -611,7 +611,7 @@ as_event_command_begin(as_event_loop* event_loop, as_event_command* cmd) as_node_reserve(cmd->node); } - if (! as_node_valid_error_count(cmd->node)) { + if (! 
as_node_valid_error_rate(cmd->node)) { event_loop->errors++; if (as_event_command_retry(cmd, true)) { @@ -648,7 +648,7 @@ as_event_command_begin(as_event_loop* event_loop, as_event_command* cmd) if (len != 0) { as_log_debug("Invalid async socket from pool: %d", len); as_event_release_connection(&conn->base, pool); - as_node_incr_error_count(cmd->node); + as_node_incr_error_rate(cmd->node); continue; } @@ -1264,7 +1264,7 @@ as_event_response_error(as_event_command* cmd, as_error* err) case AEROSPIKE_ERR_CLUSTER: case AEROSPIKE_ERR_DEVICE_OVERLOAD: as_event_put_connection(cmd, pool); - as_node_incr_error_count(cmd->node); + as_node_incr_error_rate(cmd->node); break; case AEROSPIKE_ERR_QUERY_ABORTED: @@ -1275,7 +1275,7 @@ as_event_response_error(as_event_command* cmd, as_error* err) case AEROSPIKE_ERR_CLIENT: case AEROSPIKE_NOT_AUTHENTICATED: as_event_release_connection(cmd->conn, pool); - as_node_incr_error_count(cmd->node); + as_node_incr_error_rate(cmd->node); break; default: @@ -1794,7 +1794,7 @@ as_event_balance_connections_node(as_event_loop* event_loop, as_cluster* cluster // Do not close idle pipeline connections because pipelines work better with a stable // number of connections. } - else if (excess < 0 && as_node_valid_error_count(node)) { + else if (excess < 0 && as_node_valid_error_rate(node)) { create_connections(event_loop, node, pool, -excess); } }
diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c
new file mode 100644
index 0000000000..92811c393b
--- /dev/null
+++ b/src/main/aerospike/as_metrics.c
@@ -0,0 +1,554 @@
+/*
+ * Copyright 2008-2023 Aerospike, Inc.
+ *
+ * Portions may be licensed to Aerospike, Inc. under one or more contributor
+ * license agreements.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+ /******************************************************************************
+ * FUNCTIONS
+ *****************************************************************************/
+
+/**
+ * Format t as a UTC timestamp of the form YYYY-MM-DDTHH:MM:SSZ.
+ * The result is kept in one static buffer that every call overwrites.
+ */
+const char*
+utc_time_str(time_t t)
+{
+	static char buf[UTC_STR_LEN + 1];
+	struct tm* utc = gmtime(&t);
+	if (utc == NULL) {
+		// gmtime() can fail; return an empty string instead of dereferencing NULL.
+		buf[0] = '\0';
+		return buf;
+	}
+	snprintf(buf, sizeof(buf),
+		"%4d-%02d-%02dT%02d:%02d:%02dZ",
+		1900 + utc->tm_year, utc->tm_mon + 1, utc->tm_mday,
+		utc->tm_hour, utc->tm_min, utc->tm_sec);
+	return buf;
+}
+
+
+/**
+ * Set every field of policy to its default value.
+ */
+void
+as_metrics_policy_init(as_policy_metrics* policy)
+{
+	policy->report_directory = NULL;
+	policy->report_size_limit = 0;
+	policy->interval = 30;
+	policy->latency_columns = 7;
+	policy->latency_shift = 1;
+	policy->metrics_callbacks = NULL;
+	policy->file = NULL;
+}
+
+/**
+ * Return the report label for a latency type; unknown values map to "none".
+ */
+char*
+as_latency_type_to_string(as_latency_type type)
+{
+	switch (type)
+	{
+	case AS_LATENCY_TYPE_CONN:
+		return "conn";
+	case AS_LATENCY_TYPE_WRITE:
+		return "write";
+	case AS_LATENCY_TYPE_READ:
+		return "read";
+	case AS_LATENCY_TYPE_BATCH:
+		return "batch";
+	case AS_LATENCY_TYPE_QUERY:
+		return "query";
+	case AS_LATENCY_TYPE_NONE:
+	default:
+		return "none";
+	}
+}
+
+void
+as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, int32_t latency_columns, int32_t latency_shift)
+{
+	latency_buckets->latency_columns = latency_columns;
+	latency_buckets->latency_shift = latency_shift;
+	// Zero-filled so every counter starts at 0.
+	latency_buckets->buckets = cf_calloc(latency_columns, sizeof(uint64_t));
+}
+
+/**
+ * Read counter i of a latency histogram.
+ */
+uint64_t
+as_metrics_get_bucket(as_latency_buckets* buckets, uint32_t i)
+{
+	return as_load_uint64(&buckets->buckets[i]);
+}
+
+void
+as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t elapsed)
+{
+	uint32_t index = as_metrics_get_index(latency_buckets, elapsed);
+	as_incr_uint64(&latency_buckets->buckets[index]);
+}
+
+uint32_t
+as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos)
+{
+	// Convert nanoseconds to milliseconds.
+	uint64_t elapsed = elapsed_nanos / NS_TO_MS;
+
+	// Round up elapsed to nearest millisecond.
+	if ((elapsed_nanos - (elapsed * NS_TO_MS)) > 0)
+	{
+		elapsed++;
+	}
+
+	uint32_t last_bucket = latency_buckets->latency_columns - 1;
+	uint64_t limit = 1;
+
+	for (uint32_t i = 0; i < last_bucket; i++)
+	{
+		if (elapsed <= limit)
+		{
+			return i;
+		}
+		limit <<= latency_buckets->latency_shift;
+	}
+
+	return last_bucket;
+}
+
+void
+as_node_metrics_init(as_node_metrics* node_metrics, const as_policy_metrics* policy)
+{
+	uint32_t max_latency_type = AS_LATENCY_TYPE_NONE;
+	node_metrics->latency = cf_malloc(sizeof(as_latency_buckets) * max_latency_type);
+	for (uint32_t i = 0; i < max_latency_type; i++)
+	{
+		as_metrics_latency_buckets_init(&node_metrics->latency[i], policy->latency_columns, policy->latency_shift);
+	}
+}
+
+void
+as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed)
+{
+	as_metrics_latency_buckets_add(&node_metrics->latency[latency_type], elapsed);
+}
+
+/**
+ * Default enable callback: open the report file named by policy->report_directory
+ * and write the header line that describes the report format.
+ */
+void
+as_metrics_enable(struct as_policy_metrics_s* policy)
+{
+	if (policy->report_size_limit != 0 && policy->report_size_limit < MIN_FILE_SIZE)
+	{
+		// error
+	}
+
+	// create file directory
+	policy->file = NULL;
+	if (policy->report_directory != NULL)
+	{
+		policy->file = fopen(policy->report_directory, "w");
+	}
+
+	if (policy->file == NULL)
+	{
+		// No report file; the other default callbacks skip output while file is NULL.
+		return;
+	}
+
+	// One fprintf builds the header; the column count and shift are formatted as integers.
+	fprintf(policy->file,
+		"\n%s header(1)"
+		" cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]]"
+		" eventloop[processSize,queueSize]"
+		" node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]]"
+		" conn[inUse,inPool,opened,closed]"
+		" latency(%d,%d)[type[l1,l2,l3...]]",
+		utc_time_str(time(NULL)), (int)policy->latency_columns, (int)policy->latency_shift);
+}
+
+void
+as_metrics_snapshot(const struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster)
+{
+	if (policy->file != NULL)
+	{
+		// as_metrics_write_cluster() takes a non-const policy.
+		as_metrics_write_cluster((struct as_policy_metrics_s*)policy, cluster);
+	}
+}
+
+void
+as_metrics_node_close(const struct as_policy_metrics_s* policy, const struct as_node_s* node)
+{
+	// write node info to file
+	if (policy->file != NULL)
+	{
+		// as_metrics_write_node() formats an as_node_stats, so take a snapshot of the node first.
+		as_node_stats node_stats;
+		aerospike_node_stats((as_node*)node, &node_stats);
+
+		as_string_builder sb;
+		as_string_builder_inita(&sb, 25, true);
+		as_string_builder_append(&sb, utc_time_str(time(NULL)));
+		as_metrics_write_node(&sb, &node_stats);
+		fprintf(policy->file, "\n%s", sb.data);
+
+		as_string_builder_destroy(&sb);
+		aerospike_node_stats_destroy(&node_stats);
+	}
+}
+
+void
+as_metrics_disable(struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster)
+{
+	// write cluster info to file, disable
+	if (policy->file != NULL)
+	{
+		as_metrics_write_cluster(policy, cluster);
+		fclose(policy->file);
+		policy->file = NULL;
+	}
+}
+
+void
+as_metrics_callbacks_init(as_metrics_callbacks* callbacks)
+{
+	callbacks->enable_callback = as_metrics_enable;
+	callbacks->disable_callback = as_metrics_disable;
+	callbacks->node_close_callback = as_metrics_node_close;
+	callbacks->snapshot_callback = as_metrics_snapshot;
+}
+
+/**
+ * Append an unsigned integer in decimal.
+ */
+static void
+as_metrics_append_uint64(as_string_builder* sb, uint64_t value)
+{
+	char text[24];
+	snprintf(text, sizeof(text), "%llu", (unsigned long long)value);
+	as_string_builder_append(sb, text);
+}
+
+/**
+ * Append a floating-point value rounded to a whole number.
+ */
+static void
+as_metrics_append_double(as_string_builder* sb, double value)
+{
+	char text[32];
+	snprintf(text, sizeof(text), "%.0f", value);
+	as_string_builder_append(sb, text);
+}
+
+/**
+ * Append one report line to policy->file: cluster name, process cpu/memory,
+ * the cumulative cluster counters and one entry per node.
+ */
+void
+as_metrics_write_cluster(struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster) {
+	char* cluster_name = cluster->cluster_name;
+	if (cluster_name == NULL)
+	{
+		cluster_name = "";
+	}
+
+	double cpu_load = 0.0;
+	double mem = 0.0;
+	as_metrics_process_cpu_load_mem_usage(&cpu_load, &mem);
+	as_cluster_stats stats;
+	aerospike_cluster_stats((as_cluster*)cluster, &stats);
+
+	as_string_builder sb;
+	as_string_builder_inita(&sb, 10, true);
+	as_string_builder_append(&sb, utc_time_str(time(NULL)));
+	as_string_builder_append(&sb, " cluster[");
+	as_string_builder_append(&sb, cluster_name);
+	as_string_builder_append(&sb, ",");
+	as_metrics_append_double(&sb, cpu_load);
+	as_string_builder_append(&sb, ",");
+	as_metrics_append_double(&sb, mem);
+	as_string_builder_append(&sb, ",");
+	as_metrics_append_uint64(&sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval.
+	as_string_builder_append(&sb, ",");
+	as_metrics_append_uint64(&sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval.
+	as_string_builder_append(&sb, ",");
+	as_metrics_append_uint64(&sb, as_load_uint64(&cluster->retry_count)); // Cumulative. Not reset on each interval.
+	as_string_builder_append(&sb, ",");
+	as_metrics_append_uint64(&sb, as_load_uint64(&cluster->delay_queue_timeout_count)); // Cumulative. Not reset on each interval.
+	as_string_builder_append(&sb, ",[");
+
+	for (uint32_t i = 0; i < stats.nodes_size; i++) {
+		if (i > 0) {
+			as_string_builder_append(&sb, ",");
+		}
+		as_metrics_write_node(&sb, &stats.nodes[i]);
+	}
+	as_string_builder_append(&sb, "]]");
+
+	fprintf(policy->file, "\n%s", sb.data);
+
+	// Release the builder's buffer and what aerospike_cluster_stats() acquired.
+	as_string_builder_destroy(&sb);
+	aerospike_stats_destroy(&stats);
+}
+
+/**
+ * Append one node entry: name, sync and async connection counters, error and
+ * timeout counts, then one latency histogram per latency type.
+ */
+void
+as_metrics_write_node(as_string_builder* sb, struct as_node_stats_s* node_stats)
+{
+	as_node* node = node_stats->node;
+	as_string_builder_append(sb, "[");
+	as_string_builder_append(sb, node->name);
+	as_string_builder_append(sb, ",");
+
+	//as_host* host = node-> TODO: how to get host from node? it is in node_info
+
+	//as_string_builder_append(&sb, host->name);
+	//as_string_builder_append(&sb, ',');
+	//as_string_builder_append(&sb, host->port);
+	//as_string_builder_append(&sb, ',');
+
+	as_metrics_write_conn(sb, &node_stats->sync);
+	as_string_builder_append(sb, ",");
+	as_metrics_write_conn(sb, &node_stats->async);
+	as_string_builder_append(sb, ",");
+
+	as_metrics_append_uint64(sb, node->error_count);
+	as_string_builder_append(sb, ",");
+	as_metrics_append_uint64(sb, node->timeout_count);
+	as_string_builder_append(sb, ",[");
+
+	as_node_metrics* node_metrics = node->metrics;
+	uint32_t max = AS_LATENCY_TYPE_NONE;
+
+	// NOTE(review): histograms are skipped when the node has no metrics allocated - confirm that can happen.
+	for (uint32_t i = 0; node_metrics != NULL && i < max; i++) {
+		if (i > 0) {
+			as_string_builder_append(sb, ",");
+		}
+		as_string_builder_append(sb, as_latency_type_to_string(i));
+		as_string_builder_append(sb, "[");
+
+		as_latency_buckets* buckets = &node_metrics->latency[i];
+		uint32_t bucket_max = buckets->latency_columns;
+
+		for (uint32_t j = 0; j < bucket_max; j++) {
+			if (j > 0) {
+				as_string_builder_append(sb, ",");
+			}
+			as_metrics_append_uint64(sb, as_metrics_get_bucket(buckets, j));
+		}
+		as_string_builder_append(sb, "]");
+	}
+	as_string_builder_append(sb, "]]");
+}
+
+/**
+ * Append the four connection counters as inUse,inPool,opened,closed.
+ */
+void
+as_metrics_write_conn(as_string_builder* sb, struct as_conn_stats_s* conn_stats)
+{
+	as_metrics_append_uint64(sb, conn_stats->in_use);
+	as_string_builder_append(sb, ",");
+	as_metrics_append_uint64(sb, conn_stats->in_pool);
+	as_string_builder_append(sb, ",");
+	as_metrics_append_uint64(sb, conn_stats->opened); // Cumulative. Not reset on each interval.
+	as_string_builder_append(sb, ",");
+	as_metrics_append_uint64(sb, conn_stats->closed); // Cumulative. Not reset on each interval.
+}
+
+#if defined(__linux__)
+void
+as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem)
+{
+	double resident_set;
+	as_metrics_proc_stat_mem_cpu(mem, &resident_set, cpu_usage);
+}
+
+/**
+ * Read virtual memory size (KB), resident set size (KB) and a cpu usage ratio
+ * for this process from /proc/self/stat. All outputs are 0 when the file
+ * cannot be read.
+ */
+void
+as_metrics_proc_stat_mem_cpu(double* vm_usage, double* resident_set, double* cpu_usage)
+{
+	*vm_usage = 0.0;
+	*resident_set = 0.0;
+	*cpu_usage = 0.0;
+
+	FILE* stat_file = fopen("/proc/self/stat", "r");
+
+	if (stat_file == NULL) {
+		return;
+	}
+
+	// the fields we want
+	unsigned long long utime, stime, starttime;
+	unsigned long long vsize;
+	long long rss;
+
+	// Skip the whitespace-separated entries we don't care about: 13 before utime,
+	// 6 between stime and starttime. Everything after rss is ignored.
+	int matched = fscanf(stat_file,
+		"%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s "
+		"%llu %llu %*s %*s %*s %*s %*s %*s %llu %llu %lld",
+		&utime, &stime, &starttime, &vsize, &rss);
+	fclose(stat_file);
+
+	if (matched != 5) {
+		return;
+	}
+
+	long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
+	long ticks_per_sec = sysconf(_SC_CLK_TCK);
+	*vm_usage = vsize / 1024.0;
+	*resident_set = (double)rss * page_size_kb;
+
+	if (ticks_per_sec <= 0) {
+		return;
+	}
+
+	uint64_t u_time_sec = utime / ticks_per_sec;
+	uint64_t s_time_sec = stime / ticks_per_sec;
+	uint64_t start_time_sec = starttime / ticks_per_sec;
+	uint64_t now_sec = cf_get_seconds();
+
+	// NOTE(review): starttime is counted from boot; confirm cf_get_seconds() uses the same origin.
+	if (now_sec > start_time_sec) {
+		*cpu_usage = (double)(u_time_sec + s_time_sec) / (double)(now_sec - start_time_sec);
+	}
+}
+#endif
+
+#if defined(_MSC_VER)
+#include
+
+void
+as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem)
+{
+	*cpu_usage = as_metrics_process_cpu_load();
+	*mem = as_metrics_process_mem_usage();
+}
+
+static double
+as_metrics_calculate_cpu_load(uint64_t idleTicks, uint64_t totalTicks)
+{
+	static uint64_t _previousTotalTicks = 0;
+	static uint64_t _previousIdleTicks = 0;
+
+	uint64_t totalTicksSinceLastTime = totalTicks - 
_previousTotalTicks; + uint64_t idleTicksSinceLastTime = idleTicks - _previousIdleTicks; + + double ret = 1.0f - ((totalTicksSinceLastTime > 0) ? ((double)idleTicksSinceLastTime) / totalTicksSinceLastTime : 0); + + _previousTotalTicks = totalTicks; + _previousIdleTicks = idleTicks; + return ret; +} + +static uint64_t +as_metrics_file_time_to_uint_64(const FILETIME ft) +{ + return (((uint64_t)(ft.dwHighDateTime)) << 32) | ((uint64_t)ft.dwLowDateTime); +} + +// Returns 1.0f for "CPU fully pinned", 0.0f for "CPU idle", or somewhere in between +// You'll need to call this at regular intervals, since it measures the load between +// the previous call and the current one. Returns -1.0 on error. +double +as_metrics_process_cpu_load() +{ + FILETIME idleTime, kernelTime, userTime; + return GetSystemTimes(&idleTime, &kernelTime, &userTime) ? + as_metrics_calculate_cpu_load(as_metrics_file_time_to_uint_64(idleTime), as_metrics_file_time_to_uint_64(kernelTime) + as_metrics_file_time_to_uint_64(userTime)) * 100: -1.0f; +} + +double +as_metrics_process_mem_usage() +{ + MEMORYSTATUSEX statex; + + statex.dwLength = sizeof(statex); + + GlobalMemoryStatusEx(&statex); + return statex.ullTotalVirtual - statex.ullAvailVirtual; +} + +#endif + +#if defined(__APPLE__) +#include +#include +#include + +void +as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem) +{ + *cpu_usage = as_metrics_process_cpu_load(); + *mem = as_metrics_process_mem_usage(); +} + +double +as_metrics_process_mem_usage() +{ + struct task_basic_info t_info; + mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; + + if (KERN_SUCCESS != task_info(mach_task_self(), + TASK_BASIC_INFO, (task_info_t)&t_info, + &t_info_count)) + { + return -1; + } + + return t_info.virtual_size; +} +double +as_metrics_process_cpu_load() +{ + pid_t pid = getpid(); + as_string_builder sb; + as_string_builder_inita(&sb, 20, true); + as_string_builder_append(&sb, "ps -p "); + as_string_builder_append(&sb, pid); + 
as_string_builder_append(&sb, " -o %CPU"); + FILE* result = popen(sb.data); + char[5] cpu_holder; + char[6] cpu_percent; + fgets(result, 4, cpu_holder); // %CPU placeholder + fgets(result, 5, cpu_percent); + pclose(result); + + return atof(cpu_percent); +} +#endif \ No newline at end of file diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 08fedde8f1..fe41157c71 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -136,11 +136,18 @@ as_node_create(as_cluster* cluster, as_node_info* node_info) node->partition_changed = true; node->rebalance_changed = cluster->rack_aware; + if (cluster->metrics_enabled) + { + as_node_metrics_init(node->metrics, cluster->metrics_policy); + } + // Create sync connection pools. node->sync_conn_pools = cf_malloc(sizeof(as_conn_pool) * cluster->conn_pools_per_node); node->sync_conns_opened = 1; node->sync_conns_closed = 0; + node->error_rate_count = 0; node->error_count = 0; + node->timeout_count = 0; node->conn_iter = 0; uint32_t min = cluster->min_conns_per_node / cluster->conn_pools_per_node; @@ -635,7 +642,7 @@ as_node_balance_connections(as_node* node) if (excess > 0) { as_node_close_idle_connections(node, pool, excess); } - else if (excess < 0 && as_node_valid_error_count(node)) { + else if (excess < 0 && as_node_valid_error_rate(node)) { as_node_create_connections(node, pool, timeout_ms, -excess); } } @@ -902,7 +909,7 @@ static void as_node_restart(as_cluster* cluster, as_node* node) { if (cluster->max_error_rate > 0) { - as_node_reset_error_count(node); + as_node_reset_error_rate_count(node); } // Balance sync connections. 
@@ -1299,6 +1306,18 @@ as_node_parse_racks(as_cluster* cluster, as_error* err, as_node* node, char* buf return AEROSPIKE_OK; } +void +as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapsed) +{ + as_metrics_add_latency(node->metrics, latency_type, elapsed); +} + +void +as_node_enable_metrics(as_node* node, as_policy_metrics* policy) +{ + as_node_metrics_init(node->metrics, policy); +} + static as_status as_node_process_racks(as_cluster* cluster, as_error* err, as_node* node, as_vector* values) { diff --git a/src/main/aerospike/as_pipe.c b/src/main/aerospike/as_pipe.c index be96c4b863..7519e4b204 100644 --- a/src/main/aerospike/as_pipe.c +++ b/src/main/aerospike/as_pipe.c @@ -155,7 +155,7 @@ cancel_connection(as_event_command* cmd, as_error* err, int32_t source, bool ret conn->canceled = true; as_async_conn_pool* pool = &node->pipe_conn_pools[loop->index]; as_event_release_connection((as_event_connection*)conn, pool); - as_node_incr_error_count(node); + as_node_incr_error_rate(node); as_node_release(node); return; } @@ -349,7 +349,7 @@ as_pipe_get_connection(as_event_command* cmd) if (len < 0) { as_log_debug("Invalid pipeline socket from pool: %d", len); release_connection(cmd, conn, pool); - as_node_incr_error_count(cmd->node); + as_node_incr_error_rate(cmd->node); continue; } diff --git a/vs/UpgradeLog.htm b/vs/UpgradeLog.htm new file mode 100644 index 0000000000..0fd4566c0e --- /dev/null +++ b/vs/UpgradeLog.htm @@ -0,0 +1,297 @@ + + + + Migration Report +

+ Migration Report -

Overview

ProjectPathErrorsWarningsMessages
aerospikeaerospike\aerospike.vcxproj100
aerospike-testaerospike-test\aerospike-test.vcxproj100
appendexamples\append\append.vcxproj100
async-batch-getexamples\async-batch-get\async-batch-get.vcxproj100
async-delay-queueexamples\async-delay-queue\async-delay-queue.vcxproj100
async-getexamples\async-get\async-get.vcxproj100
async-queryexamples\async-query\async-query.vcxproj100
async-scanexamples\async-scan\async-scan.vcxproj100
batch-getexamples\batch-get\batch-get.vcxproj100
expireexamples\expire\expire.vcxproj100
generationexamples\generation\generation.vcxproj100
geo-filterexamples\geo-filter\geo-filter.vcxproj100
geo-simpleexamples\geo-simple\geo-simple.vcxproj100
getexamples\get\get.vcxproj100
increxamples\incr\incr.vcxproj100
listexamples\list\list.vcxproj100
mapexamples\map\map.vcxproj100
putexamples\put\put.vcxproj100
queryexamples\query\query.vcxproj100
query-aggregateexamples\query-aggregate\query-aggregate.vcxproj100
scanexamples\scan\scan.vcxproj100
scan-backgroundexamples\scan-background\scan-background.vcxproj100
touchexamples\touch\touch.vcxproj100
udfexamples\udf\udf.vcxproj100
examplesexamples000
Solutionaerospike.sln001

Solution and projects

aerospike

Message
aerospike\aerospike.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

aerospike-test

Message
aerospike-test\aerospike-test.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

append

Message
examples\append\append.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

async-batch-get

Message
examples\async-batch-get\async-batch-get.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

async-delay-queue

Message
examples\async-delay-queue\async-delay-queue.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

async-get

Message
examples\async-get\async-get.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

async-query

Message
examples\async-query\async-query.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

async-scan

Message
examples\async-scan\async-scan.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

batch-get

Message
examples\batch-get\batch-get.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

expire

Message
examples\expire\expire.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

generation

Message
examples\generation\generation.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

geo-filter

Message
examples\geo-filter\geo-filter.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

geo-simple

Message
examples\geo-simple\geo-simple.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

get

Message
examples\get\get.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

incr

Message
examples\incr\incr.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

list

Message
examples\list\list.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

map

Message
examples\map\map.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

put

Message
examples\put\put.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

query

Message
examples\query\query.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

query-aggregate

Message
examples\query-aggregate\query-aggregate.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

scan

Message
examples\scan\scan.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

scan-background

Message
examples\scan-background\scan-background.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

touch

Message
examples\touch\touch.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

udf

Message
examples\udf\udf.vcxproj: + The application which this project type is based on was not found. Please try this link for further information: 8bc9ceb8-8b4a-11d0-8d11-00a0c91bc942

examples

Message
examples logged no messages. +

Solution

Message
+ Show 1 additional messages +
aerospike.sln: + The solution file does not require migration.
+ Hide 1 additional messages +
\ No newline at end of file diff --git a/vs/aerospike/aerospike.vcxproj b/vs/aerospike/aerospike.vcxproj index a8c0728ca9..113721eadc 100644 --- a/vs/aerospike/aerospike.vcxproj +++ b/vs/aerospike/aerospike.vcxproj @@ -398,6 +398,7 @@ + @@ -549,6 +550,7 @@ + diff --git a/vs/aerospike/aerospike.vcxproj.filters b/vs/aerospike/aerospike.vcxproj.filters index da07913095..0d375a0167 100644 --- a/vs/aerospike/aerospike.vcxproj.filters +++ b/vs/aerospike/aerospike.vcxproj.filters @@ -495,6 +495,9 @@ Header Files\lua + + Header Files + @@ -941,6 +944,9 @@ Source Files\lua + + Source Files + From 81f9f4d574194f6ff01913e55b5b6bc5fee8cf56 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Fri, 19 Jan 2024 13:38:45 -0700 Subject: [PATCH 02/64] Add event loop info to file printing --- src/main/aerospike/as_metrics.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 92811c393b..c2daf6446c 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -248,6 +248,21 @@ as_metrics_write_cluster(struct as_policy_metrics_s* policy, const struct as_clu as_string_builder_append(&sb, ','); as_string_builder_append(&sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. 
as_string_builder_append(&sb, ",["); + + as_event_loop_stats* event_loops = stats->event_loops; + for (uint32_t i = 0; i < stats->event_loops_size; i++) + { + as_event_loop_stats* loop = &event_loops[i]; + if (i > 0) { + as_string_builder_append(&sb, ','); + } + as_string_builder_append(&sb, '['); + as_string_builder_append(&sb, loop->process_size); + as_string_builder_append(&sb, ','); + as_string_builder_append(&sb, loop->queue_size); + as_string_builder_append(&sb, ']'); + } + as_string_builder_append(&sb, '],['); as_node_stats* nodes = stats->nodes; for (uint32_t i = 0; i < stats->nodes_size; i++) { From dc50d0f7395d48725c75bede7120b5876a7dc5b0 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Mon, 22 Jan 2024 11:06:33 -0700 Subject: [PATCH 03/64] Update copyright year, fix timeout and error count in node_stats --- modules/common | 2 +- src/include/aerospike/aerospike_stats.h | 13 ++++++++--- src/include/aerospike/as_cluster.h | 2 +- src/include/aerospike/as_command.h | 2 +- src/include/aerospike/as_event_internal.h | 2 +- src/include/aerospike/as_metrics.h | 2 +- src/include/aerospike/as_node.h | 28 ++++++++++++++++++++++- src/main/aerospike/aerospike_batch.c | 2 +- src/main/aerospike/aerospike_key.c | 2 +- src/main/aerospike/aerospike_query.c | 2 +- src/main/aerospike/aerospike_scan.c | 2 +- src/main/aerospike/aerospike_stats.c | 7 +++--- src/main/aerospike/as_cluster.c | 2 +- src/main/aerospike/as_command.c | 2 +- src/main/aerospike/as_event.c | 2 +- src/main/aerospike/as_metrics.c | 2 +- src/main/aerospike/as_node.c | 27 +++++++++++++++++++++- src/main/aerospike/as_pipe.c | 2 +- 18 files changed, 81 insertions(+), 22 deletions(-) diff --git a/modules/common b/modules/common index 83add8e681..4d829e1380 160000 --- a/modules/common +++ b/modules/common @@ -1 +1 @@ -Subproject commit 83add8e681b3e25ed467dc67488d68d4e197ee44 +Subproject commit 4d829e1380676cf19c0607f05234a13efaff0045 diff --git a/src/include/aerospike/aerospike_stats.h 
b/src/include/aerospike/aerospike_stats.h index 47b3ff1476..46949fbe38 100644 --- a/src/include/aerospike/aerospike_stats.h +++ b/src/include/aerospike/aerospike_stats.h @@ -1,5 +1,5 @@ /* - * Copyright 2008-2021 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. @@ -88,9 +88,16 @@ typedef struct as_node_stats_s { as_conn_stats pipeline; /** - * Node error count within current window. + * Transaction error count since node was initialized. If the error is retryable, multiple errors per + * transaction may occur. */ - uint32_t error_rate_count; + uint32_t error_count; + + /** + * Transaction timeout count since node was initialized. If the timeout is retryable (ie socketTimeout), + * multiple timeouts per transaction may occur. + */ + int64_t timeout_count; } as_node_stats; diff --git a/src/include/aerospike/as_cluster.h b/src/include/aerospike/as_cluster.h index 9713c785d7..41990c23a8 100644 --- a/src/include/aerospike/as_cluster.h +++ b/src/include/aerospike/as_cluster.h @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/include/aerospike/as_command.h b/src/include/aerospike/as_command.h index 7c6c30475e..32b4c4ef0d 100644 --- a/src/include/aerospike/as_command.h +++ b/src/include/aerospike/as_command.h @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/include/aerospike/as_event_internal.h b/src/include/aerospike/as_event_internal.h index 811dfba9f6..1ef7ffc279 100644 --- a/src/include/aerospike/as_event_internal.h +++ b/src/include/aerospike/as_event_internal.h @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. 
+ * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index d5c36b6f80..c980eb5db3 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index 743f385476..8033e49858 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -1,5 +1,5 @@ /* - * Copyright 2008-2022 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. @@ -652,6 +652,32 @@ as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapse void as_node_enable_metrics(as_node* node, as_policy_metrics* policy); +/** + * Return transaction error count. The value is cumulative and not reset per metrics interval. + */ +uint64_t +as_node_get_error_count(as_node* node); + +/** + * Increment transaction error count. If the error is retryable, multiple errors per + * transaction may occur. + */ +void +as_node_add_error(as_node* node); + +/** + * Return transaction timeout count. The value is cumulative and not reset per metrics interval. + */ +uint64_t +as_node_get_timeout_count(as_node* node); + +/** + * Increment transaction timeout count. If the timeout is retryable (ie socketTimeout), + * multiple timeouts per transaction may occur. + */ +void +as_node_add_timeout(as_node* node); + /** * @private * Volatile read session pointer. 
diff --git a/src/main/aerospike/aerospike_batch.c b/src/main/aerospike/aerospike_batch.c index fa6059b325..d5b25bb6e6 100644 --- a/src/main/aerospike/aerospike_batch.c +++ b/src/main/aerospike/aerospike_batch.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/main/aerospike/aerospike_key.c b/src/main/aerospike/aerospike_key.c index f8572ead5e..12366db5d9 100644 --- a/src/main/aerospike/aerospike_key.c +++ b/src/main/aerospike/aerospike_key.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/main/aerospike/aerospike_query.c b/src/main/aerospike/aerospike_query.c index d373ff997a..ecdd3f1894 100644 --- a/src/main/aerospike/aerospike_query.c +++ b/src/main/aerospike/aerospike_query.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/main/aerospike/aerospike_scan.c b/src/main/aerospike/aerospike_scan.c index 884b502c2b..55f827a4bc 100644 --- a/src/main/aerospike/aerospike_scan.c +++ b/src/main/aerospike/aerospike_scan.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/main/aerospike/aerospike_stats.c b/src/main/aerospike/aerospike_stats.c index 34d8868275..331621034d 100644 --- a/src/main/aerospike/aerospike_stats.c +++ b/src/main/aerospike/aerospike_stats.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2021 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. 
under one or more contributor * license agreements. @@ -134,7 +134,8 @@ aerospike_node_stats(as_node* node, as_node_stats* stats) { as_node_reserve(node); // Released in aerospike_node_stats_destroy() stats->node = node; - stats->error_rate_count = as_node_get_error_rate(node); + stats->error_count = as_node_get_error_count(node); + stats->timeout_count = as_node_get_timeout_count(node); as_sum_init(&stats->sync); as_sum_init(&stats->async); @@ -185,7 +186,7 @@ aerospike_stats_to_string(as_cluster_stats* stats) as_conn_stats_tostring(&sb, "pipeline", &node_stats->pipeline); as_string_builder_append_newline(&sb); as_string_builder_append(&sb, "error count: "); - as_string_builder_append_uint(&sb, node_stats->error_rate_count); + as_string_builder_append_uint(&sb, node_stats->error_count); as_string_builder_append_newline(&sb); } diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index cd2214d7f6..be378b1e3c 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/main/aerospike/as_command.c b/src/main/aerospike/as_command.c index a06d91727e..34529028e8 100644 --- a/src/main/aerospike/as_command.c +++ b/src/main/aerospike/as_command.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index b984b87f35..efada9b736 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. 
under one or more contributor * license agreements. diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index c2daf6446c..5fdb075466 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index fe41157c71..7c841c6030 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. @@ -1318,6 +1318,31 @@ as_node_enable_metrics(as_node* node, as_policy_metrics* policy) as_node_metrics_init(node->metrics, policy); } +uint64_t +as_node_get_error_count(as_node* node) +{ + return as_load_uint64(&node->error_count); +} + +void +as_node_add_error(as_node* node) +{ + as_incr_uint64(&node->error_count); +} + +uint64_t +as_node_get_timeout_count(as_node* node) +{ + return as_load_uint64(&node->timeout_count); +} + +void +as_node_add_timeout(as_node* node) +{ + as_incr_uint64(&node->timeout_count); +} + + static as_status as_node_process_racks(as_cluster* cluster, as_error* err, as_node* node, as_vector* values) { diff --git a/src/main/aerospike/as_pipe.c b/src/main/aerospike/as_pipe.c index 7519e4b204..9a42a8ff7a 100644 --- a/src/main/aerospike/as_pipe.c +++ b/src/main/aerospike/as_pipe.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2022 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. 
From 65a6f7eff99b3db99296561e8887251a27528b84 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Mon, 22 Jan 2024 13:04:19 -0700 Subject: [PATCH 04/64] Change enum to macros for latency_type, add comments to cluster, and rename error_rate_count to error_rate in node --- src/include/aerospike/as_cluster.h | 28 ++++++++++++++++++++++------ src/include/aerospike/as_metrics.h | 28 ++++++++++++++-------------- src/include/aerospike/as_node.h | 12 +++++++++--- src/main/aerospike/as_cluster.c | 18 +++++++++--------- src/main/aerospike/as_metrics.c | 10 +++++----- src/main/aerospike/as_node.c | 4 ++-- 6 files changed, 61 insertions(+), 39 deletions(-) diff --git a/src/include/aerospike/as_cluster.h b/src/include/aerospike/as_cluster.h index 41990c23a8..626eaf9192 100644 --- a/src/include/aerospike/as_cluster.h +++ b/src/include/aerospike/as_cluster.h @@ -385,7 +385,7 @@ typedef struct as_cluster_s { as_policy_metrics* metrics_policy; - as_metrics_callbacks* metrics_callbacks; + as_metrics_listeners* metrics_listeners; uint64_t retry_count; @@ -532,15 +532,31 @@ as_partition_shm_get_node( as_node* prev_node, as_policy_replica replica, uint8_t replica_size, uint8_t* replica_index ); +/** + * @private + * Enable the collection of metrics + */ void as_cluster_enable_metrics(as_cluster* cluster, as_policy_metrics* policy); +/** + * @private + * Disable the collection of metrics + */ void as_cluster_disable_metrics(as_cluster* cluster); +/** + * @private + * Increment transaction count when metrics are enabled. + */ void as_cluster_add_tran(as_cluster* cluster); +/** + * @private + * Return transaction count. The value is cumulative and not reset per metrics interval. 
+ */ uint64_t as_cluster_get_tran_count(const as_cluster* cluster); @@ -573,7 +589,7 @@ static inline void as_node_incr_error_rate(as_node* node) { if (node->cluster->max_error_rate > 0) { - as_incr_uint32(&node->error_rate_count); + as_incr_uint32(&node->error_rate); } } @@ -582,9 +598,9 @@ as_node_incr_error_rate(as_node* node) * Reset node's error count. */ static inline void -as_node_reset_error_rate_count(as_node* node) +as_node_reset_error_rate(as_node* node) { - as_store_uint32(&node->error_rate_count, 0); + as_store_uint32(&node->error_rate, 0); } /** @@ -594,7 +610,7 @@ as_node_reset_error_rate_count(as_node* node) static inline uint32_t as_node_get_error_rate(as_node* node) { - return as_load_uint32(&node->error_rate_count); + return as_load_uint32(&node->error_rate); } /** @@ -605,7 +621,7 @@ static inline bool as_node_valid_error_rate(as_node* node) { uint32_t max = node->cluster->max_error_rate; - return max == 0 || max >= as_load_uint32(&node->error_rate_count); + return max == 0 || max >= as_load_uint32(&node->error_rate); } /** diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index c980eb5db3..986dc40c26 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -39,19 +39,19 @@ extern "C" { #define MIN_FILE_SIZE 1000000 #define UTC_STR_LEN 72 +typedef uint8_t as_latency_type; + +#define AS_LATENCY_TYPE_CONN 0 +#define AS_LATENCY_TYPE_WRITE 1 +#define AS_LATENCY_TYPE_READ 2 +#define AS_LATENCY_TYPE_BATCH 3 +#define AS_LATENCY_TYPE_QUERY 4 +#define AS_LATENCY_TYPE_NONE 5 + /****************************************************************************** * TYPES *****************************************************************************/ -typedef enum as_latency_type_e { - AS_LATENCY_TYPE_CONN, //as_queue or as_async_conn_pool? 
- AS_LATENCY_TYPE_WRITE, - AS_LATENCY_TYPE_READ, - AS_LATENCY_TYPE_BATCH, - AS_LATENCY_TYPE_QUERY, - AS_LATENCY_TYPE_NONE -} as_latency_type; - typedef struct as_latency_buckets_s { int32_t latency_shift; @@ -60,7 +60,7 @@ typedef struct as_latency_buckets_s { uint64_t* buckets; } as_latency_buckets; -struct as_metrics_callbacks_s; +struct as_metrics_listeners_s; /** * Metrics Policy @@ -76,7 +76,7 @@ typedef struct as_policy_metrics_s { int32_t latency_shift; // default 1 - struct as_metrics_callbacks_s* metrics_callbacks; + struct as_metrics_listeners_s* metrics_listeners; FILE* file; } as_policy_metrics; @@ -92,12 +92,12 @@ typedef void (*as_metrics_node_close_callback)(const struct as_policy_metrics_s* typedef void (*as_metrics_disable_callback)(struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster); -typedef struct as_metrics_callbacks_s { +typedef struct as_metrics_listeners_s { as_metrics_enable_callback enable_callback; as_metrics_snapshot_callback snapshot_callback; as_metrics_node_close_callback node_close_callback; as_metrics_disable_callback disable_callback; -} as_metrics_callbacks; +} as_metrics_listeners; typedef struct as_node_metrics_s { as_latency_buckets* latency; @@ -131,7 +131,7 @@ void as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed); void -as_metrics_callbacks_init(as_metrics_callbacks* callbacks); +as_metrics_listeners_init(as_metrics_listeners* listeners); void as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index 8033e49858..827b77fb2e 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -323,10 +323,18 @@ typedef struct as_node_s { /** * Error count for this node's error_rate_window. */ - uint32_t error_rate_count; + uint32_t error_rate; + /** + * Transaction error count since node was initialized. 
If the error is retryable, multiple errors per + * transaction may occur. + */ uint64_t error_count; + /** + * Transaction timeout count since node was initialized. If the timeout is retryable (ie socketTimeout), + * multiple timeouts per transaction may occur. + */ uint64_t timeout_count; /** @@ -644,8 +652,6 @@ as_node_signal_login(as_node* node); bool as_node_has_rack(as_node* node, const char* ns, int rack_id); -typedef enum as_latency_type_e as_latency_type; - void as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapsed); diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index be378b1e3c..f540db9df5 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -559,13 +559,13 @@ as_cluster_enable_metrics(as_cluster* cluster, as_policy_metrics* policy) { if (cluster->metrics_enabled) { - cluster->metrics_callbacks->disable_callback(policy, cluster); + cluster->metrics_listeners->disable_callback(policy, cluster); } - cluster->metrics_callbacks = policy->metrics_callbacks; - if (cluster->metrics_callbacks == NULL) + cluster->metrics_listeners = policy->metrics_listeners; + if (cluster->metrics_listeners == NULL) { - as_metrics_callbacks_init(cluster->metrics_callbacks); + as_metrics_listeners_init(cluster->metrics_listeners); } cluster->metrics_policy = policy; @@ -576,7 +576,7 @@ as_cluster_enable_metrics(as_cluster* cluster, as_policy_metrics* policy) as_node_enable_metrics(node, policy); } - cluster->metrics_callbacks->enable_callback(policy); + cluster->metrics_listeners->enable_callback(policy); } void @@ -585,7 +585,7 @@ as_cluster_disable_metrics(as_cluster* cluster) if (cluster->metrics_enabled) { cluster->metrics_enabled = false; - cluster->metrics_callbacks->disable_callback(cluster->metrics_policy, cluster); + cluster->metrics_listeners->disable_callback(cluster->metrics_policy, cluster); } } @@ -647,7 +647,7 @@ as_cluster_remove_nodes(as_cluster* cluster, as_vector* /* */ 
nodes_t as_node_deactivate(node); if (cluster->metrics_enabled) { - cluster->metrics_callbacks->node_close_callback(node->cluster->metrics_policy, node); + cluster->metrics_listeners->node_close_callback(node->cluster->metrics_policy, node); } } @@ -724,7 +724,7 @@ as_cluster_reset_error_rate(as_cluster* cluster) as_nodes* nodes = cluster->nodes; for (uint32_t i = 0; i < nodes->size; i++) { - as_node_reset_error_rate_count(nodes->array[i]); + as_node_reset_error_rate(nodes->array[i]); } } @@ -968,7 +968,7 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) if (cluster->metrics_enabled && (cluster->tend_count % cluster->metrics_policy->interval)) { - cluster->metrics_callbacks->snapshot_callback(cluster->metrics_policy, cluster); + cluster->metrics_listeners->snapshot_callback(cluster->metrics_policy, cluster); } as_cluster_destroy_peers(&peers); diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 5fdb075466..435c1e4aad 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -208,12 +208,12 @@ as_metrics_disable(struct as_policy_metrics_s* policy, const struct as_cluster_s } void -as_metrics_callbacks_init(as_metrics_callbacks* callbacks) +as_metrics_listeners_init(as_metrics_listeners* listeners) { - callbacks->enable_callback = as_metrics_enable; - callbacks->disable_callback = as_metrics_disable; - callbacks->node_close_callback = as_metrics_node_close; - callbacks->snapshot_callback = as_metrics_snapshot; + listeners->enable_callback = as_metrics_enable; + listeners->disable_callback = as_metrics_disable; + listeners->node_close_callback = as_metrics_node_close; + listeners->snapshot_callback = as_metrics_snapshot; } void diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 7c841c6030..01d55a8971 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -145,7 +145,7 @@ as_node_create(as_cluster* cluster, as_node_info* node_info) 
node->sync_conn_pools = cf_malloc(sizeof(as_conn_pool) * cluster->conn_pools_per_node); node->sync_conns_opened = 1; node->sync_conns_closed = 0; - node->error_rate_count = 0; + node->error_rate = 0; node->error_count = 0; node->timeout_count = 0; node->conn_iter = 0; @@ -909,7 +909,7 @@ static void as_node_restart(as_cluster* cluster, as_node* node) { if (cluster->max_error_rate > 0) { - as_node_reset_error_rate_count(node); + as_node_reset_error_rate(node); } // Balance sync connections. From 8091d95bf9ead35bc10e3cefcb778dae716e9c61 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Mon, 22 Jan 2024 15:13:13 -0500 Subject: [PATCH 05/64] Restore common submodule change. --- modules/common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/common b/modules/common index 4d829e1380..83add8e681 160000 --- a/modules/common +++ b/modules/common @@ -1 +1 @@ -Subproject commit 4d829e1380676cf19c0607f05234a13efaff0045 +Subproject commit 83add8e681b3e25ed467dc67488d68d4e197ee44 From a53adb245aecc29ba5bd308b39d3738e1d1648fa Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Mon, 22 Jan 2024 13:26:00 -0700 Subject: [PATCH 06/64] Update comments in metrics files, make timeout_count unsigned --- src/include/aerospike/aerospike_stats.h | 2 +- src/include/aerospike/as_metrics.h | 12 ++++++------ src/main/aerospike/as_metrics.c | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/include/aerospike/aerospike_stats.h b/src/include/aerospike/aerospike_stats.h index 46949fbe38..4718fd2f7b 100644 --- a/src/include/aerospike/aerospike_stats.h +++ b/src/include/aerospike/aerospike_stats.h @@ -97,7 +97,7 @@ typedef struct as_node_stats_s { * Transaction timeout count since node was initialized. If the timeout is retryable (ie socketTimeout), * multiple timeouts per transaction may occur. 
*/ - int64_t timeout_count; + uint64_t timeout_count; } as_node_stats; diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 986dc40c26..33fa08cf23 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -31,9 +31,9 @@ extern "C" { #endif -/****************************************************************************** -* MACROS -*****************************************************************************/ +//--------------------------------- +// Macros +//--------------------------------- #define NS_TO_MS 1000000 #define MIN_FILE_SIZE 1000000 @@ -48,9 +48,9 @@ typedef uint8_t as_latency_type; #define AS_LATENCY_TYPE_QUERY 4 #define AS_LATENCY_TYPE_NONE 5 -/****************************************************************************** - * TYPES - *****************************************************************************/ +//--------------------------------- +// Types +//--------------------------------- typedef struct as_latency_buckets_s { int32_t latency_shift; diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 435c1e4aad..eaa1879351 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -21,9 +21,9 @@ #include #include - /****************************************************************************** - * FUNCTIONS - *****************************************************************************/ +//--------------------------------- +// Functions +//--------------------------------- const char* utc_time_str(time_t t) From b739b91e13d010959eced32f28f7cc0206add674 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Tue, 23 Jan 2024 08:22:38 -0700 Subject: [PATCH 07/64] Add metrics_writer type --- src/include/aerospike/as_metrics.h | 35 +++-- src/include/aerospike/as_node.h | 3 + src/main/aerospike/as_cluster.c | 8 +- src/main/aerospike/as_metrics.c | 221 +++++++++++++++-------------- 4 files changed, 152 insertions(+), 115 
deletions(-) diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 33fa08cf23..7c9df3f00b 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -78,19 +78,19 @@ typedef struct as_policy_metrics_s { struct as_metrics_listeners_s* metrics_listeners; - FILE* file; + void* udata; } as_policy_metrics; struct as_cluster_s; struct as_node_s; -typedef void (*as_metrics_enable_callback)(struct as_policy_metrics_s* policy); +typedef void (*as_metrics_enable_callback)(const struct as_policy_metrics_s* policy); -typedef void (*as_metrics_snapshot_callback)(const struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster); +typedef void (*as_metrics_snapshot_callback)(const struct as_cluster_s* cluster, void* udata); -typedef void (*as_metrics_node_close_callback)(const struct as_policy_metrics_s* policy, const struct as_node_s* node); +typedef void (*as_metrics_node_close_callback)(const struct as_node_s* node, void* udata); -typedef void (*as_metrics_disable_callback)(struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster); +typedef void (*as_metrics_disable_callback)(const struct as_cluster_s* cluster, void* udata); typedef struct as_metrics_listeners_s { as_metrics_enable_callback enable_callback; @@ -103,6 +103,22 @@ typedef struct as_node_metrics_s { as_latency_buckets* latency; } as_node_metrics; +typedef struct as_metrics_writer_s { + FILE* file; + + as_string_builder* sb; + + bool enable; + + uint64_t max_size; + + uint64_t size; + + int32_t latency_columns; + + int32_t latency_shift; +} as_metrics_writer; + const char* utc_time_str(time_t t); @@ -137,13 +153,16 @@ void as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); void -as_metrics_write_cluster(struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster); +as_metrics_write_cluster(as_metrics_writer* mw, const struct as_cluster_s* cluster); + +void 
+as_metrics_write_node(as_metrics_writer* mw, struct as_node_stats_s* node_stats); void -as_metrics_write_node(as_string_builder* sb, struct as_node_stats_s* node_stats); +as_metrics_write_conn(as_metrics_writer* mw, struct as_conn_stats_s* conn_stats); void -as_metrics_write_conn(as_string_builder* sb, struct as_conn_stats_s* conn_stats); +as_metrics_write_line(as_metrics_writer* mw); #if defined(__linux__) void diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index 827b77fb2e..52661d68de 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -298,6 +298,9 @@ typedef struct as_node_s { */ as_racks* racks; + /** + * Node metrics + */ as_node_metrics* metrics; /** diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index f540db9df5..45ec18ec40 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -559,7 +559,7 @@ as_cluster_enable_metrics(as_cluster* cluster, as_policy_metrics* policy) { if (cluster->metrics_enabled) { - cluster->metrics_listeners->disable_callback(policy, cluster); + cluster->metrics_listeners->disable_callback(policy, cluster, policy->udata); } cluster->metrics_listeners = policy->metrics_listeners; @@ -576,7 +576,7 @@ as_cluster_enable_metrics(as_cluster* cluster, as_policy_metrics* policy) as_node_enable_metrics(node, policy); } - cluster->metrics_listeners->enable_callback(policy); + cluster->metrics_listeners->enable_callback(policy, policy->udata); } void @@ -585,7 +585,7 @@ as_cluster_disable_metrics(as_cluster* cluster) if (cluster->metrics_enabled) { cluster->metrics_enabled = false; - cluster->metrics_listeners->disable_callback(cluster->metrics_policy, cluster); + cluster->metrics_listeners->disable_callback(cluster->metrics_policy, cluster, cluster->metrics_policy->udata); } } @@ -968,7 +968,7 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) if (cluster->metrics_enabled && (cluster->tend_count % 
cluster->metrics_policy->interval)) { - cluster->metrics_listeners->snapshot_callback(cluster->metrics_policy, cluster); + cluster->metrics_listeners->snapshot_callback(cluster->metrics_policy, cluster, cluster->metrics_policy->udata); } as_cluster_destroy_peers(&peers); diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index eaa1879351..2eb48f7fb0 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -45,7 +45,6 @@ as_metrics_policy_init(as_policy_metrics* policy) policy->interval = 30; policy->latency_columns = 7; policy->latency_shift = 1; - policy->file = NULL; } char* @@ -143,7 +142,7 @@ as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_ty } void -as_metrics_enable(struct as_policy_metrics_s* policy) +as_metrics_writer_enable(const struct as_policy_metrics_s* policy) { if (policy->report_size_limit != 0 && policy->report_size_limit < MIN_FILE_SIZE) { @@ -151,73 +150,79 @@ as_metrics_enable(struct as_policy_metrics_s* policy) } // create file directory - policy->file = fopen(policy->report_directory, "w"); - - as_string_builder sb; - as_string_builder_inita(&sb, 25, true); - as_string_builder_append(&sb, utc_time_str(time(NULL))); - as_string_builder_append(&sb, " header(1)"); - as_string_builder_append(&sb, " cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]]"); - as_string_builder_append(&sb, " eventloop[processSize,queueSize]"); - as_string_builder_append(&sb, " node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]]"); - as_string_builder_append(&sb, " conn[inUse,inPool,opened,closed]"); - as_string_builder_append(&sb, " latency("); - as_string_builder_append(&sb, policy->latency_columns); - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, policy->latency_shift); - as_string_builder_append(&sb, ')'); - as_string_builder_append(&sb, "[type[l1,l2,l3...]]"); - fprintf(policy->file, "\n"); - 
fprintf(policy->file, sb.data); + as_metrics_writer* mw = policy->udata; + mw->file = fopen(policy->report_directory, "w"); + mw->max_size = policy->report_size_limit; + mw->latency_columns = policy->latency_columns; + mw->latency_shift = policy->latency_shift; + mw->size = 0; + + as_string_builder_inita(mw->sb, 25, true); + as_string_builder_append(&mw->sb, utc_time_str(time(NULL))); + as_string_builder_append(&mw->sb, " header(1)"); + as_string_builder_append(&mw->sb, " cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]]"); + as_string_builder_append(&mw->sb, " eventloop[processSize,queueSize]"); + as_string_builder_append(&mw->sb, " node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]]"); + as_string_builder_append(&mw->sb, " conn[inUse,inPool,opened,closed]"); + as_string_builder_append(&mw->sb, " latency("); + as_string_builder_append(&mw->sb, mw->latency_columns); + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, mw->latency_shift); + as_string_builder_append(&mw->sb, ')'); + as_string_builder_append(&mw->sb, "[type[l1,l2,l3...]]"); + as_metrics_write_line(mw); + + mw->enable = true; } void -as_metrics_snapshot(const struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster) +as_metrics_writer_snapshot(const struct as_cluster_s* cluster, void* udata) { - if (policy->file != NULL) + as_metrics_writer* mw = udata; + if (mw->enable && mw->file != NULL) { - as_metrics_write_cluster(policy, cluster); + as_metrics_write_cluster(mw, cluster); } } void -as_metrics_node_close(const struct as_policy_metrics_s* policy, const struct as_node_s* node) +as_metrics_writer_node_close(const struct as_node_s* node, void* udata) { // write node info to file - if (policy->file != NULL) + as_metrics_writer* mw = udata; + if (mw->enable && mw->file != NULL) { - as_string_builder sb; - as_string_builder_inita(&sb, 25, true); - as_string_builder_append(&sb, 
utc_time_str(time(NULL))); - as_metrics_write_node(&sb, node); - fprintf(policy->file, "\n"); - fprintf(policy->file, sb.data); + as_string_builder_append(&mw->sb, utc_time_str(time(NULL))); + as_metrics_write_node(&mw->sb, node); + as_metrics_write_line(mw); } } void -as_metrics_disable(struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster) +as_metrics_writer_disable(const struct as_cluster_s* cluster, void* udata) { // write cluster into to file, disable - if (policy->file != NULL) + as_metrics_writer* mw = udata; + if (mw->enable && mw->file != NULL) { - as_metrics_write_cluster(policy, cluster); - fclose(policy->file); - policy->file = NULL; + as_metrics_write_cluster(mw, cluster); + fclose(mw->file); + mw->file = NULL; + mw->enable = false; } } void as_metrics_listeners_init(as_metrics_listeners* listeners) { - listeners->enable_callback = as_metrics_enable; - listeners->disable_callback = as_metrics_disable; - listeners->node_close_callback = as_metrics_node_close; - listeners->snapshot_callback = as_metrics_snapshot; + listeners->enable_callback = as_metrics_writer_enable; + listeners->disable_callback = as_metrics_writer_disable; + listeners->node_close_callback = as_metrics_writer_node_close; + listeners->snapshot_callback = as_metrics_writer_snapshot; } void -as_metrics_write_cluster(struct as_policy_metrics_s* policy, const struct as_cluster_s* cluster) { +as_metrics_write_cluster(as_metrics_writer* mw, const struct as_cluster_s* cluster) { char* cluster_name = cluster->cluster_name; if (cluster_name == NULL) { @@ -230,78 +235,75 @@ as_metrics_write_cluster(struct as_policy_metrics_s* policy, const struct as_clu as_cluster_stats* stats; aerospike_cluster_stats(cluster, stats); - as_string_builder sb; - as_string_builder_inita(&sb, 10, true); - as_string_builder_append(&sb, utc_time_str(time(NULL))); - as_string_builder_append(&sb, " cluster["); - as_string_builder_append(&sb, cluster_name); - as_string_builder_append(&sb, ','); - 
as_string_builder_append(&sb, (int)cpu_load); - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, mem); - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, cluster->retry_count); // Cumulative. Not reset on each interval. - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. - as_string_builder_append(&sb, ",["); + as_string_builder_append(&mw->sb, utc_time_str(time(NULL))); + as_string_builder_append(&mw->sb, " cluster["); + as_string_builder_append(&mw->sb, cluster_name); + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, (int)cpu_load); + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, mem); + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, cluster->retry_count); // Cumulative. Not reset on each interval. + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. 
+ as_string_builder_append(&mw->sb, ",["); as_event_loop_stats* event_loops = stats->event_loops; for (uint32_t i = 0; i < stats->event_loops_size; i++) { as_event_loop_stats* loop = &event_loops[i]; if (i > 0) { - as_string_builder_append(&sb, ','); + as_string_builder_append(&mw->sb, ','); } - as_string_builder_append(&sb, '['); - as_string_builder_append(&sb, loop->process_size); - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, loop->queue_size); - as_string_builder_append(&sb, ']'); + as_string_builder_append(&mw->sb, '['); + as_string_builder_append(&mw->sb, loop->process_size); + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, loop->queue_size); + as_string_builder_append(&mw->sb, ']'); } - as_string_builder_append(&sb, '],['); - + as_string_builder_append(&mw->sb, '],['); + as_node_stats* nodes = stats->nodes; for (uint32_t i = 0; i < stats->nodes_size; i++) { as_node_stats* node = &stats->nodes[i]; if (i > 0) { - as_string_builder_append(&sb, ","); + as_string_builder_append(&mw->sb, ","); } - as_metrics_write_node(&sb, node); + as_metrics_write_node(&mw->sb, node); } - as_string_builder_append(&sb, "]]"); + as_string_builder_append(&mw->sb, "]]"); - fprintf(policy->file, "\n"); - fprintf(policy->file, sb.data); + as_metrics_write_line(mw); } void -as_metrics_write_node(as_string_builder* sb, struct as_node_stats_s* node_stats) +as_metrics_write_node(as_metrics_writer* mw, struct as_node_stats_s* node_stats) { as_node* node = node_stats->node; - as_string_builder_append(&sb, '['); - as_string_builder_append(&sb, node->name); - as_string_builder_append(&sb, ','); + as_string_builder_append(&mw->sb, '['); + as_string_builder_append(&mw->sb, node->name); + as_string_builder_append(&mw->sb, ','); //as_host* host = node-> TODO: how to get host from node? 
it is in node_info - //as_string_builder_append(&sb, host->name); - //as_string_builder_append(&sb, ','); - //as_string_builder_append(&sb, host->port); - //as_string_builder_append(&sb, ','); + //as_string_builder_append(&mw->sb, host->name); + //as_string_builder_append(&mw->sb, ','); + //as_string_builder_append(&mw->sb, host->port); + //as_string_builder_append(&mw->sb, ','); - as_metrics_write_conn(sb, &node_stats->sync); - as_string_builder_append(&sb, ','); - as_metrics_write_conn(sb, &node_stats->async); - as_string_builder_append(&sb, ','); + as_metrics_write_conn(&mw->sb, &node_stats->sync); + as_string_builder_append(&mw->sb, ','); + as_metrics_write_conn(&mw->sb, &node_stats->async); + as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&sb, node->error_count); - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, node->timeout_count); - as_string_builder_append(&sb, ',['); + as_string_builder_append(&mw->sb, node->error_count); + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, node->timeout_count); + as_string_builder_append(&mw->sb, ',['); as_node_metrics* node_metrics = node->metrics; uint32_t max = AS_LATENCY_TYPE_NONE; @@ -309,35 +311,48 @@ as_metrics_write_node(as_string_builder* sb, struct as_node_stats_s* node_stats) for (uint32_t i = 0; i < max; i++) { if (i > 0) { - as_string_builder_append(&sb, ","); + as_string_builder_append(&mw->sb, ","); } - as_string_builder_append(&sb, as_latency_type_to_string(i)); - as_string_builder_append(&sb, '['); + as_string_builder_append(&mw->sb, as_latency_type_to_string(i)); + as_string_builder_append(&mw->sb, '['); as_latency_buckets* buckets = &node_metrics->latency[i]; uint32_t bucket_max = buckets->latency_columns; for (uint32_t j = 0; j < bucket_max; j++) { if (j > 0) { - as_string_builder_append(&sb, ','); + as_string_builder_append(&mw->sb, ','); } - as_string_builder_append(&sb, as_metrics_get_bucket(&buckets, i)); + 
as_string_builder_append(&mw->sb, as_metrics_get_bucket(&buckets, i)); } - as_string_builder_append(&sb, ']'); + as_string_builder_append(&mw->sb, ']'); } - as_string_builder_append(&sb, ']]'); + as_string_builder_append(&mw->sb, ']]'); } void -as_metrics_write_conn(as_string_builder* sb, struct as_conn_stats_s* conn_stats) +as_metrics_write_conn(as_metrics_writer* mw, struct as_conn_stats_s* conn_stats) { - as_string_builder_append(&sb, conn_stats->in_use); - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, conn_stats->in_pool); - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, conn_stats->opened); // Cumulative. Not reset on each interval. - as_string_builder_append(&sb, ','); - as_string_builder_append(&sb, conn_stats->closed); // Cumulative. Not reset on each interval. + as_string_builder_append(&mw->sb, conn_stats->in_use); + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, conn_stats->in_pool); + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, conn_stats->opened); // Cumulative. Not reset on each interval. + as_string_builder_append(&mw->sb, ','); + as_string_builder_append(&mw->sb, conn_stats->closed); // Cumulative. Not reset on each interval. +} + +void +as_metrics_write_line(as_metrics_writer* mw) +{ + as_string_builder_append_newline(&mw->sb); + fprintf(mw->file, &mw->sb->data); + mw->size += mw->sb->length; + + if (mw->max_size > 0 && mw->size >= mw->max_size) + { + // write new file? 
+ } } #if defined(__linux__) From afd42f46e69e4da82a4cec135a117881a544986d Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Tue, 23 Jan 2024 09:18:48 -0700 Subject: [PATCH 08/64] Add errors to enable and disable metrics --- src/include/aerospike/aerospike_stats.h | 8 ++++---- src/include/aerospike/as_cluster.h | 8 ++++---- src/include/aerospike/as_metrics.h | 7 +++++-- src/main/aerospike/aerospike_stats.c | 20 ++++++++++++++------ src/main/aerospike/as_cluster.c | 20 ++++++++++++++------ src/main/aerospike/as_metrics.c | 25 +++++++++++++++++++------ 6 files changed, 60 insertions(+), 28 deletions(-) diff --git a/src/include/aerospike/aerospike_stats.h b/src/include/aerospike/aerospike_stats.h index 4718fd2f7b..725073c8fe 100644 --- a/src/include/aerospike/aerospike_stats.h +++ b/src/include/aerospike/aerospike_stats.h @@ -257,14 +257,14 @@ aerospike_stats_to_string(as_cluster_stats* stats); /** * Enable extended periodic cluster and node latency metrics. */ -AS_EXTERN void -aerospike_enable_metrics(aerospike* as, const struct as_policy_metrics_s* policy); +AS_EXTERN as_status +aerospike_enable_metrics(aerospike* as, as_error* err, const struct as_policy_metrics_s* policy); /** * Disable extended periodic cluster and node latency metrics. 
*/ -AS_EXTERN void -aerospike_disable_metrics(aerospike* as); +AS_EXTERN as_status +aerospike_disable_metrics(aerospike* as, as_error* err); #ifdef __cplusplus } // end extern "C" diff --git a/src/include/aerospike/as_cluster.h b/src/include/aerospike/as_cluster.h index 626eaf9192..a3137dc6e4 100644 --- a/src/include/aerospike/as_cluster.h +++ b/src/include/aerospike/as_cluster.h @@ -536,15 +536,15 @@ as_partition_shm_get_node( * @private * Enable the collection of metrics */ -void -as_cluster_enable_metrics(as_cluster* cluster, as_policy_metrics* policy); +as_status +as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_policy_metrics* policy); /** * @private * Disable the collection of metrics */ -void -as_cluster_disable_metrics(as_cluster* cluster); +as_status +as_cluster_disable_metrics(as_error* err, as_cluster* cluster); /** * @private diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 7c9df3f00b..3ad80e460a 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #if !defined(_MSC_VER) @@ -84,13 +85,13 @@ typedef struct as_policy_metrics_s { struct as_cluster_s; struct as_node_s; -typedef void (*as_metrics_enable_callback)(const struct as_policy_metrics_s* policy); +typedef as_status (*as_metrics_enable_callback)(as_error* err, const struct as_policy_metrics_s* policy); typedef void (*as_metrics_snapshot_callback)(const struct as_cluster_s* cluster, void* udata); typedef void (*as_metrics_node_close_callback)(const struct as_node_s* node, void* udata); -typedef void (*as_metrics_disable_callback)(const struct as_cluster_s* cluster, void* udata); +typedef as_status (*as_metrics_disable_callback)(as_error* err, const struct as_cluster_s* cluster, void* udata); typedef struct as_metrics_listeners_s { as_metrics_enable_callback enable_callback; @@ -117,6 +118,8 @@ typedef struct as_metrics_writer_s { int32_t 
latency_columns; int32_t latency_shift; + + const char* report_directory; } as_metrics_writer; const char* diff --git a/src/main/aerospike/aerospike_stats.c b/src/main/aerospike/aerospike_stats.c index 331621034d..3850d39751 100644 --- a/src/main/aerospike/aerospike_stats.c +++ b/src/main/aerospike/aerospike_stats.c @@ -210,16 +210,24 @@ aerospike_stats_to_string(as_cluster_stats* stats) return sb.data; } -void -aerospike_enable_metrics(aerospike* as, const as_policy_metrics* policy) +as_status +aerospike_enable_metrics(aerospike* as, as_error* err, const struct as_policy_metrics_s* policy) { as_cluster* cluster = as->cluster; - as_cluster_enable_metrics(cluster, policy); + as_status status = as_cluster_enable_metrics(err, cluster, policy); + if (status != AEROSPIKE_OK) + { + return status; + } } -void -aerospike_disable_metrics(aerospike* as) +as_status +aerospike_disable_metrics(aerospike* as, as_error* err) { as_cluster* cluster = as->cluster; - as_cluster_disable_metrics(cluster); + as_status status = as_cluster_disable_metrics(err, cluster); + if (status != AEROSPIKE_OK) + { + return status; + } } diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index 45ec18ec40..a53603f2c8 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -554,8 +554,8 @@ as_cluster_remove_nodes_copy(as_cluster* cluster, as_vector* /* */ no as_vector_append(cluster->gc, &item); } -void -as_cluster_enable_metrics(as_cluster* cluster, as_policy_metrics* policy) +as_status +as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_policy_metrics* policy) { if (cluster->metrics_enabled) { @@ -576,16 +576,24 @@ as_cluster_enable_metrics(as_cluster* cluster, as_policy_metrics* policy) as_node_enable_metrics(node, policy); } - cluster->metrics_listeners->enable_callback(policy, policy->udata); + as_status status = cluster->metrics_listeners->enable_callback(err, policy, policy->udata); + if (status != AEROSPIKE_OK) + { + return 
status; + } } -void -as_cluster_disable_metrics(as_cluster* cluster) +as_status +as_cluster_disable_metrics(as_error* err, as_cluster* cluster) { if (cluster->metrics_enabled) { cluster->metrics_enabled = false; - cluster->metrics_listeners->disable_callback(cluster->metrics_policy, cluster, cluster->metrics_policy->udata); + as_status status = cluster->metrics_listeners->disable_callback(cluster->metrics_policy, cluster, cluster->metrics_policy->udata); + if (status != AEROSPIKE_OK) + { + return status; + } } } diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 2eb48f7fb0..6861d69f58 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -141,21 +141,29 @@ as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_ty as_metrics_latency_buckets_add(&node_metrics->latency[latency_type], elapsed); } -void -as_metrics_writer_enable(const struct as_policy_metrics_s* policy) +as_status +as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* policy) { if (policy->report_size_limit != 0 && policy->report_size_limit < MIN_FILE_SIZE) { - // error + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Metrics policy report_size_limit %d must be at least %d", policy->report_size_limit, MIN_FILE_SIZE); } // create file directory as_metrics_writer* mw = policy->udata; mw->file = fopen(policy->report_directory, "w"); + + if (!mw->file) + { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Failed to open file: %s", policy->report_directory); + } mw->max_size = policy->report_size_limit; mw->latency_columns = policy->latency_columns; mw->latency_shift = policy->latency_shift; mw->size = 0; + mw->report_directory = policy->report_directory; as_string_builder_inita(mw->sb, 25, true); as_string_builder_append(&mw->sb, utc_time_str(time(NULL))); @@ -198,17 +206,22 @@ as_metrics_writer_node_close(const struct as_node_s* node, void* udata) } } -void 
-as_metrics_writer_disable(const struct as_cluster_s* cluster, void* udata) +as_status +as_metrics_writer_disable(as_error* err, const struct as_cluster_s* cluster, void* udata) { // write cluster into to file, disable as_metrics_writer* mw = udata; if (mw->enable && mw->file != NULL) { as_metrics_write_cluster(mw, cluster); - fclose(mw->file); + uint32_t result = fclose(mw->file); mw->file = NULL; mw->enable = false; + if (result != 0) + { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "File stream did not close successfully: %s", mw->report_directory); + } } } From 6775bc6ad89bbc754cb00dc6034e718171cdcc51 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Tue, 23 Jan 2024 10:30:47 -0700 Subject: [PATCH 09/64] Comments and other small fixes --- src/include/aerospike/aerospike_stats.h | 2 +- src/include/aerospike/as_metrics.h | 99 +++++++++++++++++++++---- src/include/aerospike/as_node.h | 8 ++ 3 files changed, 93 insertions(+), 16 deletions(-) diff --git a/src/include/aerospike/aerospike_stats.h b/src/include/aerospike/aerospike_stats.h index 725073c8fe..19999b05e2 100644 --- a/src/include/aerospike/aerospike_stats.h +++ b/src/include/aerospike/aerospike_stats.h @@ -91,7 +91,7 @@ typedef struct as_node_stats_s { * Transaction error count since node was initialized. If the error is retryable, multiple errors per * transaction may occur. */ - uint32_t error_count; + uint64_t error_count; /** * Transaction timeout count since node was initialized. If the timeout is retryable (ie socketTimeout), diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 3ad80e460a..8d51e49dfc 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -53,11 +53,13 @@ typedef uint8_t as_latency_type; // Types //--------------------------------- +/** + * Latency buckets for a transaction group. 
+ * Latency bucket counts are cumulative and not reset on each metrics snapshot interval + */ typedef struct as_latency_buckets_s { int32_t latency_shift; - int32_t latency_columns; - uint64_t* buckets; } as_latency_buckets; @@ -67,7 +69,7 @@ struct as_metrics_listeners_s; * Metrics Policy */ typedef struct as_policy_metrics_s { - const char* report_directory; + const char* report_directory; // where the metrics file is output int64_t report_size_limit; // default 0 @@ -85,6 +87,9 @@ typedef struct as_policy_metrics_s { struct as_cluster_s; struct as_node_s; +/** + * Callbacks for metrics listener operations + */ typedef as_status (*as_metrics_enable_callback)(as_error* err, const struct as_policy_metrics_s* policy); typedef void (*as_metrics_snapshot_callback)(const struct as_cluster_s* cluster, void* udata); @@ -93,6 +98,9 @@ typedef void (*as_metrics_node_close_callback)(const struct as_node_s* node, voi typedef as_status (*as_metrics_disable_callback)(as_error* err, const struct as_cluster_s* cluster, void* udata); +/** + * Struct to hold required callbacks + */ typedef struct as_metrics_listeners_s { as_metrics_enable_callback enable_callback; as_metrics_snapshot_callback snapshot_callback; @@ -100,10 +108,16 @@ typedef struct as_metrics_listeners_s { as_metrics_disable_callback disable_callback; } as_metrics_listeners; +/** + * Node metrics latency bucket struct + */ typedef struct as_node_metrics_s { as_latency_buckets* latency; } as_node_metrics; +/** + * Implementation of metrics_listeners + */ typedef struct as_metrics_writer_s { FILE* file; @@ -122,86 +136,141 @@ typedef struct as_metrics_writer_s { const char* report_directory; } as_metrics_writer; +/** + * Format time into UTC string + */ const char* utc_time_str(time_t t); +/** + * Initalize metrics policy + */ void as_metrics_policy_init(as_policy_metrics* policy); +/** + * Convert latency_type to string version for printing to the output file + */ char* as_latency_type_to_string(as_latency_type 
type); +/** + * Initalize latency bucket struct + */ void -as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, int32_t latencyColumns, int32_t latencyShift); +as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, int32_t latency_columns, int32_t latency_shift); +/** + * Return cumulative count of a bucket. + */ uint64_t as_metrics_get_bucket(as_latency_buckets* buckets, uint32_t i); +/** + * Increment count of bucket corresponding to the elapsed time in nanoseconds. + */ void as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t elapsed); +/** + * Determine which index of bucket the elapsed time belongs in + */ uint32_t as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos); +/** + * Initalize node metrics struct + */ void as_node_metrics_init(as_node_metrics* node_metrics, const as_policy_metrics* policy); +/** + * Add latency to corresponding bucket type + */ void as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed); +/** + * Initalize metrics listener struct + */ void as_metrics_listeners_init(as_metrics_listeners* listeners); +/** + * Calculate CPU and memory usage + */ void as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); +/** + * Write cluster information to the metrics output file + */ void as_metrics_write_cluster(as_metrics_writer* mw, const struct as_cluster_s* cluster); +/** + * Write node information to the metrics output file + */ void as_metrics_write_node(as_metrics_writer* mw, struct as_node_stats_s* node_stats); +/** + * Write connection information to the metrics output file + */ void as_metrics_write_conn(as_metrics_writer* mw, struct as_conn_stats_s* conn_stats); +/** + * Write line to the metrics output file + */ void as_metrics_write_line(as_metrics_writer* mw); #if defined(__linux__) -void -as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); - +/** + * Gets memory and CPU 
usage information from proc/stat + */ void as_metrics_proc_stat_mem_cpu(double* vm_usage, double* resident_set, double* cpu_usage); #endif #if defined(_MSC_VER) -void -as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); - +/** + * Helper method that calculates CPU usage using ticks + */ static double as_metrics_calculate_cpu_load(uint64_t idleTicks, uint64_t totalTicks); +/** + * Helper method for converting file time to uint64_t + */ static uint64_t as_metrics_file_time_to_uint_64(const FILETIME ft); +/** + * Gets CPU usage using GetSystemTimes() + */ double as_metrics_process_cpu_load(); +/** + * Gets memory usage using GlobalMemoryStatusEx() + */ double as_metrics_process_mem_usage(); - #endif #if defined(__APPLE__) - -void -as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); - +/** + * Gets memory usage using task_info + */ double as_metrics_process_mem_usage(); +/** + * Gets cpu usage using ps -p + */ double as_metrics_process_cpu_load(); #endif diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index 52661d68de..21d7b5f119 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -655,9 +655,17 @@ as_node_signal_login(as_node* node); bool as_node_has_rack(as_node* node, const char* ns, int rack_id); +/** + * @private + * Record latency of type latency_type for node + */ void as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapsed); +/** + * @private + * Enable metrics at the node level + */ void as_node_enable_metrics(as_node* node, as_policy_metrics* policy); From a0e3b629a3467c6187735f4053bed7ecdbe85b0d Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Tue, 23 Jan 2024 11:03:58 -0700 Subject: [PATCH 10/64] Fix warnings and errors --- src/include/aerospike/as_metrics.h | 2 ++ src/main/aerospike/as_cluster.c | 14 +++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/include/aerospike/as_metrics.h 
b/src/include/aerospike/as_metrics.h index 8d51e49dfc..5912ec8fa3 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -202,12 +202,14 @@ as_metrics_listeners_init(as_metrics_listeners* listeners); void as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); +struct as_cluster_s; /** * Write cluster information to the metrics output file */ void as_metrics_write_cluster(as_metrics_writer* mw, const struct as_cluster_s* cluster); +struct as_node_stats_s; /** * Write node information to the metrics output file */ diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index a53603f2c8..c37b27346e 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -559,7 +559,7 @@ as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_policy_metrics* { if (cluster->metrics_enabled) { - cluster->metrics_listeners->disable_callback(policy, cluster, policy->udata); + cluster->metrics_listeners->disable_callback(err, cluster, policy->udata); } cluster->metrics_listeners = policy->metrics_listeners; @@ -576,11 +576,13 @@ as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_policy_metrics* as_node_enable_metrics(node, policy); } - as_status status = cluster->metrics_listeners->enable_callback(err, policy, policy->udata); + as_status status = cluster->metrics_listeners->enable_callback(err, policy); if (status != AEROSPIKE_OK) { return status; } + + return AEROSPIKE_OK; } as_status @@ -589,12 +591,14 @@ as_cluster_disable_metrics(as_error* err, as_cluster* cluster) if (cluster->metrics_enabled) { cluster->metrics_enabled = false; - as_status status = cluster->metrics_listeners->disable_callback(cluster->metrics_policy, cluster, cluster->metrics_policy->udata); + as_status status = cluster->metrics_listeners->disable_callback(err, cluster, cluster->metrics_policy->udata); if (status != AEROSPIKE_OK) { return status; } } + + return AEROSPIKE_OK; } void @@ -655,7 
+659,7 @@ as_cluster_remove_nodes(as_cluster* cluster, as_vector* /* */ nodes_t as_node_deactivate(node); if (cluster->metrics_enabled) { - cluster->metrics_listeners->node_close_callback(node->cluster->metrics_policy, node); + cluster->metrics_listeners->node_close_callback(node, node->cluster->metrics_policy->udata); } } @@ -976,7 +980,7 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) if (cluster->metrics_enabled && (cluster->tend_count % cluster->metrics_policy->interval)) { - cluster->metrics_listeners->snapshot_callback(cluster->metrics_policy, cluster, cluster->metrics_policy->udata); + cluster->metrics_listeners->snapshot_callback(cluster, cluster->metrics_policy->udata); } as_cluster_destroy_peers(&peers); From 8fe251a6d8de82bd98640a74a18d831023242067 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Tue, 23 Jan 2024 13:58:50 -0500 Subject: [PATCH 11/64] Add as_string_builder_append_int64() and as_string_builder_append_uint64(). --- modules/common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/common b/modules/common index 83add8e681..7e02bf445a 160000 --- a/modules/common +++ b/modules/common @@ -1 +1 @@ -Subproject commit 83add8e681b3e25ed467dc67488d68d4e197ee44 +Subproject commit 7e02bf445ae1ba1671378bd13813c7ed42476918 From 233a2366a71ee4272c2885ea9af3e268ee1527f1 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Tue, 23 Jan 2024 15:14:51 -0700 Subject: [PATCH 12/64] Work on warnings --- src/include/aerospike/aerospike_stats.h | 32 ++- src/include/aerospike/as_metrics.h | 35 ++- src/include/aerospike/as_node.h | 2 +- src/main/aerospike/aerospike_stats.c | 38 +-- src/main/aerospike/as_cluster.c | 24 +- src/main/aerospike/as_metrics.c | 335 ++++++++++++++++-------- src/main/aerospike/as_node.c | 2 +- 7 files changed, 303 insertions(+), 165 deletions(-) diff --git a/src/include/aerospike/aerospike_stats.h b/src/include/aerospike/aerospike_stats.h index 19999b05e2..fa39e2358c 100644 --- 
a/src/include/aerospike/aerospike_stats.h +++ b/src/include/aerospike/aerospike_stats.h @@ -245,6 +245,36 @@ aerospike_event_loop_stats(as_event_loop* event_loop, as_event_loop_stats* stats stats->queue_size = as_event_loop_get_queue_size(event_loop); } +static inline void +as_sum_init(as_conn_stats* stats) +{ + stats->in_pool = 0; + stats->in_use = 0; + stats->opened = 0; + stats->closed = 0; +} + +static inline void +as_sum_no_lock(as_async_conn_pool* pool, as_conn_stats* stats) +{ + // Warning: cross-thread reference without a lock. + int tmp = as_queue_size(&pool->queue); + + // Timing issues may cause values to go negative. Adjust. + if (tmp < 0) { + tmp = 0; + } + stats->in_pool += tmp; + tmp = pool->queue.total - tmp; + + if (tmp < 0) { + tmp = 0; + } + stats->in_use += tmp; + stats->opened += pool->opened; + stats->closed += pool->closed; +} + /** * Return string representation of cluster statistics. * The string should be freed when it's no longer needed. @@ -258,7 +288,7 @@ aerospike_stats_to_string(as_cluster_stats* stats); * Enable extended periodic cluster and node latency metrics. */ AS_EXTERN as_status -aerospike_enable_metrics(aerospike* as, as_error* err, const struct as_policy_metrics_s* policy); +aerospike_enable_metrics(aerospike* as, as_error* err, struct as_policy_metrics_s* policy); /** * Disable extended periodic cluster and node latency metrics. 
diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 5912ec8fa3..5be9ef1b04 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -92,11 +92,11 @@ struct as_node_s; */ typedef as_status (*as_metrics_enable_callback)(as_error* err, const struct as_policy_metrics_s* policy); -typedef void (*as_metrics_snapshot_callback)(const struct as_cluster_s* cluster, void* udata); +typedef as_status (*as_metrics_snapshot_callback)(as_error* err, struct as_cluster_s* cluster, void* udata); -typedef void (*as_metrics_node_close_callback)(const struct as_node_s* node, void* udata); +typedef as_status (*as_metrics_node_close_callback)(as_error* err, struct as_node_s* node, void* udata); -typedef as_status (*as_metrics_disable_callback)(as_error* err, const struct as_cluster_s* cluster, void* udata); +typedef as_status (*as_metrics_disable_callback)(as_error* err, struct as_cluster_s* cluster, void* udata); /** * Struct to hold required callbacks @@ -196,6 +196,12 @@ as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_ty void as_metrics_listeners_init(as_metrics_listeners* listeners); +/** + * Open output metrics file and write header + */ +as_status +as_metrics_open_writer(as_metrics_writer* mw, as_error* err); + /** * Calculate CPU and memory usage */ @@ -206,27 +212,34 @@ struct as_cluster_s; /** * Write cluster information to the metrics output file */ -void -as_metrics_write_cluster(as_metrics_writer* mw, const struct as_cluster_s* cluster); +as_status +as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster_s* cluster); -struct as_node_stats_s; +struct as_node_s; /** * Write node information to the metrics output file */ void -as_metrics_write_node(as_metrics_writer* mw, struct as_node_stats_s* node_stats); +as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node_stats); +struct as_conn_stats_s; /** * Write connection information to the 
metrics output file */ void -as_metrics_write_conn(as_metrics_writer* mw, struct as_conn_stats_s* conn_stats); +as_metrics_write_conn(as_metrics_writer* mw, const struct as_conn_stats_s* conn_stats); + +void +as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* async); + +void +as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* sync); /** * Write line to the metrics output file */ -void -as_metrics_write_line(as_metrics_writer* mw); +as_status +as_metrics_write_line(as_metrics_writer* mw, as_error* err); #if defined(__linux__) /** @@ -259,7 +272,7 @@ as_metrics_process_cpu_load(); /** * Gets memory usage using GlobalMemoryStatusEx() */ -double +DWORDLONG as_metrics_process_mem_usage(); #endif diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index 21d7b5f119..517f312e12 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -667,7 +667,7 @@ as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapse * Enable metrics at the node level */ void -as_node_enable_metrics(as_node* node, as_policy_metrics* policy); +as_node_enable_metrics(as_node* node, const as_policy_metrics* policy); /** * Return transaction error count. The value is cumulative and not reset per metrics interval. 
diff --git a/src/main/aerospike/aerospike_stats.c b/src/main/aerospike/aerospike_stats.c index 3850d39751..c2d72f3745 100644 --- a/src/main/aerospike/aerospike_stats.c +++ b/src/main/aerospike/aerospike_stats.c @@ -31,36 +31,6 @@ extern uint32_t as_event_loop_size; * STATIC FUNCTIONS *****************************************************************************/ -static inline void -as_sum_init(as_conn_stats* stats) -{ - stats->in_pool = 0; - stats->in_use = 0; - stats->opened = 0; - stats->closed = 0; -} - -static inline void -as_sum_no_lock(as_async_conn_pool* pool, as_conn_stats* stats) -{ - // Warning: cross-thread reference without a lock. - int tmp = as_queue_size(&pool->queue); - - // Timing issues may cause values to go negative. Adjust. - if (tmp < 0) { - tmp = 0; - } - stats->in_pool += tmp; - tmp = pool->queue.total - tmp; - - if (tmp < 0) { - tmp = 0; - } - stats->in_use += tmp; - stats->opened += pool->opened; - stats->closed += pool->closed; -} - static void as_conn_stats_tostring(as_string_builder* sb, const char* title, as_conn_stats* cs) { @@ -186,7 +156,7 @@ aerospike_stats_to_string(as_cluster_stats* stats) as_conn_stats_tostring(&sb, "pipeline", &node_stats->pipeline); as_string_builder_append_newline(&sb); as_string_builder_append(&sb, "error count: "); - as_string_builder_append_uint(&sb, node_stats->error_count); + as_string_builder_append_uint64(&sb, node_stats->error_count); as_string_builder_append_newline(&sb); } @@ -211,7 +181,7 @@ aerospike_stats_to_string(as_cluster_stats* stats) } as_status -aerospike_enable_metrics(aerospike* as, as_error* err, const struct as_policy_metrics_s* policy) +aerospike_enable_metrics(aerospike* as, as_error* err, struct as_policy_metrics_s* policy) { as_cluster* cluster = as->cluster; as_status status = as_cluster_enable_metrics(err, cluster, policy); @@ -219,6 +189,8 @@ aerospike_enable_metrics(aerospike* as, as_error* err, const struct as_policy_me { return status; } + + return AEROSPIKE_OK; } as_status 
@@ -230,4 +202,6 @@ aerospike_disable_metrics(aerospike* as, as_error* err) { return status; } + + return AEROSPIKE_OK; } diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index c37b27346e..54f1900406 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -646,8 +646,8 @@ as_cluster_get_delay_queue_timeout_count(const as_cluster* cluster) return as_load_uint64(&cluster->delay_queue_timeout_count); } -static void -as_cluster_remove_nodes(as_cluster* cluster, as_vector* /* */ nodes_to_remove) +static as_status +as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* */ nodes_to_remove) { // There is no need to delete nodes from partition tables because the nodes // have already been set to inactive. Further connection requests will result @@ -659,7 +659,11 @@ as_cluster_remove_nodes(as_cluster* cluster, as_vector* /* */ nodes_t as_node_deactivate(node); if (cluster->metrics_enabled) { - cluster->metrics_listeners->node_close_callback(node, node->cluster->metrics_policy->udata); + as_status status = cluster->metrics_listeners->node_close_callback(err, node, node->cluster->metrics_policy->udata); + if (status != AEROSPIKE_OK) + { + return status; + } } } @@ -670,6 +674,8 @@ as_cluster_remove_nodes(as_cluster* cluster, as_vector* /* */ nodes_t if (cluster->shm_info) { as_shm_remove_nodes(cluster, nodes_to_remove); } + + return AEROSPIKE_OK; } static as_status @@ -923,7 +929,11 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) // Remove nodes in a batch. 
if (nodes_to_remove.size > 0) { - as_cluster_remove_nodes(cluster, &nodes_to_remove); + as_status status = as_cluster_remove_nodes(err, cluster, &nodes_to_remove); + if (status != AEROSPIKE_OK) + { + return status; + } nodes = cluster->nodes; } as_vector_destroy(&nodes_to_remove); @@ -980,7 +990,11 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) if (cluster->metrics_enabled && (cluster->tend_count % cluster->metrics_policy->interval)) { - cluster->metrics_listeners->snapshot_callback(cluster, cluster->metrics_policy->udata); + as_status status = cluster->metrics_listeners->snapshot_callback(err, cluster, cluster->metrics_policy->udata); + if (status != AEROSPIKE_OK) + { + return status; + } } as_cluster_destroy_peers(&peers); diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 6861d69f58..4c94404e4e 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -18,9 +18,15 @@ #include #include #include +#include #include #include +//--------------------------------- +// Globals +//--------------------------------- +extern uint32_t as_event_loop_capacity; + //--------------------------------- // Functions //--------------------------------- @@ -152,68 +158,109 @@ as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* policy // create file directory as_metrics_writer* mw = policy->udata; - mw->file = fopen(policy->report_directory, "w"); - - if (!mw->file) - { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Failed to open file: %s", policy->report_directory); - } mw->max_size = policy->report_size_limit; mw->latency_columns = policy->latency_columns; mw->latency_shift = policy->latency_shift; - mw->size = 0; mw->report_directory = policy->report_directory; - as_string_builder_inita(mw->sb, 25, true); - as_string_builder_append(&mw->sb, utc_time_str(time(NULL))); - as_string_builder_append(&mw->sb, " header(1)"); - as_string_builder_append(&mw->sb, " 
cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]]"); - as_string_builder_append(&mw->sb, " eventloop[processSize,queueSize]"); - as_string_builder_append(&mw->sb, " node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]]"); - as_string_builder_append(&mw->sb, " conn[inUse,inPool,opened,closed]"); - as_string_builder_append(&mw->sb, " latency("); - as_string_builder_append(&mw->sb, mw->latency_columns); - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, mw->latency_shift); - as_string_builder_append(&mw->sb, ')'); - as_string_builder_append(&mw->sb, "[type[l1,l2,l3...]]"); - as_metrics_write_line(mw); + as_status status = as_metrics_open_writer(mw, err); + if (status != AEROSPIKE_OK) + { + return status; + } mw->enable = true; + return AEROSPIKE_OK; } -void -as_metrics_writer_snapshot(const struct as_cluster_s* cluster, void* udata) +as_status +as_metrics_open_writer(as_metrics_writer* mw, as_error* err) +{ + const char* now = utc_time_str(time(NULL)); + as_string_builder file_name; + as_string_builder_inita(&file_name, 50, true); + as_string_builder_append(&file_name, mw->report_directory); + as_string_builder_append(&file_name, "metrics-"); + as_string_builder_append(&file_name, now); + as_string_builder_append(&file_name, ".log"); + mw->file = fopen(file_name.data, "w"); + + if (!mw->file) + { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Failed to open file: %s", mw->report_directory); + } + + mw->size = 0; + + as_string_builder_inita(mw->sb, 2048, true); + as_string_builder_append(mw->sb, now); + as_string_builder_append(mw->sb, " header(1)"); + as_string_builder_append(mw->sb, " cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]]"); + as_string_builder_append(mw->sb, " eventloop[processSize,queueSize]"); + as_string_builder_append(mw->sb, " node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]]"); 
+ as_string_builder_append(mw->sb, " conn[inUse,inPool,opened,closed]"); + as_string_builder_append(mw->sb, " latency("); + as_string_builder_append_int(mw->sb, mw->latency_columns); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_int(mw->sb, mw->latency_shift); + as_string_builder_append_char(mw->sb, ')'); + as_string_builder_append(mw->sb, "[type[l1,l2,l3...]]"); + as_status status = as_metrics_write_line(mw, err); + + if (status != AEROSPIKE_OK) + { + return status; + } + + return AEROSPIKE_OK; +} + +as_status +as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* udata) { as_metrics_writer* mw = udata; if (mw->enable && mw->file != NULL) { - as_metrics_write_cluster(mw, cluster); + as_status status = as_metrics_write_cluster(err, mw, cluster); + if (status != AEROSPIKE_OK) + { + return status; + } } + return AEROSPIKE_OK; } -void -as_metrics_writer_node_close(const struct as_node_s* node, void* udata) +as_status +as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) { // write node info to file as_metrics_writer* mw = udata; if (mw->enable && mw->file != NULL) { - as_string_builder_append(&mw->sb, utc_time_str(time(NULL))); - as_metrics_write_node(&mw->sb, node); - as_metrics_write_line(mw); + as_string_builder_append(mw->sb, utc_time_str(time(NULL))); + as_metrics_write_node(mw, node); + as_status status = as_metrics_write_line(mw, err); + if (status != AEROSPIKE_OK) + { + return status; + } } + return AEROSPIKE_OK; } as_status -as_metrics_writer_disable(as_error* err, const struct as_cluster_s* cluster, void* udata) +as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* udata) { // write cluster into to file, disable as_metrics_writer* mw = udata; if (mw->enable && mw->file != NULL) { - as_metrics_write_cluster(mw, cluster); + as_status status = as_metrics_write_cluster(err, mw, cluster); + if (status != AEROSPIKE_OK) + { + return status; + } uint32_t result = 
fclose(mw->file); mw->file = NULL; mw->enable = false; @@ -223,6 +270,8 @@ as_metrics_writer_disable(as_error* err, const struct as_cluster_s* cluster, voi "File stream did not close successfully: %s", mw->report_directory); } } + + return AEROSPIKE_OK; } void @@ -234,8 +283,8 @@ as_metrics_listeners_init(as_metrics_listeners* listeners) listeners->snapshot_callback = as_metrics_writer_snapshot; } -void -as_metrics_write_cluster(as_metrics_writer* mw, const struct as_cluster_s* cluster) { +as_status +as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster_s* cluster) { char* cluster_name = cluster->cluster_name; if (cluster_name == NULL) { @@ -245,78 +294,90 @@ as_metrics_write_cluster(as_metrics_writer* mw, const struct as_cluster_s* clust double* cpu_load; double* mem; as_metrics_process_cpu_load_mem_usage(cpu_load, mem); - as_cluster_stats* stats; - aerospike_cluster_stats(cluster, stats); - - as_string_builder_append(&mw->sb, utc_time_str(time(NULL))); - as_string_builder_append(&mw->sb, " cluster["); - as_string_builder_append(&mw->sb, cluster_name); - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, (int)cpu_load); - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, mem); - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, cluster->retry_count); // Cumulative. Not reset on each interval. - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. 
- as_string_builder_append(&mw->sb, ",["); - - as_event_loop_stats* event_loops = stats->event_loops; - for (uint32_t i = 0; i < stats->event_loops_size; i++) + *cpu_load = *cpu_load + 0.5 - (*cpu_load < 0); + *mem = *mem + 0.5 - (*mem < 0); + uint32_t cpu_load_rounded = (uint32_t)cpu_load; + uint32_t mem_rounded = (uint32_t)mem; + + as_string_builder_append(mw->sb, utc_time_str(time(NULL))); + as_string_builder_append(mw->sb, " cluster["); + as_string_builder_append(mw->sb, cluster_name); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_int(mw->sb, cpu_load_rounded); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_int(mw->sb, mem_rounded); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint(mw->sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint64(mw->sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint64(mw->sb, cluster->retry_count); // Cumulative. Not reset on each interval. + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint64(mw->sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. 
+ as_string_builder_append(mw->sb, ",["); + + for (uint32_t i = 0; i < as_event_loop_size; i++) { - as_event_loop_stats* loop = &event_loops[i]; + as_event_loop* loop = &as_event_loops[i]; if (i > 0) { - as_string_builder_append(&mw->sb, ','); + as_string_builder_append_char(mw->sb, ','); } - as_string_builder_append(&mw->sb, '['); - as_string_builder_append(&mw->sb, loop->process_size); - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, loop->queue_size); - as_string_builder_append(&mw->sb, ']'); + as_string_builder_append_char(mw->sb, '['); + as_string_builder_append_int(mw->sb, as_event_loop_get_process_size(loop)); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint(mw->sb, as_event_loop_get_queue_size(loop)); + as_string_builder_append_char(mw->sb, ']'); } - as_string_builder_append(&mw->sb, '],['); + as_string_builder_append(mw->sb, "],["); - as_node_stats* nodes = stats->nodes; - for (uint32_t i = 0; i < stats->nodes_size; i++) { - as_node_stats* node = &stats->nodes[i]; + as_nodes* nodes = as_nodes_reserve(cluster); + for (uint32_t i = 0; i < nodes->size; i++) { + as_node* node = nodes->array[i]; + as_node_reserve(node); if (i > 0) { - as_string_builder_append(&mw->sb, ","); + as_string_builder_append_char(mw->sb, ','); } - as_metrics_write_node(&mw->sb, node); + as_metrics_write_node(mw, node); + + as_node_release(node); } - as_string_builder_append(&mw->sb, "]]"); + as_string_builder_append(mw->sb, "]]"); - as_metrics_write_line(mw); + as_nodes_release(nodes); + as_status status = as_metrics_write_line(mw, err); + if (status != AEROSPIKE_OK) + { + return status; + } + + return AEROSPIKE_OK; } void -as_metrics_write_node(as_metrics_writer* mw, struct as_node_stats_s* node_stats) +as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node) { - as_node* node = node_stats->node; - as_string_builder_append(&mw->sb, '['); - as_string_builder_append(&mw->sb, node->name); - 
as_string_builder_append(&mw->sb, ','); - - //as_host* host = node-> TODO: how to get host from node? it is in node_info - - //as_string_builder_append(&mw->sb, host->name); - //as_string_builder_append(&mw->sb, ','); - //as_string_builder_append(&mw->sb, host->port); - //as_string_builder_append(&mw->sb, ','); - - as_metrics_write_conn(&mw->sb, &node_stats->sync); - as_string_builder_append(&mw->sb, ','); - as_metrics_write_conn(&mw->sb, &node_stats->async); - as_string_builder_append(&mw->sb, ','); - - as_string_builder_append(&mw->sb, node->error_count); - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, node->timeout_count); - as_string_builder_append(&mw->sb, ',['); + as_string_builder_append_char(mw->sb, '['); + as_string_builder_append(mw->sb, node->name); + as_string_builder_append_char(mw->sb, ','); + + as_string_builder_append(mw->sb, as_node_get_address_string(node)); + as_string_builder_append_char(mw->sb, ','); + + struct as_conn_stats_s* sync; + struct as_conn_stats_s* async; + as_sum_init(sync); + as_sum_init(async); + as_metrics_get_node_sync_conn_stats(node, sync); + as_metrics_write_conn(mw, sync); + as_string_builder_append_char(mw->sb, ','); + as_metrics_get_node_async_conn_stats(node, async); + as_metrics_write_conn(mw, async); + as_string_builder_append_char(mw->sb, ','); + + as_string_builder_append_uint64(mw->sb, as_node_get_error_count(node)); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint64(mw->sb, as_node_get_timeout_count(node)); + as_string_builder_append(mw->sb, ",["); as_node_metrics* node_metrics = node->metrics; uint32_t max = AS_LATENCY_TYPE_NONE; @@ -324,48 +385,92 @@ as_metrics_write_node(as_metrics_writer* mw, struct as_node_stats_s* node_stats) for (uint32_t i = 0; i < max; i++) { if (i > 0) { - as_string_builder_append(&mw->sb, ","); + as_string_builder_append_char(mw->sb, ','); } - as_string_builder_append(&mw->sb, as_latency_type_to_string(i)); - 
as_string_builder_append(&mw->sb, '['); + as_string_builder_append(mw->sb, as_latency_type_to_string(i)); + as_string_builder_append_char(mw->sb, '['); as_latency_buckets* buckets = &node_metrics->latency[i]; uint32_t bucket_max = buckets->latency_columns; for (uint32_t j = 0; j < bucket_max; j++) { if (j > 0) { - as_string_builder_append(&mw->sb, ','); + as_string_builder_append_char(mw->sb, ','); } - as_string_builder_append(&mw->sb, as_metrics_get_bucket(&buckets, i)); + as_string_builder_append_uint64(mw->sb, as_metrics_get_bucket(buckets, i)); } - as_string_builder_append(&mw->sb, ']'); + as_string_builder_append_char(mw->sb, ']'); } - as_string_builder_append(&mw->sb, ']]'); + as_string_builder_append(mw->sb, "]]"); } void -as_metrics_write_conn(as_metrics_writer* mw, struct as_conn_stats_s* conn_stats) +as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* sync) { - as_string_builder_append(&mw->sb, conn_stats->in_use); - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, conn_stats->in_pool); - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, conn_stats->opened); // Cumulative. Not reset on each interval. - as_string_builder_append(&mw->sb, ','); - as_string_builder_append(&mw->sb, conn_stats->closed); // Cumulative. Not reset on each interval. + uint32_t max = node->cluster->conn_pools_per_node; + // Sync connection summary. 
+ for (uint32_t i = 0; i < max; i++) { + as_conn_pool* pool = &node->sync_conn_pools[i]; + + pthread_mutex_lock(&pool->lock); + uint32_t in_pool = as_queue_size(&pool->queue); + uint32_t total = pool->queue.total; + pthread_mutex_unlock(&pool->lock); + + sync->in_pool += in_pool; + sync->in_use += total - in_pool; + } + sync->opened = node->sync_conns_opened; + sync->closed = node->sync_conns_closed; } void -as_metrics_write_line(as_metrics_writer* mw) +as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* async) { - as_string_builder_append_newline(&mw->sb); - fprintf(mw->file, &mw->sb->data); + // Async connection summary. + if (as_event_loop_capacity > 0) { + for (uint32_t i = 0; i < as_event_loop_size; i++) { + // Regular async. + as_sum_no_lock(&node->async_conn_pools[i], async); + } + } +} + +void +as_metrics_write_conn(as_metrics_writer* mw, const struct as_conn_stats_s* stats) +{ + as_string_builder_append_uint(mw->sb, stats->in_use); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint(mw->sb, stats->in_pool); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint(mw->sb, stats->opened); // Cumulative. Not reset on each interval. + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint(mw->sb, stats->closed); // Cumulative. Not reset on each interval. +} + +as_status +as_metrics_write_line(as_metrics_writer* mw, as_error* err) +{ + as_string_builder_append_newline(mw->sb); + fprintf(mw->file, mw->sb->data); mw->size += mw->sb->length; if (mw->max_size > 0 && mw->size >= mw->max_size) { - // write new file? 
+ uint32_t result = fclose(mw->file); + if (result != 0) + { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "File stream did not close successfully: %s", mw->report_directory); + } + as_status status = as_metrics_open_writer(mw, err); + if (status != AEROSPIKE_OK) + { + return status; + } } + + return AEROSPIKE_OK; } #if defined(__linux__) @@ -425,7 +530,9 @@ void as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem) { *cpu_usage = as_metrics_process_cpu_load(); - *mem = as_metrics_process_mem_usage(); + DWORDLONG dword = as_metrics_process_mem_usage(); + dword = dword + 0.5 - (dword < 0); + *mem = dword; } static double @@ -461,7 +568,7 @@ as_metrics_process_cpu_load() as_metrics_calculate_cpu_load(as_metrics_file_time_to_uint_64(idleTime), as_metrics_file_time_to_uint_64(kernelTime) + as_metrics_file_time_to_uint_64(userTime)) * 100: -1.0f; } -double +DWORDLONG as_metrics_process_mem_usage() { MEMORYSTATUSEX statex; diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 01d55a8971..3df6ce5e5a 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -1313,7 +1313,7 @@ as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapse } void -as_node_enable_metrics(as_node* node, as_policy_metrics* policy) +as_node_enable_metrics(as_node* node, const as_policy_metrics* policy) { as_node_metrics_init(node->metrics, policy); } From fc7f3228ede11a6ce0393882dad8c669c2964f4b Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Wed, 24 Jan 2024 09:05:08 -0700 Subject: [PATCH 13/64] Fix warnings --- src/include/aerospike/as_metrics.h | 12 +++-- src/include/aerospike/as_node.h | 8 +-- src/main/aerospike/as_metrics.c | 80 ++++++++++++++++-------------- 3 files changed, 56 insertions(+), 44 deletions(-) diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 5be9ef1b04..6ae9bd8c52 100644 --- a/src/include/aerospike/as_metrics.h +++ 
b/src/include/aerospike/as_metrics.h @@ -206,7 +206,7 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err); * Calculate CPU and memory usage */ void -as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem); +as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem); struct as_cluster_s; /** @@ -229,9 +229,15 @@ struct as_conn_stats_s; void as_metrics_write_conn(as_metrics_writer* mw, const struct as_conn_stats_s* conn_stats); +/** + * Calculate sync conn stats data + */ void as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* async); +/** + * Calculate async conn stats data + */ void as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* sync); @@ -270,9 +276,9 @@ double as_metrics_process_cpu_load(); /** - * Gets memory usage using GlobalMemoryStatusEx() + * Gets memory usage using GetProcessMemoryInfo() */ -DWORDLONG +uint32_t as_metrics_process_mem_usage(); #endif diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index 517f312e12..ffb0011174 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -327,11 +327,11 @@ typedef struct as_node_s { * Error count for this node's error_rate_window. */ uint32_t error_rate; - + /** - * Transaction error count since node was initialized. If the error is retryable, multiple errors per - * transaction may occur. - */ + * Transaction error count since node was initialized. If the error is retryable, multiple errors per + * transaction may occur. 
+ */ uint64_t error_count; /** diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 4c94404e4e..e273f860e8 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -291,21 +291,17 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster cluster_name = ""; } - double* cpu_load; - double* mem; - as_metrics_process_cpu_load_mem_usage(cpu_load, mem); - *cpu_load = *cpu_load + 0.5 - (*cpu_load < 0); - *mem = *mem + 0.5 - (*mem < 0); - uint32_t cpu_load_rounded = (uint32_t)cpu_load; - uint32_t mem_rounded = (uint32_t)mem; + uint32_t cpu_load = 0; + uint32_t mem = 0; + as_metrics_process_cpu_load_mem_usage(&cpu_load, &mem); as_string_builder_append(mw->sb, utc_time_str(time(NULL))); as_string_builder_append(mw->sb, " cluster["); as_string_builder_append(mw->sb, cluster_name); as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_int(mw->sb, cpu_load_rounded); + as_string_builder_append_int(mw->sb, cpu_load); as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_int(mw->sb, mem_rounded); + as_string_builder_append_int(mw->sb, mem); as_string_builder_append_char(mw->sb, ','); as_string_builder_append_uint(mw->sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. 
as_string_builder_append_char(mw->sb, ','); @@ -363,15 +359,15 @@ as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node) as_string_builder_append(mw->sb, as_node_get_address_string(node)); as_string_builder_append_char(mw->sb, ','); - struct as_conn_stats_s* sync; - struct as_conn_stats_s* async; - as_sum_init(sync); - as_sum_init(async); - as_metrics_get_node_sync_conn_stats(node, sync); - as_metrics_write_conn(mw, sync); + struct as_conn_stats_s sync; + struct as_conn_stats_s async; + as_sum_init(&sync); + as_sum_init(&async); + as_metrics_get_node_sync_conn_stats(node, &sync); + as_metrics_write_conn(mw, &sync); as_string_builder_append_char(mw->sb, ','); - as_metrics_get_node_async_conn_stats(node, async); - as_metrics_write_conn(mw, async); + as_metrics_get_node_async_conn_stats(node, &async); + as_metrics_write_conn(mw, &async); as_string_builder_append_char(mw->sb, ','); as_string_builder_append_uint64(mw->sb, as_node_get_error_count(node)); @@ -475,10 +471,14 @@ as_metrics_write_line(as_metrics_writer* mw, as_error* err) #if defined(__linux__) void -as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem) +as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem) { - double resident_set; - as_metrics_proc_stat_mem_cpu(mem, resident_set, cpu_usage); + double resident_set, mem_d, cpu_usage_d; + as_metrics_proc_stat_mem_cpu(&mem_d, &resident_set, &cpu_usage_d); + cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); + mem_d = mem_d + 0.5 - (mem_d < 0); + *cpu_usage = (uint32_t)cpu_usage_d; + *mem = (uint32_t)mem_d; } void @@ -512,27 +512,28 @@ as_metrics_proc_stat_mem_cpu(double* vm_usage, double* resident_set, double* cpu stat_stream.close(); int64_t page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages - vm_usage = vsize / 1024.0; - resident_set = rss * page_size_kb; + *vm_usage = vsize / 1024.0; + *resident_set = rss * page_size_kb; uint64_t u_time_sec = utime / 
sysconf(_SC_CLK_TCK); uint64_t s_time_sec = stime / sysconf(_SC_CLK_TCK); uint64_t start_time_sec = starttime / sysconf(_SC_CLK_TCK); - cpu_usage = (u_time_sec + s_time_sec) / (cf_get_seconds() - start_time_sec); + *cpu_usage = (u_time_sec + s_time_sec) / (cf_get_seconds() - start_time_sec); } #endif #if defined(_MSC_VER) #include +#include void -as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem) +as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem) { - *cpu_usage = as_metrics_process_cpu_load(); - DWORDLONG dword = as_metrics_process_mem_usage(); - dword = dword + 0.5 - (dword < 0); - *mem = dword; + double cpu_usage_d = as_metrics_process_cpu_load(); + cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); + *cpu_usage = (uint32_t)cpu_usage_d; + *mem = as_metrics_process_mem_usage(); } static double @@ -568,15 +569,15 @@ as_metrics_process_cpu_load() as_metrics_calculate_cpu_load(as_metrics_file_time_to_uint_64(idleTime), as_metrics_file_time_to_uint_64(kernelTime) + as_metrics_file_time_to_uint_64(userTime)) * 100: -1.0f; } -DWORDLONG +uint32_t as_metrics_process_mem_usage() { - MEMORYSTATUSEX statex; - - statex.dwLength = sizeof(statex); + PROCESS_MEMORY_COUNTERS memCounter; + BOOL result = GetProcessMemoryInfo(GetCurrentProcess(), + &memCounter, + sizeof(memCounter)); - GlobalMemoryStatusEx(&statex); - return statex.ullTotalVirtual - statex.ullAvailVirtual; + return (uint32_t)memCounter.WorkingSetSize; } #endif @@ -587,10 +588,14 @@ as_metrics_process_mem_usage() #include void -as_metrics_process_cpu_load_mem_usage(double* cpu_usage, double* mem) +as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem) { - *cpu_usage = as_metrics_process_cpu_load(); - *mem = as_metrics_process_mem_usage(); + double cpu_usage_d = as_metrics_process_cpu_load(); + double mem_d = as_metrics_process_mem_usage(); + cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); + mem_d = mem_d + 0.5 - (mem_d < 0); + *cpu_usage = 
(uint32_t)cpu_usage_d; + *mem = (uint32_t)mem_d; } double @@ -608,6 +613,7 @@ as_metrics_process_mem_usage() return t_info.virtual_size; } + double as_metrics_process_cpu_load() { From a8f8bc0c72fb716f432d0d74d6cab0f68d235c0c Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Thu, 25 Jan 2024 13:36:25 -0700 Subject: [PATCH 14/64] Fixing bugs --- Makefile | 1 + src/include/aerospike/as_cluster.h | 8 +- src/include/aerospike/as_metrics.h | 144 +++---- src/main/aerospike/aerospike_stats.c | 9 +- src/main/aerospike/as_cluster.c | 46 ++- src/main/aerospike/as_metrics.c | 589 ++++++++++++++------------- src/main/aerospike/as_node.c | 8 +- src/test/aerospike_test.c | 9 + 8 files changed, 402 insertions(+), 412 deletions(-) diff --git a/Makefile b/Makefile index da78c30a6a..c99a0c2ee4 100644 --- a/Makefile +++ b/Makefile @@ -136,6 +136,7 @@ AEROSPIKE += as_key.o AEROSPIKE += as_list_operations.o AEROSPIKE += as_lookup.o AEROSPIKE += as_map_operations.o +AEROSPIKE += as_metrics.o AEROSPIKE += as_node.o AEROSPIKE += as_operations.o AEROSPIKE += as_partition.o diff --git a/src/include/aerospike/as_cluster.h b/src/include/aerospike/as_cluster.h index a3137dc6e4..54469845c7 100644 --- a/src/include/aerospike/as_cluster.h +++ b/src/include/aerospike/as_cluster.h @@ -383,9 +383,13 @@ typedef struct as_cluster_s { bool metrics_enabled; - as_policy_metrics* metrics_policy; + uint32_t metrics_interval; - as_metrics_listeners* metrics_listeners; + uint32_t metrics_latency_columns; + + uint32_t metrics_latency_shift; + + as_metrics_listeners metrics_listeners; uint64_t retry_count; diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 6ae9bd8c52..f81477d752 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -58,56 +58,54 @@ typedef uint8_t as_latency_type; * Latency bucket counts are cumulative and not reset on each metrics snapshot interval */ typedef struct as_latency_buckets_s { - int32_t latency_shift; - 
int32_t latency_columns; + uint32_t latency_shift; + uint32_t latency_columns; uint64_t* buckets; } as_latency_buckets; -struct as_metrics_listeners_s; - -/** -* Metrics Policy -*/ -typedef struct as_policy_metrics_s { - const char* report_directory; // where the metrics file is output - - int64_t report_size_limit; // default 0 - - int32_t interval; // default 30 - - int32_t latency_columns; // default 7 - - int32_t latency_shift; // default 1 - - struct as_metrics_listeners_s* metrics_listeners; - - void* udata; -} as_policy_metrics; - -struct as_cluster_s; +struct as_policy_metrics_s; struct as_node_s; +struct as_cluster_s; /** * Callbacks for metrics listener operations */ -typedef as_status (*as_metrics_enable_callback)(as_error* err, const struct as_policy_metrics_s* policy); +typedef as_status(*as_metrics_enable_listener)(as_error* err, const struct as_policy_metrics_s* policy, void* udata); -typedef as_status (*as_metrics_snapshot_callback)(as_error* err, struct as_cluster_s* cluster, void* udata); +typedef as_status(*as_metrics_snapshot_listener)(as_error* err, struct as_cluster_s* cluster, void* udata); -typedef as_status (*as_metrics_node_close_callback)(as_error* err, struct as_node_s* node, void* udata); +typedef as_status(*as_metrics_node_close_listener)(as_error* err, struct as_node_s* node, void* udata); -typedef as_status (*as_metrics_disable_callback)(as_error* err, struct as_cluster_s* cluster, void* udata); +typedef as_status(*as_metrics_disable_listener)(as_error* err, struct as_cluster_s* cluster, void* udata); /** * Struct to hold required callbacks */ typedef struct as_metrics_listeners_s { - as_metrics_enable_callback enable_callback; - as_metrics_snapshot_callback snapshot_callback; - as_metrics_node_close_callback node_close_callback; - as_metrics_disable_callback disable_callback; + as_metrics_enable_listener enable_listener; + as_metrics_snapshot_listener snapshot_listener; + as_metrics_node_close_listener node_close_listener; + 
as_metrics_disable_listener disable_listener; + void* udata; } as_metrics_listeners; +/** +* Metrics Policy +*/ +typedef struct as_policy_metrics_s { + const char* report_directory; // where the metrics file is output + + uint64_t report_size_limit; // default 0 + + uint32_t interval; // default 30 + + uint32_t latency_columns; // default 7 + + uint32_t latency_shift; // default 1 + + as_metrics_listeners metrics_listeners; +} as_policy_metrics; + /** * Node metrics latency bucket struct */ @@ -129,25 +127,32 @@ typedef struct as_metrics_writer_s { uint64_t size; - int32_t latency_columns; + uint32_t latency_columns; - int32_t latency_shift; + uint32_t latency_shift; const char* report_directory; } as_metrics_writer; -/** - * Format time into UTC string - */ -const char* -utc_time_str(time_t t); - /** * Initalize metrics policy */ -void +AS_EXTERN void as_metrics_policy_init(as_policy_metrics* policy); +static inline void +as_metrics_set_listeners( + as_policy_metrics* policy, as_metrics_enable_listener enable, + as_metrics_disable_listener disable, as_metrics_node_close_listener node_close, + as_metrics_snapshot_listener snapshot + ) +{ + policy->metrics_listeners.enable_listener = enable; + policy->metrics_listeners.disable_listener = disable; + policy->metrics_listeners.node_close_listener = node_close; + policy->metrics_listeners.snapshot_listener = snapshot; +} + /** * Convert latency_type to string version for printing to the output file */ @@ -158,7 +163,7 @@ as_latency_type_to_string(as_latency_type type); * Initalize latency bucket struct */ void -as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, int32_t latency_columns, int32_t latency_shift); +as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, uint32_t latency_columns, uint32_t latency_shift); /** * Return cumulative count of a bucket. 
@@ -181,8 +186,8 @@ as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos /** * Initalize node metrics struct */ -void -as_node_metrics_init(as_node_metrics* node_metrics, const as_policy_metrics* policy); +as_node_metrics* +as_node_metrics_init(uint32_t latency_columns, uint32_t latency_shift); /** * Add latency to corresponding bucket type @@ -190,63 +195,12 @@ as_node_metrics_init(as_node_metrics* node_metrics, const as_policy_metrics* pol void as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed); -/** - * Initalize metrics listener struct - */ -void -as_metrics_listeners_init(as_metrics_listeners* listeners); - -/** - * Open output metrics file and write header - */ -as_status -as_metrics_open_writer(as_metrics_writer* mw, as_error* err); - /** * Calculate CPU and memory usage */ void as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem); -struct as_cluster_s; -/** - * Write cluster information to the metrics output file - */ -as_status -as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster_s* cluster); - -struct as_node_s; -/** - * Write node information to the metrics output file - */ -void -as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node_stats); - -struct as_conn_stats_s; -/** - * Write connection information to the metrics output file - */ -void -as_metrics_write_conn(as_metrics_writer* mw, const struct as_conn_stats_s* conn_stats); - -/** - * Calculate sync conn stats data - */ -void -as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* async); - -/** - * Calculate async conn stats data - */ -void -as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* sync); - -/** - * Write line to the metrics output file - */ -as_status -as_metrics_write_line(as_metrics_writer* mw, as_error* err); - #if defined(__linux__) /** * Gets memory and CPU usage 
information from proc/stat diff --git a/src/main/aerospike/aerospike_stats.c b/src/main/aerospike/aerospike_stats.c index c2d72f3745..3c004b88c5 100644 --- a/src/main/aerospike/aerospike_stats.c +++ b/src/main/aerospike/aerospike_stats.c @@ -183,14 +183,7 @@ aerospike_stats_to_string(as_cluster_stats* stats) as_status aerospike_enable_metrics(aerospike* as, as_error* err, struct as_policy_metrics_s* policy) { - as_cluster* cluster = as->cluster; - as_status status = as_cluster_enable_metrics(err, cluster, policy); - if (status != AEROSPIKE_OK) - { - return status; - } - - return AEROSPIKE_OK; + return as_cluster_enable_metrics(err, as->cluster, policy); } as_status diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index 54f1900406..97bd509034 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -557,31 +557,28 @@ as_cluster_remove_nodes_copy(as_cluster* cluster, as_vector* /* */ no as_status as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_policy_metrics* policy) { - if (cluster->metrics_enabled) - { - cluster->metrics_listeners->disable_callback(err, cluster, policy->udata); + if (cluster->metrics_enabled) { + cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); } cluster->metrics_listeners = policy->metrics_listeners; - if (cluster->metrics_listeners == NULL) - { - as_metrics_listeners_init(cluster->metrics_listeners); - } - - cluster->metrics_policy = policy; + cluster->metrics_interval = policy->interval; + cluster->metrics_latency_columns = policy->latency_columns; + cluster->metrics_latency_shift = policy->latency_shift; - as_nodes* nodes = cluster->nodes; + as_nodes* nodes = as_nodes_reserve(cluster); for (uint32_t i = 0; i < nodes->size; i++) { as_node* node = nodes->array[i]; as_node_enable_metrics(node, policy); } - as_status status = cluster->metrics_listeners->enable_callback(err, policy); - if (status != AEROSPIKE_OK) - { + 
as_nodes_release(nodes); + + as_status status = cluster->metrics_listeners.enable_listener(err, policy, cluster->metrics_listeners.udata); + if (status != AEROSPIKE_OK) { return status; } - + cluster->metrics_enabled = true; return AEROSPIKE_OK; } @@ -591,11 +588,7 @@ as_cluster_disable_metrics(as_error* err, as_cluster* cluster) if (cluster->metrics_enabled) { cluster->metrics_enabled = false; - as_status status = cluster->metrics_listeners->disable_callback(err, cluster, cluster->metrics_policy->udata); - if (status != AEROSPIKE_OK) - { - return status; - } + return cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); } return AEROSPIKE_OK; @@ -659,7 +652,7 @@ as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* metrics_enabled) { - as_status status = cluster->metrics_listeners->node_close_callback(err, node, node->cluster->metrics_policy->udata); + as_status status = cluster->metrics_listeners.node_close_listener(err, node, node->cluster->metrics_listeners.udata); if (status != AEROSPIKE_OK) { return status; @@ -988,9 +981,9 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) as_incr_uint32(&cluster->shm_info->cluster_shm->rebalance_gen); } - if (cluster->metrics_enabled && (cluster->tend_count % cluster->metrics_policy->interval)) + if (cluster->metrics_enabled && (cluster->tend_count % cluster->metrics_interval)) { - as_status status = cluster->metrics_listeners->snapshot_callback(err, cluster, cluster->metrics_policy->udata); + as_status status = cluster->metrics_listeners.snapshot_listener(err, cluster, cluster->metrics_listeners.udata); if (status != AEROSPIKE_OK) { return status; @@ -1510,6 +1503,15 @@ as_cluster_create(as_config* config, as_error* err, as_cluster** cluster_out) } pthread_attr_destroy(&attr); } + + // Initialize metrics fields + cluster->metrics_enabled = false; + cluster->metrics_interval = 0; + cluster->metrics_latency_columns = 0; + 
cluster->metrics_latency_shift = 0; + cluster->tran_count = 0; + cluster->retry_count = 0; + cluster->delay_queue_timeout_count = 0; *cluster_out = cluster; return AEROSPIKE_OK; } diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index e273f860e8..2ade9dd8c5 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -28,172 +28,93 @@ extern uint32_t as_event_loop_capacity; //--------------------------------- -// Functions +// Static Functions //--------------------------------- -const char* -utc_time_str(time_t t) +static const char* +time_str_file(time_t t) { static char buf[UTC_STR_LEN + 1]; - struct tm* utc = gmtime(&t); + struct tm* local = localtime(&t); snprintf(buf, sizeof(buf), - "%4d-%02d-%02dT%02d:%02d:%02dZ", - 1900 + utc->tm_year, utc->tm_mon + 1, utc->tm_mday, - utc->tm_hour, utc->tm_min, utc->tm_sec); + "%4d%02d%02d%02d%02d%02dZ", + 1900 + local->tm_year, local->tm_mon + 1, local->tm_mday, + local->tm_hour, local->tm_min, local->tm_sec); return buf; } - -void -as_metrics_policy_init(as_policy_metrics* policy) -{ - policy->report_size_limit = 0; - policy->interval = 30; - policy->latency_columns = 7; - policy->latency_shift = 1; -} - -char* -as_latency_type_to_string(as_latency_type type) +static const char* +time_str(time_t t) { - switch (type) - { - case AS_LATENCY_TYPE_CONN: - return "conn"; - break; - case AS_LATENCY_TYPE_WRITE: - return "write"; - break; - case AS_LATENCY_TYPE_READ: - return "read"; - break; - case AS_LATENCY_TYPE_BATCH: - return "batch"; - break; - case AS_LATENCY_TYPE_QUERY: - return "query"; - break; - case AS_LATENCY_TYPE_NONE: - return "none"; - break; - default: - return "none"; - break; - } -} - -void -as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, int32_t latency_columns, int32_t latency_shift) -{ - latency_buckets->latency_columns = latency_columns; - latency_buckets->latency_shift = latency_shift; - latency_buckets->buckets = 
cf_malloc(sizeof(uint64_t) * latency_columns); -} - -uint64_t -as_metrics_get_bucket(as_latency_buckets* buckets, uint32_t i) -{ - return as_load_uint64(&buckets[i]); + static char buf[UTC_STR_LEN + 1]; + struct tm* local = localtime(&t); + snprintf(buf, sizeof(buf), + "%4d-%02d-%02d %02d:%02d:%02dZ", + 1900 + local->tm_year, local->tm_mon + 1, local->tm_mday, + local->tm_hour, local->tm_min, local->tm_sec); + return buf; } -void -as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t elapsed) -{ - uint32_t index = as_metrics_get_index(latency_buckets, elapsed); - as_incr_uint64(&latency_buckets->buckets[index]); -} +static as_status +as_metrics_open_writer(as_metrics_writer* mw, as_error* err); -uint32_t -as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos) +static as_status +as_metrics_write_line(as_metrics_writer* mw, as_error* err) { - // Convert nanoseconds to milliseconds. - uint64_t elapsed = elapsed_nanos / NS_TO_MS; - - // Round up elapsed to nearest millisecond. 
- if ((elapsed_nanos - (elapsed * NS_TO_MS)) > 0) - { - elapsed++; - } + as_string_builder_append_newline(mw->sb); + fprintf(mw->file, "%s", mw->sb->data); + mw->size += mw->sb->length; + as_string_builder_reset(mw->sb); - uint32_t last_bucket = latency_buckets->latency_columns - 1; - uint64_t limit = 1; + if (mw->max_size > 0 && mw->size >= mw->max_size) { + uint32_t result = fclose(mw->file); - for (uint32_t i = 0; i < last_bucket; i++) - { - if (elapsed <= limit) - { - return i; + if (result != 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "File stream did not close successfully: %s", mw->report_directory); } - limit <<= latency_buckets->latency_shift; + return as_metrics_open_writer(mw, err); } - return last_bucket; -} - -void -as_node_metrics_init(as_node_metrics* node_metrics, const as_policy_metrics* policy) -{ - uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; - node_metrics->latency = cf_malloc(sizeof(as_latency_buckets) * max_latency_type); - for (uint32_t i = 0; i < max_latency_type; i++) - { - as_metrics_latency_buckets_init(&node_metrics->latency[i], policy->latency_columns, policy->latency_shift); - } -} - -void -as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed) -{ - as_metrics_latency_buckets_add(&node_metrics->latency[latency_type], elapsed); + return AEROSPIKE_OK; } -as_status -as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* policy) +static void* +as_metrics_writer_init_udata() { - if (policy->report_size_limit != 0 && policy->report_size_limit < MIN_FILE_SIZE) - { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Metrics policy report_size_limit %d must be at least %d", policy->report_size_limit, MIN_FILE_SIZE); - } - - // create file directory - as_metrics_writer* mw = policy->udata; - mw->max_size = policy->report_size_limit; - mw->latency_columns = policy->latency_columns; - mw->latency_shift = policy->latency_shift; - mw->report_directory = 
policy->report_directory; - - as_status status = as_metrics_open_writer(mw, err); - if (status != AEROSPIKE_OK) - { - return status; - } - - mw->enable = true; - return AEROSPIKE_OK; + as_metrics_writer* mw = (as_metrics_writer *)cf_malloc(sizeof(as_metrics_writer)); + mw->file = NULL; + mw->sb = NULL; + mw->enable = false; + mw->max_size = 0; + mw->latency_columns = 0; + mw->latency_shift = 0; + mw->report_directory = NULL; + + return mw; } -as_status +static as_status as_metrics_open_writer(as_metrics_writer* mw, as_error* err) { - const char* now = utc_time_str(time(NULL)); + const char* now_file = time_str_file(time(NULL)); as_string_builder file_name; - as_string_builder_inita(&file_name, 50, true); + as_string_builder_inita(&file_name, 100, true); as_string_builder_append(&file_name, mw->report_directory); - as_string_builder_append(&file_name, "metrics-"); - as_string_builder_append(&file_name, now); + as_string_builder_append(&file_name, "\\metrics-"); + as_string_builder_append(&file_name, now_file); as_string_builder_append(&file_name, ".log"); mw->file = fopen(file_name.data, "w"); - if (!mw->file) - { + if (!mw->file) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, "Failed to open file: %s", mw->report_directory); } mw->size = 0; - + mw->sb = (as_string_builder*)cf_malloc(sizeof(as_string_builder)); as_string_builder_inita(mw->sb, 2048, true); + const char* now = time_str(time(NULL)); as_string_builder_append(mw->sb, now); as_string_builder_append(mw->sb, " header(1)"); as_string_builder_append(mw->sb, " cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]]"); @@ -206,88 +127,110 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) as_string_builder_append_int(mw->sb, mw->latency_shift); as_string_builder_append_char(mw->sb, ')'); as_string_builder_append(mw->sb, "[type[l1,l2,l3...]]"); - as_status status = as_metrics_write_line(mw, err); + return as_metrics_write_line(mw, err); +} - if 
(status != AEROSPIKE_OK) - { - return status; - } +static void +as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* sync) +{ + uint32_t max = node->cluster->conn_pools_per_node; - return AEROSPIKE_OK; + // Sync connection summary. + for (uint32_t i = 0; i < max; i++) { + as_conn_pool* pool = &node->sync_conn_pools[i]; + + pthread_mutex_lock(&pool->lock); + uint32_t in_pool = as_queue_size(&pool->queue); + uint32_t total = pool->queue.total; + pthread_mutex_unlock(&pool->lock); + + sync->in_pool += in_pool; + sync->in_use += total - in_pool; + } + sync->opened = node->sync_conns_opened; + sync->closed = node->sync_conns_closed; } -as_status -as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* udata) +static void +as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* async) { - as_metrics_writer* mw = udata; - if (mw->enable && mw->file != NULL) - { - as_status status = as_metrics_write_cluster(err, mw, cluster); - if (status != AEROSPIKE_OK) - { - return status; + // Async connection summary. + if (as_event_loop_capacity > 0) { + for (uint32_t i = 0; i < as_event_loop_size; i++) { + // Regular async. 
+ as_sum_no_lock(&node->async_conn_pools[i], async); } } - return AEROSPIKE_OK; } -as_status -as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) +static void +as_metrics_write_conn(as_metrics_writer* mw, const struct as_conn_stats_s* stats) { - // write node info to file - as_metrics_writer* mw = udata; - if (mw->enable && mw->file != NULL) - { - as_string_builder_append(mw->sb, utc_time_str(time(NULL))); - as_metrics_write_node(mw, node); - as_status status = as_metrics_write_line(mw, err); - if (status != AEROSPIKE_OK) - { - return status; - } - } - return AEROSPIKE_OK; + as_string_builder_append_uint(mw->sb, stats->in_use); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint(mw->sb, stats->in_pool); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint(mw->sb, stats->opened); // Cumulative. Not reset on each interval. + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint(mw->sb, stats->closed); // Cumulative. Not reset on each interval. 
} -as_status -as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* udata) +static void +as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node) { - // write cluster into to file, disable - as_metrics_writer* mw = udata; - if (mw->enable && mw->file != NULL) - { - as_status status = as_metrics_write_cluster(err, mw, cluster); - if (status != AEROSPIKE_OK) - { - return status; - } - uint32_t result = fclose(mw->file); - mw->file = NULL; - mw->enable = false; - if (result != 0) - { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "File stream did not close successfully: %s", mw->report_directory); + as_string_builder_append_char(mw->sb, '['); + as_string_builder_append(mw->sb, node->name); + as_string_builder_append_char(mw->sb, ','); + + as_string_builder_append(mw->sb, as_node_get_address_string(node)); + as_string_builder_append_char(mw->sb, ','); + + struct as_conn_stats_s sync; + struct as_conn_stats_s async; + as_sum_init(&sync); + as_sum_init(&async); + as_metrics_get_node_sync_conn_stats(node, &sync); + as_metrics_write_conn(mw, &sync); + as_string_builder_append_char(mw->sb, ','); + as_metrics_get_node_async_conn_stats(node, &async); + as_metrics_write_conn(mw, &async); + as_string_builder_append_char(mw->sb, ','); + + as_string_builder_append_uint64(mw->sb, as_node_get_error_count(node)); + as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_uint64(mw->sb, as_node_get_timeout_count(node)); + as_string_builder_append(mw->sb, ",["); + + as_node_metrics* node_metrics = node->metrics; + uint32_t max = AS_LATENCY_TYPE_NONE; + + + for (uint32_t i = 0; i < max; i++) { + if (i > 0) { + as_string_builder_append_char(mw->sb, ','); } - } + as_string_builder_append(mw->sb, as_latency_type_to_string(i)); + as_string_builder_append_char(mw->sb, '['); - return AEROSPIKE_OK; -} + as_latency_buckets* buckets = &node_metrics->latency[i]; + uint32_t bucket_max = buckets->latency_columns; -void 
-as_metrics_listeners_init(as_metrics_listeners* listeners) -{ - listeners->enable_callback = as_metrics_writer_enable; - listeners->disable_callback = as_metrics_writer_disable; - listeners->node_close_callback = as_metrics_writer_node_close; - listeners->snapshot_callback = as_metrics_writer_snapshot; + for (uint32_t j = 0; j < bucket_max; j++) { + if (j > 0) { + as_string_builder_append_char(mw->sb, ','); + } + as_string_builder_append_uint64(mw->sb, as_metrics_get_bucket(buckets, i)); + } + as_string_builder_append_char(mw->sb, ']'); + } + as_string_builder_append(mw->sb, "]]"); } -as_status +static as_status as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster_s* cluster) { char* cluster_name = cluster->cluster_name; - if (cluster_name == NULL) - { + + if (cluster_name == NULL) { cluster_name = ""; } @@ -295,7 +238,7 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster uint32_t mem = 0; as_metrics_process_cpu_load_mem_usage(&cpu_load, &mem); - as_string_builder_append(mw->sb, utc_time_str(time(NULL))); + as_string_builder_append(mw->sb, time_str(time(NULL))); as_string_builder_append(mw->sb, " cluster["); as_string_builder_append(mw->sb, cluster_name); as_string_builder_append_char(mw->sb, ','); @@ -312,8 +255,7 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster as_string_builder_append_uint64(mw->sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. 
as_string_builder_append(mw->sb, ",["); - for (uint32_t i = 0; i < as_event_loop_size; i++) - { + for (uint32_t i = 0; i < as_event_loop_size; i++) { as_event_loop* loop = &as_event_loops[i]; if (i > 0) { as_string_builder_append_char(mw->sb, ','); @@ -330,6 +272,7 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster for (uint32_t i = 0; i < nodes->size; i++) { as_node* node = nodes->array[i]; as_node_reserve(node); + if (i > 0) { as_string_builder_append_char(mw->sb, ','); } @@ -340,136 +283,216 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster as_string_builder_append(mw->sb, "]]"); as_nodes_release(nodes); - as_status status = as_metrics_write_line(mw, err); - if (status != AEROSPIKE_OK) - { + return as_metrics_write_line(mw, err); +} + +static as_status +as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* policy, void* udata) +{ + if (policy->report_size_limit != 0 && policy->report_size_limit < MIN_FILE_SIZE) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Metrics policy report_size_limit %d must be at least %d", policy->report_size_limit, MIN_FILE_SIZE); + } + + // create file directory + as_metrics_writer* mw = udata; + mw->max_size = policy->report_size_limit; + mw->latency_columns = policy->latency_columns; + mw->latency_shift = policy->latency_shift; + mw->report_directory = policy->report_directory; + + as_status status = as_metrics_open_writer(mw, err); + + if (status != AEROSPIKE_OK) { return status; } + mw->enable = true; return AEROSPIKE_OK; } -void -as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node) +static as_status +as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* udata) { - as_string_builder_append_char(mw->sb, '['); - as_string_builder_append(mw->sb, node->name); - as_string_builder_append_char(mw->sb, ','); + as_metrics_writer* mw = udata; - as_string_builder_append(mw->sb, 
as_node_get_address_string(node)); - as_string_builder_append_char(mw->sb, ','); + if (mw->enable && mw->file != NULL) { + as_status status = as_metrics_write_cluster(err, mw, cluster); + if (status != AEROSPIKE_OK) { + return status; + } + } + return AEROSPIKE_OK; +} - struct as_conn_stats_s sync; - struct as_conn_stats_s async; - as_sum_init(&sync); - as_sum_init(&async); - as_metrics_get_node_sync_conn_stats(node, &sync); - as_metrics_write_conn(mw, &sync); - as_string_builder_append_char(mw->sb, ','); - as_metrics_get_node_async_conn_stats(node, &async); - as_metrics_write_conn(mw, &async); - as_string_builder_append_char(mw->sb, ','); +static as_status +as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* udata) +{ + // write cluster into to file, disable + as_metrics_writer* mw = udata; + if (mw->enable && mw->file != NULL) { + as_status status = as_metrics_write_cluster(err, mw, cluster); - as_string_builder_append_uint64(mw->sb, as_node_get_error_count(node)); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint64(mw->sb, as_node_get_timeout_count(node)); - as_string_builder_append(mw->sb, ",["); + if (status != AEROSPIKE_OK) { + return status; + } + uint32_t result = fclose(mw->file); + mw->file = NULL; + mw->enable = false; + as_string_builder_destroy(mw->sb); - as_node_metrics* node_metrics = node->metrics; - uint32_t max = AS_LATENCY_TYPE_NONE; + if (result != 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "File stream did not close successfully: %s", mw->report_directory); + } + } + return AEROSPIKE_OK; +} - for (uint32_t i = 0; i < max; i++) { - if (i > 0) { - as_string_builder_append_char(mw->sb, ','); - } - as_string_builder_append(mw->sb, as_latency_type_to_string(i)); - as_string_builder_append_char(mw->sb, '['); - as_latency_buckets* buckets = &node_metrics->latency[i]; - uint32_t bucket_max = buckets->latency_columns; +static as_status +as_metrics_writer_node_close(as_error* err, 
struct as_node_s* node, void* udata) +{ + // write node info to file + as_metrics_writer* mw = udata; - for (uint32_t j = 0; j < bucket_max; j++) { - if (j > 0) { - as_string_builder_append_char(mw->sb, ','); - } - as_string_builder_append_uint64(mw->sb, as_metrics_get_bucket(buckets, i)); + if (mw->enable && mw->file != NULL) + { + as_string_builder_append(mw->sb, time_str(time(NULL))); + as_metrics_write_node(mw, node); + as_status status = as_metrics_write_line(mw, err); + + if (status != AEROSPIKE_OK) { + return status; } - as_string_builder_append_char(mw->sb, ']'); } - as_string_builder_append(mw->sb, "]]"); + return AEROSPIKE_OK; } + +//--------------------------------- +// Functions +//--------------------------------- + void -as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* sync) +as_metrics_policy_init(as_policy_metrics* policy) { - uint32_t max = node->cluster->conn_pools_per_node; - // Sync connection summary. - for (uint32_t i = 0; i < max; i++) { - as_conn_pool* pool = &node->sync_conn_pools[i]; - - pthread_mutex_lock(&pool->lock); - uint32_t in_pool = as_queue_size(&pool->queue); - uint32_t total = pool->queue.total; - pthread_mutex_unlock(&pool->lock); + policy->report_size_limit = 0; + policy->interval = 30; + policy->latency_columns = 7; + policy->latency_shift = 1; + policy->metrics_listeners.enable_listener = as_metrics_writer_enable; + policy->metrics_listeners.disable_listener = as_metrics_writer_disable; + policy->metrics_listeners.node_close_listener = as_metrics_writer_node_close; + policy->metrics_listeners.snapshot_listener = as_metrics_writer_snapshot; + policy->metrics_listeners.udata = as_metrics_writer_init_udata(); +} - sync->in_pool += in_pool; - sync->in_use += total - in_pool; +char* +as_latency_type_to_string(as_latency_type type) +{ + switch (type) + { + case AS_LATENCY_TYPE_CONN: + return "conn"; + break; + case AS_LATENCY_TYPE_WRITE: + return "write"; + break; + case 
AS_LATENCY_TYPE_READ: + return "read"; + break; + case AS_LATENCY_TYPE_BATCH: + return "batch"; + break; + case AS_LATENCY_TYPE_QUERY: + return "query"; + break; + case AS_LATENCY_TYPE_NONE: + return "none"; + break; + default: + return "none"; + break; } - sync->opened = node->sync_conns_opened; - sync->closed = node->sync_conns_closed; } void -as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* async) +as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, uint32_t latency_columns, uint32_t latency_shift) { - // Async connection summary. - if (as_event_loop_capacity > 0) { - for (uint32_t i = 0; i < as_event_loop_size; i++) { - // Regular async. - as_sum_no_lock(&node->async_conn_pools[i], async); - } + latency_buckets->latency_columns = latency_columns; + latency_buckets->latency_shift = latency_shift; + latency_buckets->buckets = cf_malloc(sizeof(uint64_t) * latency_columns); + for (uint32_t i = 0; i < latency_columns; i++) { + as_store_uint64(&latency_buckets->buckets[i], 0); } } +uint64_t +as_metrics_get_bucket(as_latency_buckets* buckets, uint32_t i) +{ + return as_load_uint64(&buckets->buckets[i]); +} + void -as_metrics_write_conn(as_metrics_writer* mw, const struct as_conn_stats_s* stats) +as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t elapsed) { - as_string_builder_append_uint(mw->sb, stats->in_use); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint(mw->sb, stats->in_pool); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint(mw->sb, stats->opened); // Cumulative. Not reset on each interval. - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint(mw->sb, stats->closed); // Cumulative. Not reset on each interval. 
+ uint32_t index = as_metrics_get_index(latency_buckets, elapsed); + as_incr_uint64(&latency_buckets->buckets[index]); } -as_status -as_metrics_write_line(as_metrics_writer* mw, as_error* err) +uint32_t +as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos) { - as_string_builder_append_newline(mw->sb); - fprintf(mw->file, mw->sb->data); - mw->size += mw->sb->length; + // Convert nanoseconds to milliseconds. + uint64_t elapsed = elapsed_nanos / NS_TO_MS; - if (mw->max_size > 0 && mw->size >= mw->max_size) - { - uint32_t result = fclose(mw->file); - if (result != 0) - { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "File stream did not close successfully: %s", mw->report_directory); - } - as_status status = as_metrics_open_writer(mw, err); - if (status != AEROSPIKE_OK) - { - return status; + // Round up elapsed to nearest millisecond. + if ((elapsed_nanos - (elapsed * NS_TO_MS)) > 0) { + elapsed++; + } + + uint32_t last_bucket = latency_buckets->latency_columns - 1; + uint64_t limit = 1; + + for (uint32_t i = 0; i < last_bucket; i++) { + if (elapsed <= limit) { + return i; } + limit <<= latency_buckets->latency_shift; } - return AEROSPIKE_OK; + return last_bucket; } +as_node_metrics* +as_node_metrics_init(uint32_t latency_columns, uint32_t latency_shift) +{ + as_node_metrics* node_metrics = (as_node_metrics *)cf_malloc(sizeof(as_node_metrics)); + uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; + node_metrics->latency = (as_latency_buckets *)cf_malloc(sizeof(as_latency_buckets) * max_latency_type); + for (uint32_t i = 0; i < max_latency_type; i++) { + as_metrics_latency_buckets_init(&node_metrics->latency[i], latency_columns, latency_shift); + } + + return node_metrics; +} + +void +as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed) +{ + as_metrics_latency_buckets_add(&node_metrics->latency[latency_type], elapsed); +} + + #if defined(__linux__) +#include +#include +#include 
+#include +#include + void as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem) { diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 3df6ce5e5a..5c25a98aa7 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -138,7 +138,11 @@ as_node_create(as_cluster* cluster, as_node_info* node_info) if (cluster->metrics_enabled) { - as_node_metrics_init(node->metrics, cluster->metrics_policy); + node->metrics = as_node_metrics_init(cluster->metrics_latency_columns, cluster->metrics_latency_shift); + } + else + { + node->metrics = NULL; } // Create sync connection pools. @@ -1315,7 +1319,7 @@ as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapse void as_node_enable_metrics(as_node* node, const as_policy_metrics* policy) { - as_node_metrics_init(node->metrics, policy); + node->metrics = as_node_metrics_init(policy->latency_columns, policy->latency_shift); } uint64_t diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index fdf9b618fe..33a7062569 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include "test.h" #include "aerospike_test.h" @@ -380,6 +382,13 @@ static bool before(atf_plan* plan) } } + as_policy_metrics policy; + as_metrics_policy_init(&policy); + policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; + + // enable metrics + as_status status = aerospike_enable_metrics(as, &err, &policy); + cf_free(result); return true; } From aa92db777b6463c7a2eda577906db3c50d639a6d Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Thu, 25 Jan 2024 13:51:44 -0700 Subject: [PATCH 15/64] Fixing more bugs --- src/include/aerospike/aerospike_stats.h | 32 ++++--------------------- src/main/aerospike/aerospike_stats.c | 30 +++++++++++++++++++++++ 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/src/include/aerospike/aerospike_stats.h 
b/src/include/aerospike/aerospike_stats.h index fa39e2358c..723893faf5 100644 --- a/src/include/aerospike/aerospike_stats.h +++ b/src/include/aerospike/aerospike_stats.h @@ -245,35 +245,11 @@ aerospike_event_loop_stats(as_event_loop* event_loop, as_event_loop_stats* stats stats->queue_size = as_event_loop_get_queue_size(event_loop); } -static inline void -as_sum_init(as_conn_stats* stats) -{ - stats->in_pool = 0; - stats->in_use = 0; - stats->opened = 0; - stats->closed = 0; -} +void +as_sum_init(as_conn_stats* stats); -static inline void -as_sum_no_lock(as_async_conn_pool* pool, as_conn_stats* stats) -{ - // Warning: cross-thread reference without a lock. - int tmp = as_queue_size(&pool->queue); - - // Timing issues may cause values to go negative. Adjust. - if (tmp < 0) { - tmp = 0; - } - stats->in_pool += tmp; - tmp = pool->queue.total - tmp; - - if (tmp < 0) { - tmp = 0; - } - stats->in_use += tmp; - stats->opened += pool->opened; - stats->closed += pool->closed; -} +void +as_sum_no_lock(as_async_conn_pool* pool, as_conn_stats* stats); /** * Return string representation of cluster statistics. diff --git a/src/main/aerospike/aerospike_stats.c b/src/main/aerospike/aerospike_stats.c index 3c004b88c5..eecd6b81b1 100644 --- a/src/main/aerospike/aerospike_stats.c +++ b/src/main/aerospike/aerospike_stats.c @@ -51,6 +51,36 @@ as_conn_stats_tostring(as_string_builder* sb, const char* title, as_conn_stats* * FUNCTIONS *****************************************************************************/ +void +as_sum_init(as_conn_stats* stats) +{ + stats->in_pool = 0; + stats->in_use = 0; + stats->opened = 0; + stats->closed = 0; +} + +void +as_sum_no_lock(as_async_conn_pool* pool, as_conn_stats* stats) +{ + // Warning: cross-thread reference without a lock. + int tmp = as_queue_size(&pool->queue); + + // Timing issues may cause values to go negative. Adjust. 
+ if (tmp < 0) { + tmp = 0; + } + stats->in_pool += tmp; + tmp = pool->queue.total - tmp; + + if (tmp < 0) { + tmp = 0; + } + stats->in_use += tmp; + stats->opened += pool->opened; + stats->closed += pool->closed; +} + void aerospike_cluster_stats(as_cluster* cluster, as_cluster_stats* stats) { From 126270f7d6d98b0e10703e76f57866b78af8e1b0 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Wed, 31 Jan 2024 09:31:37 -0700 Subject: [PATCH 16/64] Add logging to help debug --- .../async_batch_get/src/main/example.c | 12 ++++ .../basic_examples/append/src/main/example.c | 16 ++++- .../batch_examples/get/src/main/example.c | 16 ++++- src/include/aerospike/as_metrics.h | 12 ---- src/main/aerospike/aerospike_stats.c | 2 +- src/main/aerospike/as_cluster.c | 2 +- src/main/aerospike/as_metrics.c | 72 ++++++++++++++++--- src/test/aerospike_test.c | 1 + 8 files changed, 107 insertions(+), 26 deletions(-) diff --git a/examples/async_examples/async_batch_get/src/main/example.c b/examples/async_examples/async_batch_get/src/main/example.c index f6d043036a..d6ddb77b63 100644 --- a/examples/async_examples/async_batch_get/src/main/example.c +++ b/examples/async_examples/async_batch_get/src/main/example.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "example_utils.h" @@ -75,6 +76,16 @@ main(int argc, char* argv[]) // Connect to the aerospike database cluster. example_connect_to_aerospike(&as); + as_error err; + as_error_reset(&err); + as_policy_metrics policy; + as_metrics_policy_init(&policy); + policy.interval = 5; + policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; + + // enable metrics + as_status status = aerospike_enable_metrics(&as, &err, &policy); + // Start clean. example_remove_test_records(&as); @@ -86,6 +97,7 @@ main(int argc, char* argv[]) // Wait till commands have completed before shutting down. as_monitor_wait(&monitor); + Sleep(30000); // Cleanup and shutdown. 
example_remove_test_records(&as); diff --git a/examples/basic_examples/append/src/main/example.c b/examples/basic_examples/append/src/main/example.c index d64671a95d..dfbb51d712 100644 --- a/examples/basic_examples/append/src/main/example.c +++ b/examples/basic_examples/append/src/main/example.c @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include "example_utils.h" @@ -54,10 +56,20 @@ main(int argc, char* argv[]) aerospike as; example_connect_to_aerospike(&as); + as_error err; + as_error_reset(&err); + as_policy_metrics policy; + as_metrics_policy_init(&policy); + policy.interval = 5; + policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; + + // enable metrics + as_status status = aerospike_enable_metrics(&as, &err, &policy); + // Start clean. example_remove_test_record(&as); - as_error err; + //as_error err; // Create an as_operations object with three concatenation operations. // Generally, if using as_operations_inita(), we won't need to destroy the @@ -124,6 +136,8 @@ main(int argc, char* argv[]) LOG("as_operations object to apply to database:"); example_dump_operations(&ops); + Sleep(30000); + // Try to apply the operations. This will fail, since we can't append a // string value to an existing bin with "raw" value. Note that if any // operation in the transaction is rejected, none will be applied. 
diff --git a/examples/batch_examples/get/src/main/example.c b/examples/batch_examples/get/src/main/example.c index c50096ac61..e8c1eefa67 100644 --- a/examples/batch_examples/get/src/main/example.c +++ b/examples/batch_examples/get/src/main/example.c @@ -31,6 +31,8 @@ #include #include #include "example_utils.h" +#include +#include //------------------------------------ // Forward Declarations @@ -69,6 +71,16 @@ main(int argc, char* argv[]) aerospike as; example_connect_to_aerospike(&as); + as_error err; + as_error_reset(&err); + as_policy_metrics policy; + as_metrics_policy_init(&policy); + policy.interval = 5; + policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; + + // enable metrics + as_status status = aerospike_enable_metrics(&as, &err, &policy); + // Start clean. example_remove_test_records(&as); @@ -77,7 +89,7 @@ main(int argc, char* argv[]) exit(-1); } - as_error err; + //as_error err; // Make a batch of all the keys we inserted. as_batch batch; @@ -195,6 +207,8 @@ main(int argc, char* argv[]) exit(-1); } + Sleep(30000); + // Cleanup and disconnect from the database cluster. 
cleanup(&as); diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index f81477d752..0a7e43dd2b 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -211,18 +211,6 @@ as_metrics_proc_stat_mem_cpu(double* vm_usage, double* resident_set, double* cpu #if defined(_MSC_VER) -/** - * Helper method that calculates CPU usage using ticks - */ -static double -as_metrics_calculate_cpu_load(uint64_t idleTicks, uint64_t totalTicks); - -/** - * Helper method for converting file time to uint64_t - */ -static uint64_t -as_metrics_file_time_to_uint_64(const FILETIME ft); - /** * Gets CPU usage using GetSystemTimes() */ diff --git a/src/main/aerospike/aerospike_stats.c b/src/main/aerospike/aerospike_stats.c index eecd6b81b1..a9f19ab018 100644 --- a/src/main/aerospike/aerospike_stats.c +++ b/src/main/aerospike/aerospike_stats.c @@ -31,7 +31,7 @@ extern uint32_t as_event_loop_size; * STATIC FUNCTIONS *****************************************************************************/ -static void +static inline void as_conn_stats_tostring(as_string_builder* sb, const char* title, as_conn_stats* cs) { as_string_builder_append_char(sb, ' '); diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index 97bd509034..e30b9eb0d4 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -981,7 +981,7 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) as_incr_uint32(&cluster->shm_info->cluster_shm->rebalance_gen); } - if (cluster->metrics_enabled && (cluster->tend_count % cluster->metrics_interval)) + if (cluster->metrics_enabled && (cluster->tend_count % cluster->metrics_interval) == 0) { as_status status = cluster->metrics_listeners.snapshot_listener(err, cluster, cluster->metrics_listeners.udata); if (status != AEROSPIKE_OK) diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 2ade9dd8c5..c0bfef50f3 100644 --- 
a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -22,6 +22,8 @@ #include #include +#define LOG(_fmt, ...) { printf(_fmt "\n", ##__VA_ARGS__); fflush(stdout); } + //--------------------------------- // Globals //--------------------------------- @@ -37,7 +39,7 @@ time_str_file(time_t t) static char buf[UTC_STR_LEN + 1]; struct tm* local = localtime(&t); snprintf(buf, sizeof(buf), - "%4d%02d%02d%02d%02d%02dZ", + "%4d%02d%02d%02d%02d%02d", 1900 + local->tm_year, local->tm_mon + 1, local->tm_mday, local->tm_hour, local->tm_min, local->tm_sec); return buf; @@ -49,7 +51,7 @@ time_str(time_t t) static char buf[UTC_STR_LEN + 1]; struct tm* local = localtime(&t); snprintf(buf, sizeof(buf), - "%4d-%02d-%02d %02d:%02d:%02dZ", + "%4d-%02d-%02d %02d:%02d:%02d", 1900 + local->tm_year, local->tm_mon + 1, local->tm_mday, local->tm_hour, local->tm_min, local->tm_sec); return buf; @@ -61,13 +63,24 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err); static as_status as_metrics_write_line(as_metrics_writer* mw, as_error* err) { + LOG("as_metrics_write_line begin"); as_string_builder_append_newline(mw->sb); - fprintf(mw->file, "%s", mw->sb->data); + int written = fprintf(mw->file, "%s", mw->sb->data); + if (mw->sb->length != written) + { + LOG("wrong number of chars written"); + LOG("expected %d", mw->sb->length); + } + LOG("written %d", written); + LOG("data %s", mw->sb->data); mw->size += mw->sb->length; as_string_builder_reset(mw->sb); if (mw->max_size > 0 && mw->size >= mw->max_size) { + LOG("need new file"); uint32_t result = fclose(mw->file); + as_string_builder_destroy(mw->sb); + mw->sb = NULL; if (result != 0) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, @@ -76,12 +89,14 @@ as_metrics_write_line(as_metrics_writer* mw, as_error* err) return as_metrics_open_writer(mw, err); } + LOG("as_metrics_write_line end"); return AEROSPIKE_OK; } static void* as_metrics_writer_init_udata() { + LOG("as_metrics_writer_init_udata begin"); 
as_metrics_writer* mw = (as_metrics_writer *)cf_malloc(sizeof(as_metrics_writer)); mw->file = NULL; mw->sb = NULL; @@ -91,12 +106,14 @@ as_metrics_writer_init_udata() mw->latency_shift = 0; mw->report_directory = NULL; + LOG("as_metrics_writer_init_udata end"); return mw; } static as_status as_metrics_open_writer(as_metrics_writer* mw, as_error* err) { + LOG("as_metrics_open_writer begin"); const char* now_file = time_str_file(time(NULL)); as_string_builder file_name; as_string_builder_inita(&file_name, 100, true); @@ -105,6 +122,7 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) as_string_builder_append(&file_name, now_file); as_string_builder_append(&file_name, ".log"); mw->file = fopen(file_name.data, "w"); + as_string_builder_destroy(&file_name); if (!mw->file) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, @@ -112,7 +130,7 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) } mw->size = 0; - mw->sb = (as_string_builder*)cf_malloc(sizeof(as_string_builder)); + mw->sb = (as_string_builder *)cf_malloc(sizeof(as_string_builder)); as_string_builder_inita(mw->sb, 2048, true); const char* now = time_str(time(NULL)); as_string_builder_append(mw->sb, now); @@ -127,12 +145,14 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) as_string_builder_append_int(mw->sb, mw->latency_shift); as_string_builder_append_char(mw->sb, ')'); as_string_builder_append(mw->sb, "[type[l1,l2,l3...]]"); + LOG("as_metrics_open_writer end"); return as_metrics_write_line(mw, err); } static void as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* sync) { + LOG("as_metrics_get_node_sync_conn_stats begin"); uint32_t max = node->cluster->conn_pools_per_node; // Sync connection summary. 
@@ -149,11 +169,13 @@ as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn } sync->opened = node->sync_conns_opened; sync->closed = node->sync_conns_closed; + LOG("as_metrics_get_node_sync_conn_stats end"); } static void as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* async) { + LOG("as_metrics_get_node_async_conn_stats begin"); // Async connection summary. if (as_event_loop_capacity > 0) { for (uint32_t i = 0; i < as_event_loop_size; i++) { @@ -161,11 +183,13 @@ as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_con as_sum_no_lock(&node->async_conn_pools[i], async); } } + LOG("as_metrics_get_node_async_conn_stats end"); } static void as_metrics_write_conn(as_metrics_writer* mw, const struct as_conn_stats_s* stats) { + LOG("as_metrics_write_conn begin"); as_string_builder_append_uint(mw->sb, stats->in_use); as_string_builder_append_char(mw->sb, ','); as_string_builder_append_uint(mw->sb, stats->in_pool); @@ -173,11 +197,13 @@ as_metrics_write_conn(as_metrics_writer* mw, const struct as_conn_stats_s* stats as_string_builder_append_uint(mw->sb, stats->opened); // Cumulative. Not reset on each interval. as_string_builder_append_char(mw->sb, ','); as_string_builder_append_uint(mw->sb, stats->closed); // Cumulative. Not reset on each interval. 
+ LOG("as_metrics_write_conn end"); } static void as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node) { + LOG("as_metrics_write_node begin"); as_string_builder_append_char(mw->sb, '['); as_string_builder_append(mw->sb, node->name); as_string_builder_append_char(mw->sb, ','); @@ -204,7 +230,6 @@ as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node) as_node_metrics* node_metrics = node->metrics; uint32_t max = AS_LATENCY_TYPE_NONE; - for (uint32_t i = 0; i < max; i++) { if (i > 0) { as_string_builder_append_char(mw->sb, ','); @@ -219,15 +244,18 @@ as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node) if (j > 0) { as_string_builder_append_char(mw->sb, ','); } - as_string_builder_append_uint64(mw->sb, as_metrics_get_bucket(buckets, i)); + as_string_builder_append_uint64(mw->sb, as_metrics_get_bucket(buckets, j)); } as_string_builder_append_char(mw->sb, ']'); } as_string_builder_append(mw->sb, "]]"); + LOG("as_metrics_write_node end"); } static as_status -as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster_s* cluster) { +as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster_s* cluster) +{ + LOG("as_metrics_write_cluster begin"); char* cluster_name = cluster->cluster_name; if (cluster_name == NULL) { @@ -283,12 +311,14 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster as_string_builder_append(mw->sb, "]]"); as_nodes_release(nodes); + LOG("as_metrics_write_cluster end"); return as_metrics_write_line(mw, err); } static as_status as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* policy, void* udata) { + LOG("as_metrics_writer_enable begin"); if (policy->report_size_limit != 0 && policy->report_size_limit < MIN_FILE_SIZE) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, "Metrics policy report_size_limit %d must be at least %d", policy->report_size_limit, MIN_FILE_SIZE); @@ -308,26 +338,31 @@ 
as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* policy } mw->enable = true; + LOG("as_metrics_writer_enable end"); return AEROSPIKE_OK; } static as_status as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* udata) { + LOG("as_metrics_writer_snapshot begin"); as_metrics_writer* mw = udata; if (mw->enable && mw->file != NULL) { as_status status = as_metrics_write_cluster(err, mw, cluster); if (status != AEROSPIKE_OK) { + LOG("as_metrics_writer_snapshot not ok"); return status; } } + LOG("as_metrics_writer_snapshot end"); return AEROSPIKE_OK; } static as_status as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* udata) { + LOG("as_metrics_writer_disable begin"); // write cluster into to file, disable as_metrics_writer* mw = udata; if (mw->enable && mw->file != NULL) { @@ -346,7 +381,7 @@ as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* uda "File stream did not close successfully: %s", mw->report_directory); } } - + LOG("as_metrics_writer_disable end"); return AEROSPIKE_OK; } @@ -354,6 +389,7 @@ as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* uda static as_status as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) { + LOG("as_metrics_writer_node_close begin"); // write node info to file as_metrics_writer* mw = udata; @@ -367,6 +403,7 @@ as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) return status; } } + LOG("as_metrics_writer_node_close end"); return AEROSPIKE_OK; } @@ -378,6 +415,7 @@ as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) void as_metrics_policy_init(as_policy_metrics* policy) { + LOG("as_metrics_policy_init begin"); policy->report_size_limit = 0; policy->interval = 30; policy->latency_columns = 7; @@ -387,11 +425,13 @@ as_metrics_policy_init(as_policy_metrics* policy) policy->metrics_listeners.node_close_listener = 
as_metrics_writer_node_close; policy->metrics_listeners.snapshot_listener = as_metrics_writer_snapshot; policy->metrics_listeners.udata = as_metrics_writer_init_udata(); + LOG("as_metrics_policy_init end"); } char* as_latency_type_to_string(as_latency_type type) { + //LOG("as_latency_type_to_string begin"); switch (type) { case AS_LATENCY_TYPE_CONN: @@ -416,28 +456,33 @@ as_latency_type_to_string(as_latency_type type) return "none"; break; } + LOG("as_latency_type_to_string end"); } void as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, uint32_t latency_columns, uint32_t latency_shift) { + LOG("as_metrics_latency_buckets_init begin"); latency_buckets->latency_columns = latency_columns; latency_buckets->latency_shift = latency_shift; latency_buckets->buckets = cf_malloc(sizeof(uint64_t) * latency_columns); for (uint32_t i = 0; i < latency_columns; i++) { as_store_uint64(&latency_buckets->buckets[i], 0); } + LOG("as_metrics_latency_buckets_init end"); } uint64_t as_metrics_get_bucket(as_latency_buckets* buckets, uint32_t i) { + //LOG("as_metrics_get_bucket"); return as_load_uint64(&buckets->buckets[i]); } void as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t elapsed) { + LOG("as_metrics_latency_buckets_add"); uint32_t index = as_metrics_get_index(latency_buckets, elapsed); as_incr_uint64(&latency_buckets->buckets[index]); } @@ -445,6 +490,7 @@ as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t ela uint32_t as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos) { + LOG("as_metrics_get_index begin"); // Convert nanoseconds to milliseconds. 
uint64_t elapsed = elapsed_nanos / NS_TO_MS; @@ -462,26 +508,28 @@ as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos } limit <<= latency_buckets->latency_shift; } - + LOG("as_metrics_get_index end"); return last_bucket; } as_node_metrics* as_node_metrics_init(uint32_t latency_columns, uint32_t latency_shift) { + LOG("as_node_metrics_init begin"); as_node_metrics* node_metrics = (as_node_metrics *)cf_malloc(sizeof(as_node_metrics)); uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; node_metrics->latency = (as_latency_buckets *)cf_malloc(sizeof(as_latency_buckets) * max_latency_type); for (uint32_t i = 0; i < max_latency_type; i++) { as_metrics_latency_buckets_init(&node_metrics->latency[i], latency_columns, latency_shift); } - + LOG("as_node_metrics_init end"); return node_metrics; } void as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed) { + LOG("as_metrics_add_latency"); as_metrics_latency_buckets_add(&node_metrics->latency[latency_type], elapsed); } @@ -553,6 +601,7 @@ as_metrics_proc_stat_mem_cpu(double* vm_usage, double* resident_set, double* cpu void as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem) { + LOG("as_metrics_process_cpu_load_mem_usage"); double cpu_usage_d = as_metrics_process_cpu_load(); cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); *cpu_usage = (uint32_t)cpu_usage_d; @@ -562,6 +611,7 @@ as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem) static double as_metrics_calculate_cpu_load(uint64_t idleTicks, uint64_t totalTicks) { + LOG("as_metrics_calculate_cpu_load"); static uint64_t _previousTotalTicks = 0; static uint64_t _previousIdleTicks = 0; @@ -587,6 +637,7 @@ as_metrics_file_time_to_uint_64(const FILETIME ft) double as_metrics_process_cpu_load() { + LOG("as_metrics_process_cpu_load"); FILETIME idleTime, kernelTime, userTime; return GetSystemTimes(&idleTime, &kernelTime, &userTime) ? 
as_metrics_calculate_cpu_load(as_metrics_file_time_to_uint_64(idleTime), as_metrics_file_time_to_uint_64(kernelTime) + as_metrics_file_time_to_uint_64(userTime)) * 100: -1.0f; @@ -595,6 +646,7 @@ as_metrics_process_cpu_load() uint32_t as_metrics_process_mem_usage() { + LOG("as_metrics_process_mem_usage"); PROCESS_MEMORY_COUNTERS memCounter; BOOL result = GetProcessMemoryInfo(GetCurrentProcess(), &memCounter, diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index 33a7062569..72456f609a 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -384,6 +384,7 @@ static bool before(atf_plan* plan) as_policy_metrics policy; as_metrics_policy_init(&policy); + policy.interval = 5; policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; // enable metrics From a9c192b16d829d0c5354f5d85566b0b08907ce7d Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Wed, 31 Jan 2024 12:12:23 -0500 Subject: [PATCH 17/64] Fix mac compiler errors/warnings. 
--- examples/batch_examples/get/src/main/example.c | 11 +++++++++-- src/include/aerospike/as_metrics.h | 6 +++--- src/main/aerospike/as_metrics.c | 7 +++++-- xcode/aerospike.xcodeproj/project.pbxproj | 8 ++++++++ 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/examples/batch_examples/get/src/main/example.c b/examples/batch_examples/get/src/main/example.c index e8c1eefa67..66ace4dbe3 100644 --- a/examples/batch_examples/get/src/main/example.c +++ b/examples/batch_examples/get/src/main/example.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -29,10 +30,10 @@ #include #include #include +#include #include #include "example_utils.h" #include -#include //------------------------------------ // Forward Declarations @@ -80,6 +81,12 @@ main(int argc, char* argv[]) // enable metrics as_status status = aerospike_enable_metrics(&as, &err, &policy); + + if (status != AEROSPIKE_OK) { + LOG("aerospike_enable_metrics() returned %d - %s", err.code, err.message); + cleanup(&as); + exit(-1); + } // Start clean. example_remove_test_records(&as); @@ -207,7 +214,7 @@ main(int argc, char* argv[]) exit(-1); } - Sleep(30000); + as_sleep(30000); // Cleanup and disconnect from the database cluster. 
cleanup(&as); diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 0a7e43dd2b..e76bbfe2e6 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -229,15 +229,15 @@ as_metrics_process_mem_usage(); * Gets memory usage using task_info */ double -as_metrics_process_mem_usage(); +as_metrics_process_mem_usage(void); /** * Gets cpu usage using ps -p */ double -as_metrics_process_cpu_load(); +as_metrics_process_cpu_load(void); #endif #ifdef __cplusplus } // end extern "C" -#endif \ No newline at end of file +#endif diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index c0bfef50f3..1c8ebb49f0 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -692,11 +692,12 @@ as_metrics_process_mem_usage() double as_metrics_process_cpu_load() { + /* This code does not compile on mac. pid_t pid = getpid(); as_string_builder sb; as_string_builder_inita(&sb, 20, true); as_string_builder_append(&sb, "ps -p "); - as_string_builder_append(&sb, pid); + as_string_builder_append_int(&sb, pid); as_string_builder_append(&sb, " -o %CPU"); FILE* result = popen(sb.data); char[5] cpu_holder; @@ -706,5 +707,7 @@ as_metrics_process_cpu_load() pclose(result); return atof(cpu_percent); + */ + return 1.0; } -#endif \ No newline at end of file +#endif diff --git a/xcode/aerospike.xcodeproj/project.pbxproj b/xcode/aerospike.xcodeproj/project.pbxproj index 31afd29387..9cc60a4403 100644 --- a/xcode/aerospike.xcodeproj/project.pbxproj +++ b/xcode/aerospike.xcodeproj/project.pbxproj @@ -112,6 +112,8 @@ BF986E001F466BEE0057802C /* version.h in Headers */ = {isa = PBXBuildFile; fileRef = BF986DFF1F466BEE0057802C /* version.h */; }; BFA5B21020FD3FA4002AF0BB /* as_cpu.h in Headers */ = {isa = PBXBuildFile; fileRef = BFA5B20F20FD3FA4002AF0BB /* as_cpu.h */; }; BFABF3311FCF85EC004745A1 /* as_queue_mt.c in Sources */ = {isa = PBXBuildFile; fileRef = BFABF3301FCF85EC004745A1 /* 
as_queue_mt.c */; }; + BFAF276E2B6AB36A00A3858B /* as_metrics.h in Headers */ = {isa = PBXBuildFile; fileRef = BFAF276D2B6AB36A00A3858B /* as_metrics.h */; }; + BFAF27702B6AB39100A3858B /* as_metrics.c in Sources */ = {isa = PBXBuildFile; fileRef = BFAF276F2B6AB39100A3858B /* as_metrics.c */; }; BFB0ED5522A72260007FEA9C /* as_cdt_ctx.h in Headers */ = {isa = PBXBuildFile; fileRef = BFB0ED5422A72260007FEA9C /* as_cdt_ctx.h */; }; BFB8A5D81D0F3F77007B4E22 /* as_tls.c in Sources */ = {isa = PBXBuildFile; fileRef = BFB8A5D71D0F3F77007B4E22 /* as_tls.c */; }; BFB8A5DA1D0F3F9E007B4E22 /* as_tls.h in Headers */ = {isa = PBXBuildFile; fileRef = BFB8A5D91D0F3F9E007B4E22 /* as_tls.h */; }; @@ -325,6 +327,8 @@ BF986DFF1F466BEE0057802C /* version.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = version.h; path = ../src/include/aerospike/version.h; sourceTree = ""; }; BFA5B20F20FD3FA4002AF0BB /* as_cpu.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_cpu.h; path = ../src/include/aerospike/as_cpu.h; sourceTree = ""; }; BFABF3301FCF85EC004745A1 /* as_queue_mt.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_queue_mt.c; path = ../modules/common/src/main/aerospike/as_queue_mt.c; sourceTree = ""; }; + BFAF276D2B6AB36A00A3858B /* as_metrics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_metrics.h; path = ../src/include/aerospike/as_metrics.h; sourceTree = ""; }; + BFAF276F2B6AB39100A3858B /* as_metrics.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_metrics.c; path = ../src/main/aerospike/as_metrics.c; sourceTree = ""; }; BFB0ED5422A72260007FEA9C /* as_cdt_ctx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_cdt_ctx.h; path = ../src/include/aerospike/as_cdt_ctx.h; sourceTree = ""; }; 
BFB8A5D71D0F3F77007B4E22 /* as_tls.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_tls.c; path = ../src/main/aerospike/as_tls.c; sourceTree = ""; }; BFB8A5D91D0F3F9E007B4E22 /* as_tls.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_tls.h; path = ../src/include/aerospike/as_tls.h; sourceTree = ""; }; @@ -551,6 +555,7 @@ BF90C76422AB0EB20062D920 /* as_list_operations.c */, BFC002891901E08500CB9BC8 /* as_lookup.c */, BF90C77022AB30E40062D920 /* as_map_operations.c */, + BFAF276F2B6AB39100A3858B /* as_metrics.c */, BFBB3C8E192D729A00251B15 /* as_node.c */, BF2AA7C718BEBFA400E54AF3 /* as_operations.c */, BFBA916A1914344B00AADA9A /* as_partition.c */, @@ -749,6 +754,7 @@ BFC65B511C921E9E0079DF5A /* as_listener.h */, BFC65B521C921E9E0079DF5A /* as_lookup.h */, BFF344AF1CDAC67700FD1976 /* as_map_operations.h */, + BFAF276D2B6AB36A00A3858B /* as_metrics.h */, BFC65B531C921E9E0079DF5A /* as_node.h */, BFC65B541C921E9E0079DF5A /* as_operations.h */, BFC65B551C921E9E0079DF5A /* as_partition.h */, @@ -798,6 +804,7 @@ BF65C9C6252D299D0026D9E2 /* as_exp.h in Headers */, BFC65B821C921E9E0079DF5A /* as_policy.h in Headers */, BF162EBE2413000B001B1747 /* as_cdt_order.h in Headers */, + BFAF276E2B6AB36A00A3858B /* as_metrics.h in Headers */, BFC65B801C921E9E0079DF5A /* as_partition.h in Headers */, BFC65B7D1C921E9E0079DF5A /* as_lookup.h in Headers */, BF4E4E2A1D48213700BEEF94 /* as_host.h in Headers */, @@ -969,6 +976,7 @@ BFBA106F18B7DFA100A64E68 /* as_msgpack.c in Sources */, BFCB38A71DFB764200C73D0F /* as_event_event.c in Sources */, BF2AA7F018BEBFA500E54AF3 /* as_record_hooks.c in Sources */, + BFAF27702B6AB39100A3858B /* as_metrics.c in Sources */, BFBD205918BC3436009ED931 /* mod_lua_val.c in Sources */, BFBA106218B7D8B300A64E68 /* as_pair.c in Sources */, BFBA105318B7D8B300A64E68 /* as_bytes.c in Sources */, From 2339eab89714151dad648da55b004ed1ebc9f847 Mon Sep 17 00:00:00 2001 
From: Shannon Klaus Date: Wed, 31 Jan 2024 11:30:12 -0700 Subject: [PATCH 18/64] Add code to check for trailing slash and add if missing based on operating system --- src/main/aerospike/as_metrics.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 1c8ebb49f0..4e8767c096 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -110,6 +110,15 @@ as_metrics_writer_init_udata() return mw; } +static inline char separator() +{ +#ifdef _WIN32 + return '\\'; +#else + return '/'; +#endif +} + static as_status as_metrics_open_writer(as_metrics_writer* mw, as_error* err) { @@ -118,7 +127,12 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) as_string_builder file_name; as_string_builder_inita(&file_name, 100, true); as_string_builder_append(&file_name, mw->report_directory); - as_string_builder_append(&file_name, "\\metrics-"); + char last_char = mw->report_directory[(strlen(mw->report_directory) - 1)]; + if (last_char != '/' && last_char != '\\') + { + as_string_builder_append_char(&file_name, separator()); + } + as_string_builder_append(&file_name, "metrics-"); as_string_builder_append(&file_name, now_file); as_string_builder_append(&file_name, ".log"); mw->file = fopen(file_name.data, "w"); From 3887ac23b2fb3f3f6cbb62322985e33be0ea8236 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Thu, 1 Feb 2024 12:51:57 -0700 Subject: [PATCH 19/64] Free memory --- .../async_batch_get/src/main/example.c | 13 ++- .../basic_examples/append/src/main/example.c | 13 ++- .../batch_examples/get/src/main/example.c | 4 + examples/utils/src/main/example_utils.c | 6 ++ src/include/aerospike/aerospike_stats.h | 12 --- src/include/aerospike/as_metrics.h | 13 +++ src/main/aerospike/aerospike_stats.c | 19 ---- src/main/aerospike/as_metrics.c | 97 ++++++++++++++++--- src/test/aerospike_test.c | 15 ++- 9 files changed, 141 insertions(+), 51 deletions(-) 
diff --git a/examples/async_examples/async_batch_get/src/main/example.c b/examples/async_examples/async_batch_get/src/main/example.c index d6ddb77b63..0302747e1a 100644 --- a/examples/async_examples/async_batch_get/src/main/example.c +++ b/examples/async_examples/async_batch_get/src/main/example.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "example_utils.h" @@ -81,11 +82,21 @@ main(int argc, char* argv[]) as_policy_metrics policy; as_metrics_policy_init(&policy); policy.interval = 5; + +#ifdef _WIN32 policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; +#else + policy.report_directory = "/home/sklaus/metrics"; +#endif // enable metrics as_status status = aerospike_enable_metrics(&as, &err, &policy); + if (status != AEROSPIKE_OK) { + LOG("aerospike_enable_metrics() returned %d - %s", err.code, err.message); + exit(-1); + } + // Start clean. example_remove_test_records(&as); @@ -97,7 +108,7 @@ main(int argc, char* argv[]) // Wait till commands have completed before shutting down. as_monitor_wait(&monitor); - Sleep(30000); + as_sleep(30000); // Cleanup and shutdown. 
example_remove_test_records(&as); diff --git a/examples/basic_examples/append/src/main/example.c b/examples/basic_examples/append/src/main/example.c index dfbb51d712..15f53c3cf5 100644 --- a/examples/basic_examples/append/src/main/example.c +++ b/examples/basic_examples/append/src/main/example.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include "example_utils.h" @@ -61,11 +61,20 @@ main(int argc, char* argv[]) as_policy_metrics policy; as_metrics_policy_init(&policy); policy.interval = 5; +#ifdef _WIN32 policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; +#else + policy.report_directory = "/home/sklaus/metrics"; +#endif // enable metrics as_status status = aerospike_enable_metrics(&as, &err, &policy); + if (status != AEROSPIKE_OK) { + LOG("aerospike_enable_metrics() returned %d - %s", err.code, err.message); + exit(-1); + } + // Start clean. example_remove_test_record(&as); @@ -136,7 +145,7 @@ main(int argc, char* argv[]) LOG("as_operations object to apply to database:"); example_dump_operations(&ops); - Sleep(30000); + as_sleep(30000); // Try to apply the operations. This will fail, since we can't append a // string value to an existing bin with "raw" value. 
Note that if any diff --git a/examples/batch_examples/get/src/main/example.c b/examples/batch_examples/get/src/main/example.c index 66ace4dbe3..b3d699eec7 100644 --- a/examples/batch_examples/get/src/main/example.c +++ b/examples/batch_examples/get/src/main/example.c @@ -77,7 +77,11 @@ main(int argc, char* argv[]) as_policy_metrics policy; as_metrics_policy_init(&policy); policy.interval = 5; +#ifdef _WIN32 policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; +#else + policy.report_directory = "/home/sklaus/metrics"; +#endif // enable metrics as_status status = aerospike_enable_metrics(&as, &err, &policy); diff --git a/examples/utils/src/main/example_utils.c b/examples/utils/src/main/example_utils.c index aa53f2aeee..8151373310 100644 --- a/examples/utils/src/main/example_utils.c +++ b/examples/utils/src/main/example_utils.c @@ -36,10 +36,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -573,6 +575,10 @@ example_cleanup(aerospike* p_as) as_error err; + if (p_as->cluster->metrics_enabled) { + aerospike_disable_metrics(p_as, &err); + } + // Disconnect from the database cluster and clean up the aerospike object. aerospike_close(p_as, &err); aerospike_destroy(p_as); diff --git a/src/include/aerospike/aerospike_stats.h b/src/include/aerospike/aerospike_stats.h index 723893faf5..fcf21f166b 100644 --- a/src/include/aerospike/aerospike_stats.h +++ b/src/include/aerospike/aerospike_stats.h @@ -260,18 +260,6 @@ as_sum_no_lock(as_async_conn_pool* pool, as_conn_stats* stats); AS_EXTERN char* aerospike_stats_to_string(as_cluster_stats* stats); -/** - * Enable extended periodic cluster and node latency metrics. - */ -AS_EXTERN as_status -aerospike_enable_metrics(aerospike* as, as_error* err, struct as_policy_metrics_s* policy); - -/** - * Disable extended periodic cluster and node latency metrics. 
- */ -AS_EXTERN as_status -aerospike_disable_metrics(aerospike* as, as_error* err); - #ifdef __cplusplus } // end extern "C" #endif diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index e76bbfe2e6..77ee28cca4 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -140,6 +141,18 @@ typedef struct as_metrics_writer_s { AS_EXTERN void as_metrics_policy_init(as_policy_metrics* policy); +/** + * Enable extended periodic cluster and node latency metrics. + */ +AS_EXTERN as_status +aerospike_enable_metrics(aerospike* as, as_error* err, as_policy_metrics* policy); + +/** + * Disable extended periodic cluster and node latency metrics. + */ +AS_EXTERN as_status +aerospike_disable_metrics(aerospike* as, as_error* err); + static inline void as_metrics_set_listeners( as_policy_metrics* policy, as_metrics_enable_listener enable, diff --git a/src/main/aerospike/aerospike_stats.c b/src/main/aerospike/aerospike_stats.c index a9f19ab018..2e54c6ca50 100644 --- a/src/main/aerospike/aerospike_stats.c +++ b/src/main/aerospike/aerospike_stats.c @@ -209,22 +209,3 @@ aerospike_stats_to_string(as_cluster_stats* stats) } return sb.data; } - -as_status -aerospike_enable_metrics(aerospike* as, as_error* err, struct as_policy_metrics_s* policy) -{ - return as_cluster_enable_metrics(err, as->cluster, policy); -} - -as_status -aerospike_disable_metrics(aerospike* as, as_error* err) -{ - as_cluster* cluster = as->cluster; - as_status status = as_cluster_disable_metrics(err, cluster); - if (status != AEROSPIKE_OK) - { - return status; - } - - return AEROSPIKE_OK; -} diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 4e8767c096..55c80b3687 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -329,6 +329,41 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster 
return as_metrics_write_line(mw, err); } +static void +as_metrics_writer_free_writer(as_metrics_writer* mw) +{ + mw->enable = false; + as_string_builder_destroy(mw->sb); + cf_free(mw->file); + cf_free(mw->report_directory); + cf_free(mw); +} + +static void +as_metrics_writer_free_node_metrics(as_node* node) +{ + if (node->metrics != NULL) { + uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; + for (uint32_t i = 0; i < max_latency_type; i++) { + cf_free(node->metrics->latency[i].buckets); + } + cf_free(node->metrics->latency); + cf_free(node->metrics); + node->metrics = NULL; + } +} + +static void +as_metrics_writer_free_all_node_metrics(as_cluster* cluster) +{ + // Free node memory + as_nodes* nodes = as_nodes_reserve(cluster); + for (uint32_t i = 0; i < nodes->size; i++) { + as_metrics_writer_free_node_metrics(nodes->array[i]); + } + as_nodes_release(nodes); +} + static as_status as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* policy, void* udata) { @@ -365,9 +400,18 @@ as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* ud if (mw->enable && mw->file != NULL) { as_status status = as_metrics_write_cluster(err, mw, cluster); if (status != AEROSPIKE_OK) { + as_metrics_writer_free_all_node_metrics(cluster); + as_metrics_writer_free_writer(mw); LOG("as_metrics_writer_snapshot not ok"); return status; } + uint32_t result = fflush(mw->file); + if (result != 0) { + as_metrics_writer_free_all_node_metrics(cluster); + as_metrics_writer_free_writer(mw); + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "File stream did not flush successfully: %s", mw->report_directory); + } } LOG("as_metrics_writer_snapshot end"); return AEROSPIKE_OK; @@ -381,14 +425,13 @@ as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* uda as_metrics_writer* mw = udata; if (mw->enable && mw->file != NULL) { as_status status = as_metrics_write_cluster(err, mw, cluster); + uint32_t result = fclose(mw->file); + 
as_metrics_writer_free_all_node_metrics(cluster); + as_metrics_writer_free_writer(mw); if (status != AEROSPIKE_OK) { return status; } - uint32_t result = fclose(mw->file); - mw->file = NULL; - mw->enable = false; - as_string_builder_destroy(mw->sb); if (result != 0) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, @@ -399,7 +442,6 @@ as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* uda return AEROSPIKE_OK; } - static as_status as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) { @@ -407,12 +449,13 @@ as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) // write node info to file as_metrics_writer* mw = udata; - if (mw->enable && mw->file != NULL) - { + if (mw->enable && mw->file != NULL) { as_string_builder_append(mw->sb, time_str(time(NULL))); as_metrics_write_node(mw, node); as_status status = as_metrics_write_line(mw, err); + as_metrics_writer_free_node_metrics(node); + if (status != AEROSPIKE_OK) { return status; } @@ -422,10 +465,30 @@ as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) } + //--------------------------------- // Functions //--------------------------------- +as_status +aerospike_enable_metrics(aerospike* as, as_error* err, struct as_policy_metrics_s* policy) +{ + return as_cluster_enable_metrics(err, as->cluster, policy); +} + +as_status +aerospike_disable_metrics(aerospike* as, as_error* err) +{ + as_cluster* cluster = as->cluster; + as_status status = as_cluster_disable_metrics(err, cluster); + if (status != AEROSPIKE_OK) + { + return status; + } + + return AEROSPIKE_OK; +} + void as_metrics_policy_init(as_policy_metrics* policy) { @@ -549,24 +612,26 @@ as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_ty #if defined(__linux__) -#include -#include -#include -#include -#include +//#include +//#include +//#include +//#include +//#include void as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, 
uint32_t* mem) { - double resident_set, mem_d, cpu_usage_d; + /*double resident_set, mem_d, cpu_usage_d; as_metrics_proc_stat_mem_cpu(&mem_d, &resident_set, &cpu_usage_d); cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); mem_d = mem_d + 0.5 - (mem_d < 0); *cpu_usage = (uint32_t)cpu_usage_d; - *mem = (uint32_t)mem_d; + *mem = (uint32_t)mem_d;*/ + *cpu_usage = 100; + *mem = 100; } -void +/*void as_metrics_proc_stat_mem_cpu(double* vm_usage, double* resident_set, double* cpu_usage) { using std::ios_base; @@ -605,7 +670,7 @@ as_metrics_proc_stat_mem_cpu(double* vm_usage, double* resident_set, double* cpu uint64_t start_time_sec = starttime / sysconf(_SC_CLK_TCK); *cpu_usage = (u_time_sec + s_time_sec) / (cf_get_seconds() - start_time_sec); -} +}*/ #endif #if defined(_MSC_VER) diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index 72456f609a..5f4d4ff9f7 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -25,7 +25,6 @@ #include #include #include -#include #include "test.h" #include "aerospike_test.h" @@ -385,11 +384,20 @@ static bool before(atf_plan* plan) as_policy_metrics policy; as_metrics_policy_init(&policy); policy.interval = 5; +#ifdef _WIN32 policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; +#else + policy.report_directory = "/home/sklaus/metrics"; +#endif // enable metrics as_status status = aerospike_enable_metrics(as, &err, &policy); + if (status != AEROSPIKE_OK) { + error("aerospike_enable_metrics() returned %d - %s", err.code, err.message); + exit(-1); + } + cf_free(result); return true; } @@ -404,6 +412,11 @@ static bool after(atf_plan* plan) as_error err; as_error_reset(&err); + if (as->cluster->metrics_enabled) { + aerospike_disable_metrics(as, &err); + } + + as_error_reset(&err); as_status status = aerospike_close(as, &err); aerospike_destroy(as); From 7b9302f2f0b1555e93f5791bf8f1953d805dd1ca Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Fri, 2 Feb 2024 12:55:34 -0700 
Subject: [PATCH 20/64] Fix memory leak --- examples/utils/src/main/example_utils.c | 6 - src/include/aerospike/as_metrics.h | 2 +- src/main/aerospike/aerospike.c | 5 + src/main/aerospike/as_cluster.c | 1 + src/main/aerospike/as_metrics.c | 211 ++++++++++++------------ src/test/aerospike_test.c | 5 - 6 files changed, 110 insertions(+), 120 deletions(-) diff --git a/examples/utils/src/main/example_utils.c b/examples/utils/src/main/example_utils.c index 8151373310..aa53f2aeee 100644 --- a/examples/utils/src/main/example_utils.c +++ b/examples/utils/src/main/example_utils.c @@ -36,12 +36,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include #include @@ -575,10 +573,6 @@ example_cleanup(aerospike* p_as) as_error err; - if (p_as->cluster->metrics_enabled) { - aerospike_disable_metrics(p_as, &err); - } - // Disconnect from the database cluster and clean up the aerospike object. aerospike_close(p_as, &err); aerospike_destroy(p_as); diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 77ee28cca4..75f4705f9c 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -120,7 +120,7 @@ typedef struct as_node_metrics_s { typedef struct as_metrics_writer_s { FILE* file; - as_string_builder* sb; + as_string_builder sb; bool enable; diff --git a/src/main/aerospike/aerospike.c b/src/main/aerospike/aerospike.c index dc28dc19a6..70bc0d93d5 100644 --- a/src/main/aerospike/aerospike.c +++ b/src/main/aerospike/aerospike.c @@ -251,6 +251,10 @@ aerospike_close(aerospike* as, as_error* err) as_cluster* cluster = as->cluster; if (cluster) { + if (cluster->metrics_enabled) { + aerospike_disable_metrics(as, err); + } + if (as_event_loop_size > 0 && !as_event_single_thread) { // Async configurations will attempt to wait till pending async commands have completed. 
as_event_close_cluster(cluster); @@ -259,6 +263,7 @@ aerospike_close(aerospike* as, as_error* err) // Close sync only configurations immediately. as_cluster_destroy(cluster); } + as->cluster = NULL; } return err->code; diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index e30b9eb0d4..401c1a8640 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -576,6 +576,7 @@ as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_policy_metrics* as_status status = cluster->metrics_listeners.enable_listener(err, policy, cluster->metrics_listeners.udata); if (status != AEROSPIKE_OK) { + as_cluster_disable_metrics(err, cluster); return status; } cluster->metrics_enabled = true; diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 55c80b3687..9b27d3bde3 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -64,23 +64,21 @@ static as_status as_metrics_write_line(as_metrics_writer* mw, as_error* err) { LOG("as_metrics_write_line begin"); - as_string_builder_append_newline(mw->sb); - int written = fprintf(mw->file, "%s", mw->sb->data); - if (mw->sb->length != written) + as_string_builder_append_newline(&mw->sb); + int written = fprintf(mw->file, "%s", mw->sb.data); + if (mw->sb.length != written) { LOG("wrong number of chars written"); - LOG("expected %d", mw->sb->length); + LOG("expected %d", mw->sb.length); } LOG("written %d", written); - LOG("data %s", mw->sb->data); - mw->size += mw->sb->length; - as_string_builder_reset(mw->sb); + LOG("data %s", mw->sb.data); + mw->size += mw->sb.length; + as_string_builder_reset(&mw->sb); if (mw->max_size > 0 && mw->size >= mw->max_size) { LOG("need new file"); uint32_t result = fclose(mw->file); - as_string_builder_destroy(mw->sb); - mw->sb = NULL; if (result != 0) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, @@ -99,7 +97,6 @@ as_metrics_writer_init_udata() LOG("as_metrics_writer_init_udata begin"); 
as_metrics_writer* mw = (as_metrics_writer *)cf_malloc(sizeof(as_metrics_writer)); mw->file = NULL; - mw->sb = NULL; mw->enable = false; mw->max_size = 0; mw->latency_columns = 0; @@ -144,21 +141,20 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) } mw->size = 0; - mw->sb = (as_string_builder *)cf_malloc(sizeof(as_string_builder)); - as_string_builder_inita(mw->sb, 2048, true); + as_string_builder_inita(&mw->sb, 2048, true); const char* now = time_str(time(NULL)); - as_string_builder_append(mw->sb, now); - as_string_builder_append(mw->sb, " header(1)"); - as_string_builder_append(mw->sb, " cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]]"); - as_string_builder_append(mw->sb, " eventloop[processSize,queueSize]"); - as_string_builder_append(mw->sb, " node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]]"); - as_string_builder_append(mw->sb, " conn[inUse,inPool,opened,closed]"); - as_string_builder_append(mw->sb, " latency("); - as_string_builder_append_int(mw->sb, mw->latency_columns); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_int(mw->sb, mw->latency_shift); - as_string_builder_append_char(mw->sb, ')'); - as_string_builder_append(mw->sb, "[type[l1,l2,l3...]]"); + as_string_builder_append(&mw->sb, now); + as_string_builder_append(&mw->sb, " header(1)"); + as_string_builder_append(&mw->sb, " cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]]"); + as_string_builder_append(&mw->sb, " eventloop[processSize,queueSize]"); + as_string_builder_append(&mw->sb, " node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]]"); + as_string_builder_append(&mw->sb, " conn[inUse,inPool,opened,closed]"); + as_string_builder_append(&mw->sb, " latency("); + as_string_builder_append_int(&mw->sb, mw->latency_columns); + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_int(&mw->sb, 
mw->latency_shift); + as_string_builder_append_char(&mw->sb, ')'); + as_string_builder_append(&mw->sb, "[type[l1,l2,l3...]]"); LOG("as_metrics_open_writer end"); return as_metrics_write_line(mw, err); } @@ -204,13 +200,13 @@ static void as_metrics_write_conn(as_metrics_writer* mw, const struct as_conn_stats_s* stats) { LOG("as_metrics_write_conn begin"); - as_string_builder_append_uint(mw->sb, stats->in_use); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint(mw->sb, stats->in_pool); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint(mw->sb, stats->opened); // Cumulative. Not reset on each interval. - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint(mw->sb, stats->closed); // Cumulative. Not reset on each interval. + as_string_builder_append_uint(&mw->sb, stats->in_use); + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_uint(&mw->sb, stats->in_pool); + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_uint(&mw->sb, stats->opened); // Cumulative. Not reset on each interval. + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_uint(&mw->sb, stats->closed); // Cumulative. Not reset on each interval. 
LOG("as_metrics_write_conn end"); } @@ -218,12 +214,12 @@ static void as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node) { LOG("as_metrics_write_node begin"); - as_string_builder_append_char(mw->sb, '['); - as_string_builder_append(mw->sb, node->name); - as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_char(&mw->sb, '['); + as_string_builder_append(&mw->sb, node->name); + as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append(mw->sb, as_node_get_address_string(node)); - as_string_builder_append_char(mw->sb, ','); + as_string_builder_append(&mw->sb, as_node_get_address_string(node)); + as_string_builder_append_char(&mw->sb, ','); struct as_conn_stats_s sync; struct as_conn_stats_s async; @@ -231,38 +227,38 @@ as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node) as_sum_init(&async); as_metrics_get_node_sync_conn_stats(node, &sync); as_metrics_write_conn(mw, &sync); - as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_char(&mw->sb, ','); as_metrics_get_node_async_conn_stats(node, &async); as_metrics_write_conn(mw, &async); - as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_uint64(mw->sb, as_node_get_error_count(node)); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint64(mw->sb, as_node_get_timeout_count(node)); - as_string_builder_append(mw->sb, ",["); + as_string_builder_append_uint64(&mw->sb, as_node_get_error_count(node)); + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_uint64(&mw->sb, as_node_get_timeout_count(node)); + as_string_builder_append(&mw->sb, ",["); as_node_metrics* node_metrics = node->metrics; uint32_t max = AS_LATENCY_TYPE_NONE; for (uint32_t i = 0; i < max; i++) { if (i > 0) { - as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_char(&mw->sb, ','); } - as_string_builder_append(mw->sb, as_latency_type_to_string(i)); - 
as_string_builder_append_char(mw->sb, '['); + as_string_builder_append(&mw->sb, as_latency_type_to_string(i)); + as_string_builder_append_char(&mw->sb, '['); as_latency_buckets* buckets = &node_metrics->latency[i]; uint32_t bucket_max = buckets->latency_columns; for (uint32_t j = 0; j < bucket_max; j++) { if (j > 0) { - as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_char(&mw->sb, ','); } - as_string_builder_append_uint64(mw->sb, as_metrics_get_bucket(buckets, j)); + as_string_builder_append_uint64(&mw->sb, as_metrics_get_bucket(buckets, j)); } - as_string_builder_append_char(mw->sb, ']'); + as_string_builder_append_char(&mw->sb, ']'); } - as_string_builder_append(mw->sb, "]]"); + as_string_builder_append(&mw->sb, "]]"); LOG("as_metrics_write_node end"); } @@ -280,35 +276,35 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster uint32_t mem = 0; as_metrics_process_cpu_load_mem_usage(&cpu_load, &mem); - as_string_builder_append(mw->sb, time_str(time(NULL))); - as_string_builder_append(mw->sb, " cluster["); - as_string_builder_append(mw->sb, cluster_name); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_int(mw->sb, cpu_load); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_int(mw->sb, mem); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint(mw->sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint64(mw->sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint64(mw->sb, cluster->retry_count); // Cumulative. Not reset on each interval. - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint64(mw->sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. 
- as_string_builder_append(mw->sb, ",["); + as_string_builder_append(&mw->sb, time_str(time(NULL))); + as_string_builder_append(&mw->sb, " cluster["); + as_string_builder_append(&mw->sb, cluster_name); + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_int(&mw->sb, cpu_load); + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_int(&mw->sb, mem); + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_uint(&mw->sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_uint64(&mw->sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_uint64(&mw->sb, cluster->retry_count); // Cumulative. Not reset on each interval. + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_uint64(&mw->sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. 
+ as_string_builder_append(&mw->sb, ",["); for (uint32_t i = 0; i < as_event_loop_size; i++) { as_event_loop* loop = &as_event_loops[i]; if (i > 0) { - as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_char(&mw->sb, ','); } - as_string_builder_append_char(mw->sb, '['); - as_string_builder_append_int(mw->sb, as_event_loop_get_process_size(loop)); - as_string_builder_append_char(mw->sb, ','); - as_string_builder_append_uint(mw->sb, as_event_loop_get_queue_size(loop)); - as_string_builder_append_char(mw->sb, ']'); + as_string_builder_append_char(&mw->sb, '['); + as_string_builder_append_int(&mw->sb, as_event_loop_get_process_size(loop)); + as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_uint(&mw->sb, as_event_loop_get_queue_size(loop)); + as_string_builder_append_char(&mw->sb, ']'); } - as_string_builder_append(mw->sb, "],["); + as_string_builder_append(&mw->sb, "],["); as_nodes* nodes = as_nodes_reserve(cluster); for (uint32_t i = 0; i < nodes->size; i++) { @@ -316,13 +312,13 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster as_node_reserve(node); if (i > 0) { - as_string_builder_append_char(mw->sb, ','); + as_string_builder_append_char(&mw->sb, ','); } as_metrics_write_node(mw, node); as_node_release(node); } - as_string_builder_append(mw->sb, "]]"); + as_string_builder_append(&mw->sb, "]]"); as_nodes_release(nodes); LOG("as_metrics_write_cluster end"); @@ -330,17 +326,17 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster } static void -as_metrics_writer_free_writer(as_metrics_writer* mw) +as_metrics_writer_destroy(as_metrics_writer* mw) { - mw->enable = false; - as_string_builder_destroy(mw->sb); - cf_free(mw->file); - cf_free(mw->report_directory); + LOG("destorying string builder"); + as_string_builder_destroy(&mw->sb); + LOG("string builder %s", &mw->sb); + fclose(mw->file); cf_free(mw); } static void -as_metrics_writer_free_node_metrics(as_node* node) 
+as_metrics_writer_destroy_node_metrics(as_node* node) { if (node->metrics != NULL) { uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; @@ -354,12 +350,12 @@ as_metrics_writer_free_node_metrics(as_node* node) } static void -as_metrics_writer_free_all_node_metrics(as_cluster* cluster) +as_metrics_writer_destroy_nodes(as_cluster* cluster) { // Free node memory as_nodes* nodes = as_nodes_reserve(cluster); for (uint32_t i = 0; i < nodes->size; i++) { - as_metrics_writer_free_node_metrics(nodes->array[i]); + as_metrics_writer_destroy_node_metrics(nodes->array[i]); } as_nodes_release(nodes); } @@ -400,15 +396,15 @@ as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* ud if (mw->enable && mw->file != NULL) { as_status status = as_metrics_write_cluster(err, mw, cluster); if (status != AEROSPIKE_OK) { - as_metrics_writer_free_all_node_metrics(cluster); - as_metrics_writer_free_writer(mw); + as_metrics_writer_destroy_nodes(cluster); + as_metrics_writer_destroy(mw); LOG("as_metrics_writer_snapshot not ok"); return status; } uint32_t result = fflush(mw->file); if (result != 0) { - as_metrics_writer_free_all_node_metrics(cluster); - as_metrics_writer_free_writer(mw); + as_metrics_writer_destroy_nodes(cluster); + as_metrics_writer_destroy(mw); return as_error_update(err, AEROSPIKE_ERR_CLIENT, "File stream did not flush successfully: %s", mw->report_directory); } @@ -423,20 +419,18 @@ as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* uda LOG("as_metrics_writer_disable begin"); // write cluster into to file, disable as_metrics_writer* mw = udata; - if (mw->enable && mw->file != NULL) { - as_status status = as_metrics_write_cluster(err, mw, cluster); - uint32_t result = fclose(mw->file); - as_metrics_writer_free_all_node_metrics(cluster); - as_metrics_writer_free_writer(mw); - - if (status != AEROSPIKE_OK) { - return status; - } - - if (result != 0) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "File stream did not close 
successfully: %s", mw->report_directory); + if (mw != NULL) { + if (mw->enable && mw->file != NULL) { + as_status status = as_metrics_write_cluster(err, mw, cluster); + + if (status != AEROSPIKE_OK) { + as_metrics_writer_destroy_nodes(cluster); + as_metrics_writer_destroy(mw); + return status; + } } + as_metrics_writer_destroy_nodes(cluster); + as_metrics_writer_destroy(mw); } LOG("as_metrics_writer_disable end"); return AEROSPIKE_OK; @@ -450,11 +444,12 @@ as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) as_metrics_writer* mw = udata; if (mw->enable && mw->file != NULL) { - as_string_builder_append(mw->sb, time_str(time(NULL))); + as_string_builder_append(&mw->sb, time_str(time(NULL))); + as_node_reserve(node); as_metrics_write_node(mw, node); as_status status = as_metrics_write_line(mw, err); - - as_metrics_writer_free_node_metrics(node); + as_metrics_writer_destroy_node_metrics(node); + as_node_release(node); if (status != AEROSPIKE_OK) { return status; @@ -464,8 +459,6 @@ as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) return AEROSPIKE_OK; } - - //--------------------------------- // Functions //--------------------------------- @@ -681,13 +674,15 @@ void as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem) { LOG("as_metrics_process_cpu_load_mem_usage"); - double cpu_usage_d = as_metrics_process_cpu_load(); + /*double cpu_usage_d = as_metrics_process_cpu_load(); cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); *cpu_usage = (uint32_t)cpu_usage_d; - *mem = as_metrics_process_mem_usage(); + *mem = as_metrics_process_mem_usage();*/ + *cpu_usage = 100; + *mem = 100; } -static double +/*static double as_metrics_calculate_cpu_load(uint64_t idleTicks, uint64_t totalTicks) { LOG("as_metrics_calculate_cpu_load"); @@ -732,7 +727,7 @@ as_metrics_process_mem_usage() sizeof(memCounter)); return (uint32_t)memCounter.WorkingSetSize; -} +}*/ #endif diff --git a/src/test/aerospike_test.c 
b/src/test/aerospike_test.c index 5f4d4ff9f7..d4dd4ed429 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -412,11 +412,6 @@ static bool after(atf_plan* plan) as_error err; as_error_reset(&err); - if (as->cluster->metrics_enabled) { - aerospike_disable_metrics(as, &err); - } - - as_error_reset(&err); as_status status = aerospike_close(as, &err); aerospike_destroy(as); From f57b4d2308755e6047c0298bfd3f7e0212b9f813 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Fri, 2 Feb 2024 13:10:47 -0700 Subject: [PATCH 21/64] Fix formatting --- src/main/aerospike/aerospike.c | 3 +-- src/main/aerospike/as_cluster.c | 18 ++++++------------ src/main/aerospike/as_metrics.c | 18 ++++-------------- src/main/aerospike/as_node.c | 6 ++---- 4 files changed, 13 insertions(+), 32 deletions(-) diff --git a/src/main/aerospike/aerospike.c b/src/main/aerospike/aerospike.c index 70bc0d93d5..324658feee 100644 --- a/src/main/aerospike/aerospike.c +++ b/src/main/aerospike/aerospike.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. @@ -263,7 +263,6 @@ aerospike_close(aerospike* as, as_error* err) // Close sync only configurations immediately. 
as_cluster_destroy(cluster); } - as->cluster = NULL; } return err->code; diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index 401c1a8640..9e99775477 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -586,8 +586,7 @@ as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_policy_metrics* as_status as_cluster_disable_metrics(as_error* err, as_cluster* cluster) { - if (cluster->metrics_enabled) - { + if (cluster->metrics_enabled) { cluster->metrics_enabled = false; return cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); } @@ -598,8 +597,7 @@ as_cluster_disable_metrics(as_error* err, as_cluster* cluster) void as_cluster_add_tran(as_cluster* cluster) { - if (cluster->metrics_enabled) - { + if (cluster->metrics_enabled) { as_incr_uint64(&cluster->tran_count); } } @@ -654,8 +652,7 @@ as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* metrics_enabled) { as_status status = cluster->metrics_listeners.node_close_listener(err, node, node->cluster->metrics_listeners.udata); - if (status != AEROSPIKE_OK) - { + if (status != AEROSPIKE_OK) { return status; } } @@ -924,8 +921,7 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) // Remove nodes in a batch. 
if (nodes_to_remove.size > 0) { as_status status = as_cluster_remove_nodes(err, cluster, &nodes_to_remove); - if (status != AEROSPIKE_OK) - { + if (status != AEROSPIKE_OK) { return status; } nodes = cluster->nodes; @@ -982,11 +978,9 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) as_incr_uint32(&cluster->shm_info->cluster_shm->rebalance_gen); } - if (cluster->metrics_enabled && (cluster->tend_count % cluster->metrics_interval) == 0) - { + if (cluster->metrics_enabled && (cluster->tend_count % cluster->metrics_interval) == 0) { as_status status = cluster->metrics_listeners.snapshot_listener(err, cluster, cluster->metrics_listeners.udata); - if (status != AEROSPIKE_OK) - { + if (status != AEROSPIKE_OK) { return status; } } diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 9b27d3bde3..242ac39745 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -66,8 +66,7 @@ as_metrics_write_line(as_metrics_writer* mw, as_error* err) LOG("as_metrics_write_line begin"); as_string_builder_append_newline(&mw->sb); int written = fprintf(mw->file, "%s", mw->sb.data); - if (mw->sb.length != written) - { + if (mw->sb.length != written) { LOG("wrong number of chars written"); LOG("expected %d", mw->sb.length); } @@ -125,8 +124,7 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) as_string_builder_inita(&file_name, 100, true); as_string_builder_append(&file_name, mw->report_directory); char last_char = mw->report_directory[(strlen(mw->report_directory) - 1)]; - if (last_char != '/' && last_char != '\\') - { + if (last_char != '/' && last_char != '\\') { as_string_builder_append_char(&file_name, separator()); } as_string_builder_append(&file_name, "metrics-"); @@ -472,14 +470,7 @@ aerospike_enable_metrics(aerospike* as, as_error* err, struct as_policy_metrics_ as_status aerospike_disable_metrics(aerospike* as, as_error* err) { - as_cluster* cluster = as->cluster; - as_status status = 
as_cluster_disable_metrics(err, cluster); - if (status != AEROSPIKE_OK) - { - return status; - } - - return AEROSPIKE_OK; + return as_cluster_disable_metrics(err, as->cluster); } void @@ -502,8 +493,7 @@ char* as_latency_type_to_string(as_latency_type type) { //LOG("as_latency_type_to_string begin"); - switch (type) - { + switch (type) { case AS_LATENCY_TYPE_CONN: return "conn"; break; diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 5c25a98aa7..400e6f2c97 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -136,12 +136,10 @@ as_node_create(as_cluster* cluster, as_node_info* node_info) node->partition_changed = true; node->rebalance_changed = cluster->rack_aware; - if (cluster->metrics_enabled) - { + if (cluster->metrics_enabled) { node->metrics = as_node_metrics_init(cluster->metrics_latency_columns, cluster->metrics_latency_shift); } - else - { + else { node->metrics = NULL; } From 343eb1049e69916501d6fcb7d6267ded044b42ff Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Mon, 5 Feb 2024 09:35:30 -0700 Subject: [PATCH 22/64] PUshing latest --- examples/batch_examples/get/src/main/example.c | 6 +++--- src/main/aerospike/as_cluster.c | 2 +- src/main/aerospike/as_metrics.c | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/batch_examples/get/src/main/example.c b/examples/batch_examples/get/src/main/example.c index b3d699eec7..a5d5004338 100644 --- a/examples/batch_examples/get/src/main/example.c +++ b/examples/batch_examples/get/src/main/example.c @@ -86,7 +86,7 @@ main(int argc, char* argv[]) // enable metrics as_status status = aerospike_enable_metrics(&as, &err, &policy); - if (status != AEROSPIKE_OK) { + /*if (status != AEROSPIKE_OK) { LOG("aerospike_enable_metrics() returned %d - %s", err.code, err.message); cleanup(&as); exit(-1); @@ -216,9 +216,9 @@ main(int argc, char* argv[]) LOG("batch_write_operate_complex() returned %d - %s", err.code, err.message); cleanup(&as); 
exit(-1); - } + }*/ - as_sleep(30000); + as_sleep(10000); // Cleanup and disconnect from the database cluster. cleanup(&as); diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index 9e99775477..3b57057993 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -588,7 +588,7 @@ as_cluster_disable_metrics(as_error* err, as_cluster* cluster) { if (cluster->metrics_enabled) { cluster->metrics_enabled = false; - return cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); + //return cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); } return AEROSPIKE_OK; diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 242ac39745..191ff0f78f 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -328,7 +328,6 @@ as_metrics_writer_destroy(as_metrics_writer* mw) { LOG("destorying string builder"); as_string_builder_destroy(&mw->sb); - LOG("string builder %s", &mw->sb); fclose(mw->file); cf_free(mw); } From 3d804c06c72690608cea0ee9a4346e6867863de8 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Fri, 9 Feb 2024 10:45:34 -0700 Subject: [PATCH 23/64] Update cpu and mem --- .../batch_examples/get/src/main/example.c | 4 +- src/include/aerospike/as_metrics.h | 70 +- src/main/aerospike/as_cluster.c | 2 +- src/main/aerospike/as_metrics.c | 723 ++++++++++-------- src/test/aerospike_test.c | 3 + 5 files changed, 409 insertions(+), 393 deletions(-) diff --git a/examples/batch_examples/get/src/main/example.c b/examples/batch_examples/get/src/main/example.c index a5d5004338..eed1267bd2 100644 --- a/examples/batch_examples/get/src/main/example.c +++ b/examples/batch_examples/get/src/main/example.c @@ -86,7 +86,7 @@ main(int argc, char* argv[]) // enable metrics as_status status = aerospike_enable_metrics(&as, &err, &policy); - /*if (status != AEROSPIKE_OK) { + if (status != AEROSPIKE_OK) { 
LOG("aerospike_enable_metrics() returned %d - %s", err.code, err.message); cleanup(&as); exit(-1); @@ -216,7 +216,7 @@ main(int argc, char* argv[]) LOG("batch_write_operate_complex() returned %d - %s", err.code, err.message); cleanup(&as); exit(-1); - }*/ + } as_sleep(10000); diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 75f4705f9c..e9fd85c0c1 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -29,6 +29,10 @@ #include #endif +#ifdef _WIN32 +#include +#endif + #ifdef __cplusplus extern "C" { #endif @@ -39,7 +43,6 @@ extern "C" { #define NS_TO_MS 1000000 #define MIN_FILE_SIZE 1000000 -#define UTC_STR_LEN 72 typedef uint8_t as_latency_type; @@ -59,9 +62,9 @@ typedef uint8_t as_latency_type; * Latency bucket counts are cumulative and not reset on each metrics snapshot interval */ typedef struct as_latency_buckets_s { + uint64_t* buckets; uint32_t latency_shift; uint32_t latency_columns; - uint64_t* buckets; } as_latency_buckets; struct as_policy_metrics_s; @@ -119,20 +122,20 @@ typedef struct as_node_metrics_s { */ typedef struct as_metrics_writer_s { FILE* file; - - as_string_builder sb; - - bool enable; - + const char* report_directory; uint64_t max_size; - uint64_t size; - uint32_t latency_columns; - uint32_t latency_shift; - - const char* report_directory; +#ifdef _WIN32 + FILETIME prev_process_times_kernel; + FILETIME prev_system_times_kernel; + FILETIME prev_process_times_user; + FILETIME prev_system_times_user; + HANDLE process; + DWORD pid; +#endif + bool enable; } as_metrics_writer; /** @@ -208,49 +211,6 @@ as_node_metrics_init(uint32_t latency_columns, uint32_t latency_shift); void as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed); -/** - * Calculate CPU and memory usage - */ -void -as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem); - -#if defined(__linux__) -/** - * Gets memory and CPU usage 
information from proc/stat - */ -void -as_metrics_proc_stat_mem_cpu(double* vm_usage, double* resident_set, double* cpu_usage); -#endif - -#if defined(_MSC_VER) - -/** - * Gets CPU usage using GetSystemTimes() - */ -double -as_metrics_process_cpu_load(); - -/** - * Gets memory usage using GetProcessMemoryInfo() - */ -uint32_t -as_metrics_process_mem_usage(); -#endif - -#if defined(__APPLE__) -/** - * Gets memory usage using task_info - */ -double -as_metrics_process_mem_usage(void); - -/** - * Gets cpu usage using ps -p - */ -double -as_metrics_process_cpu_load(void); -#endif - #ifdef __cplusplus } // end extern "C" #endif diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index 3b57057993..9e99775477 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -588,7 +588,7 @@ as_cluster_disable_metrics(as_error* err, as_cluster* cluster) { if (cluster->metrics_enabled) { cluster->metrics_enabled = false; - //return cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); + return cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); } return AEROSPIKE_OK; diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 191ff0f78f..e70b2043f9 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -21,6 +21,7 @@ #include #include #include +#include #define LOG(_fmt, ...) 
{ printf(_fmt "\n", ##__VA_ARGS__); fflush(stdout); } @@ -33,50 +34,42 @@ extern uint32_t as_event_loop_capacity; // Static Functions //--------------------------------- -static const char* -time_str_file(time_t t) +static void +timestamp_to_string_filename(char* str, size_t str_size) { - static char buf[UTC_STR_LEN + 1]; - struct tm* local = localtime(&t); - snprintf(buf, sizeof(buf), + time_t now = time(NULL); + struct tm * local = localtime(&now); + snprintf(str, str_size, "%4d%02d%02d%02d%02d%02d", 1900 + local->tm_year, local->tm_mon + 1, local->tm_mday, local->tm_hour, local->tm_min, local->tm_sec); - return buf; } -static const char* -time_str(time_t t) +static void +timestamp_to_string(char* str, size_t str_size) { - static char buf[UTC_STR_LEN + 1]; - struct tm* local = localtime(&t); - snprintf(buf, sizeof(buf), + time_t now = time(NULL); + struct tm* local = localtime(&now); + snprintf(str, str_size, "%4d-%02d-%02d %02d:%02d:%02d", 1900 + local->tm_year, local->tm_mon + 1, local->tm_mday, local->tm_hour, local->tm_min, local->tm_sec); - return buf; } static as_status as_metrics_open_writer(as_metrics_writer* mw, as_error* err); static as_status -as_metrics_write_line(as_metrics_writer* mw, as_error* err) +as_metrics_write_line(as_metrics_writer* mw, const char* data, as_error* err) { - LOG("as_metrics_write_line begin"); - as_string_builder_append_newline(&mw->sb); - int written = fprintf(mw->file, "%s", mw->sb.data); - if (mw->sb.length != written) { - LOG("wrong number of chars written"); - LOG("expected %d", mw->sb.length); + int written = fprintf(mw->file, "%s", data); + if (written <= 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Failed to write metrics data: %d,%s", written, mw->report_directory); } - LOG("written %d", written); - LOG("data %s", mw->sb.data); - mw->size += mw->sb.length; - as_string_builder_reset(&mw->sb); + mw->size += written; if (mw->max_size > 0 && mw->size >= mw->max_size) { - LOG("need new file"); uint32_t 
result = fclose(mw->file); if (result != 0) { @@ -86,23 +79,30 @@ as_metrics_write_line(as_metrics_writer* mw, as_error* err) return as_metrics_open_writer(mw, err); } - LOG("as_metrics_write_line end"); return AEROSPIKE_OK; } static void* as_metrics_writer_init_udata() { - LOG("as_metrics_writer_init_udata begin"); - as_metrics_writer* mw = (as_metrics_writer *)cf_malloc(sizeof(as_metrics_writer)); + as_metrics_writer* mw = (as_metrics_writer*)cf_malloc(sizeof(as_metrics_writer)); mw->file = NULL; - mw->enable = false; + mw->report_directory = NULL; mw->max_size = 0; mw->latency_columns = 0; mw->latency_shift = 0; - mw->report_directory = NULL; - - LOG("as_metrics_writer_init_udata end"); + mw->enable = false; +#ifdef _WIN32 + mw->pid = GetCurrentProcessId(); + mw->process = OpenProcess(PROCESS_QUERY_INFORMATION, false, mw->pid); + FILETIME dummy; + if (mw->process != NULL) + { + GetProcessTimes(mw->process, &dummy, &dummy, &mw->prev_process_times_kernel, &mw->prev_process_times_user); + GetSystemTimes(0, &mw->prev_system_times_kernel, &mw->prev_system_times_user); + } +#endif + return mw; } @@ -118,20 +118,21 @@ static inline char separator() static as_status as_metrics_open_writer(as_metrics_writer* mw, as_error* err) { - LOG("as_metrics_open_writer begin"); - const char* now_file = time_str_file(time(NULL)); + as_error_reset(err); + char now_file_str[128]; + timestamp_to_string_filename(now_file_str, sizeof(now_file_str)); + as_string_builder file_name; - as_string_builder_inita(&file_name, 100, true); + as_string_builder_inita(&file_name, 256, false); as_string_builder_append(&file_name, mw->report_directory); char last_char = mw->report_directory[(strlen(mw->report_directory) - 1)]; if (last_char != '/' && last_char != '\\') { as_string_builder_append_char(&file_name, separator()); } as_string_builder_append(&file_name, "metrics-"); - as_string_builder_append(&file_name, now_file); + as_string_builder_append(&file_name, now_file_str); 
as_string_builder_append(&file_name, ".log"); mw->file = fopen(file_name.data, "w"); - as_string_builder_destroy(&file_name); if (!mw->file) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, @@ -139,28 +140,23 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) } mw->size = 0; - as_string_builder_inita(&mw->sb, 2048, true); - const char* now = time_str(time(NULL)); - as_string_builder_append(&mw->sb, now); - as_string_builder_append(&mw->sb, " header(1)"); - as_string_builder_append(&mw->sb, " cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]]"); - as_string_builder_append(&mw->sb, " eventloop[processSize,queueSize]"); - as_string_builder_append(&mw->sb, " node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]]"); - as_string_builder_append(&mw->sb, " conn[inUse,inPool,opened,closed]"); - as_string_builder_append(&mw->sb, " latency("); - as_string_builder_append_int(&mw->sb, mw->latency_columns); - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_int(&mw->sb, mw->latency_shift); - as_string_builder_append_char(&mw->sb, ')'); - as_string_builder_append(&mw->sb, "[type[l1,l2,l3...]]"); - LOG("as_metrics_open_writer end"); - return as_metrics_write_line(mw, err); + char now_str[128]; + timestamp_to_string(now_str, sizeof(now_str)); + + char data[512]; + int rv = snprintf(data, sizeof(data), "%s header(1) cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]] eventloop[processSize,queueSize] node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]] conn[inUse,inPool,opened,closed] latency(%u,%u)[type[l1,l2,l3...]]\n", + now_str, mw->latency_columns, mw->latency_shift); + if (rv <= 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Failed to construct metrics header: %d,%s", rv, file_name.data); + } + + return as_metrics_write_line(mw, data, err); } static void as_metrics_get_node_sync_conn_stats(const 
struct as_node_s* node, struct as_conn_stats_s* sync) { - LOG("as_metrics_get_node_sync_conn_stats begin"); uint32_t max = node->cluster->conn_pools_per_node; // Sync connection summary. @@ -177,13 +173,11 @@ as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn } sync->opened = node->sync_conns_opened; sync->closed = node->sync_conns_closed; - LOG("as_metrics_get_node_sync_conn_stats end"); } static void as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* async) { - LOG("as_metrics_get_node_async_conn_stats begin"); // Async connection summary. if (as_event_loop_capacity > 0) { for (uint32_t i = 0; i < as_event_loop_size; i++) { @@ -191,79 +185,319 @@ as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_con as_sum_no_lock(&node->async_conn_pools[i], async); } } - LOG("as_metrics_get_node_async_conn_stats end"); } static void -as_metrics_write_conn(as_metrics_writer* mw, const struct as_conn_stats_s* stats) +as_metrics_write_conn(as_metrics_writer* mw, as_string_builder* sb, const struct as_conn_stats_s* stats) { - LOG("as_metrics_write_conn begin"); - as_string_builder_append_uint(&mw->sb, stats->in_use); - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_uint(&mw->sb, stats->in_pool); - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_uint(&mw->sb, stats->opened); // Cumulative. Not reset on each interval. - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_uint(&mw->sb, stats->closed); // Cumulative. Not reset on each interval. - LOG("as_metrics_write_conn end"); + as_string_builder_append_uint(sb, stats->in_use); + as_string_builder_append_char(sb, ','); + as_string_builder_append_uint(sb, stats->in_pool); + as_string_builder_append_char(sb, ','); + as_string_builder_append_uint(sb, stats->opened); // Cumulative. Not reset on each interval. 
+ as_string_builder_append_char(sb, ','); + as_string_builder_append_uint(sb, stats->closed); // Cumulative. Not reset on each interval. } static void -as_metrics_write_node(as_metrics_writer* mw, struct as_node_s* node) +as_metrics_write_node(as_metrics_writer* mw, as_string_builder* sb, struct as_node_s* node) { - LOG("as_metrics_write_node begin"); - as_string_builder_append_char(&mw->sb, '['); - as_string_builder_append(&mw->sb, node->name); - as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_char(sb, '['); + as_string_builder_append(sb, node->name); + as_string_builder_append_char(sb, ','); - as_string_builder_append(&mw->sb, as_node_get_address_string(node)); - as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append(sb, as_node_get_address_string(node)); + as_string_builder_append_char(sb, ','); struct as_conn_stats_s sync; struct as_conn_stats_s async; as_sum_init(&sync); as_sum_init(&async); as_metrics_get_node_sync_conn_stats(node, &sync); - as_metrics_write_conn(mw, &sync); - as_string_builder_append_char(&mw->sb, ','); + as_metrics_write_conn(mw, sb, &sync); + as_string_builder_append_char(sb, ','); as_metrics_get_node_async_conn_stats(node, &async); - as_metrics_write_conn(mw, &async); - as_string_builder_append_char(&mw->sb, ','); + as_metrics_write_conn(mw, sb, &async); + as_string_builder_append_char(sb, ','); - as_string_builder_append_uint64(&mw->sb, as_node_get_error_count(node)); - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_uint64(&mw->sb, as_node_get_timeout_count(node)); - as_string_builder_append(&mw->sb, ",["); + as_string_builder_append_uint64(sb, as_node_get_error_count(node)); + as_string_builder_append_char(sb, ','); + as_string_builder_append_uint64(sb, as_node_get_timeout_count(node)); + as_string_builder_append(sb, ",["); as_node_metrics* node_metrics = node->metrics; uint32_t max = AS_LATENCY_TYPE_NONE; for (uint32_t i = 0; i < max; i++) { if (i > 0) { - 
as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_char(sb, ','); } - as_string_builder_append(&mw->sb, as_latency_type_to_string(i)); - as_string_builder_append_char(&mw->sb, '['); + as_string_builder_append(sb, as_latency_type_to_string(i)); + as_string_builder_append_char(sb, '['); as_latency_buckets* buckets = &node_metrics->latency[i]; uint32_t bucket_max = buckets->latency_columns; for (uint32_t j = 0; j < bucket_max; j++) { if (j > 0) { - as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_char(sb, ','); } - as_string_builder_append_uint64(&mw->sb, as_metrics_get_bucket(buckets, j)); + as_string_builder_append_uint64(sb, as_metrics_get_bucket(buckets, j)); } - as_string_builder_append_char(&mw->sb, ']'); + as_string_builder_append_char(sb, ']'); + } + as_string_builder_append(sb, "]]"); +} + +#if defined(__linux__) + +#include +#include + +static as_status +as_metrics_proc_stat_mem_cpu(as_error* err, double* vm_usage, double* resident_set, double* cpu_usage) +{ + *vm_usage = 0.0; + *resident_set = 0.0; + + FILE* proc_stat = fopen("/proc/self/stat", "r"); + if (!proc_stat) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating memory and CPU usage"); + } + + // dummies + int dummy_d; + char dummy_c; + unsigned int dummy_u; + long unsigned int dummy_lu; + long int dummy_ld; + + // the fields we want + uint64_t utime, stime; + long long unsigned int starttime; + uint64_t vsize; + int64_t rss; + + int matched = fscanf(proc_stat, "%d %s %c %d %d %d %d %d %u %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %llu %lu %ld", + &dummy_d, &dummy_c, &dummy_c, &dummy_d, &dummy_d, &dummy_d, &dummy_d, &dummy_d, &dummy_u, &dummy_lu, &dummy_lu, &dummy_lu, &dummy_lu, + &utime, &stime, &dummy_ld, &dummy_ld, &dummy_ld, &dummy_ld, &dummy_ld, &dummy_ld, &starttime, &vsize, &rss); + + if (matched == 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating memory and CPU usage"); + } + + int result = 
fclose(proc_stat); + + if (result != 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error closing /proc/self/stat"); + } + + int64_t page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages + *vm_usage = vsize / 1024.0; + *resident_set = rss * page_size_kb; + + float u_time_sec = utime / sysconf(_SC_CLK_TCK); + float s_time_sec = stime / sysconf(_SC_CLK_TCK); + float start_time_sec = starttime / sysconf(_SC_CLK_TCK); + + struct sysinfo info; + int success = sysinfo(&info); + if (success != 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating CPU usage"); } - as_string_builder_append(&mw->sb, "]]"); - LOG("as_metrics_write_node end"); + + *cpu_usage = (u_time_sec + s_time_sec) / (info.uptime - start_time_sec) * 100; + + return AEROSPIKE_OK; } +static as_status +as_metrics_process_cpu_load_mem_usage(as_error* err, uint32_t* cpu_usage, uint32_t* mem) +{ + double resident_set = 0.0; + double mem_d = 0.0; + double cpu_usage_d = 0.0; + as_status result = as_metrics_proc_stat_mem_cpu(err, &mem_d, &resident_set, &cpu_usage_d); + if (result != AEROSPIKE_OK) { + return result; + } + + LOG("cpu %f", cpu_usage_d); + cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); + mem_d = mem_d + 0.5 - (mem_d < 0); + *cpu_usage = (uint32_t)cpu_usage_d; + *mem = (uint32_t)mem_d; + + return AEROSPIKE_OK; +} +#endif + +#if defined(_MSC_VER) +#include + +static ULONGLONG +as_metrics_filetime_difference(FILETIME* prev_kernel, FILETIME* prev_user, FILETIME* cur_kernel, FILETIME* cur_user) { + LARGE_INTEGER a1, a2; + a1.LowPart = prev_kernel->dwLowDateTime; + a1.HighPart = prev_kernel->dwHighDateTime; + a2.LowPart = prev_user->dwLowDateTime; + a2.HighPart = prev_user->dwHighDateTime; + + LARGE_INTEGER b1, b2; + b1.LowPart = cur_kernel->dwLowDateTime; + b1.HighPart = cur_kernel->dwHighDateTime; + b2.LowPart = cur_user->dwLowDateTime; + b2.HighPart = cur_user->dwHighDateTime; + + //a1 and b1 - contains kernel times + //a2 
and b2 - contains user times + return (b1.QuadPart - a1.QuadPart) + (b2.QuadPart - a2.QuadPart); +} + +static double +as_metrics_process_cpu_load(as_metrics_writer* mw) +{ + if (mw->process == NULL) { + return -1; + } + + FILETIME dummy; + FILETIME process_times_kernel, process_times_user, system_times_kernel, system_times_user; + + if (GetProcessTimes(mw->process, &dummy, &dummy, &process_times_kernel, &process_times_user) == 0) { + return -1; + } + if (GetSystemTimes(0, &system_times_kernel, &system_times_user) == 0) { + return -1; + } + + // Get diffrence latest - previous times. + ULONGLONG proc = as_metrics_filetime_difference(&mw->prev_process_times_kernel, &mw->prev_process_times_user, + &process_times_kernel, &process_times_user); + ULONGLONG system = as_metrics_filetime_difference(&mw->prev_system_times_kernel, &mw->prev_system_times_user, + &system_times_kernel, &system_times_user); + double usage = 0.0; + + // Calcualte percentage. + if (system != 0) { + usage = 100.0 * (proc / (double)system); + } + + // Assign latest times to previous times for the next round of calculation. 
+ mw->prev_process_times_kernel = process_times_kernel; + mw->prev_process_times_user = process_times_user; + mw->prev_system_times_kernel = system_times_kernel; + mw->prev_system_times_user = system_times_user; + + return usage; +} + +static uint32_t +as_metrics_process_mem_usage() +{ + PROCESS_MEMORY_COUNTERS memCounter; + BOOL result = GetProcessMemoryInfo(GetCurrentProcess(), + &memCounter, + sizeof(memCounter)); + + return (uint32_t)memCounter.WorkingSetSize; +} + +static as_status +as_metrics_process_cpu_load_mem_usage(as_error* err, as_metrics_writer* mw, uint32_t* cpu_usage, uint32_t* mem) +{ + double cpu_usage_d = as_metrics_process_cpu_load(mw); + if (cpu_usage_d < 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating CPU usage"); + } + cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); + *cpu_usage = (uint32_t)cpu_usage_d; + *mem = as_metrics_process_mem_usage(); + + return AEROSPIKE_OK; +} + +#endif + +#if defined(__APPLE__) +#include +#include +#include + +static double +as_metrics_process_mem_usage() +{ + struct task_basic_info t_info; + mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; + + if (KERN_SUCCESS != task_info(mach_task_self(), + TASK_BASIC_INFO, (task_info_t)&t_info, + &t_info_count)) + { + return -1; + } + + return t_info.virtual_size; +} + +static double +as_metrics_process_cpu_load() +{ + pid_t pid = getpid(); + as_string_builder sb; + as_string_builder_inita(&sb, 128, false); + as_string_builder_append(&sb, "ps -p "); + as_string_builder_append_int(&sb, pid); + as_string_builder_append(&sb, " -o %CPU"); + FILE* file = popen(sb.data, "r"); + if (!file) { + return -1; + } + char[5] cpu_holder; + char[6] cpu_percent; + fgets(file, 4, cpu_holder); // %CPU placeholder + fgets(file, 5, cpu_percent); + int result = pclose(file); + if (result < 0) { + return -1; + } + + return atof(cpu_percent); +} + +static as_status +as_metrics_process_cpu_load_mem_usage(as_error* err, uint32_t* cpu_usage, uint32_t* mem) 
+{ + double cpu_usage_d = as_metrics_process_cpu_load(); + double mem_d = as_metrics_process_mem_usage(); + + if (cpu_usage_d < 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating CPU usage"); + } + + if (mem_d < 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating memory usage"); + } + + cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); + mem_d = mem_d + 0.5 - (mem_d < 0); + *cpu_usage = (uint32_t)cpu_usage_d; + *mem = (uint32_t)mem_d; + + return AEROSPIKE_OK; +} +#endif + static as_status as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster_s* cluster) { - LOG("as_metrics_write_cluster begin"); char* cluster_name = cluster->cluster_name; if (cluster_name == NULL) { @@ -272,37 +506,48 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster uint32_t cpu_load = 0; uint32_t mem = 0; - as_metrics_process_cpu_load_mem_usage(&cpu_load, &mem); - - as_string_builder_append(&mw->sb, time_str(time(NULL))); - as_string_builder_append(&mw->sb, " cluster["); - as_string_builder_append(&mw->sb, cluster_name); - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_int(&mw->sb, cpu_load); - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_int(&mw->sb, mem); - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_uint(&mw->sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_uint64(&mw->sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_uint64(&mw->sb, cluster->retry_count); // Cumulative. Not reset on each interval. - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_uint64(&mw->sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. 
- as_string_builder_append(&mw->sb, ",["); +#ifdef _WIN32 + as_status result = as_metrics_process_cpu_load_mem_usage(err, mw, &cpu_load, &mem); +#else + as_status result = as_metrics_process_cpu_load_mem_usage(err, &cpu_load, &mem); +#endif + if (result != AEROSPIKE_OK) { + return result; + } + + char now_str[128]; + timestamp_to_string(now_str, sizeof(now_str)); + as_string_builder sb; + as_string_builder_inita(&sb, 16384, true); + as_string_builder_append(&sb, now_str); + as_string_builder_append(&sb, " cluster["); + as_string_builder_append(&sb, cluster_name); + as_string_builder_append_char(&sb, ','); + as_string_builder_append_int(&sb, cpu_load); + as_string_builder_append_char(&sb, ','); + as_string_builder_append_int(&sb, mem); + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint(&sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint64(&sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint64(&sb, cluster->retry_count); // Cumulative. Not reset on each interval. + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint64(&sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. 
+ as_string_builder_append(&sb, ",["); for (uint32_t i = 0; i < as_event_loop_size; i++) { as_event_loop* loop = &as_event_loops[i]; if (i > 0) { - as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_char(&sb, ','); } - as_string_builder_append_char(&mw->sb, '['); - as_string_builder_append_int(&mw->sb, as_event_loop_get_process_size(loop)); - as_string_builder_append_char(&mw->sb, ','); - as_string_builder_append_uint(&mw->sb, as_event_loop_get_queue_size(loop)); - as_string_builder_append_char(&mw->sb, ']'); + as_string_builder_append_char(&sb, '['); + as_string_builder_append_int(&sb, as_event_loop_get_process_size(loop)); + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint(&sb, as_event_loop_get_queue_size(loop)); + as_string_builder_append_char(&sb, ']'); } - as_string_builder_append(&mw->sb, "],["); + as_string_builder_append(&sb, "],["); as_nodes* nodes = as_nodes_reserve(cluster); for (uint32_t i = 0; i < nodes->size; i++) { @@ -310,24 +555,24 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster as_node_reserve(node); if (i > 0) { - as_string_builder_append_char(&mw->sb, ','); + as_string_builder_append_char(&sb, ','); } - as_metrics_write_node(mw, node); + as_metrics_write_node(mw, &sb, node); as_node_release(node); } - as_string_builder_append(&mw->sb, "]]"); + as_string_builder_append(&sb, "]]"); as_nodes_release(nodes); - LOG("as_metrics_write_cluster end"); - return as_metrics_write_line(mw, err); + as_string_builder_append_newline(&sb); + as_status status = as_metrics_write_line(mw, sb.data, err); + as_string_builder_destroy(&sb); + return status; } static void as_metrics_writer_destroy(as_metrics_writer* mw) { - LOG("destorying string builder"); - as_string_builder_destroy(&mw->sb); fclose(mw->file); cf_free(mw); } @@ -360,7 +605,7 @@ as_metrics_writer_destroy_nodes(as_cluster* cluster) static as_status as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* 
policy, void* udata) { - LOG("as_metrics_writer_enable begin"); + as_error_reset(err); if (policy->report_size_limit != 0 && policy->report_size_limit < MIN_FILE_SIZE) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, "Metrics policy report_size_limit %d must be at least %d", policy->report_size_limit, MIN_FILE_SIZE); @@ -380,14 +625,13 @@ as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* policy } mw->enable = true; - LOG("as_metrics_writer_enable end"); return AEROSPIKE_OK; } static as_status as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* udata) { - LOG("as_metrics_writer_snapshot begin"); + as_error_reset(err); as_metrics_writer* mw = udata; if (mw->enable && mw->file != NULL) { @@ -395,7 +639,6 @@ as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* ud if (status != AEROSPIKE_OK) { as_metrics_writer_destroy_nodes(cluster); as_metrics_writer_destroy(mw); - LOG("as_metrics_writer_snapshot not ok"); return status; } uint32_t result = fflush(mw->file); @@ -406,15 +649,15 @@ as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* ud "File stream did not flush successfully: %s", mw->report_directory); } } - LOG("as_metrics_writer_snapshot end"); + return AEROSPIKE_OK; } static as_status as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* udata) { - LOG("as_metrics_writer_disable begin"); // write cluster into to file, disable + as_error_reset(err); as_metrics_writer* mw = udata; if (mw != NULL) { if (mw->enable && mw->file != NULL) { @@ -429,30 +672,35 @@ as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* uda as_metrics_writer_destroy_nodes(cluster); as_metrics_writer_destroy(mw); } - LOG("as_metrics_writer_disable end"); + return AEROSPIKE_OK; } static as_status as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) { - LOG("as_metrics_writer_node_close begin"); // write node info to file 
+ as_error_reset(err); as_metrics_writer* mw = udata; if (mw->enable && mw->file != NULL) { - as_string_builder_append(&mw->sb, time_str(time(NULL))); + char now_str[128]; + timestamp_to_string(now_str, sizeof(now_str)); + as_string_builder sb; + as_string_builder_inita(&sb, 16384, true); + as_string_builder_append(&sb, now_str); as_node_reserve(node); - as_metrics_write_node(mw, node); - as_status status = as_metrics_write_line(mw, err); + as_metrics_write_node(mw, &sb, node); + as_string_builder_append_newline(&sb); + as_status status = as_metrics_write_line(mw, sb.data, err); + as_metrics_writer_destroy_node_metrics(node); as_node_release(node); + as_string_builder_destroy(&sb); - if (status != AEROSPIKE_OK) { - return status; - } + return status; } - LOG("as_metrics_writer_node_close end"); + return AEROSPIKE_OK; } @@ -475,7 +723,6 @@ aerospike_disable_metrics(aerospike* as, as_error* err) void as_metrics_policy_init(as_policy_metrics* policy) { - LOG("as_metrics_policy_init begin"); policy->report_size_limit = 0; policy->interval = 30; policy->latency_columns = 7; @@ -485,13 +732,11 @@ as_metrics_policy_init(as_policy_metrics* policy) policy->metrics_listeners.node_close_listener = as_metrics_writer_node_close; policy->metrics_listeners.snapshot_listener = as_metrics_writer_snapshot; policy->metrics_listeners.udata = as_metrics_writer_init_udata(); - LOG("as_metrics_policy_init end"); } char* as_latency_type_to_string(as_latency_type type) { - //LOG("as_latency_type_to_string begin"); switch (type) { case AS_LATENCY_TYPE_CONN: return "conn"; @@ -515,33 +760,28 @@ as_latency_type_to_string(as_latency_type type) return "none"; break; } - LOG("as_latency_type_to_string end"); } void as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, uint32_t latency_columns, uint32_t latency_shift) { - LOG("as_metrics_latency_buckets_init begin"); latency_buckets->latency_columns = latency_columns; latency_buckets->latency_shift = latency_shift; 
latency_buckets->buckets = cf_malloc(sizeof(uint64_t) * latency_columns); for (uint32_t i = 0; i < latency_columns; i++) { as_store_uint64(&latency_buckets->buckets[i], 0); } - LOG("as_metrics_latency_buckets_init end"); } uint64_t as_metrics_get_bucket(as_latency_buckets* buckets, uint32_t i) { - //LOG("as_metrics_get_bucket"); return as_load_uint64(&buckets->buckets[i]); } void as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t elapsed) { - LOG("as_metrics_latency_buckets_add"); uint32_t index = as_metrics_get_index(latency_buckets, elapsed); as_incr_uint64(&latency_buckets->buckets[index]); } @@ -549,7 +789,6 @@ as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t ela uint32_t as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos) { - LOG("as_metrics_get_index begin"); // Convert nanoseconds to milliseconds. uint64_t elapsed = elapsed_nanos / NS_TO_MS; @@ -567,210 +806,24 @@ as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos } limit <<= latency_buckets->latency_shift; } - LOG("as_metrics_get_index end"); return last_bucket; } as_node_metrics* as_node_metrics_init(uint32_t latency_columns, uint32_t latency_shift) { - LOG("as_node_metrics_init begin"); - as_node_metrics* node_metrics = (as_node_metrics *)cf_malloc(sizeof(as_node_metrics)); + as_node_metrics* node_metrics = (as_node_metrics*)cf_malloc(sizeof(as_node_metrics)); uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; - node_metrics->latency = (as_latency_buckets *)cf_malloc(sizeof(as_latency_buckets) * max_latency_type); + node_metrics->latency = (as_latency_buckets*)cf_malloc(sizeof(as_latency_buckets) * max_latency_type); for (uint32_t i = 0; i < max_latency_type; i++) { as_metrics_latency_buckets_init(&node_metrics->latency[i], latency_columns, latency_shift); } - LOG("as_node_metrics_init end"); + return node_metrics; } void as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type 
latency_type, uint64_t elapsed) { - LOG("as_metrics_add_latency"); as_metrics_latency_buckets_add(&node_metrics->latency[latency_type], elapsed); } - - -#if defined(__linux__) -//#include -//#include -//#include -//#include -//#include - -void -as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem) -{ - /*double resident_set, mem_d, cpu_usage_d; - as_metrics_proc_stat_mem_cpu(&mem_d, &resident_set, &cpu_usage_d); - cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); - mem_d = mem_d + 0.5 - (mem_d < 0); - *cpu_usage = (uint32_t)cpu_usage_d; - *mem = (uint32_t)mem_d;*/ - *cpu_usage = 100; - *mem = 100; -} - -/*void -as_metrics_proc_stat_mem_cpu(double* vm_usage, double* resident_set, double* cpu_usage) -{ - using std::ios_base; - using std::ifstream; - using std::string; - - vm_usage = 0.0; - resident_set = 0.0; - - ifstream stat_stream("/proc/self/stat", ios_base::in); - - // dummy vars for leading entries in stat that we don't care about - string pid, comm, state, ppid, pgrp, session, tty_nr; - string tpgid, flags, minflt, cminflt, majflt, cmajflt; - string cutime, cstime, priority, nice; - string O, itrealvalue; - - // the fields we want - uint64_t utime, stime, starttime; - uint64_t vsize; - int64_t rss; - - stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr - >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt - >> utime >> stime >> cutime >> cstime >> priority >> nice - >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest - - stat_stream.close(); - - int64_t page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages - *vm_usage = vsize / 1024.0; - *resident_set = rss * page_size_kb; - - uint64_t u_time_sec = utime / sysconf(_SC_CLK_TCK); - uint64_t s_time_sec = stime / sysconf(_SC_CLK_TCK); - uint64_t start_time_sec = starttime / sysconf(_SC_CLK_TCK); - - *cpu_usage = (u_time_sec + s_time_sec) / (cf_get_seconds() - start_time_sec); -}*/ -#endif - -#if 
defined(_MSC_VER) -#include -#include - -void -as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem) -{ - LOG("as_metrics_process_cpu_load_mem_usage"); - /*double cpu_usage_d = as_metrics_process_cpu_load(); - cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); - *cpu_usage = (uint32_t)cpu_usage_d; - *mem = as_metrics_process_mem_usage();*/ - *cpu_usage = 100; - *mem = 100; -} - -/*static double -as_metrics_calculate_cpu_load(uint64_t idleTicks, uint64_t totalTicks) -{ - LOG("as_metrics_calculate_cpu_load"); - static uint64_t _previousTotalTicks = 0; - static uint64_t _previousIdleTicks = 0; - - uint64_t totalTicksSinceLastTime = totalTicks - _previousTotalTicks; - uint64_t idleTicksSinceLastTime = idleTicks - _previousIdleTicks; - - double ret = 1.0f - ((totalTicksSinceLastTime > 0) ? ((double)idleTicksSinceLastTime) / totalTicksSinceLastTime : 0); - - _previousTotalTicks = totalTicks; - _previousIdleTicks = idleTicks; - return ret; -} - -static uint64_t -as_metrics_file_time_to_uint_64(const FILETIME ft) -{ - return (((uint64_t)(ft.dwHighDateTime)) << 32) | ((uint64_t)ft.dwLowDateTime); -} - -// Returns 1.0f for "CPU fully pinned", 0.0f for "CPU idle", or somewhere in between -// You'll need to call this at regular intervals, since it measures the load between -// the previous call and the current one. Returns -1.0 on error. -double -as_metrics_process_cpu_load() -{ - LOG("as_metrics_process_cpu_load"); - FILETIME idleTime, kernelTime, userTime; - return GetSystemTimes(&idleTime, &kernelTime, &userTime) ? 
- as_metrics_calculate_cpu_load(as_metrics_file_time_to_uint_64(idleTime), as_metrics_file_time_to_uint_64(kernelTime) + as_metrics_file_time_to_uint_64(userTime)) * 100: -1.0f; -} - -uint32_t -as_metrics_process_mem_usage() -{ - LOG("as_metrics_process_mem_usage"); - PROCESS_MEMORY_COUNTERS memCounter; - BOOL result = GetProcessMemoryInfo(GetCurrentProcess(), - &memCounter, - sizeof(memCounter)); - - return (uint32_t)memCounter.WorkingSetSize; -}*/ - -#endif - -#if defined(__APPLE__) -#include -#include -#include - -void -as_metrics_process_cpu_load_mem_usage(uint32_t* cpu_usage, uint32_t* mem) -{ - double cpu_usage_d = as_metrics_process_cpu_load(); - double mem_d = as_metrics_process_mem_usage(); - cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); - mem_d = mem_d + 0.5 - (mem_d < 0); - *cpu_usage = (uint32_t)cpu_usage_d; - *mem = (uint32_t)mem_d; -} - -double -as_metrics_process_mem_usage() -{ - struct task_basic_info t_info; - mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; - - if (KERN_SUCCESS != task_info(mach_task_self(), - TASK_BASIC_INFO, (task_info_t)&t_info, - &t_info_count)) - { - return -1; - } - - return t_info.virtual_size; -} - -double -as_metrics_process_cpu_load() -{ - /* This code does not compile on mac. 
- pid_t pid = getpid(); - as_string_builder sb; - as_string_builder_inita(&sb, 20, true); - as_string_builder_append(&sb, "ps -p "); - as_string_builder_append_int(&sb, pid); - as_string_builder_append(&sb, " -o %CPU"); - FILE* result = popen(sb.data); - char[5] cpu_holder; - char[6] cpu_percent; - fgets(result, 4, cpu_holder); // %CPU placeholder - fgets(result, 5, cpu_percent); - pclose(result); - - return atof(cpu_percent); - */ - return 1.0; -} -#endif diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index d4dd4ed429..cdbf68331e 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "test.h" #include "aerospike_test.h" @@ -409,6 +410,8 @@ static bool after(atf_plan* plan) return false; } + as_sleep(5*60000); + as_error err; as_error_reset(&err); From 5b1ee26a9d43ffff0f6358417d8526011ebaeaca Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Fri, 9 Feb 2024 16:45:52 -0500 Subject: [PATCH 24/64] Call new as_event_connection_complete() when connection is complete in all 3 async frameworks. 
--- src/include/aerospike/as_event_internal.h | 3 ++ src/main/aerospike/as_event.c | 6 ++++ src/main/aerospike/as_event_ev.c | 38 +++++++++++++---------- src/main/aerospike/as_event_event.c | 38 +++++++++++++---------- src/main/aerospike/as_event_uv.c | 6 +++- 5 files changed, 58 insertions(+), 33 deletions(-) diff --git a/src/include/aerospike/as_event_internal.h b/src/include/aerospike/as_event_internal.h index 1ef7ffc279..587007347a 100644 --- a/src/include/aerospike/as_event_internal.h +++ b/src/include/aerospike/as_event_internal.h @@ -189,6 +189,9 @@ as_event_command_execute(as_event_command* cmd, as_error* err); void as_event_command_schedule(as_event_command* cmd); +void +as_event_connection_complete(as_event_command* cmd); + bool as_event_proto_parse(as_event_command* cmd, as_proto* proto); diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index efada9b736..7d47d66241 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -580,6 +580,12 @@ as_event_create_connection(as_event_command* cmd, as_async_conn_pool* pool) as_event_connect(cmd, pool); } +void +as_event_connection_complete(as_event_command* cmd) +{ + // Put connect metrics here. +} + static void as_event_command_begin(as_event_loop* event_loop, as_event_command* cmd) { diff --git a/src/main/aerospike/as_event_ev.c b/src/main/aerospike/as_event_ev.c index b234306a30..aad3a7391d 100644 --- a/src/main/aerospike/as_event_ev.c +++ b/src/main/aerospike/as_event_ev.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2022 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. 
@@ -389,6 +389,21 @@ as_event_command_write_start(as_event_command* cmd) as_ev_command_write(cmd); } +static int +as_ev_command_start(as_event_command* cmd) +{ + as_event_connection_complete(cmd); + + if (cmd->type == AS_ASYNC_TYPE_CONNECTOR) { + as_event_connector_success(cmd); + return AS_EVENT_COMMAND_DONE; + } + else { + as_event_command_write_start(cmd); + return AS_EVENT_READ_COMPLETE; + } +} + static inline void as_ev_command_auth_write(as_event_command* cmd) { @@ -402,7 +417,7 @@ as_ev_command_auth_write(as_event_command* cmd) } static void -as_ev_command_start(as_event_command* cmd) +as_ev_connect_complete(as_event_command* cmd) { if (cmd->cluster->auth_enabled) { as_session* session = as_session_load(&cmd->node->session); @@ -416,14 +431,11 @@ as_ev_command_start(as_event_command* cmd) as_ev_command_auth_write(cmd); } else { - as_event_command_write_start(cmd); + as_ev_command_start(cmd); } } - else if (cmd->type == AS_ASYNC_TYPE_CONNECTOR) { - as_event_connector_success(cmd); - } else { - as_event_command_write_start(cmd); + as_ev_command_start(cmd); } } @@ -528,13 +540,7 @@ as_ev_parse_authentication(as_event_command* cmd) return AS_EVENT_READ_ERROR; } - if (cmd->type == AS_ASYNC_TYPE_CONNECTOR) { - as_event_connector_success(cmd); - return AS_EVENT_COMMAND_DONE; - } - - as_event_command_write_start(cmd); - return AS_EVENT_READ_COMPLETE; + return as_ev_command_start(cmd); } static int @@ -629,7 +635,7 @@ as_ev_tls_connect(as_event_command* cmd, as_event_connection* conn) } // TLS connection established. 
- as_ev_command_start(cmd); + as_ev_connect_complete(cmd); return false; } @@ -637,7 +643,7 @@ static void as_ev_callback_common(as_event_command* cmd, as_event_connection* conn) { switch (cmd->state) { case AS_ASYNC_STATE_CONNECT: - as_ev_command_start(cmd); + as_ev_connect_complete(cmd); break; case AS_ASYNC_STATE_TLS_CONNECT: diff --git a/src/main/aerospike/as_event_event.c b/src/main/aerospike/as_event_event.c index 96afc87ee0..40107ef948 100644 --- a/src/main/aerospike/as_event_event.c +++ b/src/main/aerospike/as_event_event.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2022 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. @@ -469,6 +469,21 @@ as_event_command_write_start(as_event_command* cmd) as_event_command_write(cmd); } +static int +as_event_command_start(as_event_command* cmd) +{ + as_event_connection_complete(cmd); + + if (cmd->type == AS_ASYNC_TYPE_CONNECTOR) { + as_event_connector_success(cmd); + return AS_EVENT_COMMAND_DONE; + } + else { + as_event_command_write_start(cmd); + return AS_EVENT_READ_COMPLETE; + } +} + static inline void as_event_command_auth_write(as_event_command* cmd) { @@ -482,7 +497,7 @@ as_event_command_auth_write(as_event_command* cmd) } static inline void -as_event_command_start(as_event_command* cmd) +as_event_connect_complete(as_event_command* cmd) { if (cmd->cluster->auth_enabled) { as_session* session = as_session_load(&cmd->node->session); @@ -496,14 +511,11 @@ as_event_command_start(as_event_command* cmd) as_event_command_auth_write(cmd); } else { - as_event_command_write_start(cmd); + as_event_command_start(cmd); } } - else if (cmd->type == AS_ASYNC_TYPE_CONNECTOR) { - as_event_connector_success(cmd); - } else { - as_event_command_write_start(cmd); + as_event_command_start(cmd); } } @@ -608,13 +620,7 @@ as_event_parse_authentication(as_event_command* cmd) return AS_EVENT_READ_ERROR; } - if (cmd->type == 
AS_ASYNC_TYPE_CONNECTOR) { - as_event_connector_success(cmd); - return AS_EVENT_COMMAND_DONE; - } - - as_event_command_write_start(cmd); - return AS_EVENT_READ_COMPLETE; + return as_event_command_start(cmd); } static int @@ -709,7 +715,7 @@ as_event_tls_connect(as_event_command* cmd, as_event_connection* conn) } // TLS connection established. - as_event_command_start(cmd); + as_event_connect_complete(cmd); return false; } @@ -717,7 +723,7 @@ static void as_event_callback_common(as_event_command* cmd, as_event_connection* conn) { switch (cmd->state) { case AS_ASYNC_STATE_CONNECT: - as_event_command_start(cmd); + as_event_connect_complete(cmd); break; case AS_ASYNC_STATE_TLS_CONNECT: diff --git a/src/main/aerospike/as_event_uv.c b/src/main/aerospike/as_event_uv.c index 4ceb44c90f..d15d485da9 100644 --- a/src/main/aerospike/as_event_uv.c +++ b/src/main/aerospike/as_event_uv.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2022 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. 
@@ -434,6 +434,8 @@ as_uv_command_write_start(as_event_command* cmd, uv_stream_t* stream) static inline void as_uv_command_start(as_event_command* cmd, uv_stream_t* stream) { + as_event_connection_complete(cmd); + if (cmd->type == AS_ASYNC_TYPE_CONNECTOR) { as_event_connector_success(cmd); return; @@ -935,6 +937,8 @@ as_uv_tls_command_write_start(as_event_command* cmd) static inline void as_uv_tls_command_start(as_event_command* cmd) { + as_event_connection_complete(cmd); + if (cmd->type == AS_ASYNC_TYPE_CONNECTOR) { as_event_connector_success(cmd); return; From b7bedf34b7782a267702e98aab8999eb81a25cd2 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Mon, 12 Feb 2024 15:06:04 -0700 Subject: [PATCH 25/64] Add connection metrics --- src/include/aerospike/as_event_internal.h | 1 + src/main/aerospike/as_event.c | 7 ++++++- src/main/aerospike/as_node.c | 6 ++++++ src/test/aerospike_test.c | 3 ++- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/include/aerospike/as_event_internal.h b/src/include/aerospike/as_event_internal.h index 587007347a..dc206b37c0 100644 --- a/src/include/aerospike/as_event_internal.h +++ b/src/include/aerospike/as_event_internal.h @@ -140,6 +140,7 @@ typedef struct as_event_command { cf_ll_element pipe_link; uint8_t* buf; + uint64_t begin; // Used for metrics uint32_t command_sent_counter; uint32_t write_offset; uint32_t write_len; diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index 7d47d66241..b3e09f5183 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -577,13 +577,18 @@ as_event_create_connection(as_event_command* cmd, as_async_conn_pool* pool) conn->base.watching = 0; conn->cmd = cmd; cmd->conn = &conn->base; + cmd->begin = cf_getns(); as_event_connect(cmd, pool); } void as_event_connection_complete(as_event_command* cmd) { - // Put connect metrics here. 
+ if (cmd->cluster->metrics_enabled) + { + uint64_t elapsed = cf_getns() - cmd->begin; + as_node_add_latency(cmd->node, AS_LATENCY_TYPE_CONN, elapsed); + } } static void diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 400e6f2c97..6c8d360d63 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -481,7 +481,13 @@ as_node_create_connections(as_node* node, as_conn_pool* pool, uint32_t timeout_m // Create sync connections. while (count > 0) { uint64_t deadline_ms = as_socket_deadline(timeout_ms); + uint64_t begin = cf_getns(); status = as_node_create_connection(&err, node, 0, deadline_ms, pool, &sock); + if (node->cluster->metrics_enabled) + { + uint64_t elapsed = cf_getns() - begin; + as_node_add_latency(node, AS_LATENCY_TYPE_CONN, elapsed); + } if (status != AEROSPIKE_OK) { as_log_debug("Failed to create min connections: %d %s", err.code, err.message); diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index cdbf68331e..e5f2fb4017 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -385,6 +385,7 @@ static bool before(atf_plan* plan) as_policy_metrics policy; as_metrics_policy_init(&policy); policy.interval = 5; + policy.report_size_limit = 1000000; #ifdef _WIN32 policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else @@ -410,7 +411,7 @@ static bool after(atf_plan* plan) return false; } - as_sleep(5*60000); + //as_sleep(5*60000); as_error err; as_error_reset(&err); From 5339598888e5b1f8790ef8bc5df6946012128e86 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Tue, 13 Feb 2024 14:54:21 -0700 Subject: [PATCH 26/64] Work on async and connection metrics --- src/include/aerospike/as_async.h | 15 ++++++++++++++- src/include/aerospike/as_event_internal.h | 1 + src/main/aerospike/aerospike_batch.c | 6 ++++++ src/main/aerospike/aerospike_key.c | 17 +++++++++-------- src/main/aerospike/aerospike_query.c | 6 ++++++ src/main/aerospike/aerospike_scan.c 
| 3 +++ src/main/aerospike/as_event.c | 13 +++++++++++++ src/main/aerospike/as_metrics.c | 2 +- src/main/aerospike/as_node.c | 13 +++++++------ 9 files changed, 60 insertions(+), 16 deletions(-) diff --git a/src/include/aerospike/as_async.h b/src/include/aerospike/as_async.h index e4ca1f5551..04d2d6bb77 100644 --- a/src/include/aerospike/as_async.h +++ b/src/include/aerospike/as_async.h @@ -110,7 +110,10 @@ as_async_write_command_create( cmd->flags = 0; cmd->replica_size = pi->replica_size; cmd->replica_index = 0; + cmd->begin = 0; + cmd->latency_type = AS_LATENCY_TYPE_WRITE; wcmd->listener = listener; + as_cluster_add_tran(cluster); return cmd; } @@ -119,7 +122,8 @@ as_async_record_command_create( as_cluster* cluster, const as_policy_base* policy, as_partition_info* pi, as_policy_replica replica, uint8_t replica_index, bool deserialize, bool heap_rec, uint8_t flags, as_async_record_listener listener, void* udata, as_event_loop* event_loop, - as_pipe_listener pipe_listener, size_t size, as_event_parse_results_fn parse_results + as_pipe_listener pipe_listener, size_t size, as_event_parse_results_fn parse_results, + as_latency_type latency_type ) { // Allocate enough memory to cover: struct size + write buffer size + auth max buffer size @@ -158,7 +162,10 @@ as_async_record_command_create( cmd->replica_size = pi->replica_size; cmd->replica_index = replica_index; + cmd->begin = 0; + cmd->latency_type = latency_type; rcmd->listener = listener; + as_cluster_add_tran(cluster); return cmd; } @@ -197,7 +204,10 @@ as_async_value_command_create( cmd->flags = 0; cmd->replica_size = pi->replica_size; cmd->replica_index = 0; + cmd->begin = 0; + cmd->latency_type = AS_LATENCY_TYPE_WRITE; vcmd->listener = listener; + as_cluster_add_tran(cluster); return cmd; } @@ -233,7 +243,10 @@ as_async_info_command_create( cmd->flags = 0; cmd->replica_size = 1; cmd->replica_index = 0; + cmd->begin = 0; + cmd->latency_type = AS_LATENCY_TYPE_NONE; icmd->listener = listener; + 
as_cluster_add_tran(node->cluster); return cmd; } diff --git a/src/include/aerospike/as_event_internal.h b/src/include/aerospike/as_event_internal.h index dc206b37c0..40a5be7cf7 100644 --- a/src/include/aerospike/as_event_internal.h +++ b/src/include/aerospike/as_event_internal.h @@ -156,6 +156,7 @@ typedef struct as_event_command { uint8_t replica_size; uint8_t replica_index; uint8_t replica_index_sc; // Used in batch only. + as_latency_type latency_type; } as_event_command; typedef struct { diff --git a/src/main/aerospike/aerospike_batch.c b/src/main/aerospike/aerospike_batch.c index 6b91316e7a..7eaab265f8 100644 --- a/src/main/aerospike/aerospike_batch.c +++ b/src/main/aerospike/aerospike_batch.c @@ -2431,6 +2431,9 @@ as_batch_command_create( // cmd->replica_size = 1; cmd->replica_index = rep->replica_index; cmd->replica_index_sc = rep->replica_index_sc; + cmd->begin = 0; + cmd->latency_type = AS_LATENCY_TYPE_BATCH; + as_cluster_add_tran(cluster); return bc; } @@ -2951,6 +2954,9 @@ as_batch_retry_command_create( // cmd->replica_size = 1; cmd->replica_index = rep->replica_index; cmd->replica_index_sc = rep->replica_index_sc; + cmd->begin = 0; + cmd->latency_type = AS_LATENCY_TYPE_BATCH; + as_cluster_add_tran(node->cluster); return bc; } diff --git a/src/main/aerospike/aerospike_key.c b/src/main/aerospike/aerospike_key.c index 12366db5d9..d83d70c1f0 100644 --- a/src/main/aerospike/aerospike_key.c +++ b/src/main/aerospike/aerospike_key.c @@ -276,7 +276,7 @@ aerospike_key_get_async( as_event_command* cmd = as_async_record_command_create( cluster, &policy->base, &pi, ri.replica, ri.replica_index, policy->deserialize, policy->async_heap_rec, ri.flags, listener, udata, event_loop, pipe_listener, size, - as_event_command_parse_result); + as_event_command_parse_result, AS_LATENCY_TYPE_READ); uint32_t timeout = as_command_server_timeout(&policy->base); uint8_t* p = as_command_write_header_read(cmd->buf, &policy->base, policy->read_mode_ap, @@ -388,7 +388,7 @@ 
aerospike_key_select_async( as_event_command* cmd = as_async_record_command_create( cluster, &policy->base, &pi, ri.replica, ri.replica_index, policy->deserialize, policy->async_heap_rec, ri.flags, listener, udata, event_loop, pipe_listener, size, - as_event_command_parse_result); + as_event_command_parse_result, AS_LATENCY_TYPE_READ); uint32_t timeout = as_command_server_timeout(&policy->base); uint8_t* p = as_command_write_header_read(cmd->buf, &policy->base, policy->read_mode_ap, @@ -478,7 +478,8 @@ aerospike_key_exists_async( as_event_command* cmd = as_async_record_command_create( cluster, &policy->base, &pi, ri.replica, ri.replica_index, false, policy->async_heap_rec, - ri.flags, listener, udata, event_loop, pipe_listener, size, as_event_command_parse_result); + ri.flags, listener, udata, event_loop, pipe_listener, size, as_event_command_parse_result, + AS_LATENCY_TYPE_READ); uint8_t* p = as_command_write_header_read_header(cmd->buf, &policy->base, policy->read_mode_ap, policy->read_mode_sc, n_fields, 0, AS_MSG_INFO1_READ | AS_MSG_INFO1_GET_NOBINDATA); @@ -1039,13 +1040,13 @@ aerospike_key_operate_async( as_event_command* cmd; - if (! (policy->base.compress && oper.size > AS_COMPRESS_THRESHOLD)) { + if (! (policy->base.compress && oper.size > AS_COMPRESS_THRESHOLD)) { // Send uncompressed command. 
if (oper.write_attr & AS_MSG_INFO2_WRITE) { cmd = as_async_record_command_create( cluster, &policy->base, &pi, policy->replica, 0, policy->deserialize, policy->async_heap_rec, 0, listener, udata, event_loop, pipe_listener, oper.size, - as_event_command_parse_result); + as_event_command_parse_result, AS_LATENCY_TYPE_WRITE); } else { as_read_info ri; @@ -1054,7 +1055,7 @@ aerospike_key_operate_async( cmd = as_async_record_command_create( cluster, &policy->base, &pi, ri.replica, ri.replica_index, policy->deserialize, policy->async_heap_rec, ri.flags, listener, udata, event_loop, pipe_listener, - oper.size, as_event_command_parse_result); + oper.size, as_event_command_parse_result, AS_LATENCY_TYPE_READ); } cmd->write_len = (uint32_t)as_operate_write(&oper, cmd->buf); @@ -1073,7 +1074,7 @@ aerospike_key_operate_async( cmd = as_async_record_command_create( cluster, &policy->base, &pi, policy->replica, 0, policy->deserialize, policy->async_heap_rec, 0, listener, udata, event_loop, pipe_listener, comp_size, - as_event_command_parse_result); + as_event_command_parse_result, AS_LATENCY_TYPE_WRITE); } else { as_read_info ri; @@ -1082,7 +1083,7 @@ aerospike_key_operate_async( cmd = as_async_record_command_create( cluster, &policy->base, &pi, ri.replica, ri.replica_index, policy->deserialize, policy->async_heap_rec, ri.flags, listener, udata, event_loop, pipe_listener, - comp_size, as_event_command_parse_result); + comp_size, as_event_command_parse_result, AS_LATENCY_TYPE_READ); } // Compress buffer and execute. diff --git a/src/main/aerospike/aerospike_query.c b/src/main/aerospike/aerospike_query.c index ecdd3f1894..80b52e3e8a 100644 --- a/src/main/aerospike/aerospike_query.c +++ b/src/main/aerospike/aerospike_query.c @@ -1721,6 +1721,9 @@ as_query_partition_execute_async( cmd->flags = qe->deserialize ? 
AS_ASYNC_FLAGS_DESERIALIZE : 0; cmd->replica_size = 1; cmd->replica_index = 0; + cmd->begin = 0; + cmd->latency_type = AS_LATENCY_TYPE_QUERY; + as_cluster_add_tran(np->node->cluster); ee->commands[i] = cmd; } @@ -2253,6 +2256,9 @@ aerospike_query_async( cmd->flags = policy->deserialize ? AS_ASYNC_FLAGS_DESERIALIZE : 0; cmd->replica_size = 1; cmd->replica_index = 0; + cmd->begin = 0; + cmd->latency_type = AS_LATENCY_TYPE_QUERY; + as_cluster_add_tran(cluster); memcpy(cmd->buf, cmd_buf, size); exec->commands[i] = cmd; } diff --git a/src/main/aerospike/aerospike_scan.c b/src/main/aerospike/aerospike_scan.c index 55f827a4bc..7504df42a4 100644 --- a/src/main/aerospike/aerospike_scan.c +++ b/src/main/aerospike/aerospike_scan.c @@ -1093,6 +1093,9 @@ as_scan_partition_execute_async(as_async_scan_executor* se, as_partition_tracker cmd->flags = se->deserialize_list_map ? AS_ASYNC_FLAGS_DESERIALIZE : 0; cmd->replica_size = 1; cmd->replica_index = 0; + cmd->begin = 0; + cmd->latency_type = AS_LATENCY_TYPE_QUERY; + as_cluster_add_tran(np->node->cluster); ee->commands[i] = cmd; } diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index b3e09f5183..bb31a10f0e 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -595,6 +595,7 @@ static void as_event_command_begin(as_event_loop* event_loop, as_event_command* cmd) { cmd->state = AS_ASYNC_STATE_CONNECT; + cmd->begin = cf_getns(); if (cmd->partition) { // If in retry, need to release node from prior attempt. 
@@ -989,6 +990,12 @@ as_event_put_connection(as_event_command* cmd, as_async_conn_pool* pool) static inline void as_event_response_complete(as_event_command* cmd) { + if (cmd->cluster->metrics_enabled && cmd->latency_type != AS_LATENCY_TYPE_NONE) + { + uint64_t elapsed = cf_getns() - cmd->begin; + as_node_add_latency(cmd->node, cmd->latency_type, elapsed); + } + if (cmd->pipe_listener != NULL) { as_pipe_response_complete(cmd); return; @@ -1258,6 +1265,12 @@ as_event_socket_error(as_event_command* cmd, as_error* err) void as_event_response_error(as_event_command* cmd, as_error* err) { + if (cmd->cluster->metrics_enabled && cmd->latency_type != AS_LATENCY_TYPE_NONE) + { + uint64_t elapsed = cf_getns() - cmd->begin; + as_node_add_latency(cmd->node, cmd->latency_type, elapsed); + } + if (cmd->pipe_listener != NULL) { as_pipe_response_error(cmd, err); return; diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index e70b2043f9..00a877d01c 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -144,7 +144,7 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) timestamp_to_string(now_str, sizeof(now_str)); char data[512]; - int rv = snprintf(data, sizeof(data), "%s header(1) cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]] eventloop[processSize,queueSize] node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]] conn[inUse,inPool,opened,closed] latency(%u,%u)[type[l1,l2,l3...]]\n", + int rv = snprintf(data, sizeof(data), "%s header(1) cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]] eventloop[processSize,queueSize] node[name,address:port,syncConn,asyncConn,errors,timeouts,latency[]] conn[inUse,inPool,opened,closed] latency(%u,%u)[type[l1,l2,l3...]]\n", now_str, mw->latency_columns, mw->latency_shift); if (rv <= 0) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, diff --git 
a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 6c8d360d63..1bed414be8 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -443,6 +443,7 @@ as_node_create_connection( as_socket* sock ) { + uint64_t begin = cf_getns(); as_status status = as_node_create_socket(err, node, pool, sock, deadline_ms); if (status) { @@ -468,6 +469,12 @@ as_node_create_connection( } } } + + if (node->cluster->metrics_enabled) + { + uint64_t elapsed = cf_getns() - begin; + as_node_add_latency(node, AS_LATENCY_TYPE_CONN, elapsed); + } return AEROSPIKE_OK; } @@ -481,13 +488,7 @@ as_node_create_connections(as_node* node, as_conn_pool* pool, uint32_t timeout_m // Create sync connections. while (count > 0) { uint64_t deadline_ms = as_socket_deadline(timeout_ms); - uint64_t begin = cf_getns(); status = as_node_create_connection(&err, node, 0, deadline_ms, pool, &sock); - if (node->cluster->metrics_enabled) - { - uint64_t elapsed = cf_getns() - begin; - as_node_add_latency(node, AS_LATENCY_TYPE_CONN, elapsed); - } if (status != AEROSPIKE_OK) { as_log_debug("Failed to create min connections: %d %s", err.code, err.message); From 455a1e107076bc9a24e392e35f2f66b4ecdcec66 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Tue, 13 Feb 2024 14:56:49 -0700 Subject: [PATCH 27/64] Update copyright year --- src/include/aerospike/as_async.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/aerospike/as_async.h b/src/include/aerospike/as_async.h index 04d2d6bb77..9cbf79e918 100644 --- a/src/include/aerospike/as_async.h +++ b/src/include/aerospike/as_async.h @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. 
From 6632734fe4ad13303b508cbc6f0e9b2301b068f6 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Tue, 13 Feb 2024 17:58:19 -0500 Subject: [PATCH 28/64] Rename as_sum_init() to as_conn_stats_init() and make inline. Rename as_sum_no_lock() to as_conn_stats_sum(). Fix compile error in as_metrics_process_cpu_load(). --- src/include/aerospike/aerospike_stats.h | 20 +++++--- src/main/aerospike/aerospike_stats.c | 61 +++++++++++-------------- src/main/aerospike/as_metrics.c | 33 ++++++++----- 3 files changed, 60 insertions(+), 54 deletions(-) diff --git a/src/include/aerospike/aerospike_stats.h b/src/include/aerospike/aerospike_stats.h index fcf21f166b..7001336e7c 100644 --- a/src/include/aerospike/aerospike_stats.h +++ b/src/include/aerospike/aerospike_stats.h @@ -94,7 +94,7 @@ typedef struct as_node_stats_s { uint64_t error_count; /** - * Transaction timeout count since node was initialized. If the timeout is retryable (ie socketTimeout), + * Transaction timeout count since node was initialized. If the timeout is retryable (ie socket timeout), * multiple timeouts per transaction may occur. */ uint64_t timeout_count; @@ -245,12 +245,6 @@ aerospike_event_loop_stats(as_event_loop* event_loop, as_event_loop_stats* stats stats->queue_size = as_event_loop_get_queue_size(event_loop); } -void -as_sum_init(as_conn_stats* stats); - -void -as_sum_no_lock(as_async_conn_pool* pool, as_conn_stats* stats); - /** * Return string representation of cluster statistics. * The string should be freed when it's no longer needed. 
@@ -260,6 +254,18 @@ as_sum_no_lock(as_async_conn_pool* pool, as_conn_stats* stats); AS_EXTERN char* aerospike_stats_to_string(as_cluster_stats* stats); +static inline void +as_conn_stats_init(as_conn_stats* stats) +{ + stats->in_pool = 0; + stats->in_use = 0; + stats->opened = 0; + stats->closed = 0; +} + +void +as_conn_stats_sum(as_conn_stats* stats, as_async_conn_pool* pool); + #ifdef __cplusplus } // end extern "C" #endif diff --git a/src/main/aerospike/aerospike_stats.c b/src/main/aerospike/aerospike_stats.c index 2e54c6ca50..9e634b224e 100644 --- a/src/main/aerospike/aerospike_stats.c +++ b/src/main/aerospike/aerospike_stats.c @@ -51,36 +51,6 @@ as_conn_stats_tostring(as_string_builder* sb, const char* title, as_conn_stats* * FUNCTIONS *****************************************************************************/ -void -as_sum_init(as_conn_stats* stats) -{ - stats->in_pool = 0; - stats->in_use = 0; - stats->opened = 0; - stats->closed = 0; -} - -void -as_sum_no_lock(as_async_conn_pool* pool, as_conn_stats* stats) -{ - // Warning: cross-thread reference without a lock. - int tmp = as_queue_size(&pool->queue); - - // Timing issues may cause values to go negative. Adjust. 
- if (tmp < 0) { - tmp = 0; - } - stats->in_pool += tmp; - tmp = pool->queue.total - tmp; - - if (tmp < 0) { - tmp = 0; - } - stats->in_use += tmp; - stats->opened += pool->opened; - stats->closed += pool->closed; -} - void aerospike_cluster_stats(as_cluster* cluster, as_cluster_stats* stats) { @@ -137,9 +107,9 @@ aerospike_node_stats(as_node* node, as_node_stats* stats) stats->error_count = as_node_get_error_count(node); stats->timeout_count = as_node_get_timeout_count(node); - as_sum_init(&stats->sync); - as_sum_init(&stats->async); - as_sum_init(&stats->pipeline); + as_conn_stats_init(&stats->sync); + as_conn_stats_init(&stats->async); + as_conn_stats_init(&stats->pipeline); uint32_t max = node->cluster->conn_pools_per_node; @@ -162,10 +132,10 @@ aerospike_node_stats(as_node* node, as_node_stats* stats) if (as_event_loop_capacity > 0) { for (uint32_t i = 0; i < as_event_loop_size; i++) { // Regular async. - as_sum_no_lock(&node->async_conn_pools[i], &stats->async); + as_conn_stats_sum(&stats->async, &node->async_conn_pools[i]); // Pipeline async. - as_sum_no_lock(&node->pipe_conn_pools[i], &stats->pipeline); + as_conn_stats_sum(&stats->pipeline, &node->pipe_conn_pools[i]); } } } @@ -209,3 +179,24 @@ aerospike_stats_to_string(as_cluster_stats* stats) } return sb.data; } + +void +as_conn_stats_sum(as_conn_stats* stats, as_async_conn_pool* pool) +{ + // Warning: cross-thread reference without a lock. + int tmp = as_queue_size(&pool->queue); + + // Timing issues may cause values to go negative. Adjust. 
+ if (tmp < 0) { + tmp = 0; + } + stats->in_pool += tmp; + tmp = pool->queue.total - tmp; + + if (tmp < 0) { + tmp = 0; + } + stats->in_use += tmp; + stats->opened += pool->opened; + stats->closed += pool->closed; +} diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 00a877d01c..5322db5639 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -182,7 +182,7 @@ as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_con if (as_event_loop_capacity > 0) { for (uint32_t i = 0; i < as_event_loop_size; i++) { // Regular async. - as_sum_no_lock(&node->async_conn_pools[i], async); + as_conn_stats_sum(async, &node->async_conn_pools[i]); } } } @@ -211,8 +211,8 @@ as_metrics_write_node(as_metrics_writer* mw, as_string_builder* sb, struct as_no struct as_conn_stats_s sync; struct as_conn_stats_s async; - as_sum_init(&sync); - as_sum_init(&async); + as_conn_stats_init(&sync); + as_conn_stats_init(&async); as_metrics_get_node_sync_conn_stats(node, &sync); as_metrics_write_conn(mw, sb, &sync); as_string_builder_append_char(sb, ','); @@ -449,24 +449,33 @@ static double as_metrics_process_cpu_load() { pid_t pid = getpid(); + as_string_builder sb; as_string_builder_inita(&sb, 128, false); as_string_builder_append(&sb, "ps -p "); as_string_builder_append_int(&sb, pid); as_string_builder_append(&sb, " -o %CPU"); + FILE* file = popen(sb.data, "r"); + if (!file) { - return -1; + return -1.0; } - char[5] cpu_holder; - char[6] cpu_percent; - fgets(file, 4, cpu_holder); // %CPU placeholder - fgets(file, 5, cpu_percent); - int result = pclose(file); - if (result < 0) { - return -1; + + char cpu_holder[5]; + char cpu_percent[6]; + + if (!fgets(cpu_holder, sizeof(cpu_holder), file)) { + pclose(file); + return -1.0; } - + + if (!fgets(cpu_percent, sizeof(cpu_percent), file)) { + pclose(file); + return -1.0; + } + + pclose(file); return atof(cpu_percent); } From 8dd9bee4241f3ce897a13a137d15875741768f79 Mon Sep 
17 00:00:00 2001 From: Shannon Klaus Date: Wed, 14 Feb 2024 09:44:11 -0700 Subject: [PATCH 29/64] Fix batch, scan, query transaction counts --- src/main/aerospike/aerospike_batch.c | 6 +++--- src/main/aerospike/aerospike_query.c | 7 +++---- src/main/aerospike/aerospike_scan.c | 5 +++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/aerospike/aerospike_batch.c b/src/main/aerospike/aerospike_batch.c index 7eaab265f8..bb40e85e4b 100644 --- a/src/main/aerospike/aerospike_batch.c +++ b/src/main/aerospike/aerospike_batch.c @@ -1617,7 +1617,6 @@ as_batch_command_init( cmd->partition_id = 0; // Not referenced when node set. cmd->replica = task->replica; cmd->latency_type = AS_LATENCY_TYPE_BATCH; - as_cluster_add_tran(task->cluster); // Note: Do not set flags to AS_COMMAND_FLAGS_LINEARIZE because AP and SC replicas // are tracked separately for batch (cmd->master and cmd->master_sc). @@ -2082,6 +2081,7 @@ as_batch_keys_execute( as_batch_base_record* rec, as_batch_attr* attr, as_batch_listener listener, void* udata ) { + as_cluster_add_tran(as->cluster); uint32_t n_keys = batch->keys.size; if (n_keys == 0) { @@ -2433,7 +2433,6 @@ as_batch_command_create( cmd->replica_index_sc = rep->replica_index_sc; cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_BATCH; - as_cluster_add_tran(cluster); return bc; } @@ -2558,6 +2557,7 @@ as_batch_records_execute( as_async_batch_executor* async_executor, bool has_write ) { + as_cluster_add_tran(as->cluster); as_vector* list = &records->list; uint32_t n_keys = records->list.size; @@ -2674,6 +2674,7 @@ as_batch_records_execute_async( as_async_batch_listener listener, void* udata, as_event_loop* event_loop, bool has_write ) { + as_cluster_add_tran(as->cluster); // Check for empty batch. 
if (records->list.size == 0) { listener(0, records, udata, event_loop); @@ -2956,7 +2957,6 @@ as_batch_retry_command_create( cmd->replica_index_sc = rep->replica_index_sc; cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_BATCH; - as_cluster_add_tran(node->cluster); return bc; } diff --git a/src/main/aerospike/aerospike_query.c b/src/main/aerospike/aerospike_query.c index 80b52e3e8a..fd1606c05e 100644 --- a/src/main/aerospike/aerospike_query.c +++ b/src/main/aerospike/aerospike_query.c @@ -1133,7 +1133,6 @@ as_query_command_execute_old(as_query_task* task) cmd.replica_size = 1; cmd.replica_index = 0; cmd.latency_type = AS_LATENCY_TYPE_QUERY; - as_cluster_add_tran(task->cluster); as_command_start_timer(&cmd); @@ -1236,7 +1235,6 @@ as_query_command_execute_new(as_query_task* task) cmd.replica_size = 1; cmd.replica_index = 0; cmd.latency_type = AS_LATENCY_TYPE_QUERY; - as_cluster_add_tran(task->cluster); as_command_start_timer(&cmd); @@ -1305,6 +1303,7 @@ as_query_worker_new(void* data) static as_status as_query_execute(as_query_task* task, const as_query* query, as_nodes* nodes) { + as_cluster_add_tran(task->cluster); as_status status = AEROSPIKE_OK; if (task->query_policy && task->query_policy->fail_on_cluster_change) { @@ -1472,6 +1471,7 @@ as_query_partitions( as_cluster* cluster, as_error* err, const as_policy_query* policy, const as_query* query, as_partition_tracker* pt, aerospike_query_foreach_callback callback, void* udata) { + as_cluster_add_tran(cluster); uint64_t parent_id = as_random_get_uint64(); as_status status = AEROSPIKE_OK; @@ -1723,7 +1723,6 @@ as_query_partition_execute_async( cmd->replica_index = 0; cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_QUERY; - as_cluster_add_tran(np->node->cluster); ee->commands[i] = cmd; } @@ -1762,6 +1761,7 @@ as_query_partition_async( as_event_loop* event_loop ) { + as_cluster_add_tran(cluster); pt->sleep_between_retries = 0; as_status status = as_partition_tracker_assign(pt, cluster, query->ns, err); @@ 
-2258,7 +2258,6 @@ aerospike_query_async( cmd->replica_index = 0; cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_QUERY; - as_cluster_add_tran(cluster); memcpy(cmd->buf, cmd_buf, size); exec->commands[i] = cmd; } diff --git a/src/main/aerospike/aerospike_scan.c b/src/main/aerospike/aerospike_scan.c index 7504df42a4..aebe37c0d6 100644 --- a/src/main/aerospike/aerospike_scan.c +++ b/src/main/aerospike/aerospike_scan.c @@ -688,7 +688,6 @@ as_scan_command_execute(as_scan_task* task) cmd.replica_size = 1; cmd.replica_index = 0; cmd.latency_type = AS_LATENCY_TYPE_QUERY; - as_cluster_add_tran(task->cluster); as_command_start_timer(&cmd); @@ -762,6 +761,7 @@ as_scan_generic( aerospike_scan_foreach_callback callback, void* udata, uint64_t* task_id_ptr ) { + as_cluster_add_tran(cluster); as_status status = as_scan_validate(err, policy, scan); if (status != AEROSPIKE_OK) { @@ -878,6 +878,7 @@ as_scan_partitions( as_cluster* cluster, as_error* err, const as_policy_scan* policy, const as_scan* scan, as_partition_tracker* pt, aerospike_scan_foreach_callback callback, void* udata) { + as_cluster_add_tran(cluster); uint64_t parent_id = as_random_get_uint64(); as_status status = AEROSPIKE_OK; @@ -1095,7 +1096,6 @@ as_scan_partition_execute_async(as_async_scan_executor* se, as_partition_tracker cmd->replica_index = 0; cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_QUERY; - as_cluster_add_tran(np->node->cluster); ee->commands[i] = cmd; } @@ -1179,6 +1179,7 @@ as_scan_partition_async( as_event_loop* event_loop ) { + as_cluster_add_tran(cluster); pt->sleep_between_retries = 0; as_status status = as_partition_tracker_assign(pt, cluster, scan->ns, err); From 26f8af5aad315cf2df9c459c0897819e8229c3c8 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Wed, 14 Feb 2024 10:25:40 -0700 Subject: [PATCH 30/64] Move assignment of cluster pointer --- src/main/aerospike/aerospike_batch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/src/main/aerospike/aerospike_batch.c b/src/main/aerospike/aerospike_batch.c index bb40e85e4b..34910df3f4 100644 --- a/src/main/aerospike/aerospike_batch.c +++ b/src/main/aerospike/aerospike_batch.c @@ -2081,7 +2081,8 @@ as_batch_keys_execute( as_batch_base_record* rec, as_batch_attr* attr, as_batch_listener listener, void* udata ) { - as_cluster_add_tran(as->cluster); + as_cluster* cluster = as->cluster; + as_cluster_add_tran(cluster); uint32_t n_keys = batch->keys.size; if (n_keys == 0) { @@ -2091,7 +2092,6 @@ as_batch_keys_execute( return AEROSPIKE_OK; } - as_cluster* cluster = as->cluster; as_nodes* nodes = as_nodes_reserve(cluster); uint32_t n_nodes = nodes->size; as_nodes_release(nodes); @@ -2557,7 +2557,8 @@ as_batch_records_execute( as_async_batch_executor* async_executor, bool has_write ) { - as_cluster_add_tran(as->cluster); + as_cluster* cluster = as->cluster; + as_cluster_add_tran(cluster); as_vector* list = &records->list; uint32_t n_keys = records->list.size; @@ -2565,7 +2566,6 @@ as_batch_records_execute( return AEROSPIKE_OK; } - as_cluster* cluster = as->cluster; as_nodes* nodes = as_nodes_reserve(cluster); uint32_t n_nodes = nodes->size; as_nodes_release(nodes); From c9df71ecdd1644440dc34a1d22413079e33380a8 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Wed, 14 Feb 2024 11:20:58 -0700 Subject: [PATCH 31/64] Change where begin is calculated, remove connection from transaction latency calculation --- src/include/aerospike/as_async.h | 4 ---- src/main/aerospike/aerospike_batch.c | 2 -- src/main/aerospike/aerospike_query.c | 2 -- src/main/aerospike/aerospike_scan.c | 1 - src/main/aerospike/as_command.c | 11 +++++++---- src/main/aerospike/as_event.c | 14 ++++++++++---- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/include/aerospike/as_async.h b/src/include/aerospike/as_async.h index 9cbf79e918..2a1e2699c9 100644 --- a/src/include/aerospike/as_async.h +++ b/src/include/aerospike/as_async.h @@ -110,7 +110,6 @@ 
as_async_write_command_create( cmd->flags = 0; cmd->replica_size = pi->replica_size; cmd->replica_index = 0; - cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_WRITE; wcmd->listener = listener; as_cluster_add_tran(cluster); @@ -162,7 +161,6 @@ as_async_record_command_create( cmd->replica_size = pi->replica_size; cmd->replica_index = replica_index; - cmd->begin = 0; cmd->latency_type = latency_type; rcmd->listener = listener; as_cluster_add_tran(cluster); @@ -204,7 +202,6 @@ as_async_value_command_create( cmd->flags = 0; cmd->replica_size = pi->replica_size; cmd->replica_index = 0; - cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_WRITE; vcmd->listener = listener; as_cluster_add_tran(cluster); @@ -243,7 +240,6 @@ as_async_info_command_create( cmd->flags = 0; cmd->replica_size = 1; cmd->replica_index = 0; - cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_NONE; icmd->listener = listener; as_cluster_add_tran(node->cluster); diff --git a/src/main/aerospike/aerospike_batch.c b/src/main/aerospike/aerospike_batch.c index 34910df3f4..c62bb2fdad 100644 --- a/src/main/aerospike/aerospike_batch.c +++ b/src/main/aerospike/aerospike_batch.c @@ -2431,7 +2431,6 @@ as_batch_command_create( // cmd->replica_size = 1; cmd->replica_index = rep->replica_index; cmd->replica_index_sc = rep->replica_index_sc; - cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_BATCH; return bc; } @@ -2955,7 +2954,6 @@ as_batch_retry_command_create( // cmd->replica_size = 1; cmd->replica_index = rep->replica_index; cmd->replica_index_sc = rep->replica_index_sc; - cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_BATCH; return bc; } diff --git a/src/main/aerospike/aerospike_query.c b/src/main/aerospike/aerospike_query.c index fd1606c05e..efa91540c8 100644 --- a/src/main/aerospike/aerospike_query.c +++ b/src/main/aerospike/aerospike_query.c @@ -1721,7 +1721,6 @@ as_query_partition_execute_async( cmd->flags = qe->deserialize ? 
AS_ASYNC_FLAGS_DESERIALIZE : 0; cmd->replica_size = 1; cmd->replica_index = 0; - cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_QUERY; ee->commands[i] = cmd; } @@ -2256,7 +2255,6 @@ aerospike_query_async( cmd->flags = policy->deserialize ? AS_ASYNC_FLAGS_DESERIALIZE : 0; cmd->replica_size = 1; cmd->replica_index = 0; - cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_QUERY; memcpy(cmd->buf, cmd_buf, size); exec->commands[i] = cmd; diff --git a/src/main/aerospike/aerospike_scan.c b/src/main/aerospike/aerospike_scan.c index aebe37c0d6..a054e6c14e 100644 --- a/src/main/aerospike/aerospike_scan.c +++ b/src/main/aerospike/aerospike_scan.c @@ -1094,7 +1094,6 @@ as_scan_partition_execute_async(as_async_scan_executor* se, as_partition_tracker cmd->flags = se->deserialize_list_map ? AS_ASYNC_FLAGS_DESERIALIZE : 0; cmd->replica_size = 1; cmd->replica_index = 0; - cmd->begin = 0; cmd->latency_type = AS_LATENCY_TYPE_QUERY; ee->commands[i] = cmd; } diff --git a/src/main/aerospike/as_command.c b/src/main/aerospike/as_command.c index 34529028e8..97a7fb0b61 100644 --- a/src/main/aerospike/as_command.c +++ b/src/main/aerospike/as_command.c @@ -591,7 +591,6 @@ as_command_execute(as_command* cmd, as_error* err) as_status status; bool release_node; as_latency_type latency_type = cmd->cluster->metrics_enabled ? cmd->latency_type : AS_LATENCY_TYPE_NONE; - uint64_t begin = cf_getns(); // Execute command until successful, timed out or maximum iterations have been reached. while (true) { @@ -637,6 +636,10 @@ as_command_execute(as_command* cmd, as_error* err) goto Retry; } + uint64_t begin = 0; + if (latency_type != AS_LATENCY_TYPE_NONE) { + begin = cf_getns(); + } // Send command. status = as_socket_write_deadline(err, &socket, node, cmd->buf, cmd->buf_size, cmd->socket_timeout, cmd->deadline_ms); @@ -705,14 +708,14 @@ as_command_execute(as_command* cmd, as_error* err) } } - // Put connection back in pool. 
- as_node_put_connection(node, &socket); - if (latency_type != AS_LATENCY_TYPE_NONE) { uint64_t elapsed = cf_getns() - begin; as_node_add_latency(node, latency_type, elapsed); } + + // Put connection back in pool. + as_node_put_connection(node, &socket); // Release resources. if (release_node) { diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index bb31a10f0e..4ced942218 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -433,6 +433,8 @@ void as_event_command_execute_in_loop(as_event_loop* event_loop, as_event_command* cmd) { // Initialize read buffer (buf) to be located after write buffer. + cmd->begin = 0; + cmd->latency_type = cmd->cluster->metrics_enabled ? cmd->latency_type : AS_LATENCY_TYPE_NONE; cmd->write_offset = (uint32_t)(cmd->buf - (uint8_t*)cmd); cmd->buf += cmd->write_len; cmd->conn = NULL; @@ -577,7 +579,9 @@ as_event_create_connection(as_event_command* cmd, as_async_conn_pool* pool) conn->base.watching = 0; conn->cmd = cmd; cmd->conn = &conn->base; - cmd->begin = cf_getns(); + if (cmd->cluster->metrics_enabled) { + cmd->begin = cf_getns(); + } as_event_connect(cmd, pool); } @@ -595,7 +599,6 @@ static void as_event_command_begin(as_event_loop* event_loop, as_event_command* cmd) { cmd->state = AS_ASYNC_STATE_CONNECT; - cmd->begin = cf_getns(); if (cmd->partition) { // If in retry, need to release node from prior attempt. @@ -673,6 +676,9 @@ as_event_command_begin(as_event_loop* event_loop, as_event_command* cmd) // Create connection only when connection count within limit. 
if (as_async_conn_pool_incr_total(pool)) { + if (cmd->latency_type != AS_LATENCY_TYPE_NONE) { + cmd->begin = cf_getns(); + } as_event_create_connection(cmd, pool); return; } @@ -990,7 +996,7 @@ as_event_put_connection(as_event_command* cmd, as_async_conn_pool* pool) static inline void as_event_response_complete(as_event_command* cmd) { - if (cmd->cluster->metrics_enabled && cmd->latency_type != AS_LATENCY_TYPE_NONE) + if (cmd->latency_type != AS_LATENCY_TYPE_NONE) { uint64_t elapsed = cf_getns() - cmd->begin; as_node_add_latency(cmd->node, cmd->latency_type, elapsed); @@ -1265,7 +1271,7 @@ as_event_socket_error(as_event_command* cmd, as_error* err) void as_event_response_error(as_event_command* cmd, as_error* err) { - if (cmd->cluster->metrics_enabled && cmd->latency_type != AS_LATENCY_TYPE_NONE) + if (cmd->latency_type != AS_LATENCY_TYPE_NONE) { uint64_t elapsed = cf_getns() - cmd->begin; as_node_add_latency(cmd->node, cmd->latency_type, elapsed); From df40dd0fe83c83d85addff73deedd718eaa9fed3 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Wed, 14 Feb 2024 13:26:23 -0700 Subject: [PATCH 32/64] Add code for pipe connection metrics --- src/main/aerospike/as_pipe.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/aerospike/as_pipe.c b/src/main/aerospike/as_pipe.c index 9a42a8ff7a..09614a81e2 100644 --- a/src/main/aerospike/as_pipe.c +++ b/src/main/aerospike/as_pipe.c @@ -365,6 +365,9 @@ as_pipe_get_connection(as_event_command* cmd) as_log_trace("Creating new pipeline connection"); if (as_async_conn_pool_incr_total(pool)) { + if (cmd->cluster->metrics_enabled) { + cmd->begin = cf_getns(); + } conn = cf_malloc(sizeof(as_pipe_connection)); assert(conn != NULL); From abf2a605798c6add99e32d249e94ea5c1ccffaf7 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Wed, 14 Feb 2024 14:57:21 -0700 Subject: [PATCH 33/64] Use _MSC_VER instead of _WIN32 --- examples/async_examples/async_batch_get/src/main/example.c | 2 +- 
examples/basic_examples/append/src/main/example.c | 2 +- examples/batch_examples/get/src/main/example.c | 2 +- src/include/aerospike/as_metrics.h | 7 ++++--- src/main/aerospike/as_metrics.c | 6 +++--- src/test/aerospike_test.c | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/examples/async_examples/async_batch_get/src/main/example.c b/examples/async_examples/async_batch_get/src/main/example.c index 0302747e1a..6a97df3d4e 100644 --- a/examples/async_examples/async_batch_get/src/main/example.c +++ b/examples/async_examples/async_batch_get/src/main/example.c @@ -83,7 +83,7 @@ main(int argc, char* argv[]) as_metrics_policy_init(&policy); policy.interval = 5; -#ifdef _WIN32 +#ifdef _MSC_VER policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else policy.report_directory = "/home/sklaus/metrics"; diff --git a/examples/basic_examples/append/src/main/example.c b/examples/basic_examples/append/src/main/example.c index 15f53c3cf5..39ba40b40e 100644 --- a/examples/basic_examples/append/src/main/example.c +++ b/examples/basic_examples/append/src/main/example.c @@ -61,7 +61,7 @@ main(int argc, char* argv[]) as_policy_metrics policy; as_metrics_policy_init(&policy); policy.interval = 5; -#ifdef _WIN32 +#ifdef _MSC_VER policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else policy.report_directory = "/home/sklaus/metrics"; diff --git a/examples/batch_examples/get/src/main/example.c b/examples/batch_examples/get/src/main/example.c index eed1267bd2..19276fde2c 100644 --- a/examples/batch_examples/get/src/main/example.c +++ b/examples/batch_examples/get/src/main/example.c @@ -77,7 +77,7 @@ main(int argc, char* argv[]) as_policy_metrics policy; as_metrics_policy_init(&policy); policy.interval = 5; -#ifdef _WIN32 +#ifdef _MSC_VER policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else policy.report_directory = "/home/sklaus/metrics"; diff --git 
a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index e9fd85c0c1..7080627b66 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -29,8 +29,9 @@ #include #endif -#ifdef _WIN32 -#include +#if defined(_MSC_VER) +#define WIN32_LEAN_AND_MEAN +#include #endif #ifdef __cplusplus @@ -127,7 +128,7 @@ typedef struct as_metrics_writer_s { uint64_t size; uint32_t latency_columns; uint32_t latency_shift; -#ifdef _WIN32 +#ifdef _MSC_VER FILETIME prev_process_times_kernel; FILETIME prev_system_times_kernel; FILETIME prev_process_times_user; diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 5322db5639..d7f33df061 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -92,7 +92,7 @@ as_metrics_writer_init_udata() mw->latency_columns = 0; mw->latency_shift = 0; mw->enable = false; -#ifdef _WIN32 +#ifdef _MSC_VER mw->pid = GetCurrentProcessId(); mw->process = OpenProcess(PROCESS_QUERY_INFORMATION, false, mw->pid); FILETIME dummy; @@ -108,7 +108,7 @@ as_metrics_writer_init_udata() static inline char separator() { -#ifdef _WIN32 +#ifdef _MSC_VER return '\\'; #else return '/'; @@ -515,7 +515,7 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster uint32_t cpu_load = 0; uint32_t mem = 0; -#ifdef _WIN32 +#ifdef _MSC_VER as_status result = as_metrics_process_cpu_load_mem_usage(err, mw, &cpu_load, &mem); #else as_status result = as_metrics_process_cpu_load_mem_usage(err, &cpu_load, &mem); diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index e5f2fb4017..d38fab9c01 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -386,7 +386,7 @@ static bool before(atf_plan* plan) as_metrics_policy_init(&policy); policy.interval = 5; policy.report_size_limit = 1000000; -#ifdef _WIN32 +#ifdef _MSC_VER policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else 
policy.report_directory = "/home/sklaus/metrics"; From bdb9bcc1d45bd3c70c6564fc103e8a676e119d93 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Thu, 15 Feb 2024 11:14:29 -0500 Subject: [PATCH 34/64] remove unnecessary includes. --- src/include/aerospike/as_metrics.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 7080627b66..d8e038928f 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -24,11 +24,6 @@ #include #include -#if !defined(_MSC_VER) -#include -#include -#endif - #if defined(_MSC_VER) #define WIN32_LEAN_AND_MEAN #include From 37ba6d37e9dbf059b093fd97761591da66bd2b6b Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Thu, 15 Feb 2024 12:15:35 -0500 Subject: [PATCH 35/64] Document metrics policy fields. --- src/include/aerospike/as_metrics.h | 94 +++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 14 deletions(-) diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index d8e038928f..82f6f29842 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -14,7 +14,6 @@ * License for the specific language governing permissions and limitations under * the License. */ - #pragma once #include @@ -68,7 +67,7 @@ struct as_node_s; struct as_cluster_s; /** - * Callbacks for metrics listener operations + * Callbacks for metrics listener operations. */ typedef as_status(*as_metrics_enable_listener)(as_error* err, const struct as_policy_metrics_s* policy, void* udata); @@ -79,31 +78,98 @@ typedef as_status(*as_metrics_node_close_listener)(as_error* err, struct as_node typedef as_status(*as_metrics_disable_listener)(as_error* err, struct as_cluster_s* cluster, void* udata); /** - * Struct to hold required callbacks + * Metrics listener callbacks. */ typedef struct as_metrics_listeners_s { + /** + * Periodic extended metrics has been enabled for the given cluster. 
+ */ as_metrics_enable_listener enable_listener; + + /** + * A metrics snapshot has been requested for the given cluster. + */ as_metrics_snapshot_listener snapshot_listener; + + /** + * A node is being dropped from the cluster. + */ as_metrics_node_close_listener node_close_listener; + + /** + * Periodic extended metrics has been disabled for the given cluster. + */ as_metrics_disable_listener disable_listener; + + /** + * User defined data. + */ void* udata; } as_metrics_listeners; /** -* Metrics Policy -*/ + * Client periodic metrics configuration. + */ typedef struct as_policy_metrics_s { - const char* report_directory; // where the metrics file is output - - uint64_t report_size_limit; // default 0 - - uint32_t interval; // default 30 - - uint32_t latency_columns; // default 7 + /** + * Listeners that handles metrics notification events. The default listener implementation + * writes the metrics snapshot to a file which will later be read and forwarded to + * OpenTelemetry by a separate offline application. + *

+ * The listener could be overridden to send the metrics snapshot directly to OpenTelemetry. + */ + as_metrics_listeners metrics_listeners; + + /** + * Directory path to write metrics log files for listeners that write logs. + * + * Default: + */ + const char* report_directory; - uint32_t latency_shift; // default 1 + /** + * Metrics file size soft limit in bytes for listeners that write logs. + * + * When report_size_limit is reached or exceeded, the current metrics file is closed and a new + * metrics file is created with a new timestamp. If report_size_limit is zero, the metrics file + * size is unbounded and the file will only be closed when aerospike_disable_metrics() or + * aerospike_close() is called. + * + * Default: 0 + */ + uint64_t report_size_limit; + + /** + * Number of cluster tend iterations between metrics notification events. One tend iteration + * is defined as as_config.tender_interval (default 1 second) plus the time to tend all + * nodes. + * + * Default: 30 + */ + uint32_t interval; + + /** + * Number of elapsed time range buckets in latency histograms. + * + * Default: 7 + */ + uint32_t latency_columns; - as_metrics_listeners metrics_listeners; + /** + * Power of 2 multiple between each range bucket in latency histograms starting at column 3. The bucket units + * are in milliseconds. The first 2 buckets are "<=1ms" and ">1ms". 
Examples: + * + * ~~~~~~~~~~{.c} + * // latencyColumns=7 latencyShift=1 + * <=1ms >1ms >2ms >4ms >8ms >16ms >32ms + * + * // latencyColumns=5 latencyShift=3 + * <=1ms >1ms >8ms >64ms >512ms + * ~~~~~~~~~~ + * + * Default: 1 + */ + uint32_t latency_shift; } as_policy_metrics; /** From b920f375c4a3e15a6c1400a0325772e5ed8dbd30 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Thu, 15 Feb 2024 11:41:36 -0700 Subject: [PATCH 36/64] Pass in report directory to metrics init, change from pointer to array --- .../async_batch_get/src/main/example.c | 11 ++++++----- .../basic_examples/append/src/main/example.c | 8 ++++---- examples/batch_examples/get/src/main/example.c | 8 ++++---- src/include/aerospike/as_metrics.h | 6 +++--- src/main/aerospike/as_metrics.c | 18 +++++++++--------- src/test/aerospike_test.c | 11 ++++++----- 6 files changed, 32 insertions(+), 30 deletions(-) diff --git a/examples/async_examples/async_batch_get/src/main/example.c b/examples/async_examples/async_batch_get/src/main/example.c index 6a97df3d4e..0b3c3372ab 100644 --- a/examples/async_examples/async_batch_get/src/main/example.c +++ b/examples/async_examples/async_batch_get/src/main/example.c @@ -80,14 +80,15 @@ main(int argc, char* argv[]) as_error err; as_error_reset(&err); as_policy_metrics policy; - as_metrics_policy_init(&policy); - policy.interval = 5; - #ifdef _MSC_VER - policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; + char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else - policy.report_directory = "/home/sklaus/metrics"; + char report_dir[] = "/home/sklaus/metrics"; #endif + as_metrics_policy_init(&policy, report_dir); + policy.interval = 5; + + // enable metrics as_status status = aerospike_enable_metrics(&as, &err, &policy); diff --git a/examples/basic_examples/append/src/main/example.c b/examples/basic_examples/append/src/main/example.c index 39ba40b40e..013dc843ef 100644 --- 
a/examples/basic_examples/append/src/main/example.c +++ b/examples/basic_examples/append/src/main/example.c @@ -59,13 +59,13 @@ main(int argc, char* argv[]) as_error err; as_error_reset(&err); as_policy_metrics policy; - as_metrics_policy_init(&policy); - policy.interval = 5; #ifdef _MSC_VER - policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; + char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else - policy.report_directory = "/home/sklaus/metrics"; + char report_dir[] = "/home/sklaus/metrics"; #endif + as_metrics_policy_init(&policy, report_dir); + policy.interval = 5; // enable metrics as_status status = aerospike_enable_metrics(&as, &err, &policy); diff --git a/examples/batch_examples/get/src/main/example.c b/examples/batch_examples/get/src/main/example.c index 19276fde2c..1be1e162b4 100644 --- a/examples/batch_examples/get/src/main/example.c +++ b/examples/batch_examples/get/src/main/example.c @@ -75,13 +75,13 @@ main(int argc, char* argv[]) as_error err; as_error_reset(&err); as_policy_metrics policy; - as_metrics_policy_init(&policy); - policy.interval = 5; #ifdef _MSC_VER - policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; + char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else - policy.report_directory = "/home/sklaus/metrics"; + char report_dir[] = "/home/sklaus/metrics"; #endif + as_metrics_policy_init(&policy, report_dir); + policy.interval = 5; // enable metrics as_status status = aerospike_enable_metrics(&as, &err, &policy); diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 82f6f29842..174ad5fae0 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -125,7 +125,7 @@ typedef struct as_policy_metrics_s { * * Default: */ - const char* report_directory; + char report_dir[256]; /** * Metrics file size soft limit in bytes for listeners that write logs. 
@@ -184,7 +184,7 @@ typedef struct as_node_metrics_s { */ typedef struct as_metrics_writer_s { FILE* file; - const char* report_directory; + char report_dir[256]; uint64_t max_size; uint64_t size; uint32_t latency_columns; @@ -204,7 +204,7 @@ typedef struct as_metrics_writer_s { * Initalize metrics policy */ AS_EXTERN void -as_metrics_policy_init(as_policy_metrics* policy); +as_metrics_policy_init(as_policy_metrics* policy, const char* report_dir); /** * Enable extended periodic cluster and node latency metrics. diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index d7f33df061..48516a46f9 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -65,7 +65,7 @@ as_metrics_write_line(as_metrics_writer* mw, const char* data, as_error* err) int written = fprintf(mw->file, "%s", data); if (written <= 0) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Failed to write metrics data: %d,%s", written, mw->report_directory); + "Failed to write metrics data: %d,%s", written, mw->report_dir); } mw->size += written; @@ -74,7 +74,7 @@ as_metrics_write_line(as_metrics_writer* mw, const char* data, as_error* err) if (result != 0) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "File stream did not close successfully: %s", mw->report_directory); + "File stream did not close successfully: %s", mw->report_dir); } return as_metrics_open_writer(mw, err); } @@ -87,7 +87,6 @@ as_metrics_writer_init_udata() { as_metrics_writer* mw = (as_metrics_writer*)cf_malloc(sizeof(as_metrics_writer)); mw->file = NULL; - mw->report_directory = NULL; mw->max_size = 0; mw->latency_columns = 0; mw->latency_shift = 0; @@ -124,8 +123,8 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) as_string_builder file_name; as_string_builder_inita(&file_name, 256, false); - as_string_builder_append(&file_name, mw->report_directory); - char last_char = mw->report_directory[(strlen(mw->report_directory) - 1)]; + 
as_string_builder_append(&file_name, mw->report_dir); + char last_char = mw->report_dir[(strlen(mw->report_dir) - 1)]; if (last_char != '/' && last_char != '\\') { as_string_builder_append_char(&file_name, separator()); } @@ -136,7 +135,7 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) if (!mw->file) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Failed to open file: %s", mw->report_directory); + "Failed to open file: %s", mw->report_dir); } mw->size = 0; @@ -625,7 +624,7 @@ as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* policy mw->max_size = policy->report_size_limit; mw->latency_columns = policy->latency_columns; mw->latency_shift = policy->latency_shift; - mw->report_directory = policy->report_directory; + as_strncpy(mw->report_dir, policy->report_dir, sizeof(mw->report_dir)); as_status status = as_metrics_open_writer(mw, err); @@ -655,7 +654,7 @@ as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* ud as_metrics_writer_destroy_nodes(cluster); as_metrics_writer_destroy(mw); return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "File stream did not flush successfully: %s", mw->report_directory); + "File stream did not flush successfully: %s", mw->report_dir); } } @@ -730,9 +729,10 @@ aerospike_disable_metrics(aerospike* as, as_error* err) } void -as_metrics_policy_init(as_policy_metrics* policy) +as_metrics_policy_init(as_policy_metrics* policy, const char* report_dir) { policy->report_size_limit = 0; + as_strncpy(policy->report_dir, report_dir, sizeof(policy->report_dir)); policy->interval = 30; policy->latency_columns = 7; policy->latency_shift = 1; diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index d38fab9c01..215ccd8ff0 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -383,14 +383,15 @@ static bool before(atf_plan* plan) } as_policy_metrics policy; - as_metrics_policy_init(&policy); - policy.interval = 5; - policy.report_size_limit = 
1000000; #ifdef _MSC_VER - policy.report_directory = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; + char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else - policy.report_directory = "/home/sklaus/metrics"; + char report_dir[] = "/home/sklaus/metrics"; #endif + as_metrics_policy_init(&policy, &report_dir); + policy.interval = 5; + policy.report_size_limit = 1000000; + // enable metrics as_status status = aerospike_enable_metrics(as, &err, &policy); From 2306b3fd994bfae79bca7bda3db5331cc45cad0d Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Thu, 15 Feb 2024 14:06:07 -0500 Subject: [PATCH 37/64] Add as_metrics_policy_set_report_dir(). Rename as_policy_metrics to as_metrics_policy. --- .../async_batch_get/src/main/example.c | 5 ++- .../basic_examples/append/src/main/example.c | 5 ++- .../batch_examples/get/src/main/example.c | 5 ++- src/include/aerospike/as_cluster.h | 2 +- src/include/aerospike/as_metrics.h | 42 ++++++++++++------- src/include/aerospike/as_node.h | 2 +- src/main/aerospike/as_cluster.c | 2 +- src/main/aerospike/as_metrics.c | 4 +- src/main/aerospike/as_node.c | 2 +- src/test/aerospike_test.c | 5 ++- 10 files changed, 46 insertions(+), 28 deletions(-) diff --git a/examples/async_examples/async_batch_get/src/main/example.c b/examples/async_examples/async_batch_get/src/main/example.c index 0b3c3372ab..ecb675ff6b 100644 --- a/examples/async_examples/async_batch_get/src/main/example.c +++ b/examples/async_examples/async_batch_get/src/main/example.c @@ -79,13 +79,14 @@ main(int argc, char* argv[]) as_error err; as_error_reset(&err); - as_policy_metrics policy; + as_metrics_policy policy; #ifdef _MSC_VER char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else char report_dir[] = "/home/sklaus/metrics"; #endif - as_metrics_policy_init(&policy, report_dir); + as_metrics_policy_init(&policy); + as_metrics_policy_set_report_dir(&policy, report_dir); policy.interval = 5; diff --git 
a/examples/basic_examples/append/src/main/example.c b/examples/basic_examples/append/src/main/example.c index 013dc843ef..d40c463fd1 100644 --- a/examples/basic_examples/append/src/main/example.c +++ b/examples/basic_examples/append/src/main/example.c @@ -58,13 +58,14 @@ main(int argc, char* argv[]) as_error err; as_error_reset(&err); - as_policy_metrics policy; + as_metrics_policy policy; #ifdef _MSC_VER char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else char report_dir[] = "/home/sklaus/metrics"; #endif - as_metrics_policy_init(&policy, report_dir); + as_metrics_policy_init(&policy); + as_metrics_policy_set_report_dir(&policy, report_dir); policy.interval = 5; // enable metrics diff --git a/examples/batch_examples/get/src/main/example.c b/examples/batch_examples/get/src/main/example.c index 1be1e162b4..9113248a0d 100644 --- a/examples/batch_examples/get/src/main/example.c +++ b/examples/batch_examples/get/src/main/example.c @@ -74,13 +74,14 @@ main(int argc, char* argv[]) as_error err; as_error_reset(&err); - as_policy_metrics policy; + as_metrics_policy policy; #ifdef _MSC_VER char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else char report_dir[] = "/home/sklaus/metrics"; #endif - as_metrics_policy_init(&policy, report_dir); + as_metrics_policy_init(&policy); + as_metrics_policy_set_report_dir(&policy, report_dir); policy.interval = 5; // enable metrics diff --git a/src/include/aerospike/as_cluster.h b/src/include/aerospike/as_cluster.h index 54469845c7..c66693b7dc 100644 --- a/src/include/aerospike/as_cluster.h +++ b/src/include/aerospike/as_cluster.h @@ -541,7 +541,7 @@ as_partition_shm_get_node( * Enable the collection of metrics */ as_status -as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_policy_metrics* policy); +as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_metrics_policy* policy); /** * @private diff --git a/src/include/aerospike/as_metrics.h 
b/src/include/aerospike/as_metrics.h index 174ad5fae0..6e689ad020 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #if defined(_MSC_VER) @@ -52,16 +53,6 @@ typedef uint8_t as_latency_type; // Types //--------------------------------- -/** - * Latency buckets for a transaction group. - * Latency bucket counts are cumulative and not reset on each metrics snapshot interval - */ -typedef struct as_latency_buckets_s { - uint64_t* buckets; - uint32_t latency_shift; - uint32_t latency_columns; -} as_latency_buckets; - struct as_policy_metrics_s; struct as_node_s; struct as_cluster_s; @@ -170,7 +161,17 @@ typedef struct as_policy_metrics_s { * Default: 1 */ uint32_t latency_shift; -} as_policy_metrics; +} as_metrics_policy; + +/** + * Latency buckets for a transaction group. + * Latency bucket counts are cumulative and not reset on each metrics snapshot interval + */ +typedef struct as_latency_buckets_s { + uint64_t* buckets; + uint32_t latency_shift; + uint32_t latency_columns; +} as_latency_buckets; /** * Node metrics latency bucket struct @@ -200,17 +201,30 @@ typedef struct as_metrics_writer_s { bool enable; } as_metrics_writer; +//--------------------------------- +// Functions +//--------------------------------- + /** * Initalize metrics policy */ AS_EXTERN void -as_metrics_policy_init(as_policy_metrics* policy, const char* report_dir); +as_metrics_policy_init(as_metrics_policy* policy); + +/** + * Initalize metrics policy + */ +static inline void +as_metrics_policy_set_report_dir(as_metrics_policy* policy, const char* report_dir) +{ + as_strncpy(policy->report_dir, report_dir, sizeof(policy->report_dir)); +} /** * Enable extended periodic cluster and node latency metrics. 
*/ AS_EXTERN as_status -aerospike_enable_metrics(aerospike* as, as_error* err, as_policy_metrics* policy); +aerospike_enable_metrics(aerospike* as, as_error* err, as_metrics_policy* policy); /** * Disable extended periodic cluster and node latency metrics. @@ -220,7 +234,7 @@ aerospike_disable_metrics(aerospike* as, as_error* err); static inline void as_metrics_set_listeners( - as_policy_metrics* policy, as_metrics_enable_listener enable, + as_metrics_policy* policy, as_metrics_enable_listener enable, as_metrics_disable_listener disable, as_metrics_node_close_listener node_close, as_metrics_snapshot_listener snapshot ) diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index ffb0011174..cd15b84363 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -667,7 +667,7 @@ as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapse * Enable metrics at the node level */ void -as_node_enable_metrics(as_node* node, const as_policy_metrics* policy); +as_node_enable_metrics(as_node* node, const as_metrics_policy* policy); /** * Return transaction error count. The value is cumulative and not reset per metrics interval. 
diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index 9e99775477..a6bcdbd0cd 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -555,7 +555,7 @@ as_cluster_remove_nodes_copy(as_cluster* cluster, as_vector* /* */ no } as_status -as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_policy_metrics* policy) +as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_metrics_policy* policy) { if (cluster->metrics_enabled) { cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 48516a46f9..a7a3cf2856 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -729,10 +729,10 @@ aerospike_disable_metrics(aerospike* as, as_error* err) } void -as_metrics_policy_init(as_policy_metrics* policy, const char* report_dir) +as_metrics_policy_init(as_metrics_policy* policy) { policy->report_size_limit = 0; - as_strncpy(policy->report_dir, report_dir, sizeof(policy->report_dir)); + as_strncpy(policy->report_dir, ".", sizeof(policy->report_dir)); policy->interval = 30; policy->latency_columns = 7; policy->latency_shift = 1; diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 1bed414be8..ac39c17e64 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -1322,7 +1322,7 @@ as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapse } void -as_node_enable_metrics(as_node* node, const as_policy_metrics* policy) +as_node_enable_metrics(as_node* node, const as_metrics_policy* policy) { node->metrics = as_node_metrics_init(policy->latency_columns, policy->latency_shift); } diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index 215ccd8ff0..3d427453a1 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -382,13 +382,14 @@ static bool before(atf_plan* 
plan) } } - as_policy_metrics policy; + as_metrics_policy policy; #ifdef _MSC_VER char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; #else char report_dir[] = "/home/sklaus/metrics"; #endif - as_metrics_policy_init(&policy, &report_dir); + as_metrics_policy_init(&policy); + as_metrics_policy_set_report_dir(&policy, report_dir); policy.interval = 5; policy.report_size_limit = 1000000; From b274d285dfab42c9c23e227ca7211678b860e3a2 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Fri, 16 Feb 2024 13:24:45 -0500 Subject: [PATCH 38/64] Move metrics_writer implementation to it's own separate files. Add as_metrics_writer_create() to be used when default metrics_writer implementation is needed. Do not reserve nodes when function is always called from cluster tend thread. Fix macos cpu/mem calculations. --- Makefile | 1 + src/include/aerospike/as_metrics.h | 57 +- src/include/aerospike/as_metrics_writer.h | 81 +++ src/main/aerospike/as_cluster.c | 58 +- src/main/aerospike/as_metrics.c | 704 +-------------------- src/main/aerospike/as_metrics_writer.c | 717 ++++++++++++++++++++++ xcode/aerospike.xcodeproj/project.pbxproj | 8 + 7 files changed, 870 insertions(+), 756 deletions(-) create mode 100644 src/include/aerospike/as_metrics_writer.h create mode 100644 src/main/aerospike/as_metrics_writer.c diff --git a/Makefile b/Makefile index c99a0c2ee4..fbe28616cd 100644 --- a/Makefile +++ b/Makefile @@ -137,6 +137,7 @@ AEROSPIKE += as_list_operations.o AEROSPIKE += as_lookup.o AEROSPIKE += as_map_operations.o AEROSPIKE += as_metrics.o +AEROSPIKE += as_metrics_writer.o AEROSPIKE += as_node.o AEROSPIKE += as_operations.o AEROSPIKE += as_partition.o diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 6e689ad020..68328d57dd 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -22,12 +22,6 @@ #include #include #include -#include - -#if defined(_MSC_VER) -#define 
WIN32_LEAN_AND_MEAN -#include -#endif #ifdef __cplusplus extern "C" { @@ -38,7 +32,6 @@ extern "C" { //--------------------------------- #define NS_TO_MS 1000000 -#define MIN_FILE_SIZE 1000000 typedef uint8_t as_latency_type; @@ -60,7 +53,7 @@ struct as_cluster_s; /** * Callbacks for metrics listener operations. */ -typedef as_status(*as_metrics_enable_listener)(as_error* err, const struct as_policy_metrics_s* policy, void* udata); +typedef as_status(*as_metrics_enable_listener)(as_error* err, void* udata); typedef as_status(*as_metrics_snapshot_listener)(as_error* err, struct as_cluster_s* cluster, void* udata); @@ -180,27 +173,6 @@ typedef struct as_node_metrics_s { as_latency_buckets* latency; } as_node_metrics; -/** - * Implementation of metrics_listeners - */ -typedef struct as_metrics_writer_s { - FILE* file; - char report_dir[256]; - uint64_t max_size; - uint64_t size; - uint32_t latency_columns; - uint32_t latency_shift; -#ifdef _MSC_VER - FILETIME prev_process_times_kernel; - FILETIME prev_system_times_kernel; - FILETIME prev_process_times_user; - FILETIME prev_system_times_user; - HANDLE process; - DWORD pid; -#endif - bool enable; -} as_metrics_writer; - //--------------------------------- // Functions //--------------------------------- @@ -220,6 +192,20 @@ as_metrics_policy_set_report_dir(as_metrics_policy* policy, const char* report_d as_strncpy(policy->report_dir, report_dir, sizeof(policy->report_dir)); } +static inline void +as_metrics_policy_set_listeners( + as_metrics_policy* policy, as_metrics_enable_listener enable, + as_metrics_disable_listener disable, as_metrics_node_close_listener node_close, + as_metrics_snapshot_listener snapshot, void* udata + ) +{ + policy->metrics_listeners.enable_listener = enable; + policy->metrics_listeners.disable_listener = disable; + policy->metrics_listeners.node_close_listener = node_close; + policy->metrics_listeners.snapshot_listener = snapshot; + policy->metrics_listeners.udata = udata; +} + /** * Enable 
extended periodic cluster and node latency metrics. */ @@ -232,19 +218,6 @@ aerospike_enable_metrics(aerospike* as, as_error* err, as_metrics_policy* policy AS_EXTERN as_status aerospike_disable_metrics(aerospike* as, as_error* err); -static inline void -as_metrics_set_listeners( - as_metrics_policy* policy, as_metrics_enable_listener enable, - as_metrics_disable_listener disable, as_metrics_node_close_listener node_close, - as_metrics_snapshot_listener snapshot - ) -{ - policy->metrics_listeners.enable_listener = enable; - policy->metrics_listeners.disable_listener = disable; - policy->metrics_listeners.node_close_listener = node_close; - policy->metrics_listeners.snapshot_listener = snapshot; -} - /** * Convert latency_type to string version for printing to the output file */ diff --git a/src/include/aerospike/as_metrics_writer.h b/src/include/aerospike/as_metrics_writer.h new file mode 100644 index 0000000000..b7fcf55490 --- /dev/null +++ b/src/include/aerospike/as_metrics_writer.h @@ -0,0 +1,81 @@ +/* + * Copyright 2008-2024 Aerospike, Inc. + * + * Portions may be licensed to Aerospike, Inc. under one or more contributor + * license agreements. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +#pragma once + +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +#define WIN32_LEAN_AND_MEAN +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +//--------------------------------- +// Types +//--------------------------------- + +/** + * Default metrics listener. This implementation writes periodic metrics snapshots to a file which + * will later be read and forwarded to OpenTelemetry by a separate offline application. + */ +typedef struct as_metrics_writer_s { + char report_dir[256]; + FILE* file; + uint64_t max_size; + uint64_t size; + uint32_t latency_columns; + uint32_t latency_shift; +#ifdef _MSC_VER + FILETIME prev_process_times_kernel; + FILETIME prev_system_times_kernel; + FILETIME prev_process_times_user; + FILETIME prev_system_times_user; + HANDLE process; + DWORD pid; +#endif + bool enable; +} as_metrics_writer; + +//--------------------------------- +// Functions +//--------------------------------- + +AS_EXTERN as_status +as_metrics_writer_create(as_error* err, const as_metrics_policy* policy, as_metrics_listeners* listeners); + +AS_EXTERN as_status +as_metrics_writer_enable(as_error* err, void* udata); + +AS_EXTERN as_status +as_metrics_writer_snapshot(as_error* err, as_cluster* cluster, void* udata); + +AS_EXTERN as_status +as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata); + +AS_EXTERN as_status +as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* udata); + +#ifdef __cplusplus +} // end extern "C" +#endif diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index a6bcdbd0cd..b18c00e271 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -561,26 +562,52 @@ as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_metrics_policy* cluster->metrics_listeners.disable_listener(err, cluster, 
cluster->metrics_listeners.udata); } - cluster->metrics_listeners = policy->metrics_listeners; + as_status status = AEROSPIKE_OK; + + as_error_reset(err); + + if (policy->metrics_listeners.enable_listener) { + // Use listeners defined in the metrics policy. + // Ensure all listeners and user data has been defined. + if (! (policy->metrics_listeners.enable_listener && policy->metrics_listeners.snapshot_listener && + policy->metrics_listeners.node_close_listener && policy->metrics_listeners.disable_listener && + policy->metrics_listeners.udata)) { + return as_error_set_message(err, AEROSPIKE_ERR_PARAM, "All metrics listeners and udata must be defined"); + } + + // Copy listeners from policy. + cluster->metrics_listeners = policy->metrics_listeners; + } + else { + // Create default metrics writer and set cluster llsteners. + status = as_metrics_writer_create(err, policy, &cluster->metrics_listeners); + + if (status != AEROSPIKE_OK) { + return status; + } + } + cluster->metrics_interval = policy->interval; cluster->metrics_latency_columns = policy->latency_columns; cluster->metrics_latency_shift = policy->latency_shift; as_nodes* nodes = as_nodes_reserve(cluster); + for (uint32_t i = 0; i < nodes->size; i++) { as_node* node = nodes->array[i]; as_node_enable_metrics(node, policy); } - as_nodes_release(nodes); - as_status status = cluster->metrics_listeners.enable_listener(err, policy, cluster->metrics_listeners.udata); + status = cluster->metrics_listeners.enable_listener(err, cluster->metrics_listeners.udata); + if (status != AEROSPIKE_OK) { as_cluster_disable_metrics(err, cluster); return status; } + cluster->metrics_enabled = true; - return AEROSPIKE_OK; + return status; } as_status @@ -648,14 +675,11 @@ as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* size; i++) { as_node* node = as_vector_get_ptr(nodes_to_remove, i); - as_node_deactivate(node); if (cluster->metrics_enabled) { - as_status status = 
cluster->metrics_listeners.node_close_listener(err, node, node->cluster->metrics_listeners.udata); - if (status != AEROSPIKE_OK) { - return status; - } + cluster->metrics_listeners.node_close_listener(err, node, node->cluster->metrics_listeners.udata); } + as_node_deactivate(node); } // Remove all nodes at once to avoid copying entire array multiple times. @@ -751,6 +775,15 @@ as_cluster_manage(as_cluster* cluster) if (cluster->max_error_rate > 0 && cluster->tend_count % cluster->error_rate_window == 0) { as_cluster_reset_error_rate(cluster); } + + if (cluster->metrics_enabled && cluster->tend_count % cluster->metrics_interval == 0) { + as_error err; + as_status status = cluster->metrics_listeners.snapshot_listener(&err, cluster, cluster->metrics_listeners.udata); + + if (status != AEROSPIKE_OK) { + as_log_warn("Metrics error: %s %s", as_error_string(status), err.message); + } + } } /** @@ -978,13 +1011,6 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) as_incr_uint32(&cluster->shm_info->cluster_shm->rebalance_gen); } - if (cluster->metrics_enabled && (cluster->tend_count % cluster->metrics_interval) == 0) { - as_status status = cluster->metrics_listeners.snapshot_listener(err, cluster, cluster->metrics_listeners.udata); - if (status != AEROSPIKE_OK) { - return status; - } - } - as_cluster_destroy_peers(&peers); as_cluster_manage(cluster); return AEROSPIKE_OK; diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index a7a3cf2856..90a2c377d1 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -14,703 +14,11 @@ * License for the specific language governing permissions and limitations under * the License. */ - -#include #include #include #include #include -#include -#include - -#define LOG(_fmt, ...) 
{ printf(_fmt "\n", ##__VA_ARGS__); fflush(stdout); } - -//--------------------------------- -// Globals -//--------------------------------- -extern uint32_t as_event_loop_capacity; - -//--------------------------------- -// Static Functions -//--------------------------------- - -static void -timestamp_to_string_filename(char* str, size_t str_size) -{ - time_t now = time(NULL); - struct tm * local = localtime(&now); - snprintf(str, str_size, - "%4d%02d%02d%02d%02d%02d", - 1900 + local->tm_year, local->tm_mon + 1, local->tm_mday, - local->tm_hour, local->tm_min, local->tm_sec); -} - -static void -timestamp_to_string(char* str, size_t str_size) -{ - time_t now = time(NULL); - struct tm* local = localtime(&now); - snprintf(str, str_size, - "%4d-%02d-%02d %02d:%02d:%02d", - 1900 + local->tm_year, local->tm_mon + 1, local->tm_mday, - local->tm_hour, local->tm_min, local->tm_sec); -} - -static as_status -as_metrics_open_writer(as_metrics_writer* mw, as_error* err); - -static as_status -as_metrics_write_line(as_metrics_writer* mw, const char* data, as_error* err) -{ - int written = fprintf(mw->file, "%s", data); - if (written <= 0) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Failed to write metrics data: %d,%s", written, mw->report_dir); - } - mw->size += written; - - if (mw->max_size > 0 && mw->size >= mw->max_size) { - uint32_t result = fclose(mw->file); - - if (result != 0) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "File stream did not close successfully: %s", mw->report_dir); - } - return as_metrics_open_writer(mw, err); - } - - return AEROSPIKE_OK; -} - -static void* -as_metrics_writer_init_udata() -{ - as_metrics_writer* mw = (as_metrics_writer*)cf_malloc(sizeof(as_metrics_writer)); - mw->file = NULL; - mw->max_size = 0; - mw->latency_columns = 0; - mw->latency_shift = 0; - mw->enable = false; -#ifdef _MSC_VER - mw->pid = GetCurrentProcessId(); - mw->process = OpenProcess(PROCESS_QUERY_INFORMATION, false, mw->pid); - FILETIME dummy; - if 
(mw->process != NULL) - { - GetProcessTimes(mw->process, &dummy, &dummy, &mw->prev_process_times_kernel, &mw->prev_process_times_user); - GetSystemTimes(0, &mw->prev_system_times_kernel, &mw->prev_system_times_user); - } -#endif - - return mw; -} - -static inline char separator() -{ -#ifdef _MSC_VER - return '\\'; -#else - return '/'; -#endif -} - -static as_status -as_metrics_open_writer(as_metrics_writer* mw, as_error* err) -{ - as_error_reset(err); - char now_file_str[128]; - timestamp_to_string_filename(now_file_str, sizeof(now_file_str)); - - as_string_builder file_name; - as_string_builder_inita(&file_name, 256, false); - as_string_builder_append(&file_name, mw->report_dir); - char last_char = mw->report_dir[(strlen(mw->report_dir) - 1)]; - if (last_char != '/' && last_char != '\\') { - as_string_builder_append_char(&file_name, separator()); - } - as_string_builder_append(&file_name, "metrics-"); - as_string_builder_append(&file_name, now_file_str); - as_string_builder_append(&file_name, ".log"); - mw->file = fopen(file_name.data, "w"); - - if (!mw->file) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Failed to open file: %s", mw->report_dir); - } - - mw->size = 0; - char now_str[128]; - timestamp_to_string(now_str, sizeof(now_str)); - - char data[512]; - int rv = snprintf(data, sizeof(data), "%s header(1) cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]] eventloop[processSize,queueSize] node[name,address:port,syncConn,asyncConn,errors,timeouts,latency[]] conn[inUse,inPool,opened,closed] latency(%u,%u)[type[l1,l2,l3...]]\n", - now_str, mw->latency_columns, mw->latency_shift); - if (rv <= 0) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Failed to construct metrics header: %d,%s", rv, file_name.data); - } - - return as_metrics_write_line(mw, data, err); -} - -static void -as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* sync) -{ - uint32_t max = 
node->cluster->conn_pools_per_node; - - // Sync connection summary. - for (uint32_t i = 0; i < max; i++) { - as_conn_pool* pool = &node->sync_conn_pools[i]; - - pthread_mutex_lock(&pool->lock); - uint32_t in_pool = as_queue_size(&pool->queue); - uint32_t total = pool->queue.total; - pthread_mutex_unlock(&pool->lock); - - sync->in_pool += in_pool; - sync->in_use += total - in_pool; - } - sync->opened = node->sync_conns_opened; - sync->closed = node->sync_conns_closed; -} - -static void -as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* async) -{ - // Async connection summary. - if (as_event_loop_capacity > 0) { - for (uint32_t i = 0; i < as_event_loop_size; i++) { - // Regular async. - as_conn_stats_sum(async, &node->async_conn_pools[i]); - } - } -} - -static void -as_metrics_write_conn(as_metrics_writer* mw, as_string_builder* sb, const struct as_conn_stats_s* stats) -{ - as_string_builder_append_uint(sb, stats->in_use); - as_string_builder_append_char(sb, ','); - as_string_builder_append_uint(sb, stats->in_pool); - as_string_builder_append_char(sb, ','); - as_string_builder_append_uint(sb, stats->opened); // Cumulative. Not reset on each interval. - as_string_builder_append_char(sb, ','); - as_string_builder_append_uint(sb, stats->closed); // Cumulative. Not reset on each interval. 
-} - -static void -as_metrics_write_node(as_metrics_writer* mw, as_string_builder* sb, struct as_node_s* node) -{ - as_string_builder_append_char(sb, '['); - as_string_builder_append(sb, node->name); - as_string_builder_append_char(sb, ','); - - as_string_builder_append(sb, as_node_get_address_string(node)); - as_string_builder_append_char(sb, ','); - - struct as_conn_stats_s sync; - struct as_conn_stats_s async; - as_conn_stats_init(&sync); - as_conn_stats_init(&async); - as_metrics_get_node_sync_conn_stats(node, &sync); - as_metrics_write_conn(mw, sb, &sync); - as_string_builder_append_char(sb, ','); - as_metrics_get_node_async_conn_stats(node, &async); - as_metrics_write_conn(mw, sb, &async); - as_string_builder_append_char(sb, ','); - - as_string_builder_append_uint64(sb, as_node_get_error_count(node)); - as_string_builder_append_char(sb, ','); - as_string_builder_append_uint64(sb, as_node_get_timeout_count(node)); - as_string_builder_append(sb, ",["); - - as_node_metrics* node_metrics = node->metrics; - uint32_t max = AS_LATENCY_TYPE_NONE; - - for (uint32_t i = 0; i < max; i++) { - if (i > 0) { - as_string_builder_append_char(sb, ','); - } - as_string_builder_append(sb, as_latency_type_to_string(i)); - as_string_builder_append_char(sb, '['); - - as_latency_buckets* buckets = &node_metrics->latency[i]; - uint32_t bucket_max = buckets->latency_columns; - - for (uint32_t j = 0; j < bucket_max; j++) { - if (j > 0) { - as_string_builder_append_char(sb, ','); - } - as_string_builder_append_uint64(sb, as_metrics_get_bucket(buckets, j)); - } - as_string_builder_append_char(sb, ']'); - } - as_string_builder_append(sb, "]]"); -} - -#if defined(__linux__) - -#include -#include - -static as_status -as_metrics_proc_stat_mem_cpu(as_error* err, double* vm_usage, double* resident_set, double* cpu_usage) -{ - *vm_usage = 0.0; - *resident_set = 0.0; - - FILE* proc_stat = fopen("/proc/self/stat", "r"); - if (!proc_stat) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - 
"Error calculating memory and CPU usage"); - } - - // dummies - int dummy_d; - char dummy_c; - unsigned int dummy_u; - long unsigned int dummy_lu; - long int dummy_ld; - - // the fields we want - uint64_t utime, stime; - long long unsigned int starttime; - uint64_t vsize; - int64_t rss; - - int matched = fscanf(proc_stat, "%d %s %c %d %d %d %d %d %u %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %llu %lu %ld", - &dummy_d, &dummy_c, &dummy_c, &dummy_d, &dummy_d, &dummy_d, &dummy_d, &dummy_d, &dummy_u, &dummy_lu, &dummy_lu, &dummy_lu, &dummy_lu, - &utime, &stime, &dummy_ld, &dummy_ld, &dummy_ld, &dummy_ld, &dummy_ld, &dummy_ld, &starttime, &vsize, &rss); - - if (matched == 0) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Error calculating memory and CPU usage"); - } - - int result = fclose(proc_stat); - - if (result != 0) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Error closing /proc/self/stat"); - } - - int64_t page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages - *vm_usage = vsize / 1024.0; - *resident_set = rss * page_size_kb; - - float u_time_sec = utime / sysconf(_SC_CLK_TCK); - float s_time_sec = stime / sysconf(_SC_CLK_TCK); - float start_time_sec = starttime / sysconf(_SC_CLK_TCK); - - struct sysinfo info; - int success = sysinfo(&info); - if (success != 0) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Error calculating CPU usage"); - } - - *cpu_usage = (u_time_sec + s_time_sec) / (info.uptime - start_time_sec) * 100; - - return AEROSPIKE_OK; -} - -static as_status -as_metrics_process_cpu_load_mem_usage(as_error* err, uint32_t* cpu_usage, uint32_t* mem) -{ - double resident_set = 0.0; - double mem_d = 0.0; - double cpu_usage_d = 0.0; - as_status result = as_metrics_proc_stat_mem_cpu(err, &mem_d, &resident_set, &cpu_usage_d); - if (result != AEROSPIKE_OK) { - return result; - } - - LOG("cpu %f", cpu_usage_d); - cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); - mem_d = mem_d + 
0.5 - (mem_d < 0); - *cpu_usage = (uint32_t)cpu_usage_d; - *mem = (uint32_t)mem_d; - - return AEROSPIKE_OK; -} -#endif - -#if defined(_MSC_VER) -#include - -static ULONGLONG -as_metrics_filetime_difference(FILETIME* prev_kernel, FILETIME* prev_user, FILETIME* cur_kernel, FILETIME* cur_user) { - LARGE_INTEGER a1, a2; - a1.LowPart = prev_kernel->dwLowDateTime; - a1.HighPart = prev_kernel->dwHighDateTime; - a2.LowPart = prev_user->dwLowDateTime; - a2.HighPart = prev_user->dwHighDateTime; - - LARGE_INTEGER b1, b2; - b1.LowPart = cur_kernel->dwLowDateTime; - b1.HighPart = cur_kernel->dwHighDateTime; - b2.LowPart = cur_user->dwLowDateTime; - b2.HighPart = cur_user->dwHighDateTime; - - //a1 and b1 - contains kernel times - //a2 and b2 - contains user times - return (b1.QuadPart - a1.QuadPart) + (b2.QuadPart - a2.QuadPart); -} - -static double -as_metrics_process_cpu_load(as_metrics_writer* mw) -{ - if (mw->process == NULL) { - return -1; - } - - FILETIME dummy; - FILETIME process_times_kernel, process_times_user, system_times_kernel, system_times_user; - - if (GetProcessTimes(mw->process, &dummy, &dummy, &process_times_kernel, &process_times_user) == 0) { - return -1; - } - if (GetSystemTimes(0, &system_times_kernel, &system_times_user) == 0) { - return -1; - } - - // Get diffrence latest - previous times. - ULONGLONG proc = as_metrics_filetime_difference(&mw->prev_process_times_kernel, &mw->prev_process_times_user, - &process_times_kernel, &process_times_user); - ULONGLONG system = as_metrics_filetime_difference(&mw->prev_system_times_kernel, &mw->prev_system_times_user, - &system_times_kernel, &system_times_user); - double usage = 0.0; - - // Calcualte percentage. - if (system != 0) { - usage = 100.0 * (proc / (double)system); - } - - // Assign latest times to previous times for the next round of calculation. 
- mw->prev_process_times_kernel = process_times_kernel; - mw->prev_process_times_user = process_times_user; - mw->prev_system_times_kernel = system_times_kernel; - mw->prev_system_times_user = system_times_user; - - return usage; -} - -static uint32_t -as_metrics_process_mem_usage() -{ - PROCESS_MEMORY_COUNTERS memCounter; - BOOL result = GetProcessMemoryInfo(GetCurrentProcess(), - &memCounter, - sizeof(memCounter)); - - return (uint32_t)memCounter.WorkingSetSize; -} - -static as_status -as_metrics_process_cpu_load_mem_usage(as_error* err, as_metrics_writer* mw, uint32_t* cpu_usage, uint32_t* mem) -{ - double cpu_usage_d = as_metrics_process_cpu_load(mw); - if (cpu_usage_d < 0) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Error calculating CPU usage"); - } - cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); - *cpu_usage = (uint32_t)cpu_usage_d; - *mem = as_metrics_process_mem_usage(); - - return AEROSPIKE_OK; -} - -#endif - -#if defined(__APPLE__) -#include -#include -#include - -static double -as_metrics_process_mem_usage() -{ - struct task_basic_info t_info; - mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; - - if (KERN_SUCCESS != task_info(mach_task_self(), - TASK_BASIC_INFO, (task_info_t)&t_info, - &t_info_count)) - { - return -1; - } - - return t_info.virtual_size; -} - -static double -as_metrics_process_cpu_load() -{ - pid_t pid = getpid(); - - as_string_builder sb; - as_string_builder_inita(&sb, 128, false); - as_string_builder_append(&sb, "ps -p "); - as_string_builder_append_int(&sb, pid); - as_string_builder_append(&sb, " -o %CPU"); - - FILE* file = popen(sb.data, "r"); - - if (!file) { - return -1.0; - } - - char cpu_holder[5]; - char cpu_percent[6]; - - if (!fgets(cpu_holder, sizeof(cpu_holder), file)) { - pclose(file); - return -1.0; - } - - if (!fgets(cpu_percent, sizeof(cpu_percent), file)) { - pclose(file); - return -1.0; - } - - pclose(file); - return atof(cpu_percent); -} - -static as_status 
-as_metrics_process_cpu_load_mem_usage(as_error* err, uint32_t* cpu_usage, uint32_t* mem) -{ - double cpu_usage_d = as_metrics_process_cpu_load(); - double mem_d = as_metrics_process_mem_usage(); - - if (cpu_usage_d < 0) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Error calculating CPU usage"); - } - - if (mem_d < 0) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Error calculating memory usage"); - } - - cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); - mem_d = mem_d + 0.5 - (mem_d < 0); - *cpu_usage = (uint32_t)cpu_usage_d; - *mem = (uint32_t)mem_d; - - return AEROSPIKE_OK; -} -#endif - -static as_status -as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, struct as_cluster_s* cluster) -{ - char* cluster_name = cluster->cluster_name; - - if (cluster_name == NULL) { - cluster_name = ""; - } - - uint32_t cpu_load = 0; - uint32_t mem = 0; -#ifdef _MSC_VER - as_status result = as_metrics_process_cpu_load_mem_usage(err, mw, &cpu_load, &mem); -#else - as_status result = as_metrics_process_cpu_load_mem_usage(err, &cpu_load, &mem); -#endif - if (result != AEROSPIKE_OK) { - return result; - } - - char now_str[128]; - timestamp_to_string(now_str, sizeof(now_str)); - as_string_builder sb; - as_string_builder_inita(&sb, 16384, true); - as_string_builder_append(&sb, now_str); - as_string_builder_append(&sb, " cluster["); - as_string_builder_append(&sb, cluster_name); - as_string_builder_append_char(&sb, ','); - as_string_builder_append_int(&sb, cpu_load); - as_string_builder_append_char(&sb, ','); - as_string_builder_append_int(&sb, mem); - as_string_builder_append_char(&sb, ','); - as_string_builder_append_uint(&sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. - as_string_builder_append_char(&sb, ','); - as_string_builder_append_uint64(&sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. 
- as_string_builder_append_char(&sb, ','); - as_string_builder_append_uint64(&sb, cluster->retry_count); // Cumulative. Not reset on each interval. - as_string_builder_append_char(&sb, ','); - as_string_builder_append_uint64(&sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. - as_string_builder_append(&sb, ",["); - - for (uint32_t i = 0; i < as_event_loop_size; i++) { - as_event_loop* loop = &as_event_loops[i]; - if (i > 0) { - as_string_builder_append_char(&sb, ','); - } - as_string_builder_append_char(&sb, '['); - as_string_builder_append_int(&sb, as_event_loop_get_process_size(loop)); - as_string_builder_append_char(&sb, ','); - as_string_builder_append_uint(&sb, as_event_loop_get_queue_size(loop)); - as_string_builder_append_char(&sb, ']'); - } - as_string_builder_append(&sb, "],["); - - as_nodes* nodes = as_nodes_reserve(cluster); - for (uint32_t i = 0; i < nodes->size; i++) { - as_node* node = nodes->array[i]; - as_node_reserve(node); - - if (i > 0) { - as_string_builder_append_char(&sb, ','); - } - as_metrics_write_node(mw, &sb, node); - - as_node_release(node); - } - as_string_builder_append(&sb, "]]"); - - as_nodes_release(nodes); - as_string_builder_append_newline(&sb); - as_status status = as_metrics_write_line(mw, sb.data, err); - as_string_builder_destroy(&sb); - return status; -} - -static void -as_metrics_writer_destroy(as_metrics_writer* mw) -{ - fclose(mw->file); - cf_free(mw); -} - -static void -as_metrics_writer_destroy_node_metrics(as_node* node) -{ - if (node->metrics != NULL) { - uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; - for (uint32_t i = 0; i < max_latency_type; i++) { - cf_free(node->metrics->latency[i].buckets); - } - cf_free(node->metrics->latency); - cf_free(node->metrics); - node->metrics = NULL; - } -} - -static void -as_metrics_writer_destroy_nodes(as_cluster* cluster) -{ - // Free node memory - as_nodes* nodes = as_nodes_reserve(cluster); - for (uint32_t i = 0; i < nodes->size; i++) { - 
as_metrics_writer_destroy_node_metrics(nodes->array[i]); - } - as_nodes_release(nodes); -} - -static as_status -as_metrics_writer_enable(as_error* err, const struct as_policy_metrics_s* policy, void* udata) -{ - as_error_reset(err); - if (policy->report_size_limit != 0 && policy->report_size_limit < MIN_FILE_SIZE) { - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Metrics policy report_size_limit %d must be at least %d", policy->report_size_limit, MIN_FILE_SIZE); - } - - // create file directory - as_metrics_writer* mw = udata; - mw->max_size = policy->report_size_limit; - mw->latency_columns = policy->latency_columns; - mw->latency_shift = policy->latency_shift; - as_strncpy(mw->report_dir, policy->report_dir, sizeof(mw->report_dir)); - - as_status status = as_metrics_open_writer(mw, err); - - if (status != AEROSPIKE_OK) { - return status; - } - - mw->enable = true; - return AEROSPIKE_OK; -} - -static as_status -as_metrics_writer_snapshot(as_error* err, struct as_cluster_s* cluster, void* udata) -{ - as_error_reset(err); - as_metrics_writer* mw = udata; - - if (mw->enable && mw->file != NULL) { - as_status status = as_metrics_write_cluster(err, mw, cluster); - if (status != AEROSPIKE_OK) { - as_metrics_writer_destroy_nodes(cluster); - as_metrics_writer_destroy(mw); - return status; - } - uint32_t result = fflush(mw->file); - if (result != 0) { - as_metrics_writer_destroy_nodes(cluster); - as_metrics_writer_destroy(mw); - return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "File stream did not flush successfully: %s", mw->report_dir); - } - } - - return AEROSPIKE_OK; -} - -static as_status -as_metrics_writer_disable(as_error* err, struct as_cluster_s* cluster, void* udata) -{ - // write cluster into to file, disable - as_error_reset(err); - as_metrics_writer* mw = udata; - if (mw != NULL) { - if (mw->enable && mw->file != NULL) { - as_status status = as_metrics_write_cluster(err, mw, cluster); - - if (status != AEROSPIKE_OK) { - 
as_metrics_writer_destroy_nodes(cluster); - as_metrics_writer_destroy(mw); - return status; - } - } - as_metrics_writer_destroy_nodes(cluster); - as_metrics_writer_destroy(mw); - } - - return AEROSPIKE_OK; -} - -static as_status -as_metrics_writer_node_close(as_error* err, struct as_node_s* node, void* udata) -{ - // write node info to file - as_error_reset(err); - as_metrics_writer* mw = udata; - - if (mw->enable && mw->file != NULL) { - char now_str[128]; - timestamp_to_string(now_str, sizeof(now_str)); - as_string_builder sb; - as_string_builder_inita(&sb, 16384, true); - as_string_builder_append(&sb, now_str); - as_node_reserve(node); - as_metrics_write_node(mw, &sb, node); - as_string_builder_append_newline(&sb); - as_status status = as_metrics_write_line(mw, sb.data, err); - - as_metrics_writer_destroy_node_metrics(node); - as_node_release(node); - as_string_builder_destroy(&sb); - - return status; - } - - return AEROSPIKE_OK; -} +#include //--------------------------------- // Functions @@ -736,11 +44,11 @@ as_metrics_policy_init(as_metrics_policy* policy) policy->interval = 30; policy->latency_columns = 7; policy->latency_shift = 1; - policy->metrics_listeners.enable_listener = as_metrics_writer_enable; - policy->metrics_listeners.disable_listener = as_metrics_writer_disable; - policy->metrics_listeners.node_close_listener = as_metrics_writer_node_close; - policy->metrics_listeners.snapshot_listener = as_metrics_writer_snapshot; - policy->metrics_listeners.udata = as_metrics_writer_init_udata(); + policy->metrics_listeners.enable_listener = NULL; + policy->metrics_listeners.snapshot_listener = NULL; + policy->metrics_listeners.node_close_listener = NULL; + policy->metrics_listeners.disable_listener = NULL; + policy->metrics_listeners.udata = NULL; } char* diff --git a/src/main/aerospike/as_metrics_writer.c b/src/main/aerospike/as_metrics_writer.c new file mode 100644 index 0000000000..df1f9b4569 --- /dev/null +++ b/src/main/aerospike/as_metrics_writer.c @@ 
-0,0 +1,717 @@ +/* + * Copyright 2008-2024 Aerospike, Inc. + * + * Portions may be licensed to Aerospike, Inc. under one or more contributor + * license agreements. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +#include +#include +#include +#include +#include + +//--------------------------------- +// Macros +//--------------------------------- + +#define MIN_FILE_SIZE 1000000 + +#define LOG(_fmt, ...) { printf(_fmt "\n", ##__VA_ARGS__); fflush(stdout); } + +#ifdef _MSC_VER +static char as_dir_sep = '\\'; +#else +static char as_dir_sep = '/'; +#endif + +//--------------------------------- +// Linux Static Functions +//--------------------------------- + +#if defined(__linux__) +#include +#include + +static as_status +as_metrics_proc_stat_mem_cpu(as_error* err, double* vm_usage, double* resident_set, double* cpu_usage) +{ + *vm_usage = 0.0; + *resident_set = 0.0; + + FILE* proc_stat = fopen("/proc/self/stat", "r"); + if (!proc_stat) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating memory and CPU usage"); + } + + // dummies + int dummy_d; + char dummy_c; + unsigned int dummy_u; + long unsigned int dummy_lu; + long int dummy_ld; + + // the fields we want + uint64_t utime, stime; + long long unsigned int starttime; + uint64_t vsize; + int64_t rss; + + int matched = fscanf(proc_stat, "%d %s %c %d %d %d %d %d %u %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %llu %lu %ld", + &dummy_d, &dummy_c, &dummy_c, &dummy_d, &dummy_d, &dummy_d, 
&dummy_d, &dummy_d, &dummy_u, &dummy_lu, &dummy_lu, &dummy_lu, &dummy_lu, + &utime, &stime, &dummy_ld, &dummy_ld, &dummy_ld, &dummy_ld, &dummy_ld, &dummy_ld, &starttime, &vsize, &rss); + + if (matched == 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating memory and CPU usage"); + } + + int result = fclose(proc_stat); + + if (result != 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error closing /proc/self/stat"); + } + + int64_t page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages + *vm_usage = vsize / 1024.0; + *resident_set = rss * page_size_kb; + + float u_time_sec = utime / sysconf(_SC_CLK_TCK); + float s_time_sec = stime / sysconf(_SC_CLK_TCK); + float start_time_sec = starttime / sysconf(_SC_CLK_TCK); + + struct sysinfo info; + int success = sysinfo(&info); + if (success != 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating CPU usage"); + } + + *cpu_usage = (u_time_sec + s_time_sec) / (info.uptime - start_time_sec) * 100; + + return AEROSPIKE_OK; +} + +static as_status +as_metrics_process_cpu_load_mem_usage(as_error* err, as_metrics_writer* mw, uint32_t* cpu_usage, uint32_t* mem) +{ + double resident_set = 0.0; + double mem_d = 0.0; + double cpu_usage_d = 0.0; + as_status result = as_metrics_proc_stat_mem_cpu(err, &mem_d, &resident_set, &cpu_usage_d); + if (result != AEROSPIKE_OK) { + return result; + } + + LOG("cpu %f", cpu_usage_d); + cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); + mem_d = mem_d + 0.5 - (mem_d < 0); + *cpu_usage = (uint32_t)cpu_usage_d; + *mem = (uint32_t)mem_d; + + return AEROSPIKE_OK; +} +#endif + +//--------------------------------- +// MacOS Static Functions +//--------------------------------- + +#if defined(__APPLE__) +#include +#include +#include + +static double +as_metrics_process_mem_usage() +{ + struct task_basic_info t_info; + mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; + + if (KERN_SUCCESS != 
task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count)) + { + return -1.0; + } + + return t_info.resident_size; +} + +static double +as_metrics_process_cpu_load() +{ + pid_t pid = getpid(); + + as_string_builder sb; + as_string_builder_inita(&sb, 128, false); + as_string_builder_append(&sb, "ps -p "); + as_string_builder_append_int(&sb, pid); + as_string_builder_append(&sb, " -o %cpu"); + + FILE* file = popen(sb.data, "r"); + + if (!file) { + return -1.0; + } + + char cpu_holder[5]; + char cpu_percent[6]; + + if (!fgets(cpu_holder, sizeof(cpu_holder), file)) { + pclose(file); + return 0.0; + } + + if (!fgets(cpu_percent, sizeof(cpu_percent), file)) { + pclose(file); + return 0.0; + } + + pclose(file); + return atof(cpu_percent); +} + +static as_status +as_metrics_process_cpu_load_mem_usage(as_error* err, as_metrics_writer* mw, uint32_t* cpu_usage, uint32_t* mem) +{ + double cpu_usage_d = as_metrics_process_cpu_load(); + double mem_d = as_metrics_process_mem_usage(); + + if (cpu_usage_d < 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating CPU usage"); + } + + if (mem_d < 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating memory usage"); + } + + // Round values. 
+ cpu_usage_d = cpu_usage_d + 0.5; + mem_d = mem_d + 0.5; + *cpu_usage = (uint32_t)cpu_usage_d; + *mem = (uint32_t)mem_d; + + return AEROSPIKE_OK; +} +#endif + +//--------------------------------- +// Microsoft Static Functions +//--------------------------------- + +#if defined(_MSC_VER) +#include + +static ULONGLONG +as_metrics_filetime_difference(FILETIME* prev_kernel, FILETIME* prev_user, FILETIME* cur_kernel, FILETIME* cur_user) { + LARGE_INTEGER a1, a2; + a1.LowPart = prev_kernel->dwLowDateTime; + a1.HighPart = prev_kernel->dwHighDateTime; + a2.LowPart = prev_user->dwLowDateTime; + a2.HighPart = prev_user->dwHighDateTime; + + LARGE_INTEGER b1, b2; + b1.LowPart = cur_kernel->dwLowDateTime; + b1.HighPart = cur_kernel->dwHighDateTime; + b2.LowPart = cur_user->dwLowDateTime; + b2.HighPart = cur_user->dwHighDateTime; + + //a1 and b1 - contains kernel times + //a2 and b2 - contains user times + return (b1.QuadPart - a1.QuadPart) + (b2.QuadPart - a2.QuadPart); +} + +static double +as_metrics_process_cpu_load(as_metrics_writer* mw) +{ + if (mw->process == NULL) { + return -1; + } + + FILETIME dummy; + FILETIME process_times_kernel, process_times_user, system_times_kernel, system_times_user; + + if (GetProcessTimes(mw->process, &dummy, &dummy, &process_times_kernel, &process_times_user) == 0) { + return -1; + } + if (GetSystemTimes(0, &system_times_kernel, &system_times_user) == 0) { + return -1; + } + + // Get diffrence latest - previous times. + ULONGLONG proc = as_metrics_filetime_difference(&mw->prev_process_times_kernel, &mw->prev_process_times_user, + &process_times_kernel, &process_times_user); + ULONGLONG system = as_metrics_filetime_difference(&mw->prev_system_times_kernel, &mw->prev_system_times_user, + &system_times_kernel, &system_times_user); + double usage = 0.0; + + // Calcualte percentage. + if (system != 0) { + usage = 100.0 * (proc / (double)system); + } + + // Assign latest times to previous times for the next round of calculation. 
+ mw->prev_process_times_kernel = process_times_kernel; + mw->prev_process_times_user = process_times_user; + mw->prev_system_times_kernel = system_times_kernel; + mw->prev_system_times_user = system_times_user; + + return usage; +} + +static uint32_t +as_metrics_process_mem_usage() +{ + PROCESS_MEMORY_COUNTERS memCounter; + BOOL result = GetProcessMemoryInfo(GetCurrentProcess(), + &memCounter, + sizeof(memCounter)); + + return (uint32_t)memCounter.WorkingSetSize; +} + +static as_status +as_metrics_process_cpu_load_mem_usage(as_error* err, as_metrics_writer* mw, uint32_t* cpu_usage, uint32_t* mem) +{ + double cpu_usage_d = as_metrics_process_cpu_load(mw); + if (cpu_usage_d < 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Error calculating CPU usage"); + } + cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); + *cpu_usage = (uint32_t)cpu_usage_d; + *mem = as_metrics_process_mem_usage(); + + return AEROSPIKE_OK; +} +#endif + +//--------------------------------- +// Static Functions +//--------------------------------- + +static as_status +as_metrics_open_writer(as_metrics_writer* mw, as_error* err); + +static void +timestamp_to_string(char* str, size_t str_size) +{ + time_t now = time(NULL); + struct tm* local = localtime(&now); + snprintf(str, str_size, + "%4d-%02d-%02d %02d:%02d:%02d", + 1900 + local->tm_year, local->tm_mon + 1, local->tm_mday, + local->tm_hour, local->tm_min, local->tm_sec); +} + +static void +timestamp_to_string_filename(char* str, size_t str_size) +{ + time_t now = time(NULL); + struct tm* local = localtime(&now); + snprintf(str, str_size, + "%4d%02d%02d%02d%02d%02d", + 1900 + local->tm_year, local->tm_mon + 1, local->tm_mday, + local->tm_hour, local->tm_min, local->tm_sec); +} + +static as_status +as_metrics_write_line(as_metrics_writer* mw, const char* data, as_error* err) +{ + int written = fprintf(mw->file, "%s", data); + if (written <= 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Failed to write metrics data: 
%d,%s", written, mw->report_dir); + } + mw->size += written; + + if (mw->max_size > 0 && mw->size >= mw->max_size) { + uint32_t result = fclose(mw->file); + + if (result != 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "File stream did not close successfully: %s", mw->report_dir); + } + return as_metrics_open_writer(mw, err); + } + + return AEROSPIKE_OK; +} + +static as_status +as_metrics_open_writer(as_metrics_writer* mw, as_error* err) +{ + as_error_reset(err); + char now_file_str[128]; + timestamp_to_string_filename(now_file_str, sizeof(now_file_str)); + + as_string_builder file_name; + as_string_builder_inita(&file_name, 256, false); + as_string_builder_append(&file_name, mw->report_dir); + char last_char = mw->report_dir[(strlen(mw->report_dir) - 1)]; + if (last_char != '/' && last_char != '\\') { + as_string_builder_append_char(&file_name, as_dir_sep); + } + as_string_builder_append(&file_name, "metrics-"); + as_string_builder_append(&file_name, now_file_str); + as_string_builder_append(&file_name, ".log"); + mw->file = fopen(file_name.data, "w"); + + if (!mw->file) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, "Failed to open file: %s", file_name.data); + } + + mw->size = 0; + char now_str[128]; + timestamp_to_string(now_str, sizeof(now_str)); + + char data[512]; + int rv = snprintf(data, sizeof(data), "%s header(1) cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]] eventloop[processSize,queueSize] node[name,address:port,syncConn,asyncConn,errors,timeouts,latency[]] conn[inUse,inPool,opened,closed] latency(%u,%u)[type[l1,l2,l3...]]\n", + now_str, mw->latency_columns, mw->latency_shift); + if (rv <= 0) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Failed to construct metrics header: %d,%s", rv, file_name.data); + } + + return as_metrics_write_line(mw, data, err); +} + +static void +as_metrics_get_node_sync_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* sync) 
+{ + uint32_t max = node->cluster->conn_pools_per_node; + + // Sync connection summary. + for (uint32_t i = 0; i < max; i++) { + as_conn_pool* pool = &node->sync_conn_pools[i]; + + pthread_mutex_lock(&pool->lock); + uint32_t in_pool = as_queue_size(&pool->queue); + uint32_t total = pool->queue.total; + pthread_mutex_unlock(&pool->lock); + + sync->in_pool += in_pool; + sync->in_use += total - in_pool; + } + sync->opened = node->sync_conns_opened; + sync->closed = node->sync_conns_closed; +} + +static void +as_metrics_get_node_async_conn_stats(const struct as_node_s* node, struct as_conn_stats_s* async) +{ + // Async connection summary. + for (uint32_t i = 0; i < as_event_loop_size; i++) { + // Regular async. + as_conn_stats_sum(async, &node->async_conn_pools[i]); + } +} + +static void +as_metrics_write_conn(as_metrics_writer* mw, as_string_builder* sb, const struct as_conn_stats_s* stats) +{ + as_string_builder_append_uint(sb, stats->in_use); + as_string_builder_append_char(sb, ','); + as_string_builder_append_uint(sb, stats->in_pool); + as_string_builder_append_char(sb, ','); + as_string_builder_append_uint(sb, stats->opened); // Cumulative. Not reset on each interval. + as_string_builder_append_char(sb, ','); + as_string_builder_append_uint(sb, stats->closed); // Cumulative. Not reset on each interval. 
+} + +static void +as_metrics_write_node(as_metrics_writer* mw, as_string_builder* sb, struct as_node_s* node) +{ + as_string_builder_append_char(sb, '['); + as_string_builder_append(sb, node->name); + as_string_builder_append_char(sb, ','); + + as_string_builder_append(sb, as_node_get_address_string(node)); + as_string_builder_append_char(sb, ','); + + struct as_conn_stats_s sync; + struct as_conn_stats_s async; + as_conn_stats_init(&sync); + as_conn_stats_init(&async); + as_metrics_get_node_sync_conn_stats(node, &sync); + as_metrics_write_conn(mw, sb, &sync); + as_string_builder_append_char(sb, ','); + as_metrics_get_node_async_conn_stats(node, &async); + as_metrics_write_conn(mw, sb, &async); + as_string_builder_append_char(sb, ','); + + as_string_builder_append_uint64(sb, as_node_get_error_count(node)); + as_string_builder_append_char(sb, ','); + as_string_builder_append_uint64(sb, as_node_get_timeout_count(node)); + as_string_builder_append(sb, ",["); + + as_node_metrics* node_metrics = node->metrics; + uint32_t max = AS_LATENCY_TYPE_NONE; + + for (uint32_t i = 0; i < max; i++) { + if (i > 0) { + as_string_builder_append_char(sb, ','); + } + as_string_builder_append(sb, as_latency_type_to_string(i)); + as_string_builder_append_char(sb, '['); + + as_latency_buckets* buckets = &node_metrics->latency[i]; + uint32_t bucket_max = buckets->latency_columns; + + for (uint32_t j = 0; j < bucket_max; j++) { + if (j > 0) { + as_string_builder_append_char(sb, ','); + } + as_string_builder_append_uint64(sb, as_metrics_get_bucket(buckets, j)); + } + as_string_builder_append_char(sb, ']'); + } + as_string_builder_append(sb, "]]"); +} + +static as_status +as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, as_cluster* cluster) +{ + char* cluster_name = cluster->cluster_name; + + if (cluster_name == NULL) { + cluster_name = ""; + } + + uint32_t cpu_load = 0; + uint32_t mem = 0; + as_status result = as_metrics_process_cpu_load_mem_usage(err, mw, &cpu_load, &mem); + 
if (result != AEROSPIKE_OK) { + return result; + } + + char now_str[128]; + timestamp_to_string(now_str, sizeof(now_str)); + as_string_builder sb; + as_string_builder_inita(&sb, 16384, true); + as_string_builder_append(&sb, now_str); + as_string_builder_append(&sb, " cluster["); + as_string_builder_append(&sb, cluster_name); + as_string_builder_append_char(&sb, ','); + as_string_builder_append_int(&sb, cpu_load); + as_string_builder_append_char(&sb, ','); + as_string_builder_append_int(&sb, mem); + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint(&sb, cluster->invalid_node_count); // Cumulative. Not reset on each interval. + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint64(&sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint64(&sb, cluster->retry_count); // Cumulative. Not reset on each interval. + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint64(&sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. + as_string_builder_append(&sb, ",["); + + for (uint32_t i = 0; i < as_event_loop_size; i++) { + as_event_loop* loop = &as_event_loops[i]; + if (i > 0) { + as_string_builder_append_char(&sb, ','); + } + as_string_builder_append_char(&sb, '['); + as_string_builder_append_int(&sb, as_event_loop_get_process_size(loop)); + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint(&sb, as_event_loop_get_queue_size(loop)); + as_string_builder_append_char(&sb, ']'); + } + as_string_builder_append(&sb, "],["); + + // Since this function is only called from the cluster tend thread, there is no need to reserve nodes. 
+ as_nodes* nodes = cluster->nodes; + for (uint32_t i = 0; i < nodes->size; i++) { + as_node* node = nodes->array[i]; + + if (i > 0) { + as_string_builder_append_char(&sb, ','); + } + as_metrics_write_node(mw, &sb, node); + } + as_string_builder_append(&sb, "]]"); + + as_string_builder_append_newline(&sb); + as_status status = as_metrics_write_line(mw, sb.data, err); + as_string_builder_destroy(&sb); + return status; +} + +static void +as_metrics_writer_destroy_node_metrics(as_node* node) +{ + if (node->metrics != NULL) { + uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; + for (uint32_t i = 0; i < max_latency_type; i++) { + cf_free(node->metrics->latency[i].buckets); + } + cf_free(node->metrics->latency); + cf_free(node->metrics); + node->metrics = NULL; + } +} + +static void +as_metrics_writer_destroy_nodes(as_cluster* cluster) +{ + // Free node memory + as_nodes* nodes = as_nodes_reserve(cluster); + for (uint32_t i = 0; i < nodes->size; i++) { + as_metrics_writer_destroy_node_metrics(nodes->array[i]); + } + as_nodes_release(nodes); +} + +static void +as_metrics_writer_destroy(as_metrics_writer* mw) +{ + fclose(mw->file); + cf_free(mw); +} + +//--------------------------------- +// Public Functions +//--------------------------------- + +as_status +as_metrics_writer_create(as_error* err, const as_metrics_policy* policy, as_metrics_listeners* listeners) +{ + if (policy->report_size_limit != 0 && policy->report_size_limit < MIN_FILE_SIZE) { + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "Metrics policy report_size_limit %" PRIu64 " must be at least %d", policy->report_size_limit, MIN_FILE_SIZE); + } + + as_metrics_writer* mw = cf_malloc(sizeof(as_metrics_writer)); + as_strncpy(mw->report_dir, policy->report_dir, sizeof(mw->report_dir)); + mw->file = NULL; + mw->max_size = policy->report_size_limit; + mw->size = 0; + mw->latency_columns = policy->latency_columns; + mw->latency_shift = policy->latency_shift; + mw->enable = false; + +#ifdef _MSC_VER + mw->pid 
= GetCurrentProcessId(); + mw->process = OpenProcess(PROCESS_QUERY_INFORMATION, false, mw->pid); + mw->prev_process_times_kernel = 0; + mw->prev_process_times_user = 0; + mw->prev_system_times_kernel = 0; + mw->prev_system_times_user = 0; + + FILETIME dummy; + if (mw->process != NULL) + { + GetProcessTimes(mw->process, &dummy, &dummy, &mw->prev_process_times_kernel, &mw->prev_process_times_user); + GetSystemTimes(0, &mw->prev_system_times_kernel, &mw->prev_system_times_user); + } +#endif + + listeners->enable_listener = as_metrics_writer_enable; + listeners->snapshot_listener = as_metrics_writer_snapshot; + listeners->node_close_listener = as_metrics_writer_node_close; + listeners->disable_listener = as_metrics_writer_disable; + listeners->udata = mw; + return AEROSPIKE_OK; +} + +as_status +as_metrics_writer_enable(as_error* err, void* udata) +{ + as_metrics_writer* mw = udata; + as_status status = as_metrics_open_writer(mw, err); + + if (status != AEROSPIKE_OK) { + return status; + } + + mw->enable = true; + return AEROSPIKE_OK; +} + +as_status +as_metrics_writer_snapshot(as_error* err, as_cluster* cluster, void* udata) +{ + as_error_reset(err); + as_metrics_writer* mw = udata; + + if (mw->enable && mw->file != NULL) { + as_status status = as_metrics_write_cluster(err, mw, cluster); + if (status != AEROSPIKE_OK) { + as_metrics_writer_destroy_nodes(cluster); + as_metrics_writer_destroy(mw); + return status; + } + uint32_t result = fflush(mw->file); + if (result != 0) { + as_metrics_writer_destroy_nodes(cluster); + as_metrics_writer_destroy(mw); + return as_error_update(err, AEROSPIKE_ERR_CLIENT, + "File stream did not flush successfully: %s", mw->report_dir); + } + } + + return AEROSPIKE_OK; +} + +as_status +as_metrics_writer_node_close(as_error* err, as_node* node, void* udata) +{ + // write node info to file + as_error_reset(err); + as_metrics_writer* mw = udata; + + if (mw->enable && mw->file != NULL) { + char now_str[128]; + timestamp_to_string(now_str, 
sizeof(now_str)); + + as_string_builder sb; + as_string_builder_inita(&sb, 16384, true); + as_string_builder_append(&sb, now_str); + as_metrics_write_node(mw, &sb, node); + as_string_builder_append_newline(&sb); + + as_status status = as_metrics_write_line(mw, sb.data, err); + + as_metrics_writer_destroy_node_metrics(node); + as_string_builder_destroy(&sb); + + return status; + } + + return AEROSPIKE_OK; +} + +as_status +as_metrics_writer_disable(as_error* err, as_cluster* cluster, void* udata) +{ + // write cluster into to file, disable + as_error_reset(err); + as_metrics_writer* mw = udata; + if (mw != NULL) { + if (mw->enable && mw->file != NULL) { + as_status status = as_metrics_write_cluster(err, mw, cluster); + + if (status != AEROSPIKE_OK) { + as_metrics_writer_destroy_nodes(cluster); + as_metrics_writer_destroy(mw); + return status; + } + } + as_metrics_writer_destroy_nodes(cluster); + as_metrics_writer_destroy(mw); + } + + return AEROSPIKE_OK; +} diff --git a/xcode/aerospike.xcodeproj/project.pbxproj b/xcode/aerospike.xcodeproj/project.pbxproj index 9cc60a4403..44d96a5f67 100644 --- a/xcode/aerospike.xcodeproj/project.pbxproj +++ b/xcode/aerospike.xcodeproj/project.pbxproj @@ -215,6 +215,8 @@ BFD8FE7C20CF6DFC000A80F1 /* as_query_validate.c in Sources */ = {isa = PBXBuildFile; fileRef = BFD8FE7B20CF6DFC000A80F1 /* as_query_validate.c */; }; BFE3C3991D6270C200AA7F20 /* as_address.h in Headers */ = {isa = PBXBuildFile; fileRef = BFE3C3981D6270C200AA7F20 /* as_address.h */; }; BFE3C39B1D62720800AA7F20 /* as_address.c in Sources */ = {isa = PBXBuildFile; fileRef = BFE3C39A1D62720800AA7F20 /* as_address.c */; }; + BFE8EF472B7E9C0600D0C31B /* as_metrics_writer.h in Headers */ = {isa = PBXBuildFile; fileRef = BFE8EF462B7E9C0600D0C31B /* as_metrics_writer.h */; }; + BFE8EF492B7E9C3A00D0C31B /* as_metrics_writer.c in Sources */ = {isa = PBXBuildFile; fileRef = BFE8EF482B7E9C3A00D0C31B /* as_metrics_writer.c */; }; BFEAF6322228638E00FB4248 /* as_conn_pool.h in 
Headers */ = {isa = PBXBuildFile; fileRef = BFEAF6312228638E00FB4248 /* as_conn_pool.h */; }; BFF344B01CDAC67700FD1976 /* as_map_operations.h in Headers */ = {isa = PBXBuildFile; fileRef = BFF344AF1CDAC67700FD1976 /* as_map_operations.h */; }; BFF344C31CEA7ACD00FD1976 /* as_list_operations.h in Headers */ = {isa = PBXBuildFile; fileRef = BFF344C21CEA7ACD00FD1976 /* as_list_operations.h */; }; @@ -432,6 +434,8 @@ BFD8FE7B20CF6DFC000A80F1 /* as_query_validate.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_query_validate.c; path = ../src/main/aerospike/as_query_validate.c; sourceTree = ""; }; BFE3C3981D6270C200AA7F20 /* as_address.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_address.h; path = ../src/include/aerospike/as_address.h; sourceTree = ""; }; BFE3C39A1D62720800AA7F20 /* as_address.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_address.c; path = ../src/main/aerospike/as_address.c; sourceTree = ""; }; + BFE8EF462B7E9C0600D0C31B /* as_metrics_writer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_metrics_writer.h; path = ../src/include/aerospike/as_metrics_writer.h; sourceTree = ""; }; + BFE8EF482B7E9C3A00D0C31B /* as_metrics_writer.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_metrics_writer.c; path = ../src/main/aerospike/as_metrics_writer.c; sourceTree = ""; }; BFEAF6312228638E00FB4248 /* as_conn_pool.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_conn_pool.h; path = ../src/include/aerospike/as_conn_pool.h; sourceTree = ""; }; BFF344AF1CDAC67700FD1976 /* as_map_operations.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_map_operations.h; path = ../src/include/aerospike/as_map_operations.h; sourceTree = ""; }; 
BFF344C21CEA7ACD00FD1976 /* as_list_operations.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_list_operations.h; path = ../src/include/aerospike/as_list_operations.h; sourceTree = ""; }; @@ -556,6 +560,7 @@ BFC002891901E08500CB9BC8 /* as_lookup.c */, BF90C77022AB30E40062D920 /* as_map_operations.c */, BFAF276F2B6AB39100A3858B /* as_metrics.c */, + BFE8EF482B7E9C3A00D0C31B /* as_metrics_writer.c */, BFBB3C8E192D729A00251B15 /* as_node.c */, BF2AA7C718BEBFA400E54AF3 /* as_operations.c */, BFBA916A1914344B00AADA9A /* as_partition.c */, @@ -755,6 +760,7 @@ BFC65B521C921E9E0079DF5A /* as_lookup.h */, BFF344AF1CDAC67700FD1976 /* as_map_operations.h */, BFAF276D2B6AB36A00A3858B /* as_metrics.h */, + BFE8EF462B7E9C0600D0C31B /* as_metrics_writer.h */, BFC65B531C921E9E0079DF5A /* as_node.h */, BFC65B541C921E9E0079DF5A /* as_operations.h */, BFC65B551C921E9E0079DF5A /* as_partition.h */, @@ -798,6 +804,7 @@ BFC65B791C921E9E0079DF5A /* as_job.h in Headers */, BF90C76A22AB143C0062D920 /* as_cdt_internal.h in Headers */, BF1C2ADF20BE031B00868695 /* aerospike_stats.h in Headers */, + BFE8EF472B7E9C0600D0C31B /* as_metrics_writer.h in Headers */, BFC65B731C921E9E0079DF5A /* as_command.h in Headers */, BFF344B01CDAC67700FD1976 /* as_map_operations.h in Headers */, BF809CDB24327E9300C16F3D /* as_hll_operations.h in Headers */, @@ -945,6 +952,7 @@ BF8EF4A82AE1B41100FEEC3A /* lcorolib.c in Sources */, BF32147123E8F9C6004A7E19 /* as_partition_tracker.c in Sources */, BFBA04A91947AA8400F9924E /* cf_random.c in Sources */, + BFE8EF492B7E9C3A00D0C31B /* as_metrics_writer.c in Sources */, BF2337A21B4DC8BD00670C64 /* as_double.c in Sources */, BF8EF4AC2AE1B41100FEEC3A /* ldo.c in Sources */, BF2AA7DC18BEBFA500E54AF3 /* aerospike_info.c in Sources */, From c85ba8ff4b2531c4331e876cb2fad0234c23044d Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Fri, 16 Feb 2024 14:05:35 -0500 Subject: [PATCH 39/64] Use cf_calloc to initialize 
as_metrics_writer struct. Ignore certain Visual Studio warning messages. --- src/main/aerospike/as_metrics_writer.c | 8 +------- vs/aerospike/aerospike.vcxproj | 2 ++ vs/aerospike/aerospike.vcxproj.filters | 6 ++++++ vs/props/base.props | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/main/aerospike/as_metrics_writer.c b/src/main/aerospike/as_metrics_writer.c index df1f9b4569..1003190d84 100644 --- a/src/main/aerospike/as_metrics_writer.c +++ b/src/main/aerospike/as_metrics_writer.c @@ -593,11 +593,9 @@ as_metrics_writer_create(as_error* err, const as_metrics_policy* policy, as_metr "Metrics policy report_size_limit %" PRIu64 " must be at least %d", policy->report_size_limit, MIN_FILE_SIZE); } - as_metrics_writer* mw = cf_malloc(sizeof(as_metrics_writer)); + as_metrics_writer* mw = cf_calloc(1, sizeof(as_metrics_writer)); as_strncpy(mw->report_dir, policy->report_dir, sizeof(mw->report_dir)); - mw->file = NULL; mw->max_size = policy->report_size_limit; - mw->size = 0; mw->latency_columns = policy->latency_columns; mw->latency_shift = policy->latency_shift; mw->enable = false; @@ -605,10 +603,6 @@ as_metrics_writer_create(as_error* err, const as_metrics_policy* policy, as_metr #ifdef _MSC_VER mw->pid = GetCurrentProcessId(); mw->process = OpenProcess(PROCESS_QUERY_INFORMATION, false, mw->pid); - mw->prev_process_times_kernel = 0; - mw->prev_process_times_user = 0; - mw->prev_system_times_kernel = 0; - mw->prev_system_times_user = 0; FILETIME dummy; if (mw->process != NULL) diff --git a/vs/aerospike/aerospike.vcxproj b/vs/aerospike/aerospike.vcxproj index 113721eadc..6f483659c7 100644 --- a/vs/aerospike/aerospike.vcxproj +++ b/vs/aerospike/aerospike.vcxproj @@ -399,6 +399,7 @@ + @@ -551,6 +552,7 @@ + diff --git a/vs/aerospike/aerospike.vcxproj.filters b/vs/aerospike/aerospike.vcxproj.filters index 0d375a0167..cdb86cd2f4 100644 --- a/vs/aerospike/aerospike.vcxproj.filters +++ b/vs/aerospike/aerospike.vcxproj.filters @@ -498,6 +498,9 @@ Header 
Files + + Header Files + @@ -947,6 +950,9 @@ Source Files + + Source Files + diff --git a/vs/props/base.props b/vs/props/base.props index 5f458bb22d..ad6ec7d48d 100644 --- a/vs/props/base.props +++ b/vs/props/base.props @@ -8,7 +8,7 @@ _CRT_SECURE_NO_DEPRECATE;_TIMESPEC_DEFINED;%(PreprocessorDefinitions) - 4098;4996 + 4098;4996;6255;6262;26451 From 5d126c0dff36bfb0a60ee02cbe6a7af09e9302ab Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Tue, 20 Feb 2024 09:26:28 -0700 Subject: [PATCH 40/64] Working on error cases --- .../async_batch_get/src/main/example.c | 25 ------ .../basic_examples/append/src/main/example.c | 26 +----- .../batch_examples/get/src/main/example.c | 28 +----- src/include/aerospike/as_cluster.h | 87 +++++++++++++++++++ src/main/aerospike/aerospike_batch.c | 6 ++ src/main/aerospike/aerospike_query.c | 1 + src/main/aerospike/aerospike_scan.c | 1 + src/main/aerospike/as_cluster.c | 6 +- src/main/aerospike/as_command.c | 2 + src/main/aerospike/as_event.c | 5 ++ src/main/aerospike/as_metrics_writer.c | 13 +-- src/test/aerospike_query/query_async.c | 2 +- src/test/aerospike_test.c | 7 -- 13 files changed, 114 insertions(+), 95 deletions(-) diff --git a/examples/async_examples/async_batch_get/src/main/example.c b/examples/async_examples/async_batch_get/src/main/example.c index ecb675ff6b..f6d043036a 100644 --- a/examples/async_examples/async_batch_get/src/main/example.c +++ b/examples/async_examples/async_batch_get/src/main/example.c @@ -28,8 +28,6 @@ #include #include #include -#include -#include #include "example_utils.h" @@ -77,28 +75,6 @@ main(int argc, char* argv[]) // Connect to the aerospike database cluster. 
example_connect_to_aerospike(&as); - as_error err; - as_error_reset(&err); - as_metrics_policy policy; -#ifdef _MSC_VER - char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; -#else - char report_dir[] = "/home/sklaus/metrics"; -#endif - as_metrics_policy_init(&policy); - as_metrics_policy_set_report_dir(&policy, report_dir); - policy.interval = 5; - - - - // enable metrics - as_status status = aerospike_enable_metrics(&as, &err, &policy); - - if (status != AEROSPIKE_OK) { - LOG("aerospike_enable_metrics() returned %d - %s", err.code, err.message); - exit(-1); - } - // Start clean. example_remove_test_records(&as); @@ -110,7 +86,6 @@ main(int argc, char* argv[]) // Wait till commands have completed before shutting down. as_monitor_wait(&monitor); - as_sleep(30000); // Cleanup and shutdown. example_remove_test_records(&as); diff --git a/examples/basic_examples/append/src/main/example.c b/examples/basic_examples/append/src/main/example.c index d40c463fd1..d64671a95d 100644 --- a/examples/basic_examples/append/src/main/example.c +++ b/examples/basic_examples/append/src/main/example.c @@ -34,8 +34,6 @@ #include #include #include -#include -#include #include "example_utils.h" @@ -56,30 +54,10 @@ main(int argc, char* argv[]) aerospike as; example_connect_to_aerospike(&as); - as_error err; - as_error_reset(&err); - as_metrics_policy policy; -#ifdef _MSC_VER - char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; -#else - char report_dir[] = "/home/sklaus/metrics"; -#endif - as_metrics_policy_init(&policy); - as_metrics_policy_set_report_dir(&policy, report_dir); - policy.interval = 5; - - // enable metrics - as_status status = aerospike_enable_metrics(&as, &err, &policy); - - if (status != AEROSPIKE_OK) { - LOG("aerospike_enable_metrics() returned %d - %s", err.code, err.message); - exit(-1); - } - // Start clean. 
example_remove_test_record(&as); - //as_error err; + as_error err; // Create an as_operations object with three concatenation operations. // Generally, if using as_operations_inita(), we won't need to destroy the @@ -146,8 +124,6 @@ main(int argc, char* argv[]) LOG("as_operations object to apply to database:"); example_dump_operations(&ops); - as_sleep(30000); - // Try to apply the operations. This will fail, since we can't append a // string value to an existing bin with "raw" value. Note that if any // operation in the transaction is rejected, none will be applied. diff --git a/examples/batch_examples/get/src/main/example.c b/examples/batch_examples/get/src/main/example.c index 9113248a0d..c50096ac61 100644 --- a/examples/batch_examples/get/src/main/example.c +++ b/examples/batch_examples/get/src/main/example.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -30,10 +29,8 @@ #include #include #include -#include #include #include "example_utils.h" -#include //------------------------------------ // Forward Declarations @@ -72,27 +69,6 @@ main(int argc, char* argv[]) aerospike as; example_connect_to_aerospike(&as); - as_error err; - as_error_reset(&err); - as_metrics_policy policy; -#ifdef _MSC_VER - char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; -#else - char report_dir[] = "/home/sklaus/metrics"; -#endif - as_metrics_policy_init(&policy); - as_metrics_policy_set_report_dir(&policy, report_dir); - policy.interval = 5; - - // enable metrics - as_status status = aerospike_enable_metrics(&as, &err, &policy); - - if (status != AEROSPIKE_OK) { - LOG("aerospike_enable_metrics() returned %d - %s", err.code, err.message); - cleanup(&as); - exit(-1); - } - // Start clean. example_remove_test_records(&as); @@ -101,7 +77,7 @@ main(int argc, char* argv[]) exit(-1); } - //as_error err; + as_error err; // Make a batch of all the keys we inserted. 
as_batch batch; @@ -219,8 +195,6 @@ main(int argc, char* argv[]) exit(-1); } - as_sleep(10000); - // Cleanup and disconnect from the database cluster. cleanup(&as); diff --git a/src/include/aerospike/as_cluster.h b/src/include/aerospike/as_cluster.h index c66693b7dc..1517285097 100644 --- a/src/include/aerospike/as_cluster.h +++ b/src/include/aerospike/as_cluster.h @@ -381,20 +381,72 @@ typedef struct as_cluster_s { */ volatile bool valid; + /** + * @private + * Is metrics colleciton enabled. + */ bool metrics_enabled; + /** + * @private + * Number of cluster tend iterations between metrics notification events. One tend iteration + * is defined as as_config.tender_interval (default 1 second) plus the time to tend all + * nodes. This is set using as_policy_metrics. + */ uint32_t metrics_interval; + /** + * @private + * Number of elapsed time range buckets in latency histograms. This is set using as_policy_metrics. + */ uint32_t metrics_latency_columns; + /** + * @private + * Power of 2 multiple between each range bucket in latency histograms starting at column 3. The bucket units + * are in milliseconds. The first 2 buckets are "<=1ms" and ">1ms". Examples: + * + * ~~~~~~~~~~{.c} + * // latencyColumns=7 latencyShift=1 + * <=1ms >1ms >2ms >4ms >8ms >16ms >32ms + * + * // latencyColumns=5 latencyShift=3 + * <=1ms >1ms >8ms >64ms >512ms + * ~~~~~~~~~~ + * + * This is set using as_policy_metrics. + */ uint32_t metrics_latency_shift; + /** + * @private + * Listeners that handles metrics notification events. The default listener implementation + * writes the metrics snapshot to a file which will later be read and forwarded to + * OpenTelemetry by a separate offline application. + *

+ * The listener could be overridden to send the metrics snapshot directly to OpenTelemetry. + * + * This is set using as_policy_metrics. + */ as_metrics_listeners metrics_listeners; + /** + * @private + * Transaction retry count. There can be multiple retries for a single transaction. + * The value is cumulative and not reset per metrics interval. + */ uint64_t retry_count; + /** + * @private + * Transaction count. The value is cumulative and not reset per metrics interval. + */ uint64_t tran_count; + /** + * @private + * Delay queue timeout count. The value is cumulative and not reset per metrics interval. + */ uint64_t delay_queue_timeout_count; } as_cluster; @@ -564,6 +616,41 @@ as_cluster_add_tran(as_cluster* cluster); uint64_t as_cluster_get_tran_count(const as_cluster* cluster); +/** + * @private + * Increment async delay queue timeout count. + */ +void +as_cluster_add_retry(as_cluster* cluster); + +/** + * @private + * Add transaction retry count. There can be multiple retries for a single transaction. + */ +void +as_cluster_add_retries(as_cluster* cluster, uint32_t count); + +/** + * @private + * Return transaction retry count. The value is cumulative and not reset per metrics interval. + */ +uint64_t +as_cluster_get_retry_count(const as_cluster* cluster); + +/** + * @private + * Increment async delay queue timeout count. + */ +void +as_cluster_add_delay_queue_timeout(as_cluster* cluster); + +/** + * @private + * Return async delay queue timeout count. + */ +uint64_t +as_cluster_get_delay_queue_timeout_count(const as_cluster* cluster); + /** * @private * Get mapped node given partition and replica. This function does not reserve the node. 
diff --git a/src/main/aerospike/aerospike_batch.c b/src/main/aerospike/aerospike_batch.c index c62bb2fdad..146e8f739b 100644 --- a/src/main/aerospike/aerospike_batch.c +++ b/src/main/aerospike/aerospike_batch.c @@ -2730,6 +2730,8 @@ as_batch_retry_records(as_batch_task_records* btr, as_command* parent, as_error* as_vector batch_nodes; as_vector_inita(&batch_nodes, sizeof(as_batch_node), n_nodes); + as_cluster_add_retries(cluster, batch_nodes.size); + // Create initial key capacity for each node as average + 25%. uint32_t offsets_size = task->offsets.size; uint32_t offsets_capacity = offsets_size / n_nodes; @@ -2816,6 +2818,8 @@ as_batch_retry_keys(as_batch_task_keys* btk, as_command* parent, as_error* err) as_vector batch_nodes; as_vector_inita(&batch_nodes, sizeof(as_batch_node), n_nodes); + as_cluster_add_retries(cluster, batch_nodes.size); + as_status status = AEROSPIKE_OK; // Create initial key capacity for each node as average + 25%. @@ -3148,6 +3152,8 @@ as_batch_retry_async(as_event_command* parent, bool timeout) as_vector bnodes; as_vector_inita(&bnodes, sizeof(as_batch_retry_node), n_nodes); + as_cluster_add_retries(cluster, bnodes.size); + as_batch_replica rep; rep.replica = be->replica; rep.replica_sc = be->replica_sc; diff --git a/src/main/aerospike/aerospike_query.c b/src/main/aerospike/aerospike_query.c index efa91540c8..a9cb380b1a 100644 --- a/src/main/aerospike/aerospike_query.c +++ b/src/main/aerospike/aerospike_query.c @@ -1878,6 +1878,7 @@ as_query_partition_retry_async(as_async_query_executor* qe_old, as_error* err) ee->queued = 0; ee->notify = true; ee->valid = true; + as_cluster_add_retry(qe->cluster); return as_query_partition_execute_async(qe, qe->pt, err); } diff --git a/src/main/aerospike/aerospike_scan.c b/src/main/aerospike/aerospike_scan.c index a054e6c14e..ecbe10f807 100644 --- a/src/main/aerospike/aerospike_scan.c +++ b/src/main/aerospike/aerospike_scan.c @@ -1167,6 +1167,7 @@ as_scan_partition_retry_async(as_async_scan_executor* 
se_old, as_error* err) ee->queued = 0; ee->notify = true; ee->valid = true; + as_cluster_add_retry(se->cluster); return as_scan_partition_execute_async(se, se->pt, err); } diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index b18c00e271..da6ad2d8a6 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -677,7 +677,11 @@ as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* metrics_enabled) { - cluster->metrics_listeners.node_close_listener(err, node, node->cluster->metrics_listeners.udata); + as_status status = cluster->metrics_listeners.node_close_listener(err, node, node->cluster->metrics_listeners.udata); + if (status != AEROSPIKE_OK) { + as_log_warn("Metrics error: %s %s", as_error_string(status), err->message); + } + as_error_reset(err); } as_node_deactivate(node); } diff --git a/src/main/aerospike/as_command.c b/src/main/aerospike/as_command.c index 97a7fb0b61..5deeccbcc1 100644 --- a/src/main/aerospike/as_command.c +++ b/src/main/aerospike/as_command.c @@ -783,6 +783,8 @@ as_command_execute(as_command* cmd, as_error* err) return status; } } + + as_cluster_add_retry(cmd->cluster); } // Retries have been exhausted. diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index 4ced942218..19e8bf1b3c 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -852,6 +852,10 @@ as_event_delay_timeout(as_event_command* cmd) // Notify user, but do not destroy command. as_event_notify_error(cmd, &err); + if (cmd->latency_type != AS_LATENCY_TYPE_NONE) + { + as_cluster_add_delay_queue_timeout(cmd->cluster); + } } void @@ -980,6 +984,7 @@ as_event_execute_retry(as_event_command* cmd) } // Retry command. 
+ as_cluster_add_retry(cmd->cluster); as_event_command_begin(cmd->event_loop, cmd); } diff --git a/src/main/aerospike/as_metrics_writer.c b/src/main/aerospike/as_metrics_writer.c index 1003190d84..efc703f6bf 100644 --- a/src/main/aerospike/as_metrics_writer.c +++ b/src/main/aerospike/as_metrics_writer.c @@ -26,8 +26,6 @@ #define MIN_FILE_SIZE 1000000 -#define LOG(_fmt, ...) { printf(_fmt "\n", ##__VA_ARGS__); fflush(stdout); } - #ifdef _MSC_VER static char as_dir_sep = '\\'; #else @@ -93,6 +91,7 @@ as_metrics_proc_stat_mem_cpu(as_error* err, double* vm_usage, double* resident_s struct sysinfo info; int success = sysinfo(&info); + success = 3; if (success != 0) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, "Error calculating CPU usage"); @@ -114,7 +113,6 @@ as_metrics_process_cpu_load_mem_usage(as_error* err, as_metrics_writer* mw, uint return result; } - LOG("cpu %f", cpu_usage_d); cpu_usage_d = cpu_usage_d + 0.5 - (cpu_usage_d < 0); mem_d = mem_d + 0.5 - (mem_d < 0); *cpu_usage = (uint32_t)cpu_usage_d; @@ -286,6 +284,7 @@ static as_status as_metrics_process_cpu_load_mem_usage(as_error* err, as_metrics_writer* mw, uint32_t* cpu_usage, uint32_t* mem) { double cpu_usage_d = as_metrics_process_cpu_load(mw); + cpu_usage_d = -1.0; if (cpu_usage_d < 0) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, "Error calculating CPU usage"); @@ -513,9 +512,9 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, as_cluster* clust as_string_builder_append_char(&sb, ','); as_string_builder_append_uint64(&sb, as_cluster_get_tran_count(cluster)); // Cumulative. Not reset on each interval. as_string_builder_append_char(&sb, ','); - as_string_builder_append_uint64(&sb, cluster->retry_count); // Cumulative. Not reset on each interval. + as_string_builder_append_uint64(&sb, as_cluster_get_retry_count(cluster)); // Cumulative. Not reset on each interval. 
as_string_builder_append_char(&sb, ','); - as_string_builder_append_uint64(&sb, cluster->delay_queue_timeout_count); // Cumulative. Not reset on each interval. + as_string_builder_append_uint64(&sb, as_cluster_get_delay_queue_timeout_count(cluster)); // Cumulative. Not reset on each interval. as_string_builder_append(&sb, ",["); for (uint32_t i = 0; i < as_event_loop_size; i++) { @@ -643,14 +642,10 @@ as_metrics_writer_snapshot(as_error* err, as_cluster* cluster, void* udata) if (mw->enable && mw->file != NULL) { as_status status = as_metrics_write_cluster(err, mw, cluster); if (status != AEROSPIKE_OK) { - as_metrics_writer_destroy_nodes(cluster); - as_metrics_writer_destroy(mw); return status; } uint32_t result = fflush(mw->file); if (result != 0) { - as_metrics_writer_destroy_nodes(cluster); - as_metrics_writer_destroy(mw); return as_error_update(err, AEROSPIKE_ERR_CLIENT, "File stream did not flush successfully: %s", mw->report_dir); } diff --git a/src/test/aerospike_query/query_async.c b/src/test/aerospike_query/query_async.c index f4ff9ed459..c7bef22b7d 100644 --- a/src/test/aerospike_query/query_async.c +++ b/src/test/aerospike_query/query_async.c @@ -178,7 +178,7 @@ TEST(query_async_quit_early, "normal query and quit early") as_query_destroy(&q); - assert_int_eq(status, AEROSPIKE_OK); + assert_int_eq(status, AEROSPIKE_OK); as_monitor_wait(&monitor); } diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index 3d427453a1..e02d210dcb 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -383,17 +383,10 @@ static bool before(atf_plan* plan) } as_metrics_policy policy; -#ifdef _MSC_VER - char report_dir[] = "C:\\Users\\sklaus\\repos\\aerospike-client-c\\src\\test"; -#else - char report_dir[] = "/home/sklaus/metrics"; -#endif as_metrics_policy_init(&policy); - as_metrics_policy_set_report_dir(&policy, report_dir); policy.interval = 5; policy.report_size_limit = 1000000; - // enable metrics as_status status = 
aerospike_enable_metrics(as, &err, &policy); From a20aa2175d18a5a875acc134402386ded28c99d3 Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Tue, 20 Feb 2024 11:39:30 -0700 Subject: [PATCH 41/64] Fix for unit tests stopping unexpectedly --- src/main/aerospike/aerospike.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/aerospike/aerospike.c b/src/main/aerospike/aerospike.c index 324658feee..5e37a3dad7 100644 --- a/src/main/aerospike/aerospike.c +++ b/src/main/aerospike/aerospike.c @@ -252,7 +252,11 @@ aerospike_close(aerospike* as, as_error* err) if (cluster) { if (cluster->metrics_enabled) { - aerospike_disable_metrics(as, err); + as_status status = aerospike_disable_metrics(as, err); + if (status != AEROSPIKE_OK) { + as_log_warn("Metrics error: %s %s", as_error_string(status), err->message); + } + as_error_reset(err); } if (as_event_loop_size > 0 && !as_event_single_thread) { From c9ac44938d899dacad4a614a4545b595408a9e70 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Tue, 20 Feb 2024 14:58:33 -0500 Subject: [PATCH 42/64] Do not exit(-1) in aerospike_test.c --- src/main/aerospike/as_cluster.c | 2 +- src/test/aerospike_test.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index da6ad2d8a6..b8c62e5585 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -785,7 +785,7 @@ as_cluster_manage(as_cluster* cluster) as_status status = cluster->metrics_listeners.snapshot_listener(&err, cluster, cluster->metrics_listeners.udata); if (status != AEROSPIKE_OK) { - as_log_warn("Metrics error: %s %s", as_error_string(status), err.message); + as_log_warn("Metrics error: %d %s", err.code, err.message); } } } diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index e02d210dcb..fc88576da5 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -381,6 +381,7 @@ static bool before(atf_plan* 
plan) return false; } } + cf_free(result); as_metrics_policy policy; as_metrics_policy_init(&policy); @@ -392,10 +393,9 @@ static bool before(atf_plan* plan) if (status != AEROSPIKE_OK) { error("aerospike_enable_metrics() returned %d - %s", err.code, err.message); - exit(-1); + return false; } - - cf_free(result); + return true; } From 287dc63c60a52d7bf66e5741fec5e2b808bb03ea Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Tue, 20 Feb 2024 15:04:38 -0500 Subject: [PATCH 43/64] Only reset error if a previous error was set. --- src/main/aerospike/aerospike.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/aerospike/aerospike.c b/src/main/aerospike/aerospike.c index 5e37a3dad7..0017ba9324 100644 --- a/src/main/aerospike/aerospike.c +++ b/src/main/aerospike/aerospike.c @@ -253,10 +253,11 @@ aerospike_close(aerospike* as, as_error* err) if (cluster) { if (cluster->metrics_enabled) { as_status status = aerospike_disable_metrics(as, err); + if (status != AEROSPIKE_OK) { as_log_warn("Metrics error: %s %s", as_error_string(status), err->message); + as_error_reset(err); } - as_error_reset(err); } if (as_event_loop_size > 0 && !as_event_single_thread) { From 3d280ee5ed9133ebab9207b42c6dfce5d775c052 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Tue, 20 Feb 2024 15:10:08 -0500 Subject: [PATCH 44/64] To be consistent revert back to older tend warning message format when calling snapshot listener. 
--- src/main/aerospike/as_cluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index b8c62e5585..da6ad2d8a6 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -785,7 +785,7 @@ as_cluster_manage(as_cluster* cluster) as_status status = cluster->metrics_listeners.snapshot_listener(&err, cluster, cluster->metrics_listeners.udata); if (status != AEROSPIKE_OK) { - as_log_warn("Metrics error: %d %s", err.code, err.message); + as_log_warn("Metrics error: %s %s", as_error_string(status), err.message); } } } From e8d11cb751b4fb91367f5fd69e6ddca448cc7aca Mon Sep 17 00:00:00 2001 From: Shannon Klaus Date: Wed, 21 Feb 2024 13:52:10 -0700 Subject: [PATCH 45/64] Changes after error testing, disable metrics in unit tests --- src/main/aerospike/as_cluster.c | 7 +++---- src/main/aerospike/as_metrics_writer.c | 3 +-- src/test/aerospike_test.c | 12 +++--------- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index da6ad2d8a6..f1491b9ec0 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -559,11 +559,10 @@ as_status as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_metrics_policy* policy) { if (cluster->metrics_enabled) { - cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); + as_cluster_disable_metrics(err, cluster); } as_status status = AEROSPIKE_OK; - as_error_reset(err); if (policy->metrics_listeners.enable_listener) { @@ -599,6 +598,7 @@ as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_metrics_policy* } as_nodes_release(nodes); + cluster->metrics_enabled = true; status = cluster->metrics_listeners.enable_listener(err, cluster->metrics_listeners.udata); if (status != AEROSPIKE_OK) { @@ -606,7 +606,6 @@ as_cluster_enable_metrics(as_error* err, as_cluster* cluster, 
as_metrics_policy* return status; } - cluster->metrics_enabled = true; return status; } @@ -680,8 +679,8 @@ as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* metrics_listeners.node_close_listener(err, node, node->cluster->metrics_listeners.udata); if (status != AEROSPIKE_OK) { as_log_warn("Metrics error: %s %s", as_error_string(status), err->message); + as_error_reset(err); } - as_error_reset(err); } as_node_deactivate(node); } diff --git a/src/main/aerospike/as_metrics_writer.c b/src/main/aerospike/as_metrics_writer.c index efc703f6bf..611caf498b 100644 --- a/src/main/aerospike/as_metrics_writer.c +++ b/src/main/aerospike/as_metrics_writer.c @@ -91,7 +91,6 @@ as_metrics_proc_stat_mem_cpu(as_error* err, double* vm_usage, double* resident_s struct sysinfo info; int success = sysinfo(&info); - success = 3; if (success != 0) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, "Error calculating CPU usage"); @@ -284,7 +283,6 @@ static as_status as_metrics_process_cpu_load_mem_usage(as_error* err, as_metrics_writer* mw, uint32_t* cpu_usage, uint32_t* mem) { double cpu_usage_d = as_metrics_process_cpu_load(mw); - cpu_usage_d = -1.0; if (cpu_usage_d < 0) { return as_error_update(err, AEROSPIKE_ERR_CLIENT, "Error calculating CPU usage"); @@ -668,6 +666,7 @@ as_metrics_writer_node_close(as_error* err, as_node* node, void* udata) as_string_builder sb; as_string_builder_inita(&sb, 16384, true); as_string_builder_append(&sb, now_str); + as_string_builder_append_char(&sb, ' '); as_metrics_write_node(mw, &sb, node); as_string_builder_append_newline(&sb); diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index fc88576da5..6f7bef2e4f 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -25,7 +25,6 @@ #include #include #include -#include #include "test.h" #include "aerospike_test.h" @@ -383,18 +382,15 @@ static bool before(atf_plan* plan) } cf_free(result); - as_metrics_policy policy; + // enable metrics + 
/*as_metrics_policy policy; as_metrics_policy_init(&policy); - policy.interval = 5; - policy.report_size_limit = 1000000; - // enable metrics as_status status = aerospike_enable_metrics(as, &err, &policy); if (status != AEROSPIKE_OK) { error("aerospike_enable_metrics() returned %d - %s", err.code, err.message); - return false; - } + }*/ return true; } @@ -406,8 +402,6 @@ static bool after(atf_plan* plan) return false; } - //as_sleep(5*60000); - as_error err; as_error_reset(&err); From 5deb361d0fb22d16a825e64a67422b38493126dc Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Wed, 21 Feb 2024 17:48:18 -0500 Subject: [PATCH 46/64] Move latency bucket structs to as_latency.{c,h} Move private node metrics to as_node.c Temporarily enable metrics in test. TODO: Comment out enable metrics when finished with new testing. --- Makefile | 1 + src/include/aerospike/as_latency.h | 66 ++++++++++++++++ src/include/aerospike/as_metrics.h | 76 +----------------- src/include/aerospike/as_node.h | 13 ++- src/main/aerospike/as_latency.c | 42 ++++++++++ src/main/aerospike/as_metrics.c | 96 +---------------------- src/main/aerospike/as_metrics_writer.c | 2 +- src/main/aerospike/as_node.c | 57 +++++++++++++- src/test/aerospike_test.c | 7 +- xcode/aerospike.xcodeproj/project.pbxproj | 8 ++ 10 files changed, 189 insertions(+), 179 deletions(-) create mode 100644 src/include/aerospike/as_latency.h create mode 100644 src/main/aerospike/as_latency.c diff --git a/Makefile b/Makefile index fbe28616cd..49b21ef60b 100644 --- a/Makefile +++ b/Makefile @@ -133,6 +133,7 @@ AEROSPIKE += as_host.o AEROSPIKE += as_info.o AEROSPIKE += as_job.o AEROSPIKE += as_key.o +AEROSPIKE += as_latency.o AEROSPIKE += as_list_operations.o AEROSPIKE += as_lookup.o AEROSPIKE += as_map_operations.o diff --git a/src/include/aerospike/as_latency.h b/src/include/aerospike/as_latency.h new file mode 100644 index 0000000000..60d964e8a3 --- /dev/null +++ b/src/include/aerospike/as_latency.h @@ -0,0 +1,66 @@ +/* + * Copyright 
2008-2024 Aerospike, Inc. + * + * Portions may be licensed to Aerospike, Inc. under one or more contributor + * license agreements. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +//--------------------------------- +// Types +//--------------------------------- + +typedef uint8_t as_latency_type; + +#define AS_LATENCY_TYPE_CONN 0 +#define AS_LATENCY_TYPE_WRITE 1 +#define AS_LATENCY_TYPE_READ 2 +#define AS_LATENCY_TYPE_BATCH 3 +#define AS_LATENCY_TYPE_QUERY 4 +#define AS_LATENCY_TYPE_NONE 5 + +/** + * Latency buckets for a transaction group. 
+ * Latency bucket counts are cumulative and not reset on each metrics snapshot interval + */ +typedef struct as_latency_buckets_s { + uint64_t* buckets; + uint32_t latency_shift; + uint32_t latency_columns; +} as_latency_buckets; + +//--------------------------------- +// Functions +//--------------------------------- + +static inline uint64_t +as_latency_get_bucket(as_latency_buckets* buckets, uint32_t i) +{ + return as_load_uint64(&buckets->buckets[i]); +} + +/** + * Convert latency_type to string version for printing to the output file + */ +AS_EXTERN char* +as_latency_type_to_string(as_latency_type type); + +#ifdef __cplusplus +} // end extern "C" +#endif diff --git a/src/include/aerospike/as_metrics.h b/src/include/aerospike/as_metrics.h index 68328d57dd..6a58443c13 100644 --- a/src/include/aerospike/as_metrics.h +++ b/src/include/aerospike/as_metrics.h @@ -27,21 +27,6 @@ extern "C" { #endif -//--------------------------------- -// Macros -//--------------------------------- - -#define NS_TO_MS 1000000 - -typedef uint8_t as_latency_type; - -#define AS_LATENCY_TYPE_CONN 0 -#define AS_LATENCY_TYPE_WRITE 1 -#define AS_LATENCY_TYPE_READ 2 -#define AS_LATENCY_TYPE_BATCH 3 -#define AS_LATENCY_TYPE_QUERY 4 -#define AS_LATENCY_TYPE_NONE 5 - //--------------------------------- // Types //--------------------------------- @@ -94,7 +79,7 @@ typedef struct as_metrics_listeners_s { /** * Client periodic metrics configuration. */ -typedef struct as_policy_metrics_s { +typedef struct as_metrics_policy_s { /** * Listeners that handles metrics notification events. The default listener implementation * writes the metrics snapshot to a file which will later be read and forwarded to @@ -156,23 +141,6 @@ typedef struct as_policy_metrics_s { uint32_t latency_shift; } as_metrics_policy; -/** - * Latency buckets for a transaction group. 
- * Latency bucket counts are cumulative and not reset on each metrics snapshot interval - */ -typedef struct as_latency_buckets_s { - uint64_t* buckets; - uint32_t latency_shift; - uint32_t latency_columns; -} as_latency_buckets; - -/** - * Node metrics latency bucket struct - */ -typedef struct as_node_metrics_s { - as_latency_buckets* latency; -} as_node_metrics; - //--------------------------------- // Functions //--------------------------------- @@ -218,48 +186,6 @@ aerospike_enable_metrics(aerospike* as, as_error* err, as_metrics_policy* policy AS_EXTERN as_status aerospike_disable_metrics(aerospike* as, as_error* err); -/** - * Convert latency_type to string version for printing to the output file - */ -char* -as_latency_type_to_string(as_latency_type type); - -/** - * Initalize latency bucket struct - */ -void -as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, uint32_t latency_columns, uint32_t latency_shift); - -/** - * Return cumulative count of a bucket. - */ -uint64_t -as_metrics_get_bucket(as_latency_buckets* buckets, uint32_t i); - -/** - * Increment count of bucket corresponding to the elapsed time in nanoseconds. 
- */ -void -as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t elapsed); - -/** - * Determine which index of bucket the elapsed time belongs in - */ -uint32_t -as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos); - -/** - * Initalize node metrics struct - */ -as_node_metrics* -as_node_metrics_init(uint32_t latency_columns, uint32_t latency_shift); - -/** - * Add latency to corresponding bucket type - */ -void -as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed); - #ifdef __cplusplus } // end extern "C" #endif diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index cd15b84363..73a25c2d79 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -206,6 +206,13 @@ typedef struct as_async_conn_pool_s { } as_async_conn_pool; +/** + * Node metrics latency bucket struct + */ +typedef struct as_node_metrics_s { + as_latency_buckets* latency; +} as_node_metrics; + struct as_cluster_s; /** @@ -662,12 +669,14 @@ as_node_has_rack(as_node* node, const char* ns, int rack_id); void as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapsed); +struct as_metrics_policy_s; + /** * @private * Enable metrics at the node level */ void -as_node_enable_metrics(as_node* node, const as_metrics_policy* policy); +as_node_enable_metrics(as_node* node, const struct as_metrics_policy_s* policy); /** * Return transaction error count. The value is cumulative and not reset per metrics interval. diff --git a/src/main/aerospike/as_latency.c b/src/main/aerospike/as_latency.c new file mode 100644 index 0000000000..5372a325cc --- /dev/null +++ b/src/main/aerospike/as_latency.c @@ -0,0 +1,42 @@ +/* + * Copyright 2008-2024 Aerospike, Inc. + * + * Portions may be licensed to Aerospike, Inc. 
under one or more contributor + * license agreements. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +#include + +char* +as_latency_type_to_string(as_latency_type type) +{ + switch (type) { + case AS_LATENCY_TYPE_CONN: + return "conn"; + + case AS_LATENCY_TYPE_WRITE: + return "write"; + + case AS_LATENCY_TYPE_READ: + return "read"; + + case AS_LATENCY_TYPE_BATCH: + return "batch"; + + case AS_LATENCY_TYPE_QUERY: + return "query"; + + default: + case AS_LATENCY_TYPE_NONE: + return "none"; + } +} diff --git a/src/main/aerospike/as_metrics.c b/src/main/aerospike/as_metrics.c index 90a2c377d1..3daaa41a43 100644 --- a/src/main/aerospike/as_metrics.c +++ b/src/main/aerospike/as_metrics.c @@ -25,7 +25,7 @@ //--------------------------------- as_status -aerospike_enable_metrics(aerospike* as, as_error* err, struct as_policy_metrics_s* policy) +aerospike_enable_metrics(aerospike* as, as_error* err, as_metrics_policy* policy) { return as_cluster_enable_metrics(err, as->cluster, policy); } @@ -50,97 +50,3 @@ as_metrics_policy_init(as_metrics_policy* policy) policy->metrics_listeners.disable_listener = NULL; policy->metrics_listeners.udata = NULL; } - -char* -as_latency_type_to_string(as_latency_type type) -{ - switch (type) { - case AS_LATENCY_TYPE_CONN: - return "conn"; - break; - case AS_LATENCY_TYPE_WRITE: - return "write"; - break; - case AS_LATENCY_TYPE_READ: - return "read"; - break; - case AS_LATENCY_TYPE_BATCH: - return "batch"; - break; - case 
AS_LATENCY_TYPE_QUERY: - return "query"; - break; - case AS_LATENCY_TYPE_NONE: - return "none"; - break; - default: - return "none"; - break; - } -} - -void -as_metrics_latency_buckets_init(as_latency_buckets* latency_buckets, uint32_t latency_columns, uint32_t latency_shift) -{ - latency_buckets->latency_columns = latency_columns; - latency_buckets->latency_shift = latency_shift; - latency_buckets->buckets = cf_malloc(sizeof(uint64_t) * latency_columns); - for (uint32_t i = 0; i < latency_columns; i++) { - as_store_uint64(&latency_buckets->buckets[i], 0); - } -} - -uint64_t -as_metrics_get_bucket(as_latency_buckets* buckets, uint32_t i) -{ - return as_load_uint64(&buckets->buckets[i]); -} - -void -as_metrics_latency_buckets_add(as_latency_buckets* latency_buckets, uint64_t elapsed) -{ - uint32_t index = as_metrics_get_index(latency_buckets, elapsed); - as_incr_uint64(&latency_buckets->buckets[index]); -} - -uint32_t -as_metrics_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos) -{ - // Convert nanoseconds to milliseconds. - uint64_t elapsed = elapsed_nanos / NS_TO_MS; - - // Round up elapsed to nearest millisecond. 
- if ((elapsed_nanos - (elapsed * NS_TO_MS)) > 0) { - elapsed++; - } - - uint32_t last_bucket = latency_buckets->latency_columns - 1; - uint64_t limit = 1; - - for (uint32_t i = 0; i < last_bucket; i++) { - if (elapsed <= limit) { - return i; - } - limit <<= latency_buckets->latency_shift; - } - return last_bucket; -} - -as_node_metrics* -as_node_metrics_init(uint32_t latency_columns, uint32_t latency_shift) -{ - as_node_metrics* node_metrics = (as_node_metrics*)cf_malloc(sizeof(as_node_metrics)); - uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; - node_metrics->latency = (as_latency_buckets*)cf_malloc(sizeof(as_latency_buckets) * max_latency_type); - for (uint32_t i = 0; i < max_latency_type; i++) { - as_metrics_latency_buckets_init(&node_metrics->latency[i], latency_columns, latency_shift); - } - - return node_metrics; -} - -void -as_metrics_add_latency(as_node_metrics* node_metrics, as_latency_type latency_type, uint64_t elapsed) -{ - as_metrics_latency_buckets_add(&node_metrics->latency[latency_type], elapsed); -} diff --git a/src/main/aerospike/as_metrics_writer.c b/src/main/aerospike/as_metrics_writer.c index 611caf498b..b1fcaf6b38 100644 --- a/src/main/aerospike/as_metrics_writer.c +++ b/src/main/aerospike/as_metrics_writer.c @@ -471,7 +471,7 @@ as_metrics_write_node(as_metrics_writer* mw, as_string_builder* sb, struct as_no if (j > 0) { as_string_builder_append_char(sb, ','); } - as_string_builder_append_uint64(sb, as_metrics_get_bucket(buckets, j)); + as_string_builder_append_uint64(sb, as_latency_get_bucket(buckets, j)); } as_string_builder_append_char(sb, ']'); } diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index ac39c17e64..6c63869fa3 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,9 @@ // Replicas take ~2K per namespace, so this will cover most deployments: #define INFO_STACK_BUF_SIZE (16 * 1024) +// 
Number of nanoseconds per millisecond +#define NS_TO_MS 1000000 + /****************************************************************************** * Function declarations. *****************************************************************************/ @@ -91,6 +95,31 @@ as_node_create_async_pools(uint32_t min_conns_per_node, uint32_t max_conns_per_n return pools; } +static void +as_latency_buckets_init(as_latency_buckets* latency_buckets, uint32_t latency_columns, uint32_t latency_shift) +{ + latency_buckets->latency_columns = latency_columns; + latency_buckets->latency_shift = latency_shift; + latency_buckets->buckets = cf_malloc(sizeof(uint64_t) * latency_columns); + + for (uint32_t i = 0; i < latency_columns; i++) { + as_store_uint64(&latency_buckets->buckets[i], 0); + } +} + +static as_node_metrics* +as_node_metrics_init(uint32_t latency_columns, uint32_t latency_shift) +{ + as_node_metrics* node_metrics = (as_node_metrics*)cf_malloc(sizeof(as_node_metrics)); + uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; + node_metrics->latency = (as_latency_buckets*)cf_malloc(sizeof(as_latency_buckets) * max_latency_type); + + for (uint32_t i = 0; i < max_latency_type; i++) { + as_latency_buckets_init(&node_metrics->latency[i], latency_columns, latency_shift); + } + return node_metrics; +} + as_node* as_node_create(as_cluster* cluster, as_node_info* node_info) { @@ -1315,10 +1344,35 @@ as_node_parse_racks(as_cluster* cluster, as_error* err, as_node* node, char* buf return AEROSPIKE_OK; } +static uint32_t +as_latency_buckets_get_index(as_latency_buckets* latency_buckets, uint64_t elapsed_nanos) +{ + // Convert nanoseconds to milliseconds. + uint64_t elapsed = elapsed_nanos / NS_TO_MS; + + // Round up elapsed to nearest millisecond. 
+ if ((elapsed_nanos - (elapsed * NS_TO_MS)) > 0) { + elapsed++; + } + + uint32_t last_bucket = latency_buckets->latency_columns - 1; + uint64_t limit = 1; + + for (uint32_t i = 0; i < last_bucket; i++) { + if (elapsed <= limit) { + return i; + } + limit <<= latency_buckets->latency_shift; + } + return last_bucket; +} + void as_node_add_latency(as_node* node, as_latency_type latency_type, uint64_t elapsed) { - as_metrics_add_latency(node->metrics, latency_type, elapsed); + as_latency_buckets* latency_buckets = &node->metrics->latency[latency_type]; + uint32_t index = as_latency_buckets_get_index(latency_buckets, elapsed); + as_incr_uint64(&latency_buckets->buckets[index]); } void @@ -1351,7 +1405,6 @@ as_node_add_timeout(as_node* node) as_incr_uint64(&node->timeout_count); } - static as_status as_node_process_racks(as_cluster* cluster, as_error* err, as_node* node, as_vector* values) { diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index 6f7bef2e4f..11cdd02f7f 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2023 Aerospike, Inc. + * Copyright 2008-2024 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more contributor * license agreements. 
@@ -382,15 +382,14 @@ static bool before(atf_plan* plan) } cf_free(result); - // enable metrics - /*as_metrics_policy policy; + as_metrics_policy policy; as_metrics_policy_init(&policy); as_status status = aerospike_enable_metrics(as, &err, &policy); if (status != AEROSPIKE_OK) { error("aerospike_enable_metrics() returned %d - %s", err.code, err.message); - }*/ + } return true; } diff --git a/xcode/aerospike.xcodeproj/project.pbxproj b/xcode/aerospike.xcodeproj/project.pbxproj index 44d96a5f67..c274396037 100644 --- a/xcode/aerospike.xcodeproj/project.pbxproj +++ b/xcode/aerospike.xcodeproj/project.pbxproj @@ -109,6 +109,8 @@ BF90C76C22AB154A0062D920 /* as_cdt_internal.c in Sources */ = {isa = PBXBuildFile; fileRef = BF90C76B22AB154A0062D920 /* as_cdt_internal.c */; }; BF90C77122AB30E40062D920 /* as_map_operations.c in Sources */ = {isa = PBXBuildFile; fileRef = BF90C77022AB30E40062D920 /* as_map_operations.c */; }; BF93AA061AE9E6EB003ECE3B /* as_thread_pool.c in Sources */ = {isa = PBXBuildFile; fileRef = BF93AA051AE9E6EB003ECE3B /* as_thread_pool.c */; }; + BF94A3BD2B86A87800295885 /* as_latency.h in Headers */ = {isa = PBXBuildFile; fileRef = BF94A3BC2B86A87800295885 /* as_latency.h */; }; + BF94A3BF2B86AA4300295885 /* as_latency.c in Sources */ = {isa = PBXBuildFile; fileRef = BF94A3BE2B86AA4300295885 /* as_latency.c */; }; BF986E001F466BEE0057802C /* version.h in Headers */ = {isa = PBXBuildFile; fileRef = BF986DFF1F466BEE0057802C /* version.h */; }; BFA5B21020FD3FA4002AF0BB /* as_cpu.h in Headers */ = {isa = PBXBuildFile; fileRef = BFA5B20F20FD3FA4002AF0BB /* as_cpu.h */; }; BFABF3311FCF85EC004745A1 /* as_queue_mt.c in Sources */ = {isa = PBXBuildFile; fileRef = BFABF3301FCF85EC004745A1 /* as_queue_mt.c */; }; @@ -326,6 +328,8 @@ BF90C76B22AB154A0062D920 /* as_cdt_internal.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_cdt_internal.c; path = ../src/main/aerospike/as_cdt_internal.c; sourceTree = ""; }; 
BF90C77022AB30E40062D920 /* as_map_operations.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_map_operations.c; path = ../src/main/aerospike/as_map_operations.c; sourceTree = ""; }; BF93AA051AE9E6EB003ECE3B /* as_thread_pool.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_thread_pool.c; path = ../modules/common/src/main/aerospike/as_thread_pool.c; sourceTree = ""; }; + BF94A3BC2B86A87800295885 /* as_latency.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_latency.h; path = ../src/include/aerospike/as_latency.h; sourceTree = ""; }; + BF94A3BE2B86AA4300295885 /* as_latency.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_latency.c; path = ../src/main/aerospike/as_latency.c; sourceTree = ""; }; BF986DFF1F466BEE0057802C /* version.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = version.h; path = ../src/include/aerospike/version.h; sourceTree = ""; }; BFA5B20F20FD3FA4002AF0BB /* as_cpu.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = as_cpu.h; path = ../src/include/aerospike/as_cpu.h; sourceTree = ""; }; BFABF3301FCF85EC004745A1 /* as_queue_mt.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = as_queue_mt.c; path = ../modules/common/src/main/aerospike/as_queue_mt.c; sourceTree = ""; }; @@ -556,6 +560,7 @@ BFBDAFDF191B0C5C007EB07C /* as_info.c */, BF26C4661B45AE8F00E6929D /* as_job.c */, BF2AA7C418BEBFA400E54AF3 /* as_key.c */, + BF94A3BE2B86AA4300295885 /* as_latency.c */, BF90C76422AB0EB20062D920 /* as_list_operations.c */, BFC002891901E08500CB9BC8 /* as_lookup.c */, BF90C77022AB30E40062D920 /* as_map_operations.c */, @@ -755,6 +760,7 @@ BFC65B4D1C921E9E0079DF5A /* as_info.h */, BFC65B4E1C921E9E0079DF5A /* as_job.h */, BFC65B4F1C921E9E0079DF5A /* 
as_key.h */, + BF94A3BC2B86A87800295885 /* as_latency.h */, BFF344C21CEA7ACD00FD1976 /* as_list_operations.h */, BFC65B511C921E9E0079DF5A /* as_listener.h */, BFC65B521C921E9E0079DF5A /* as_lookup.h */, @@ -797,6 +803,7 @@ BF986E001F466BEE0057802C /* version.h in Headers */, BFC65B8B1C921E9E0079DF5A /* as_udf.h in Headers */, BFC65B811C921E9E0079DF5A /* as_pipe.h in Headers */, + BF94A3BD2B86A87800295885 /* as_latency.h in Headers */, BF32146F23E8F630004A7E19 /* as_partition_tracker.h in Headers */, BF457A8622B1AC6600409D04 /* as_bit_operations.h in Headers */, BFC65B761C921E9E0079DF5A /* as_event_internal.h in Headers */, @@ -934,6 +941,7 @@ BF93AA061AE9E6EB003ECE3B /* as_thread_pool.c in Sources */, BFC38AE11948F7CA000C53D9 /* as_admin.c in Sources */, BF8EF4CE2AE1B47B00FEEC3A /* lstrlib.c in Sources */, + BF94A3BF2B86AA4300295885 /* as_latency.c in Sources */, BFBD205618BC3436009ED931 /* mod_lua_record.c in Sources */, BF8EF4A52AE1B41100FEEC3A /* lauxlib.c in Sources */, BF8EF4BA2AE1B44900FEEC3A /* lgc.c in Sources */, From b9bc5d0c05812891ef16abdd6ba2e357cdcaaaab Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Wed, 21 Feb 2024 18:23:11 -0500 Subject: [PATCH 47/64] Port previous commit to windows. 
--- vs/aerospike/aerospike.vcxproj | 2 ++ vs/aerospike/aerospike.vcxproj.filters | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/vs/aerospike/aerospike.vcxproj b/vs/aerospike/aerospike.vcxproj index 6f483659c7..7608c81a41 100644 --- a/vs/aerospike/aerospike.vcxproj +++ b/vs/aerospike/aerospike.vcxproj @@ -394,6 +394,7 @@ + @@ -548,6 +549,7 @@ + diff --git a/vs/aerospike/aerospike.vcxproj.filters b/vs/aerospike/aerospike.vcxproj.filters index cdb86cd2f4..fbb436a1e6 100644 --- a/vs/aerospike/aerospike.vcxproj.filters +++ b/vs/aerospike/aerospike.vcxproj.filters @@ -501,6 +501,9 @@ Header Files + + Header Files + @@ -953,6 +956,9 @@ Source Files + + Source Files + From 63d53158aeaa963b04fc279f058cb4002f80b42f Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Thu, 22 Feb 2024 17:00:25 -0500 Subject: [PATCH 48/64] Add metrics lock and run all metrics calls under this lock. If a metrics file error occurs after the metrics file open succeeds in as_metrics_writer_enable(), close file and return error. Destroy node metrics in as_node_destroy() and when metrics are disabled. --- src/include/aerospike/as_cluster.h | 6 ++ src/include/aerospike/as_node.h | 7 ++ src/main/aerospike/as_cluster.c | 106 ++++++++++++++++++------- src/main/aerospike/as_metrics_writer.c | 55 ++++--------- src/main/aerospike/as_node.c | 19 +++++ 5 files changed, 125 insertions(+), 68 deletions(-) diff --git a/src/include/aerospike/as_cluster.h b/src/include/aerospike/as_cluster.h index 1517285097..ef70aba3a4 100644 --- a/src/include/aerospike/as_cluster.h +++ b/src/include/aerospike/as_cluster.h @@ -204,6 +204,12 @@ typedef struct as_cluster_s { */ pthread_mutex_t seed_lock; + /** + * @private + * Lock for metrics operations. + */ + pthread_mutex_t metrics_lock; + /** * @private * Lock for the tend thread to wait on with the tend interval as timeout. 
diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index 73a25c2d79..7fbf332ea9 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -454,6 +454,13 @@ as_node_create(struct as_cluster_s* cluster, as_node_info* node_info); AS_EXTERN void as_node_destroy(as_node* node); +/** + * @private + * Destroy node metrics. + */ +void +as_node_destroy_metrics(as_node* node); + /** * @private * Create configured minimum number of connections. diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index f1491b9ec0..85934485b7 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -555,25 +555,50 @@ as_cluster_remove_nodes_copy(as_cluster* cluster, as_vector* /* */ no as_vector_append(cluster->gc, &item); } -as_status -as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_metrics_policy* policy) +static void +as_cluster_destroy_node_metrics(as_cluster* cluster) { - if (cluster->metrics_enabled) { - as_cluster_disable_metrics(err, cluster); + as_nodes* nodes = as_nodes_reserve(cluster); + + for (uint32_t i = 0; i < nodes->size; i++) { + as_node_destroy_metrics(nodes->array[i]); } + as_nodes_release(nodes); +} - as_status status = AEROSPIKE_OK; - as_error_reset(err); - - if (policy->metrics_listeners.enable_listener) { - // Use listeners defined in the metrics policy. +as_status +as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_metrics_policy* policy) +{ + bool custom_listener = policy->metrics_listeners.enable_listener != NULL; + + if (custom_listener) { // Ensure all listeners and user data has been defined. if (! 
(policy->metrics_listeners.enable_listener && policy->metrics_listeners.snapshot_listener && policy->metrics_listeners.node_close_listener && policy->metrics_listeners.disable_listener && policy->metrics_listeners.udata)) { return as_error_set_message(err, AEROSPIKE_ERR_PARAM, "All metrics listeners and udata must be defined"); } - + } + + pthread_mutex_lock(&cluster->metrics_lock); + + as_status status = AEROSPIKE_OK; + + if (cluster->metrics_enabled) { + cluster->metrics_enabled = false; + status = cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); + as_cluster_destroy_node_metrics(cluster); + + if (status != AEROSPIKE_OK) { + // Disabling old metrics should not prevent new metrics from being created. + // Log error and continue processing. + as_log_warn("Metrics disable error: %s %s", as_error_string(status), err->message); + } + } + + as_error_reset(err); + + if (custom_listener) { // Copy listeners from policy. cluster->metrics_listeners = policy->metrics_listeners; } @@ -582,6 +607,7 @@ as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_metrics_policy* status = as_metrics_writer_create(err, policy, &cluster->metrics_listeners); if (status != AEROSPIKE_OK) { + pthread_mutex_unlock(&cluster->metrics_lock); return status; } } @@ -598,26 +624,35 @@ as_cluster_enable_metrics(as_error* err, as_cluster* cluster, as_metrics_policy* } as_nodes_release(nodes); - cluster->metrics_enabled = true; status = cluster->metrics_listeners.enable_listener(err, cluster->metrics_listeners.udata); if (status != AEROSPIKE_OK) { - as_cluster_disable_metrics(err, cluster); + as_cluster_destroy_node_metrics(cluster); + pthread_mutex_unlock(&cluster->metrics_lock); return status; } - + + cluster->metrics_enabled = true; + pthread_mutex_unlock(&cluster->metrics_lock); return status; } as_status as_cluster_disable_metrics(as_error* err, as_cluster* cluster) { + as_status status = AEROSPIKE_OK; + as_error_reset(err); + + 
pthread_mutex_lock(&cluster->metrics_lock); + if (cluster->metrics_enabled) { cluster->metrics_enabled = false; - return cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); + status = cluster->metrics_listeners.disable_listener(err, cluster, cluster->metrics_listeners.udata); + as_cluster_destroy_node_metrics(cluster); } - return AEROSPIKE_OK; + pthread_mutex_unlock(&cluster->metrics_lock); + return status; } void @@ -674,17 +709,24 @@ as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* size; i++) { as_node* node = as_vector_get_ptr(nodes_to_remove, i); + as_status status = AEROSPIKE_OK; + + pthread_mutex_lock(&cluster->metrics_lock); if (cluster->metrics_enabled) { - as_status status = cluster->metrics_listeners.node_close_listener(err, node, node->cluster->metrics_listeners.udata); - if (status != AEROSPIKE_OK) { - as_log_warn("Metrics error: %s %s", as_error_string(status), err->message); - as_error_reset(err); - } + status = cluster->metrics_listeners.node_close_listener(err, node, node->cluster->metrics_listeners.udata); + } + pthread_mutex_unlock(&cluster->metrics_lock); + + if (status != AEROSPIKE_OK) { + // Metrics failures should not interrupt cluster tend. + // Log warning and continue processing. + as_log_warn("Metrics error: %s %s", as_error_string(status), err->message); + as_error_reset(err); } as_node_deactivate(node); } - + // Remove all nodes at once to avoid copying entire array multiple times. as_cluster_remove_nodes_copy(cluster, nodes_to_remove); @@ -778,14 +820,22 @@ as_cluster_manage(as_cluster* cluster) if (cluster->max_error_rate > 0 && cluster->tend_count % cluster->error_rate_window == 0) { as_cluster_reset_error_rate(cluster); } + + // Call metrics listener every metrics_interval when enabled. 
+ as_status status = AEROSPIKE_OK; + as_error err; + pthread_mutex_lock(&cluster->metrics_lock); + if (cluster->metrics_enabled && cluster->tend_count % cluster->metrics_interval == 0) { - as_error err; - as_status status = cluster->metrics_listeners.snapshot_listener(&err, cluster, cluster->metrics_listeners.udata); - - if (status != AEROSPIKE_OK) { - as_log_warn("Metrics error: %s %s", as_error_string(status), err.message); - } + status = cluster->metrics_listeners.snapshot_listener(&err, cluster, cluster->metrics_listeners.udata); + } + pthread_mutex_unlock(&cluster->metrics_lock); + + if (status != AEROSPIKE_OK) { + // Metrics failures should not interrupt cluster tend. + // Log warning and continue processing. + as_log_warn("Metrics error: %s %s", as_error_string(status), err.message); } } @@ -1426,6 +1476,7 @@ as_cluster_create(as_config* config, as_error* err, as_cluster** cluster_out) } cluster->seeds = trg; pthread_mutex_init(&cluster->seed_lock, NULL); + pthread_mutex_init(&cluster->metrics_lock, NULL); // Initialize IP map translation if provided. if (config->ip_map && config->ip_map_size > 0) { @@ -1606,6 +1657,7 @@ as_cluster_destroy(as_cluster* cluster) as_vector_destroy(seeds); pthread_mutex_unlock(&cluster->seed_lock); pthread_mutex_destroy(&cluster->seed_lock); + pthread_mutex_destroy(&cluster->metrics_lock); // Destroy tend lock and condition. 
pthread_mutex_destroy(&cluster->tend_lock); diff --git a/src/main/aerospike/as_metrics_writer.c b/src/main/aerospike/as_metrics_writer.c index b1fcaf6b38..9c9d574ac4 100644 --- a/src/main/aerospike/as_metrics_writer.c +++ b/src/main/aerospike/as_metrics_writer.c @@ -378,11 +378,17 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) int rv = snprintf(data, sizeof(data), "%s header(1) cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]] eventloop[processSize,queueSize] node[name,address:port,syncConn,asyncConn,errors,timeouts,latency[]] conn[inUse,inPool,opened,closed] latency(%u,%u)[type[l1,l2,l3...]]\n", now_str, mw->latency_columns, mw->latency_shift); if (rv <= 0) { + fclose(mw->file); return as_error_update(err, AEROSPIKE_ERR_CLIENT, - "Failed to construct metrics header: %d,%s", rv, file_name.data); + "Failed to write metrics header: %d,%s", rv, file_name.data); } - return as_metrics_write_line(mw, data, err); + as_status status = as_metrics_write_line(mw, data, err); + + if (status != AEROSPIKE_OK) { + fclose(mw->file); + } + return status; } static void @@ -546,31 +552,6 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, as_cluster* clust return status; } -static void -as_metrics_writer_destroy_node_metrics(as_node* node) -{ - if (node->metrics != NULL) { - uint32_t max_latency_type = AS_LATENCY_TYPE_NONE; - for (uint32_t i = 0; i < max_latency_type; i++) { - cf_free(node->metrics->latency[i].buckets); - } - cf_free(node->metrics->latency); - cf_free(node->metrics); - node->metrics = NULL; - } -} - -static void -as_metrics_writer_destroy_nodes(as_cluster* cluster) -{ - // Free node memory - as_nodes* nodes = as_nodes_reserve(cluster); - for (uint32_t i = 0; i < nodes->size; i++) { - as_metrics_writer_destroy_node_metrics(nodes->array[i]); - } - as_nodes_release(nodes); -} - static void as_metrics_writer_destroy(as_metrics_writer* mw) { @@ -648,7 +629,6 @@ 
as_metrics_writer_snapshot(as_error* err, as_cluster* cluster, void* udata) "File stream did not flush successfully: %s", mw->report_dir); } } - return AEROSPIKE_OK; } @@ -671,13 +651,10 @@ as_metrics_writer_node_close(as_error* err, as_node* node, void* udata) as_string_builder_append_newline(&sb); as_status status = as_metrics_write_line(mw, sb.data, err); - - as_metrics_writer_destroy_node_metrics(node); + as_string_builder_destroy(&sb); - return status; } - return AEROSPIKE_OK; } @@ -687,19 +664,15 @@ as_metrics_writer_disable(as_error* err, as_cluster* cluster, void* udata) // write cluster into to file, disable as_error_reset(err); as_metrics_writer* mw = udata; + if (mw != NULL) { + as_status status = AEROSPIKE_OK; + if (mw->enable && mw->file != NULL) { - as_status status = as_metrics_write_cluster(err, mw, cluster); - - if (status != AEROSPIKE_OK) { - as_metrics_writer_destroy_nodes(cluster); - as_metrics_writer_destroy(mw); - return status; - } + status = as_metrics_write_cluster(err, mw, cluster); } - as_metrics_writer_destroy_nodes(cluster); as_metrics_writer_destroy(mw); + return status; } - return AEROSPIKE_OK; } diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 6c63869fa3..296ea7f27b 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -120,6 +120,23 @@ as_node_metrics_init(uint32_t latency_columns, uint32_t latency_shift) return node_metrics; } +void +as_node_destroy_metrics(as_node* node) +{ + as_node_metrics* node_metrics = node->metrics; + + if (node_metrics) { + uint32_t max = AS_LATENCY_TYPE_NONE; + + for (uint32_t i = 0; i < max; i++) { + cf_free(node_metrics->latency[i].buckets); + } + cf_free(node_metrics->latency); + cf_free(node_metrics); + node->metrics = NULL; + } +} + as_node* as_node_create(as_cluster* cluster, as_node_info* node_info) { @@ -248,6 +265,8 @@ as_node_destroy(as_node* node) if (racks) { as_racks_release(racks); } + + as_node_destroy_metrics(node); cf_free(node); } 
From 981701acf1482703a5cd4b005ec99b785d9c507d Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Thu, 22 Feb 2024 17:14:05 -0500 Subject: [PATCH 49/64] Comment out metrics enable in tests. --- src/test/aerospike_test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/aerospike_test.c b/src/test/aerospike_test.c index 11cdd02f7f..b7b287b2ae 100644 --- a/src/test/aerospike_test.c +++ b/src/test/aerospike_test.c @@ -382,6 +382,8 @@ static bool before(atf_plan* plan) } cf_free(result); + /* + // Test metrics as_metrics_policy policy; as_metrics_policy_init(&policy); @@ -390,6 +392,7 @@ static bool before(atf_plan* plan) if (status != AEROSPIKE_OK) { error("aerospike_enable_metrics() returned %d - %s", err.code, err.message); } + */ return true; } From a12161d2ec00c8851634589bf1fe235f53af97c7 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Thu, 22 Feb 2024 18:09:20 -0500 Subject: [PATCH 50/64] Add timeout_count and retry_count to aerospike_stats. --- src/main/aerospike/aerospike_stats.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/aerospike/aerospike_stats.c b/src/main/aerospike/aerospike_stats.c index 9e634b224e..b943167d23 100644 --- a/src/main/aerospike/aerospike_stats.c +++ b/src/main/aerospike/aerospike_stats.c @@ -145,7 +145,7 @@ aerospike_stats_to_string(as_cluster_stats* stats) { as_string_builder sb; as_string_builder_init(&sb, 4096, true); - as_string_builder_append(&sb, "nodes(inUse,inPool,opened,closed):"); + as_string_builder_append(&sb, "nodes(inUse,inPool,opened,closed) error_count,timeout_count"); as_string_builder_append_newline(&sb); for (uint32_t i = 0; i < stats->nodes_size; i++) { @@ -154,9 +154,10 @@ aerospike_stats_to_string(as_cluster_stats* stats) as_conn_stats_tostring(&sb, "sync", &node_stats->sync); as_conn_stats_tostring(&sb, "async", &node_stats->async); as_conn_stats_tostring(&sb, "pipeline", &node_stats->pipeline); - as_string_builder_append_newline(&sb); - 
as_string_builder_append(&sb, "error count: "); + as_string_builder_append_char(&sb, ' '); as_string_builder_append_uint64(&sb, node_stats->error_count); + as_string_builder_append_char(&sb, ','); + as_string_builder_append_uint64(&sb, node_stats->timeout_count); as_string_builder_append_newline(&sb); } @@ -177,6 +178,10 @@ aerospike_stats_to_string(as_cluster_stats* stats) } as_string_builder_append_newline(&sb); } + + as_string_builder_append(&sb, "retry_count: "); + as_string_builder_append_uint64(&sb, stats->retry_count); + return sb.data; } From 1f0e5ef990e1395800e3709d666b77ee57024fc1 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Fri, 23 Feb 2024 13:32:25 -0500 Subject: [PATCH 51/64] Inline short cluster/node metrics functions. Call as_node_add_timeout() on timeout. --- src/include/aerospike/as_cluster.h | 51 ++++++++++++++++++++++-------- src/include/aerospike/as_node.h | 28 +++++++++++----- src/main/aerospike/as_cluster.c | 44 -------------------------- src/main/aerospike/as_command.c | 2 ++ src/main/aerospike/as_event.c | 19 ++++++++--- src/main/aerospike/as_node.c | 24 -------------- 6 files changed, 73 insertions(+), 95 deletions(-) diff --git a/src/include/aerospike/as_cluster.h b/src/include/aerospike/as_cluster.h index ef70aba3a4..99381af4ed 100644 --- a/src/include/aerospike/as_cluster.h +++ b/src/include/aerospike/as_cluster.h @@ -612,50 +612,73 @@ as_cluster_disable_metrics(as_error* err, as_cluster* cluster); * @private * Increment transaction count when metrics are enabled. */ -void -as_cluster_add_tran(as_cluster* cluster); +static inline void +as_cluster_add_tran(as_cluster* cluster) +{ + if (cluster->metrics_enabled) { + as_incr_uint64(&cluster->tran_count); + } +} /** * @private * Return transaction count. The value is cumulative and not reset per metrics interval. 
*/ -uint64_t -as_cluster_get_tran_count(const as_cluster* cluster); +static inline uint64_t +as_cluster_get_tran_count(const as_cluster* cluster) +{ + return as_load_uint64(&cluster->tran_count); +} /** * @private * Increment async delay queue timeout count. */ -void -as_cluster_add_retry(as_cluster* cluster); +static inline void +as_cluster_add_retry(as_cluster* cluster) +{ + as_incr_uint64(&cluster->retry_count); +} /** * @private * Add transaction retry count. There can be multiple retries for a single transaction. */ -void -as_cluster_add_retries(as_cluster* cluster, uint32_t count); +static inline void +as_cluster_add_retries(as_cluster* cluster, uint32_t count) +{ + as_faa_uint64(&cluster->retry_count, count); +} /** * @private * Return transaction retry count. The value is cumulative and not reset per metrics interval. */ -uint64_t -as_cluster_get_retry_count(const as_cluster* cluster); +static inline uint64_t +as_cluster_get_retry_count(const as_cluster* cluster) +{ + return as_load_uint64(&cluster->retry_count); +} /** * @private * Increment async delay queue timeout count. */ -void -as_cluster_add_delay_queue_timeout(as_cluster* cluster); +static inline void +as_cluster_add_delay_queue_timeout(as_cluster* cluster) +{ + as_incr_uint64(&cluster->delay_queue_timeout_count); +} /** * @private * Return async delay queue timeout count. */ -uint64_t -as_cluster_get_delay_queue_timeout_count(const as_cluster* cluster); +static inline uint64_t +as_cluster_get_delay_queue_timeout_count(const as_cluster* cluster) +{ + return as_load_uint64(&cluster->delay_queue_timeout_count); +} /** * @private diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index 7fbf332ea9..2463e788c5 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -688,28 +688,40 @@ as_node_enable_metrics(as_node* node, const struct as_metrics_policy_s* policy); /** * Return transaction error count. 
The value is cumulative and not reset per metrics interval. */ -uint64_t -as_node_get_error_count(as_node* node); +static inline uint64_t +as_node_get_error_count(as_node* node) +{ + return as_load_uint64(&node->error_count); +} /** * Increment transaction error count. If the error is retryable, multiple errors per * transaction may occur. */ -void -as_node_add_error(as_node* node); +static inline void +as_node_add_error(as_node* node) +{ + as_incr_uint64(&node->error_count); +} /** * Return transaction timeout count. The value is cumulative and not reset per metrics interval. */ -uint64_t -as_node_get_timeout_count(as_node* node); +static inline uint64_t +as_node_get_timeout_count(as_node* node) +{ + return as_load_uint64(&node->timeout_count); +} /** * Increment transaction timeout count. If the timeout is retryable (ie socketTimeout), * multiple timeouts per transaction may occur. */ -void -as_node_add_timeout(as_node* node); +static inline void +as_node_add_timeout(as_node* node) +{ + as_incr_uint64(&node->timeout_count); +} /** * @private diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index 85934485b7..e2f6e47579 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -655,50 +655,6 @@ as_cluster_disable_metrics(as_error* err, as_cluster* cluster) return status; } -void -as_cluster_add_tran(as_cluster* cluster) -{ - if (cluster->metrics_enabled) { - as_incr_uint64(&cluster->tran_count); - } -} - -uint64_t -as_cluster_get_tran_count(const as_cluster* cluster) -{ - return as_load_uint64(&cluster->tran_count); -} - -void -as_cluster_add_retry(as_cluster* cluster) -{ - as_incr_uint64(&cluster->retry_count); -} - -void -as_cluster_add_retries(as_cluster* cluster, uint32_t count) -{ - as_faa_uint64(&cluster->retry_count, count); -} - -uint64_t -as_cluster_get_retry_count(const as_cluster* cluster) -{ - return as_load_uint64(&cluster->retry_count); -} - -void -as_cluster_add_delay_queue_timeout(as_cluster* 
cluster) -{ - as_incr_uint64(&cluster->delay_queue_timeout_count); -} - -uint64_t -as_cluster_get_delay_queue_timeout_count(const as_cluster* cluster) -{ - return as_load_uint64(&cluster->delay_queue_timeout_count); -} - static as_status as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* */ nodes_to_remove) { diff --git a/src/main/aerospike/as_command.c b/src/main/aerospike/as_command.c index 5deeccbcc1..18c855afc0 100644 --- a/src/main/aerospike/as_command.c +++ b/src/main/aerospike/as_command.c @@ -681,6 +681,8 @@ as_command_execute(as_command* cmd, as_error* err) goto Retry; case AEROSPIKE_ERR_TIMEOUT: + as_node_add_timeout(node); + if (is_server_timeout(err)) { as_node_put_conn_error(node, &socket); } diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index 19e8bf1b3c..eaf89bf8a9 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -823,6 +823,8 @@ as_event_socket_timeout(as_event_command* cmd) return; } + as_node_add_timeout(cmd->node); + if (cmd->pipe_listener) { as_pipe_timeout(cmd, true); return; @@ -847,15 +849,15 @@ as_event_delay_timeout(as_event_command* cmd) { cmd->state = AS_ASYNC_STATE_QUEUE_ERROR; + if (cmd->latency_type != AS_LATENCY_TYPE_NONE) { + as_cluster_add_delay_queue_timeout(cmd->cluster); + } + as_error err; as_error_set_message(&err, AEROSPIKE_ERR_TIMEOUT, "Delay queue timeout"); // Notify user, but do not destroy command. as_event_notify_error(cmd, &err); - if (cmd->latency_type != AS_LATENCY_TYPE_NONE) - { - as_cluster_add_delay_queue_timeout(cmd->cluster); - } } void @@ -887,12 +889,14 @@ as_event_process_timer(as_event_command* cmd) void as_event_total_timeout(as_event_command* cmd) { + // Node should not be null at this point. + as_node_add_timeout(cmd->node); + if (cmd->pipe_listener) { as_pipe_timeout(cmd, false); return; } - // Node should not be null at this point. 
as_event_connection_timeout(cmd, &cmd->node->async_conn_pools[cmd->event_loop->index]); as_error err; @@ -1312,6 +1316,11 @@ as_event_response_error(as_event_command* cmd, as_error* err) as_event_release_connection(cmd->conn, pool); as_node_incr_error_rate(cmd->node); break; + + case AEROSPIKE_ERR_TIMEOUT: + as_node_add_timeout(cmd->node); + as_event_put_connection(cmd, pool); + break; default: as_event_put_connection(cmd, pool); diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 296ea7f27b..eabf9563d0 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -1400,30 +1400,6 @@ as_node_enable_metrics(as_node* node, const as_metrics_policy* policy) node->metrics = as_node_metrics_init(policy->latency_columns, policy->latency_shift); } -uint64_t -as_node_get_error_count(as_node* node) -{ - return as_load_uint64(&node->error_count); -} - -void -as_node_add_error(as_node* node) -{ - as_incr_uint64(&node->error_count); -} - -uint64_t -as_node_get_timeout_count(as_node* node) -{ - return as_load_uint64(&node->timeout_count); -} - -void -as_node_add_timeout(as_node* node) -{ - as_incr_uint64(&node->timeout_count); -} - static as_status as_node_process_racks(as_cluster* cluster, as_error* err, as_node* node, as_vector* values) { From 5563928cd3d2243c3be27ed51f18fc78c7c6d62b Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Fri, 23 Feb 2024 16:42:46 -0500 Subject: [PATCH 52/64] Call as_node_add_error() on errors other than timeout. Only add latency for commands that completed successfully, In async, only set begin timestamp once at the beginning and measure time elapsed for connection creation and command completion from that time. 
--- src/main/aerospike/as_command.c | 16 ++++++++++------ src/main/aerospike/as_event.c | 32 +++++++++++++------------------- src/main/aerospike/as_node.c | 10 +++++++--- src/main/aerospike/as_pipe.c | 3 --- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/src/main/aerospike/as_command.c b/src/main/aerospike/as_command.c index 18c855afc0..c4b2fb53d1 100644 --- a/src/main/aerospike/as_command.c +++ b/src/main/aerospike/as_command.c @@ -640,6 +640,7 @@ as_command_execute(as_command* cmd, as_error* err) if (latency_type != AS_LATENCY_TYPE_NONE) { begin = cf_getns(); } + // Send command. status = as_socket_write_deadline(err, &socket, node, cmd->buf, cmd->buf_size, cmd->socket_timeout, cmd->deadline_ms); @@ -661,6 +662,11 @@ as_command_execute(as_command* cmd, as_error* err) } if (status == AEROSPIKE_OK) { + if (latency_type != AS_LATENCY_TYPE_NONE) { + uint64_t elapsed = cf_getns() - begin; + as_node_add_latency(node, latency_type, elapsed); + } + // Reset error code if retry had occurred. 
if (cmd->iteration > 0) { as_error_reset(err); @@ -673,10 +679,12 @@ as_command_execute(as_command* cmd, as_error* err) switch (status) { case AEROSPIKE_ERR_CLUSTER: case AEROSPIKE_ERR_DEVICE_OVERLOAD: + as_node_add_error(node); as_node_put_conn_error(node, &socket); goto Retry; case AEROSPIKE_ERR_CONNECTION: + as_node_add_error(node); as_node_close_conn_error(node, &socket, socket.pool); goto Retry; @@ -697,6 +705,7 @@ as_command_execute(as_command* cmd, as_error* err) case AEROSPIKE_ERR_SCAN_ABORTED: case AEROSPIKE_ERR_CLIENT_ABORT: case AEROSPIKE_ERR_CLIENT: + as_node_add_error(node); as_node_close_conn_error(node, &socket, socket.pool); if (release_node) { as_node_release(node); @@ -705,17 +714,12 @@ as_command_execute(as_command* cmd, as_error* err) return status; default: + as_node_add_error(node); as_error_set_in_doubt(err, cmd->flags & AS_COMMAND_FLAGS_READ, cmd->sent); break; } } - if (latency_type != AS_LATENCY_TYPE_NONE) - { - uint64_t elapsed = cf_getns() - begin; - as_node_add_latency(node, latency_type, elapsed); - } - // Put connection back in pool. as_node_put_connection(node, &socket); diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index eaf89bf8a9..a3ced16277 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -433,7 +433,12 @@ void as_event_command_execute_in_loop(as_event_loop* event_loop, as_event_command* cmd) { // Initialize read buffer (buf) to be located after write buffer. - cmd->begin = 0; + if (cmd->cluster->metrics_enabled) { + cmd->begin = cf_getns(); + } + else { + cmd->begin = 0; + } cmd->latency_type = cmd->cluster->metrics_enabled ? 
cmd->latency_type : AS_LATENCY_TYPE_NONE; cmd->write_offset = (uint32_t)(cmd->buf - (uint8_t*)cmd); cmd->buf += cmd->write_len; @@ -579,17 +584,13 @@ as_event_create_connection(as_event_command* cmd, as_async_conn_pool* pool) conn->base.watching = 0; conn->cmd = cmd; cmd->conn = &conn->base; - if (cmd->cluster->metrics_enabled) { - cmd->begin = cf_getns(); - } as_event_connect(cmd, pool); } void as_event_connection_complete(as_event_command* cmd) { - if (cmd->cluster->metrics_enabled) - { + if (cmd->cluster->metrics_enabled) { uint64_t elapsed = cf_getns() - cmd->begin; as_node_add_latency(cmd->node, AS_LATENCY_TYPE_CONN, elapsed); } @@ -676,9 +677,6 @@ as_event_command_begin(as_event_loop* event_loop, as_event_command* cmd) // Create connection only when connection count within limit. if (as_async_conn_pool_incr_total(pool)) { - if (cmd->latency_type != AS_LATENCY_TYPE_NONE) { - cmd->begin = cf_getns(); - } as_event_create_connection(cmd, pool); return; } @@ -1005,8 +1003,7 @@ as_event_put_connection(as_event_command* cmd, as_async_conn_pool* pool) static inline void as_event_response_complete(as_event_command* cmd) { - if (cmd->latency_type != AS_LATENCY_TYPE_NONE) - { + if (cmd->latency_type != AS_LATENCY_TYPE_NONE) { uint64_t elapsed = cf_getns() - cmd->begin; as_node_add_latency(cmd->node, cmd->latency_type, elapsed); } @@ -1280,12 +1277,6 @@ as_event_socket_error(as_event_command* cmd, as_error* err) void as_event_response_error(as_event_command* cmd, as_error* err) { - if (cmd->latency_type != AS_LATENCY_TYPE_NONE) - { - uint64_t elapsed = cf_getns() - cmd->begin; - as_node_add_latency(cmd->node, cmd->latency_type, elapsed); - } - if (cmd->pipe_listener != NULL) { as_pipe_response_error(cmd, err); return; @@ -1302,8 +1293,9 @@ as_event_response_error(as_event_command* cmd, as_error* err) switch (err->code) { case AEROSPIKE_ERR_CLUSTER: case AEROSPIKE_ERR_DEVICE_OVERLOAD: - as_event_put_connection(cmd, pool); + as_node_add_error(cmd->node); 
as_node_incr_error_rate(cmd->node); + as_event_put_connection(cmd, pool); break; case AEROSPIKE_ERR_QUERY_ABORTED: @@ -1313,8 +1305,9 @@ as_event_response_error(as_event_command* cmd, as_error* err) case AEROSPIKE_ERR_CLIENT_ABORT: case AEROSPIKE_ERR_CLIENT: case AEROSPIKE_NOT_AUTHENTICATED: - as_event_release_connection(cmd->conn, pool); + as_node_add_error(cmd->node); as_node_incr_error_rate(cmd->node); + as_event_release_connection(cmd->conn, pool); break; case AEROSPIKE_ERR_TIMEOUT: @@ -1323,6 +1316,7 @@ as_event_response_error(as_event_command* cmd, as_error* err) break; default: + as_node_add_error(cmd->node); as_event_put_connection(cmd, pool); break; } diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index eabf9563d0..2d7a6938b2 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -491,7 +491,12 @@ as_node_create_connection( as_socket* sock ) { - uint64_t begin = cf_getns(); + uint64_t begin = 0; + + if (node->cluster->metrics_enabled) { + begin = cf_getns(); + } + as_status status = as_node_create_socket(err, node, pool, sock, deadline_ms); if (status) { @@ -518,8 +523,7 @@ as_node_create_connection( } } - if (node->cluster->metrics_enabled) - { + if (node->cluster->metrics_enabled) { uint64_t elapsed = cf_getns() - begin; as_node_add_latency(node, AS_LATENCY_TYPE_CONN, elapsed); } diff --git a/src/main/aerospike/as_pipe.c b/src/main/aerospike/as_pipe.c index 09614a81e2..9a42a8ff7a 100644 --- a/src/main/aerospike/as_pipe.c +++ b/src/main/aerospike/as_pipe.c @@ -365,9 +365,6 @@ as_pipe_get_connection(as_event_command* cmd) as_log_trace("Creating new pipeline connection"); if (as_async_conn_pool_incr_total(pool)) { - if (cmd->cluster->metrics_enabled) { - cmd->begin = cf_getns(); - } conn = cf_malloc(sizeof(as_pipe_connection)); assert(conn != NULL); From 574c55c76da3e94be01467bad618cd4d657f8b0c Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Fri, 23 Feb 2024 17:34:31 -0500 Subject: [PATCH 53/64] Call 
"as_cluster_add_retries(cluster, batch_nodes.size)" after batch_nodes have been populated. --- src/main/aerospike/aerospike_batch.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/aerospike/aerospike_batch.c b/src/main/aerospike/aerospike_batch.c index 146e8f739b..e9fbc3fee1 100644 --- a/src/main/aerospike/aerospike_batch.c +++ b/src/main/aerospike/aerospike_batch.c @@ -2730,8 +2730,6 @@ as_batch_retry_records(as_batch_task_records* btr, as_command* parent, as_error* as_vector batch_nodes; as_vector_inita(&batch_nodes, sizeof(as_batch_node), n_nodes); - as_cluster_add_retries(cluster, batch_nodes.size); - // Create initial key capacity for each node as average + 25%. uint32_t offsets_size = task->offsets.size; uint32_t offsets_capacity = offsets_size / n_nodes; @@ -2796,6 +2794,8 @@ as_batch_retry_records(as_batch_task_records* btr, as_command* parent, as_error* return AEROSPIKE_USE_NORMAL_RETRY; } } + + as_cluster_add_retries(cluster, batch_nodes.size); parent->flags |= AS_COMMAND_FLAGS_SPLIT_RETRY; return as_batch_execute_sync(cluster, err, task->policy, btr->defs, task->has_write, &rep, @@ -2818,8 +2818,6 @@ as_batch_retry_keys(as_batch_task_keys* btk, as_command* parent, as_error* err) as_vector batch_nodes; as_vector_inita(&batch_nodes, sizeof(as_batch_node), n_nodes); - as_cluster_add_retries(cluster, batch_nodes.size); - as_status status = AEROSPIKE_OK; // Create initial key capacity for each node as average + 25%. @@ -2883,6 +2881,8 @@ as_batch_retry_keys(as_batch_task_keys* btk, as_command* parent, as_error* err) return AEROSPIKE_USE_NORMAL_RETRY; } } + + as_cluster_add_retries(cluster, batch_nodes.size); parent->flags |= AS_COMMAND_FLAGS_SPLIT_RETRY; // Run batch retries sequentially in same thread. 
@@ -3152,8 +3152,6 @@ as_batch_retry_async(as_event_command* parent, bool timeout) as_vector bnodes; as_vector_inita(&bnodes, sizeof(as_batch_retry_node), n_nodes); - as_cluster_add_retries(cluster, bnodes.size); - as_batch_replica rep; rep.replica = be->replica; rep.replica_sc = be->replica_sc; @@ -3268,6 +3266,8 @@ as_batch_retry_async(as_event_command* parent, bool timeout) } } + as_cluster_add_retries(cluster, bnodes.size); + as_event_executor* e = &be->executor; pthread_mutex_lock(&e->lock); e->max += bnodes.size - 1; From 7192e98dce2703c2fb8c411e3ad0df749e6fdc30 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Fri, 23 Feb 2024 17:43:36 -0500 Subject: [PATCH 54/64] Pack as_cluster_stats and as_node for optimal size. --- src/include/aerospike/aerospike_stats.h | 10 +++++----- src/include/aerospike/as_node.h | 24 ++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/include/aerospike/aerospike_stats.h b/src/include/aerospike/aerospike_stats.h index 7001336e7c..cfb3e81114 100644 --- a/src/include/aerospike/aerospike_stats.h +++ b/src/include/aerospike/aerospike_stats.h @@ -135,6 +135,11 @@ typedef struct as_cluster_stats_s { */ as_event_loop_stats* event_loops; + /** + * Count of transaction retries since cluster was started. + */ + uint64_t retry_count; + /** * Node count. */ @@ -151,11 +156,6 @@ typedef struct as_cluster_stats_s { */ uint32_t thread_pool_queued_tasks; - /** - * Count of transaction retries since cluster was started. - */ - uint64_t retry_count; - } as_cluster_stats; struct as_cluster_s; diff --git a/src/include/aerospike/as_node.h b/src/include/aerospike/as_node.h index 2463e788c5..b2c18230e2 100644 --- a/src/include/aerospike/as_node.h +++ b/src/include/aerospike/as_node.h @@ -315,6 +315,18 @@ typedef struct as_node_s { */ as_socket info_socket; + /** + * Transaction error count since node was initialized. If the error is retryable, multiple errors per + * transaction may occur. 
+ */ + uint64_t error_count; + + /** + * Transaction timeout count since node was initialized. If the timeout is retryable (ie socketTimeout), + * multiple timeouts per transaction may occur. + */ + uint64_t timeout_count; + /** * Connection queue iterator. Not atomic by design. */ @@ -335,18 +347,6 @@ typedef struct as_node_s { */ uint32_t error_rate; - /** - * Transaction error count since node was initialized. If the error is retryable, multiple errors per - * transaction may occur. - */ - uint64_t error_count; - - /** - * Transaction timeout count since node was initialized. If the timeout is retryable (ie socketTimeout), - * multiple timeouts per transaction may occur. - */ - uint64_t timeout_count; - /** * Server's generation count for peers. */ From 37a374adcc381ee009bcf973bf5d98e7ad29b33b Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Mon, 26 Feb 2024 11:56:36 -0500 Subject: [PATCH 55/64] Optimize setting async cmd latency_type. --- src/main/aerospike/as_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index a3ced16277..8d25a541e1 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -438,8 +438,8 @@ as_event_command_execute_in_loop(as_event_loop* event_loop, as_event_command* cm } else { cmd->begin = 0; + cmd->latency_type = AS_LATENCY_TYPE_NONE; } - cmd->latency_type = cmd->cluster->metrics_enabled ? cmd->latency_type : AS_LATENCY_TYPE_NONE; cmd->write_offset = (uint32_t)(cmd->buf - (uint8_t*)cmd); cmd->buf += cmd->write_len; cmd->conn = NULL; From 3f14ae6fc60ba55a45535fd9b6afda28b047c475 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Mon, 26 Feb 2024 13:19:52 -0500 Subject: [PATCH 56/64] Do not increment node error count on record not found. 
--- src/main/aerospike/as_command.c | 5 +++++ src/main/aerospike/as_event.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/main/aerospike/as_command.c b/src/main/aerospike/as_command.c index c4b2fb53d1..31095d2e04 100644 --- a/src/main/aerospike/as_command.c +++ b/src/main/aerospike/as_command.c @@ -713,6 +713,11 @@ as_command_execute(as_command* cmd, as_error* err) as_error_set_in_doubt(err, cmd->flags & AS_COMMAND_FLAGS_READ, cmd->sent); return status; + case AEROSPIKE_ERR_RECORD_NOT_FOUND: + // Do not increment error count on record not found. + as_error_set_in_doubt(err, cmd->flags & AS_COMMAND_FLAGS_READ, cmd->sent); + break; + default: as_node_add_error(node); as_error_set_in_doubt(err, cmd->flags & AS_COMMAND_FLAGS_READ, cmd->sent); diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index 8d25a541e1..78308ea61e 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -1315,6 +1315,11 @@ as_event_response_error(as_event_command* cmd, as_error* err) as_event_put_connection(cmd, pool); break; + case AEROSPIKE_ERR_RECORD_NOT_FOUND: + // Do not increment error count on record not found. + as_event_put_connection(cmd, pool); + break; + default: as_node_add_error(cmd->node); as_event_put_connection(cmd, pool); From 39a3795460ca138c1287a08dc81617bd107c51af Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Mon, 26 Feb 2024 14:16:22 -0500 Subject: [PATCH 57/64] Print socket address and port separated by a comma to be consistent with the java client metrics output. 
--- src/main/aerospike/as_metrics_writer.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/aerospike/as_metrics_writer.c b/src/main/aerospike/as_metrics_writer.c index 9c9d574ac4..a559772901 100644 --- a/src/main/aerospike/as_metrics_writer.c +++ b/src/main/aerospike/as_metrics_writer.c @@ -375,7 +375,7 @@ as_metrics_open_writer(as_metrics_writer* mw, as_error* err) timestamp_to_string(now_str, sizeof(now_str)); char data[512]; - int rv = snprintf(data, sizeof(data), "%s header(1) cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]] eventloop[processSize,queueSize] node[name,address:port,syncConn,asyncConn,errors,timeouts,latency[]] conn[inUse,inPool,opened,closed] latency(%u,%u)[type[l1,l2,l3...]]\n", + int rv = snprintf(data, sizeof(data), "%s header(1) cluster[name,cpu,mem,invalidNodeCount,tranCount,retryCount,delayQueueTimeoutCount,eventloop[],node[]] eventloop[processSize,queueSize] node[name,address,port,syncConn,asyncConn,errors,timeouts,latency[]] conn[inUse,inPool,opened,closed] latency(%u,%u)[type[l1,l2,l3...]]\n", now_str, mw->latency_columns, mw->latency_shift); if (rv <= 0) { fclose(mw->file); @@ -440,8 +440,17 @@ as_metrics_write_node(as_metrics_writer* mw, as_string_builder* sb, struct as_no as_string_builder_append_char(sb, '['); as_string_builder_append(sb, node->name); as_string_builder_append_char(sb, ','); + + as_address* address = as_node_get_address(node); + struct sockaddr* addr = (struct sockaddr*)&address->addr; + + char address_name[AS_IP_ADDRESS_SIZE]; + as_address_short_name(addr, address_name, sizeof(address_name)); + as_string_builder_append(sb, address_name); + as_string_builder_append_char(sb, ','); - as_string_builder_append(sb, as_node_get_address_string(node)); + uint16_t port = as_address_port(addr); + as_string_builder_append_uint(sb, port); as_string_builder_append_char(sb, ','); struct as_conn_stats_s sync; From 
f68b483c85767ce0e99edb4b4045c22e992a7b36 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Mon, 26 Feb 2024 15:07:45 -0500 Subject: [PATCH 58/64] Initialize error_count, timeout_count before initializing node metrics. --- src/main/aerospike/as_node.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/aerospike/as_node.c b/src/main/aerospike/as_node.c index 2d7a6938b2..e88d62dadb 100644 --- a/src/main/aerospike/as_node.c +++ b/src/main/aerospike/as_node.c @@ -181,6 +181,9 @@ as_node_create(as_cluster* cluster, as_node_info* node_info) node->active = true; node->partition_changed = true; node->rebalance_changed = cluster->rack_aware; + node->error_rate = 0; + node->error_count = 0; + node->timeout_count = 0; if (cluster->metrics_enabled) { node->metrics = as_node_metrics_init(cluster->metrics_latency_columns, cluster->metrics_latency_shift); @@ -193,9 +196,6 @@ as_node_create(as_cluster* cluster, as_node_info* node_info) node->sync_conn_pools = cf_malloc(sizeof(as_conn_pool) * cluster->conn_pools_per_node); node->sync_conns_opened = 1; node->sync_conns_closed = 0; - node->error_rate = 0; - node->error_count = 0; - node->timeout_count = 0; node->conn_iter = 0; uint32_t min = cluster->min_conns_per_node / cluster->conn_pools_per_node; From 082a088882ceece8b0460ef875b48624e58dc6f0 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Mon, 26 Feb 2024 15:56:49 -0500 Subject: [PATCH 59/64] Calculate latency on AEROSPIKE_ERR_RECORD_NOT_FOUND too. --- src/main/aerospike/as_command.c | 5 +++++ src/main/aerospike/as_event.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/main/aerospike/as_command.c b/src/main/aerospike/as_command.c index 31095d2e04..81111976d2 100644 --- a/src/main/aerospike/as_command.c +++ b/src/main/aerospike/as_command.c @@ -715,6 +715,11 @@ as_command_execute(as_command* cmd, as_error* err) case AEROSPIKE_ERR_RECORD_NOT_FOUND: // Do not increment error count on record not found. + // Add latency metrics instead. 
+ if (latency_type != AS_LATENCY_TYPE_NONE) { + uint64_t elapsed = cf_getns() - begin; + as_node_add_latency(node, latency_type, elapsed); + } as_error_set_in_doubt(err, cmd->flags & AS_COMMAND_FLAGS_READ, cmd->sent); break; diff --git a/src/main/aerospike/as_event.c b/src/main/aerospike/as_event.c index 78308ea61e..2d569fa8bf 100644 --- a/src/main/aerospike/as_event.c +++ b/src/main/aerospike/as_event.c @@ -1317,6 +1317,11 @@ as_event_response_error(as_event_command* cmd, as_error* err) case AEROSPIKE_ERR_RECORD_NOT_FOUND: // Do not increment error count on record not found. + // Add latency metrics instead. + if (cmd->latency_type != AS_LATENCY_TYPE_NONE) { + uint64_t elapsed = cf_getns() - cmd->begin; + as_node_add_latency(cmd->node, cmd->latency_type, elapsed); + } as_event_put_connection(cmd, pool); break; From 7e1a304c26c66728c1d59e9bba1609e8c5af4607 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Tue, 27 Feb 2024 15:55:33 -0500 Subject: [PATCH 60/64] Increment retries when applicable in sync scans. Reserve nodes whenever writing cluster metrics. --- src/main/aerospike/aerospike_scan.c | 4 ++++ src/main/aerospike/as_metrics_writer.c | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/aerospike/aerospike_scan.c b/src/main/aerospike/aerospike_scan.c index ecbe10f807..9d781ad2ab 100644 --- a/src/main/aerospike/aerospike_scan.c +++ b/src/main/aerospike/aerospike_scan.c @@ -892,6 +892,10 @@ as_scan_partitions( } uint32_t n_nodes = pt->node_parts.size; + + if (pt->iteration > 1) { + as_cluster_add_retries(cluster, n_nodes); + } // Initialize task. 
uint32_t error_mutex = 0; diff --git a/src/main/aerospike/as_metrics_writer.c b/src/main/aerospike/as_metrics_writer.c index a559772901..4b6a9f1410 100644 --- a/src/main/aerospike/as_metrics_writer.c +++ b/src/main/aerospike/as_metrics_writer.c @@ -543,8 +543,8 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, as_cluster* clust } as_string_builder_append(&sb, "],["); - // Since this function is only called from the cluster tend thread, there is no need to reserve nodes. - as_nodes* nodes = cluster->nodes; + as_nodes* nodes = as_nodes_reserve(cluster); + for (uint32_t i = 0; i < nodes->size; i++) { as_node* node = nodes->array[i]; @@ -553,6 +553,7 @@ as_metrics_write_cluster(as_error* err, as_metrics_writer* mw, as_cluster* clust } as_metrics_write_node(mw, &sb, node); } + as_nodes_release(nodes); as_string_builder_append(&sb, "]]"); as_string_builder_append_newline(&sb); From b3c2289ed9df3cd51b29afdbbe2840f55eedfe5c Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Tue, 27 Feb 2024 16:58:39 -0500 Subject: [PATCH 61/64] Increment retries when applicable in sync queries. --- src/main/aerospike/aerospike_query.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/aerospike/aerospike_query.c b/src/main/aerospike/aerospike_query.c index a9cb380b1a..087ff95e10 100644 --- a/src/main/aerospike/aerospike_query.c +++ b/src/main/aerospike/aerospike_query.c @@ -1486,6 +1486,10 @@ as_query_partitions( uint32_t n_nodes = pt->node_parts.size; + if (pt->iteration > 1) { + as_cluster_add_retries(cluster, n_nodes); + } + // Initialize task. uint32_t error_mutex = 0; From ced01bed1fa8c2bc38df9c21386f5dcebbaf87fa Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Tue, 27 Feb 2024 18:33:37 -0500 Subject: [PATCH 62/64] Do not pass in as_error to as_cluster_remove_nodes() because it never needs to return an error. 
--- src/main/aerospike/as_cluster.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/main/aerospike/as_cluster.c b/src/main/aerospike/as_cluster.c index e2f6e47579..811f742967 100644 --- a/src/main/aerospike/as_cluster.c +++ b/src/main/aerospike/as_cluster.c @@ -655,12 +655,13 @@ as_cluster_disable_metrics(as_error* err, as_cluster* cluster) return status; } -static as_status -as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* */ nodes_to_remove) +static void +as_cluster_remove_nodes(as_cluster* cluster, as_vector* /* */ nodes_to_remove) { // There is no need to delete nodes from partition tables because the nodes // have already been set to inactive. Further connection requests will result // in an exception and a different node will be tried. + as_error err; // Set node to inactive. for (uint32_t i = 0; i < nodes_to_remove->size; i++) { @@ -670,15 +671,14 @@ as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* metrics_lock); if (cluster->metrics_enabled) { - status = cluster->metrics_listeners.node_close_listener(err, node, node->cluster->metrics_listeners.udata); + status = cluster->metrics_listeners.node_close_listener(&err, node, node->cluster->metrics_listeners.udata); } pthread_mutex_unlock(&cluster->metrics_lock); if (status != AEROSPIKE_OK) { // Metrics failures should not interrupt cluster tend. // Log warning and continue processing. - as_log_warn("Metrics error: %s %s", as_error_string(status), err->message); - as_error_reset(err); + as_log_warn("Metrics error: %s %s", as_error_string(status), err.message); } as_node_deactivate(node); } @@ -690,8 +690,6 @@ as_cluster_remove_nodes(as_error* err, as_cluster* cluster, as_vector* /* shm_info) { as_shm_remove_nodes(cluster, nodes_to_remove); } - - return AEROSPIKE_OK; } static as_status @@ -962,10 +960,7 @@ as_cluster_tend(as_cluster* cluster, as_error* err, bool is_init) // Remove nodes in a batch. 
if (nodes_to_remove.size > 0) { - as_status status = as_cluster_remove_nodes(err, cluster, &nodes_to_remove); - if (status != AEROSPIKE_OK) { - return status; - } + as_cluster_remove_nodes(cluster, &nodes_to_remove); nodes = cluster->nodes; } as_vector_destroy(&nodes_to_remove); From 901ded2d6aeee8cebabf017eb2e1a455bc821460 Mon Sep 17 00:00:00 2001 From: Brian Nichols Date: Tue, 27 Feb 2024 18:48:11 -0500 Subject: [PATCH 63/64] Remove

as doxygen does not use it. Fix report_dir default doc. --- src/include/aerospike/as_cdt_ctx.h | 2 +- src/include/aerospike/as_cluster.h | 2 +- src/include/aerospike/as_metrics.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/include/aerospike/as_cdt_ctx.h b/src/include/aerospike/as_cdt_ctx.h index 9e5104b78d..8317a1cc76 100644 --- a/src/include/aerospike/as_cdt_ctx.h +++ b/src/include/aerospike/as_cdt_ctx.h @@ -211,7 +211,7 @@ as_cdt_ctx_add_list_value(as_cdt_ctx* ctx, as_val* val) /** * Lookup map by index offset. - *

+ * * If the index is negative, the resolved index starts backwards from end of list. * If an index is out of bounds, a parameter error will be returned. Examples: *