Skip to content

Commit cd1dade

Browse files
tbarbetteFerruh Yigit
authored and
Ferruh Yigit
committed
examples/rxtx_callbacks: support HW timestamp
Use the rxtx callbacks to demonstrate a way to use rte_eth_read_clock to convert hardware timestamps into a number of cycles. This makes it possible to measure the time elapsed since the packet entered the device, whereas the regular latency only shows the time since it entered the software stack. Signed-off-by: Tom Barbette <[email protected]> Reviewed-by: Ferruh Yigit <[email protected]>
1 parent e571ad5 commit cd1dade

File tree

4 files changed

+99
-5
lines changed

4 files changed

+99
-5
lines changed

doc/guides/sample_app_ug/rxtx_callbacks.rst

+8-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ In the sample application a user defined callback is applied to all received
1313
packets to add a timestamp. A separate callback is applied to all packets
1414
prior to transmission to calculate the elapsed time, in CPU cycles.
1515

16+
If hardware timestamping is supported by the NIC, the sample application will
17+
also display the average latency since the packet was timestamped in hardware,
18+
on top of the latency since the packet was received and processed by the RX
19+
callback.
1620

1721
Compiling the Application
1822
-------------------------
@@ -36,7 +40,10 @@ To run the example in a ``linux`` environment:
3640

3741
.. code-block:: console
3842
39-
./build/rxtx_callbacks -l 1 -n 4
43+
./build/rxtx_callbacks -l 1 -n 4 -- [-t]
44+
45+
Use -t to enable hardware timestamping. If not supported by the NIC, an error
46+
will be displayed.
4047

4148
Refer to *DPDK Getting Started Guide* for general information on running
4249
applications and the Environment Abstraction Layer (EAL) options.

examples/rxtx_callbacks/Makefile

+3
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ include $(RTE_SDK)/mk/rte.vars.mk
5050

5151
CFLAGS += $(WERROR_FLAGS)
5252

53+
# rte_eth_read_clock is experimental
54+
CFLAGS += -DALLOW_EXPERIMENTAL_API
55+
5356
# workaround for a gcc bug with noreturn attribute
5457
# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
5558
ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)

examples/rxtx_callbacks/main.c

+85-4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#include <stdint.h>
66
#include <inttypes.h>
7+
#include <getopt.h>
78
#include <rte_eal.h>
89
#include <rte_ethdev.h>
910
#include <rte_cycles.h>
@@ -17,6 +18,9 @@
1718
#define MBUF_CACHE_SIZE 250
1819
#define BURST_SIZE 32
1920

21+
static const char usage[] =
22+
"%s EAL_ARGS -- [-t]\n";
23+
2024
static const struct rte_eth_conf port_conf_default = {
2125
.rxmode = {
2226
.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
@@ -25,9 +29,14 @@ static const struct rte_eth_conf port_conf_default = {
2529

2630
static struct {
2731
uint64_t total_cycles;
32+
uint64_t total_queue_cycles;
2833
uint64_t total_pkts;
2934
} latency_numbers;
3035

36+
int hw_timestamping;
37+
38+
#define TICKS_PER_CYCLE_SHIFT 16
39+
static uint64_t ticks_per_cycle_mult;
3140

3241
static uint16_t
3342
add_timestamps(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
@@ -43,22 +52,42 @@ add_timestamps(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
4352
}
4453

4554
static uint16_t
46-
calc_latency(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
55+
calc_latency(uint16_t port, uint16_t qidx __rte_unused,
4756
struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
4857
{
4958
uint64_t cycles = 0;
59+
uint64_t queue_ticks = 0;
5060
uint64_t now = rte_rdtsc();
61+
uint64_t ticks;
5162
unsigned i;
5263

53-
for (i = 0; i < nb_pkts; i++)
64+
if (hw_timestamping)
65+
rte_eth_read_clock(port, &ticks);
66+
67+
for (i = 0; i < nb_pkts; i++) {
5468
cycles += now - pkts[i]->udata64;
69+
if (hw_timestamping)
70+
queue_ticks += ticks - pkts[i]->timestamp;
71+
}
72+
5573
latency_numbers.total_cycles += cycles;
74+
if (hw_timestamping)
75+
latency_numbers.total_queue_cycles += (queue_ticks
76+
* ticks_per_cycle_mult) >> TICKS_PER_CYCLE_SHIFT;
77+
5678
latency_numbers.total_pkts += nb_pkts;
5779

5880
if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) {
5981
printf("Latency = %"PRIu64" cycles\n",
6082
latency_numbers.total_cycles / latency_numbers.total_pkts);
61-
latency_numbers.total_cycles = latency_numbers.total_pkts = 0;
83+
if (hw_timestamping) {
84+
printf("Latency from HW = %"PRIu64" cycles\n",
85+
latency_numbers.total_queue_cycles
86+
/ latency_numbers.total_pkts);
87+
}
88+
latency_numbers.total_cycles = 0;
89+
latency_numbers.total_queue_cycles = 0;
90+
latency_numbers.total_pkts = 0;
6291
}
6392
return nb_pkts;
6493
}
@@ -77,6 +106,7 @@ port_init(uint16_t port, struct rte_mempool *mbuf_pool)
77106
int retval;
78107
uint16_t q;
79108
struct rte_eth_dev_info dev_info;
109+
struct rte_eth_rxconf rxconf;
80110
struct rte_eth_txconf txconf;
81111

82112
if (!rte_eth_dev_is_valid_port(port))
@@ -95,9 +125,20 @@ port_init(uint16_t port, struct rte_mempool *mbuf_pool)
95125
if (retval != 0)
96126
return retval;
97127

128+
rxconf = dev_info.default_rxconf;
129+
130+
if (hw_timestamping) {
131+
if (!(dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)) {
132+
printf("\nERROR: Port %u does not support hardware timestamping\n"
133+
, port);
134+
return -1;
135+
}
136+
rxconf.offloads |= DEV_RX_OFFLOAD_TIMESTAMP;
137+
}
138+
98139
for (q = 0; q < rx_rings; q++) {
99140
retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
100-
rte_eth_dev_socket_id(port), NULL, mbuf_pool);
141+
rte_eth_dev_socket_id(port), &rxconf, mbuf_pool);
101142
if (retval < 0)
102143
return retval;
103144
}
@@ -115,6 +156,29 @@ port_init(uint16_t port, struct rte_mempool *mbuf_pool)
115156
if (retval < 0)
116157
return retval;
117158

159+
if (hw_timestamping && ticks_per_cycle_mult == 0) {
160+
uint64_t cycles_base = rte_rdtsc();
161+
uint64_t ticks_base;
162+
retval = rte_eth_read_clock(port, &ticks_base);
163+
if (retval != 0)
164+
return retval;
165+
rte_delay_ms(100);
166+
uint64_t cycles = rte_rdtsc();
167+
uint64_t ticks;
168+
rte_eth_read_clock(port, &ticks);
169+
uint64_t c_freq = cycles - cycles_base;
170+
uint64_t t_freq = ticks - ticks_base;
171+
double freq_mult = (double)c_freq / t_freq;
172+
printf("TSC Freq ~= %" PRIu64
173+
"\nHW Freq ~= %" PRIu64
174+
"\nRatio : %f\n",
175+
c_freq * 10, t_freq * 10, freq_mult);
176+
/* TSC will be faster than internal ticks so freq_mult is > 0
177+
* We convert the multiplication to an integer shift & mult
178+
*/
179+
ticks_per_cycle_mult = (1 << TICKS_PER_CYCLE_SHIFT) / freq_mult;
180+
}
181+
118182
struct rte_ether_addr addr;
119183

120184
rte_eth_macaddr_get(port, &addr);
@@ -177,6 +241,11 @@ main(int argc, char *argv[])
177241
struct rte_mempool *mbuf_pool;
178242
uint16_t nb_ports;
179243
uint16_t portid;
244+
struct option lgopts[] = {
245+
{ NULL, 0, 0, 0 }
246+
};
247+
int opt, option_index;
248+
180249

181250
/* init EAL */
182251
int ret = rte_eal_init(argc, argv);
@@ -186,6 +255,18 @@ main(int argc, char *argv[])
186255
argc -= ret;
187256
argv += ret;
188257

258+
while ((opt = getopt_long(argc, argv, "t", lgopts, &option_index))
259+
!= EOF)
260+
switch (opt) {
261+
case 't':
262+
hw_timestamping = 1;
263+
break;
264+
default:
265+
printf(usage, argv[0]);
266+
return -1;
267+
}
268+
optind = 1; /* reset getopt lib */
269+
189270
nb_ports = rte_eth_dev_count_avail();
190271
if (nb_ports < 2 || (nb_ports & 1))
191272
rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");

examples/rxtx_callbacks/meson.build

+3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
# To build this example as a standalone application with an already-installed
77
# DPDK instance, use 'make'
88

9+
# rte_eth_read_clock is experimental
10+
allow_experimental_apis = true
11+
912
sources = files(
1013
'main.c'
1114
)

0 commit comments

Comments
 (0)