diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/seastar/dpdk/app/test-pmd/icmpecho.c | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/app/test-pmd/icmpecho.c')
-rw-r--r-- | src/seastar/dpdk/app/test-pmd/icmpecho.c | 560 |
1 files changed, 560 insertions, 0 deletions
diff --git a/src/seastar/dpdk/app/test-pmd/icmpecho.c b/src/seastar/dpdk/app/test-pmd/icmpecho.c new file mode 100644 index 00000000..d4b4c9eb --- /dev/null +++ b/src/seastar/dpdk/app/test-pmd/icmpecho.c @@ -0,0 +1,560 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2013 6WIND + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <stdarg.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <unistd.h> +#include <inttypes.h> + +#include <sys/queue.h> +#include <sys/stat.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_debug.h> +#include <rte_cycles.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_memory.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_arp.h> +#include <rte_ip.h> +#include <rte_icmp.h> +#include <rte_string_fns.h> +#include <rte_flow.h> + +#include "testpmd.h" + +static const char * +arp_op_name(uint16_t arp_op) +{ + switch (arp_op ) { + case ARP_OP_REQUEST: + return "ARP Request"; + case ARP_OP_REPLY: + return "ARP Reply"; + case ARP_OP_REVREQUEST: + return "Reverse ARP Request"; + case ARP_OP_REVREPLY: + return "Reverse ARP Reply"; + case ARP_OP_INVREQUEST: + return "Peer Identify Request"; + case ARP_OP_INVREPLY: + return "Peer Identify Reply"; + default: + break; + } + return "Unkwown ARP op"; +} + +static const char * +ip_proto_name(uint16_t ip_proto) +{ + static const char * ip_proto_names[] = { + "IP6HOPOPTS", /**< IP6 hop-by-hop options */ + "ICMP", /**< control message protocol */ + "IGMP", /**< group mgmt protocol */ + "GGP", /**< gateway^2 (deprecated) */ + "IPv4", /**< IPv4 encapsulation */ + + "UNASSIGNED", + "TCP", /**< transport control protocol */ + "ST", /**< Stream protocol II */ + "EGP", /**< exterior gateway protocol */ + "PIGP", /**< private interior gateway */ + + "RCC_MON", /**< BBN RCC Monitoring */ + "NVPII", /**< network voice protocol*/ + "PUP", /**< pup */ + "ARGUS", /**< Argus */ + "EMCON", /**< EMCON */ + + "XNET", /**< Cross Net Debugger */ + "CHAOS", /**< Chaos*/ + "UDP", /**< user datagram protocol */ + "MUX", /**< Multiplexing */ + "DCN_MEAS", /**< DCN Measurement Subsystems */ + + "HMP", /**< Host Monitoring */ + "PRM", /**< Packet Radio Measurement */ + "XNS_IDP", /**< xns idp */ + "TRUNK1", /**< Trunk-1 */ + "TRUNK2", /**< Trunk-2 */ + + "LEAF1", /**< Leaf-1 */ + "LEAF2", /**< Leaf-2 */ + "RDP", /**< Reliable Data */ + "IRTP", /**< Reliable Transaction */ + "TP4", /**< tp-4 w/ class negotiation */ + + "BLT", /**< Bulk Data Transfer */ + "NSP", /**< Network Services */ + "INP", /**< Merit Internodal */ + "SEP", /**< Sequential Exchange */ + "3PC", /**< Third Party Connect */ + + "IDPR", /**< InterDomain Policy Routing */ + "XTP", /**< XTP */ + "DDP", /**< Datagram Delivery */ + "CMTP", /**< Control Message Transport */ + "TPXX", /**< TP++ Transport */ + + "ILTP", /**< IL transport protocol */ + "IPv6_HDR", /**< IP6 header */ + "SDRP", /**< Source Demand Routing */ + "IPv6_RTG", /**< IP6 routing header */ + "IPv6_FRAG", /**< IP6 fragmentation header */ + + "IDRP", /**< InterDomain Routing*/ + "RSVP", /**< resource reservation */ + "GRE", /**< General Routing Encap. */ + "MHRP", /**< Mobile Host Routing */ + "BHA", /**< BHA */ + + "ESP", /**< IP6 Encap Sec. Payload */ + "AH", /**< IP6 Auth Header */ + "INLSP", /**< Integ. Net Layer Security */ + "SWIPE", /**< IP with encryption */ + "NHRP", /**< Next Hop Resolution */ + + "UNASSIGNED", + "UNASSIGNED", + "UNASSIGNED", + "ICMPv6", /**< ICMP6 */ + "IPv6NONEXT", /**< IP6 no next header */ + + "Ipv6DSTOPTS",/**< IP6 destination option */ + "AHIP", /**< any host internal protocol */ + "CFTP", /**< CFTP */ + "HELLO", /**< "hello" routing protocol */ + "SATEXPAK", /**< SATNET/Backroom EXPAK */ + + "KRYPTOLAN", /**< Kryptolan */ + "RVD", /**< Remote Virtual Disk */ + "IPPC", /**< Pluribus Packet Core */ + "ADFS", /**< Any distributed FS */ + "SATMON", /**< Satnet Monitoring */ + + "VISA", /**< VISA Protocol */ + "IPCV", /**< Packet Core Utility */ + "CPNX", /**< Comp. Prot. Net. Executive */ + "CPHB", /**< Comp. Prot. HeartBeat */ + "WSN", /**< Wang Span Network */ + + "PVP", /**< Packet Video Protocol */ + "BRSATMON", /**< BackRoom SATNET Monitoring */ + "ND", /**< Sun net disk proto (temp.) */ + "WBMON", /**< WIDEBAND Monitoring */ + "WBEXPAK", /**< WIDEBAND EXPAK */ + + "EON", /**< ISO cnlp */ + "VMTP", /**< VMTP */ + "SVMTP", /**< Secure VMTP */ + "VINES", /**< Banyon VINES */ + "TTP", /**< TTP */ + + "IGP", /**< NSFNET-IGP */ + "DGP", /**< dissimilar gateway prot. */ + "TCF", /**< TCF */ + "IGRP", /**< Cisco/GXS IGRP */ + "OSPFIGP", /**< OSPFIGP */ + + "SRPC", /**< Strite RPC protocol */ + "LARP", /**< Locus Address Resolution */ + "MTP", /**< Multicast Transport */ + "AX25", /**< AX.25 Frames */ + "4IN4", /**< IP encapsulated in IP */ + + "MICP", /**< Mobile Int.ing control */ + "SCCSP", /**< Semaphore Comm. security */ + "ETHERIP", /**< Ethernet IP encapsulation */ + "ENCAP", /**< encapsulation header */ + "AES", /**< any private encr. scheme */ + + "GMTP", /**< GMTP */ + "IPCOMP", /**< payload compression (IPComp) */ + "UNASSIGNED", + "UNASSIGNED", + "PIM", /**< Protocol Independent Mcast */ + }; + + if (ip_proto < sizeof(ip_proto_names) / sizeof(ip_proto_names[0])) + return ip_proto_names[ip_proto]; + switch (ip_proto) { +#ifdef IPPROTO_PGM + case IPPROTO_PGM: /**< PGM */ + return "PGM"; +#endif + case IPPROTO_SCTP: /**< Stream Control Transport Protocol */ + return "SCTP"; +#ifdef IPPROTO_DIVERT + case IPPROTO_DIVERT: /**< divert pseudo-protocol */ + return "DIVERT"; +#endif + case IPPROTO_RAW: /**< raw IP packet */ + return "RAW"; + default: + break; + } + return "UNASSIGNED"; +} + +static void +ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf) +{ + uint32_t ipv4_addr; + + ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr); + sprintf(buf, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF, + (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF, + ipv4_addr & 0xFF); +} + +static void +ether_addr_dump(const char *what, const struct ether_addr *ea) +{ + char buf[ETHER_ADDR_FMT_SIZE]; + + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, ea); + if (what) + printf("%s", what); + printf("%s", buf); +} + +static void +ipv4_addr_dump(const char *what, uint32_t be_ipv4_addr) +{ + char buf[16]; + + ipv4_addr_to_dot(be_ipv4_addr, buf); + if (what) + printf("%s", what); + printf("%s", buf); +} + +static uint16_t +ipv4_hdr_cksum(struct ipv4_hdr *ip_h) +{ + uint16_t *v16_h; + uint32_t ip_cksum; + + /* + * Compute the sum of successive 16-bit words of the IPv4 header, + * skipping the checksum field of the header. + */ + v16_h = (unaligned_uint16_t *) ip_h; + ip_cksum = v16_h[0] + v16_h[1] + v16_h[2] + v16_h[3] + + v16_h[4] + v16_h[6] + v16_h[7] + v16_h[8] + v16_h[9]; + + /* reduce 32 bit checksum to 16 bits and complement it */ + ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16); + ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16); + ip_cksum = (~ip_cksum) & 0x0000FFFF; + return (ip_cksum == 0) ? 0xFFFF : (uint16_t) ip_cksum; +} + +#define is_multicast_ipv4_addr(ipv4_addr) \ + (((rte_be_to_cpu_32((ipv4_addr)) >> 24) & 0x000000FF) == 0xE0) + +/* + * Receive a burst of packets, lookup for ICMP echo requests, and, if any, + * send back ICMP echo replies. + */ +static void +reply_to_icmp_echo_rqsts(struct fwd_stream *fs) +{ + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + struct rte_mbuf *pkt; + struct ether_hdr *eth_h; + struct vlan_hdr *vlan_h; + struct arp_hdr *arp_h; + struct ipv4_hdr *ip_h; + struct icmp_hdr *icmp_h; + struct ether_addr eth_addr; + uint32_t retry; + uint32_t ip_addr; + uint16_t nb_rx; + uint16_t nb_tx; + uint16_t nb_replies; + uint16_t eth_type; + uint16_t vlan_id; + uint16_t arp_op; + uint16_t arp_pro; + uint32_t cksum; + uint8_t i; + int l2_len; +#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES + uint64_t start_tsc; + uint64_t end_tsc; + uint64_t core_cycles; +#endif + +#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES + start_tsc = rte_rdtsc(); +#endif + + /* + * First, receive a burst of packets. + */ + nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, + nb_pkt_per_burst); + if (unlikely(nb_rx == 0)) + return; + +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; +#endif + fs->rx_packets += nb_rx; + nb_replies = 0; + for (i = 0; i < nb_rx; i++) { + if (likely(i < nb_rx - 1)) + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1], + void *)); + pkt = pkts_burst[i]; + eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type); + l2_len = sizeof(struct ether_hdr); + if (verbose_level > 0) { + printf("\nPort %d pkt-len=%u nb-segs=%u\n", + fs->rx_port, pkt->pkt_len, pkt->nb_segs); + ether_addr_dump(" ETH: src=", ð_h->s_addr); + ether_addr_dump(" dst=", ð_h->d_addr); + } + if (eth_type == ETHER_TYPE_VLAN) { + vlan_h = (struct vlan_hdr *) + ((char *)eth_h + sizeof(struct ether_hdr)); + l2_len += sizeof(struct vlan_hdr); + eth_type = rte_be_to_cpu_16(vlan_h->eth_proto); + if (verbose_level > 0) { + vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci) + & 0xFFF; + printf(" [vlan id=%u]", vlan_id); + } + } + if (verbose_level > 0) { + printf(" type=0x%04x\n", eth_type); + } + + /* Reply to ARP requests */ + if (eth_type == ETHER_TYPE_ARP) { + arp_h = (struct arp_hdr *) ((char *)eth_h + l2_len); + arp_op = RTE_BE_TO_CPU_16(arp_h->arp_op); + arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_pro); + if (verbose_level > 0) { + printf(" ARP: hrd=%d proto=0x%04x hln=%d " + "pln=%d op=%u (%s)\n", + RTE_BE_TO_CPU_16(arp_h->arp_hrd), + arp_pro, arp_h->arp_hln, + arp_h->arp_pln, arp_op, + arp_op_name(arp_op)); + } + if ((RTE_BE_TO_CPU_16(arp_h->arp_hrd) != + ARP_HRD_ETHER) || + (arp_pro != ETHER_TYPE_IPv4) || + (arp_h->arp_hln != 6) || + (arp_h->arp_pln != 4) + ) { + rte_pktmbuf_free(pkt); + if (verbose_level > 0) + printf("\n"); + continue; + } + if (verbose_level > 0) { + ether_addr_copy(&arp_h->arp_data.arp_sha, ð_addr); + ether_addr_dump(" sha=", ð_addr); + ip_addr = arp_h->arp_data.arp_sip; + ipv4_addr_dump(" sip=", ip_addr); + printf("\n"); + ether_addr_copy(&arp_h->arp_data.arp_tha, ð_addr); + ether_addr_dump(" tha=", ð_addr); + ip_addr = arp_h->arp_data.arp_tip; + ipv4_addr_dump(" tip=", ip_addr); + printf("\n"); + } + if (arp_op != ARP_OP_REQUEST) { + rte_pktmbuf_free(pkt); + continue; + } + + /* + * Build ARP reply. + */ + + /* Use source MAC address as destination MAC address. */ + ether_addr_copy(ð_h->s_addr, ð_h->d_addr); + /* Set source MAC address with MAC address of TX port */ + ether_addr_copy(&ports[fs->tx_port].eth_addr, + ð_h->s_addr); + + arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); + ether_addr_copy(&arp_h->arp_data.arp_tha, ð_addr); + ether_addr_copy(&arp_h->arp_data.arp_sha, &arp_h->arp_data.arp_tha); + ether_addr_copy(ð_h->s_addr, &arp_h->arp_data.arp_sha); + + /* Swap IP addresses in ARP payload */ + ip_addr = arp_h->arp_data.arp_sip; + arp_h->arp_data.arp_sip = arp_h->arp_data.arp_tip; + arp_h->arp_data.arp_tip = ip_addr; + pkts_burst[nb_replies++] = pkt; + continue; + } + + if (eth_type != ETHER_TYPE_IPv4) { + rte_pktmbuf_free(pkt); + continue; + } + ip_h = (struct ipv4_hdr *) ((char *)eth_h + l2_len); + if (verbose_level > 0) { + ipv4_addr_dump(" IPV4: src=", ip_h->src_addr); + ipv4_addr_dump(" dst=", ip_h->dst_addr); + printf(" proto=%d (%s)\n", + ip_h->next_proto_id, + ip_proto_name(ip_h->next_proto_id)); + } + + /* + * Check if packet is a ICMP echo request. + */ + icmp_h = (struct icmp_hdr *) ((char *)ip_h + + sizeof(struct ipv4_hdr)); + if (! ((ip_h->next_proto_id == IPPROTO_ICMP) && + (icmp_h->icmp_type == IP_ICMP_ECHO_REQUEST) && + (icmp_h->icmp_code == 0))) { + rte_pktmbuf_free(pkt); + continue; + } + + if (verbose_level > 0) + printf(" ICMP: echo request seq id=%d\n", + rte_be_to_cpu_16(icmp_h->icmp_seq_nb)); + + /* + * Prepare ICMP echo reply to be sent back. + * - switch ethernet source and destinations addresses, + * - use the request IP source address as the reply IP + * destination address, + * - if the request IP destination address is a multicast + * address: + * - choose a reply IP source address different from the + * request IP source address, + * - re-compute the IP header checksum. + * Otherwise: + * - switch the request IP source and destination + * addresses in the reply IP header, + * - keep the IP header checksum unchanged. + * - set IP_ICMP_ECHO_REPLY in ICMP header. + * ICMP checksum is computed by assuming it is valid in the + * echo request and not verified. + */ + ether_addr_copy(ð_h->s_addr, ð_addr); + ether_addr_copy(ð_h->d_addr, ð_h->s_addr); + ether_addr_copy(ð_addr, ð_h->d_addr); + ip_addr = ip_h->src_addr; + if (is_multicast_ipv4_addr(ip_h->dst_addr)) { + uint32_t ip_src; + + ip_src = rte_be_to_cpu_32(ip_addr); + if ((ip_src & 0x00000003) == 1) + ip_src = (ip_src & 0xFFFFFFFC) | 0x00000002; + else + ip_src = (ip_src & 0xFFFFFFFC) | 0x00000001; + ip_h->src_addr = rte_cpu_to_be_32(ip_src); + ip_h->dst_addr = ip_addr; + ip_h->hdr_checksum = ipv4_hdr_cksum(ip_h); + } else { + ip_h->src_addr = ip_h->dst_addr; + ip_h->dst_addr = ip_addr; + } + icmp_h->icmp_type = IP_ICMP_ECHO_REPLY; + cksum = ~icmp_h->icmp_cksum & 0xffff; + cksum += ~htons(IP_ICMP_ECHO_REQUEST << 8) & 0xffff; + cksum += htons(IP_ICMP_ECHO_REPLY << 8); + cksum = (cksum & 0xffff) + (cksum >> 16); + cksum = (cksum & 0xffff) + (cksum >> 16); + icmp_h->icmp_cksum = ~cksum; + pkts_burst[nb_replies++] = pkt; + } + + /* Send back ICMP echo replies, if any. */ + if (nb_replies > 0) { + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, + nb_replies); + /* + * Retry if necessary + */ + if (unlikely(nb_tx < nb_replies) && fs->retry_enabled) { + retry = 0; + while (nb_tx < nb_replies && + retry++ < burst_tx_retry_num) { + rte_delay_us(burst_tx_delay_time); + nb_tx += rte_eth_tx_burst(fs->tx_port, + fs->tx_queue, + &pkts_burst[nb_tx], + nb_replies - nb_tx); + } + } + fs->tx_packets += nb_tx; +#ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; +#endif + if (unlikely(nb_tx < nb_replies)) { + fs->fwd_dropped += (nb_replies - nb_tx); + do { + rte_pktmbuf_free(pkts_burst[nb_tx]); + } while (++nb_tx < nb_replies); + } + } + +#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES + end_tsc = rte_rdtsc(); + core_cycles = (end_tsc - start_tsc); + fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles); +#endif +} + +struct fwd_engine icmp_echo_engine = { + .fwd_mode_name = "icmpecho", + .port_fwd_begin = NULL, + .port_fwd_end = NULL, + .packet_fwd = reply_to_icmp_echo_rqsts, +}; |