summaryrefslogtreecommitdiffstats
path: root/src/lib/dhcp/pkt_filter_lpf.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/dhcp/pkt_filter_lpf.cc')
-rw-r--r--src/lib/dhcp/pkt_filter_lpf.cc340
1 files changed, 340 insertions, 0 deletions
diff --git a/src/lib/dhcp/pkt_filter_lpf.cc b/src/lib/dhcp/pkt_filter_lpf.cc
new file mode 100644
index 0000000..791e863
--- /dev/null
+++ b/src/lib/dhcp/pkt_filter_lpf.cc
@@ -0,0 +1,340 @@
+// Copyright (C) 2013-2021 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <config.h>
+#include <dhcp/dhcp4.h>
+#include <dhcp/iface_mgr.h>
+#include <dhcp/pkt4.h>
+#include <dhcp/pkt_filter_lpf.h>
+#include <dhcp/protocol_util.h>
+#include <exceptions/exceptions.h>
+#include <fcntl.h>
+#include <net/ethernet.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+
+namespace {
+
+using namespace isc::dhcp;
+
+/// The following structure defines a Berkeley Packet Filter program to perform
+/// packet filtering. The program operates on Ethernet packets. To help with
+/// interpretation of the program, for the types of Ethernet packets we are
+/// interested in, the header layout is:
+///
+/// 6 bytes Destination Ethernet Address
+/// 6 bytes Source Ethernet Address
+/// 2 bytes Ethernet packet type
+///
+/// 20 bytes Fixed part of IP header
+/// variable Variable part of IP header
+///
+/// 2 bytes UDP Source port
+/// 2 bytes UDP destination port
+/// 4 bytes Rest of UDP header
+///
+/// Each instruction is preceded with the comments giving the instruction
+/// number within a BPF program, in the following format: #123.
+///
+/// @todo We may want to extend the filter to receive packets sent
+/// to the particular IP address assigned to the interface or
+/// broadcast address.
+struct sock_filter dhcp_sock_filter [] = {
+ // Make sure this is an IP packet: check the half-word (two bytes)
+ // at offset 12 in the packet (the Ethernet packet type). If it
+ // is, advance to the next instruction. If not, advance 11
+ // instructions (which takes execution to the last instruction in
+ // the sequence: "drop it").
+ // #0
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ETHERNET_PACKET_TYPE_OFFSET),
+ // #1
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 11),
+
+ // Make sure it's a UDP packet. The IP protocol is at offset
+ // 9 in the IP header so, adding the Ethernet packet header size
+ // of 14 bytes gives an absolute byte offset in the packet of 23.
+ // #2
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS,
+ ETHERNET_HEADER_LEN + IP_PROTO_TYPE_OFFSET),
+ // #3
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 9),
+
+ // Make sure this isn't a fragment by checking that the fragment
+ // offset field in the IP header is zero. This field is the
+ // least-significant 13 bits in the bytes at offsets 6 and 7 in
+ // the IP header, so the half-word at offset 20 (6 + size of
+ // Ethernet header) is loaded and an appropriate mask applied.
+ // #4
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ETHERNET_HEADER_LEN + IP_FLAGS_OFFSET),
+ // #5
+ BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 7, 0),
+
+ // Check the packet's destination address. The program will only
+ // allow the packets sent to the broadcast address or unicast
+ // to the specific address on the interface. By default, this
+ // address is set to 0 and must be set to the specific value
+ // when the raw socket is created and the program is attached
+ // to it. The caller must assign the address to the
+ // prog.bf_insns[8].k in the network byte order.
+ // #6
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS,
+ ETHERNET_HEADER_LEN + IP_DEST_ADDR_OFFSET),
+ // If this is a broadcast address, skip the next check.
+ // #7
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0xffffffff, 1, 0),
+ // If this is not broadcast address, compare it with the unicast
+ // address specified for the interface.
+ // #8
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x00000000, 0, 4),
+
+ // Get the IP header length. This is achieved by the following
+ // (special) instruction that, given the offset of the start
+ // of the IP header (offset 14) loads the IP header length.
+ // #9
+ BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, ETHERNET_HEADER_LEN),
+
+ // Make sure it's to the right port. The following instruction
+ // adds the previously extracted IP header length to the given
+ // offset to locate the correct byte. The given offset of 16
+ // comprises the length of the Ethernet header (14) plus the offset
+ // of the UDP destination port (2) within the UDP header.
+ // #10
+ BPF_STMT(BPF_LD + BPF_H + BPF_IND, ETHERNET_HEADER_LEN + UDP_DEST_PORT),
+ // The following instruction tests against the default DHCP server port,
+ // but the action port is actually set in PktFilterBPF::openSocket().
+ // N.B. The code in that method assumes that this instruction is at
+ // offset 11 in the program. If this is changed, openSocket() must be
+ // updated.
+ // #11
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DHCP4_SERVER_PORT, 0, 1),
+
+ // If we passed all the tests, ask for the whole packet.
+ // #12
+ BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
+
+ // Otherwise, drop it.
+ // #13
+ BPF_STMT(BPF_RET + BPF_K, 0),
+};
+
+}
+
+using namespace isc::util;
+
+namespace isc {
+namespace dhcp {
+
+SocketInfo
+PktFilterLPF::openSocket(Iface& iface,
+ const isc::asiolink::IOAddress& addr,
+ const uint16_t port, const bool,
+ const bool) {
+
+ // Open fallback socket first. If it fails, it will give us an indication
+ // that there is another service (perhaps DHCP server) running.
+ // The function will throw an exception and effectively cease opening
+ // raw socket below.
+ int fallback = openFallbackSocket(addr, port);
+
+ // The fallback is open, so we are good to open primary socket.
+ int sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+ if (sock < 0) {
+ close(fallback);
+ isc_throw(SocketConfigError, "Failed to create raw LPF socket");
+ }
+
+ // Set the close-on-exec flag.
+ if (fcntl(sock, F_SETFD, FD_CLOEXEC) < 0) {
+ close(sock);
+ close(fallback);
+ isc_throw(SocketConfigError, "Failed to set close-on-exec flag"
+ << " on the socket " << sock);
+ }
+
+ // Create socket filter program. This program will only allow incoming UDP
+ // traffic which arrives on the specific (DHCP) port). It will also filter
+ // out all fragmented packets.
+ struct sock_fprog filter_program;
+ memset(&filter_program, 0, sizeof(filter_program));
+
+ filter_program.filter = dhcp_sock_filter;
+ filter_program.len = sizeof(dhcp_sock_filter) / sizeof(struct sock_filter);
+
+ // Configure the filter program to receive unicast packets sent to the
+ // specified address. The program will also allow packets sent to the
+ // 255.255.255.255 broadcast address.
+ dhcp_sock_filter[8].k = addr.toUint32();
+
+ // Override the default port value.
+ dhcp_sock_filter[11].k = port;
+ // Apply the filter.
+ if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter_program,
+ sizeof(filter_program)) < 0) {
+ close(sock);
+ close(fallback);
+ isc_throw(SocketConfigError, "Failed to install packet filtering program"
+ << " on the socket " << sock);
+ }
+
+ struct sockaddr_ll sa;
+ memset(&sa, 0, sizeof(sockaddr_ll));
+ sa.sll_family = AF_PACKET;
+ sa.sll_ifindex = iface.getIndex();
+
+ // For raw sockets we construct IP headers on our own, so we don't bind
+ // socket to IP address but to the interface. We will later use the
+ // Linux Packet Filtering to filter out these packets that we are
+ // interested in.
+ if (bind(sock, reinterpret_cast<const struct sockaddr*>(&sa),
+ sizeof(sa)) < 0) {
+ close(sock);
+ close(fallback);
+ isc_throw(SocketConfigError, "Failed to bind LPF socket '" << sock
+ << "' to interface '" << iface.getName() << "'");
+ }
+
+ // Set socket to non-blocking mode.
+ if (fcntl(sock, F_SETFL, O_NONBLOCK) != 0) {
+ // Get the error message immediately after the bind because the
+ // invocation to close() below would override the errno.
+ char* errmsg = strerror(errno);
+ close(sock);
+ close(fallback);
+ isc_throw(SocketConfigError, "failed to set SO_NONBLOCK option on the"
+ " LPF socket '" << sock << "' to interface '"
+ << iface.getName() << "', reason: " << errmsg);
+ }
+
+ return (SocketInfo(addr, port, sock, fallback));
+
+}
+
+Pkt4Ptr
+PktFilterLPF::receive(Iface& iface, const SocketInfo& socket_info) {
+ uint8_t raw_buf[IfaceMgr::RCVBUFSIZE];
+ // First let's get some data from the fallback socket. The data will be
+ // discarded but we don't want the socket buffer to bloat. We get the
+ // packets from the socket in loop but most of the time the loop will
+ // end after receiving one packet. The call to recv returns immediately
+ // when there is no data left on the socket because the socket is
+ // non-blocking.
+ // @todo In the normal conditions, both the primary socket and the fallback
+ // socket are in sync as they are set to receive packets on the same
+ // address and port. The reception of packets on the fallback socket
+ // shouldn't cause significant lags in packet reception. If we find in the
+ // future that it does, the sort of threshold could be set for the maximum
+ // bytes received on the fallback socket in a single round. Further
+ // optimizations would include an asynchronous read from the fallback socket
+ // when the DHCP server is idle.
+ int datalen;
+ do {
+ datalen = recv(socket_info.fallbackfd_, raw_buf, sizeof(raw_buf), 0);
+ } while (datalen > 0);
+
+ // Now that we finished getting data from the fallback socket, we
+ // have to get the data from the raw socket too.
+ int data_len = read(socket_info.sockfd_, raw_buf, sizeof(raw_buf));
+ // If negative value is returned by read(), it indicates that an
+ // error occurred. If returned value is 0, no data was read from the
+ // socket. In both cases something has gone wrong, because we expect
+ // that a chunk of data is there. We signal the lack of data by
+ // returning an empty packet.
+ if (data_len <= 0) {
+ return Pkt4Ptr();
+ }
+
+ InputBuffer buf(raw_buf, data_len);
+
+ // @todo: This is awkward way to solve the chicken and egg problem
+ // whereby we don't know the offset where DHCP data start in the
+ // received buffer when we create the packet object. In general case,
+ // the IP header has variable length. The information about its length
+ // is stored in one of its fields. Therefore, we have to decode the
+ // packet to get the offset of the DHCP data. The dummy object is
+ // created so as we can pass it to the functions which decode IP stack
+ // and find actual offset of the DHCP data.
+ // Once we find the offset we can create another Pkt4 object from
+ // the reminder of the input buffer and set the IP addresses and
+ // ports from the dummy packet. We should consider doing it
+ // in some more elegant way.
+ Pkt4Ptr dummy_pkt = Pkt4Ptr(new Pkt4(DHCPDISCOVER, 0));
+
+ // Decode ethernet, ip and udp headers.
+ decodeEthernetHeader(buf, dummy_pkt);
+ decodeIpUdpHeader(buf, dummy_pkt);
+
+ // Read the DHCP data.
+ std::vector<uint8_t> dhcp_buf;
+ buf.readVector(dhcp_buf, buf.getLength() - buf.getPosition());
+
+ // Decode DHCP data into the Pkt4 object.
+ Pkt4Ptr pkt = Pkt4Ptr(new Pkt4(&dhcp_buf[0], dhcp_buf.size()));
+
+ // Set the appropriate packet members using data collected from
+ // the decoded headers.
+ pkt->setIndex(iface.getIndex());
+ pkt->setIface(iface.getName());
+ pkt->setLocalAddr(dummy_pkt->getLocalAddr());
+ pkt->setRemoteAddr(dummy_pkt->getRemoteAddr());
+ pkt->setLocalPort(dummy_pkt->getLocalPort());
+ pkt->setRemotePort(dummy_pkt->getRemotePort());
+ pkt->setLocalHWAddr(dummy_pkt->getLocalHWAddr());
+ pkt->setRemoteHWAddr(dummy_pkt->getRemoteHWAddr());
+
+ return (pkt);
+}
+
+int
+PktFilterLPF::send(const Iface& iface, uint16_t sockfd, const Pkt4Ptr& pkt) {
+
+ OutputBuffer buf(14);
+
+ // Some interfaces may have no HW address - e.g. loopback interface.
+ // For these interfaces the HW address length is 0. If this is the case,
+ // then we will rely on the functions which construct the IP/UDP headers
+ // to provide a default HW addres. Otherwise, create the HW address
+ // object using the HW address of the interface.
+ if (iface.getMacLen() > 0) {
+ HWAddrPtr hwaddr(new HWAddr(iface.getMac(), iface.getMacLen(),
+ iface.getHWType()));
+ pkt->setLocalHWAddr(hwaddr);
+ }
+
+
+ // Ethernet frame header.
+ // Note that we don't validate whether HW addresses in 'pkt'
+ // are valid because they are checked by the function called.
+ writeEthernetHeader(pkt, buf);
+
+ // IP and UDP header
+ writeIpUdpHeader(pkt, buf);
+
+ // DHCPv4 message
+ buf.writeData(pkt->getBuffer().getData(), pkt->getBuffer().getLength());
+
+ sockaddr_ll sa;
+ memset(&sa, 0x0, sizeof(sa));
+ sa.sll_family = AF_PACKET;
+ sa.sll_ifindex = iface.getIndex();
+ sa.sll_protocol = htons(ETH_P_IP);
+ sa.sll_halen = 6;
+
+ int result = sendto(sockfd, buf.getData(), buf.getLength(), 0,
+ reinterpret_cast<const struct sockaddr*>(&sa),
+ sizeof(sockaddr_ll));
+ if (result < 0) {
+ isc_throw(SocketWriteError, "failed to send DHCPv4 packet, errno="
+ << errno << " (check errno.h)");
+ }
+
+ return (0);
+
+}
+
+
+} // end of isc::dhcp namespace
+} // end of isc namespace