summaryrefslogtreecommitdiffstats
path: root/src/msg/async/dpdk/IP.h
blob: 480b4b95b3226fa7544129d2cc806659e4771878 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
/*
 * This file is open source software, licensed to you under the terms
 * of the Apache License, Version 2.0 (the "License").  See the NOTICE file
 * distributed with this work for additional information regarding copyright
 * ownership.  You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*
 * Copyright (C) 2014 Cloudius Systems, Ltd.
 *
 */

#ifndef CEPH_MSG_IP_H_
#define CEPH_MSG_IP_H_

#include <arpa/inet.h>
#include <unordered_map>
#include <cstdint>
#include <array>
#include <map>
#include <list>
#include <chrono>

#include "msg/async/Event.h"
#include "common/Throttle.h"

#include "array_map.h"
#include "ARP.h"
#include "IPChecksum.h"
#include "ip_types.h"
#include "const.h"
#include "net.h"
#include "PacketUtil.h"
#include "toeplitz.h"

class ipv4;
template <ip_protocol_num ProtoNum>
class ipv4_l4;

template <typename InetTraits>
class tcp;

struct ipv4_traits {
  using address_type = ipv4_address;
  using inet_type = ipv4_l4<ip_protocol_num::tcp>;
  struct l4packet {
    ipv4_address to;
    Packet p;
    ethernet_address e_dst;
    ip_protocol_num proto_num;
  };
  using packet_provider_type = std::function<Tub<l4packet> ()>;
  static void tcp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
    csum.sum_many(src.ip, dst.ip, uint8_t(0), uint8_t(ip_protocol_num::tcp), len);
  }
  static constexpr uint8_t ip_hdr_len_min = ipv4_hdr_len_min;
};

template <ip_protocol_num ProtoNum>
class ipv4_l4 {
 public:
  ipv4& _inet;
 public:
  ipv4_l4(ipv4& inet) : _inet(inet) {}
  void register_packet_provider(ipv4_traits::packet_provider_type func);
  void wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb);
};

class ip_protocol {
 public:
  virtual ~ip_protocol() {}
  virtual void received(Packet p, ipv4_address from, ipv4_address to) = 0;
  virtual bool forward(forward_hash& out_hash_data, Packet& p, size_t off) { return true; }
};

template <typename InetTraits>
struct l4connid {
  using ipaddr = typename InetTraits::address_type;
  using inet_type = typename InetTraits::inet_type;
  struct connid_hash;

  ipaddr local_ip;
  ipaddr foreign_ip;
  uint16_t local_port;
  uint16_t foreign_port;

  bool operator==(const l4connid& x) const {
    return local_ip == x.local_ip
           && foreign_ip == x.foreign_ip
           && local_port == x.local_port
           && foreign_port == x.foreign_port;
  }

  uint32_t hash(const rss_key_type& rss_key) {
    forward_hash hash_data;
    hash_data.push_back(hton(foreign_ip.ip));
    hash_data.push_back(hton(local_ip.ip));
    hash_data.push_back(hton(foreign_port));
    hash_data.push_back(hton(local_port));
    return toeplitz_hash(rss_key, hash_data);
  }
};

class ipv4_tcp final : public ip_protocol {
  ipv4_l4<ip_protocol_num::tcp> _inet_l4;
  std::unique_ptr<tcp<ipv4_traits>> _tcp;
 public:
  ipv4_tcp(ipv4& inet, EventCenter *c);
  ~ipv4_tcp();
  virtual void received(Packet p, ipv4_address from, ipv4_address to) override;
  virtual bool forward(forward_hash& out_hash_data, Packet& p, size_t off) override;
  friend class ipv4;
};

struct icmp_hdr {
  enum class msg_type : uint8_t {
    echo_reply = 0,
    echo_request = 8,
  };
  msg_type type;
  uint8_t code;
  uint16_t csum;
  uint32_t rest;
} __attribute__((packed));


class icmp {
 public:
  using ipaddr = ipv4_address;
  using inet_type = ipv4_l4<ip_protocol_num::icmp>;
  explicit icmp(CephContext *c, inet_type& inet)
      : cct(c), _inet(inet), _queue_space(c, "DPDK::icmp::_queue_space", 212992) {
    _inet.register_packet_provider([this] {
      Tub<ipv4_traits::l4packet> l4p;
      if (!_packetq.empty()) {
        l4p = std::move(_packetq.front());
        _packetq.pop_front();
        _queue_space.put(l4p->p.len());
      }
      return l4p;
    });
  }
  void received(Packet p, ipaddr from, ipaddr to);

 private:
  CephContext *cct;
  // ipv4_l4<ip_protocol_num::icmp>
  inet_type& _inet;
  circular_buffer<ipv4_traits::l4packet> _packetq;
  Throttle _queue_space;
};

class ipv4_icmp final : public ip_protocol {
  CephContext *cct;
  ipv4_l4<ip_protocol_num::icmp> _inet_l4;
  icmp _icmp;
 public:
  ipv4_icmp(CephContext *c, ipv4& inet) : cct(c), _inet_l4(inet), _icmp(c, _inet_l4) {}
  virtual void received(Packet p, ipv4_address from, ipv4_address to) override {
    _icmp.received(std::move(p), from, to);
  }
  friend class ipv4;
};

struct ip_hdr;

struct ip_packet_filter {
  virtual ~ip_packet_filter() {};
  virtual void handle(Packet& p, ip_hdr* iph, ethernet_address from, bool & handled) = 0;
};

struct ipv4_frag_id {
  struct hash;
  ipv4_address src_ip;
  ipv4_address dst_ip;
  uint16_t identification;
  uint8_t protocol;
  bool operator==(const ipv4_frag_id& x) const {
    return src_ip == x.src_ip &&
           dst_ip == x.dst_ip &&
           identification == x.identification &&
           protocol == x.protocol;
  }
};

struct ipv4_frag_id::hash : private std::hash<ipv4_address>,
                            private std::hash<uint16_t>, private std::hash<uint8_t> {
  size_t operator()(const ipv4_frag_id& id) const noexcept {
    using h1 = std::hash<ipv4_address>;
    using h2 = std::hash<uint16_t>;
    using h3 = std::hash<uint8_t>;
    return h1::operator()(id.src_ip) ^
           h1::operator()(id.dst_ip) ^
           h2::operator()(id.identification) ^
           h3::operator()(id.protocol);
  }
};

struct ipv4_tag {};
using ipv4_packet_merger = packet_merger<uint32_t, ipv4_tag>;

class interface;

class ipv4 {
 public:
  using address_type = ipv4_address;
  using proto_type = uint16_t;
  static address_type broadcast_address() { return ipv4_address(0xffffffff); }
  static proto_type arp_protocol_type() { return proto_type(eth_protocol_num::ipv4); }
  CephContext *cct;
  EventCenter *center;

 private:
  interface* _netif;
  std::vector<ipv4_traits::packet_provider_type> _pkt_providers;
  Tub<uint64_t> frag_timefd;
  EventCallbackRef frag_handler;
  arp _global_arp;
  arp_for<ipv4> _arp;
  ipv4_address _host_address;
  ipv4_address _gw_address;
  ipv4_address _netmask;
  l3_protocol _l3;
  subscription<Packet, ethernet_address> _rx_packets;
  ipv4_tcp _tcp;
  ipv4_icmp _icmp;
  array_map<ip_protocol*, 256> _l4;
  ip_packet_filter *_packet_filter;
  struct frag {
    Packet header;
    ipv4_packet_merger data;
    utime_t rx_time;
    uint32_t mem_size = 0;
    // fragment with MF == 0 inidates it is the last fragment
    bool last_frag_received = false;

    Packet get_assembled_packet(ethernet_address from, ethernet_address to);
    int32_t merge(ip_hdr &h, uint16_t offset, Packet p);
    bool is_complete();
  };
  std::unordered_map<ipv4_frag_id, frag, ipv4_frag_id::hash> _frags;
  std::list<ipv4_frag_id> _frags_age;
  static utime_t _frag_timeout;
  static constexpr uint32_t _frag_low_thresh{3 * 1024 * 1024};
  static constexpr uint32_t _frag_high_thresh{4 * 1024 * 1024};
  uint32_t _frag_mem = 0;
  circular_buffer<l3_protocol::l3packet> _packetq;
  unsigned _pkt_provider_idx = 0;
  PerfCounters *perf_logger;

 private:
  int handle_received_packet(Packet p, ethernet_address from);
  bool forward(forward_hash& out_hash_data, Packet& p, size_t off);
  Tub<l3_protocol::l3packet> get_packet();
  bool in_my_netmask(ipv4_address a) const {
    return !((a.ip ^ _host_address.ip) & _netmask.ip);
  }
  void frag_limit_mem();
  void frag_drop(ipv4_frag_id frag_id, uint32_t dropped_size) {
    _frags.erase(frag_id);
    _frag_mem -= dropped_size;
  }
  void frag_arm(utime_t now) {
    auto tp = now + _frag_timeout;
    frag_timefd.construct(center->create_time_event(tp.to_nsec() / 1000, frag_handler));
  }
  void frag_arm() {
    auto now = ceph_clock_now();
    frag_timefd.construct(center->create_time_event(now.to_nsec() / 1000, frag_handler));
  }

 public:
  void frag_timeout();

 public:
  explicit ipv4(CephContext *c, EventCenter *cen, interface* netif);
  ~ipv4() {
    delete frag_handler;
  }
  void set_host_address(ipv4_address ip) {
    _host_address = ip;
    _arp.set_self_addr(ip);
  }
  ipv4_address host_address() {
    return _host_address;
  }
  void set_gw_address(ipv4_address ip) {
    _gw_address = ip;
  }
  ipv4_address gw_address() const {
    return _gw_address;
  }
  void set_netmask_address(ipv4_address ip) {
    _netmask = ip;
  }
  ipv4_address netmask_address() const {
    return _netmask;
  }
  interface *netif() const {
    return _netif;
  }
  // TODO or something. Should perhaps truly be a list
  // of filters. With ordering. And blackjack. Etc.
  // But for now, a simple single raw pointer suffices
  void set_packet_filter(ip_packet_filter *f) {
    _packet_filter = f;
  }
  ip_packet_filter * packet_filter() const {
    return _packet_filter;
  }
  void send(ipv4_address to, ip_protocol_num proto_num, Packet p, ethernet_address e_dst);
  tcp<ipv4_traits>& get_tcp() { return *_tcp._tcp; }
  void register_l4(proto_type id, ip_protocol* handler);
  const hw_features& get_hw_features() const;
  static bool needs_frag(Packet& p, ip_protocol_num proto_num, hw_features hw_features) {
    if (p.len() + ipv4_hdr_len_min <= hw_features.mtu)
      return false;

    if ((proto_num == ip_protocol_num::tcp && hw_features.tx_tso))
      return false;

    return true;
  }
  void learn(ethernet_address l2, ipv4_address l3) {
    _arp.learn(l2, l3);
  }
  void register_packet_provider(ipv4_traits::packet_provider_type&& func) {
    _pkt_providers.push_back(std::move(func));
  }
  void wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb);
};

template <ip_protocol_num ProtoNum>
inline void ipv4_l4<ProtoNum>::register_packet_provider(
    ipv4_traits::packet_provider_type func) {
  _inet.register_packet_provider([func] {
    auto l4p = func();
    if (l4p) {
      (*l4p).proto_num = ProtoNum;
    }
    return l4p;
  });
}

template <ip_protocol_num ProtoNum>
inline void ipv4_l4<ProtoNum>::wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb) {
  _inet.wait_l2_dst_address(to, std::move(p), std::move(cb));
}

struct ip_hdr {
  uint8_t ihl : 4;
  uint8_t ver : 4;
  uint8_t dscp : 6;
  uint8_t ecn : 2;
  uint16_t len;
  uint16_t id;
  uint16_t frag;
  enum class frag_bits : uint8_t { mf = 13, df = 14, reserved = 15, offset_shift = 3 };
  uint8_t ttl;
  uint8_t ip_proto;
  uint16_t csum;
  ipv4_address src_ip;
  ipv4_address dst_ip;
  uint8_t options[0];
  ip_hdr hton() {
    ip_hdr hdr = *this;
    hdr.len = ::hton(len);
    hdr.id = ::hton(id);
    hdr.frag = ::hton(frag);
    hdr.csum = ::hton(csum);
    hdr.src_ip.ip = ::hton(src_ip.ip);
    hdr.dst_ip.ip = ::hton(dst_ip.ip);
    return hdr;
  }
  ip_hdr ntoh() {
    ip_hdr hdr = *this;
    hdr.len = ::ntoh(len);
    hdr.id = ::ntoh(id);
    hdr.frag = ::ntoh(frag);
    hdr.csum = ::ntoh(csum);
    hdr.src_ip = src_ip.ntoh();
    hdr.dst_ip = dst_ip.ntoh();
    return hdr;
  }

  bool mf() { return frag & (1 << uint8_t(frag_bits::mf)); }
  bool df() { return frag & (1 << uint8_t(frag_bits::df)); }
  uint16_t offset() { return frag << uint8_t(frag_bits::offset_shift); }
} __attribute__((packed));

template <typename InetTraits>
struct l4connid<InetTraits>::connid_hash : private std::hash<ipaddr>, private std::hash<uint16_t> {
  size_t operator()(const l4connid<InetTraits>& id) const noexcept {
    using h1 = std::hash<ipaddr>;
    using h2 = std::hash<uint16_t>;
    return h1::operator()(id.local_ip)
           ^ h1::operator()(id.foreign_ip)
           ^ h2::operator()(id.local_port)
           ^ h2::operator()(id.foreign_port);
  }
};

#endif /* CEPH_MSG_IP_H */