From 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 12:05:51 +0200 Subject: Adding upstream version 5.10.209. Signed-off-by: Daniel Baumann --- Documentation/networking/6lowpan.rst | 53 + Documentation/networking/6pack.rst | 191 ++ Documentation/networking/af_xdp.rst | 640 ++++ Documentation/networking/alias.rst | 49 + Documentation/networking/arcnet-hardware.rst | 3234 ++++++++++++++++++++ Documentation/networking/arcnet.rst | 594 ++++ Documentation/networking/atm.rst | 14 + Documentation/networking/ax25.rst | 16 + Documentation/networking/bareudp.rst | 58 + Documentation/networking/batman-adv.rst | 168 + Documentation/networking/bonding.rst | 2873 +++++++++++++++++ Documentation/networking/bridge.rst | 21 + Documentation/networking/caif/caif.rst | 139 + Documentation/networking/caif/index.rst | 12 + Documentation/networking/caif/linux_caif.rst | 195 ++ Documentation/networking/can.rst | 1437 +++++++++ Documentation/networking/can_ucan_protocol.rst | 332 ++ Documentation/networking/cdc_mbim.rst | 355 +++ Documentation/networking/checksum-offloads.rst | 143 + Documentation/networking/dccp.rst | 219 ++ Documentation/networking/dctcp.rst | 52 + .../networking/device_drivers/appletalk/cops.rst | 80 + .../networking/device_drivers/appletalk/index.rst | 19 + .../networking/device_drivers/appletalk/ltpc.rst | 144 + .../networking/device_drivers/atm/cxacru-cf.py | 48 + .../networking/device_drivers/atm/cxacru.rst | 120 + .../networking/device_drivers/atm/fore200e.rst | 66 + .../networking/device_drivers/atm/index.rst | 20 + .../networking/device_drivers/atm/iphase.rst | 193 ++ .../networking/device_drivers/cable/index.rst | 18 + .../networking/device_drivers/cable/sb1000.rst | 222 ++ .../networking/device_drivers/cellular/index.rst | 18 + .../device_drivers/cellular/qualcomm/rmnet.rst | 95 + .../device_drivers/ethernet/3com/3c509.rst | 249 ++ .../device_drivers/ethernet/3com/vortex.rst | 459 +++ .../device_drivers/ethernet/altera/altera_tse.rst | 286 ++ .../device_drivers/ethernet/amazon/ena.rst | 325 ++ .../device_drivers/ethernet/aquantia/atlantic.rst | 556 ++++ .../device_drivers/ethernet/chelsio/cxgb.rst | 393 +++ .../device_drivers/ethernet/cirrus/cs89x0.rst | 647 ++++ .../device_drivers/ethernet/davicom/dm9000.rst | 171 ++ .../device_drivers/ethernet/dec/de4x5.rst | 189 ++ .../device_drivers/ethernet/dec/dmfe.rst | 71 + .../device_drivers/ethernet/dlink/dl2k.rst | 314 ++ .../device_drivers/ethernet/freescale/dpaa.rst | 269 ++ .../ethernet/freescale/dpaa2/dpio-driver.rst | 160 + .../ethernet/freescale/dpaa2/ethernet-driver.rst | 186 ++ .../ethernet/freescale/dpaa2/index.rst | 11 + .../ethernet/freescale/dpaa2/mac-phy-support.rst | 191 ++ .../ethernet/freescale/dpaa2/overview.rst | 405 +++ .../device_drivers/ethernet/freescale/gianfar.rst | 51 + .../device_drivers/ethernet/google/gve.rst | 123 + .../device_drivers/ethernet/huawei/hinic.rst | 128 + .../networking/device_drivers/ethernet/index.rst | 60 + .../device_drivers/ethernet/intel/e100.rst | 188 ++ .../device_drivers/ethernet/intel/e1000.rst | 463 +++ .../device_drivers/ethernet/intel/e1000e.rst | 383 +++ .../device_drivers/ethernet/intel/fm10k.rst | 142 + .../device_drivers/ethernet/intel/i40e.rst | 771 +++++ .../device_drivers/ethernet/intel/iavf.rst | 331 ++ .../device_drivers/ethernet/intel/ice.rst | 46 + .../device_drivers/ethernet/intel/igb.rst | 213 ++ .../device_drivers/ethernet/intel/igbvf.rst | 65 + .../device_drivers/ethernet/intel/ixgb.rst | 468 +++ .../device_drivers/ethernet/intel/ixgbe.rst | 557 ++++ .../device_drivers/ethernet/intel/ixgbevf.rst | 67 + .../device_drivers/ethernet/marvell/octeontx2.rst | 159 + .../device_drivers/ethernet/mellanox/mlx5.rst | 321 ++ .../device_drivers/ethernet/microsoft/netvsc.rst | 116 + .../device_drivers/ethernet/neterion/s2io.rst | 196 ++ .../device_drivers/ethernet/neterion/vxge.rst | 115 + .../device_drivers/ethernet/netronome/nfp.rst | 249 ++ .../device_drivers/ethernet/pensando/ionic.rst | 274 ++ .../device_drivers/ethernet/smsc/smc9.rst | 48 + .../device_drivers/ethernet/stmicro/stmmac.rst | 700 +++++ .../networking/device_drivers/ethernet/ti/cpsw.rst | 587 ++++ .../device_drivers/ethernet/ti/cpsw_switchdev.rst | 242 ++ .../networking/device_drivers/ethernet/ti/tlan.rst | 140 + .../device_drivers/ethernet/toshiba/spider_net.rst | 202 ++ .../networking/device_drivers/fddi/defza.rst | 63 + .../networking/device_drivers/fddi/index.rst | 19 + .../networking/device_drivers/fddi/skfp.rst | 253 ++ .../networking/device_drivers/hamradio/baycom.rst | 174 ++ .../networking/device_drivers/hamradio/index.rst | 19 + .../device_drivers/hamradio/z8530drv.rst | 686 +++++ Documentation/networking/device_drivers/index.rst | 26 + .../networking/device_drivers/wan/index.rst | 18 + .../networking/device_drivers/wan/z8530book.rst | 256 ++ .../networking/device_drivers/wifi/index.rst | 20 + .../device_drivers/wifi/intel/ipw2100.rst | 323 ++ .../device_drivers/wifi/intel/ipw2200.rst | 526 ++++ .../networking/device_drivers/wifi/ray_cs.rst | 165 + Documentation/networking/devlink/bnxt.rst | 80 + Documentation/networking/devlink/devlink-dpipe.rst | 252 ++ Documentation/networking/devlink/devlink-flash.rst | 121 + .../networking/devlink/devlink-health.rst | 114 + Documentation/networking/devlink/devlink-info.rst | 210 ++ .../networking/devlink/devlink-params.rst | 116 + .../networking/devlink/devlink-region.rst | 70 + .../networking/devlink/devlink-reload.rst | 81 + .../networking/devlink/devlink-resource.rst | 62 + Documentation/networking/devlink/devlink-trap.rst | 617 ++++ Documentation/networking/devlink/ice.rst | 195 ++ Documentation/networking/devlink/index.rst | 46 + Documentation/networking/devlink/ionic.rst | 29 + Documentation/networking/devlink/mlx4.rst | 56 + Documentation/networking/devlink/mlx5.rst | 65 + Documentation/networking/devlink/mlxsw.rst | 81 + Documentation/networking/devlink/mv88e6xxx.rst | 28 + Documentation/networking/devlink/netdevsim.rst | 72 + Documentation/networking/devlink/nfp.rst | 65 + Documentation/networking/devlink/qed.rst | 26 + Documentation/networking/devlink/sja1105.rst | 49 + .../networking/devlink/ti-cpsw-switch.rst | 31 + Documentation/networking/dns_resolver.rst | 155 + Documentation/networking/driver.rst | 97 + Documentation/networking/dsa/b53.rst | 183 ++ Documentation/networking/dsa/bcm_sf2.rst | 115 + Documentation/networking/dsa/configuration.rst | 292 ++ Documentation/networking/dsa/dsa.rst | 587 ++++ Documentation/networking/dsa/index.rst | 13 + Documentation/networking/dsa/lan9303.rst | 37 + Documentation/networking/dsa/sja1105.rst | 581 ++++ Documentation/networking/eql.rst | 373 +++ Documentation/networking/ethtool-netlink.rst | 1380 +++++++++ Documentation/networking/failover.rst | 18 + Documentation/networking/fib_trie.rst | 149 + Documentation/networking/filter.rst | 1653 ++++++++++ Documentation/networking/framerelay.rst | 44 + Documentation/networking/gen_stats.rst | 129 + Documentation/networking/generic-hdlc.rst | 170 + Documentation/networking/generic_netlink.rst | 9 + Documentation/networking/gtp.rst | 251 ++ Documentation/networking/ieee802154.rst | 182 ++ Documentation/networking/ila.rst | 296 ++ Documentation/networking/index.rst | 120 + Documentation/networking/ip-sysctl.rst | 2700 ++++++++++++++++ Documentation/networking/ip_dynaddr.rst | 40 + Documentation/networking/ipddp.rst | 78 + Documentation/networking/ipsec.rst | 46 + Documentation/networking/ipv6.rst | 78 + Documentation/networking/ipvlan.rst | 189 ++ Documentation/networking/ipvs-sysctl.rst | 301 ++ Documentation/networking/j1939.rst | 422 +++ Documentation/networking/kapi.rst | 180 ++ Documentation/networking/kcm.rst | 290 ++ Documentation/networking/l2tp.rst | 677 ++++ Documentation/networking/lapb-module.rst | 305 ++ .../networking/mac80211-auth-assoc-deauth.txt | 95 + Documentation/networking/mac80211-injection.rst | 106 + .../networking/mac80211_hwsim/hostapd.conf | 11 + .../networking/mac80211_hwsim/mac80211_hwsim.rst | 80 + .../networking/mac80211_hwsim/wpa_supplicant.conf | 10 + Documentation/networking/mpls-sysctl.rst | 57 + Documentation/networking/msg_zerocopy.rst | 256 ++ Documentation/networking/multiqueue.rst | 78 + Documentation/networking/net_dim.rst | 176 ++ Documentation/networking/net_failover.rst | 119 + Documentation/networking/netconsole.rst | 239 ++ Documentation/networking/netdev-FAQ.rst | 234 ++ Documentation/networking/netdev-features.rst | 184 ++ Documentation/networking/netdevices.rst | 270 ++ Documentation/networking/netfilter-sysctl.rst | 17 + Documentation/networking/netif-msg.rst | 95 + Documentation/networking/nf_conntrack-sysctl.rst | 179 ++ Documentation/networking/nf_flowtable.rst | 117 + Documentation/networking/nfc.rst | 130 + Documentation/networking/openvswitch.rst | 251 ++ Documentation/networking/operstates.rst | 185 ++ Documentation/networking/packet_mmap.rst | 1084 +++++++ Documentation/networking/page_pool.rst | 159 + Documentation/networking/phonet.rst | 230 ++ Documentation/networking/phy.rst | 510 +++ Documentation/networking/pktgen.rst | 412 +++ Documentation/networking/plip.rst | 222 ++ Documentation/networking/ppp_generic.rst | 440 +++ Documentation/networking/proc_net_tcp.rst | 57 + Documentation/networking/radiotap-headers.rst | 159 + Documentation/networking/rds.rst | 448 +++ Documentation/networking/regulatory.rst | 209 ++ Documentation/networking/rxrpc.rst | 1178 +++++++ Documentation/networking/scaling.rst | 523 ++++ Documentation/networking/sctp.rst | 42 + Documentation/networking/secid.rst | 20 + Documentation/networking/seg6-sysctl.rst | 26 + Documentation/networking/segmentation-offloads.rst | 184 ++ Documentation/networking/sfp-phylink.rst | 284 ++ Documentation/networking/snmp_counter.rst | 1793 +++++++++++ Documentation/networking/statistics.rst | 178 ++ Documentation/networking/strparser.rst | 240 ++ Documentation/networking/switchdev.rst | 387 +++ Documentation/networking/sysfs-tagging.rst | 48 + Documentation/networking/tc-actions-env-rules.rst | 29 + Documentation/networking/tcp-thin.rst | 52 + Documentation/networking/team.rst | 8 + Documentation/networking/timestamping.rst | 756 +++++ Documentation/networking/tls-offload-layers.svg | 1 + .../networking/tls-offload-reorder-bad.svg | 1 + .../networking/tls-offload-reorder-good.svg | 1 + Documentation/networking/tls-offload.rst | 530 ++++ Documentation/networking/tls.rst | 241 ++ Documentation/networking/tproxy.rst | 109 + Documentation/networking/tuntap.rst | 259 ++ Documentation/networking/udplite.rst | 291 ++ Documentation/networking/vrf.rst | 451 +++ Documentation/networking/vxlan.rst | 88 + Documentation/networking/x25-iface.rst | 129 + Documentation/networking/x25.rst | 48 + Documentation/networking/xfrm_device.rst | 151 + Documentation/networking/xfrm_proc.rst | 113 + Documentation/networking/xfrm_sync.rst | 189 ++ Documentation/networking/xfrm_sysctl.rst | 11 + 212 files changed, 56459 insertions(+) create mode 100644 Documentation/networking/6lowpan.rst create mode 100644 Documentation/networking/6pack.rst create mode 100644 Documentation/networking/af_xdp.rst create mode 100644 Documentation/networking/alias.rst create mode 100644 Documentation/networking/arcnet-hardware.rst create mode 100644 Documentation/networking/arcnet.rst create mode 100644 Documentation/networking/atm.rst create mode 100644 Documentation/networking/ax25.rst create mode 100644 Documentation/networking/bareudp.rst create mode 100644 Documentation/networking/batman-adv.rst create mode 100644 Documentation/networking/bonding.rst create mode 100644 Documentation/networking/bridge.rst create mode 100644 Documentation/networking/caif/caif.rst create mode 100644 Documentation/networking/caif/index.rst create mode 100644 Documentation/networking/caif/linux_caif.rst create mode 100644 Documentation/networking/can.rst create mode 100644 Documentation/networking/can_ucan_protocol.rst create mode 100644 Documentation/networking/cdc_mbim.rst create mode 100644 Documentation/networking/checksum-offloads.rst create mode 100644 Documentation/networking/dccp.rst create mode 100644 Documentation/networking/dctcp.rst create mode 100644 Documentation/networking/device_drivers/appletalk/cops.rst create mode 100644 Documentation/networking/device_drivers/appletalk/index.rst create mode 100644 Documentation/networking/device_drivers/appletalk/ltpc.rst create mode 100644 Documentation/networking/device_drivers/atm/cxacru-cf.py create mode 100644 Documentation/networking/device_drivers/atm/cxacru.rst create mode 100644 Documentation/networking/device_drivers/atm/fore200e.rst create mode 100644 Documentation/networking/device_drivers/atm/index.rst create mode 100644 Documentation/networking/device_drivers/atm/iphase.rst create mode 100644 Documentation/networking/device_drivers/cable/index.rst create mode 100644 Documentation/networking/device_drivers/cable/sb1000.rst create mode 100644 Documentation/networking/device_drivers/cellular/index.rst create mode 100644 Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst create mode 100644 Documentation/networking/device_drivers/ethernet/3com/3c509.rst create mode 100644 Documentation/networking/device_drivers/ethernet/3com/vortex.rst create mode 100644 Documentation/networking/device_drivers/ethernet/altera/altera_tse.rst create mode 100644 Documentation/networking/device_drivers/ethernet/amazon/ena.rst create mode 100644 Documentation/networking/device_drivers/ethernet/aquantia/atlantic.rst create mode 100644 Documentation/networking/device_drivers/ethernet/chelsio/cxgb.rst create mode 100644 Documentation/networking/device_drivers/ethernet/cirrus/cs89x0.rst create mode 100644 Documentation/networking/device_drivers/ethernet/davicom/dm9000.rst create mode 100644 Documentation/networking/device_drivers/ethernet/dec/de4x5.rst create mode 100644 Documentation/networking/device_drivers/ethernet/dec/dmfe.rst create mode 100644 Documentation/networking/device_drivers/ethernet/dlink/dl2k.rst create mode 100644 Documentation/networking/device_drivers/ethernet/freescale/dpaa.rst create mode 100644 Documentation/networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver.rst create mode 100644 Documentation/networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver.rst create mode 100644 Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst create mode 100644 Documentation/networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support.rst create mode 100644 Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst create mode 100644 Documentation/networking/device_drivers/ethernet/freescale/gianfar.rst create mode 100644 Documentation/networking/device_drivers/ethernet/google/gve.rst create mode 100644 Documentation/networking/device_drivers/ethernet/huawei/hinic.rst create mode 100644 Documentation/networking/device_drivers/ethernet/index.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/e100.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/e1000.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/e1000e.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/fm10k.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/i40e.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/iavf.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/ice.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/igb.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/igbvf.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/ixgb.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst create mode 100644 Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst create mode 100644 Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst create mode 100644 Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst create mode 100644 Documentation/networking/device_drivers/ethernet/neterion/s2io.rst create mode 100644 Documentation/networking/device_drivers/ethernet/neterion/vxge.rst create mode 100644 Documentation/networking/device_drivers/ethernet/netronome/nfp.rst create mode 100644 Documentation/networking/device_drivers/ethernet/pensando/ionic.rst create mode 100644 Documentation/networking/device_drivers/ethernet/smsc/smc9.rst create mode 100644 Documentation/networking/device_drivers/ethernet/stmicro/stmmac.rst create mode 100644 Documentation/networking/device_drivers/ethernet/ti/cpsw.rst create mode 100644 Documentation/networking/device_drivers/ethernet/ti/cpsw_switchdev.rst create mode 100644 Documentation/networking/device_drivers/ethernet/ti/tlan.rst create mode 100644 Documentation/networking/device_drivers/ethernet/toshiba/spider_net.rst create mode 100644 Documentation/networking/device_drivers/fddi/defza.rst create mode 100644 Documentation/networking/device_drivers/fddi/index.rst create mode 100644 Documentation/networking/device_drivers/fddi/skfp.rst create mode 100644 Documentation/networking/device_drivers/hamradio/baycom.rst create mode 100644 Documentation/networking/device_drivers/hamradio/index.rst create mode 100644 Documentation/networking/device_drivers/hamradio/z8530drv.rst create mode 100644 Documentation/networking/device_drivers/index.rst create mode 100644 Documentation/networking/device_drivers/wan/index.rst create mode 100644 Documentation/networking/device_drivers/wan/z8530book.rst create mode 100644 Documentation/networking/device_drivers/wifi/index.rst create mode 100644 Documentation/networking/device_drivers/wifi/intel/ipw2100.rst create mode 100644 Documentation/networking/device_drivers/wifi/intel/ipw2200.rst create mode 100644 Documentation/networking/device_drivers/wifi/ray_cs.rst create mode 100644 Documentation/networking/devlink/bnxt.rst create mode 100644 Documentation/networking/devlink/devlink-dpipe.rst create mode 100644 Documentation/networking/devlink/devlink-flash.rst create mode 100644 Documentation/networking/devlink/devlink-health.rst create mode 100644 Documentation/networking/devlink/devlink-info.rst create mode 100644 Documentation/networking/devlink/devlink-params.rst create mode 100644 Documentation/networking/devlink/devlink-region.rst create mode 100644 Documentation/networking/devlink/devlink-reload.rst create mode 100644 Documentation/networking/devlink/devlink-resource.rst create mode 100644 Documentation/networking/devlink/devlink-trap.rst create mode 100644 Documentation/networking/devlink/ice.rst create mode 100644 Documentation/networking/devlink/index.rst create mode 100644 Documentation/networking/devlink/ionic.rst create mode 100644 Documentation/networking/devlink/mlx4.rst create mode 100644 Documentation/networking/devlink/mlx5.rst create mode 100644 Documentation/networking/devlink/mlxsw.rst create mode 100644 Documentation/networking/devlink/mv88e6xxx.rst create mode 100644 Documentation/networking/devlink/netdevsim.rst create mode 100644 Documentation/networking/devlink/nfp.rst create mode 100644 Documentation/networking/devlink/qed.rst create mode 100644 Documentation/networking/devlink/sja1105.rst create mode 100644 Documentation/networking/devlink/ti-cpsw-switch.rst create mode 100644 Documentation/networking/dns_resolver.rst create mode 100644 Documentation/networking/driver.rst create mode 100644 Documentation/networking/dsa/b53.rst create mode 100644 Documentation/networking/dsa/bcm_sf2.rst create mode 100644 Documentation/networking/dsa/configuration.rst create mode 100644 Documentation/networking/dsa/dsa.rst create mode 100644 Documentation/networking/dsa/index.rst create mode 100644 Documentation/networking/dsa/lan9303.rst create mode 100644 Documentation/networking/dsa/sja1105.rst create mode 100644 Documentation/networking/eql.rst create mode 100644 Documentation/networking/ethtool-netlink.rst create mode 100644 Documentation/networking/failover.rst create mode 100644 Documentation/networking/fib_trie.rst create mode 100644 Documentation/networking/filter.rst create mode 100644 Documentation/networking/framerelay.rst create mode 100644 Documentation/networking/gen_stats.rst create mode 100644 Documentation/networking/generic-hdlc.rst create mode 100644 Documentation/networking/generic_netlink.rst create mode 100644 Documentation/networking/gtp.rst create mode 100644 Documentation/networking/ieee802154.rst create mode 100644 Documentation/networking/ila.rst create mode 100644 Documentation/networking/index.rst create mode 100644 Documentation/networking/ip-sysctl.rst create mode 100644 Documentation/networking/ip_dynaddr.rst create mode 100644 Documentation/networking/ipddp.rst create mode 100644 Documentation/networking/ipsec.rst create mode 100644 Documentation/networking/ipv6.rst create mode 100644 Documentation/networking/ipvlan.rst create mode 100644 Documentation/networking/ipvs-sysctl.rst create mode 100644 Documentation/networking/j1939.rst create mode 100644 Documentation/networking/kapi.rst create mode 100644 Documentation/networking/kcm.rst create mode 100644 Documentation/networking/l2tp.rst create mode 100644 Documentation/networking/lapb-module.rst create mode 100644 Documentation/networking/mac80211-auth-assoc-deauth.txt create mode 100644 Documentation/networking/mac80211-injection.rst create mode 100644 Documentation/networking/mac80211_hwsim/hostapd.conf create mode 100644 Documentation/networking/mac80211_hwsim/mac80211_hwsim.rst create mode 100644 Documentation/networking/mac80211_hwsim/wpa_supplicant.conf create mode 100644 Documentation/networking/mpls-sysctl.rst create mode 100644 Documentation/networking/msg_zerocopy.rst create mode 100644 Documentation/networking/multiqueue.rst create mode 100644 Documentation/networking/net_dim.rst create mode 100644 Documentation/networking/net_failover.rst create mode 100644 Documentation/networking/netconsole.rst create mode 100644 Documentation/networking/netdev-FAQ.rst create mode 100644 Documentation/networking/netdev-features.rst create mode 100644 Documentation/networking/netdevices.rst create mode 100644 Documentation/networking/netfilter-sysctl.rst create mode 100644 Documentation/networking/netif-msg.rst create mode 100644 Documentation/networking/nf_conntrack-sysctl.rst create mode 100644 Documentation/networking/nf_flowtable.rst create mode 100644 Documentation/networking/nfc.rst create mode 100644 Documentation/networking/openvswitch.rst create mode 100644 Documentation/networking/operstates.rst create mode 100644 Documentation/networking/packet_mmap.rst create mode 100644 Documentation/networking/page_pool.rst create mode 100644 Documentation/networking/phonet.rst create mode 100644 Documentation/networking/phy.rst create mode 100644 Documentation/networking/pktgen.rst create mode 100644 Documentation/networking/plip.rst create mode 100644 Documentation/networking/ppp_generic.rst create mode 100644 Documentation/networking/proc_net_tcp.rst create mode 100644 Documentation/networking/radiotap-headers.rst create mode 100644 Documentation/networking/rds.rst create mode 100644 Documentation/networking/regulatory.rst create mode 100644 Documentation/networking/rxrpc.rst create mode 100644 Documentation/networking/scaling.rst create mode 100644 Documentation/networking/sctp.rst create mode 100644 Documentation/networking/secid.rst create mode 100644 Documentation/networking/seg6-sysctl.rst create mode 100644 Documentation/networking/segmentation-offloads.rst create mode 100644 Documentation/networking/sfp-phylink.rst create mode 100644 Documentation/networking/snmp_counter.rst create mode 100644 Documentation/networking/statistics.rst create mode 100644 Documentation/networking/strparser.rst create mode 100644 Documentation/networking/switchdev.rst create mode 100644 Documentation/networking/sysfs-tagging.rst create mode 100644 Documentation/networking/tc-actions-env-rules.rst create mode 100644 Documentation/networking/tcp-thin.rst create mode 100644 Documentation/networking/team.rst create mode 100644 Documentation/networking/timestamping.rst create mode 100644 Documentation/networking/tls-offload-layers.svg create mode 100644 Documentation/networking/tls-offload-reorder-bad.svg create mode 100644 Documentation/networking/tls-offload-reorder-good.svg create mode 100644 Documentation/networking/tls-offload.rst create mode 100644 Documentation/networking/tls.rst create mode 100644 Documentation/networking/tproxy.rst create mode 100644 Documentation/networking/tuntap.rst create mode 100644 Documentation/networking/udplite.rst create mode 100644 Documentation/networking/vrf.rst create mode 100644 Documentation/networking/vxlan.rst create mode 100644 Documentation/networking/x25-iface.rst create mode 100644 Documentation/networking/x25.rst create mode 100644 Documentation/networking/xfrm_device.rst create mode 100644 Documentation/networking/xfrm_proc.rst create mode 100644 Documentation/networking/xfrm_sync.rst create mode 100644 Documentation/networking/xfrm_sysctl.rst (limited to 'Documentation/networking') diff --git a/Documentation/networking/6lowpan.rst b/Documentation/networking/6lowpan.rst new file mode 100644 index 000000000..e70a6520c --- /dev/null +++ b/Documentation/networking/6lowpan.rst @@ -0,0 +1,53 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================== +Netdev private dataroom for 6lowpan interfaces +============================================== + +All 6lowpan able net devices, means all interfaces with ARPHRD_6LOWPAN, +must have "struct lowpan_priv" placed at beginning of netdev_priv. + +The priv_size of each interface should be calculate by:: + + dev->priv_size = LOWPAN_PRIV_SIZE(LL_6LOWPAN_PRIV_DATA); + +Where LL_PRIV_6LOWPAN_DATA is sizeof linklayer 6lowpan private data struct. +To access the LL_PRIV_6LOWPAN_DATA structure you can cast:: + + lowpan_priv(dev)-priv; + +to your LL_6LOWPAN_PRIV_DATA structure. + +Before registering the lowpan netdev interface you must run:: + + lowpan_netdev_setup(dev, LOWPAN_LLTYPE_FOOBAR); + +wheres LOWPAN_LLTYPE_FOOBAR is a define for your 6LoWPAN linklayer type of +enum lowpan_lltypes. + +Example to evaluate the private usually you can do:: + + static inline struct lowpan_priv_foobar * + lowpan_foobar_priv(struct net_device *dev) + { + return (struct lowpan_priv_foobar *)lowpan_priv(dev)->priv; + } + + switch (dev->type) { + case ARPHRD_6LOWPAN: + lowpan_priv = lowpan_priv(dev); + /* do great stuff which is ARPHRD_6LOWPAN related */ + switch (lowpan_priv->lltype) { + case LOWPAN_LLTYPE_FOOBAR: + /* do 802.15.4 6LoWPAN handling here */ + lowpan_foobar_priv(dev)->bar = foo; + break; + ... + } + break; + ... + } + +In case of generic 6lowpan branch ("net/6lowpan") you can remove the check +on ARPHRD_6LOWPAN, because you can be sure that these function are called +by ARPHRD_6LOWPAN interfaces. diff --git a/Documentation/networking/6pack.rst b/Documentation/networking/6pack.rst new file mode 100644 index 000000000..bc5bf1f1a --- /dev/null +++ b/Documentation/networking/6pack.rst @@ -0,0 +1,191 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============== +6pack Protocol +============== + +This is the 6pack-mini-HOWTO, written by + +Andreas Könsgen DG3KQ + +:Internet: ajk@comnets.uni-bremen.de +:AMPR-net: dg3kq@db0pra.ampr.org +:AX.25: dg3kq@db0ach.#nrw.deu.eu + +Last update: April 7, 1998 + +1. What is 6pack, and what are the advantages to KISS? +====================================================== + +6pack is a transmission protocol for data exchange between the PC and +the TNC over a serial line. It can be used as an alternative to KISS. + +6pack has two major advantages: + +- The PC is given full control over the radio + channel. Special control data is exchanged between the PC and the TNC so + that the PC knows at any time if the TNC is receiving data, if a TNC + buffer underrun or overrun has occurred, if the PTT is + set and so on. This control data is processed at a higher priority than + normal data, so a data stream can be interrupted at any time to issue an + important event. This helps to improve the channel access and timing + algorithms as everything is computed in the PC. It would even be possible + to experiment with something completely different from the known CSMA and + DAMA channel access methods. + This kind of real-time control is especially important to supply several + TNCs that are connected between each other and the PC by a daisy chain + (however, this feature is not supported yet by the Linux 6pack driver). + +- Each packet transferred over the serial line is supplied with a checksum, + so it is easy to detect errors due to problems on the serial line. + Received packets that are corrupt are not passed on to the AX.25 layer. + Damaged packets that the TNC has received from the PC are not transmitted. + +More details about 6pack are described in the file 6pack.ps that is located +in the doc directory of the AX.25 utilities package. + +2. Who has developed the 6pack protocol? +======================================== + +The 6pack protocol has been developed by Ekki Plicht DF4OR, Henning Rech +DF9IC and Gunter Jost DK7WJ. A driver for 6pack, written by Gunter Jost and +Matthias Welwarsky DG2FEF, comes along with the PC version of FlexNet. +They have also written a firmware for TNCs to perform the 6pack +protocol (see section 4 below). + +3. Where can I get the latest version of 6pack for LinuX? +========================================================= + +At the moment, the 6pack stuff can obtained via anonymous ftp from +db0bm.automation.fh-aachen.de. In the directory /incoming/dg3kq, +there is a file named 6pack.tgz. + +4. Preparing the TNC for 6pack operation +======================================== + +To be able to use 6pack, a special firmware for the TNC is needed. The EPROM +of a newly bought TNC does not contain 6pack, so you will have to +program an EPROM yourself. The image file for 6pack EPROMs should be +available on any packet radio box where PC/FlexNet can be found. The name of +the file is 6pack.bin. This file is copyrighted and maintained by the FlexNet +team. It can be used under the terms of the license that comes along +with PC/FlexNet. Please do not ask me about the internals of this file as I +don't know anything about it. I used a textual description of the 6pack +protocol to program the Linux driver. + +TNCs contain a 64kByte EPROM, the lower half of which is used for +the firmware/KISS. The upper half is either empty or is sometimes +programmed with software called TAPR. In the latter case, the TNC +is supplied with a DIP switch so you can easily change between the +two systems. When programming a new EPROM, one of the systems is replaced +by 6pack. It is useful to replace TAPR, as this software is rarely used +nowadays. If your TNC is not equipped with the switch mentioned above, you +can build in one yourself that switches over the highest address pin +of the EPROM between HIGH and LOW level. After having inserted the new EPROM +and switched to 6pack, apply power to the TNC for a first test. The connect +and the status LED are lit for about a second if the firmware initialises +the TNC correctly. + +5. Building and installing the 6pack driver +=========================================== + +The driver has been tested with kernel version 2.1.90. Use with older +kernels may lead to a compilation error because the interface to a kernel +function has been changed in the 2.1.8x kernels. + +How to turn on 6pack support: +============================= + +- In the linux kernel configuration program, select the code maturity level + options menu and turn on the prompting for development drivers. + +- Select the amateur radio support menu and turn on the serial port 6pack + driver. + +- Compile and install the kernel and the modules. + +To use the driver, the kissattach program delivered with the AX.25 utilities +has to be modified. + +- Do a cd to the directory that holds the kissattach sources. Edit the + kissattach.c file. At the top, insert the following lines:: + + #ifndef N_6PACK + #define N_6PACK (N_AX25+1) + #endif + + Then find the line: + + int disc = N_AX25; + + and replace N_AX25 by N_6PACK. + +- Recompile kissattach. Rename it to spattach to avoid confusions. + +Installing the driver: +---------------------- + +- Do an insmod 6pack. Look at your /var/log/messages file to check if the + module has printed its initialization message. + +- Do a spattach as you would launch kissattach when starting a KISS port. + Check if the kernel prints the message '6pack: TNC found'. + +- From here, everything should work as if you were setting up a KISS port. + The only difference is that the network device that represents + the 6pack port is called sp instead of sl or ax. So, sp0 would be the + first 6pack port. + +Although the driver has been tested on various platforms, I still declare it +ALPHA. BE CAREFUL! Sync your disks before insmoding the 6pack module +and spattaching. Watch out if your computer behaves strangely. Read section +6 of this file about known problems. + +Note that the connect and status LEDs of the TNC are controlled in a +different way than they are when the TNC is used with PC/FlexNet. When using +FlexNet, the connect LED is on if there is a connection; the status LED is +on if there is data in the buffer of the PC's AX.25 engine that has to be +transmitted. Under Linux, the 6pack layer is beyond the AX.25 layer, +so the 6pack driver doesn't know anything about connects or data that +has not yet been transmitted. Therefore the LEDs are controlled +as they are in KISS mode: The connect LED is turned on if data is transferred +from the PC to the TNC over the serial line, the status LED if data is +sent to the PC. + +6. Known problems +================= + +When testing the driver with 2.0.3x kernels and +operating with data rates on the radio channel of 9600 Baud or higher, +the driver may, on certain systems, sometimes print the message '6pack: +bad checksum', which is due to data loss if the other station sends two +or more subsequent packets. I have been told that this is due to a problem +with the serial driver of 2.0.3x kernels. I don't know yet if the problem +still exists with 2.1.x kernels, as I have heard that the serial driver +code has been changed with 2.1.x. + +When shutting down the sp interface with ifconfig, the kernel crashes if +there is still an AX.25 connection left over which an IP connection was +running, even if that IP connection is already closed. The problem does not +occur when there is a bare AX.25 connection still running. I don't know if +this is a problem of the 6pack driver or something else in the kernel. + +The driver has been tested as a module, not yet as a kernel-builtin driver. + +The 6pack protocol supports daisy-chaining of TNCs in a token ring, which is +connected to one serial port of the PC. This feature is not implemented +and at least at the moment I won't be able to do it because I do not have +the opportunity to build a TNC daisy-chain and test it. + +Some of the comments in the source code are inaccurate. They are left from +the SLIP/KISS driver, from which the 6pack driver has been derived. +I haven't modified or removed them yet -- sorry! The code itself needs +some cleaning and optimizing. This will be done in a later release. + +If you encounter a bug or if you have a question or suggestion concerning the +driver, feel free to mail me, using the addresses given at the beginning of +this file. + +Have fun! + +Andreas diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst new file mode 100644 index 000000000..70623cb13 --- /dev/null +++ b/Documentation/networking/af_xdp.rst @@ -0,0 +1,640 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====== +AF_XDP +====== + +Overview +======== + +AF_XDP is an address family that is optimized for high performance +packet processing. + +This document assumes that the reader is familiar with BPF and XDP. If +not, the Cilium project has an excellent reference guide at +http://cilium.readthedocs.io/en/latest/bpf/. + +Using the XDP_REDIRECT action from an XDP program, the program can +redirect ingress frames to other XDP enabled netdevs, using the +bpf_redirect_map() function. AF_XDP sockets enable the possibility for +XDP programs to redirect frames to a memory buffer in a user-space +application. + +An AF_XDP socket (XSK) is created with the normal socket() +syscall. Associated with each XSK are two rings: the RX ring and the +TX ring. A socket can receive packets on the RX ring and it can send +packets on the TX ring. These rings are registered and sized with the +setsockopts XDP_RX_RING and XDP_TX_RING, respectively. It is mandatory +to have at least one of these rings for each socket. An RX or TX +descriptor ring points to a data buffer in a memory area called a +UMEM. RX and TX can share the same UMEM so that a packet does not have +to be copied between RX and TX. Moreover, if a packet needs to be kept +for a while due to a possible retransmit, the descriptor that points +to that packet can be changed to point to another and reused right +away. This again avoids copying data. + +The UMEM consists of a number of equally sized chunks. A descriptor in +one of the rings references a frame by referencing its addr. The addr +is simply an offset within the entire UMEM region. The user space +allocates memory for this UMEM using whatever means it feels is most +appropriate (malloc, mmap, huge pages, etc). This memory area is then +registered with the kernel using the new setsockopt XDP_UMEM_REG. The +UMEM also has two rings: the FILL ring and the COMPLETION ring. The +FILL ring is used by the application to send down addr for the kernel +to fill in with RX packet data. References to these frames will then +appear in the RX ring once each packet has been received. The +COMPLETION ring, on the other hand, contains frame addr that the +kernel has transmitted completely and can now be used again by user +space, for either TX or RX. Thus, the frame addrs appearing in the +COMPLETION ring are addrs that were previously transmitted using the +TX ring. In summary, the RX and FILL rings are used for the RX path +and the TX and COMPLETION rings are used for the TX path. + +The socket is then finally bound with a bind() call to a device and a +specific queue id on that device, and it is not until bind is +completed that traffic starts to flow. + +The UMEM can be shared between processes, if desired. If a process +wants to do this, it simply skips the registration of the UMEM and its +corresponding two rings, sets the XDP_SHARED_UMEM flag in the bind +call and submits the XSK of the process it would like to share UMEM +with as well as its own newly created XSK socket. The new process will +then receive frame addr references in its own RX ring that point to +this shared UMEM. Note that since the ring structures are +single-consumer / single-producer (for performance reasons), the new +process has to create its own socket with associated RX and TX rings, +since it cannot share this with the other process. This is also the +reason that there is only one set of FILL and COMPLETION rings per +UMEM. It is the responsibility of a single process to handle the UMEM. + +How is then packets distributed from an XDP program to the XSKs? There +is a BPF map called XSKMAP (or BPF_MAP_TYPE_XSKMAP in full). The +user-space application can place an XSK at an arbitrary place in this +map. The XDP program can then redirect a packet to a specific index in +this map and at this point XDP validates that the XSK in that map was +indeed bound to that device and ring number. If not, the packet is +dropped. If the map is empty at that index, the packet is also +dropped. This also means that it is currently mandatory to have an XDP +program loaded (and one XSK in the XSKMAP) to be able to get any +traffic to user space through the XSK. + +AF_XDP can operate in two different modes: XDP_SKB and XDP_DRV. If the +driver does not have support for XDP, or XDP_SKB is explicitly chosen +when loading the XDP program, XDP_SKB mode is employed that uses SKBs +together with the generic XDP support and copies out the data to user +space. A fallback mode that works for any network device. On the other +hand, if the driver has support for XDP, it will be used by the AF_XDP +code to provide better performance, but there is still a copy of the +data into user space. + +Concepts +======== + +In order to use an AF_XDP socket, a number of associated objects need +to be setup. These objects and their options are explained in the +following sections. + +For an overview on how AF_XDP works, you can also take a look at the +Linux Plumbers paper from 2018 on the subject: +http://vger.kernel.org/lpc_net2018_talks/lpc18_paper_af_xdp_perf-v2.pdf. Do +NOT consult the paper from 2017 on "AF_PACKET v4", the first attempt +at AF_XDP. Nearly everything changed since then. Jonathan Corbet has +also written an excellent article on LWN, "Accelerating networking +with AF_XDP". It can be found at https://lwn.net/Articles/750845/. + +UMEM +---- + +UMEM is a region of virtual contiguous memory, divided into +equal-sized frames. An UMEM is associated to a netdev and a specific +queue id of that netdev. It is created and configured (chunk size, +headroom, start address and size) by using the XDP_UMEM_REG setsockopt +system call. A UMEM is bound to a netdev and queue id, via the bind() +system call. + +An AF_XDP is socket linked to a single UMEM, but one UMEM can have +multiple AF_XDP sockets. To share an UMEM created via one socket A, +the next socket B can do this by setting the XDP_SHARED_UMEM flag in +struct sockaddr_xdp member sxdp_flags, and passing the file descriptor +of A to struct sockaddr_xdp member sxdp_shared_umem_fd. + +The UMEM has two single-producer/single-consumer rings that are used +to transfer ownership of UMEM frames between the kernel and the +user-space application. + +Rings +----- + +There are a four different kind of rings: FILL, COMPLETION, RX and +TX. All rings are single-producer/single-consumer, so the user-space +application need explicit synchronization of multiple +processes/threads are reading/writing to them. + +The UMEM uses two rings: FILL and COMPLETION. Each socket associated +with the UMEM must have an RX queue, TX queue or both. Say, that there +is a setup with four sockets (all doing TX and RX). Then there will be +one FILL ring, one COMPLETION ring, four TX rings and four RX rings. + +The rings are head(producer)/tail(consumer) based rings. A producer +writes the data ring at the index pointed out by struct xdp_ring +producer member, and increasing the producer index. A consumer reads +the data ring at the index pointed out by struct xdp_ring consumer +member, and increasing the consumer index. + +The rings are configured and created via the _RING setsockopt system +calls and mmapped to user-space using the appropriate offset to mmap() +(XDP_PGOFF_RX_RING, XDP_PGOFF_TX_RING, XDP_UMEM_PGOFF_FILL_RING and +XDP_UMEM_PGOFF_COMPLETION_RING). + +The size of the rings need to be of size power of two. + +UMEM Fill Ring +~~~~~~~~~~~~~~ + +The FILL ring is used to transfer ownership of UMEM frames from +user-space to kernel-space. The UMEM addrs are passed in the ring. As +an example, if the UMEM is 64k and each chunk is 4k, then the UMEM has +16 chunks and can pass addrs between 0 and 64k. + +Frames passed to the kernel are used for the ingress path (RX rings). + +The user application produces UMEM addrs to this ring. Note that, if +running the application with aligned chunk mode, the kernel will mask +the incoming addr. E.g. for a chunk size of 2k, the log2(2048) LSB of +the addr will be masked off, meaning that 2048, 2050 and 3000 refers +to the same chunk. If the user application is run in the unaligned +chunks mode, then the incoming addr will be left untouched. + + +UMEM Completion Ring +~~~~~~~~~~~~~~~~~~~~ + +The COMPLETION Ring is used transfer ownership of UMEM frames from +kernel-space to user-space. Just like the FILL ring, UMEM indices are +used. + +Frames passed from the kernel to user-space are frames that has been +sent (TX ring) and can be used by user-space again. + +The user application consumes UMEM addrs from this ring. + + +RX Ring +~~~~~~~ + +The RX ring is the receiving side of a socket. Each entry in the ring +is a struct xdp_desc descriptor. The descriptor contains UMEM offset +(addr) and the length of the data (len). + +If no frames have been passed to kernel via the FILL ring, no +descriptors will (or can) appear on the RX ring. + +The user application consumes struct xdp_desc descriptors from this +ring. + +TX Ring +~~~~~~~ + +The TX ring is used to send frames. The struct xdp_desc descriptor is +filled (index, length and offset) and passed into the ring. + +To start the transfer a sendmsg() system call is required. This might +be relaxed in the future. + +The user application produces struct xdp_desc descriptors to this +ring. + +Libbpf +====== + +Libbpf is a helper library for eBPF and XDP that makes using these +technologies a lot simpler. It also contains specific helper functions +in tools/lib/bpf/xsk.h for facilitating the use of AF_XDP. It +contains two types of functions: those that can be used to make the +setup of AF_XDP socket easier and ones that can be used in the data +plane to access the rings safely and quickly. To see an example on how +to use this API, please take a look at the sample application in +samples/bpf/xdpsock_usr.c which uses libbpf for both setup and data +plane operations. + +We recommend that you use this library unless you have become a power +user. It will make your program a lot simpler. + +XSKMAP / BPF_MAP_TYPE_XSKMAP +============================ + +On XDP side there is a BPF map type BPF_MAP_TYPE_XSKMAP (XSKMAP) that +is used in conjunction with bpf_redirect_map() to pass the ingress +frame to a socket. + +The user application inserts the socket into the map, via the bpf() +system call. + +Note that if an XDP program tries to redirect to a socket that does +not match the queue configuration and netdev, the frame will be +dropped. E.g. an AF_XDP socket is bound to netdev eth0 and +queue 17. Only the XDP program executing for eth0 and queue 17 will +successfully pass data to the socket. Please refer to the sample +application (samples/bpf/) in for an example. + +Configuration Flags and Socket Options +====================================== + +These are the various configuration flags that can be used to control +and monitor the behavior of AF_XDP sockets. + +XDP_COPY and XDP_ZERO_COPY bind flags +------------------------------------- + +When you bind to a socket, the kernel will first try to use zero-copy +copy. If zero-copy is not supported, it will fall back on using copy +mode, i.e. copying all packets out to user space. But if you would +like to force a certain mode, you can use the following flags. If you +pass the XDP_COPY flag to the bind call, the kernel will force the +socket into copy mode. If it cannot use copy mode, the bind call will +fail with an error. Conversely, the XDP_ZERO_COPY flag will force the +socket into zero-copy mode or fail. + +XDP_SHARED_UMEM bind flag +------------------------- + +This flag enables you to bind multiple sockets to the same UMEM. It +works on the same queue id, between queue ids and between +netdevs/devices. In this mode, each socket has their own RX and TX +rings as usual, but you are going to have one or more FILL and +COMPLETION ring pairs. You have to create one of these pairs per +unique netdev and queue id tuple that you bind to. + +Starting with the case were we would like to share a UMEM between +sockets bound to the same netdev and queue id. The UMEM (tied to the +fist socket created) will only have a single FILL ring and a single +COMPLETION ring as there is only on unique netdev,queue_id tuple that +we have bound to. To use this mode, create the first socket and bind +it in the normal way. Create a second socket and create an RX and a TX +ring, or at least one of them, but no FILL or COMPLETION rings as the +ones from the first socket will be used. In the bind call, set he +XDP_SHARED_UMEM option and provide the initial socket's fd in the +sxdp_shared_umem_fd field. You can attach an arbitrary number of extra +sockets this way. + +What socket will then a packet arrive on? This is decided by the XDP +program. Put all the sockets in the XSK_MAP and just indicate which +index in the array you would like to send each packet to. A simple +round-robin example of distributing packets is shown below: + +.. code-block:: c + + #include + #include "bpf_helpers.h" + + #define MAX_SOCKS 16 + + struct { + __uint(type, BPF_MAP_TYPE_XSKMAP); + __uint(max_entries, MAX_SOCKS); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + } xsks_map SEC(".maps"); + + static unsigned int rr; + + SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + { + rr = (rr + 1) & (MAX_SOCKS - 1); + + return bpf_redirect_map(&xsks_map, rr, XDP_DROP); + } + +Note, that since there is only a single set of FILL and COMPLETION +rings, and they are single producer, single consumer rings, you need +to make sure that multiple processes or threads do not use these rings +concurrently. There are no synchronization primitives in the +libbpf code that protects multiple users at this point in time. + +Libbpf uses this mode if you create more than one socket tied to the +same UMEM. However, note that you need to supply the +XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD libbpf_flag with the +xsk_socket__create calls and load your own XDP program as there is no +built in one in libbpf that will route the traffic for you. + +The second case is when you share a UMEM between sockets that are +bound to different queue ids and/or netdevs. In this case you have to +create one FILL ring and one COMPLETION ring for each unique +netdev,queue_id pair. Let us say you want to create two sockets bound +to two different queue ids on the same netdev. Create the first socket +and bind it in the normal way. Create a second socket and create an RX +and a TX ring, or at least one of them, and then one FILL and +COMPLETION ring for this socket. Then in the bind call, set he +XDP_SHARED_UMEM option and provide the initial socket's fd in the +sxdp_shared_umem_fd field as you registered the UMEM on that +socket. These two sockets will now share one and the same UMEM. + +There is no need to supply an XDP program like the one in the previous +case where sockets were bound to the same queue id and +device. Instead, use the NIC's packet steering capabilities to steer +the packets to the right queue. In the previous example, there is only +one queue shared among sockets, so the NIC cannot do this steering. It +can only steer between queues. + +In libbpf, you need to use the xsk_socket__create_shared() API as it +takes a reference to a FILL ring and a COMPLETION ring that will be +created for you and bound to the shared UMEM. You can use this +function for all the sockets you create, or you can use it for the +second and following ones and use xsk_socket__create() for the first +one. Both methods yield the same result. + +Note that a UMEM can be shared between sockets on the same queue id +and device, as well as between queues on the same device and between +devices at the same time. + +XDP_USE_NEED_WAKEUP bind flag +----------------------------- + +This option adds support for a new flag called need_wakeup that is +present in the FILL ring and the TX ring, the rings for which user +space is a producer. When this option is set in the bind call, the +need_wakeup flag will be set if the kernel needs to be explicitly +woken up by a syscall to continue processing packets. If the flag is +zero, no syscall is needed. + +If the flag is set on the FILL ring, the application needs to call +poll() to be able to continue to receive packets on the RX ring. This +can happen, for example, when the kernel has detected that there are no +more buffers on the FILL ring and no buffers left on the RX HW ring of +the NIC. In this case, interrupts are turned off as the NIC cannot +receive any packets (as there are no buffers to put them in), and the +need_wakeup flag is set so that user space can put buffers on the +FILL ring and then call poll() so that the kernel driver can put these +buffers on the HW ring and start to receive packets. + +If the flag is set for the TX ring, it means that the application +needs to explicitly notify the kernel to send any packets put on the +TX ring. This can be accomplished either by a poll() call, as in the +RX path, or by calling sendto(). + +An example of how to use this flag can be found in +samples/bpf/xdpsock_user.c. An example with the use of libbpf helpers +would look like this for the TX path: + +.. code-block:: c + + if (xsk_ring_prod__needs_wakeup(&my_tx_ring)) + sendto(xsk_socket__fd(xsk_handle), NULL, 0, MSG_DONTWAIT, NULL, 0); + +I.e., only use the syscall if the flag is set. + +We recommend that you always enable this mode as it usually leads to +better performance especially if you run the application and the +driver on the same core, but also if you use different cores for the +application and the kernel driver, as it reduces the number of +syscalls needed for the TX path. + +XDP_{RX|TX|UMEM_FILL|UMEM_COMPLETION}_RING setsockopts +------------------------------------------------------ + +These setsockopts sets the number of descriptors that the RX, TX, +FILL, and COMPLETION rings respectively should have. It is mandatory +to set the size of at least one of the RX and TX rings. If you set +both, you will be able to both receive and send traffic from your +application, but if you only want to do one of them, you can save +resources by only setting up one of them. Both the FILL ring and the +COMPLETION ring are mandatory as you need to have a UMEM tied to your +socket. But if the XDP_SHARED_UMEM flag is used, any socket after the +first one does not have a UMEM and should in that case not have any +FILL or COMPLETION rings created as the ones from the shared UMEM will +be used. Note, that the rings are single-producer single-consumer, so +do not try to access them from multiple processes at the same +time. See the XDP_SHARED_UMEM section. + +In libbpf, you can create Rx-only and Tx-only sockets by supplying +NULL to the rx and tx arguments, respectively, to the +xsk_socket__create function. + +If you create a Tx-only socket, we recommend that you do not put any +packets on the fill ring. If you do this, drivers might think you are +going to receive something when you in fact will not, and this can +negatively impact performance. + +XDP_UMEM_REG setsockopt +----------------------- + +This setsockopt registers a UMEM to a socket. This is the area that +contain all the buffers that packet can recide in. The call takes a +pointer to the beginning of this area and the size of it. Moreover, it +also has parameter called chunk_size that is the size that the UMEM is +divided into. It can only be 2K or 4K at the moment. If you have an +UMEM area that is 128K and a chunk size of 2K, this means that you +will be able to hold a maximum of 128K / 2K = 64 packets in your UMEM +area and that your largest packet size can be 2K. + +There is also an option to set the headroom of each single buffer in +the UMEM. If you set this to N bytes, it means that the packet will +start N bytes into the buffer leaving the first N bytes for the +application to use. The final option is the flags field, but it will +be dealt with in separate sections for each UMEM flag. + +SO_BINDTODEVICE setsockopt +-------------------------- + +This is a generic SOL_SOCKET option that can be used to tie AF_XDP +socket to a particular network interface. It is useful when a socket +is created by a privileged process and passed to a non-privileged one. +Once the option is set, kernel will refuse attempts to bind that socket +to a different interface. Updating the value requires CAP_NET_RAW. + +XDP_STATISTICS getsockopt +------------------------- + +Gets drop statistics of a socket that can be useful for debug +purposes. The supported statistics are shown below: + +.. code-block:: c + + struct xdp_statistics { + __u64 rx_dropped; /* Dropped for reasons other than invalid desc */ + __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ + __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ + }; + +XDP_OPTIONS getsockopt +---------------------- + +Gets options from an XDP socket. The only one supported so far is +XDP_OPTIONS_ZEROCOPY which tells you if zero-copy is on or not. + +Usage +===== + +In order to use AF_XDP sockets two parts are needed. The +user-space application and the XDP program. For a complete setup and +usage example, please refer to the sample application. The user-space +side is xdpsock_user.c and the XDP side is part of libbpf. + +The XDP code sample included in tools/lib/bpf/xsk.c is the following: + +.. code-block:: c + + SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + { + int index = ctx->rx_queue_index; + + // A set entry here means that the corresponding queue_id + // has an active AF_XDP socket bound to it. + if (bpf_map_lookup_elem(&xsks_map, &index)) + return bpf_redirect_map(&xsks_map, index, 0); + + return XDP_PASS; + } + +A simple but not so performance ring dequeue and enqueue could look +like this: + +.. code-block:: c + + // struct xdp_rxtx_ring { + // __u32 *producer; + // __u32 *consumer; + // struct xdp_desc *desc; + // }; + + // struct xdp_umem_ring { + // __u32 *producer; + // __u32 *consumer; + // __u64 *desc; + // }; + + // typedef struct xdp_rxtx_ring RING; + // typedef struct xdp_umem_ring RING; + + // typedef struct xdp_desc RING_TYPE; + // typedef __u64 RING_TYPE; + + int dequeue_one(RING *ring, RING_TYPE *item) + { + __u32 entries = *ring->producer - *ring->consumer; + + if (entries == 0) + return -1; + + // read-barrier! + + *item = ring->desc[*ring->consumer & (RING_SIZE - 1)]; + (*ring->consumer)++; + return 0; + } + + int enqueue_one(RING *ring, const RING_TYPE *item) + { + u32 free_entries = RING_SIZE - (*ring->producer - *ring->consumer); + + if (free_entries == 0) + return -1; + + ring->desc[*ring->producer & (RING_SIZE - 1)] = *item; + + // write-barrier! + + (*ring->producer)++; + return 0; + } + +But please use the libbpf functions as they are optimized and ready to +use. Will make your life easier. + +Sample application +================== + +There is a xdpsock benchmarking/test application included that +demonstrates how to use AF_XDP sockets with private UMEMs. Say that +you would like your UDP traffic from port 4242 to end up in queue 16, +that we will enable AF_XDP on. Here, we use ethtool for this:: + + ethtool -N p3p2 rx-flow-hash udp4 fn + ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \ + action 16 + +Running the rxdrop benchmark in XDP_DRV mode can then be done +using:: + + samples/bpf/xdpsock -i p3p2 -q 16 -r -N + +For XDP_SKB mode, use the switch "-S" instead of "-N" and all options +can be displayed with "-h", as usual. + +This sample application uses libbpf to make the setup and usage of +AF_XDP simpler. If you want to know how the raw uapi of AF_XDP is +really used to make something more advanced, take a look at the libbpf +code in tools/lib/bpf/xsk.[ch]. + +FAQ +======= + +Q: I am not seeing any traffic on the socket. What am I doing wrong? + +A: When a netdev of a physical NIC is initialized, Linux usually + allocates one RX and TX queue pair per core. So on a 8 core system, + queue ids 0 to 7 will be allocated, one per core. In the AF_XDP + bind call or the xsk_socket__create libbpf function call, you + specify a specific queue id to bind to and it is only the traffic + towards that queue you are going to get on you socket. So in the + example above, if you bind to queue 0, you are NOT going to get any + traffic that is distributed to queues 1 through 7. If you are + lucky, you will see the traffic, but usually it will end up on one + of the queues you have not bound to. + + There are a number of ways to solve the problem of getting the + traffic you want to the queue id you bound to. If you want to see + all the traffic, you can force the netdev to only have 1 queue, queue + id 0, and then bind to queue 0. You can use ethtool to do this:: + + sudo ethtool -L combined 1 + + If you want to only see part of the traffic, you can program the + NIC through ethtool to filter out your traffic to a single queue id + that you can bind your XDP socket to. Here is one example in which + UDP traffic to and from port 4242 are sent to queue 2:: + + sudo ethtool -N rx-flow-hash udp4 fn + sudo ethtool -N flow-type udp4 src-port 4242 dst-port \ + 4242 action 2 + + A number of other ways are possible all up to the capabilities of + the NIC you have. + +Q: Can I use the XSKMAP to implement a switch betwen different umems + in copy mode? + +A: The short answer is no, that is not supported at the moment. The + XSKMAP can only be used to switch traffic coming in on queue id X + to sockets bound to the same queue id X. The XSKMAP can contain + sockets bound to different queue ids, for example X and Y, but only + traffic goming in from queue id Y can be directed to sockets bound + to the same queue id Y. In zero-copy mode, you should use the + switch, or other distribution mechanism, in your NIC to direct + traffic to the correct queue id and socket. + +Q: My packets are sometimes corrupted. What is wrong? + +A: Care has to be taken not to feed the same buffer in the UMEM into + more than one ring at the same time. If you for example feed the + same buffer into the FILL ring and the TX ring at the same time, the + NIC might receive data into the buffer at the same time it is + sending it. This will cause some packets to become corrupted. Same + thing goes for feeding the same buffer into the FILL rings + belonging to different queue ids or netdevs bound with the + XDP_SHARED_UMEM flag. + +Credits +======= + +- Björn Töpel (AF_XDP core) +- Magnus Karlsson (AF_XDP core) +- Alexander Duyck +- Alexei Starovoitov +- Daniel Borkmann +- Jesper Dangaard Brouer +- John Fastabend +- Jonathan Corbet (LWN coverage) +- Michael S. Tsirkin +- Qi Z Zhang +- Willem de Bruijn diff --git a/Documentation/networking/alias.rst b/Documentation/networking/alias.rst new file mode 100644 index 000000000..af7c5ee92 --- /dev/null +++ b/Documentation/networking/alias.rst @@ -0,0 +1,49 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========== +IP-Aliasing +=========== + +IP-aliases are an obsolete way to manage multiple IP-addresses/masks +per interface. Newer tools such as iproute2 support multiple +address/prefixes per interface, but aliases are still supported +for backwards compatibility. + +An alias is formed by adding a colon and a string when running ifconfig. +This string is usually numeric, but this is not a must. + + +Alias creation +============== + +Alias creation is done by 'magic' interface naming: eg. to create a +200.1.1.1 alias for eth0 ... +:: + + # ifconfig eth0:0 200.1.1.1 etc,etc.... + ~~ -> request alias #0 creation (if not yet exists) for eth0 + +The corresponding route is also set up by this command. Please note: +The route always points to the base interface. + + +Alias deletion +============== + +The alias is removed by shutting the alias down:: + + # ifconfig eth0:0 down + ~~~~~~~~~~ -> will delete alias + + +Alias (re-)configuring +====================== + +Aliases are not real devices, but programs should be able to configure +and refer to them as usual (ifconfig, route, etc). + + +Relationship with main device +============================= + +If the base device is shut down the added aliases will be deleted too. diff --git a/Documentation/networking/arcnet-hardware.rst b/Documentation/networking/arcnet-hardware.rst new file mode 100644 index 000000000..ac249ac8f --- /dev/null +++ b/Documentation/networking/arcnet-hardware.rst @@ -0,0 +1,3234 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +ARCnet Hardware +=============== + +.. note:: + + 1) This file is a supplement to arcnet.txt. Please read that for general + driver configuration help. + 2) This file is no longer Linux-specific. It should probably be moved out + of the kernel sources. Ideas? + +Because so many people (myself included) seem to have obtained ARCnet cards +without manuals, this file contains a quick introduction to ARCnet hardware, +some cabling tips, and a listing of all jumper settings I can find. Please +e-mail apenwarr@worldvisions.ca with any settings for your particular card, +or any other information you have! + + +Introduction to ARCnet +====================== + +ARCnet is a network type which works in a way similar to popular Ethernet +networks but which is also different in some very important ways. + +First of all, you can get ARCnet cards in at least two speeds: 2.5 Mbps +(slower than Ethernet) and 100 Mbps (faster than normal Ethernet). In fact, +there are others as well, but these are less common. The different hardware +types, as far as I'm aware, are not compatible and so you cannot wire a +100 Mbps card to a 2.5 Mbps card, and so on. From what I hear, my driver does +work with 100 Mbps cards, but I haven't been able to verify this myself, +since I only have the 2.5 Mbps variety. It is probably not going to saturate +your 100 Mbps card. Stop complaining. :) + +You also cannot connect an ARCnet card to any kind of Ethernet card and +expect it to work. + +There are two "types" of ARCnet - STAR topology and BUS topology. This +refers to how the cards are meant to be wired together. According to most +available documentation, you can only connect STAR cards to STAR cards and +BUS cards to BUS cards. That makes sense, right? Well, it's not quite +true; see below under "Cabling." + +Once you get past these little stumbling blocks, ARCnet is actually quite a +well-designed standard. It uses something called "modified token passing" +which makes it completely incompatible with so-called "Token Ring" cards, +but which makes transfers much more reliable than Ethernet does. In fact, +ARCnet will guarantee that a packet arrives safely at the destination, and +even if it can't possibly be delivered properly (ie. because of a cable +break, or because the destination computer does not exist) it will at least +tell the sender about it. + +Because of the carefully defined action of the "token", it will always make +a pass around the "ring" within a maximum length of time. This makes it +useful for realtime networks. + +In addition, all known ARCnet cards have an (almost) identical programming +interface. This means that with one ARCnet driver you can support any +card, whereas with Ethernet each manufacturer uses what is sometimes a +completely different programming interface, leading to a lot of different, +sometimes very similar, Ethernet drivers. Of course, always using the same +programming interface also means that when high-performance hardware +facilities like PCI bus mastering DMA appear, it's hard to take advantage of +them. Let's not go into that. + +One thing that makes ARCnet cards difficult to program for, however, is the +limit on their packet sizes; standard ARCnet can only send packets that are +up to 508 bytes in length. This is smaller than the Internet "bare minimum" +of 576 bytes, let alone the Ethernet MTU of 1500. To compensate, an extra +level of encapsulation is defined by RFC1201, which I call "packet +splitting," that allows "virtual packets" to grow as large as 64K each, +although they are generally kept down to the Ethernet-style 1500 bytes. + +For more information on the advantages and disadvantages (mostly the +advantages) of ARCnet networks, you might try the "ARCnet Trade Association" +WWW page: + + http://www.arcnet.com + + +Cabling ARCnet Networks +======================= + +This section was rewritten by + + Vojtech Pavlik + +using information from several people, including: + + - Avery Pennraun + - Stephen A. Wood + - John Paul Morrison + - Joachim Koenig + +and Avery touched it up a bit, at Vojtech's request. + +ARCnet (the classic 2.5 Mbps version) can be connected by two different +types of cabling: coax and twisted pair. The other ARCnet-type networks +(100 Mbps TCNS and 320 kbps - 32 Mbps ARCnet Plus) use different types of +cabling (Type1, Fiber, C1, C4, C5). + +For a coax network, you "should" use 93 Ohm RG-62 cable. But other cables +also work fine, because ARCnet is a very stable network. I personally use 75 +Ohm TV antenna cable. + +Cards for coax cabling are shipped in two different variants: for BUS and +STAR network topologies. They are mostly the same. The only difference +lies in the hybrid chip installed. BUS cards use high impedance output, +while STAR use low impedance. Low impedance card (STAR) is electrically +equal to a high impedance one with a terminator installed. + +Usually, the ARCnet networks are built up from STAR cards and hubs. There +are two types of hubs - active and passive. Passive hubs are small boxes +with four BNC connectors containing four 47 Ohm resistors:: + + | | wires + R + junction + -R-+-R- R 47 Ohm resistors + R + | + +The shielding is connected together. Active hubs are much more complicated; +they are powered and contain electronics to amplify the signal and send it +to other segments of the net. They usually have eight connectors. Active +hubs come in two variants - dumb and smart. The dumb variant just +amplifies, but the smart one decodes to digital and encodes back all packets +coming through. This is much better if you have several hubs in the net, +since many dumb active hubs may worsen the signal quality. + +And now to the cabling. What you can connect together: + +1. A card to a card. This is the simplest way of creating a 2-computer + network. + +2. A card to a passive hub. Remember that all unused connectors on the hub + must be properly terminated with 93 Ohm (or something else if you don't + have the right ones) terminators. + + (Avery's note: oops, I didn't know that. Mine (TV cable) works + anyway, though.) + +3. A card to an active hub. Here is no need to terminate the unused + connectors except some kind of aesthetic feeling. But, there may not be + more than eleven active hubs between any two computers. That of course + doesn't limit the number of active hubs on the network. + +4. An active hub to another. + +5. An active hub to passive hub. + +Remember that you cannot connect two passive hubs together. The power loss +implied by such a connection is too high for the net to operate reliably. + +An example of a typical ARCnet network:: + + R S - STAR type card + S------H--------A-------S R - Terminator + | | H - Hub + | | A - Active hub + | S----H----S + S | + | + S + +The BUS topology is very similar to the one used by Ethernet. The only +difference is in cable and terminators: they should be 93 Ohm. Ethernet +uses 50 Ohm impedance. You use T connectors to put the computers on a single +line of cable, the bus. You have to put terminators at both ends of the +cable. A typical BUS ARCnet network looks like:: + + RT----T------T------T------T------TR + B B B B B B + + B - BUS type card + R - Terminator + T - T connector + +But that is not all! The two types can be connected together. According to +the official documentation the only way of connecting them is using an active +hub:: + + A------T------T------TR + | B B B + S---H---S + | + S + +The official docs also state that you can use STAR cards at the ends of +BUS network in place of a BUS card and a terminator:: + + S------T------T------S + B B + +But, according to my own experiments, you can simply hang a BUS type card +anywhere in middle of a cable in a STAR topology network. And more - you +can use the bus card in place of any star card if you use a terminator. Then +you can build very complicated networks fulfilling all your needs! An +example:: + + S + | + RT------T-------T------H------S + B B B | + | R + S------A------T-------T-------A-------H------TR + | B B | | B + | S BT | + | | | S----A-----S + S------H---A----S | | + | | S------T----H---S | + S S B R S + +A basically different cabling scheme is used with Twisted Pair cabling. Each +of the TP cards has two RJ (phone-cord style) connectors. The cards are +then daisy-chained together using a cable connecting every two neighboring +cards. The ends are terminated with RJ 93 Ohm terminators which plug into +the empty connectors of cards on the ends of the chain. An example:: + + ___________ ___________ + _R_|_ _|_|_ _|_R_ + | | | | | | + |Card | |Card | |Card | + |_____| |_____| |_____| + + +There are also hubs for the TP topology. There is nothing difficult +involved in using them; you just connect a TP chain to a hub on any end or +even at both. This way you can create almost any network configuration. +The maximum of 11 hubs between any two computers on the net applies here as +well. An example:: + + RP-------P--------P--------H-----P------P-----PR + | + RP-----H--------P--------H-----P------PR + | | + PR PR + + R - RJ Terminator + P - TP Card + H - TP Hub + +Like any network, ARCnet has a limited cable length. These are the maximum +cable lengths between two active ends (an active end being an active hub or +a STAR card). + + ========== ======= =========== + RG-62 93 Ohm up to 650 m + RG-59/U 75 Ohm up to 457 m + RG-11/U 75 Ohm up to 533 m + IBM Type 1 150 Ohm up to 200 m + IBM Type 3 100 Ohm up to 100 m + ========== ======= =========== + +The maximum length of all cables connected to a passive hub is limited to 65 +meters for RG-62 cabling; less for others. You can see that using passive +hubs in a large network is a bad idea. The maximum length of a single "BUS +Trunk" is about 300 meters for RG-62. The maximum distance between the two +most distant points of the net is limited to 3000 meters. The maximum length +of a TP cable between two cards/hubs is 650 meters. + + +Setting the Jumpers +=================== + +All ARCnet cards should have a total of four or five different settings: + + - the I/O address: this is the "port" your ARCnet card is on. Probed + values in the Linux ARCnet driver are only from 0x200 through 0x3F0. (If + your card has additional ones, which is possible, please tell me.) This + should not be the same as any other device on your system. According to + a doc I got from Novell, MS Windows prefers values of 0x300 or more, + eating net connections on my system (at least) otherwise. My guess is + this may be because, if your card is at 0x2E0, probing for a serial port + at 0x2E8 will reset the card and probably mess things up royally. + + - Avery's favourite: 0x300. + + - the IRQ: on 8-bit cards, it might be 2 (9), 3, 4, 5, or 7. + on 16-bit cards, it might be 2 (9), 3, 4, 5, 7, or 10-15. + + Make sure this is different from any other card on your system. Note + that IRQ2 is the same as IRQ9, as far as Linux is concerned. You can + "cat /proc/interrupts" for a somewhat complete list of which ones are in + use at any given time. Here is a list of common usages from Vojtech + Pavlik : + + ("Not on bus" means there is no way for a card to generate this + interrupt) + + ====== ========================================================= + IRQ 0 Timer 0 (Not on bus) + IRQ 1 Keyboard (Not on bus) + IRQ 2 IRQ Controller 2 (Not on bus, nor does interrupt the CPU) + IRQ 3 COM2 + IRQ 4 COM1 + IRQ 5 FREE (LPT2 if you have it; sometimes COM3; maybe PLIP) + IRQ 6 Floppy disk controller + IRQ 7 FREE (LPT1 if you don't use the polling driver; PLIP) + IRQ 8 Realtime Clock Interrupt (Not on bus) + IRQ 9 FREE (VGA vertical sync interrupt if enabled) + IRQ 10 FREE + IRQ 11 FREE + IRQ 12 FREE + IRQ 13 Numeric Coprocessor (Not on bus) + IRQ 14 Fixed Disk Controller + IRQ 15 FREE (Fixed Disk Controller 2 if you have it) + ====== ========================================================= + + + .. note:: + + IRQ 9 is used on some video cards for the "vertical retrace" + interrupt. This interrupt would have been handy for things like + video games, as it occurs exactly once per screen refresh, but + unfortunately IBM cancelled this feature starting with the original + VGA and thus many VGA/SVGA cards do not support it. For this + reason, no modern software uses this interrupt and it can almost + always be safely disabled, if your video card supports it at all. + + If your card for some reason CANNOT disable this IRQ (usually there + is a jumper), one solution would be to clip the printed circuit + contact on the board: it's the fourth contact from the left on the + back side. I take no responsibility if you try this. + + - Avery's favourite: IRQ2 (actually IRQ9). Watch that VGA, though. + + - the memory address: Unlike most cards, ARCnets use "shared memory" for + copying buffers around. Make SURE it doesn't conflict with any other + used memory in your system! + + :: + + A0000 - VGA graphics memory (ok if you don't have VGA) + B0000 - Monochrome text mode + C0000 \ One of these is your VGA BIOS - usually C0000. + E0000 / + F0000 - System BIOS + + Anything less than 0xA0000 is, well, a BAD idea since it isn't above + 640k. + + - Avery's favourite: 0xD0000 + + - the station address: Every ARCnet card has its own "unique" network + address from 0 to 255. Unlike Ethernet, you can set this address + yourself with a jumper or switch (or on some cards, with special + software). Since it's only 8 bits, you can only have 254 ARCnet cards + on a network. DON'T use 0 or 255, since these are reserved (although + neat stuff will probably happen if you DO use them). By the way, if you + haven't already guessed, don't set this the same as any other ARCnet on + your network! + + - Avery's favourite: 3 and 4. Not that it matters. + + - There may be ETS1 and ETS2 settings. These may or may not make a + difference on your card (many manuals call them "reserved"), but are + used to change the delays used when powering up a computer on the + network. This is only necessary when wiring VERY long range ARCnet + networks, on the order of 4km or so; in any case, the only real + requirement here is that all cards on the network with ETS1 and ETS2 + jumpers have them in the same position. Chris Hindy + sent in a chart with actual values for this: + + ======= ======= =============== ==================== + ET1 ET2 Response Time Reconfiguration Time + ======= ======= =============== ==================== + open open 74.7us 840us + open closed 283.4us 1680us + closed open 561.8us 1680us + closed closed 1118.6us 1680us + ======= ======= =============== ==================== + + Make sure you set ETS1 and ETS2 to the SAME VALUE for all cards on your + network. + +Also, on many cards (not mine, though) there are red and green LED's. +Vojtech Pavlik tells me this is what they mean: + + =============== =============== ===================================== + GREEN RED Status + =============== =============== ===================================== + OFF OFF Power off + OFF Short flashes Cabling problems (broken cable or not + terminated) + OFF (short) ON Card init + ON ON Normal state - everything OK, nothing + happens + ON Long flashes Data transfer + ON OFF Never happens (maybe when wrong ID) + =============== =============== ===================================== + + +The following is all the specific information people have sent me about +their own particular ARCnet cards. It is officially a mess, and contains +huge amounts of duplicated information. I have no time to fix it. If you +want to, PLEASE DO! Just send me a 'diff -u' of all your changes. + +The model # is listed right above specifics for that card, so you should be +able to use your text viewer's "search" function to find the entry you want. +If you don't KNOW what kind of card you have, try looking through the +various diagrams to see if you can tell. + +If your model isn't listed and/or has different settings, PLEASE PLEASE +tell me. I had to figure mine out without the manual, and it WASN'T FUN! + +Even if your ARCnet model isn't listed, but has the same jumpers as another +model that is, please e-mail me to say so. + +Cards Listed in this file (in this order, mostly): + + =============== ======================= ==== + Manufacturer Model # Bits + =============== ======================= ==== + SMC PC100 8 + SMC PC110 8 + SMC PC120 8 + SMC PC130 8 + SMC PC270E 8 + SMC PC500 16 + SMC PC500Longboard 16 + SMC PC550Longboard 16 + SMC PC600 16 + SMC PC710 8 + SMC? LCS-8830(-T) 8/16 + Puredata PDI507 8 + CNet Tech CN120-Series 8 + CNet Tech CN160-Series 16 + Lantech? UM9065L chipset 8 + Acer 5210-003 8 + Datapoint? LAN-ARC-8 8 + Topware TA-ARC/10 8 + Thomas-Conrad 500-6242-0097 REV A 8 + Waterloo? (C)1985 Waterloo Micro. 8 + No Name -- 8/16 + No Name Taiwan R.O.C? 8 + No Name Model 9058 8 + Tiara Tiara Lancard? 8 + =============== ======================= ==== + + +* SMC = Standard Microsystems Corp. +* CNet Tech = CNet Technology, Inc. + +Unclassified Stuff +================== + + - Please send any other information you can find. + + - And some other stuff (more info is welcome!):: + + From: root@ultraworld.xs4all.nl (Timo Hilbrink) + To: apenwarr@foxnet.net (Avery Pennarun) + Date: Wed, 26 Oct 1994 02:10:32 +0000 (GMT) + Reply-To: timoh@xs4all.nl + + [...parts deleted...] + + About the jumpers: On my PC130 there is one more jumper, located near the + cable-connector and it's for changing to star or bus topology; + closed: star - open: bus + On the PC500 are some more jumper-pins, one block labeled with RX,PDN,TXI + and another with ALE,LA17,LA18,LA19 these are undocumented.. + + [...more parts deleted...] + + --- CUT --- + +Standard Microsystems Corp (SMC) +================================ + +PC100, PC110, PC120, PC130 (8-bit cards) and PC500, PC600 (16-bit cards) +------------------------------------------------------------------------ + + - mainly from Avery Pennarun . Values depicted + are from Avery's setup. + - special thanks to Timo Hilbrink for noting that PC120, + 130, 500, and 600 all have the same switches as Avery's PC100. + PC500/600 have several extra, undocumented pins though. (?) + - PC110 settings were verified by Stephen A. Wood + - Also, the JP- and S-numbers probably don't match your card exactly. Try + to find jumpers/switches with the same number of settings - it's + probably more reliable. + +:: + + JP5 [|] : : : : + (IRQ Setting) IRQ2 IRQ3 IRQ4 IRQ5 IRQ7 + Put exactly one jumper on exactly one set of pins. + + + 1 2 3 4 5 6 7 8 9 10 + S1 /----------------------------------\ + (I/O and Memory | 1 1 * 0 0 0 0 * 1 1 0 1 | + addresses) \----------------------------------/ + |--| |--------| |--------| + (a) (b) (m) + + WARNING. It's very important when setting these which way + you're holding the card, and which way you think is '1'! + + If you suspect that your settings are not being made + correctly, try reversing the direction or inverting the + switch positions. + + a: The first digit of the I/O address. + Setting Value + ------- ----- + 00 0 + 01 1 + 10 2 + 11 3 + + b: The second digit of the I/O address. + Setting Value + ------- ----- + 0000 0 + 0001 1 + 0010 2 + ... ... + 1110 E + 1111 F + + The I/O address is in the form ab0. For example, if + a is 0x2 and b is 0xE, the address will be 0x2E0. + + DO NOT SET THIS LESS THAN 0x200!!!!! + + + m: The first digit of the memory address. + Setting Value + ------- ----- + 0000 0 + 0001 1 + 0010 2 + ... ... + 1110 E + 1111 F + + The memory address is in the form m0000. For example, if + m is D, the address will be 0xD0000. + + DO NOT SET THIS TO C0000, F0000, OR LESS THAN A0000! + + 1 2 3 4 5 6 7 8 + S2 /--------------------------\ + (Station Address) | 1 1 0 0 0 0 0 0 | + \--------------------------/ + + Setting Value + ------- ----- + 00000000 00 + 10000000 01 + 01000000 02 + ... + 01111111 FE + 11111111 FF + + Note that this is binary with the digits reversed! + + DO NOT SET THIS TO 0 OR 255 (0xFF)! + + +PC130E/PC270E (8-bit cards) +--------------------------- + + - from Juergen Seifert + +This description has been written by Juergen Seifert +using information from the following Original SMC Manual + + "Configuration Guide for ARCNET(R)-PC130E/PC270 Network + Controller Boards Pub. # 900.044A June, 1989" + +ARCNET is a registered trademark of the Datapoint Corporation +SMC is a registered trademark of the Standard Microsystems Corporation + +The PC130E is an enhanced version of the PC130 board, is equipped with a +standard BNC female connector for connection to RG-62/U coax cable. +Since this board is designed both for point-to-point connection in star +networks and for connection to bus networks, it is downwardly compatible +with all the other standard boards designed for coax networks (that is, +the PC120, PC110 and PC100 star topology boards and the PC220, PC210 and +PC200 bus topology boards). + +The PC270E is an enhanced version of the PC260 board, is equipped with two +modular RJ11-type jacks for connection to twisted pair wiring. +It can be used in a star or a daisy-chained network. + +:: + + 8 7 6 5 4 3 2 1 + ________________________________________________________________ + | | S1 | | + | |_________________| | + | Offs|Base |I/O Addr | + | RAM Addr | ___| + | ___ ___ CR3 |___| + | | \/ | CR4 |___| + | | PROM | ___| + | | | N | | 8 + | | SOCKET | o | | 7 + | |________| d | | 6 + | ___________________ e | | 5 + | | | A | S | 4 + | |oo| EXT2 | | d | 2 | 3 + | |oo| EXT1 | SMC | d | | 2 + | |oo| ROM | 90C63 | r |___| 1 + | |oo| IRQ7 | | |o| _____| + | |oo| IRQ5 | | |o| | J1 | + | |oo| IRQ4 | | STAR |_____| + | |oo| IRQ3 | | | J2 | + | |oo| IRQ2 |___________________| |_____| + |___ ______________| + | | + |_____________________________________________| + +Legend:: + + SMC 90C63 ARCNET Controller / Transceiver /Logic + S1 1-3: I/O Base Address Select + 4-6: Memory Base Address Select + 7-8: RAM Offset Select + S2 1-8: Node ID Select + EXT Extended Timeout Select + ROM ROM Enable Select + STAR Selected - Star Topology (PC130E only) + Deselected - Bus Topology (PC130E only) + CR3/CR4 Diagnostic LEDs + J1 BNC RG62/U Connector (PC130E only) + J1 6-position Telephone Jack (PC270E only) + J2 6-position Telephone Jack (PC270E only) + +Setting one of the switches to Off/Open means "1", On/Closed means "0". + + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in group S2 are used to set the node ID. +These switches work in a way similar to the PC100-series cards; see that +entry for more information. + + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The first three switches in switch group S1 are used to select one +of eight possible I/O Base addresses using the following table:: + + + Switch | Hex I/O + 1 2 3 | Address + -------|-------- + 0 0 0 | 260 + 0 0 1 | 290 + 0 1 0 | 2E0 (Manufacturer's default) + 0 1 1 | 2F0 + 1 0 0 | 300 + 1 0 1 | 350 + 1 1 0 | 380 + 1 1 1 | 3E0 + + +Setting the Base Memory (RAM) buffer Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The memory buffer requires 2K of a 16K block of RAM. The base of this +16K block can be located in any of eight positions. +Switches 4-6 of switch group S1 select the Base of the 16K block. +Within that 16K address space, the buffer may be assigned any one of four +positions, determined by the offset, switches 7 and 8 of group S1. + +:: + + Switch | Hex RAM | Hex ROM + 4 5 6 7 8 | Address | Address *) + -----------|---------|----------- + 0 0 0 0 0 | C0000 | C2000 + 0 0 0 0 1 | C0800 | C2000 + 0 0 0 1 0 | C1000 | C2000 + 0 0 0 1 1 | C1800 | C2000 + | | + 0 0 1 0 0 | C4000 | C6000 + 0 0 1 0 1 | C4800 | C6000 + 0 0 1 1 0 | C5000 | C6000 + 0 0 1 1 1 | C5800 | C6000 + | | + 0 1 0 0 0 | CC000 | CE000 + 0 1 0 0 1 | CC800 | CE000 + 0 1 0 1 0 | CD000 | CE000 + 0 1 0 1 1 | CD800 | CE000 + | | + 0 1 1 0 0 | D0000 | D2000 (Manufacturer's default) + 0 1 1 0 1 | D0800 | D2000 + 0 1 1 1 0 | D1000 | D2000 + 0 1 1 1 1 | D1800 | D2000 + | | + 1 0 0 0 0 | D4000 | D6000 + 1 0 0 0 1 | D4800 | D6000 + 1 0 0 1 0 | D5000 | D6000 + 1 0 0 1 1 | D5800 | D6000 + | | + 1 0 1 0 0 | D8000 | DA000 + 1 0 1 0 1 | D8800 | DA000 + 1 0 1 1 0 | D9000 | DA000 + 1 0 1 1 1 | D9800 | DA000 + | | + 1 1 0 0 0 | DC000 | DE000 + 1 1 0 0 1 | DC800 | DE000 + 1 1 0 1 0 | DD000 | DE000 + 1 1 0 1 1 | DD800 | DE000 + | | + 1 1 1 0 0 | E0000 | E2000 + 1 1 1 0 1 | E0800 | E2000 + 1 1 1 1 0 | E1000 | E2000 + 1 1 1 1 1 | E1800 | E2000 + + *) To enable the 8K Boot PROM install the jumper ROM. + The default is jumper ROM not installed. + + +Setting the Timeouts and Interrupt +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The jumpers labeled EXT1 and EXT2 are used to determine the timeout +parameters. These two jumpers are normally left open. + +To select a hardware interrupt level set one (only one!) of the jumpers +IRQ2, IRQ3, IRQ4, IRQ5, IRQ7. The Manufacturer's default is IRQ2. + + +Configuring the PC130E for Star or Bus Topology +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The single jumper labeled STAR is used to configure the PC130E board for +star or bus topology. +When the jumper is installed, the board may be used in a star network, when +it is removed, the board can be used in a bus topology. + + +Diagnostic LEDs +^^^^^^^^^^^^^^^ + +Two diagnostic LEDs are visible on the rear bracket of the board. +The green LED monitors the network activity: the red one shows the +board activity:: + + Green | Status Red | Status + -------|------------------- ---------|------------------- + on | normal activity flash/on | data transfer + blink | reconfiguration off | no data transfer; + off | defective board or | incorrect memory or + | node ID is zero | I/O address + + +PC500/PC550 Longboard (16-bit cards) +------------------------------------ + + - from Juergen Seifert + + + .. note:: + + There is another Version of the PC500 called Short Version, which + is different in hard- and software! The most important differences + are: + + - The long board has no Shared memory. + - On the long board the selection of the interrupt is done by binary + coded switch, on the short board directly by jumper. + +[Avery's note: pay special attention to that: the long board HAS NO SHARED +MEMORY. This means the current Linux-ARCnet driver can't use these cards. +I have obtained a PC500Longboard and will be doing some experiments on it in +the future, but don't hold your breath. Thanks again to Juergen Seifert for +his advice about this!] + +This description has been written by Juergen Seifert +using information from the following Original SMC Manual + + "Configuration Guide for SMC ARCNET-PC500/PC550 + Series Network Controller Boards Pub. # 900.033 Rev. A + November, 1989" + +ARCNET is a registered trademark of the Datapoint Corporation +SMC is a registered trademark of the Standard Microsystems Corporation + +The PC500 is equipped with a standard BNC female connector for connection +to RG-62/U coax cable. +The board is designed both for point-to-point connection in star networks +and for connection to bus networks. + +The PC550 is equipped with two modular RJ11-type jacks for connection +to twisted pair wiring. +It can be used in a star or a daisy-chained (BUS) network. + +:: + + 1 + 0 9 8 7 6 5 4 3 2 1 6 5 4 3 2 1 + ____________________________________________________________________ + < | SW1 | | SW2 | | + > |_____________________| |_____________| | + < IRQ |I/O Addr | + > ___| + < CR4 |___| + > CR3 |___| + < ___| + > N | | 8 + < o | | 7 + > d | S | 6 + < e | W | 5 + > A | 3 | 4 + < d | | 3 + > d | | 2 + < r |___| 1 + > |o| _____| + < |o| | J1 | + > 3 1 JP6 |_____| + < |o|o| JP2 | J2 | + > |o|o| |_____| + < 4 2__ ______________| + > | | | + <____| |_____________________________________________| + +Legend:: + + SW1 1-6: I/O Base Address Select + 7-10: Interrupt Select + SW2 1-6: Reserved for Future Use + SW3 1-8: Node ID Select + JP2 1-4: Extended Timeout Select + JP6 Selected - Star Topology (PC500 only) + Deselected - Bus Topology (PC500 only) + CR3 Green Monitors Network Activity + CR4 Red Monitors Board Activity + J1 BNC RG62/U Connector (PC500 only) + J1 6-position Telephone Jack (PC550 only) + J2 6-position Telephone Jack (PC550 only) + +Setting one of the switches to Off/Open means "1", On/Closed means "0". + + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in group SW3 are used to set the node ID. Each node +attached to the network must have an unique node ID which must be +different from 0. +Switch 1 serves as the least significant bit (LSB). + +The node ID is the sum of the values of all switches set to "1" +These values are:: + + Switch | Value + -------|------- + 1 | 1 + 2 | 2 + 3 | 4 + 4 | 8 + 5 | 16 + 6 | 32 + 7 | 64 + 8 | 128 + +Some Examples:: + + Switch | Hex | Decimal + 8 7 6 5 4 3 2 1 | Node ID | Node ID + ----------------|---------|--------- + 0 0 0 0 0 0 0 0 | not allowed + 0 0 0 0 0 0 0 1 | 1 | 1 + 0 0 0 0 0 0 1 0 | 2 | 2 + 0 0 0 0 0 0 1 1 | 3 | 3 + . . . | | + 0 1 0 1 0 1 0 1 | 55 | 85 + . . . | | + 1 0 1 0 1 0 1 0 | AA | 170 + . . . | | + 1 1 1 1 1 1 0 1 | FD | 253 + 1 1 1 1 1 1 1 0 | FE | 254 + 1 1 1 1 1 1 1 1 | FF | 255 + + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The first six switches in switch group SW1 are used to select one +of 32 possible I/O Base addresses using the following table:: + + Switch | Hex I/O + 6 5 4 3 2 1 | Address + -------------|-------- + 0 1 0 0 0 0 | 200 + 0 1 0 0 0 1 | 210 + 0 1 0 0 1 0 | 220 + 0 1 0 0 1 1 | 230 + 0 1 0 1 0 0 | 240 + 0 1 0 1 0 1 | 250 + 0 1 0 1 1 0 | 260 + 0 1 0 1 1 1 | 270 + 0 1 1 0 0 0 | 280 + 0 1 1 0 0 1 | 290 + 0 1 1 0 1 0 | 2A0 + 0 1 1 0 1 1 | 2B0 + 0 1 1 1 0 0 | 2C0 + 0 1 1 1 0 1 | 2D0 + 0 1 1 1 1 0 | 2E0 (Manufacturer's default) + 0 1 1 1 1 1 | 2F0 + 1 1 0 0 0 0 | 300 + 1 1 0 0 0 1 | 310 + 1 1 0 0 1 0 | 320 + 1 1 0 0 1 1 | 330 + 1 1 0 1 0 0 | 340 + 1 1 0 1 0 1 | 350 + 1 1 0 1 1 0 | 360 + 1 1 0 1 1 1 | 370 + 1 1 1 0 0 0 | 380 + 1 1 1 0 0 1 | 390 + 1 1 1 0 1 0 | 3A0 + 1 1 1 0 1 1 | 3B0 + 1 1 1 1 0 0 | 3C0 + 1 1 1 1 0 1 | 3D0 + 1 1 1 1 1 0 | 3E0 + 1 1 1 1 1 1 | 3F0 + + +Setting the Interrupt +^^^^^^^^^^^^^^^^^^^^^ + +Switches seven through ten of switch group SW1 are used to select the +interrupt level. The interrupt level is binary coded, so selections +from 0 to 15 would be possible, but only the following eight values will +be supported: 3, 4, 5, 7, 9, 10, 11, 12. + +:: + + Switch | IRQ + 10 9 8 7 | + ---------|-------- + 0 0 1 1 | 3 + 0 1 0 0 | 4 + 0 1 0 1 | 5 + 0 1 1 1 | 7 + 1 0 0 1 | 9 (=2) (default) + 1 0 1 0 | 10 + 1 0 1 1 | 11 + 1 1 0 0 | 12 + + +Setting the Timeouts +^^^^^^^^^^^^^^^^^^^^ + +The two jumpers JP2 (1-4) are used to determine the timeout parameters. +These two jumpers are normally left open. +Refer to the COM9026 Data Sheet for alternate configurations. + + +Configuring the PC500 for Star or Bus Topology +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The single jumper labeled JP6 is used to configure the PC500 board for +star or bus topology. +When the jumper is installed, the board may be used in a star network, when +it is removed, the board can be used in a bus topology. + + +Diagnostic LEDs +^^^^^^^^^^^^^^^ + +Two diagnostic LEDs are visible on the rear bracket of the board. +The green LED monitors the network activity: the red one shows the +board activity:: + + Green | Status Red | Status + -------|------------------- ---------|------------------- + on | normal activity flash/on | data transfer + blink | reconfiguration off | no data transfer; + off | defective board or | incorrect memory or + | node ID is zero | I/O address + + +PC710 (8-bit card) +------------------ + + - from J.S. van Oosten + +Note: this data is gathered by experimenting and looking at info of other +cards. However, I'm sure I got 99% of the settings right. + +The SMC710 card resembles the PC270 card, but is much more basic (i.e. no +LEDs, RJ11 jacks, etc.) and 8 bit. Here's a little drawing:: + + _______________________________________ + | +---------+ +---------+ |____ + | | S2 | | S1 | | + | +---------+ +---------+ | + | | + | +===+ __ | + | | R | | | X-tal ###___ + | | O | |__| ####__'| + | | M | || ### + | +===+ | + | | + | .. JP1 +----------+ | + | .. | big chip | | + | .. | 90C63 | | + | .. | | | + | .. +----------+ | + ------- ----------- + ||||||||||||||||||||| + +The row of jumpers at JP1 actually consists of 8 jumpers, (sometimes +labelled) the same as on the PC270, from top to bottom: EXT2, EXT1, ROM, +IRQ7, IRQ5, IRQ4, IRQ3, IRQ2 (gee, wonder what they would do? :-) ) + +S1 and S2 perform the same function as on the PC270, only their numbers +are swapped (S1 is the nodeaddress, S2 sets IO- and RAM-address). + +I know it works when connected to a PC110 type ARCnet board. + + +***************************************************************************** + +Possibly SMC +============ + +LCS-8830(-T) (8 and 16-bit cards) +--------------------------------- + + - from Mathias Katzer + - Marek Michalkiewicz says the + LCS-8830 is slightly different from LCS-8830-T. These are 8 bit, BUS + only (the JP0 jumper is hardwired), and BNC only. + +This is a LCS-8830-T made by SMC, I think ('SMC' only appears on one PLCC, +nowhere else, not even on the few Xeroxed sheets from the manual). + +SMC ARCnet Board Type LCS-8830-T:: + + ------------------------------------ + | | + | JP3 88 8 JP2 | + | ##### | \ | + | ##### ET1 ET2 ###| + | 8 ###| + | U3 SW 1 JP0 ###| Phone Jacks + | -- ###| + | | | | + | | | SW2 | + | | | | + | | | ##### | + | -- ##### #### BNC Connector + | #### + | 888888 JP1 | + | 234567 | + -- ------- + ||||||||||||||||||||||||||| + -------------------------- + + + SW1: DIP-Switches for Station Address + SW2: DIP-Switches for Memory Base and I/O Base addresses + + JP0: If closed, internal termination on (default open) + JP1: IRQ Jumpers + JP2: Boot-ROM enabled if closed + JP3: Jumpers for response timeout + + U3: Boot-ROM Socket + + + ET1 ET2 Response Time Idle Time Reconfiguration Time + + 78 86 840 + X 285 316 1680 + X 563 624 1680 + X X 1130 1237 1680 + + (X means closed jumper) + + (DIP-Switch downwards means "0") + +The station address is binary-coded with SW1. + +The I/O base address is coded with DIP-Switches 6,7 and 8 of SW2: + +======== ======== +Switches Base +678 Address +======== ======== +000 260-26f +100 290-29f +010 2e0-2ef +110 2f0-2ff +001 300-30f +101 350-35f +011 380-38f +111 3e0-3ef +======== ======== + + +DIP Switches 1-5 of SW2 encode the RAM and ROM Address Range: + +======== ============= ================ +Switches RAM ROM +12345 Address Range Address Range +======== ============= ================ +00000 C:0000-C:07ff C:2000-C:3fff +10000 C:0800-C:0fff +01000 C:1000-C:17ff +11000 C:1800-C:1fff +00100 C:4000-C:47ff C:6000-C:7fff +10100 C:4800-C:4fff +01100 C:5000-C:57ff +11100 C:5800-C:5fff +00010 C:C000-C:C7ff C:E000-C:ffff +10010 C:C800-C:Cfff +01010 C:D000-C:D7ff +11010 C:D800-C:Dfff +00110 D:0000-D:07ff D:2000-D:3fff +10110 D:0800-D:0fff +01110 D:1000-D:17ff +11110 D:1800-D:1fff +00001 D:4000-D:47ff D:6000-D:7fff +10001 D:4800-D:4fff +01001 D:5000-D:57ff +11001 D:5800-D:5fff +00101 D:8000-D:87ff D:A000-D:bfff +10101 D:8800-D:8fff +01101 D:9000-D:97ff +11101 D:9800-D:9fff +00011 D:C000-D:c7ff D:E000-D:ffff +10011 D:C800-D:cfff +01011 D:D000-D:d7ff +11011 D:D800-D:dfff +00111 E:0000-E:07ff E:2000-E:3fff +10111 E:0800-E:0fff +01111 E:1000-E:17ff +11111 E:1800-E:1fff +======== ============= ================ + + +PureData Corp +============= + +PDI507 (8-bit card) +-------------------- + + - from Mark Rejhon (slight modifications by Avery) + - Avery's note: I think PDI508 cards (but definitely NOT PDI508Plus cards) + are mostly the same as this. PDI508Plus cards appear to be mainly + software-configured. + +Jumpers: + + There is a jumper array at the bottom of the card, near the edge + connector. This array is labelled J1. They control the IRQs and + something else. Put only one jumper on the IRQ pins. + + ETS1, ETS2 are for timing on very long distance networks. See the + more general information near the top of this file. + + There is a J2 jumper on two pins. A jumper should be put on them, + since it was already there when I got the card. I don't know what + this jumper is for though. + + There is a two-jumper array for J3. I don't know what it is for, + but there were already two jumpers on it when I got the card. It's + a six pin grid in a two-by-three fashion. The jumpers were + configured as follows:: + + .-------. + o | o o | + :-------: ------> Accessible end of card with connectors + o | o o | in this direction -------> + `-------' + +Carl de Billy explains J3 and J4: + + J3 Diagram:: + + .-------. + o | o o | + :-------: TWIST Technology + o | o o | + `-------' + .-------. + | o o | o + :-------: COAX Technology + | o o | o + `-------' + + - If using coax cable in a bus topology the J4 jumper must be removed; + place it on one pin. + + - If using bus topology with twisted pair wiring move the J3 + jumpers so they connect the middle pin and the pins closest to the RJ11 + Connectors. Also the J4 jumper must be removed; place it on one pin of + J4 jumper for storage. + + - If using star topology with twisted pair wiring move the J3 + jumpers so they connect the middle pin and the pins closest to the RJ11 + connectors. + + +DIP Switches: + + The DIP switches accessible on the accessible end of the card while + it is installed, is used to set the ARCnet address. There are 8 + switches. Use an address from 1 to 254 + + ========== ========================= + Switch No. ARCnet address + 12345678 + ========== ========================= + 00000000 FF (Don't use this!) + 00000001 FE + 00000010 FD + ... + 11111101 2 + 11111110 1 + 11111111 0 (Don't use this!) + ========== ========================= + + There is another array of eight DIP switches at the top of the + card. There are five labelled MS0-MS4 which seem to control the + memory address, and another three labelled IO0-IO2 which seem to + control the base I/O address of the card. + + This was difficult to test by trial and error, and the I/O addresses + are in a weird order. This was tested by setting the DIP switches, + rebooting the computer, and attempting to load ARCETHER at various + addresses (mostly between 0x200 and 0x400). The address that caused + the red transmit LED to blink, is the one that I thought works. + + Also, the address 0x3D0 seem to have a special meaning, since the + ARCETHER packet driver loaded fine, but without the red LED + blinking. I don't know what 0x3D0 is for though. I recommend using + an address of 0x300 since Windows may not like addresses below + 0x300. + + ============= =========== + IO Switch No. I/O address + 210 + ============= =========== + 111 0x260 + 110 0x290 + 101 0x2E0 + 100 0x2F0 + 011 0x300 + 010 0x350 + 001 0x380 + 000 0x3E0 + ============= =========== + + The memory switches set a reserved address space of 0x1000 bytes + (0x100 segment units, or 4k). For example if I set an address of + 0xD000, it will use up addresses 0xD000 to 0xD100. + + The memory switches were tested by booting using QEMM386 stealth, + and using LOADHI to see what address automatically became excluded + from the upper memory regions, and then attempting to load ARCETHER + using these addresses. + + I recommend using an ARCnet memory address of 0xD000, and putting + the EMS page frame at 0xC000 while using QEMM stealth mode. That + way, you get contiguous high memory from 0xD100 almost all the way + the end of the megabyte. + + Memory Switch 0 (MS0) didn't seem to work properly when set to OFF + on my card. It could be malfunctioning on my card. Experiment with + it ON first, and if it doesn't work, set it to OFF. (It may be a + modifier for the 0x200 bit?) + + ============= ============================================ + MS Switch No. + 43210 Memory address + ============= ============================================ + 00001 0xE100 (guessed - was not detected by QEMM) + 00011 0xE000 (guessed - was not detected by QEMM) + 00101 0xDD00 + 00111 0xDC00 + 01001 0xD900 + 01011 0xD800 + 01101 0xD500 + 01111 0xD400 + 10001 0xD100 + 10011 0xD000 + 10101 0xCD00 + 10111 0xCC00 + 11001 0xC900 (guessed - crashes tested system) + 11011 0xC800 (guessed - crashes tested system) + 11101 0xC500 (guessed - crashes tested system) + 11111 0xC400 (guessed - crashes tested system) + ============= ============================================ + +CNet Technology Inc. (8-bit cards) +================================== + +120 Series (8-bit cards) +------------------------ + - from Juergen Seifert + +This description has been written by Juergen Seifert +using information from the following Original CNet Manual + + "ARCNET USER'S MANUAL for + CN120A + CN120AB + CN120TP + CN120ST + CN120SBT + P/N:12-01-0007 + Revision 3.00" + +ARCNET is a registered trademark of the Datapoint Corporation + +- P/N 120A ARCNET 8 bit XT/AT Star +- P/N 120AB ARCNET 8 bit XT/AT Bus +- P/N 120TP ARCNET 8 bit XT/AT Twisted Pair +- P/N 120ST ARCNET 8 bit XT/AT Star, Twisted Pair +- P/N 120SBT ARCNET 8 bit XT/AT Star, Bus, Twisted Pair + +:: + + __________________________________________________________________ + | | + | ___| + | LED |___| + | ___| + | N | | ID7 + | o | | ID6 + | d | S | ID5 + | e | W | ID4 + | ___________________ A | 2 | ID3 + | | | d | | ID2 + | | | 1 2 3 4 5 6 7 8 d | | ID1 + | | | _________________ r |___| ID0 + | | 90C65 || SW1 | ____| + | JP 8 7 | ||_________________| | | + | |o|o| JP1 | | | J2 | + | |o|o| |oo| | | JP 1 1 1 | | + | ______________ | | 0 1 2 |____| + | | PROM | |___________________| |o|o|o| _____| + | > SOCKET | JP 6 5 4 3 2 |o|o|o| | J1 | + | |______________| |o|o|o|o|o| |o|o|o| |_____| + |_____ |o|o|o|o|o| ______________| + | | + |_____________________________________________| + +Legend:: + + 90C65 ARCNET Probe + S1 1-5: Base Memory Address Select + 6-8: Base I/O Address Select + S2 1-8: Node ID Select (ID0-ID7) + JP1 ROM Enable Select + JP2 IRQ2 + JP3 IRQ3 + JP4 IRQ4 + JP5 IRQ5 + JP6 IRQ7 + JP7/JP8 ET1, ET2 Timeout Parameters + JP10/JP11 Coax / Twisted Pair Select (CN120ST/SBT only) + JP12 Terminator Select (CN120AB/ST/SBT only) + J1 BNC RG62/U Connector (all except CN120TP) + J2 Two 6-position Telephone Jack (CN120TP/ST/SBT only) + +Setting one of the switches to Off means "1", On means "0". + + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in SW2 are used to set the node ID. Each node attached +to the network must have an unique node ID which must be different from 0. +Switch 1 (ID0) serves as the least significant bit (LSB). + +The node ID is the sum of the values of all switches set to "1" +These values are: + + ======= ====== ===== + Switch Label Value + ======= ====== ===== + 1 ID0 1 + 2 ID1 2 + 3 ID2 4 + 4 ID3 8 + 5 ID4 16 + 6 ID5 32 + 7 ID6 64 + 8 ID7 128 + ======= ====== ===== + +Some Examples:: + + Switch | Hex | Decimal + 8 7 6 5 4 3 2 1 | Node ID | Node ID + ----------------|---------|--------- + 0 0 0 0 0 0 0 0 | not allowed + 0 0 0 0 0 0 0 1 | 1 | 1 + 0 0 0 0 0 0 1 0 | 2 | 2 + 0 0 0 0 0 0 1 1 | 3 | 3 + . . . | | + 0 1 0 1 0 1 0 1 | 55 | 85 + . . . | | + 1 0 1 0 1 0 1 0 | AA | 170 + . . . | | + 1 1 1 1 1 1 0 1 | FD | 253 + 1 1 1 1 1 1 1 0 | FE | 254 + 1 1 1 1 1 1 1 1 | FF | 255 + + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The last three switches in switch block SW1 are used to select one +of eight possible I/O Base addresses using the following table:: + + + Switch | Hex I/O + 6 7 8 | Address + ------------|-------- + ON ON ON | 260 + OFF ON ON | 290 + ON OFF ON | 2E0 (Manufacturer's default) + OFF OFF ON | 2F0 + ON ON OFF | 300 + OFF ON OFF | 350 + ON OFF OFF | 380 + OFF OFF OFF | 3E0 + + +Setting the Base Memory (RAM) buffer Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The memory buffer (RAM) requires 2K. The base of this buffer can be +located in any of eight positions. The address of the Boot Prom is +memory base + 8K or memory base + 0x2000. +Switches 1-5 of switch block SW1 select the Memory Base address. + +:: + + Switch | Hex RAM | Hex ROM + 1 2 3 4 5 | Address | Address *) + --------------------|---------|----------- + ON ON ON ON ON | C0000 | C2000 + ON ON OFF ON ON | C4000 | C6000 + ON ON ON OFF ON | CC000 | CE000 + ON ON OFF OFF ON | D0000 | D2000 (Manufacturer's default) + ON ON ON ON OFF | D4000 | D6000 + ON ON OFF ON OFF | D8000 | DA000 + ON ON ON OFF OFF | DC000 | DE000 + ON ON OFF OFF OFF | E0000 | E2000 + + *) To enable the Boot ROM install the jumper JP1 + +.. note:: + + Since the switches 1 and 2 are always set to ON it may be possible + that they can be used to add an offset of 2K, 4K or 6K to the base + address, but this feature is not documented in the manual and I + haven't tested it yet. + + +Setting the Interrupt Line +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To select a hardware interrupt level install one (only one!) of the jumpers +JP2, JP3, JP4, JP5, JP6. JP2 is the default:: + + Jumper | IRQ + -------|----- + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 7 + + +Setting the Internal Terminator on CN120AB/TP/SBT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The jumper JP12 is used to enable the internal terminator:: + + ----- + 0 | 0 | + ----- ON | | ON + | 0 | | 0 | + | | OFF ----- OFF + | 0 | 0 + ----- + Terminator Terminator + disabled enabled + + +Selecting the Connector Type on CN120ST/SBT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + JP10 JP11 JP10 JP11 + ----- ----- + 0 0 | 0 | | 0 | + ----- ----- | | | | + | 0 | | 0 | | 0 | | 0 | + | | | | ----- ----- + | 0 | | 0 | 0 0 + ----- ----- + Coaxial Cable Twisted Pair Cable + (Default) + + +Setting the Timeout Parameters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The jumpers labeled EXT1 and EXT2 are used to determine the timeout +parameters. These two jumpers are normally left open. + + +CNet Technology Inc. (16-bit cards) +=================================== + +160 Series (16-bit cards) +------------------------- + - from Juergen Seifert + +This description has been written by Juergen Seifert +using information from the following Original CNet Manual + + "ARCNET USER'S MANUAL for + CN160A CN160AB CN160TP + P/N:12-01-0006 Revision 3.00" + +ARCNET is a registered trademark of the Datapoint Corporation + +- P/N 160A ARCNET 16 bit XT/AT Star +- P/N 160AB ARCNET 16 bit XT/AT Bus +- P/N 160TP ARCNET 16 bit XT/AT Twisted Pair + +:: + + ___________________________________________________________________ + < _________________________ ___| + > |oo| JP2 | | LED |___| + < |oo| JP1 | 9026 | LED |___| + > |_________________________| ___| + < N | | ID7 + > 1 o | | ID6 + < 1 2 3 4 5 6 7 8 9 0 d | S | ID5 + > _______________ _____________________ e | W | ID4 + < | PROM | | SW1 | A | 2 | ID3 + > > SOCKET | |_____________________| d | | ID2 + < |_______________| | IO-Base | MEM | d | | ID1 + > r |___| ID0 + < ____| + > | | + < | J1 | + > | | + < |____| + > 1 1 1 1 | + < 3 4 5 6 7 JP 8 9 0 1 2 3 | + > |o|o|o|o|o| |o|o|o|o|o|o| | + < |o|o|o|o|o| __ |o|o|o|o|o|o| ___________| + > | | | + <____________| |_______________________________________| + +Legend:: + + 9026 ARCNET Probe + SW1 1-6: Base I/O Address Select + 7-10: Base Memory Address Select + SW2 1-8: Node ID Select (ID0-ID7) + JP1/JP2 ET1, ET2 Timeout Parameters + JP3-JP13 Interrupt Select + J1 BNC RG62/U Connector (CN160A/AB only) + J1 Two 6-position Telephone Jack (CN160TP only) + LED + +Setting one of the switches to Off means "1", On means "0". + + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in SW2 are used to set the node ID. Each node attached +to the network must have an unique node ID which must be different from 0. +Switch 1 (ID0) serves as the least significant bit (LSB). + +The node ID is the sum of the values of all switches set to "1" +These values are:: + + Switch | Label | Value + -------|-------|------- + 1 | ID0 | 1 + 2 | ID1 | 2 + 3 | ID2 | 4 + 4 | ID3 | 8 + 5 | ID4 | 16 + 6 | ID5 | 32 + 7 | ID6 | 64 + 8 | ID7 | 128 + +Some Examples:: + + Switch | Hex | Decimal + 8 7 6 5 4 3 2 1 | Node ID | Node ID + ----------------|---------|--------- + 0 0 0 0 0 0 0 0 | not allowed + 0 0 0 0 0 0 0 1 | 1 | 1 + 0 0 0 0 0 0 1 0 | 2 | 2 + 0 0 0 0 0 0 1 1 | 3 | 3 + . . . | | + 0 1 0 1 0 1 0 1 | 55 | 85 + . . . | | + 1 0 1 0 1 0 1 0 | AA | 170 + . . . | | + 1 1 1 1 1 1 0 1 | FD | 253 + 1 1 1 1 1 1 1 0 | FE | 254 + 1 1 1 1 1 1 1 1 | FF | 255 + + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The first six switches in switch block SW1 are used to select the I/O Base +address using the following table:: + + Switch | Hex I/O + 1 2 3 4 5 6 | Address + ------------------------|-------- + OFF ON ON OFF OFF ON | 260 + OFF ON OFF ON ON OFF | 290 + OFF ON OFF OFF OFF ON | 2E0 (Manufacturer's default) + OFF ON OFF OFF OFF OFF | 2F0 + OFF OFF ON ON ON ON | 300 + OFF OFF ON OFF ON OFF | 350 + OFF OFF OFF ON ON ON | 380 + OFF OFF OFF OFF OFF ON | 3E0 + +Note: Other IO-Base addresses seem to be selectable, but only the above + combinations are documented. + + +Setting the Base Memory (RAM) buffer Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The switches 7-10 of switch block SW1 are used to select the Memory +Base address of the RAM (2K) and the PROM:: + + Switch | Hex RAM | Hex ROM + 7 8 9 10 | Address | Address + ----------------|---------|----------- + OFF OFF ON ON | C0000 | C8000 + OFF OFF ON OFF | D0000 | D8000 (Default) + OFF OFF OFF ON | E0000 | E8000 + +.. note:: + + Other MEM-Base addresses seem to be selectable, but only the above + combinations are documented. + + +Setting the Interrupt Line +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To select a hardware interrupt level install one (only one!) of the jumpers +JP3 through JP13 using the following table:: + + Jumper | IRQ + -------|----------------- + 3 | 14 + 4 | 15 + 5 | 12 + 6 | 11 + 7 | 10 + 8 | 3 + 9 | 4 + 10 | 5 + 11 | 6 + 12 | 7 + 13 | 2 (=9) Default! + +.. note:: + + - Do not use JP11=IRQ6, it may conflict with your Floppy Disk + Controller + - Use JP3=IRQ14 only, if you don't have an IDE-, MFM-, or RLL- + Hard Disk, it may conflict with their controllers + + +Setting the Timeout Parameters +------------------------------ + +The jumpers labeled JP1 and JP2 are used to determine the timeout +parameters. These two jumpers are normally left open. + + +Lantech +======= + +8-bit card, unknown model +------------------------- + - from Vlad Lungu - his e-mail address seemed broken at + the time I tried to reach him. Sorry Vlad, if you didn't get my reply. + +:: + + ________________________________________________________________ + | 1 8 | + | ___________ __| + | | SW1 | LED |__| + | |__________| | + | ___| + | _____________________ |S | 8 + | | | |W | + | | | |2 | + | | | |__| 1 + | | UM9065L | |o| JP4 ____|____ + | | | |o| | CN | + | | | |________| + | | | | + | |___________________| | + | | + | | + | _____________ | + | | | | + | | PROM | |ooooo| JP6 | + | |____________| |ooooo| | + |_____________ _ _| + |____________________________________________| |__| + + +UM9065L : ARCnet Controller + +SW 1 : Shared Memory Address and I/O Base + +:: + + ON=0 + + 12345|Memory Address + -----|-------------- + 00001| D4000 + 00010| CC000 + 00110| D0000 + 01110| D1000 + 01101| D9000 + 10010| CC800 + 10011| DC800 + 11110| D1800 + +It seems that the bits are considered in reverse order. Also, you must +observe that some of those addresses are unusual and I didn't probe them; I +used a memory dump in DOS to identify them. For the 00000 configuration and +some others that I didn't write here the card seems to conflict with the +video card (an S3 GENDAC). I leave the full decoding of those addresses to +you. + +:: + + 678| I/O Address + ---|------------ + 000| 260 + 001| failed probe + 010| 2E0 + 011| 380 + 100| 290 + 101| 350 + 110| failed probe + 111| 3E0 + + SW 2 : Node ID (binary coded) + + JP 4 : Boot PROM enable CLOSE - enabled + OPEN - disabled + + JP 6 : IRQ set (ONLY ONE jumper on 1-5 for IRQ 2-6) + + +Acer +==== + +8-bit card, Model 5210-003 +-------------------------- + + - from Vojtech Pavlik using portions of the existing + arcnet-hardware file. + +This is a 90C26 based card. Its configuration seems similar to the SMC +PC100, but has some additional jumpers I don't know the meaning of. + +:: + + __ + | | + ___________|__|_________________________ + | | | | + | | BNC | | + | |______| ___| + | _____________________ |___ + | | | | + | | Hybrid IC | | + | | | o|o J1 | + | |_____________________| 8|8 | + | 8|8 J5 | + | o|o | + | 8|8 | + |__ 8|8 | + (|__| LED o|o | + | 8|8 | + | 8|8 J15 | + | | + | _____ | + | | | _____ | + | | | | | ___| + | | | | | | + | _____ | ROM | | UFS | | + | | | | | | | | + | | | ___ | | | | | + | | | | | |__.__| |__.__| | + | | NCR | |XTL| _____ _____ | + | | | |___| | | | | | + | |90C26| | | | | | + | | | | RAM | | UFS | | + | | | J17 o|o | | | | | + | | | J16 o|o | | | | | + | |__.__| |__.__| |__.__| | + | ___ | + | | |8 | + | |SW2| | + | | | | + | |___|1 | + | ___ | + | | |10 J18 o|o | + | | | o|o | + | |SW1| o|o | + | | | J21 o|o | + | |___|1 | + | | + |____________________________________| + + +Legend:: + + 90C26 ARCNET Chip + XTL 20 MHz Crystal + SW1 1-6 Base I/O Address Select + 7-10 Memory Address Select + SW2 1-8 Node ID Select (ID0-ID7) + J1-J5 IRQ Select + J6-J21 Unknown (Probably extra timeouts & ROM enable ...) + LED1 Activity LED + BNC Coax connector (STAR ARCnet) + RAM 2k of SRAM + ROM Boot ROM socket + UFS Unidentified Flying Sockets + + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in SW2 are used to set the node ID. Each node attached +to the network must have an unique node ID which must not be 0. +Switch 1 (ID0) serves as the least significant bit (LSB). + +Setting one of the switches to OFF means "1", ON means "0". + +The node ID is the sum of the values of all switches set to "1" +These values are:: + + Switch | Value + -------|------- + 1 | 1 + 2 | 2 + 3 | 4 + 4 | 8 + 5 | 16 + 6 | 32 + 7 | 64 + 8 | 128 + +Don't set this to 0 or 255; these values are reserved. + + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The switches 1 to 6 of switch block SW1 are used to select one +of 32 possible I/O Base addresses using the following tables:: + + | Hex + Switch | Value + -------|------- + 1 | 200 + 2 | 100 + 3 | 80 + 4 | 40 + 5 | 20 + 6 | 10 + +The I/O address is sum of all switches set to "1". Remember that +the I/O address space bellow 0x200 is RESERVED for mainboard, so +switch 1 should be ALWAYS SET TO OFF. + + +Setting the Base Memory (RAM) buffer Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The memory buffer (RAM) requires 2K. The base of this buffer can be +located in any of sixteen positions. However, the addresses below +A0000 are likely to cause system hang because there's main RAM. + +Jumpers 7-10 of switch block SW1 select the Memory Base address:: + + Switch | Hex RAM + 7 8 9 10 | Address + ----------------|--------- + OFF OFF OFF OFF | F0000 (conflicts with main BIOS) + OFF OFF OFF ON | E0000 + OFF OFF ON OFF | D0000 + OFF OFF ON ON | C0000 (conflicts with video BIOS) + OFF ON OFF OFF | B0000 (conflicts with mono video) + OFF ON OFF ON | A0000 (conflicts with graphics) + + +Setting the Interrupt Line +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Jumpers 1-5 of the jumper block J1 control the IRQ level. ON means +shorted, OFF means open:: + + Jumper | IRQ + 1 2 3 4 5 | + ---------------------------- + ON OFF OFF OFF OFF | 7 + OFF ON OFF OFF OFF | 5 + OFF OFF ON OFF OFF | 4 + OFF OFF OFF ON OFF | 3 + OFF OFF OFF OFF ON | 2 + + +Unknown jumpers & sockets +^^^^^^^^^^^^^^^^^^^^^^^^^ + +I know nothing about these. I just guess that J16&J17 are timeout +jumpers and maybe one of J18-J21 selects ROM. Also J6-J10 and +J11-J15 are connecting IRQ2-7 to some pins on the UFSs. I can't +guess the purpose. + +Datapoint? +========== + +LAN-ARC-8, an 8-bit card +------------------------ + + - from Vojtech Pavlik + +This is another SMC 90C65-based ARCnet card. I couldn't identify the +manufacturer, but it might be DataPoint, because the card has the +original arcNet logo in its upper right corner. + +:: + + _______________________________________________________ + | _________ | + | | SW2 | ON arcNet | + | |_________| OFF ___| + | _____________ 1 ______ 8 | | 8 + | | | SW1 | XTAL | ____________ | S | + | > RAM (2k) | |______|| | | W | + | |_____________| | H | | 3 | + | _________|_____ y | |___| 1 + | _________ | | |b | | + | |_________| | | |r | | + | | SMC | |i | | + | | 90C65| |d | | + | _________ | | | | | + | | SW1 | ON | | |I | | + | |_________| OFF |_________|_____/C | _____| + | 1 8 | | | |___ + | ______________ | | | BNC |___| + | | | |____________| |_____| + | > EPROM SOCKET | _____________ | + | |______________| |_____________| | + | ______________| + | | + |________________________________________| + +Legend:: + + 90C65 ARCNET Chip + SW1 1-5: Base Memory Address Select + 6-8: Base I/O Address Select + SW2 1-8: Node ID Select + SW3 1-5: IRQ Select + 6-7: Extra Timeout + 8 : ROM Enable + BNC Coax connector + XTAL 20 MHz Crystal + + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in SW3 are used to set the node ID. Each node attached +to the network must have an unique node ID which must not be 0. +Switch 1 serves as the least significant bit (LSB). + +Setting one of the switches to Off means "1", On means "0". + +The node ID is the sum of the values of all switches set to "1" +These values are:: + + Switch | Value + -------|------- + 1 | 1 + 2 | 2 + 3 | 4 + 4 | 8 + 5 | 16 + 6 | 32 + 7 | 64 + 8 | 128 + + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The last three switches in switch block SW1 are used to select one +of eight possible I/O Base addresses using the following table:: + + + Switch | Hex I/O + 6 7 8 | Address + ------------|-------- + ON ON ON | 260 + OFF ON ON | 290 + ON OFF ON | 2E0 (Manufacturer's default) + OFF OFF ON | 2F0 + ON ON OFF | 300 + OFF ON OFF | 350 + ON OFF OFF | 380 + OFF OFF OFF | 3E0 + + +Setting the Base Memory (RAM) buffer Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The memory buffer (RAM) requires 2K. The base of this buffer can be +located in any of eight positions. The address of the Boot Prom is +memory base + 0x2000. + +Jumpers 3-5 of switch block SW1 select the Memory Base address. + +:: + + Switch | Hex RAM | Hex ROM + 1 2 3 4 5 | Address | Address *) + --------------------|---------|----------- + ON ON ON ON ON | C0000 | C2000 + ON ON OFF ON ON | C4000 | C6000 + ON ON ON OFF ON | CC000 | CE000 + ON ON OFF OFF ON | D0000 | D2000 (Manufacturer's default) + ON ON ON ON OFF | D4000 | D6000 + ON ON OFF ON OFF | D8000 | DA000 + ON ON ON OFF OFF | DC000 | DE000 + ON ON OFF OFF OFF | E0000 | E2000 + + *) To enable the Boot ROM set the switch 8 of switch block SW3 to position ON. + +The switches 1 and 2 probably add 0x0800 and 0x1000 to RAM base address. + + +Setting the Interrupt Line +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Switches 1-5 of the switch block SW3 control the IRQ level:: + + Jumper | IRQ + 1 2 3 4 5 | + ---------------------------- + ON OFF OFF OFF OFF | 3 + OFF ON OFF OFF OFF | 4 + OFF OFF ON OFF OFF | 5 + OFF OFF OFF ON OFF | 7 + OFF OFF OFF OFF ON | 2 + + +Setting the Timeout Parameters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The switches 6-7 of the switch block SW3 are used to determine the timeout +parameters. These two switches are normally left in the OFF position. + + +Topware +======= + +8-bit card, TA-ARC/10 +--------------------- + + - from Vojtech Pavlik + +This is another very similar 90C65 card. Most of the switches and jumpers +are the same as on other clones. + +:: + + _____________________________________________________________________ + | ___________ | | ______ | + | |SW2 NODE ID| | | | XTAL | | + | |___________| | Hybrid IC | |______| | + | ___________ | | __| + | |SW1 MEM+I/O| |_________________________| LED1|__|) + | |___________| 1 2 | + | J3 |o|o| TIMEOUT ______| + | ______________ |o|o| | | + | | | ___________________ | RJ | + | > EPROM SOCKET | | \ |------| + |J2 |______________| | | | | + ||o| | | |______| + ||o| ROM ENABLE | SMC | _________ | + | _____________ | 90C65 | |_________| _____| + | | | | | | |___ + | > RAM (2k) | | | | BNC |___| + | |_____________| | | |_____| + | |____________________| | + | ________ IRQ 2 3 4 5 7 ___________ | + ||________| |o|o|o|o|o| |___________| | + |________ J1|o|o|o|o|o| ______________| + | | + |_____________________________________________| + +Legend:: + + 90C65 ARCNET Chip + XTAL 20 MHz Crystal + SW1 1-5 Base Memory Address Select + 6-8 Base I/O Address Select + SW2 1-8 Node ID Select (ID0-ID7) + J1 IRQ Select + J2 ROM Enable + J3 Extra Timeout + LED1 Activity LED + BNC Coax connector (BUS ARCnet) + RJ Twisted Pair Connector (daisy chain) + + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in SW2 are used to set the node ID. Each node attached to +the network must have an unique node ID which must not be 0. Switch 1 (ID0) +serves as the least significant bit (LSB). + +Setting one of the switches to Off means "1", On means "0". + +The node ID is the sum of the values of all switches set to "1" +These values are:: + + Switch | Label | Value + -------|-------|------- + 1 | ID0 | 1 + 2 | ID1 | 2 + 3 | ID2 | 4 + 4 | ID3 | 8 + 5 | ID4 | 16 + 6 | ID5 | 32 + 7 | ID6 | 64 + 8 | ID7 | 128 + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The last three switches in switch block SW1 are used to select one +of eight possible I/O Base addresses using the following table:: + + + Switch | Hex I/O + 6 7 8 | Address + ------------|-------- + ON ON ON | 260 (Manufacturer's default) + OFF ON ON | 290 + ON OFF ON | 2E0 + OFF OFF ON | 2F0 + ON ON OFF | 300 + OFF ON OFF | 350 + ON OFF OFF | 380 + OFF OFF OFF | 3E0 + + +Setting the Base Memory (RAM) buffer Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The memory buffer (RAM) requires 2K. The base of this buffer can be +located in any of eight positions. The address of the Boot Prom is +memory base + 0x2000. + +Jumpers 3-5 of switch block SW1 select the Memory Base address. + +:: + + Switch | Hex RAM | Hex ROM + 1 2 3 4 5 | Address | Address *) + --------------------|---------|----------- + ON ON ON ON ON | C0000 | C2000 + ON ON OFF ON ON | C4000 | C6000 (Manufacturer's default) + ON ON ON OFF ON | CC000 | CE000 + ON ON OFF OFF ON | D0000 | D2000 + ON ON ON ON OFF | D4000 | D6000 + ON ON OFF ON OFF | D8000 | DA000 + ON ON ON OFF OFF | DC000 | DE000 + ON ON OFF OFF OFF | E0000 | E2000 + + *) To enable the Boot ROM short the jumper J2. + +The jumpers 1 and 2 probably add 0x0800 and 0x1000 to RAM address. + + +Setting the Interrupt Line +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Jumpers 1-5 of the jumper block J1 control the IRQ level. ON means +shorted, OFF means open:: + + Jumper | IRQ + 1 2 3 4 5 | + ---------------------------- + ON OFF OFF OFF OFF | 2 + OFF ON OFF OFF OFF | 3 + OFF OFF ON OFF OFF | 4 + OFF OFF OFF ON OFF | 5 + OFF OFF OFF OFF ON | 7 + + +Setting the Timeout Parameters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The jumpers J3 are used to set the timeout parameters. These two +jumpers are normally left open. + +Thomas-Conrad +============= + +Model #500-6242-0097 REV A (8-bit card) +--------------------------------------- + + - from Lars Karlsson <100617.3473@compuserve.com> + +:: + + ________________________________________________________ + | ________ ________ |_____ + | |........| |........| | + | |________| |________| ___| + | SW 3 SW 1 | | + | Base I/O Base Addr. Station | | + | address | | + | ______ switch | | + | | | | | + | | | |___| + | | | ______ |___._ + | |______| |______| ____| BNC + | Jumper- _____| Connector + | Main chip block _ __| ' + | | | | RJ Connector + | |_| | with 110 Ohm + | |__ Terminator + | ___________ __| + | |...........| | RJ-jack + | |...........| _____ | (unused) + | |___________| |_____| |__ + | Boot PROM socket IRQ-jumpers |_ Diagnostic + |________ __ _| LED (red) + | | | | | | | | | | | | | | | | | | | | | | + | | | | | | | | | | | | | | | | | | | | |________| + | + | + +And here are the settings for some of the switches and jumpers on the cards. + +:: + + I/O + + 1 2 3 4 5 6 7 8 + + 2E0----- 0 0 0 1 0 0 0 1 + 2F0----- 0 0 0 1 0 0 0 0 + 300----- 0 0 0 0 1 1 1 1 + 350----- 0 0 0 0 1 1 1 0 + +"0" in the above example means switch is off "1" means that it is on. + +:: + + ShMem address. + + 1 2 3 4 5 6 7 8 + + CX00--0 0 1 1 | | | + DX00--0 0 1 0 | + X000--------- 1 1 | + X400--------- 1 0 | + X800--------- 0 1 | + XC00--------- 0 0 + ENHANCED----------- 1 + COMPATIBLE--------- 0 + +:: + + IRQ + + + 3 4 5 7 2 + . . . . . + . . . . . + + +There is a DIP-switch with 8 switches, used to set the shared memory address +to be used. The first 6 switches set the address, the 7th doesn't have any +function, and the 8th switch is used to select "compatible" or "enhanced". +When I got my two cards, one of them had this switch set to "enhanced". That +card didn't work at all, it wasn't even recognized by the driver. The other +card had this switch set to "compatible" and it behaved absolutely normally. I +guess that the switch on one of the cards, must have been changed accidentally +when the card was taken out of its former host. The question remains +unanswered, what is the purpose of the "enhanced" position? + +[Avery's note: "enhanced" probably either disables shared memory (use IO +ports instead) or disables IO ports (use memory addresses instead). This +varies by the type of card involved. I fail to see how either of these +enhance anything. Send me more detailed information about this mode, or +just use "compatible" mode instead.] + +Waterloo Microsystems Inc. ?? +============================= + +8-bit card (C) 1985 +------------------- + - from Robert Michael Best + +[Avery's note: these don't work with my driver for some reason. These cards +SEEM to have settings similar to the PDI508Plus, which is +software-configured and doesn't work with my driver either. The "Waterloo +chip" is a boot PROM, probably designed specifically for the University of +Waterloo. If you have any further information about this card, please +e-mail me.] + +The probe has not been able to detect the card on any of the J2 settings, +and I tried them again with the "Waterloo" chip removed. + +:: + + _____________________________________________________________________ + | \/ \/ ___ __ __ | + | C4 C4 |^| | M || ^ ||^| | + | -- -- |_| | 5 || || | C3 | + | \/ \/ C10 |___|| ||_| | + | C4 C4 _ _ | | ?? | + | -- -- | \/ || | | + | | || | | + | | || C1 | | + | | || | \/ _____| + | | C6 || | C9 | |___ + | | || | -- | BNC |___| + | | || | >C7| |_____| + | | || | | + | __ __ |____||_____| 1 2 3 6 | + || ^ | >C4| |o|o|o|o|o|o| J2 >C4| | + || | |o|o|o|o|o|o| | + || C2 | >C4| >C4| | + || | >C8| | + || | 2 3 4 5 6 7 IRQ >C4| | + ||_____| |o|o|o|o|o|o| J3 | + |_______ |o|o|o|o|o|o| _______________| + | | + |_____________________________________________| + + C1 -- "COM9026 + SMC 8638" + In a chip socket. + + C2 -- "@Copyright + Waterloo Microsystems Inc. + 1985" + In a chip Socket with info printed on a label covering a round window + showing the circuit inside. (The window indicates it is an EPROM chip.) + + C3 -- "COM9032 + SMC 8643" + In a chip socket. + + C4 -- "74LS" + 9 total no sockets. + + M5 -- "50006-136 + 20.000000 MHZ + MTQ-T1-S3 + 0 M-TRON 86-40" + Metallic case with 4 pins, no socket. + + C6 -- "MOSTEK@TC8643 + MK6116N-20 + MALAYSIA" + No socket. + + C7 -- No stamp or label but in a 20 pin chip socket. + + C8 -- "PAL10L8CN + 8623" + In a 20 pin socket. + + C9 -- "PAl16R4A-2CN + 8641" + In a 20 pin socket. + + C10 -- "M8640 + NMC + 9306N" + In an 8 pin socket. + + ?? -- Some components on a smaller board and attached with 20 pins all + along the side closest to the BNC connector. The are coated in a dark + resin. + +On the board there are two jumper banks labeled J2 and J3. The +manufacturer didn't put a J1 on the board. The two boards I have both +came with a jumper box for each bank. + +:: + + J2 -- Numbered 1 2 3 4 5 6. + 4 and 5 are not stamped due to solder points. + + J3 -- IRQ 2 3 4 5 6 7 + +The board itself has a maple leaf stamped just above the irq jumpers +and "-2 46-86" beside C2. Between C1 and C6 "ASS 'Y 300163" and "@1986 +CORMAN CUSTOM ELECTRONICS CORP." stamped just below the BNC connector. +Below that "MADE IN CANADA" + +No Name +======= + +8-bit cards, 16-bit cards +------------------------- + + - from Juergen Seifert + +I have named this ARCnet card "NONAME", since there is no name of any +manufacturer on the Installation manual nor on the shipping box. The only +hint to the existence of a manufacturer at all is written in copper, +it is "Made in Taiwan" + +This description has been written by Juergen Seifert +using information from the Original + + "ARCnet Installation Manual" + +:: + + ________________________________________________________________ + | |STAR| BUS| T/P| | + | |____|____|____| | + | _____________________ | + | | | | + | | | | + | | | | + | | SMC | | + | | | | + | | COM90C65 | | + | | | | + | | | | + | |__________-__________| | + | _____| + | _______________ | CN | + | | PROM | |_____| + | > SOCKET | | + | |_______________| 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 | + | _______________ _______________ | + | |o|o|o|o|o|o|o|o| | SW1 || SW2 || + | |o|o|o|o|o|o|o|o| |_______________||_______________|| + |___ 2 3 4 5 7 E E R Node ID IOB__|__MEM____| + | \ IRQ / T T O | + |__________________1_2_M______________________| + +Legend:: + + COM90C65: ARCnet Probe + S1 1-8: Node ID Select + S2 1-3: I/O Base Address Select + 4-6: Memory Base Address Select + 7-8: RAM Offset Select + ET1, ET2 Extended Timeout Select + ROM ROM Enable Select + CN RG62 Coax Connector + STAR| BUS | T/P Three fields for placing a sign (colored circle) + indicating the topology of the card + +Setting one of the switches to Off means "1", On means "0". + + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in group SW1 are used to set the node ID. +Each node attached to the network must have an unique node ID which +must be different from 0. +Switch 8 serves as the least significant bit (LSB). + +The node ID is the sum of the values of all switches set to "1" +These values are:: + + Switch | Value + -------|------- + 8 | 1 + 7 | 2 + 6 | 4 + 5 | 8 + 4 | 16 + 3 | 32 + 2 | 64 + 1 | 128 + +Some Examples:: + + Switch | Hex | Decimal + 1 2 3 4 5 6 7 8 | Node ID | Node ID + ----------------|---------|--------- + 0 0 0 0 0 0 0 0 | not allowed + 0 0 0 0 0 0 0 1 | 1 | 1 + 0 0 0 0 0 0 1 0 | 2 | 2 + 0 0 0 0 0 0 1 1 | 3 | 3 + . . . | | + 0 1 0 1 0 1 0 1 | 55 | 85 + . . . | | + 1 0 1 0 1 0 1 0 | AA | 170 + . . . | | + 1 1 1 1 1 1 0 1 | FD | 253 + 1 1 1 1 1 1 1 0 | FE | 254 + 1 1 1 1 1 1 1 1 | FF | 255 + + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The first three switches in switch group SW2 are used to select one +of eight possible I/O Base addresses using the following table:: + + Switch | Hex I/O + 1 2 3 | Address + ------------|-------- + ON ON ON | 260 + ON ON OFF | 290 + ON OFF ON | 2E0 (Manufacturer's default) + ON OFF OFF | 2F0 + OFF ON ON | 300 + OFF ON OFF | 350 + OFF OFF ON | 380 + OFF OFF OFF | 3E0 + + +Setting the Base Memory (RAM) buffer Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The memory buffer requires 2K of a 16K block of RAM. The base of this +16K block can be located in any of eight positions. +Switches 4-6 of switch group SW2 select the Base of the 16K block. +Within that 16K address space, the buffer may be assigned any one of four +positions, determined by the offset, switches 7 and 8 of group SW2. + +:: + + Switch | Hex RAM | Hex ROM + 4 5 6 7 8 | Address | Address *) + -----------|---------|----------- + 0 0 0 0 0 | C0000 | C2000 + 0 0 0 0 1 | C0800 | C2000 + 0 0 0 1 0 | C1000 | C2000 + 0 0 0 1 1 | C1800 | C2000 + | | + 0 0 1 0 0 | C4000 | C6000 + 0 0 1 0 1 | C4800 | C6000 + 0 0 1 1 0 | C5000 | C6000 + 0 0 1 1 1 | C5800 | C6000 + | | + 0 1 0 0 0 | CC000 | CE000 + 0 1 0 0 1 | CC800 | CE000 + 0 1 0 1 0 | CD000 | CE000 + 0 1 0 1 1 | CD800 | CE000 + | | + 0 1 1 0 0 | D0000 | D2000 (Manufacturer's default) + 0 1 1 0 1 | D0800 | D2000 + 0 1 1 1 0 | D1000 | D2000 + 0 1 1 1 1 | D1800 | D2000 + | | + 1 0 0 0 0 | D4000 | D6000 + 1 0 0 0 1 | D4800 | D6000 + 1 0 0 1 0 | D5000 | D6000 + 1 0 0 1 1 | D5800 | D6000 + | | + 1 0 1 0 0 | D8000 | DA000 + 1 0 1 0 1 | D8800 | DA000 + 1 0 1 1 0 | D9000 | DA000 + 1 0 1 1 1 | D9800 | DA000 + | | + 1 1 0 0 0 | DC000 | DE000 + 1 1 0 0 1 | DC800 | DE000 + 1 1 0 1 0 | DD000 | DE000 + 1 1 0 1 1 | DD800 | DE000 + | | + 1 1 1 0 0 | E0000 | E2000 + 1 1 1 0 1 | E0800 | E2000 + 1 1 1 1 0 | E1000 | E2000 + 1 1 1 1 1 | E1800 | E2000 + + *) To enable the 8K Boot PROM install the jumper ROM. + The default is jumper ROM not installed. + + +Setting Interrupt Request Lines (IRQ) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To select a hardware interrupt level set one (only one!) of the jumpers +IRQ2, IRQ3, IRQ4, IRQ5 or IRQ7. The manufacturer's default is IRQ2. + + +Setting the Timeouts +^^^^^^^^^^^^^^^^^^^^ + +The two jumpers labeled ET1 and ET2 are used to determine the timeout +parameters (response and reconfiguration time). Every node in a network +must be set to the same timeout values. + +:: + + ET1 ET2 | Response Time (us) | Reconfiguration Time (ms) + --------|--------------------|-------------------------- + Off Off | 78 | 840 (Default) + Off On | 285 | 1680 + On Off | 563 | 1680 + On On | 1130 | 1680 + +On means jumper installed, Off means jumper not installed + + +16-BIT ARCNET +------------- + +The manual of my 8-Bit NONAME ARCnet Card contains another description +of a 16-Bit Coax / Twisted Pair Card. This description is incomplete, +because there are missing two pages in the manual booklet. (The table +of contents reports pages ... 2-9, 2-11, 2-12, 3-1, ... but inside +the booklet there is a different way of counting ... 2-9, 2-10, A-1, +(empty page), 3-1, ..., 3-18, A-1 (again), A-2) +Also the picture of the board layout is not as good as the picture of +8-Bit card, because there isn't any letter like "SW1" written to the +picture. + +Should somebody have such a board, please feel free to complete this +description or to send a mail to me! + +This description has been written by Juergen Seifert +using information from the Original + + "ARCnet Installation Manual" + +:: + + ___________________________________________________________________ + < _________________ _________________ | + > | SW? || SW? | | + < |_________________||_________________| | + > ____________________ | + < | | | + > | | | + < | | | + > | | | + < | | | + > | | | + < | | | + > |____________________| | + < ____| + > ____________________ | | + < | | | J1 | + > | < | | + < |____________________| ? ? ? ? ? ? |____| + > |o|o|o|o|o|o| | + < |o|o|o|o|o|o| | + > | + < __ ___________| + > | | | + <____________| |_______________________________________| + + +Setting one of the switches to Off means "1", On means "0". + + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in group SW2 are used to set the node ID. +Each node attached to the network must have an unique node ID which +must be different from 0. +Switch 8 serves as the least significant bit (LSB). + +The node ID is the sum of the values of all switches set to "1" +These values are:: + + Switch | Value + -------|------- + 8 | 1 + 7 | 2 + 6 | 4 + 5 | 8 + 4 | 16 + 3 | 32 + 2 | 64 + 1 | 128 + +Some Examples:: + + Switch | Hex | Decimal + 1 2 3 4 5 6 7 8 | Node ID | Node ID + ----------------|---------|--------- + 0 0 0 0 0 0 0 0 | not allowed + 0 0 0 0 0 0 0 1 | 1 | 1 + 0 0 0 0 0 0 1 0 | 2 | 2 + 0 0 0 0 0 0 1 1 | 3 | 3 + . . . | | + 0 1 0 1 0 1 0 1 | 55 | 85 + . . . | | + 1 0 1 0 1 0 1 0 | AA | 170 + . . . | | + 1 1 1 1 1 1 0 1 | FD | 253 + 1 1 1 1 1 1 1 0 | FE | 254 + 1 1 1 1 1 1 1 1 | FF | 255 + + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The first three switches in switch group SW1 are used to select one +of eight possible I/O Base addresses using the following table:: + + Switch | Hex I/O + 3 2 1 | Address + ------------|-------- + ON ON ON | 260 + ON ON OFF | 290 + ON OFF ON | 2E0 (Manufacturer's default) + ON OFF OFF | 2F0 + OFF ON ON | 300 + OFF ON OFF | 350 + OFF OFF ON | 380 + OFF OFF OFF | 3E0 + + +Setting the Base Memory (RAM) buffer Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The memory buffer requires 2K of a 16K block of RAM. The base of this +16K block can be located in any of eight positions. +Switches 6-8 of switch group SW1 select the Base of the 16K block. +Within that 16K address space, the buffer may be assigned any one of four +positions, determined by the offset, switches 4 and 5 of group SW1:: + + Switch | Hex RAM | Hex ROM + 8 7 6 5 4 | Address | Address + -----------|---------|----------- + 0 0 0 0 0 | C0000 | C2000 + 0 0 0 0 1 | C0800 | C2000 + 0 0 0 1 0 | C1000 | C2000 + 0 0 0 1 1 | C1800 | C2000 + | | + 0 0 1 0 0 | C4000 | C6000 + 0 0 1 0 1 | C4800 | C6000 + 0 0 1 1 0 | C5000 | C6000 + 0 0 1 1 1 | C5800 | C6000 + | | + 0 1 0 0 0 | CC000 | CE000 + 0 1 0 0 1 | CC800 | CE000 + 0 1 0 1 0 | CD000 | CE000 + 0 1 0 1 1 | CD800 | CE000 + | | + 0 1 1 0 0 | D0000 | D2000 (Manufacturer's default) + 0 1 1 0 1 | D0800 | D2000 + 0 1 1 1 0 | D1000 | D2000 + 0 1 1 1 1 | D1800 | D2000 + | | + 1 0 0 0 0 | D4000 | D6000 + 1 0 0 0 1 | D4800 | D6000 + 1 0 0 1 0 | D5000 | D6000 + 1 0 0 1 1 | D5800 | D6000 + | | + 1 0 1 0 0 | D8000 | DA000 + 1 0 1 0 1 | D8800 | DA000 + 1 0 1 1 0 | D9000 | DA000 + 1 0 1 1 1 | D9800 | DA000 + | | + 1 1 0 0 0 | DC000 | DE000 + 1 1 0 0 1 | DC800 | DE000 + 1 1 0 1 0 | DD000 | DE000 + 1 1 0 1 1 | DD800 | DE000 + | | + 1 1 1 0 0 | E0000 | E2000 + 1 1 1 0 1 | E0800 | E2000 + 1 1 1 1 0 | E1000 | E2000 + 1 1 1 1 1 | E1800 | E2000 + + +Setting Interrupt Request Lines (IRQ) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +?????????????????????????????????????? + + +Setting the Timeouts +^^^^^^^^^^^^^^^^^^^^ + +?????????????????????????????????????? + + +8-bit cards ("Made in Taiwan R.O.C.") +------------------------------------- + + - from Vojtech Pavlik + +I have named this ARCnet card "NONAME", since I got only the card with +no manual at all and the only text identifying the manufacturer is +"MADE IN TAIWAN R.O.C" printed on the card. + +:: + + ____________________________________________________________ + | 1 2 3 4 5 6 7 8 | + | |o|o| JP1 o|o|o|o|o|o|o|o| ON | + | + o|o|o|o|o|o|o|o| ___| + | _____________ o|o|o|o|o|o|o|o| OFF _____ | | ID7 + | | | SW1 | | | | ID6 + | > RAM (2k) | ____________________ | H | | S | ID5 + | |_____________| | || y | | W | ID4 + | | || b | | 2 | ID3 + | | || r | | | ID2 + | | || i | | | ID1 + | | 90C65 || d | |___| ID0 + | SW3 | || | | + | |o|o|o|o|o|o|o|o| ON | || I | | + | |o|o|o|o|o|o|o|o| | || C | | + | |o|o|o|o|o|o|o|o| OFF |____________________|| | _____| + | 1 2 3 4 5 6 7 8 | | | |___ + | ______________ | | | BNC |___| + | | | |_____| |_____| + | > EPROM SOCKET | | + | |______________| | + | ______________| + | | + |_____________________________________________| + +Legend:: + + 90C65 ARCNET Chip + SW1 1-5: Base Memory Address Select + 6-8: Base I/O Address Select + SW2 1-8: Node ID Select (ID0-ID7) + SW3 1-5: IRQ Select + 6-7: Extra Timeout + 8 : ROM Enable + JP1 Led connector + BNC Coax connector + +Although the jumpers SW1 and SW3 are marked SW, not JP, they are jumpers, not +switches. + +Setting the jumpers to ON means connecting the upper two pins, off the bottom +two - or - in case of IRQ setting, connecting none of them at all. + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in SW2 are used to set the node ID. Each node attached +to the network must have an unique node ID which must not be 0. +Switch 1 (ID0) serves as the least significant bit (LSB). + +Setting one of the switches to Off means "1", On means "0". + +The node ID is the sum of the values of all switches set to "1" +These values are:: + + Switch | Label | Value + -------|-------|------- + 1 | ID0 | 1 + 2 | ID1 | 2 + 3 | ID2 | 4 + 4 | ID3 | 8 + 5 | ID4 | 16 + 6 | ID5 | 32 + 7 | ID6 | 64 + 8 | ID7 | 128 + +Some Examples:: + + Switch | Hex | Decimal + 8 7 6 5 4 3 2 1 | Node ID | Node ID + ----------------|---------|--------- + 0 0 0 0 0 0 0 0 | not allowed + 0 0 0 0 0 0 0 1 | 1 | 1 + 0 0 0 0 0 0 1 0 | 2 | 2 + 0 0 0 0 0 0 1 1 | 3 | 3 + . . . | | + 0 1 0 1 0 1 0 1 | 55 | 85 + . . . | | + 1 0 1 0 1 0 1 0 | AA | 170 + . . . | | + 1 1 1 1 1 1 0 1 | FD | 253 + 1 1 1 1 1 1 1 0 | FE | 254 + 1 1 1 1 1 1 1 1 | FF | 255 + + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The last three switches in switch block SW1 are used to select one +of eight possible I/O Base addresses using the following table:: + + + Switch | Hex I/O + 6 7 8 | Address + ------------|-------- + ON ON ON | 260 + OFF ON ON | 290 + ON OFF ON | 2E0 (Manufacturer's default) + OFF OFF ON | 2F0 + ON ON OFF | 300 + OFF ON OFF | 350 + ON OFF OFF | 380 + OFF OFF OFF | 3E0 + + +Setting the Base Memory (RAM) buffer Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The memory buffer (RAM) requires 2K. The base of this buffer can be +located in any of eight positions. The address of the Boot Prom is +memory base + 0x2000. + +Jumpers 3-5 of jumper block SW1 select the Memory Base address. + +:: + + Switch | Hex RAM | Hex ROM + 1 2 3 4 5 | Address | Address *) + --------------------|---------|----------- + ON ON ON ON ON | C0000 | C2000 + ON ON OFF ON ON | C4000 | C6000 + ON ON ON OFF ON | CC000 | CE000 + ON ON OFF OFF ON | D0000 | D2000 (Manufacturer's default) + ON ON ON ON OFF | D4000 | D6000 + ON ON OFF ON OFF | D8000 | DA000 + ON ON ON OFF OFF | DC000 | DE000 + ON ON OFF OFF OFF | E0000 | E2000 + + *) To enable the Boot ROM set the jumper 8 of jumper block SW3 to position ON. + +The jumpers 1 and 2 probably add 0x0800, 0x1000 and 0x1800 to RAM adders. + +Setting the Interrupt Line +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Jumpers 1-5 of the jumper block SW3 control the IRQ level:: + + Jumper | IRQ + 1 2 3 4 5 | + ---------------------------- + ON OFF OFF OFF OFF | 2 + OFF ON OFF OFF OFF | 3 + OFF OFF ON OFF OFF | 4 + OFF OFF OFF ON OFF | 5 + OFF OFF OFF OFF ON | 7 + + +Setting the Timeout Parameters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The jumpers 6-7 of the jumper block SW3 are used to determine the timeout +parameters. These two jumpers are normally left in the OFF position. + + + +(Generic Model 9058) +-------------------- + - from Andrew J. Kroll + - Sorry this sat in my to-do box for so long, Andrew! (yikes - over a + year!) + +:: + + _____ + | < + | .---' + ________________________________________________________________ | | + | | SW2 | | | + | ___________ |_____________| | | + | | | 1 2 3 4 5 6 ___| | + | > 6116 RAM | _________ 8 | | | + | |___________| |20MHzXtal| 7 | | | + | |_________| __________ 6 | S | | + | 74LS373 | |- 5 | W | | + | _________ | E |- 4 | | | + | >_______| ______________|..... P |- 3 | 3 | | + | | | : O |- 2 | | | + | | | : X |- 1 |___| | + | ________________ | | : Y |- | | + | | SW1 | | SL90C65 | : |- | | + | |________________| | | : B |- | | + | 1 2 3 4 5 6 7 8 | | : O |- | | + | |_________o____|..../ A |- _______| | + | ____________________ | R |- | |------, + | | | | D |- | BNC | # | + | > 2764 PROM SOCKET | |__________|- |_______|------' + | |____________________| _________ | | + | >________| <- 74LS245 | | + | | | + |___ ______________| | + |H H H H H H H H H H H H H H H H H H H H H H H| | | + |U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U| | | + \| + +Legend:: + + SL90C65 ARCNET Controller / Transceiver /Logic + SW1 1-5: IRQ Select + 6: ET1 + 7: ET2 + 8: ROM ENABLE + SW2 1-3: Memory Buffer/PROM Address + 3-6: I/O Address Map + SW3 1-8: Node ID Select + BNC BNC RG62/U Connection + *I* have had success using RG59B/U with *NO* terminators! + What gives?! + +SW1: Timeouts, Interrupt and ROM +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To select a hardware interrupt level set one (only one!) of the dip switches +up (on) SW1...(switches 1-5) +IRQ3, IRQ4, IRQ5, IRQ7, IRQ2. The Manufacturer's default is IRQ2. + +The switches on SW1 labeled EXT1 (switch 6) and EXT2 (switch 7) +are used to determine the timeout parameters. These two dip switches +are normally left off (down). + + To enable the 8K Boot PROM position SW1 switch 8 on (UP) labeled ROM. + The default is jumper ROM not installed. + + +Setting the I/O Base Address +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The last three switches in switch group SW2 are used to select one +of eight possible I/O Base addresses using the following table:: + + + Switch | Hex I/O + 4 5 6 | Address + -------|-------- + 0 0 0 | 260 + 0 0 1 | 290 + 0 1 0 | 2E0 (Manufacturer's default) + 0 1 1 | 2F0 + 1 0 0 | 300 + 1 0 1 | 350 + 1 1 0 | 380 + 1 1 1 | 3E0 + + +Setting the Base Memory Address (RAM & ROM) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The memory buffer requires 2K of a 16K block of RAM. The base of this +16K block can be located in any of eight positions. +Switches 1-3 of switch group SW2 select the Base of the 16K block. +(0 = DOWN, 1 = UP) +I could, however, only verify two settings... + + +:: + + Switch| Hex RAM | Hex ROM + 1 2 3 | Address | Address + ------|---------|----------- + 0 0 0 | E0000 | E2000 + 0 0 1 | D0000 | D2000 (Manufacturer's default) + 0 1 0 | ????? | ????? + 0 1 1 | ????? | ????? + 1 0 0 | ????? | ????? + 1 0 1 | ????? | ????? + 1 1 0 | ????? | ????? + 1 1 1 | ????? | ????? + + +Setting the Node ID +^^^^^^^^^^^^^^^^^^^ + +The eight switches in group SW3 are used to set the node ID. +Each node attached to the network must have an unique node ID which +must be different from 0. +Switch 1 serves as the least significant bit (LSB). +switches in the DOWN position are OFF (0) and in the UP position are ON (1) + +The node ID is the sum of the values of all switches set to "1" +These values are:: + + Switch | Value + -------|------- + 1 | 1 + 2 | 2 + 3 | 4 + 4 | 8 + 5 | 16 + 6 | 32 + 7 | 64 + 8 | 128 + +Some Examples:: + + Switch# | Hex | Decimal + 8 7 6 5 4 3 2 1 | Node ID | Node ID + ----------------|---------|--------- + 0 0 0 0 0 0 0 0 | not allowed <-. + 0 0 0 0 0 0 0 1 | 1 | 1 | + 0 0 0 0 0 0 1 0 | 2 | 2 | + 0 0 0 0 0 0 1 1 | 3 | 3 | + . . . | | | + 0 1 0 1 0 1 0 1 | 55 | 85 | + . . . | | + Don't use 0 or 255! + 1 0 1 0 1 0 1 0 | AA | 170 | + . . . | | | + 1 1 1 1 1 1 0 1 | FD | 253 | + 1 1 1 1 1 1 1 0 | FE | 254 | + 1 1 1 1 1 1 1 1 | FF | 255 <-' + + +Tiara +===== + +(model unknown) +--------------- + + - from Christoph Lameter + + +Here is information about my card as far as I could figure it out:: + + + ----------------------------------------------- tiara + Tiara LanCard of Tiara Computer Systems. + + +----------------------------------------------+ + ! ! Transmitter Unit ! ! + ! +------------------+ ------- + ! MEM Coax Connector + ! ROM 7654321 <- I/O ------- + ! : : +--------+ ! + ! : : ! 90C66LJ! +++ + ! : : ! ! !D Switch to set + ! : : ! ! !I the Nodenumber + ! : : +--------+ !P + ! !++ + ! 234567 <- IRQ ! + +------------!!!!!!!!!!!!!!!!!!!!!!!!--------+ + !!!!!!!!!!!!!!!!!!!!!!!! + +- 0 = Jumper Installed +- 1 = Open + +Top Jumper line Bit 7 = ROM Enable 654=Memory location 321=I/O + +Settings for Memory Location (Top Jumper Line) + +=== ================ +456 Address selected +=== ================ +000 C0000 +001 C4000 +010 CC000 +011 D0000 +100 D4000 +101 D8000 +110 DC000 +111 E0000 +=== ================ + +Settings for I/O Address (Top Jumper Line) + +=== ==== +123 Port +=== ==== +000 260 +001 290 +010 2E0 +011 2F0 +100 300 +101 350 +110 380 +111 3E0 +=== ==== + +Settings for IRQ Selection (Lower Jumper Line) + +====== ===== +234567 +====== ===== +011111 IRQ 2 +101111 IRQ 3 +110111 IRQ 4 +111011 IRQ 5 +111110 IRQ 7 +====== ===== + +Other Cards +=========== + +I have no information on other models of ARCnet cards at the moment. Please +send any and all info to: + + apenwarr@worldvisions.ca + +Thanks. diff --git a/Documentation/networking/arcnet.rst b/Documentation/networking/arcnet.rst new file mode 100644 index 000000000..82fce606c --- /dev/null +++ b/Documentation/networking/arcnet.rst @@ -0,0 +1,594 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====== +ARCnet +====== + +.. note:: + + See also arcnet-hardware.txt in this directory for jumper-setting + and cabling information if you're like many of us and didn't happen to get a + manual with your ARCnet card. + +Since no one seems to listen to me otherwise, perhaps a poem will get your +attention:: + + This driver's getting fat and beefy, + But my cat is still named Fifi. + +Hmm, I think I'm allowed to call that a poem, even though it's only two +lines. Hey, I'm in Computer Science, not English. Give me a break. + +The point is: I REALLY REALLY REALLY REALLY REALLY want to hear from you if +you test this and get it working. Or if you don't. Or anything. + +ARCnet 0.32 ALPHA first made it into the Linux kernel 1.1.80 - this was +nice, but after that even FEWER people started writing to me because they +didn't even have to install the patch. + +Come on, be a sport! Send me a success report! + +(hey, that was even better than my original poem... this is getting bad!) + + +.. warning:: + + If you don't e-mail me about your success/failure soon, I may be forced to + start SINGING. And we don't want that, do we? + + (You know, it might be argued that I'm pushing this point a little too much. + If you think so, why not flame me in a quick little e-mail? Please also + include the type of card(s) you're using, software, size of network, and + whether it's working or not.) + + My e-mail address is: apenwarr@worldvisions.ca + +These are the ARCnet drivers for Linux. + +This new release (2.91) has been put together by David Woodhouse +, in an attempt to tidy up the driver after adding support +for yet another chipset. Now the generic support has been separated from the +individual chipset drivers, and the source files aren't quite so packed with +#ifdefs! I've changed this file a bit, but kept it in the first person from +Avery, because I didn't want to completely rewrite it. + +The previous release resulted from many months of on-and-off effort from me +(Avery Pennarun), many bug reports/fixes and suggestions from others, and in +particular a lot of input and coding from Tomasz Motylewski. Starting with +ARCnet 2.10 ALPHA, Tomasz's all-new-and-improved RFC1051 support has been +included and seems to be working fine! + + +Where do I discuss these drivers? +--------------------------------- + +Tomasz has been so kind as to set up a new and improved mailing list. +Subscribe by sending a message with the BODY "subscribe linux-arcnet YOUR +REAL NAME" to listserv@tichy.ch.uj.edu.pl. Then, to submit messages to the +list, mail to linux-arcnet@tichy.ch.uj.edu.pl. + +There are archives of the mailing list at: + + http://epistolary.org/mailman/listinfo.cgi/arcnet + +The people on linux-net@vger.kernel.org (now defunct, replaced by +netdev@vger.kernel.org) have also been known to be very helpful, especially +when we're talking about ALPHA Linux kernels that may or may not work right +in the first place. + + +Other Drivers and Info +---------------------- + +You can try my ARCNET page on the World Wide Web at: + + http://www.qis.net/~jschmitz/arcnet/ + +Also, SMC (one of the companies that makes ARCnet cards) has a WWW site you +might be interested in, which includes several drivers for various cards +including ARCnet. Try: + + http://www.smc.com/ + +Performance Technologies makes various network software that supports +ARCnet: + + http://www.perftech.com/ or ftp to ftp.perftech.com. + +Novell makes a networking stack for DOS which includes ARCnet drivers. Try +FTPing to ftp.novell.com. + +You can get the Crynwr packet driver collection (including arcether.com, the +one you'll want to use with ARCnet cards) from +oak.oakland.edu:/simtel/msdos/pktdrvr. It won't work perfectly on a 386+ +without patches, though, and also doesn't like several cards. Fixed +versions are available on my WWW page, or via e-mail if you don't have WWW +access. + + +Installing the Driver +--------------------- + +All you will need to do in order to install the driver is:: + + make config + (be sure to choose ARCnet in the network devices + and at least one chipset driver.) + make clean + make zImage + +If you obtained this ARCnet package as an upgrade to the ARCnet driver in +your current kernel, you will need to first copy arcnet.c over the one in +the linux/drivers/net directory. + +You will know the driver is installed properly if you get some ARCnet +messages when you reboot into the new Linux kernel. + +There are four chipset options: + + 1. Standard ARCnet COM90xx chipset. + +This is the normal ARCnet card, which you've probably got. This is the only +chipset driver which will autoprobe if not told where the card is. +It following options on the command line:: + + com90xx=[[,[,]]][,] | + +If you load the chipset support as a module, the options are:: + + io= irq= shmem= device= + +To disable the autoprobe, just specify "com90xx=" on the kernel command line. +To specify the name alone, but allow autoprobe, just put "com90xx=" + + 2. ARCnet COM20020 chipset. + +This is the new chipset from SMC with support for promiscuous mode (packet +sniffing), extra diagnostic information, etc. Unfortunately, there is no +sensible method of autoprobing for these cards. You must specify the I/O +address on the kernel command line. + +The command line options are:: + + com20020=[,[,[,backplane[,CKP[,timeout]]]]][,name] + +If you load the chipset support as a module, the options are:: + + io= irq= node= backplane= clock= + timeout= device= + +The COM20020 chipset allows you to set the node ID in software, overriding the +default which is still set in DIP switches on the card. If you don't have the +COM20020 data sheets, and you don't know what the other three options refer +to, then they won't interest you - forget them. + + 3. ARCnet COM90xx chipset in IO-mapped mode. + +This will also work with the normal ARCnet cards, but doesn't use the shared +memory. It performs less well than the above driver, but is provided in case +you have a card which doesn't support shared memory, or (strangely) in case +you have so many ARCnet cards in your machine that you run out of shmem slots. +If you don't give the IO address on the kernel command line, then the driver +will not find the card. + +The command line options are:: + + com90io=[,][,] + +If you load the chipset support as a module, the options are: + io= irq= device= + + 4. ARCnet RIM I cards. + +These are COM90xx chips which are _completely_ memory mapped. The support for +these is not tested. If you have one, please mail the author with a success +report. All options must be specified, except the device name. +Command line options:: + + arcrimi=,,[,] + +If you load the chipset support as a module, the options are:: + + shmem= irq= node= device= + + +Loadable Module Support +----------------------- + +Configure and rebuild Linux. When asked, answer 'm' to "Generic ARCnet +support" and to support for your ARCnet chipset if you want to use the +loadable module. You can also say 'y' to "Generic ARCnet support" and 'm' +to the chipset support if you wish. + +:: + + make config + make clean + make zImage + make modules + +If you're using a loadable module, you need to use insmod to load it, and +you can specify various characteristics of your card on the command +line. (In recent versions of the driver, autoprobing is much more reliable +and works as a module, so most of this is now unnecessary.) + +For example:: + + cd /usr/src/linux/modules + insmod arcnet.o + insmod com90xx.o + insmod com20020.o io=0x2e0 device=eth1 + + +Using the Driver +---------------- + +If you build your kernel with ARCnet COM90xx support included, it should +probe for your card automatically when you boot. If you use a different +chipset driver complied into the kernel, you must give the necessary options +on the kernel command line, as detailed above. + +Go read the NET-2-HOWTO and ETHERNET-HOWTO for Linux; they should be +available where you picked up this driver. Think of your ARCnet as a +souped-up (or down, as the case may be) Ethernet card. + +By the way, be sure to change all references from "eth0" to "arc0" in the +HOWTOs. Remember that ARCnet isn't a "true" Ethernet, and the device name +is DIFFERENT. + + +Multiple Cards in One Computer +------------------------------ + +Linux has pretty good support for this now, but since I've been busy, the +ARCnet driver has somewhat suffered in this respect. COM90xx support, if +compiled into the kernel, will (try to) autodetect all the installed cards. + +If you have other cards, with support compiled into the kernel, then you can +just repeat the options on the kernel command line, e.g.:: + + LILO: linux com20020=0x2e0 com20020=0x380 com90io=0x260 + +If you have the chipset support built as a loadable module, then you need to +do something like this:: + + insmod -o arc0 com90xx + insmod -o arc1 com20020 io=0x2e0 + insmod -o arc2 com90xx + +The ARCnet drivers will now sort out their names automatically. + + +How do I get it to work with...? +-------------------------------- + +NFS: + Should be fine linux->linux, just pretend you're using Ethernet cards. + oak.oakland.edu:/simtel/msdos/nfs has some nice DOS clients. There + is also a DOS-based NFS server called SOSS. It doesn't multitask + quite the way Linux does (actually, it doesn't multitask AT ALL) but + you never know what you might need. + + With AmiTCP (and possibly others), you may need to set the following + options in your Amiga nfstab: MD 1024 MR 1024 MW 1024 + (Thanks to Christian Gottschling + for this.) + + Probably these refer to maximum NFS data/read/write block sizes. I + don't know why the defaults on the Amiga didn't work; write to me if + you know more. + +DOS: + If you're using the freeware arcether.com, you might want to install + the driver patch from my web page. It helps with PC/TCP, and also + can get arcether to load if it timed out too quickly during + initialization. In fact, if you use it on a 386+ you REALLY need + the patch, really. + +Windows: + See DOS :) Trumpet Winsock works fine with either the Novell or + Arcether client, assuming you remember to load winpkt of course. + +LAN Manager and Windows for Workgroups: + These programs use protocols that + are incompatible with the Internet standard. They try to pretend + the cards are Ethernet, and confuse everyone else on the network. + + However, v2.00 and higher of the Linux ARCnet driver supports this + protocol via the 'arc0e' device. See the section on "Multiprotocol + Support" for more information. + + Using the freeware Samba server and clients for Linux, you can now + interface quite nicely with TCP/IP-based WfWg or Lan Manager + networks. + +Windows 95: + Tools are included with Win95 that let you use either the LANMAN + style network drivers (NDIS) or Novell drivers (ODI) to handle your + ARCnet packets. If you use ODI, you'll need to use the 'arc0' + device with Linux. If you use NDIS, then try the 'arc0e' device. + See the "Multiprotocol Support" section below if you need arc0e, + you're completely insane, and/or you need to build some kind of + hybrid network that uses both encapsulation types. + +OS/2: + I've been told it works under Warp Connect with an ARCnet driver from + SMC. You need to use the 'arc0e' interface for this. If you get + the SMC driver to work with the TCP/IP stuff included in the + "normal" Warp Bonus Pack, let me know. + + ftp.microsoft.com also has a freeware "Lan Manager for OS/2" client + which should use the same protocol as WfWg does. I had no luck + installing it under Warp, however. Please mail me with any results. + +NetBSD/AmiTCP: + These use an old version of the Internet standard ARCnet + protocol (RFC1051) which is compatible with the Linux driver v2.10 + ALPHA and above using the arc0s device. (See "Multiprotocol ARCnet" + below.) ** Newer versions of NetBSD apparently support RFC1201. + + +Using Multiprotocol ARCnet +-------------------------- + +The ARCnet driver v2.10 ALPHA supports three protocols, each on its own +"virtual network device": + + ====== =============================================================== + arc0 RFC1201 protocol, the official Internet standard which just + happens to be 100% compatible with Novell's TRXNET driver. + Version 1.00 of the ARCnet driver supported _only_ this + protocol. arc0 is the fastest of the three protocols (for + whatever reason), and allows larger packets to be used + because it supports RFC1201 "packet splitting" operations. + Unless you have a specific need to use a different protocol, + I strongly suggest that you stick with this one. + + arc0e "Ethernet-Encapsulation" which sends packets over ARCnet + that are actually a lot like Ethernet packets, including the + 6-byte hardware addresses. This protocol is compatible with + Microsoft's NDIS ARCnet driver, like the one in WfWg and + LANMAN. Because the MTU of 493 is actually smaller than the + one "required" by TCP/IP (576), there is a chance that some + network operations will not function properly. The Linux + TCP/IP layer can compensate in most cases, however, by + automatically fragmenting the TCP/IP packets to make them + fit. arc0e also works slightly more slowly than arc0, for + reasons yet to be determined. (Probably it's the smaller + MTU that does it.) + + arc0s The "[s]imple" RFC1051 protocol is the "previous" Internet + standard that is completely incompatible with the new + standard. Some software today, however, continues to + support the old standard (and only the old standard) + including NetBSD and AmiTCP. RFC1051 also does not support + RFC1201's packet splitting, and the MTU of 507 is still + smaller than the Internet "requirement," so it's quite + possible that you may run into problems. It's also slower + than RFC1201 by about 25%, for the same reason as arc0e. + + The arc0s support was contributed by Tomasz Motylewski + and modified somewhat by me. Bugs are probably my fault. + ====== =============================================================== + +You can choose not to compile arc0e and arc0s into the driver if you want - +this will save you a bit of memory and avoid confusion when eg. trying to +use the "NFS-root" stuff in recent Linux kernels. + +The arc0e and arc0s devices are created automatically when you first +ifconfig the arc0 device. To actually use them, though, you need to also +ifconfig the other virtual devices you need. There are a number of ways you +can set up your network then: + + +1. Single Protocol. + + This is the simplest way to configure your network: use just one of the + two available protocols. As mentioned above, it's a good idea to use + only arc0 unless you have a good reason (like some other software, ie. + WfWg, that only works with arc0e). + + If you need only arc0, then the following commands should get you going:: + + ifconfig arc0 MY.IP.ADD.RESS + route add MY.IP.ADD.RESS arc0 + route add -net SUB.NET.ADD.RESS arc0 + [add other local routes here] + + If you need arc0e (and only arc0e), it's a little different:: + + ifconfig arc0 MY.IP.ADD.RESS + ifconfig arc0e MY.IP.ADD.RESS + route add MY.IP.ADD.RESS arc0e + route add -net SUB.NET.ADD.RESS arc0e + + arc0s works much the same way as arc0e. + + +2. More than one protocol on the same wire. + + Now things start getting confusing. To even try it, you may need to be + partly crazy. Here's what *I* did. :) Note that I don't include arc0s in + my home network; I don't have any NetBSD or AmiTCP computers, so I only + use arc0s during limited testing. + + I have three computers on my home network; two Linux boxes (which prefer + RFC1201 protocol, for reasons listed above), and one XT that can't run + Linux but runs the free Microsoft LANMAN Client instead. + + Worse, one of the Linux computers (freedom) also has a modem and acts as + a router to my Internet provider. The other Linux box (insight) also has + its own IP address and needs to use freedom as its default gateway. The + XT (patience), however, does not have its own Internet IP address and so + I assigned it one on a "private subnet" (as defined by RFC1597). + + To start with, take a simple network with just insight and freedom. + Insight needs to: + + - talk to freedom via RFC1201 (arc0) protocol, because I like it + more and it's faster. + - use freedom as its Internet gateway. + + That's pretty easy to do. Set up insight like this:: + + ifconfig arc0 insight + route add insight arc0 + route add freedom arc0 /* I would use the subnet here (like I said + to in "single protocol" above), + but the rest of the subnet + unfortunately lies across the PPP + link on freedom, which confuses + things. */ + route add default gw freedom + + And freedom gets configured like so:: + + ifconfig arc0 freedom + route add freedom arc0 + route add insight arc0 + /* and default gateway is configured by pppd */ + + Great, now insight talks to freedom directly on arc0, and sends packets + to the Internet through freedom. If you didn't know how to do the above, + you should probably stop reading this section now because it only gets + worse. + + Now, how do I add patience into the network? It will be using LANMAN + Client, which means I need the arc0e device. It needs to be able to talk + to both insight and freedom, and also use freedom as a gateway to the + Internet. (Recall that patience has a "private IP address" which won't + work on the Internet; that's okay, I configured Linux IP masquerading on + freedom for this subnet). + + So patience (necessarily; I don't have another IP number from my + provider) has an IP address on a different subnet than freedom and + insight, but needs to use freedom as an Internet gateway. Worse, most + DOS networking programs, including LANMAN, have braindead networking + schemes that rely completely on the netmask and a 'default gateway' to + determine how to route packets. This means that to get to freedom or + insight, patience WILL send through its default gateway, regardless of + the fact that both freedom and insight (courtesy of the arc0e device) + could understand a direct transmission. + + I compensate by giving freedom an extra IP address - aliased 'gatekeeper' - + that is on my private subnet, the same subnet that patience is on. I + then define gatekeeper to be the default gateway for patience. + + To configure freedom (in addition to the commands above):: + + ifconfig arc0e gatekeeper + route add gatekeeper arc0e + route add patience arc0e + + This way, freedom will send all packets for patience through arc0e, + giving its IP address as gatekeeper (on the private subnet). When it + talks to insight or the Internet, it will use its "freedom" Internet IP + address. + + You will notice that we haven't configured the arc0e device on insight. + This would work, but is not really necessary, and would require me to + assign insight another special IP number from my private subnet. Since + both insight and patience are using freedom as their default gateway, the + two can already talk to each other. + + It's quite fortunate that I set things up like this the first time (cough + cough) because it's really handy when I boot insight into DOS. There, it + runs the Novell ODI protocol stack, which only works with RFC1201 ARCnet. + In this mode it would be impossible for insight to communicate directly + with patience, since the Novell stack is incompatible with Microsoft's + Ethernet-Encap. Without changing any settings on freedom or patience, I + simply set freedom as the default gateway for insight (now in DOS, + remember) and all the forwarding happens "automagically" between the two + hosts that would normally not be able to communicate at all. + + For those who like diagrams, I have created two "virtual subnets" on the + same physical ARCnet wire. You can picture it like this:: + + + [RFC1201 NETWORK] [ETHER-ENCAP NETWORK] + (registered Internet subnet) (RFC1597 private subnet) + + (IP Masquerade) + /---------------\ * /---------------\ + | | * | | + | +-Freedom-*-Gatekeeper-+ | + | | | * | | + \-------+-------/ | * \-------+-------/ + | | | + Insight | Patience + (Internet) + + + +It works: what now? +------------------- + +Send mail describing your setup, preferably including driver version, kernel +version, ARCnet card model, CPU type, number of systems on your network, and +list of software in use to me at the following address: + + apenwarr@worldvisions.ca + +I do send (sometimes automated) replies to all messages I receive. My email +can be weird (and also usually gets forwarded all over the place along the +way to me), so if you don't get a reply within a reasonable time, please +resend. + + +It doesn't work: what now? +-------------------------- + +Do the same as above, but also include the output of the ifconfig and route +commands, as well as any pertinent log entries (ie. anything that starts +with "arcnet:" and has shown up since the last reboot) in your mail. + +If you want to try fixing it yourself (I strongly recommend that you mail me +about the problem first, since it might already have been solved) you may +want to try some of the debug levels available. For heavy testing on +D_DURING or more, it would be a REALLY good idea to kill your klogd daemon +first! D_DURING displays 4-5 lines for each packet sent or received. D_TX, +D_RX, and D_SKB actually DISPLAY each packet as it is sent or received, +which is obviously quite big. + +Starting with v2.40 ALPHA, the autoprobe routines have changed +significantly. In particular, they won't tell you why the card was not +found unless you turn on the D_INIT_REASONS debugging flag. + +Once the driver is running, you can run the arcdump shell script (available +from me or in the full ARCnet package, if you have it) as root to list the +contents of the arcnet buffers at any time. To make any sense at all out of +this, you should grab the pertinent RFCs. (some are listed near the top of +arcnet.c). arcdump assumes your card is at 0xD0000. If it isn't, edit the +script. + +Buffers 0 and 1 are used for receiving, and Buffers 2 and 3 are for sending. +Ping-pong buffers are implemented both ways. + +If your debug level includes D_DURING and you did NOT define SLOW_XMIT_COPY, +the buffers are cleared to a constant value of 0x42 every time the card is +reset (which should only happen when you do an ifconfig up, or when Linux +decides that the driver is broken). During a transmit, unused parts of the +buffer will be cleared to 0x42 as well. This is to make it easier to figure +out which bytes are being used by a packet. + +You can change the debug level without recompiling the kernel by typing:: + + ifconfig arc0 down metric 1xxx + /etc/rc.d/rc.inet1 + +where "xxx" is the debug level you want. For example, "metric 1015" would put +you at debug level 15. Debug level 7 is currently the default. + +Note that the debug level is (starting with v1.90 ALPHA) a binary +combination of different debug flags; so debug level 7 is really 1+2+4 or +D_NORMAL+D_EXTRA+D_INIT. To include D_DURING, you would add 16 to this, +resulting in debug level 23. + +If you don't understand that, you probably don't want to know anyway. +E-mail me about your problem. + + +I want to send money: what now? +------------------------------- + +Go take a nap or something. You'll feel better in the morning. diff --git a/Documentation/networking/atm.rst b/Documentation/networking/atm.rst new file mode 100644 index 000000000..c1df8c038 --- /dev/null +++ b/Documentation/networking/atm.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=== +ATM +=== + +In order to use anything but the most primitive functions of ATM, +several user-mode programs are required to assist the kernel. These +programs and related material can be found via the ATM on Linux Web +page at http://linux-atm.sourceforge.net/ + +If you encounter problems with ATM, please report them on the ATM +on Linux mailing list. Subscription information, archives, etc., +can be found on http://linux-atm.sourceforge.net/ diff --git a/Documentation/networking/ax25.rst b/Documentation/networking/ax25.rst new file mode 100644 index 000000000..f060cfb14 --- /dev/null +++ b/Documentation/networking/ax25.rst @@ -0,0 +1,16 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===== +AX.25 +===== + +To use the amateur radio protocols within Linux you will need to get a +suitable copy of the AX.25 Utilities. More detailed information about +AX.25, NET/ROM and ROSE, associated programs and utilities can be +found on http://www.linux-ax25.org. + +There is an active mailing list for discussing Linux amateur radio matters +called linux-hams@vger.kernel.org. To subscribe to it, send a message to +majordomo@vger.kernel.org with the words "subscribe linux-hams" in the body +of the message, the subject field is ignored. You don't need to be +subscribed to post but of course that means you might miss an answer. diff --git a/Documentation/networking/bareudp.rst b/Documentation/networking/bareudp.rst new file mode 100644 index 000000000..b9d04ee6d --- /dev/null +++ b/Documentation/networking/bareudp.rst @@ -0,0 +1,58 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================== +Bare UDP Tunnelling Module Documentation +======================================== + +There are various L3 encapsulation standards using UDP being discussed to +leverage the UDP based load balancing capability of different networks. +MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. + +The Bareudp tunnel module provides a generic L3 encapsulation support for +tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. + +Special Handling +---------------- +The bareudp device supports special handling for MPLS & IP as they can have +multiple ethertypes. +MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). +IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). +This special handling can be enabled only for ethertypes ETH_P_IP & ETH_P_MPLS_UC +with a flag called multiproto mode. + +Usage +------ + +1) Device creation & deletion + + a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype mpls_uc + + This creates a bareudp tunnel device which tunnels L3 traffic with ethertype + 0x8847 (MPLS traffic). The destination port of the UDP header will be set to + 6635.The device will listen on UDP port 6635 to receive traffic. + + b) ip link delete bareudp0 + +2) Device creation with multiproto mode enabled + +The multiproto mode allows bareudp tunnels to handle several protocols of the +same family. It is currently only available for IP and MPLS. This mode has to +be enabled explicitly with the "multiproto" flag. + + a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype ipv4 multiproto + + For an IPv4 tunnel the multiproto mode allows the tunnel to also handle + IPv6. + + b) ip link add dev bareudp0 type bareudp dstport 6635 ethertype mpls_uc multiproto + + For MPLS, the multiproto mode allows the tunnel to handle both unicast + and multicast MPLS packets. + +3) Device Usage + +The bareudp device could be used along with OVS or flower filter in TC. +The OVS or TC flower layer must set the tunnel information in SKB dst field before +sending packet buffer to the bareudp device for transmission. On reception the +bareudp device extracts and stores the tunnel information in SKB dst field before +passing the packet buffer to the network stack. diff --git a/Documentation/networking/batman-adv.rst b/Documentation/networking/batman-adv.rst new file mode 100644 index 000000000..74821d29a --- /dev/null +++ b/Documentation/networking/batman-adv.rst @@ -0,0 +1,168 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========== +batman-adv +========== + +Batman advanced is a new approach to wireless networking which does no longer +operate on the IP basis. Unlike the batman daemon, which exchanges information +using UDP packets and sets routing tables, batman-advanced operates on ISO/OSI +Layer 2 only and uses and routes (or better: bridges) Ethernet Frames. It +emulates a virtual network switch of all nodes participating. Therefore all +nodes appear to be link local, thus all higher operating protocols won't be +affected by any changes within the network. You can run almost any protocol +above batman advanced, prominent examples are: IPv4, IPv6, DHCP, IPX. + +Batman advanced was implemented as a Linux kernel driver to reduce the overhead +to a minimum. It does not depend on any (other) network driver, and can be used +on wifi as well as ethernet lan, vpn, etc ... (anything with ethernet-style +layer 2). + + +Configuration +============= + +Load the batman-adv module into your kernel:: + + $ insmod batman-adv.ko + +The module is now waiting for activation. You must add some interfaces on which +batman-adv can operate. The batman-adv soft-interface can be created using the +iproute2 tool ``ip``:: + + $ ip link add name bat0 type batadv + +To activate a given interface simply attach it to the ``bat0`` interface:: + + $ ip link set dev eth0 master bat0 + +Repeat this step for all interfaces you wish to add. Now batman-adv starts +using/broadcasting on this/these interface(s). + +To deactivate an interface you have to detach it from the "bat0" interface:: + + $ ip link set dev eth0 nomaster + +The same can also be done using the batctl interface subcommand:: + + batctl -m bat0 interface create + batctl -m bat0 interface add -M eth0 + +To detach eth0 and destroy bat0:: + + batctl -m bat0 interface del -M eth0 + batctl -m bat0 interface destroy + +There are additional settings for each batadv mesh interface, vlan and hardif +which can be modified using batctl. Detailed information about this can be found +in its manual. + +For instance, you can check the current originator interval (value +in milliseconds which determines how often batman-adv sends its broadcast +packets):: + + $ batctl -M bat0 orig_interval + 1000 + +and also change its value:: + + $ batctl -M bat0 orig_interval 3000 + +In very mobile scenarios, you might want to adjust the originator interval to a +lower value. This will make the mesh more responsive to topology changes, but +will also increase the overhead. + +Information about the current state can be accessed via the batadv generic +netlink family. batctl provides a human readable version via its debug tables +subcommands. + + +Usage +===== + +To make use of your newly created mesh, batman advanced provides a new +interface "bat0" which you should use from this point on. All interfaces added +to batman advanced are not relevant any longer because batman handles them for +you. Basically, one "hands over" the data by using the batman interface and +batman will make sure it reaches its destination. + +The "bat0" interface can be used like any other regular interface. It needs an +IP address which can be either statically configured or dynamically (by using +DHCP or similar services):: + + NodeA: ip link set up dev bat0 + NodeA: ip addr add 192.168.0.1/24 dev bat0 + + NodeB: ip link set up dev bat0 + NodeB: ip addr add 192.168.0.2/24 dev bat0 + NodeB: ping 192.168.0.1 + +Note: In order to avoid problems remove all IP addresses previously assigned to +interfaces now used by batman advanced, e.g.:: + + $ ip addr flush dev eth0 + + +Logging/Debugging +================= + +All error messages, warnings and information messages are sent to the kernel +log. Depending on your operating system distribution this can be read in one of +a number of ways. Try using the commands: ``dmesg``, ``logread``, or looking in +the files ``/var/log/kern.log`` or ``/var/log/syslog``. All batman-adv messages +are prefixed with "batman-adv:" So to see just these messages try:: + + $ dmesg | grep batman-adv + +When investigating problems with your mesh network, it is sometimes necessary to +see more detailed debug messages. This must be enabled when compiling the +batman-adv module. When building batman-adv as part of the kernel, use "make +menuconfig" and enable the option ``B.A.T.M.A.N. debugging`` +(``CONFIG_BATMAN_ADV_DEBUG=y``). + +Those additional debug messages can be accessed using the perf infrastructure:: + + $ trace-cmd stream -e batadv:batadv_dbg + +The additional debug output is by default disabled. It can be enabled during +run time:: + + $ batctl -m bat0 loglevel routes tt + +will enable debug messages for when routes and translation table entries change. + +Counters for different types of packets entering and leaving the batman-adv +module are available through ethtool:: + + $ ethtool --statistics bat0 + + +batctl +====== + +As batman advanced operates on layer 2, all hosts participating in the virtual +switch are completely transparent for all protocols above layer 2. Therefore +the common diagnosis tools do not work as expected. To overcome these problems, +batctl was created. At the moment the batctl contains ping, traceroute, tcpdump +and interfaces to the kernel module settings. + +For more information, please see the manpage (``man batctl``). + +batctl is available on https://www.open-mesh.org/ + + +Contact +======= + +Please send us comments, experiences, questions, anything :) + +IRC: + #batman on irc.freenode.org +Mailing-list: + b.a.t.m.a.n@open-mesh.org (optional subscription at + https://lists.open-mesh.org/mailman3/postorius/lists/b.a.t.m.a.n.lists.open-mesh.org/) + +You can also contact the Authors: + +* Marek Lindner +* Simon Wunderlich diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst new file mode 100644 index 000000000..413dca513 --- /dev/null +++ b/Documentation/networking/bonding.rst @@ -0,0 +1,2873 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================== +Linux Ethernet Bonding Driver HOWTO +=================================== + +Latest update: 27 April 2011 + +Initial release: Thomas Davis + +Corrections, HA extensions: 2000/10/03-15: + + - Willy Tarreau + - Constantine Gavrilov + - Chad N. Tindel + - Janice Girouard + - Jay Vosburgh + +Reorganized and updated Feb 2005 by Jay Vosburgh +Added Sysfs information: 2006/04/24 + + - Mitch Williams + +Introduction +============ + +The Linux bonding driver provides a method for aggregating +multiple network interfaces into a single logical "bonded" interface. +The behavior of the bonded interfaces depends upon the mode; generally +speaking, modes provide either hot standby or load balancing services. +Additionally, link integrity monitoring may be performed. + +The bonding driver originally came from Donald Becker's +beowulf patches for kernel 2.0. It has changed quite a bit since, and +the original tools from extreme-linux and beowulf sites will not work +with this version of the driver. + +For new versions of the driver, updated userspace tools, and +who to ask for help, please follow the links at the end of this file. + +.. Table of Contents + + 1. Bonding Driver Installation + + 2. Bonding Driver Options + + 3. Configuring Bonding Devices + 3.1 Configuration with Sysconfig Support + 3.1.1 Using DHCP with Sysconfig + 3.1.2 Configuring Multiple Bonds with Sysconfig + 3.2 Configuration with Initscripts Support + 3.2.1 Using DHCP with Initscripts + 3.2.2 Configuring Multiple Bonds with Initscripts + 3.3 Configuring Bonding Manually with Ifenslave + 3.3.1 Configuring Multiple Bonds Manually + 3.4 Configuring Bonding Manually via Sysfs + 3.5 Configuration with Interfaces Support + 3.6 Overriding Configuration for Special Cases + 3.7 Configuring LACP for 802.3ad mode in a more secure way + + 4. Querying Bonding Configuration + 4.1 Bonding Configuration + 4.2 Network Configuration + + 5. Switch Configuration + + 6. 802.1q VLAN Support + + 7. Link Monitoring + 7.1 ARP Monitor Operation + 7.2 Configuring Multiple ARP Targets + 7.3 MII Monitor Operation + + 8. Potential Trouble Sources + 8.1 Adventures in Routing + 8.2 Ethernet Device Renaming + 8.3 Painfully Slow Or No Failed Link Detection By Miimon + + 9. SNMP agents + + 10. Promiscuous mode + + 11. Configuring Bonding for High Availability + 11.1 High Availability in a Single Switch Topology + 11.2 High Availability in a Multiple Switch Topology + 11.2.1 HA Bonding Mode Selection for Multiple Switch Topology + 11.2.2 HA Link Monitoring for Multiple Switch Topology + + 12. Configuring Bonding for Maximum Throughput + 12.1 Maximum Throughput in a Single Switch Topology + 12.1.1 MT Bonding Mode Selection for Single Switch Topology + 12.1.2 MT Link Monitoring for Single Switch Topology + 12.2 Maximum Throughput in a Multiple Switch Topology + 12.2.1 MT Bonding Mode Selection for Multiple Switch Topology + 12.2.2 MT Link Monitoring for Multiple Switch Topology + + 13. Switch Behavior Issues + 13.1 Link Establishment and Failover Delays + 13.2 Duplicated Incoming Packets + + 14. Hardware Specific Considerations + 14.1 IBM BladeCenter + + 15. Frequently Asked Questions + + 16. Resources and Links + + +1. Bonding Driver Installation +============================== + +Most popular distro kernels ship with the bonding driver +already available as a module. If your distro does not, or you +have need to compile bonding from source (e.g., configuring and +installing a mainline kernel from kernel.org), you'll need to perform +the following steps: + +1.1 Configure and build the kernel with bonding +----------------------------------------------- + +The current version of the bonding driver is available in the +drivers/net/bonding subdirectory of the most recent kernel source +(which is available on http://kernel.org). Most users "rolling their +own" will want to use the most recent kernel from kernel.org. + +Configure kernel with "make menuconfig" (or "make xconfig" or +"make config"), then select "Bonding driver support" in the "Network +device support" section. It is recommended that you configure the +driver as module since it is currently the only way to pass parameters +to the driver or configure more than one bonding device. + +Build and install the new kernel and modules. + +1.2 Bonding Control Utility +--------------------------- + +It is recommended to configure bonding via iproute2 (netlink) +or sysfs, the old ifenslave control utility is obsolete. + +2. Bonding Driver Options +========================= + +Options for the bonding driver are supplied as parameters to the +bonding module at load time, or are specified via sysfs. + +Module options may be given as command line arguments to the +insmod or modprobe command, but are usually specified in either the +``/etc/modprobe.d/*.conf`` configuration files, or in a distro-specific +configuration file (some of which are detailed in the next section). + +Details on bonding support for sysfs is provided in the +"Configuring Bonding Manually via Sysfs" section, below. + +The available bonding driver parameters are listed below. If a +parameter is not specified the default value is used. When initially +configuring a bond, it is recommended "tail -f /var/log/messages" be +run in a separate window to watch for bonding driver error messages. + +It is critical that either the miimon or arp_interval and +arp_ip_target parameters be specified, otherwise serious network +degradation will occur during link failures. Very few devices do not +support at least miimon, so there is really no reason not to use it. + +Options with textual values will accept either the text name +or, for backwards compatibility, the option value. E.g., +"mode=802.3ad" and "mode=4" set the same mode. + +The parameters are as follows: + +active_slave + + Specifies the new active slave for modes that support it + (active-backup, balance-alb and balance-tlb). Possible values + are the name of any currently enslaved interface, or an empty + string. If a name is given, the slave and its link must be up in order + to be selected as the new active slave. If an empty string is + specified, the current active slave is cleared, and a new active + slave is selected automatically. + + Note that this is only available through the sysfs interface. No module + parameter by this name exists. + + The normal value of this option is the name of the currently + active slave, or the empty string if there is no active slave or + the current mode does not use an active slave. + +ad_actor_sys_prio + + In an AD system, this specifies the system priority. The allowed range + is 1 - 65535. If the value is not specified, it takes 65535 as the + default value. + + This parameter has effect only in 802.3ad mode and is available through + SysFs interface. + +ad_actor_system + + In an AD system, this specifies the mac-address for the actor in + protocol packet exchanges (LACPDUs). The value cannot be a multicast + address. If the all-zeroes MAC is specified, bonding will internally + use the MAC of the bond itself. It is preferred to have the + local-admin bit set for this mac but driver does not enforce it. If + the value is not given then system defaults to using the masters' + mac address as actors' system address. + + This parameter has effect only in 802.3ad mode and is available through + SysFs interface. + +ad_select + + Specifies the 802.3ad aggregation selection logic to use. The + possible values and their effects are: + + stable or 0 + + The active aggregator is chosen by largest aggregate + bandwidth. + + Reselection of the active aggregator occurs only when all + slaves of the active aggregator are down or the active + aggregator has no slaves. + + This is the default value. + + bandwidth or 1 + + The active aggregator is chosen by largest aggregate + bandwidth. Reselection occurs if: + + - A slave is added to or removed from the bond + + - Any slave's link state changes + + - Any slave's 802.3ad association state changes + + - The bond's administrative state changes to up + + count or 2 + + The active aggregator is chosen by the largest number of + ports (slaves). Reselection occurs as described under the + "bandwidth" setting, above. + + The bandwidth and count selection policies permit failover of + 802.3ad aggregations when partial failure of the active aggregator + occurs. This keeps the aggregator with the highest availability + (either in bandwidth or in number of ports) active at all times. + + This option was added in bonding version 3.4.0. + +ad_user_port_key + + In an AD system, the port-key has three parts as shown below - + + ===== ============ + Bits Use + ===== ============ + 00 Duplex + 01-05 Speed + 06-15 User-defined + ===== ============ + + This defines the upper 10 bits of the port key. The values can be + from 0 - 1023. If not given, the system defaults to 0. + + This parameter has effect only in 802.3ad mode and is available through + SysFs interface. + +all_slaves_active + + Specifies that duplicate frames (received on inactive ports) should be + dropped (0) or delivered (1). + + Normally, bonding will drop duplicate frames (received on inactive + ports), which is desirable for most users. But there are some times + it is nice to allow duplicate frames to be delivered. + + The default value is 0 (drop duplicate frames received on inactive + ports). + +arp_interval + + Specifies the ARP link monitoring frequency in milliseconds. + + The ARP monitor works by periodically checking the slave + devices to determine whether they have sent or received + traffic recently (the precise criteria depends upon the + bonding mode, and the state of the slave). Regular traffic is + generated via ARP probes issued for the addresses specified by + the arp_ip_target option. + + This behavior can be modified by the arp_validate option, + below. + + If ARP monitoring is used in an etherchannel compatible mode + (modes 0 and 2), the switch should be configured in a mode + that evenly distributes packets across all links. If the + switch is configured to distribute the packets in an XOR + fashion, all replies from the ARP targets will be received on + the same link which could cause the other team members to + fail. ARP monitoring should not be used in conjunction with + miimon. A value of 0 disables ARP monitoring. The default + value is 0. + +arp_ip_target + + Specifies the IP addresses to use as ARP monitoring peers when + arp_interval is > 0. These are the targets of the ARP request + sent to determine the health of the link to the targets. + Specify these values in ddd.ddd.ddd.ddd format. Multiple IP + addresses must be separated by a comma. At least one IP + address must be given for ARP monitoring to function. The + maximum number of targets that can be specified is 16. The + default value is no IP addresses. + +arp_validate + + Specifies whether or not ARP probes and replies should be + validated in any mode that supports arp monitoring, or whether + non-ARP traffic should be filtered (disregarded) for link + monitoring purposes. + + Possible values are: + + none or 0 + + No validation or filtering is performed. + + active or 1 + + Validation is performed only for the active slave. + + backup or 2 + + Validation is performed only for backup slaves. + + all or 3 + + Validation is performed for all slaves. + + filter or 4 + + Filtering is applied to all slaves. No validation is + performed. + + filter_active or 5 + + Filtering is applied to all slaves, validation is performed + only for the active slave. + + filter_backup or 6 + + Filtering is applied to all slaves, validation is performed + only for backup slaves. + + Validation: + + Enabling validation causes the ARP monitor to examine the incoming + ARP requests and replies, and only consider a slave to be up if it + is receiving the appropriate ARP traffic. + + For an active slave, the validation checks ARP replies to confirm + that they were generated by an arp_ip_target. Since backup slaves + do not typically receive these replies, the validation performed + for backup slaves is on the broadcast ARP request sent out via the + active slave. It is possible that some switch or network + configurations may result in situations wherein the backup slaves + do not receive the ARP requests; in such a situation, validation + of backup slaves must be disabled. + + The validation of ARP requests on backup slaves is mainly helping + bonding to decide which slaves are more likely to work in case of + the active slave failure, it doesn't really guarantee that the + backup slave will work if it's selected as the next active slave. + + Validation is useful in network configurations in which multiple + bonding hosts are concurrently issuing ARPs to one or more targets + beyond a common switch. Should the link between the switch and + target fail (but not the switch itself), the probe traffic + generated by the multiple bonding instances will fool the standard + ARP monitor into considering the links as still up. Use of + validation can resolve this, as the ARP monitor will only consider + ARP requests and replies associated with its own instance of + bonding. + + Filtering: + + Enabling filtering causes the ARP monitor to only use incoming ARP + packets for link availability purposes. Arriving packets that are + not ARPs are delivered normally, but do not count when determining + if a slave is available. + + Filtering operates by only considering the reception of ARP + packets (any ARP packet, regardless of source or destination) when + determining if a slave has received traffic for link availability + purposes. + + Filtering is useful in network configurations in which significant + levels of third party broadcast traffic would fool the standard + ARP monitor into considering the links as still up. Use of + filtering can resolve this, as only ARP traffic is considered for + link availability purposes. + + This option was added in bonding version 3.1.0. + +arp_all_targets + + Specifies the quantity of arp_ip_targets that must be reachable + in order for the ARP monitor to consider a slave as being up. + This option affects only active-backup mode for slaves with + arp_validation enabled. + + Possible values are: + + any or 0 + + consider the slave up only when any of the arp_ip_targets + is reachable + + all or 1 + + consider the slave up only when all of the arp_ip_targets + are reachable + +downdelay + + Specifies the time, in milliseconds, to wait before disabling + a slave after a link failure has been detected. This option + is only valid for the miimon link monitor. The downdelay + value should be a multiple of the miimon value; if not, it + will be rounded down to the nearest multiple. The default + value is 0. + +fail_over_mac + + Specifies whether active-backup mode should set all slaves to + the same MAC address at enslavement (the traditional + behavior), or, when enabled, perform special handling of the + bond's MAC address in accordance with the selected policy. + + Possible values are: + + none or 0 + + This setting disables fail_over_mac, and causes + bonding to set all slaves of an active-backup bond to + the same MAC address at enslavement time. This is the + default. + + active or 1 + + The "active" fail_over_mac policy indicates that the + MAC address of the bond should always be the MAC + address of the currently active slave. The MAC + address of the slaves is not changed; instead, the MAC + address of the bond changes during a failover. + + This policy is useful for devices that cannot ever + alter their MAC address, or for devices that refuse + incoming broadcasts with their own source MAC (which + interferes with the ARP monitor). + + The down side of this policy is that every device on + the network must be updated via gratuitous ARP, + vs. just updating a switch or set of switches (which + often takes place for any traffic, not just ARP + traffic, if the switch snoops incoming traffic to + update its tables) for the traditional method. If the + gratuitous ARP is lost, communication may be + disrupted. + + When this policy is used in conjunction with the mii + monitor, devices which assert link up prior to being + able to actually transmit and receive are particularly + susceptible to loss of the gratuitous ARP, and an + appropriate updelay setting may be required. + + follow or 2 + + The "follow" fail_over_mac policy causes the MAC + address of the bond to be selected normally (normally + the MAC address of the first slave added to the bond). + However, the second and subsequent slaves are not set + to this MAC address while they are in a backup role; a + slave is programmed with the bond's MAC address at + failover time (and the formerly active slave receives + the newly active slave's MAC address). + + This policy is useful for multiport devices that + either become confused or incur a performance penalty + when multiple ports are programmed with the same MAC + address. + + + The default policy is none, unless the first slave cannot + change its MAC address, in which case the active policy is + selected by default. + + This option may be modified via sysfs only when no slaves are + present in the bond. + + This option was added in bonding version 3.2.0. The "follow" + policy was added in bonding version 3.3.0. + +lacp_rate + + Option specifying the rate in which we'll ask our link partner + to transmit LACPDU packets in 802.3ad mode. Possible values + are: + + slow or 0 + Request partner to transmit LACPDUs every 30 seconds + + fast or 1 + Request partner to transmit LACPDUs every 1 second + + The default is slow. + +max_bonds + + Specifies the number of bonding devices to create for this + instance of the bonding driver. E.g., if max_bonds is 3, and + the bonding driver is not already loaded, then bond0, bond1 + and bond2 will be created. The default value is 1. Specifying + a value of 0 will load bonding, but will not create any devices. + +miimon + + Specifies the MII link monitoring frequency in milliseconds. + This determines how often the link state of each slave is + inspected for link failures. A value of zero disables MII + link monitoring. A value of 100 is a good starting point. + The use_carrier option, below, affects how the link state is + determined. See the High Availability section for additional + information. The default value is 0. + +min_links + + Specifies the minimum number of links that must be active before + asserting carrier. It is similar to the Cisco EtherChannel min-links + feature. This allows setting the minimum number of member ports that + must be up (link-up state) before marking the bond device as up + (carrier on). This is useful for situations where higher level services + such as clustering want to ensure a minimum number of low bandwidth + links are active before switchover. This option only affect 802.3ad + mode. + + The default value is 0. This will cause carrier to be asserted (for + 802.3ad mode) whenever there is an active aggregator, regardless of the + number of available links in that aggregator. Note that, because an + aggregator cannot be active without at least one available link, + setting this option to 0 or to 1 has the exact same effect. + +mode + + Specifies one of the bonding policies. The default is + balance-rr (round robin). Possible values are: + + balance-rr or 0 + + Round-robin policy: Transmit packets in sequential + order from the first available slave through the + last. This mode provides load balancing and fault + tolerance. + + active-backup or 1 + + Active-backup policy: Only one slave in the bond is + active. A different slave becomes active if, and only + if, the active slave fails. The bond's MAC address is + externally visible on only one port (network adapter) + to avoid confusing the switch. + + In bonding version 2.6.2 or later, when a failover + occurs in active-backup mode, bonding will issue one + or more gratuitous ARPs on the newly active slave. + One gratuitous ARP is issued for the bonding master + interface and each VLAN interfaces configured above + it, provided that the interface has at least one IP + address configured. Gratuitous ARPs issued for VLAN + interfaces are tagged with the appropriate VLAN id. + + This mode provides fault tolerance. The primary + option, documented below, affects the behavior of this + mode. + + balance-xor or 2 + + XOR policy: Transmit based on the selected transmit + hash policy. The default policy is a simple [(source + MAC address XOR'd with destination MAC address XOR + packet type ID) modulo slave count]. Alternate transmit + policies may be selected via the xmit_hash_policy option, + described below. + + This mode provides load balancing and fault tolerance. + + broadcast or 3 + + Broadcast policy: transmits everything on all slave + interfaces. This mode provides fault tolerance. + + 802.3ad or 4 + + IEEE 802.3ad Dynamic link aggregation. Creates + aggregation groups that share the same speed and + duplex settings. Utilizes all slaves in the active + aggregator according to the 802.3ad specification. + + Slave selection for outgoing traffic is done according + to the transmit hash policy, which may be changed from + the default simple XOR policy via the xmit_hash_policy + option, documented below. Note that not all transmit + policies may be 802.3ad compliant, particularly in + regards to the packet mis-ordering requirements of + section 43.2.4 of the 802.3ad standard. Differing + peer implementations will have varying tolerances for + noncompliance. + + Prerequisites: + + 1. Ethtool support in the base drivers for retrieving + the speed and duplex of each slave. + + 2. A switch that supports IEEE 802.3ad Dynamic link + aggregation. + + Most switches will require some type of configuration + to enable 802.3ad mode. + + balance-tlb or 5 + + Adaptive transmit load balancing: channel bonding that + does not require any special switch support. + + In tlb_dynamic_lb=1 mode; the outgoing traffic is + distributed according to the current load (computed + relative to the speed) on each slave. + + In tlb_dynamic_lb=0 mode; the load balancing based on + current load is disabled and the load is distributed + only using the hash distribution. + + Incoming traffic is received by the current slave. + If the receiving slave fails, another slave takes over + the MAC address of the failed receiving slave. + + Prerequisite: + + Ethtool support in the base drivers for retrieving the + speed of each slave. + + balance-alb or 6 + + Adaptive load balancing: includes balance-tlb plus + receive load balancing (rlb) for IPV4 traffic, and + does not require any special switch support. The + receive load balancing is achieved by ARP negotiation. + The bonding driver intercepts the ARP Replies sent by + the local system on their way out and overwrites the + source hardware address with the unique hardware + address of one of the slaves in the bond such that + different peers use different hardware addresses for + the server. + + Receive traffic from connections created by the server + is also balanced. When the local system sends an ARP + Request the bonding driver copies and saves the peer's + IP information from the ARP packet. When the ARP + Reply arrives from the peer, its hardware address is + retrieved and the bonding driver initiates an ARP + reply to this peer assigning it to one of the slaves + in the bond. A problematic outcome of using ARP + negotiation for balancing is that each time that an + ARP request is broadcast it uses the hardware address + of the bond. Hence, peers learn the hardware address + of the bond and the balancing of receive traffic + collapses to the current slave. This is handled by + sending updates (ARP Replies) to all the peers with + their individually assigned hardware address such that + the traffic is redistributed. Receive traffic is also + redistributed when a new slave is added to the bond + and when an inactive slave is re-activated. The + receive load is distributed sequentially (round robin) + among the group of highest speed slaves in the bond. + + When a link is reconnected or a new slave joins the + bond the receive traffic is redistributed among all + active slaves in the bond by initiating ARP Replies + with the selected MAC address to each of the + clients. The updelay parameter (detailed below) must + be set to a value equal or greater than the switch's + forwarding delay so that the ARP Replies sent to the + peers will not be blocked by the switch. + + Prerequisites: + + 1. Ethtool support in the base drivers for retrieving + the speed of each slave. + + 2. Base driver support for setting the hardware + address of a device while it is open. This is + required so that there will always be one slave in the + team using the bond hardware address (the + curr_active_slave) while having a unique hardware + address for each slave in the bond. If the + curr_active_slave fails its hardware address is + swapped with the new curr_active_slave that was + chosen. + +num_grat_arp, +num_unsol_na + + Specify the number of peer notifications (gratuitous ARPs and + unsolicited IPv6 Neighbor Advertisements) to be issued after a + failover event. As soon as the link is up on the new slave + (possibly immediately) a peer notification is sent on the + bonding device and each VLAN sub-device. This is repeated at + the rate specified by peer_notif_delay if the number is + greater than 1. + + The valid range is 0 - 255; the default value is 1. These options + affect only the active-backup mode. These options were added for + bonding versions 3.3.0 and 3.4.0 respectively. + + From Linux 3.0 and bonding version 3.7.1, these notifications + are generated by the ipv4 and ipv6 code and the numbers of + repetitions cannot be set independently. + +packets_per_slave + + Specify the number of packets to transmit through a slave before + moving to the next one. When set to 0 then a slave is chosen at + random. + + The valid range is 0 - 65535; the default value is 1. This option + has effect only in balance-rr mode. + +peer_notif_delay + + Specify the delay, in milliseconds, between each peer + notification (gratuitous ARP and unsolicited IPv6 Neighbor + Advertisement) when they are issued after a failover event. + This delay should be a multiple of the link monitor interval + (arp_interval or miimon, whichever is active). The default + value is 0 which means to match the value of the link monitor + interval. + +primary + + A string (eth0, eth2, etc) specifying which slave is the + primary device. The specified device will always be the + active slave while it is available. Only when the primary is + off-line will alternate devices be used. This is useful when + one slave is preferred over another, e.g., when one slave has + higher throughput than another. + + The primary option is only valid for active-backup(1), + balance-tlb (5) and balance-alb (6) mode. + +primary_reselect + + Specifies the reselection policy for the primary slave. This + affects how the primary slave is chosen to become the active slave + when failure of the active slave or recovery of the primary slave + occurs. This option is designed to prevent flip-flopping between + the primary slave and other slaves. Possible values are: + + always or 0 (default) + + The primary slave becomes the active slave whenever it + comes back up. + + better or 1 + + The primary slave becomes the active slave when it comes + back up, if the speed and duplex of the primary slave is + better than the speed and duplex of the current active + slave. + + failure or 2 + + The primary slave becomes the active slave only if the + current active slave fails and the primary slave is up. + + The primary_reselect setting is ignored in two cases: + + If no slaves are active, the first slave to recover is + made the active slave. + + When initially enslaved, the primary slave is always made + the active slave. + + Changing the primary_reselect policy via sysfs will cause an + immediate selection of the best active slave according to the new + policy. This may or may not result in a change of the active + slave, depending upon the circumstances. + + This option was added for bonding version 3.6.0. + +tlb_dynamic_lb + + Specifies if dynamic shuffling of flows is enabled in tlb + mode. The value has no effect on any other modes. + + The default behavior of tlb mode is to shuffle active flows across + slaves based on the load in that interval. This gives nice lb + characteristics but can cause packet reordering. If re-ordering is + a concern use this variable to disable flow shuffling and rely on + load balancing provided solely by the hash distribution. + xmit-hash-policy can be used to select the appropriate hashing for + the setup. + + The sysfs entry can be used to change the setting per bond device + and the initial value is derived from the module parameter. The + sysfs entry is allowed to be changed only if the bond device is + down. + + The default value is "1" that enables flow shuffling while value "0" + disables it. This option was added in bonding driver 3.7.1 + + +updelay + + Specifies the time, in milliseconds, to wait before enabling a + slave after a link recovery has been detected. This option is + only valid for the miimon link monitor. The updelay value + should be a multiple of the miimon value; if not, it will be + rounded down to the nearest multiple. The default value is 0. + +use_carrier + + Specifies whether or not miimon should use MII or ETHTOOL + ioctls vs. netif_carrier_ok() to determine the link + status. The MII or ETHTOOL ioctls are less efficient and + utilize a deprecated calling sequence within the kernel. The + netif_carrier_ok() relies on the device driver to maintain its + state with netif_carrier_on/off; at this writing, most, but + not all, device drivers support this facility. + + If bonding insists that the link is up when it should not be, + it may be that your network device driver does not support + netif_carrier_on/off. The default state for netif_carrier is + "carrier on," so if a driver does not support netif_carrier, + it will appear as if the link is always up. In this case, + setting use_carrier to 0 will cause bonding to revert to the + MII / ETHTOOL ioctl method to determine the link state. + + A value of 1 enables the use of netif_carrier_ok(), a value of + 0 will use the deprecated MII / ETHTOOL ioctls. The default + value is 1. + +xmit_hash_policy + + Selects the transmit hash policy to use for slave selection in + balance-xor, 802.3ad, and tlb modes. Possible values are: + + layer2 + + Uses XOR of hardware MAC addresses and packet type ID + field to generate the hash. The formula is + + hash = source MAC XOR destination MAC XOR packet type ID + slave number = hash modulo slave count + + This algorithm will place all traffic to a particular + network peer on the same slave. + + This algorithm is 802.3ad compliant. + + layer2+3 + + This policy uses a combination of layer2 and layer3 + protocol information to generate the hash. + + Uses XOR of hardware MAC addresses and IP addresses to + generate the hash. The formula is + + hash = source MAC XOR destination MAC XOR packet type ID + hash = hash XOR source IP XOR destination IP + hash = hash XOR (hash RSHIFT 16) + hash = hash XOR (hash RSHIFT 8) + And then hash is reduced modulo slave count. + + If the protocol is IPv6 then the source and destination + addresses are first hashed using ipv6_addr_hash. + + This algorithm will place all traffic to a particular + network peer on the same slave. For non-IP traffic, + the formula is the same as for the layer2 transmit + hash policy. + + This policy is intended to provide a more balanced + distribution of traffic than layer2 alone, especially + in environments where a layer3 gateway device is + required to reach most destinations. + + This algorithm is 802.3ad compliant. + + layer3+4 + + This policy uses upper layer protocol information, + when available, to generate the hash. This allows for + traffic to a particular network peer to span multiple + slaves, although a single connection will not span + multiple slaves. + + The formula for unfragmented TCP and UDP packets is + + hash = source port, destination port (as in the header) + hash = hash XOR source IP XOR destination IP + hash = hash XOR (hash RSHIFT 16) + hash = hash XOR (hash RSHIFT 8) + And then hash is reduced modulo slave count. + + If the protocol is IPv6 then the source and destination + addresses are first hashed using ipv6_addr_hash. + + For fragmented TCP or UDP packets and all other IPv4 and + IPv6 protocol traffic, the source and destination port + information is omitted. For non-IP traffic, the + formula is the same as for the layer2 transmit hash + policy. + + This algorithm is not fully 802.3ad compliant. A + single TCP or UDP conversation containing both + fragmented and unfragmented packets will see packets + striped across two interfaces. This may result in out + of order delivery. Most traffic types will not meet + this criteria, as TCP rarely fragments traffic, and + most UDP traffic is not involved in extended + conversations. Other implementations of 802.3ad may + or may not tolerate this noncompliance. + + encap2+3 + + This policy uses the same formula as layer2+3 but it + relies on skb_flow_dissect to obtain the header fields + which might result in the use of inner headers if an + encapsulation protocol is used. For example this will + improve the performance for tunnel users because the + packets will be distributed according to the encapsulated + flows. + + encap3+4 + + This policy uses the same formula as layer3+4 but it + relies on skb_flow_dissect to obtain the header fields + which might result in the use of inner headers if an + encapsulation protocol is used. For example this will + improve the performance for tunnel users because the + packets will be distributed according to the encapsulated + flows. + + The default value is layer2. This option was added in bonding + version 2.6.3. In earlier versions of bonding, this parameter + does not exist, and the layer2 policy is the only policy. The + layer2+3 value was added for bonding version 3.2.2. + +resend_igmp + + Specifies the number of IGMP membership reports to be issued after + a failover event. One membership report is issued immediately after + the failover, subsequent packets are sent in each 200ms interval. + + The valid range is 0 - 255; the default value is 1. A value of 0 + prevents the IGMP membership report from being issued in response + to the failover event. + + This option is useful for bonding modes balance-rr (0), active-backup + (1), balance-tlb (5) and balance-alb (6), in which a failover can + switch the IGMP traffic from one slave to another. Therefore a fresh + IGMP report must be issued to cause the switch to forward the incoming + IGMP traffic over the newly selected slave. + + This option was added for bonding version 3.7.0. + +lp_interval + + Specifies the number of seconds between instances where the bonding + driver sends learning packets to each slaves peer switch. + + The valid range is 1 - 0x7fffffff; the default value is 1. This Option + has effect only in balance-tlb and balance-alb modes. + +3. Configuring Bonding Devices +============================== + +You can configure bonding using either your distro's network +initialization scripts, or manually using either iproute2 or the +sysfs interface. Distros generally use one of three packages for the +network initialization scripts: initscripts, sysconfig or interfaces. +Recent versions of these packages have support for bonding, while older +versions do not. + +We will first describe the options for configuring bonding for +distros using versions of initscripts, sysconfig and interfaces with full +or partial support for bonding, then provide information on enabling +bonding without support from the network initialization scripts (i.e., +older versions of initscripts or sysconfig). + +If you're unsure whether your distro uses sysconfig, +initscripts or interfaces, or don't know if it's new enough, have no fear. +Determining this is fairly straightforward. + +First, look for a file called interfaces in /etc/network directory. +If this file is present in your system, then your system use interfaces. See +Configuration with Interfaces Support. + +Else, issue the command:: + + $ rpm -qf /sbin/ifup + +It will respond with a line of text starting with either +"initscripts" or "sysconfig," followed by some numbers. This is the +package that provides your network initialization scripts. + +Next, to determine if your installation supports bonding, +issue the command:: + + $ grep ifenslave /sbin/ifup + +If this returns any matches, then your initscripts or +sysconfig has support for bonding. + +3.1 Configuration with Sysconfig Support +---------------------------------------- + +This section applies to distros using a version of sysconfig +with bonding support, for example, SuSE Linux Enterprise Server 9. + +SuSE SLES 9's networking configuration system does support +bonding, however, at this writing, the YaST system configuration +front end does not provide any means to work with bonding devices. +Bonding devices can be managed by hand, however, as follows. + +First, if they have not already been configured, configure the +slave devices. On SLES 9, this is most easily done by running the +yast2 sysconfig configuration utility. The goal is for to create an +ifcfg-id file for each slave device. The simplest way to accomplish +this is to configure the devices for DHCP (this is only to get the +file ifcfg-id file created; see below for some issues with DHCP). The +name of the configuration file for each device will be of the form:: + + ifcfg-id-xx:xx:xx:xx:xx:xx + +Where the "xx" portion will be replaced with the digits from +the device's permanent MAC address. + +Once the set of ifcfg-id-xx:xx:xx:xx:xx:xx files has been +created, it is necessary to edit the configuration files for the slave +devices (the MAC addresses correspond to those of the slave devices). +Before editing, the file will contain multiple lines, and will look +something like this:: + + BOOTPROTO='dhcp' + STARTMODE='on' + USERCTL='no' + UNIQUE='XNzu.WeZGOGF+4wE' + _nm_name='bus-pci-0001:61:01.0' + +Change the BOOTPROTO and STARTMODE lines to the following:: + + BOOTPROTO='none' + STARTMODE='off' + +Do not alter the UNIQUE or _nm_name lines. Remove any other +lines (USERCTL, etc). + +Once the ifcfg-id-xx:xx:xx:xx:xx:xx files have been modified, +it's time to create the configuration file for the bonding device +itself. This file is named ifcfg-bondX, where X is the number of the +bonding device to create, starting at 0. The first such file is +ifcfg-bond0, the second is ifcfg-bond1, and so on. The sysconfig +network configuration system will correctly start multiple instances +of bonding. + +The contents of the ifcfg-bondX file is as follows:: + + BOOTPROTO="static" + BROADCAST="10.0.2.255" + IPADDR="10.0.2.10" + NETMASK="255.255.0.0" + NETWORK="10.0.2.0" + REMOTE_IPADDR="" + STARTMODE="onboot" + BONDING_MASTER="yes" + BONDING_MODULE_OPTS="mode=active-backup miimon=100" + BONDING_SLAVE0="eth0" + BONDING_SLAVE1="bus-pci-0000:06:08.1" + +Replace the sample BROADCAST, IPADDR, NETMASK and NETWORK +values with the appropriate values for your network. + +The STARTMODE specifies when the device is brought online. +The possible values are: + + ======== ====================================================== + onboot The device is started at boot time. If you're not + sure, this is probably what you want. + + manual The device is started only when ifup is called + manually. Bonding devices may be configured this + way if you do not wish them to start automatically + at boot for some reason. + + hotplug The device is started by a hotplug event. This is not + a valid choice for a bonding device. + + off or The device configuration is ignored. + ignore + ======== ====================================================== + +The line BONDING_MASTER='yes' indicates that the device is a +bonding master device. The only useful value is "yes." + +The contents of BONDING_MODULE_OPTS are supplied to the +instance of the bonding module for this device. Specify the options +for the bonding mode, link monitoring, and so on here. Do not include +the max_bonds bonding parameter; this will confuse the configuration +system if you have multiple bonding devices. + +Finally, supply one BONDING_SLAVEn="slave device" for each +slave. where "n" is an increasing value, one for each slave. The +"slave device" is either an interface name, e.g., "eth0", or a device +specifier for the network device. The interface name is easier to +find, but the ethN names are subject to change at boot time if, e.g., +a device early in the sequence has failed. The device specifiers +(bus-pci-0000:06:08.1 in the example above) specify the physical +network device, and will not change unless the device's bus location +changes (for example, it is moved from one PCI slot to another). The +example above uses one of each type for demonstration purposes; most +configurations will choose one or the other for all slave devices. + +When all configuration files have been modified or created, +networking must be restarted for the configuration changes to take +effect. This can be accomplished via the following:: + + # /etc/init.d/network restart + +Note that the network control script (/sbin/ifdown) will +remove the bonding module as part of the network shutdown processing, +so it is not necessary to remove the module by hand if, e.g., the +module parameters have changed. + +Also, at this writing, YaST/YaST2 will not manage bonding +devices (they do not show bonding interfaces on its list of network +devices). It is necessary to edit the configuration file by hand to +change the bonding configuration. + +Additional general options and details of the ifcfg file +format can be found in an example ifcfg template file:: + + /etc/sysconfig/network/ifcfg.template + +Note that the template does not document the various ``BONDING_*`` +settings described above, but does describe many of the other options. + +3.1.1 Using DHCP with Sysconfig +------------------------------- + +Under sysconfig, configuring a device with BOOTPROTO='dhcp' +will cause it to query DHCP for its IP address information. At this +writing, this does not function for bonding devices; the scripts +attempt to obtain the device address from DHCP prior to adding any of +the slave devices. Without active slaves, the DHCP requests are not +sent to the network. + +3.1.2 Configuring Multiple Bonds with Sysconfig +----------------------------------------------- + +The sysconfig network initialization system is capable of +handling multiple bonding devices. All that is necessary is for each +bonding instance to have an appropriately configured ifcfg-bondX file +(as described above). Do not specify the "max_bonds" parameter to any +instance of bonding, as this will confuse sysconfig. If you require +multiple bonding devices with identical parameters, create multiple +ifcfg-bondX files. + +Because the sysconfig scripts supply the bonding module +options in the ifcfg-bondX file, it is not necessary to add them to +the system ``/etc/modules.d/*.conf`` configuration files. + +3.2 Configuration with Initscripts Support +------------------------------------------ + +This section applies to distros using a recent version of +initscripts with bonding support, for example, Red Hat Enterprise Linux +version 3 or later, Fedora, etc. On these systems, the network +initialization scripts have knowledge of bonding, and can be configured to +control bonding devices. Note that older versions of the initscripts +package have lower levels of support for bonding; this will be noted where +applicable. + +These distros will not automatically load the network adapter +driver unless the ethX device is configured with an IP address. +Because of this constraint, users must manually configure a +network-script file for all physical adapters that will be members of +a bondX link. Network script files are located in the directory: + +/etc/sysconfig/network-scripts + +The file name must be prefixed with "ifcfg-eth" and suffixed +with the adapter's physical adapter number. For example, the script +for eth0 would be named /etc/sysconfig/network-scripts/ifcfg-eth0. +Place the following text in the file:: + + DEVICE=eth0 + USERCTL=no + ONBOOT=yes + MASTER=bond0 + SLAVE=yes + BOOTPROTO=none + +The DEVICE= line will be different for every ethX device and +must correspond with the name of the file, i.e., ifcfg-eth1 must have +a device line of DEVICE=eth1. The setting of the MASTER= line will +also depend on the final bonding interface name chosen for your bond. +As with other network devices, these typically start at 0, and go up +one for each device, i.e., the first bonding instance is bond0, the +second is bond1, and so on. + +Next, create a bond network script. The file name for this +script will be /etc/sysconfig/network-scripts/ifcfg-bondX where X is +the number of the bond. For bond0 the file is named "ifcfg-bond0", +for bond1 it is named "ifcfg-bond1", and so on. Within that file, +place the following text:: + + DEVICE=bond0 + IPADDR=192.168.1.1 + NETMASK=255.255.255.0 + NETWORK=192.168.1.0 + BROADCAST=192.168.1.255 + ONBOOT=yes + BOOTPROTO=none + USERCTL=no + +Be sure to change the networking specific lines (IPADDR, +NETMASK, NETWORK and BROADCAST) to match your network configuration. + +For later versions of initscripts, such as that found with Fedora +7 (or later) and Red Hat Enterprise Linux version 5 (or later), it is possible, +and, indeed, preferable, to specify the bonding options in the ifcfg-bond0 +file, e.g. a line of the format:: + + BONDING_OPTS="mode=active-backup arp_interval=60 arp_ip_target=192.168.1.254" + +will configure the bond with the specified options. The options +specified in BONDING_OPTS are identical to the bonding module parameters +except for the arp_ip_target field when using versions of initscripts older +than and 8.57 (Fedora 8) and 8.45.19 (Red Hat Enterprise Linux 5.2). When +using older versions each target should be included as a separate option and +should be preceded by a '+' to indicate it should be added to the list of +queried targets, e.g.,:: + + arp_ip_target=+192.168.1.1 arp_ip_target=+192.168.1.2 + +is the proper syntax to specify multiple targets. When specifying +options via BONDING_OPTS, it is not necessary to edit +``/etc/modprobe.d/*.conf``. + +For even older versions of initscripts that do not support +BONDING_OPTS, it is necessary to edit /etc/modprobe.d/*.conf, depending upon +your distro) to load the bonding module with your desired options when the +bond0 interface is brought up. The following lines in /etc/modprobe.d/*.conf +will load the bonding module, and select its options: + + alias bond0 bonding + options bond0 mode=balance-alb miimon=100 + +Replace the sample parameters with the appropriate set of +options for your configuration. + +Finally run "/etc/rc.d/init.d/network restart" as root. This +will restart the networking subsystem and your bond link should be now +up and running. + +3.2.1 Using DHCP with Initscripts +--------------------------------- + +Recent versions of initscripts (the versions supplied with Fedora +Core 3 and Red Hat Enterprise Linux 4, or later versions, are reported to +work) have support for assigning IP information to bonding devices via +DHCP. + +To configure bonding for DHCP, configure it as described +above, except replace the line "BOOTPROTO=none" with "BOOTPROTO=dhcp" +and add a line consisting of "TYPE=Bonding". Note that the TYPE value +is case sensitive. + +3.2.2 Configuring Multiple Bonds with Initscripts +------------------------------------------------- + +Initscripts packages that are included with Fedora 7 and Red Hat +Enterprise Linux 5 support multiple bonding interfaces by simply +specifying the appropriate BONDING_OPTS= in ifcfg-bondX where X is the +number of the bond. This support requires sysfs support in the kernel, +and a bonding driver of version 3.0.0 or later. Other configurations may +not support this method for specifying multiple bonding interfaces; for +those instances, see the "Configuring Multiple Bonds Manually" section, +below. + +3.3 Configuring Bonding Manually with iproute2 +----------------------------------------------- + +This section applies to distros whose network initialization +scripts (the sysconfig or initscripts package) do not have specific +knowledge of bonding. One such distro is SuSE Linux Enterprise Server +version 8. + +The general method for these systems is to place the bonding +module parameters into a config file in /etc/modprobe.d/ (as +appropriate for the installed distro), then add modprobe and/or +`ip link` commands to the system's global init script. The name of +the global init script differs; for sysconfig, it is +/etc/init.d/boot.local and for initscripts it is /etc/rc.d/rc.local. + +For example, if you wanted to make a simple bond of two e100 +devices (presumed to be eth0 and eth1), and have it persist across +reboots, edit the appropriate file (/etc/init.d/boot.local or +/etc/rc.d/rc.local), and add the following:: + + modprobe bonding mode=balance-alb miimon=100 + modprobe e100 + ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up + ip link set eth0 master bond0 + ip link set eth1 master bond0 + +Replace the example bonding module parameters and bond0 +network configuration (IP address, netmask, etc) with the appropriate +values for your configuration. + +Unfortunately, this method will not provide support for the +ifup and ifdown scripts on the bond devices. To reload the bonding +configuration, it is necessary to run the initialization script, e.g.,:: + + # /etc/init.d/boot.local + +or:: + + # /etc/rc.d/rc.local + +It may be desirable in such a case to create a separate script +which only initializes the bonding configuration, then call that +separate script from within boot.local. This allows for bonding to be +enabled without re-running the entire global init script. + +To shut down the bonding devices, it is necessary to first +mark the bonding device itself as being down, then remove the +appropriate device driver modules. For our example above, you can do +the following:: + + # ifconfig bond0 down + # rmmod bonding + # rmmod e100 + +Again, for convenience, it may be desirable to create a script +with these commands. + + +3.3.1 Configuring Multiple Bonds Manually +----------------------------------------- + +This section contains information on configuring multiple +bonding devices with differing options for those systems whose network +initialization scripts lack support for configuring multiple bonds. + +If you require multiple bonding devices, but all with the same +options, you may wish to use the "max_bonds" module parameter, +documented above. + +To create multiple bonding devices with differing options, it is +preferable to use bonding parameters exported by sysfs, documented in the +section below. + +For versions of bonding without sysfs support, the only means to +provide multiple instances of bonding with differing options is to load +the bonding driver multiple times. Note that current versions of the +sysconfig network initialization scripts handle this automatically; if +your distro uses these scripts, no special action is needed. See the +section Configuring Bonding Devices, above, if you're not sure about your +network initialization scripts. + +To load multiple instances of the module, it is necessary to +specify a different name for each instance (the module loading system +requires that every loaded module, even multiple instances of the same +module, have a unique name). This is accomplished by supplying multiple +sets of bonding options in ``/etc/modprobe.d/*.conf``, for example:: + + alias bond0 bonding + options bond0 -o bond0 mode=balance-rr miimon=100 + + alias bond1 bonding + options bond1 -o bond1 mode=balance-alb miimon=50 + +will load the bonding module two times. The first instance is +named "bond0" and creates the bond0 device in balance-rr mode with an +miimon of 100. The second instance is named "bond1" and creates the +bond1 device in balance-alb mode with an miimon of 50. + +In some circumstances (typically with older distributions), +the above does not work, and the second bonding instance never sees +its options. In that case, the second options line can be substituted +as follows:: + + install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \ + mode=balance-alb miimon=50 + +This may be repeated any number of times, specifying a new and +unique name in place of bond1 for each subsequent instance. + +It has been observed that some Red Hat supplied kernels are unable +to rename modules at load time (the "-o bond1" part). Attempts to pass +that option to modprobe will produce an "Operation not permitted" error. +This has been reported on some Fedora Core kernels, and has been seen on +RHEL 4 as well. On kernels exhibiting this problem, it will be impossible +to configure multiple bonds with differing parameters (as they are older +kernels, and also lack sysfs support). + +3.4 Configuring Bonding Manually via Sysfs +------------------------------------------ + +Starting with version 3.0.0, Channel Bonding may be configured +via the sysfs interface. This interface allows dynamic configuration +of all bonds in the system without unloading the module. It also +allows for adding and removing bonds at runtime. Ifenslave is no +longer required, though it is still supported. + +Use of the sysfs interface allows you to use multiple bonds +with different configurations without having to reload the module. +It also allows you to use multiple, differently configured bonds when +bonding is compiled into the kernel. + +You must have the sysfs filesystem mounted to configure +bonding this way. The examples in this document assume that you +are using the standard mount point for sysfs, e.g. /sys. If your +sysfs filesystem is mounted elsewhere, you will need to adjust the +example paths accordingly. + +Creating and Destroying Bonds +----------------------------- +To add a new bond foo:: + + # echo +foo > /sys/class/net/bonding_masters + +To remove an existing bond bar:: + + # echo -bar > /sys/class/net/bonding_masters + +To show all existing bonds:: + + # cat /sys/class/net/bonding_masters + +.. note:: + + due to 4K size limitation of sysfs files, this list may be + truncated if you have more than a few hundred bonds. This is unlikely + to occur under normal operating conditions. + +Adding and Removing Slaves +-------------------------- +Interfaces may be enslaved to a bond using the file +/sys/class/net//bonding/slaves. The semantics for this file +are the same as for the bonding_masters file. + +To enslave interface eth0 to bond bond0:: + + # ifconfig bond0 up + # echo +eth0 > /sys/class/net/bond0/bonding/slaves + +To free slave eth0 from bond bond0:: + + # echo -eth0 > /sys/class/net/bond0/bonding/slaves + +When an interface is enslaved to a bond, symlinks between the +two are created in the sysfs filesystem. In this case, you would get +/sys/class/net/bond0/slave_eth0 pointing to /sys/class/net/eth0, and +/sys/class/net/eth0/master pointing to /sys/class/net/bond0. + +This means that you can tell quickly whether or not an +interface is enslaved by looking for the master symlink. Thus: +# echo -eth0 > /sys/class/net/eth0/master/bonding/slaves +will free eth0 from whatever bond it is enslaved to, regardless of +the name of the bond interface. + +Changing a Bond's Configuration +------------------------------- +Each bond may be configured individually by manipulating the +files located in /sys/class/net//bonding + +The names of these files correspond directly with the command- +line parameters described elsewhere in this file, and, with the +exception of arp_ip_target, they accept the same values. To see the +current setting, simply cat the appropriate file. + +A few examples will be given here; for specific usage +guidelines for each parameter, see the appropriate section in this +document. + +To configure bond0 for balance-alb mode:: + + # ifconfig bond0 down + # echo 6 > /sys/class/net/bond0/bonding/mode + - or - + # echo balance-alb > /sys/class/net/bond0/bonding/mode + +.. note:: + + The bond interface must be down before the mode can be changed. + +To enable MII monitoring on bond0 with a 1 second interval:: + + # echo 1000 > /sys/class/net/bond0/bonding/miimon + +.. note:: + + If ARP monitoring is enabled, it will disabled when MII + monitoring is enabled, and vice-versa. + +To add ARP targets:: + + # echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target + # echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target + +.. note:: + + up to 16 target addresses may be specified. + +To remove an ARP target:: + + # echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target + +To configure the interval between learning packet transmits:: + + # echo 12 > /sys/class/net/bond0/bonding/lp_interval + +.. note:: + + the lp_interval is the number of seconds between instances where + the bonding driver sends learning packets to each slaves peer switch. The + default interval is 1 second. + +Example Configuration +--------------------- +We begin with the same example that is shown in section 3.3, +executed with sysfs, and without using ifenslave. + +To make a simple bond of two e100 devices (presumed to be eth0 +and eth1), and have it persist across reboots, edit the appropriate +file (/etc/init.d/boot.local or /etc/rc.d/rc.local), and add the +following:: + + modprobe bonding + modprobe e100 + echo balance-alb > /sys/class/net/bond0/bonding/mode + ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up + echo 100 > /sys/class/net/bond0/bonding/miimon + echo +eth0 > /sys/class/net/bond0/bonding/slaves + echo +eth1 > /sys/class/net/bond0/bonding/slaves + +To add a second bond, with two e1000 interfaces in +active-backup mode, using ARP monitoring, add the following lines to +your init script:: + + modprobe e1000 + echo +bond1 > /sys/class/net/bonding_masters + echo active-backup > /sys/class/net/bond1/bonding/mode + ifconfig bond1 192.168.2.1 netmask 255.255.255.0 up + echo +192.168.2.100 /sys/class/net/bond1/bonding/arp_ip_target + echo 2000 > /sys/class/net/bond1/bonding/arp_interval + echo +eth2 > /sys/class/net/bond1/bonding/slaves + echo +eth3 > /sys/class/net/bond1/bonding/slaves + +3.5 Configuration with Interfaces Support +----------------------------------------- + +This section applies to distros which use /etc/network/interfaces file +to describe network interface configuration, most notably Debian and it's +derivatives. + +The ifup and ifdown commands on Debian don't support bonding out of +the box. The ifenslave-2.6 package should be installed to provide bonding +support. Once installed, this package will provide ``bond-*`` options +to be used into /etc/network/interfaces. + +Note that ifenslave-2.6 package will load the bonding module and use +the ifenslave command when appropriate. + +Example Configurations +---------------------- + +In /etc/network/interfaces, the following stanza will configure bond0, in +active-backup mode, with eth0 and eth1 as slaves:: + + auto bond0 + iface bond0 inet dhcp + bond-slaves eth0 eth1 + bond-mode active-backup + bond-miimon 100 + bond-primary eth0 eth1 + +If the above configuration doesn't work, you might have a system using +upstart for system startup. This is most notably true for recent +Ubuntu versions. The following stanza in /etc/network/interfaces will +produce the same result on those systems:: + + auto bond0 + iface bond0 inet dhcp + bond-slaves none + bond-mode active-backup + bond-miimon 100 + + auto eth0 + iface eth0 inet manual + bond-master bond0 + bond-primary eth0 eth1 + + auto eth1 + iface eth1 inet manual + bond-master bond0 + bond-primary eth0 eth1 + +For a full list of ``bond-*`` supported options in /etc/network/interfaces and +some more advanced examples tailored to you particular distros, see the files in +/usr/share/doc/ifenslave-2.6. + +3.6 Overriding Configuration for Special Cases +---------------------------------------------- + +When using the bonding driver, the physical port which transmits a frame is +typically selected by the bonding driver, and is not relevant to the user or +system administrator. The output port is simply selected using the policies of +the selected bonding mode. On occasion however, it is helpful to direct certain +classes of traffic to certain physical interfaces on output to implement +slightly more complex policies. For example, to reach a web server over a +bonded interface in which eth0 connects to a private network, while eth1 +connects via a public network, it may be desirous to bias the bond to send said +traffic over eth0 first, using eth1 only as a fall back, while all other traffic +can safely be sent over either interface. Such configurations may be achieved +using the traffic control utilities inherent in linux. + +By default the bonding driver is multiqueue aware and 16 queues are created +when the driver initializes (see Documentation/networking/multiqueue.rst +for details). If more or less queues are desired the module parameter +tx_queues can be used to change this value. There is no sysfs parameter +available as the allocation is done at module init time. + +The output of the file /proc/net/bonding/bondX has changed so the output Queue +ID is now printed for each slave:: + + Bonding Mode: fault-tolerance (active-backup) + Primary Slave: None + Currently Active Slave: eth0 + MII Status: up + MII Polling Interval (ms): 0 + Up Delay (ms): 0 + Down Delay (ms): 0 + + Slave Interface: eth0 + MII Status: up + Link Failure Count: 0 + Permanent HW addr: 00:1a:a0:12:8f:cb + Slave queue ID: 0 + + Slave Interface: eth1 + MII Status: up + Link Failure Count: 0 + Permanent HW addr: 00:1a:a0:12:8f:cc + Slave queue ID: 2 + +The queue_id for a slave can be set using the command:: + + # echo "eth1:2" > /sys/class/net/bond0/bonding/queue_id + +Any interface that needs a queue_id set should set it with multiple calls +like the one above until proper priorities are set for all interfaces. On +distributions that allow configuration via initscripts, multiple 'queue_id' +arguments can be added to BONDING_OPTS to set all needed slave queues. + +These queue id's can be used in conjunction with the tc utility to configure +a multiqueue qdisc and filters to bias certain traffic to transmit on certain +slave devices. For instance, say we wanted, in the above configuration to +force all traffic bound to 192.168.1.100 to use eth1 in the bond as its output +device. The following commands would accomplish this:: + + # tc qdisc add dev bond0 handle 1 root multiq + + # tc filter add dev bond0 protocol ip parent 1: prio 1 u32 match ip \ + dst 192.168.1.100 action skbedit queue_mapping 2 + +These commands tell the kernel to attach a multiqueue queue discipline to the +bond0 interface and filter traffic enqueued to it, such that packets with a dst +ip of 192.168.1.100 have their output queue mapping value overwritten to 2. +This value is then passed into the driver, causing the normal output path +selection policy to be overridden, selecting instead qid 2, which maps to eth1. + +Note that qid values begin at 1. Qid 0 is reserved to initiate to the driver +that normal output policy selection should take place. One benefit to simply +leaving the qid for a slave to 0 is the multiqueue awareness in the bonding +driver that is now present. This awareness allows tc filters to be placed on +slave devices as well as bond devices and the bonding driver will simply act as +a pass-through for selecting output queues on the slave device rather than +output port selection. + +This feature first appeared in bonding driver version 3.7.0 and support for +output slave selection was limited to round-robin and active-backup modes. + +3.7 Configuring LACP for 802.3ad mode in a more secure way +---------------------------------------------------------- + +When using 802.3ad bonding mode, the Actor (host) and Partner (switch) +exchange LACPDUs. These LACPDUs cannot be sniffed, because they are +destined to link local mac addresses (which switches/bridges are not +supposed to forward). However, most of the values are easily predictable +or are simply the machine's MAC address (which is trivially known to all +other hosts in the same L2). This implies that other machines in the L2 +domain can spoof LACPDU packets from other hosts to the switch and potentially +cause mayhem by joining (from the point of view of the switch) another +machine's aggregate, thus receiving a portion of that hosts incoming +traffic and / or spoofing traffic from that machine themselves (potentially +even successfully terminating some portion of flows). Though this is not +a likely scenario, one could avoid this possibility by simply configuring +few bonding parameters: + + (a) ad_actor_system : You can set a random mac-address that can be used for + these LACPDU exchanges. The value can not be either NULL or Multicast. + Also it's preferable to set the local-admin bit. Following shell code + generates a random mac-address as described above:: + + # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \ + $(( (RANDOM & 0xFE) | 0x02 )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF ))) + # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system + + (b) ad_actor_sys_prio : Randomize the system priority. The default value + is 65535, but system can take the value from 1 - 65535. Following shell + code generates random priority and sets it:: + + # sys_prio=$(( 1 + RANDOM + RANDOM )) + # echo $sys_prio > /sys/class/net/bond0/bonding/ad_actor_sys_prio + + (c) ad_user_port_key : Use the user portion of the port-key. The default + keeps this empty. These are the upper 10 bits of the port-key and value + ranges from 0 - 1023. Following shell code generates these 10 bits and + sets it:: + + # usr_port_key=$(( RANDOM & 0x3FF )) + # echo $usr_port_key > /sys/class/net/bond0/bonding/ad_user_port_key + + +4 Querying Bonding Configuration +================================= + +4.1 Bonding Configuration +------------------------- + +Each bonding device has a read-only file residing in the +/proc/net/bonding directory. The file contents include information +about the bonding configuration, options and state of each slave. + +For example, the contents of /proc/net/bonding/bond0 after the +driver is loaded with parameters of mode=0 and miimon=1000 is +generally as follows:: + + Ethernet Channel Bonding Driver: 2.6.1 (October 29, 2004) + Bonding Mode: load balancing (round-robin) + Currently Active Slave: eth0 + MII Status: up + MII Polling Interval (ms): 1000 + Up Delay (ms): 0 + Down Delay (ms): 0 + + Slave Interface: eth1 + MII Status: up + Link Failure Count: 1 + + Slave Interface: eth0 + MII Status: up + Link Failure Count: 1 + +The precise format and contents will change depending upon the +bonding configuration, state, and version of the bonding driver. + +4.2 Network configuration +------------------------- + +The network configuration can be inspected using the ifconfig +command. Bonding devices will have the MASTER flag set; Bonding slave +devices will have the SLAVE flag set. The ifconfig output does not +contain information on which slaves are associated with which masters. + +In the example below, the bond0 interface is the master +(MASTER) while eth0 and eth1 are slaves (SLAVE). Notice all slaves of +bond0 have the same MAC address (HWaddr) as bond0 for all modes except +TLB and ALB that require a unique MAC address for each slave:: + + # /sbin/ifconfig + bond0 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4 + inet addr:XXX.XXX.XXX.YYY Bcast:XXX.XXX.XXX.255 Mask:255.255.252.0 + UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1 + RX packets:7224794 errors:0 dropped:0 overruns:0 frame:0 + TX packets:3286647 errors:1 dropped:0 overruns:1 carrier:0 + collisions:0 txqueuelen:0 + + eth0 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4 + UP BROADCAST RUNNING SLAVE MULTICAST MTU:1500 Metric:1 + RX packets:3573025 errors:0 dropped:0 overruns:0 frame:0 + TX packets:1643167 errors:1 dropped:0 overruns:1 carrier:0 + collisions:0 txqueuelen:100 + Interrupt:10 Base address:0x1080 + + eth1 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4 + UP BROADCAST RUNNING SLAVE MULTICAST MTU:1500 Metric:1 + RX packets:3651769 errors:0 dropped:0 overruns:0 frame:0 + TX packets:1643480 errors:0 dropped:0 overruns:0 carrier:0 + collisions:0 txqueuelen:100 + Interrupt:9 Base address:0x1400 + +5. Switch Configuration +======================= + +For this section, "switch" refers to whatever system the +bonded devices are directly connected to (i.e., where the other end of +the cable plugs into). This may be an actual dedicated switch device, +or it may be another regular system (e.g., another computer running +Linux), + +The active-backup, balance-tlb and balance-alb modes do not +require any specific configuration of the switch. + +The 802.3ad mode requires that the switch have the appropriate +ports configured as an 802.3ad aggregation. The precise method used +to configure this varies from switch to switch, but, for example, a +Cisco 3550 series switch requires that the appropriate ports first be +grouped together in a single etherchannel instance, then that +etherchannel is set to mode "lacp" to enable 802.3ad (instead of +standard EtherChannel). + +The balance-rr, balance-xor and broadcast modes generally +require that the switch have the appropriate ports grouped together. +The nomenclature for such a group differs between switches, it may be +called an "etherchannel" (as in the Cisco example, above), a "trunk +group" or some other similar variation. For these modes, each switch +will also have its own configuration options for the switch's transmit +policy to the bond. Typical choices include XOR of either the MAC or +IP addresses. The transmit policy of the two peers does not need to +match. For these three modes, the bonding mode really selects a +transmit policy for an EtherChannel group; all three will interoperate +with another EtherChannel group. + + +6. 802.1q VLAN Support +====================== + +It is possible to configure VLAN devices over a bond interface +using the 8021q driver. However, only packets coming from the 8021q +driver and passing through bonding will be tagged by default. Self +generated packets, for example, bonding's learning packets or ARP +packets generated by either ALB mode or the ARP monitor mechanism, are +tagged internally by bonding itself. As a result, bonding must +"learn" the VLAN IDs configured above it, and use those IDs to tag +self generated packets. + +For reasons of simplicity, and to support the use of adapters +that can do VLAN hardware acceleration offloading, the bonding +interface declares itself as fully hardware offloading capable, it gets +the add_vid/kill_vid notifications to gather the necessary +information, and it propagates those actions to the slaves. In case +of mixed adapter types, hardware accelerated tagged packets that +should go through an adapter that is not offloading capable are +"un-accelerated" by the bonding driver so the VLAN tag sits in the +regular location. + +VLAN interfaces *must* be added on top of a bonding interface +only after enslaving at least one slave. The bonding interface has a +hardware address of 00:00:00:00:00:00 until the first slave is added. +If the VLAN interface is created prior to the first enslavement, it +would pick up the all-zeroes hardware address. Once the first slave +is attached to the bond, the bond device itself will pick up the +slave's hardware address, which is then available for the VLAN device. + +Also, be aware that a similar problem can occur if all slaves +are released from a bond that still has one or more VLAN interfaces on +top of it. When a new slave is added, the bonding interface will +obtain its hardware address from the first slave, which might not +match the hardware address of the VLAN interfaces (which was +ultimately copied from an earlier slave). + +There are two methods to insure that the VLAN device operates +with the correct hardware address if all slaves are removed from a +bond interface: + +1. Remove all VLAN interfaces then recreate them + +2. Set the bonding interface's hardware address so that it +matches the hardware address of the VLAN interfaces. + +Note that changing a VLAN interface's HW address would set the +underlying device -- i.e. the bonding interface -- to promiscuous +mode, which might not be what you want. + + +7. Link Monitoring +================== + +The bonding driver at present supports two schemes for +monitoring a slave device's link state: the ARP monitor and the MII +monitor. + +At the present time, due to implementation restrictions in the +bonding driver itself, it is not possible to enable both ARP and MII +monitoring simultaneously. + +7.1 ARP Monitor Operation +------------------------- + +The ARP monitor operates as its name suggests: it sends ARP +queries to one or more designated peer systems on the network, and +uses the response as an indication that the link is operating. This +gives some assurance that traffic is actually flowing to and from one +or more peers on the local network. + +The ARP monitor relies on the device driver itself to verify +that traffic is flowing. In particular, the driver must keep up to +date the last receive time, dev->last_rx. Drivers that use NETIF_F_LLTX +flag must also update netdev_queue->trans_start. If they do not, then the +ARP monitor will immediately fail any slaves using that driver, and +those slaves will stay down. If networking monitoring (tcpdump, etc) +shows the ARP requests and replies on the network, then it may be that +your device driver is not updating last_rx and trans_start. + +7.2 Configuring Multiple ARP Targets +------------------------------------ + +While ARP monitoring can be done with just one target, it can +be useful in a High Availability setup to have several targets to +monitor. In the case of just one target, the target itself may go +down or have a problem making it unresponsive to ARP requests. Having +an additional target (or several) increases the reliability of the ARP +monitoring. + +Multiple ARP targets must be separated by commas as follows:: + + # example options for ARP monitoring with three targets + alias bond0 bonding + options bond0 arp_interval=60 arp_ip_target=192.168.0.1,192.168.0.3,192.168.0.9 + +For just a single target the options would resemble:: + + # example options for ARP monitoring with one target + alias bond0 bonding + options bond0 arp_interval=60 arp_ip_target=192.168.0.100 + + +7.3 MII Monitor Operation +------------------------- + +The MII monitor monitors only the carrier state of the local +network interface. It accomplishes this in one of three ways: by +depending upon the device driver to maintain its carrier state, by +querying the device's MII registers, or by making an ethtool query to +the device. + +If the use_carrier module parameter is 1 (the default value), +then the MII monitor will rely on the driver for carrier state +information (via the netif_carrier subsystem). As explained in the +use_carrier parameter information, above, if the MII monitor fails to +detect carrier loss on the device (e.g., when the cable is physically +disconnected), it may be that the driver does not support +netif_carrier. + +If use_carrier is 0, then the MII monitor will first query the +device's (via ioctl) MII registers and check the link state. If that +request fails (not just that it returns carrier down), then the MII +monitor will make an ethtool ETHOOL_GLINK request to attempt to obtain +the same information. If both methods fail (i.e., the driver either +does not support or had some error in processing both the MII register +and ethtool requests), then the MII monitor will assume the link is +up. + +8. Potential Sources of Trouble +=============================== + +8.1 Adventures in Routing +------------------------- + +When bonding is configured, it is important that the slave +devices not have routes that supersede routes of the master (or, +generally, not have routes at all). For example, suppose the bonding +device bond0 has two slaves, eth0 and eth1, and the routing table is +as follows:: + + Kernel IP routing table + Destination Gateway Genmask Flags MSS Window irtt Iface + 10.0.0.0 0.0.0.0 255.255.0.0 U 40 0 0 eth0 + 10.0.0.0 0.0.0.0 255.255.0.0 U 40 0 0 eth1 + 10.0.0.0 0.0.0.0 255.255.0.0 U 40 0 0 bond0 + 127.0.0.0 0.0.0.0 255.0.0.0 U 40 0 0 lo + +This routing configuration will likely still update the +receive/transmit times in the driver (needed by the ARP monitor), but +may bypass the bonding driver (because outgoing traffic to, in this +case, another host on network 10 would use eth0 or eth1 before bond0). + +The ARP monitor (and ARP itself) may become confused by this +configuration, because ARP requests (generated by the ARP monitor) +will be sent on one interface (bond0), but the corresponding reply +will arrive on a different interface (eth0). This reply looks to ARP +as an unsolicited ARP reply (because ARP matches replies on an +interface basis), and is discarded. The MII monitor is not affected +by the state of the routing table. + +The solution here is simply to insure that slaves do not have +routes of their own, and if for some reason they must, those routes do +not supersede routes of their master. This should generally be the +case, but unusual configurations or errant manual or automatic static +route additions may cause trouble. + +8.2 Ethernet Device Renaming +---------------------------- + +On systems with network configuration scripts that do not +associate physical devices directly with network interface names (so +that the same physical device always has the same "ethX" name), it may +be necessary to add some special logic to config files in +/etc/modprobe.d/. + +For example, given a modules.conf containing the following:: + + alias bond0 bonding + options bond0 mode=some-mode miimon=50 + alias eth0 tg3 + alias eth1 tg3 + alias eth2 e1000 + alias eth3 e1000 + +If neither eth0 and eth1 are slaves to bond0, then when the +bond0 interface comes up, the devices may end up reordered. This +happens because bonding is loaded first, then its slave device's +drivers are loaded next. Since no other drivers have been loaded, +when the e1000 driver loads, it will receive eth0 and eth1 for its +devices, but the bonding configuration tries to enslave eth2 and eth3 +(which may later be assigned to the tg3 devices). + +Adding the following:: + + add above bonding e1000 tg3 + +causes modprobe to load e1000 then tg3, in that order, when +bonding is loaded. This command is fully documented in the +modules.conf manual page. + +On systems utilizing modprobe an equivalent problem can occur. +In this case, the following can be added to config files in +/etc/modprobe.d/ as:: + + softdep bonding pre: tg3 e1000 + +This will load tg3 and e1000 modules before loading the bonding one. +Full documentation on this can be found in the modprobe.d and modprobe +manual pages. + +8.3. Painfully Slow Or No Failed Link Detection By Miimon +--------------------------------------------------------- + +By default, bonding enables the use_carrier option, which +instructs bonding to trust the driver to maintain carrier state. + +As discussed in the options section, above, some drivers do +not support the netif_carrier_on/_off link state tracking system. +With use_carrier enabled, bonding will always see these links as up, +regardless of their actual state. + +Additionally, other drivers do support netif_carrier, but do +not maintain it in real time, e.g., only polling the link state at +some fixed interval. In this case, miimon will detect failures, but +only after some long period of time has expired. If it appears that +miimon is very slow in detecting link failures, try specifying +use_carrier=0 to see if that improves the failure detection time. If +it does, then it may be that the driver checks the carrier state at a +fixed interval, but does not cache the MII register values (so the +use_carrier=0 method of querying the registers directly works). If +use_carrier=0 does not improve the failover, then the driver may cache +the registers, or the problem may be elsewhere. + +Also, remember that miimon only checks for the device's +carrier state. It has no way to determine the state of devices on or +beyond other ports of a switch, or if a switch is refusing to pass +traffic while still maintaining carrier on. + +9. SNMP agents +=============== + +If running SNMP agents, the bonding driver should be loaded +before any network drivers participating in a bond. This requirement +is due to the interface index (ipAdEntIfIndex) being associated to +the first interface found with a given IP address. That is, there is +only one ipAdEntIfIndex for each IP address. For example, if eth0 and +eth1 are slaves of bond0 and the driver for eth0 is loaded before the +bonding driver, the interface for the IP address will be associated +with the eth0 interface. This configuration is shown below, the IP +address 192.168.1.1 has an interface index of 2 which indexes to eth0 +in the ifDescr table (ifDescr.2). + +:: + + interfaces.ifTable.ifEntry.ifDescr.1 = lo + interfaces.ifTable.ifEntry.ifDescr.2 = eth0 + interfaces.ifTable.ifEntry.ifDescr.3 = eth1 + interfaces.ifTable.ifEntry.ifDescr.4 = eth2 + interfaces.ifTable.ifEntry.ifDescr.5 = eth3 + interfaces.ifTable.ifEntry.ifDescr.6 = bond0 + ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.10.10.10 = 5 + ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.192.168.1.1 = 2 + ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.74.20.94 = 4 + ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.127.0.0.1 = 1 + +This problem is avoided by loading the bonding driver before +any network drivers participating in a bond. Below is an example of +loading the bonding driver first, the IP address 192.168.1.1 is +correctly associated with ifDescr.2. + + interfaces.ifTable.ifEntry.ifDescr.1 = lo + interfaces.ifTable.ifEntry.ifDescr.2 = bond0 + interfaces.ifTable.ifEntry.ifDescr.3 = eth0 + interfaces.ifTable.ifEntry.ifDescr.4 = eth1 + interfaces.ifTable.ifEntry.ifDescr.5 = eth2 + interfaces.ifTable.ifEntry.ifDescr.6 = eth3 + ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.10.10.10 = 6 + ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.192.168.1.1 = 2 + ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.74.20.94 = 5 + ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.127.0.0.1 = 1 + +While some distributions may not report the interface name in +ifDescr, the association between the IP address and IfIndex remains +and SNMP functions such as Interface_Scan_Next will report that +association. + +10. Promiscuous mode +==================== + +When running network monitoring tools, e.g., tcpdump, it is +common to enable promiscuous mode on the device, so that all traffic +is seen (instead of seeing only traffic destined for the local host). +The bonding driver handles promiscuous mode changes to the bonding +master device (e.g., bond0), and propagates the setting to the slave +devices. + +For the balance-rr, balance-xor, broadcast, and 802.3ad modes, +the promiscuous mode setting is propagated to all slaves. + +For the active-backup, balance-tlb and balance-alb modes, the +promiscuous mode setting is propagated only to the active slave. + +For balance-tlb mode, the active slave is the slave currently +receiving inbound traffic. + +For balance-alb mode, the active slave is the slave used as a +"primary." This slave is used for mode-specific control traffic, for +sending to peers that are unassigned or if the load is unbalanced. + +For the active-backup, balance-tlb and balance-alb modes, when +the active slave changes (e.g., due to a link failure), the +promiscuous setting will be propagated to the new active slave. + +11. Configuring Bonding for High Availability +============================================= + +High Availability refers to configurations that provide +maximum network availability by having redundant or backup devices, +links or switches between the host and the rest of the world. The +goal is to provide the maximum availability of network connectivity +(i.e., the network always works), even though other configurations +could provide higher throughput. + +11.1 High Availability in a Single Switch Topology +-------------------------------------------------- + +If two hosts (or a host and a single switch) are directly +connected via multiple physical links, then there is no availability +penalty to optimizing for maximum bandwidth. In this case, there is +only one switch (or peer), so if it fails, there is no alternative +access to fail over to. Additionally, the bonding load balance modes +support link monitoring of their members, so if individual links fail, +the load will be rebalanced across the remaining devices. + +See Section 12, "Configuring Bonding for Maximum Throughput" +for information on configuring bonding with one peer device. + +11.2 High Availability in a Multiple Switch Topology +---------------------------------------------------- + +With multiple switches, the configuration of bonding and the +network changes dramatically. In multiple switch topologies, there is +a trade off between network availability and usable bandwidth. + +Below is a sample network, configured to maximize the +availability of the network:: + + | | + |port3 port3| + +-----+----+ +-----+----+ + | |port2 ISL port2| | + | switch A +--------------------------+ switch B | + | | | | + +-----+----+ +-----++---+ + |port1 port1| + | +-------+ | + +-------------+ host1 +---------------+ + eth0 +-------+ eth1 + +In this configuration, there is a link between the two +switches (ISL, or inter switch link), and multiple ports connecting to +the outside world ("port3" on each switch). There is no technical +reason that this could not be extended to a third switch. + +11.2.1 HA Bonding Mode Selection for Multiple Switch Topology +------------------------------------------------------------- + +In a topology such as the example above, the active-backup and +broadcast modes are the only useful bonding modes when optimizing for +availability; the other modes require all links to terminate on the +same peer for them to behave rationally. + +active-backup: + This is generally the preferred mode, particularly if + the switches have an ISL and play together well. If the + network configuration is such that one switch is specifically + a backup switch (e.g., has lower capacity, higher cost, etc), + then the primary option can be used to insure that the + preferred link is always used when it is available. + +broadcast: + This mode is really a special purpose mode, and is suitable + only for very specific needs. For example, if the two + switches are not connected (no ISL), and the networks beyond + them are totally independent. In this case, if it is + necessary for some specific one-way traffic to reach both + independent networks, then the broadcast mode may be suitable. + +11.2.2 HA Link Monitoring Selection for Multiple Switch Topology +---------------------------------------------------------------- + +The choice of link monitoring ultimately depends upon your +switch. If the switch can reliably fail ports in response to other +failures, then either the MII or ARP monitors should work. For +example, in the above example, if the "port3" link fails at the remote +end, the MII monitor has no direct means to detect this. The ARP +monitor could be configured with a target at the remote end of port3, +thus detecting that failure without switch support. + +In general, however, in a multiple switch topology, the ARP +monitor can provide a higher level of reliability in detecting end to +end connectivity failures (which may be caused by the failure of any +individual component to pass traffic for any reason). Additionally, +the ARP monitor should be configured with multiple targets (at least +one for each switch in the network). This will insure that, +regardless of which switch is active, the ARP monitor has a suitable +target to query. + +Note, also, that of late many switches now support a functionality +generally referred to as "trunk failover." This is a feature of the +switch that causes the link state of a particular switch port to be set +down (or up) when the state of another switch port goes down (or up). +Its purpose is to propagate link failures from logically "exterior" ports +to the logically "interior" ports that bonding is able to monitor via +miimon. Availability and configuration for trunk failover varies by +switch, but this can be a viable alternative to the ARP monitor when using +suitable switches. + +12. Configuring Bonding for Maximum Throughput +============================================== + +12.1 Maximizing Throughput in a Single Switch Topology +------------------------------------------------------ + +In a single switch configuration, the best method to maximize +throughput depends upon the application and network environment. The +various load balancing modes each have strengths and weaknesses in +different environments, as detailed below. + +For this discussion, we will break down the topologies into +two categories. Depending upon the destination of most traffic, we +categorize them into either "gatewayed" or "local" configurations. + +In a gatewayed configuration, the "switch" is acting primarily +as a router, and the majority of traffic passes through this router to +other networks. An example would be the following:: + + + +----------+ +----------+ + | |eth0 port1| | to other networks + | Host A +---------------------+ router +-------------------> + | +---------------------+ | Hosts B and C are out + | |eth1 port2| | here somewhere + +----------+ +----------+ + +The router may be a dedicated router device, or another host +acting as a gateway. For our discussion, the important point is that +the majority of traffic from Host A will pass through the router to +some other network before reaching its final destination. + +In a gatewayed network configuration, although Host A may +communicate with many other systems, all of its traffic will be sent +and received via one other peer on the local network, the router. + +Note that the case of two systems connected directly via +multiple physical links is, for purposes of configuring bonding, the +same as a gatewayed configuration. In that case, it happens that all +traffic is destined for the "gateway" itself, not some other network +beyond the gateway. + +In a local configuration, the "switch" is acting primarily as +a switch, and the majority of traffic passes through this switch to +reach other stations on the same network. An example would be the +following:: + + +----------+ +----------+ +--------+ + | |eth0 port1| +-------+ Host B | + | Host A +------------+ switch |port3 +--------+ + | +------------+ | +--------+ + | |eth1 port2| +------------------+ Host C | + +----------+ +----------+port4 +--------+ + + +Again, the switch may be a dedicated switch device, or another +host acting as a gateway. For our discussion, the important point is +that the majority of traffic from Host A is destined for other hosts +on the same local network (Hosts B and C in the above example). + +In summary, in a gatewayed configuration, traffic to and from +the bonded device will be to the same MAC level peer on the network +(the gateway itself, i.e., the router), regardless of its final +destination. In a local configuration, traffic flows directly to and +from the final destinations, thus, each destination (Host B, Host C) +will be addressed directly by their individual MAC addresses. + +This distinction between a gatewayed and a local network +configuration is important because many of the load balancing modes +available use the MAC addresses of the local network source and +destination to make load balancing decisions. The behavior of each +mode is described below. + + +12.1.1 MT Bonding Mode Selection for Single Switch Topology +----------------------------------------------------------- + +This configuration is the easiest to set up and to understand, +although you will have to decide which bonding mode best suits your +needs. The trade offs for each mode are detailed below: + +balance-rr: + This mode is the only mode that will permit a single + TCP/IP connection to stripe traffic across multiple + interfaces. It is therefore the only mode that will allow a + single TCP/IP stream to utilize more than one interface's + worth of throughput. This comes at a cost, however: the + striping generally results in peer systems receiving packets out + of order, causing TCP/IP's congestion control system to kick + in, often by retransmitting segments. + + It is possible to adjust TCP/IP's congestion limits by + altering the net.ipv4.tcp_reordering sysctl parameter. The + usual default value is 3. But keep in mind TCP stack is able + to automatically increase this when it detects reorders. + + Note that the fraction of packets that will be delivered out of + order is highly variable, and is unlikely to be zero. The level + of reordering depends upon a variety of factors, including the + networking interfaces, the switch, and the topology of the + configuration. Speaking in general terms, higher speed network + cards produce more reordering (due to factors such as packet + coalescing), and a "many to many" topology will reorder at a + higher rate than a "many slow to one fast" configuration. + + Many switches do not support any modes that stripe traffic + (instead choosing a port based upon IP or MAC level addresses); + for those devices, traffic for a particular connection flowing + through the switch to a balance-rr bond will not utilize greater + than one interface's worth of bandwidth. + + If you are utilizing protocols other than TCP/IP, UDP for + example, and your application can tolerate out of order + delivery, then this mode can allow for single stream datagram + performance that scales near linearly as interfaces are added + to the bond. + + This mode requires the switch to have the appropriate ports + configured for "etherchannel" or "trunking." + +active-backup: + There is not much advantage in this network topology to + the active-backup mode, as the inactive backup devices are all + connected to the same peer as the primary. In this case, a + load balancing mode (with link monitoring) will provide the + same level of network availability, but with increased + available bandwidth. On the plus side, active-backup mode + does not require any configuration of the switch, so it may + have value if the hardware available does not support any of + the load balance modes. + +balance-xor: + This mode will limit traffic such that packets destined + for specific peers will always be sent over the same + interface. Since the destination is determined by the MAC + addresses involved, this mode works best in a "local" network + configuration (as described above), with destinations all on + the same local network. This mode is likely to be suboptimal + if all your traffic is passed through a single router (i.e., a + "gatewayed" network configuration, as described above). + + As with balance-rr, the switch ports need to be configured for + "etherchannel" or "trunking." + +broadcast: + Like active-backup, there is not much advantage to this + mode in this type of network topology. + +802.3ad: + This mode can be a good choice for this type of network + topology. The 802.3ad mode is an IEEE standard, so all peers + that implement 802.3ad should interoperate well. The 802.3ad + protocol includes automatic configuration of the aggregates, + so minimal manual configuration of the switch is needed + (typically only to designate that some set of devices is + available for 802.3ad). The 802.3ad standard also mandates + that frames be delivered in order (within certain limits), so + in general single connections will not see misordering of + packets. The 802.3ad mode does have some drawbacks: the + standard mandates that all devices in the aggregate operate at + the same speed and duplex. Also, as with all bonding load + balance modes other than balance-rr, no single connection will + be able to utilize more than a single interface's worth of + bandwidth. + + Additionally, the linux bonding 802.3ad implementation + distributes traffic by peer (using an XOR of MAC addresses + and packet type ID), so in a "gatewayed" configuration, all + outgoing traffic will generally use the same device. Incoming + traffic may also end up on a single device, but that is + dependent upon the balancing policy of the peer's 802.3ad + implementation. In a "local" configuration, traffic will be + distributed across the devices in the bond. + + Finally, the 802.3ad mode mandates the use of the MII monitor, + therefore, the ARP monitor is not available in this mode. + +balance-tlb: + The balance-tlb mode balances outgoing traffic by peer. + Since the balancing is done according to MAC address, in a + "gatewayed" configuration (as described above), this mode will + send all traffic across a single device. However, in a + "local" network configuration, this mode balances multiple + local network peers across devices in a vaguely intelligent + manner (not a simple XOR as in balance-xor or 802.3ad mode), + so that mathematically unlucky MAC addresses (i.e., ones that + XOR to the same value) will not all "bunch up" on a single + interface. + + Unlike 802.3ad, interfaces may be of differing speeds, and no + special switch configuration is required. On the down side, + in this mode all incoming traffic arrives over a single + interface, this mode requires certain ethtool support in the + network device driver of the slave interfaces, and the ARP + monitor is not available. + +balance-alb: + This mode is everything that balance-tlb is, and more. + It has all of the features (and restrictions) of balance-tlb, + and will also balance incoming traffic from local network + peers (as described in the Bonding Module Options section, + above). + + The only additional down side to this mode is that the network + device driver must support changing the hardware address while + the device is open. + +12.1.2 MT Link Monitoring for Single Switch Topology +---------------------------------------------------- + +The choice of link monitoring may largely depend upon which +mode you choose to use. The more advanced load balancing modes do not +support the use of the ARP monitor, and are thus restricted to using +the MII monitor (which does not provide as high a level of end to end +assurance as the ARP monitor). + +12.2 Maximum Throughput in a Multiple Switch Topology +----------------------------------------------------- + +Multiple switches may be utilized to optimize for throughput +when they are configured in parallel as part of an isolated network +between two or more systems, for example:: + + +-----------+ + | Host A | + +-+---+---+-+ + | | | + +--------+ | +---------+ + | | | + +------+---+ +-----+----+ +-----+----+ + | Switch A | | Switch B | | Switch C | + +------+---+ +-----+----+ +-----+----+ + | | | + +--------+ | +---------+ + | | | + +-+---+---+-+ + | Host B | + +-----------+ + +In this configuration, the switches are isolated from one +another. One reason to employ a topology such as this is for an +isolated network with many hosts (a cluster configured for high +performance, for example), using multiple smaller switches can be more +cost effective than a single larger switch, e.g., on a network with 24 +hosts, three 24 port switches can be significantly less expensive than +a single 72 port switch. + +If access beyond the network is required, an individual host +can be equipped with an additional network device connected to an +external network; this host then additionally acts as a gateway. + +12.2.1 MT Bonding Mode Selection for Multiple Switch Topology +------------------------------------------------------------- + +In actual practice, the bonding mode typically employed in +configurations of this type is balance-rr. Historically, in this +network configuration, the usual caveats about out of order packet +delivery are mitigated by the use of network adapters that do not do +any kind of packet coalescing (via the use of NAPI, or because the +device itself does not generate interrupts until some number of +packets has arrived). When employed in this fashion, the balance-rr +mode allows individual connections between two hosts to effectively +utilize greater than one interface's bandwidth. + +12.2.2 MT Link Monitoring for Multiple Switch Topology +------------------------------------------------------ + +Again, in actual practice, the MII monitor is most often used +in this configuration, as performance is given preference over +availability. The ARP monitor will function in this topology, but its +advantages over the MII monitor are mitigated by the volume of probes +needed as the number of systems involved grows (remember that each +host in the network is configured with bonding). + +13. Switch Behavior Issues +========================== + +13.1 Link Establishment and Failover Delays +------------------------------------------- + +Some switches exhibit undesirable behavior with regard to the +timing of link up and down reporting by the switch. + +First, when a link comes up, some switches may indicate that +the link is up (carrier available), but not pass traffic over the +interface for some period of time. This delay is typically due to +some type of autonegotiation or routing protocol, but may also occur +during switch initialization (e.g., during recovery after a switch +failure). If you find this to be a problem, specify an appropriate +value to the updelay bonding module option to delay the use of the +relevant interface(s). + +Second, some switches may "bounce" the link state one or more +times while a link is changing state. This occurs most commonly while +the switch is initializing. Again, an appropriate updelay value may +help. + +Note that when a bonding interface has no active links, the +driver will immediately reuse the first link that goes up, even if the +updelay parameter has been specified (the updelay is ignored in this +case). If there are slave interfaces waiting for the updelay timeout +to expire, the interface that first went into that state will be +immediately reused. This reduces down time of the network if the +value of updelay has been overestimated, and since this occurs only in +cases with no connectivity, there is no additional penalty for +ignoring the updelay. + +In addition to the concerns about switch timings, if your +switches take a long time to go into backup mode, it may be desirable +to not activate a backup interface immediately after a link goes down. +Failover may be delayed via the downdelay bonding module option. + +13.2 Duplicated Incoming Packets +-------------------------------- + +NOTE: Starting with version 3.0.2, the bonding driver has logic to +suppress duplicate packets, which should largely eliminate this problem. +The following description is kept for reference. + +It is not uncommon to observe a short burst of duplicated +traffic when the bonding device is first used, or after it has been +idle for some period of time. This is most easily observed by issuing +a "ping" to some other host on the network, and noticing that the +output from ping flags duplicates (typically one per slave). + +For example, on a bond in active-backup mode with five slaves +all connected to one switch, the output may appear as follows:: + + # ping -n 10.0.4.2 + PING 10.0.4.2 (10.0.4.2) from 10.0.3.10 : 56(84) bytes of data. + 64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.7 ms + 64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!) + 64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!) + 64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!) + 64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!) + 64 bytes from 10.0.4.2: icmp_seq=2 ttl=64 time=0.216 ms + 64 bytes from 10.0.4.2: icmp_seq=3 ttl=64 time=0.267 ms + 64 bytes from 10.0.4.2: icmp_seq=4 ttl=64 time=0.222 ms + +This is not due to an error in the bonding driver, rather, it +is a side effect of how many switches update their MAC forwarding +tables. Initially, the switch does not associate the MAC address in +the packet with a particular switch port, and so it may send the +traffic to all ports until its MAC forwarding table is updated. Since +the interfaces attached to the bond may occupy multiple ports on a +single switch, when the switch (temporarily) floods the traffic to all +ports, the bond device receives multiple copies of the same packet +(one per slave device). + +The duplicated packet behavior is switch dependent, some +switches exhibit this, and some do not. On switches that display this +behavior, it can be induced by clearing the MAC forwarding table (on +most Cisco switches, the privileged command "clear mac address-table +dynamic" will accomplish this). + +14. Hardware Specific Considerations +==================================== + +This section contains additional information for configuring +bonding on specific hardware platforms, or for interfacing bonding +with particular switches or other devices. + +14.1 IBM BladeCenter +-------------------- + +This applies to the JS20 and similar systems. + +On the JS20 blades, the bonding driver supports only +balance-rr, active-backup, balance-tlb and balance-alb modes. This is +largely due to the network topology inside the BladeCenter, detailed +below. + +JS20 network adapter information +-------------------------------- + +All JS20s come with two Broadcom Gigabit Ethernet ports +integrated on the planar (that's "motherboard" in IBM-speak). In the +BladeCenter chassis, the eth0 port of all JS20 blades is hard wired to +I/O Module #1; similarly, all eth1 ports are wired to I/O Module #2. +An add-on Broadcom daughter card can be installed on a JS20 to provide +two more Gigabit Ethernet ports. These ports, eth2 and eth3, are +wired to I/O Modules 3 and 4, respectively. + +Each I/O Module may contain either a switch or a passthrough +module (which allows ports to be directly connected to an external +switch). Some bonding modes require a specific BladeCenter internal +network topology in order to function; these are detailed below. + +Additional BladeCenter-specific networking information can be +found in two IBM Redbooks (www.ibm.com/redbooks): + +- "IBM eServer BladeCenter Networking Options" +- "IBM eServer BladeCenter Layer 2-7 Network Switching" + +BladeCenter networking configuration +------------------------------------ + +Because a BladeCenter can be configured in a very large number +of ways, this discussion will be confined to describing basic +configurations. + +Normally, Ethernet Switch Modules (ESMs) are used in I/O +modules 1 and 2. In this configuration, the eth0 and eth1 ports of a +JS20 will be connected to different internal switches (in the +respective I/O modules). + +A passthrough module (OPM or CPM, optical or copper, +passthrough module) connects the I/O module directly to an external +switch. By using PMs in I/O module #1 and #2, the eth0 and eth1 +interfaces of a JS20 can be redirected to the outside world and +connected to a common external switch. + +Depending upon the mix of ESMs and PMs, the network will +appear to bonding as either a single switch topology (all PMs) or as a +multiple switch topology (one or more ESMs, zero or more PMs). It is +also possible to connect ESMs together, resulting in a configuration +much like the example in "High Availability in a Multiple Switch +Topology," above. + +Requirements for specific modes +------------------------------- + +The balance-rr mode requires the use of passthrough modules +for devices in the bond, all connected to an common external switch. +That switch must be configured for "etherchannel" or "trunking" on the +appropriate ports, as is usual for balance-rr. + +The balance-alb and balance-tlb modes will function with +either switch modules or passthrough modules (or a mix). The only +specific requirement for these modes is that all network interfaces +must be able to reach all destinations for traffic sent over the +bonding device (i.e., the network must converge at some point outside +the BladeCenter). + +The active-backup mode has no additional requirements. + +Link monitoring issues +---------------------- + +When an Ethernet Switch Module is in place, only the ARP +monitor will reliably detect link loss to an external switch. This is +nothing unusual, but examination of the BladeCenter cabinet would +suggest that the "external" network ports are the ethernet ports for +the system, when it fact there is a switch between these "external" +ports and the devices on the JS20 system itself. The MII monitor is +only able to detect link failures between the ESM and the JS20 system. + +When a passthrough module is in place, the MII monitor does +detect failures to the "external" port, which is then directly +connected to the JS20 system. + +Other concerns +-------------- + +The Serial Over LAN (SoL) link is established over the primary +ethernet (eth0) only, therefore, any loss of link to eth0 will result +in losing your SoL connection. It will not fail over with other +network traffic, as the SoL system is beyond the control of the +bonding driver. + +It may be desirable to disable spanning tree on the switch +(either the internal Ethernet Switch Module, or an external switch) to +avoid fail-over delay issues when using bonding. + + +15. Frequently Asked Questions +============================== + +1. Is it SMP safe? +------------------- + +Yes. The old 2.0.xx channel bonding patch was not SMP safe. +The new driver was designed to be SMP safe from the start. + +2. What type of cards will work with it? +----------------------------------------- + +Any Ethernet type cards (you can even mix cards - a Intel +EtherExpress PRO/100 and a 3com 3c905b, for example). For most modes, +devices need not be of the same speed. + +Starting with version 3.2.1, bonding also supports Infiniband +slaves in active-backup mode. + +3. How many bonding devices can I have? +---------------------------------------- + +There is no limit. + +4. How many slaves can a bonding device have? +---------------------------------------------- + +This is limited only by the number of network interfaces Linux +supports and/or the number of network cards you can place in your +system. + +5. What happens when a slave link dies? +---------------------------------------- + +If link monitoring is enabled, then the failing device will be +disabled. The active-backup mode will fail over to a backup link, and +other modes will ignore the failed link. The link will continue to be +monitored, and should it recover, it will rejoin the bond (in whatever +manner is appropriate for the mode). See the sections on High +Availability and the documentation for each mode for additional +information. + +Link monitoring can be enabled via either the miimon or +arp_interval parameters (described in the module parameters section, +above). In general, miimon monitors the carrier state as sensed by +the underlying network device, and the arp monitor (arp_interval) +monitors connectivity to another host on the local network. + +If no link monitoring is configured, the bonding driver will +be unable to detect link failures, and will assume that all links are +always available. This will likely result in lost packets, and a +resulting degradation of performance. The precise performance loss +depends upon the bonding mode and network configuration. + +6. Can bonding be used for High Availability? +---------------------------------------------- + +Yes. See the section on High Availability for details. + +7. Which switches/systems does it work with? +--------------------------------------------- + +The full answer to this depends upon the desired mode. + +In the basic balance modes (balance-rr and balance-xor), it +works with any system that supports etherchannel (also called +trunking). Most managed switches currently available have such +support, and many unmanaged switches as well. + +The advanced balance modes (balance-tlb and balance-alb) do +not have special switch requirements, but do need device drivers that +support specific features (described in the appropriate section under +module parameters, above). + +In 802.3ad mode, it works with systems that support IEEE +802.3ad Dynamic Link Aggregation. Most managed and many unmanaged +switches currently available support 802.3ad. + +The active-backup mode should work with any Layer-II switch. + +8. Where does a bonding device get its MAC address from? +--------------------------------------------------------- + +When using slave devices that have fixed MAC addresses, or when +the fail_over_mac option is enabled, the bonding device's MAC address is +the MAC address of the active slave. + +For other configurations, if not explicitly configured (with +ifconfig or ip link), the MAC address of the bonding device is taken from +its first slave device. This MAC address is then passed to all following +slaves and remains persistent (even if the first slave is removed) until +the bonding device is brought down or reconfigured. + +If you wish to change the MAC address, you can set it with +ifconfig or ip link:: + + # ifconfig bond0 hw ether 00:11:22:33:44:55 + + # ip link set bond0 address 66:77:88:99:aa:bb + +The MAC address can be also changed by bringing down/up the +device and then changing its slaves (or their order):: + + # ifconfig bond0 down ; modprobe -r bonding + # ifconfig bond0 .... up + # ifenslave bond0 eth... + +This method will automatically take the address from the next +slave that is added. + +To restore your slaves' MAC addresses, you need to detach them +from the bond (``ifenslave -d bond0 eth0``). The bonding driver will +then restore the MAC addresses that the slaves had before they were +enslaved. + +16. Resources and Links +======================= + +The latest version of the bonding driver can be found in the latest +version of the linux kernel, found on http://kernel.org + +The latest version of this document can be found in the latest kernel +source (named Documentation/networking/bonding.rst). + +Discussions regarding the development of the bonding driver take place +on the main Linux network mailing list, hosted at vger.kernel.org. The list +address is: + +netdev@vger.kernel.org + +The administrative interface (to subscribe or unsubscribe) can +be found at: + +http://vger.kernel.org/vger-lists.html#netdev diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst new file mode 100644 index 000000000..4aef9cddd --- /dev/null +++ b/Documentation/networking/bridge.rst @@ -0,0 +1,21 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================= +Ethernet Bridging +================= + +In order to use the Ethernet bridging functionality, you'll need the +userspace tools. + +Documentation for Linux bridging is on: + http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge + +The bridge-utilities are maintained at: + git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/bridge-utils.git + +Additionally, the iproute2 utilities can be used to configure +bridge devices. + +If you still have questions, don't hesitate to post to the mailing list +(more info https://lists.linux-foundation.org/mailman/listinfo/bridge). + diff --git a/Documentation/networking/caif/caif.rst b/Documentation/networking/caif/caif.rst new file mode 100644 index 000000000..a07213030 --- /dev/null +++ b/Documentation/networking/caif/caif.rst @@ -0,0 +1,139 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + + +================ +Using Linux CAIF +================ + + +:Copyright: |copy| ST-Ericsson AB 2010 + +:Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + +Start +===== + +If you have compiled CAIF for modules do:: + + $modprobe crc_ccitt + $modprobe caif + $modprobe caif_socket + $modprobe chnl_net + + +Preparing the setup with a STE modem +==================================== + +If you are working on integration of CAIF you should make sure +that the kernel is built with module support. + +There are some things that need to be tweaked to get the host TTY correctly +set up to talk to the modem. +Since the CAIF stack is running in the kernel and we want to use the existing +TTY, we are installing our physical serial driver as a line discipline above +the TTY device. + +To achieve this we need to install the N_CAIF ldisc from user space. +The benefit is that we can hook up to any TTY. + +The use of Start-of-frame-extension (STX) must also be set as +module parameter "ser_use_stx". + +Normally Frame Checksum is always used on UART, but this is also provided as a +module parameter "ser_use_fcs". + +:: + + $ modprobe caif_serial ser_ttyname=/dev/ttyS0 ser_use_stx=yes + $ ifconfig caif_ttyS0 up + +PLEASE NOTE: + There is a limitation in Android shell. + It only accepts one argument to insmod/modprobe! + +Trouble shooting +================ + +There are debugfs parameters provided for serial communication. +/sys/kernel/debug/caif_serial// + +* ser_state: Prints the bit-mask status where + + - 0x02 means SENDING, this is a transient state. + - 0x10 means FLOW_OFF_SENT, i.e. the previous frame has not been sent + and is blocking further send operation. Flow OFF has been propagated + to all CAIF Channels using this TTY. + +* tty_status: Prints the bit-mask tty status information + + - 0x01 - tty->warned is on. + - 0x02 - tty->low_latency is on. + - 0x04 - tty->packed is on. + - 0x08 - tty->flow_stopped is on. + - 0x10 - tty->hw_stopped is on. + - 0x20 - tty->stopped is on. + +* last_tx_msg: Binary blob Prints the last transmitted frame. + + This can be printed with:: + + $od --format=x1 /sys/kernel/debug/caif_serial//last_rx_msg. + + The first two tx messages sent look like this. Note: The initial + byte 02 is start of frame extension (STX) used for re-syncing + upon errors. + + - Enumeration:: + + 0000000 02 05 00 00 03 01 d2 02 + | | | | | | + STX(1) | | | | + Length(2)| | | + Control Channel(1) + Command:Enumeration(1) + Link-ID(1) + Checksum(2) + + - Channel Setup:: + + 0000000 02 07 00 00 00 21 a1 00 48 df + | | | | | | | | + STX(1) | | | | | | + Length(2)| | | | | + Control Channel(1) + Command:Channel Setup(1) + Channel Type(1) + Priority and Link-ID(1) + Endpoint(1) + Checksum(2) + +* last_rx_msg: Prints the last transmitted frame. + + The RX messages for LinkSetup look almost identical but they have the + bit 0x20 set in the command bit, and Channel Setup has added one byte + before Checksum containing Channel ID. + + NOTE: + Several CAIF Messages might be concatenated. The maximum debug + buffer size is 128 bytes. + +Error Scenarios +=============== + +- last_tx_msg contains channel setup message and last_rx_msg is empty -> + The host seems to be able to send over the UART, at least the CAIF ldisc get + notified that sending is completed. + +- last_tx_msg contains enumeration message and last_rx_msg is empty -> + The host is not able to send the message from UART, the tty has not been + able to complete the transmit operation. + +- if /sys/kernel/debug/caif_serial//tty_status is non-zero there + might be problems transmitting over UART. + + E.g. host and modem wiring is not correct you will typically see + tty_status = 0x10 (hw_stopped) and ser_state = 0x10 (FLOW_OFF_SENT). + + You will probably see the enumeration message in last_tx_message + and empty last_rx_message. diff --git a/Documentation/networking/caif/index.rst b/Documentation/networking/caif/index.rst new file mode 100644 index 000000000..ec29b6f4b --- /dev/null +++ b/Documentation/networking/caif/index.rst @@ -0,0 +1,12 @@ +.. SPDX-License-Identifier: GPL-2.0 + +CAIF +==== + +Contents: + +.. toctree:: + :maxdepth: 2 + + linux_caif + caif diff --git a/Documentation/networking/caif/linux_caif.rst b/Documentation/networking/caif/linux_caif.rst new file mode 100644 index 000000000..a0480862a --- /dev/null +++ b/Documentation/networking/caif/linux_caif.rst @@ -0,0 +1,195 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +========== +Linux CAIF +========== + +Copyright |copy| ST-Ericsson AB 2010 + +:Author: Sjur Brendeland/ sjur.brandeland@stericsson.com +:License terms: GNU General Public License (GPL) version 2 + + +Introduction +============ + +CAIF is a MUX protocol used by ST-Ericsson cellular modems for +communication between Modem and host. The host processes can open virtual AT +channels, initiate GPRS Data connections, Video channels and Utility Channels. +The Utility Channels are general purpose pipes between modem and host. + +ST-Ericsson modems support a number of transports between modem +and host. Currently, UART and Loopback are available for Linux. + + +Architecture +============ + +The implementation of CAIF is divided into: + +* CAIF Socket Layer and GPRS IP Interface. +* CAIF Core Protocol Implementation +* CAIF Link Layer, implemented as NET devices. + +:: + + RTNL + ! + ! +------+ +------+ + ! +------+! +------+! + ! ! IP !! !Socket!! + +-------> !interf!+ ! API !+ <- CAIF Client APIs + ! +------+ +------! + ! ! ! + ! +-----------+ + ! ! + ! +------+ <- CAIF Core Protocol + ! ! CAIF ! + ! ! Core ! + ! +------+ + ! +----------!---------+ + ! ! ! ! + ! +------+ +-----+ +------+ + +--> ! HSI ! ! TTY ! ! USB ! <- Link Layer (Net Devices) + +------+ +-----+ +------+ + + + +Implementation +============== + + +CAIF Core Protocol Layer +------------------------ + +CAIF Core layer implements the CAIF protocol as defined by ST-Ericsson. +It implements the CAIF protocol stack in a layered approach, where +each layer described in the specification is implemented as a separate layer. +The architecture is inspired by the design patterns "Protocol Layer" and +"Protocol Packet". + +CAIF structure +^^^^^^^^^^^^^^ + +The Core CAIF implementation contains: + + - Simple implementation of CAIF. + - Layered architecture (a la Streams), each layer in the CAIF + specification is implemented in a separate c-file. + - Clients must call configuration function to add PHY layer. + - Clients must implement CAIF layer to consume/produce + CAIF payload with receive and transmit functions. + - Clients must call configuration function to add and connect the + Client layer. + - When receiving / transmitting CAIF Packets (cfpkt), ownership is passed + to the called function (except for framing layers' receive function) + +Layered Architecture +==================== + +The CAIF protocol can be divided into two parts: Support functions and Protocol +Implementation. The support functions include: + + - CFPKT CAIF Packet. Implementation of CAIF Protocol Packet. The + CAIF Packet has functions for creating, destroying and adding content + and for adding/extracting header and trailers to protocol packets. + +The CAIF Protocol implementation contains: + + - CFCNFG CAIF Configuration layer. Configures the CAIF Protocol + Stack and provides a Client interface for adding Link-Layer and + Driver interfaces on top of the CAIF Stack. + + - CFCTRL CAIF Control layer. Encodes and Decodes control messages + such as enumeration and channel setup. Also matches request and + response messages. + + - CFSERVL General CAIF Service Layer functionality; handles flow + control and remote shutdown requests. + + - CFVEI CAIF VEI layer. Handles CAIF AT Channels on VEI (Virtual + External Interface). This layer encodes/decodes VEI frames. + + - CFDGML CAIF Datagram layer. Handles CAIF Datagram layer (IP + traffic), encodes/decodes Datagram frames. + + - CFMUX CAIF Mux layer. Handles multiplexing between multiple + physical bearers and multiple channels such as VEI, Datagram, etc. + The MUX keeps track of the existing CAIF Channels and + Physical Instances and selects the appropriate instance based + on Channel-Id and Physical-ID. + + - CFFRML CAIF Framing layer. Handles Framing i.e. Frame length + and frame checksum. + + - CFSERL CAIF Serial layer. Handles concatenation/split of frames + into CAIF Frames with correct length. + +:: + + +---------+ + | Config | + | CFCNFG | + +---------+ + ! + +---------+ +---------+ +---------+ + | AT | | Control | | Datagram| + | CFVEIL | | CFCTRL | | CFDGML | + +---------+ +---------+ +---------+ + \_____________!______________/ + ! + +---------+ + | MUX | + | | + +---------+ + _____!_____ + / \ + +---------+ +---------+ + | CFFRML | | CFFRML | + | Framing | | Framing | + +---------+ +---------+ + ! ! + +---------+ +---------+ + | | | Serial | + | | | CFSERL | + +---------+ +---------+ + + +In this layered approach the following "rules" apply. + + - All layers embed the same structure "struct cflayer" + - A layer does not depend on any other layer's private data. + - Layers are stacked by setting the pointers:: + + layer->up , layer->dn + + - In order to send data upwards, each layer should do:: + + layer->up->receive(layer->up, packet); + + - In order to send data downwards, each layer should do:: + + layer->dn->transmit(layer->dn, packet); + + +CAIF Socket and IP interface +============================ + +The IP interface and CAIF socket API are implemented on top of the +CAIF Core protocol. The IP Interface and CAIF socket have an instance of +'struct cflayer', just like the CAIF Core protocol stack. +Net device and Socket implement the 'receive()' function defined by +'struct cflayer', just like the rest of the CAIF stack. In this way, transmit and +receive of packets is handled as by the rest of the layers: the 'dn->transmit()' +function is called in order to transmit data. + +Configuration of Link Layer +--------------------------- +The Link Layer is implemented as Linux network devices (struct net_device). +Payload handling and registration is done using standard Linux mechanisms. + +The CAIF Protocol relies on a loss-less link layer without implementing +retransmission. This implies that packet drops must not happen. +Therefore a flow-control mechanism is implemented where the physical +interface can initiate flow stop for all CAIF Channels. diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst new file mode 100644 index 000000000..ff05cbd05 --- /dev/null +++ b/Documentation/networking/can.rst @@ -0,0 +1,1437 @@ +=================================== +SocketCAN - Controller Area Network +=================================== + +Overview / What is SocketCAN +============================ + +The socketcan package is an implementation of CAN protocols +(Controller Area Network) for Linux. CAN is a networking technology +which has widespread use in automation, embedded devices, and +automotive fields. While there have been other CAN implementations +for Linux based on character devices, SocketCAN uses the Berkeley +socket API, the Linux network stack and implements the CAN device +drivers as network interfaces. The CAN socket API has been designed +as similar as possible to the TCP/IP protocols to allow programmers, +familiar with network programming, to easily learn how to use CAN +sockets. + + +.. _socketcan-motivation: + +Motivation / Why Using the Socket API +===================================== + +There have been CAN implementations for Linux before SocketCAN so the +question arises, why we have started another project. Most existing +implementations come as a device driver for some CAN hardware, they +are based on character devices and provide comparatively little +functionality. Usually, there is only a hardware-specific device +driver which provides a character device interface to send and +receive raw CAN frames, directly to/from the controller hardware. +Queueing of frames and higher-level transport protocols like ISO-TP +have to be implemented in user space applications. Also, most +character-device implementations support only one single process to +open the device at a time, similar to a serial interface. Exchanging +the CAN controller requires employment of another device driver and +often the need for adaption of large parts of the application to the +new driver's API. + +SocketCAN was designed to overcome all of these limitations. A new +protocol family has been implemented which provides a socket interface +to user space applications and which builds upon the Linux network +layer, enabling use all of the provided queueing functionality. A device +driver for CAN controller hardware registers itself with the Linux +network layer as a network device, so that CAN frames from the +controller can be passed up to the network layer and on to the CAN +protocol family module and also vice-versa. Also, the protocol family +module provides an API for transport protocol modules to register, so +that any number of transport protocols can be loaded or unloaded +dynamically. In fact, the can core module alone does not provide any +protocol and cannot be used without loading at least one additional +protocol module. Multiple sockets can be opened at the same time, +on different or the same protocol module and they can listen/send +frames on different or the same CAN IDs. Several sockets listening on +the same interface for frames with the same CAN ID are all passed the +same received matching CAN frames. An application wishing to +communicate using a specific transport protocol, e.g. ISO-TP, just +selects that protocol when opening the socket, and then can read and +write application data byte streams, without having to deal with +CAN-IDs, frames, etc. + +Similar functionality visible from user-space could be provided by a +character device, too, but this would lead to a technically inelegant +solution for a couple of reasons: + +* **Intricate usage:** Instead of passing a protocol argument to + socket(2) and using bind(2) to select a CAN interface and CAN ID, an + application would have to do all these operations using ioctl(2)s. + +* **Code duplication:** A character device cannot make use of the Linux + network queueing code, so all that code would have to be duplicated + for CAN networking. + +* **Abstraction:** In most existing character-device implementations, the + hardware-specific device driver for a CAN controller directly + provides the character device for the application to work with. + This is at least very unusual in Unix systems for both, char and + block devices. For example you don't have a character device for a + certain UART of a serial interface, a certain sound chip in your + computer, a SCSI or IDE controller providing access to your hard + disk or tape streamer device. Instead, you have abstraction layers + which provide a unified character or block device interface to the + application on the one hand, and a interface for hardware-specific + device drivers on the other hand. These abstractions are provided + by subsystems like the tty layer, the audio subsystem or the SCSI + and IDE subsystems for the devices mentioned above. + + The easiest way to implement a CAN device driver is as a character + device without such a (complete) abstraction layer, as is done by most + existing drivers. The right way, however, would be to add such a + layer with all the functionality like registering for certain CAN + IDs, supporting several open file descriptors and (de)multiplexing + CAN frames between them, (sophisticated) queueing of CAN frames, and + providing an API for device drivers to register with. However, then + it would be no more difficult, or may be even easier, to use the + networking framework provided by the Linux kernel, and this is what + SocketCAN does. + +The use of the networking framework of the Linux kernel is just the +natural and most appropriate way to implement CAN for Linux. + + +.. _socketcan-concept: + +SocketCAN Concept +================= + +As described in :ref:`socketcan-motivation` the main goal of SocketCAN is to +provide a socket interface to user space applications which builds +upon the Linux network layer. In contrast to the commonly known +TCP/IP and ethernet networking, the CAN bus is a broadcast-only(!) +medium that has no MAC-layer addressing like ethernet. The CAN-identifier +(can_id) is used for arbitration on the CAN-bus. Therefore the CAN-IDs +have to be chosen uniquely on the bus. When designing a CAN-ECU +network the CAN-IDs are mapped to be sent by a specific ECU. +For this reason a CAN-ID can be treated best as a kind of source address. + + +.. _socketcan-receive-lists: + +Receive Lists +------------- + +The network transparent access of multiple applications leads to the +problem that different applications may be interested in the same +CAN-IDs from the same CAN network interface. The SocketCAN core +module - which implements the protocol family CAN - provides several +high efficient receive lists for this reason. If e.g. a user space +application opens a CAN RAW socket, the raw protocol module itself +requests the (range of) CAN-IDs from the SocketCAN core that are +requested by the user. The subscription and unsubscription of +CAN-IDs can be done for specific CAN interfaces or for all(!) known +CAN interfaces with the can_rx_(un)register() functions provided to +CAN protocol modules by the SocketCAN core (see :ref:`socketcan-core-module`). +To optimize the CPU usage at runtime the receive lists are split up +into several specific lists per device that match the requested +filter complexity for a given use-case. + + +.. _socketcan-local-loopback1: + +Local Loopback of Sent Frames +----------------------------- + +As known from other networking concepts the data exchanging +applications may run on the same or different nodes without any +change (except for the according addressing information): + +.. code:: + + ___ ___ ___ _______ ___ + | _ | | _ | | _ | | _ _ | | _ | + ||A|| ||B|| ||C|| ||A| |B|| ||C|| + |___| |___| |___| |_______| |___| + | | | | | + -----------------(1)- CAN bus -(2)--------------- + +To ensure that application A receives the same information in the +example (2) as it would receive in example (1) there is need for +some kind of local loopback of the sent CAN frames on the appropriate +node. + +The Linux network devices (by default) just can handle the +transmission and reception of media dependent frames. Due to the +arbitration on the CAN bus the transmission of a low prio CAN-ID +may be delayed by the reception of a high prio CAN frame. To +reflect the correct [#f1]_ traffic on the node the loopback of the sent +data has to be performed right after a successful transmission. If +the CAN network interface is not capable of performing the loopback for +some reason the SocketCAN core can do this task as a fallback solution. +See :ref:`socketcan-local-loopback1` for details (recommended). + +The loopback functionality is enabled by default to reflect standard +networking behaviour for CAN applications. Due to some requests from +the RT-SocketCAN group the loopback optionally may be disabled for each +separate socket. See sockopts from the CAN RAW sockets in :ref:`socketcan-raw-sockets`. + +.. [#f1] you really like to have this when you're running analyser + tools like 'candump' or 'cansniffer' on the (same) node. + + +.. _socketcan-network-problem-notifications: + +Network Problem Notifications +----------------------------- + +The use of the CAN bus may lead to several problems on the physical +and media access control layer. Detecting and logging of these lower +layer problems is a vital requirement for CAN users to identify +hardware issues on the physical transceiver layer as well as +arbitration problems and error frames caused by the different +ECUs. The occurrence of detected errors are important for diagnosis +and have to be logged together with the exact timestamp. For this +reason the CAN interface driver can generate so called Error Message +Frames that can optionally be passed to the user application in the +same way as other CAN frames. Whenever an error on the physical layer +or the MAC layer is detected (e.g. by the CAN controller) the driver +creates an appropriate error message frame. Error messages frames can +be requested by the user application using the common CAN filter +mechanisms. Inside this filter definition the (interested) type of +errors may be selected. The reception of error messages is disabled +by default. The format of the CAN error message frame is briefly +described in the Linux header file "include/uapi/linux/can/error.h". + + +How to use SocketCAN +==================== + +Like TCP/IP, you first need to open a socket for communicating over a +CAN network. Since SocketCAN implements a new protocol family, you +need to pass PF_CAN as the first argument to the socket(2) system +call. Currently, there are two CAN protocols to choose from, the raw +socket protocol and the broadcast manager (BCM). So to open a socket, +you would write:: + + s = socket(PF_CAN, SOCK_RAW, CAN_RAW); + +and:: + + s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM); + +respectively. After the successful creation of the socket, you would +normally use the bind(2) system call to bind the socket to a CAN +interface (which is different from TCP/IP due to different addressing +- see :ref:`socketcan-concept`). After binding (CAN_RAW) or connecting (CAN_BCM) +the socket, you can read(2) and write(2) from/to the socket or use +send(2), sendto(2), sendmsg(2) and the recv* counterpart operations +on the socket as usual. There are also CAN specific socket options +described below. + +The basic CAN frame structure and the sockaddr structure are defined +in include/linux/can.h: + +.. code-block:: C + + struct can_frame { + canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ + __u8 can_dlc; /* frame payload length in byte (0 .. 8) */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ + __u8 data[8] __attribute__((aligned(8))); + }; + +The alignment of the (linear) payload data[] to a 64bit boundary +allows the user to define their own structs and unions to easily access +the CAN payload. There is no given byteorder on the CAN bus by +default. A read(2) system call on a CAN_RAW socket transfers a +struct can_frame to the user space. + +The sockaddr_can structure has an interface index like the +PF_PACKET socket, that also binds to a specific interface: + +.. code-block:: C + + struct sockaddr_can { + sa_family_t can_family; + int can_ifindex; + union { + /* transport protocol class address info (e.g. ISOTP) */ + struct { canid_t rx_id, tx_id; } tp; + + /* reserved for future CAN protocols address information */ + } can_addr; + }; + +To determine the interface index an appropriate ioctl() has to +be used (example for CAN_RAW sockets without error checking): + +.. code-block:: C + + int s; + struct sockaddr_can addr; + struct ifreq ifr; + + s = socket(PF_CAN, SOCK_RAW, CAN_RAW); + + strcpy(ifr.ifr_name, "can0" ); + ioctl(s, SIOCGIFINDEX, &ifr); + + addr.can_family = AF_CAN; + addr.can_ifindex = ifr.ifr_ifindex; + + bind(s, (struct sockaddr *)&addr, sizeof(addr)); + + (..) + +To bind a socket to all(!) CAN interfaces the interface index must +be 0 (zero). In this case the socket receives CAN frames from every +enabled CAN interface. To determine the originating CAN interface +the system call recvfrom(2) may be used instead of read(2). To send +on a socket that is bound to 'any' interface sendto(2) is needed to +specify the outgoing interface. + +Reading CAN frames from a bound CAN_RAW socket (see above) consists +of reading a struct can_frame: + +.. code-block:: C + + struct can_frame frame; + + nbytes = read(s, &frame, sizeof(struct can_frame)); + + if (nbytes < 0) { + perror("can raw socket read"); + return 1; + } + + /* paranoid check ... */ + if (nbytes < sizeof(struct can_frame)) { + fprintf(stderr, "read: incomplete CAN frame\n"); + return 1; + } + + /* do something with the received CAN frame */ + +Writing CAN frames can be done similarly, with the write(2) system call:: + + nbytes = write(s, &frame, sizeof(struct can_frame)); + +When the CAN interface is bound to 'any' existing CAN interface +(addr.can_ifindex = 0) it is recommended to use recvfrom(2) if the +information about the originating CAN interface is needed: + +.. code-block:: C + + struct sockaddr_can addr; + struct ifreq ifr; + socklen_t len = sizeof(addr); + struct can_frame frame; + + nbytes = recvfrom(s, &frame, sizeof(struct can_frame), + 0, (struct sockaddr*)&addr, &len); + + /* get interface name of the received CAN frame */ + ifr.ifr_ifindex = addr.can_ifindex; + ioctl(s, SIOCGIFNAME, &ifr); + printf("Received a CAN frame from interface %s", ifr.ifr_name); + +To write CAN frames on sockets bound to 'any' CAN interface the +outgoing interface has to be defined certainly: + +.. code-block:: C + + strcpy(ifr.ifr_name, "can0"); + ioctl(s, SIOCGIFINDEX, &ifr); + addr.can_ifindex = ifr.ifr_ifindex; + addr.can_family = AF_CAN; + + nbytes = sendto(s, &frame, sizeof(struct can_frame), + 0, (struct sockaddr*)&addr, sizeof(addr)); + +An accurate timestamp can be obtained with an ioctl(2) call after reading +a message from the socket: + +.. code-block:: C + + struct timeval tv; + ioctl(s, SIOCGSTAMP, &tv); + +The timestamp has a resolution of one microsecond and is set automatically +at the reception of a CAN frame. + +Remark about CAN FD (flexible data rate) support: + +Generally the handling of CAN FD is very similar to the formerly described +examples. The new CAN FD capable CAN controllers support two different +bitrates for the arbitration phase and the payload phase of the CAN FD frame +and up to 64 bytes of payload. This extended payload length breaks all the +kernel interfaces (ABI) which heavily rely on the CAN frame with fixed eight +bytes of payload (struct can_frame) like the CAN_RAW socket. Therefore e.g. +the CAN_RAW socket supports a new socket option CAN_RAW_FD_FRAMES that +switches the socket into a mode that allows the handling of CAN FD frames +and (legacy) CAN frames simultaneously (see :ref:`socketcan-rawfd`). + +The struct canfd_frame is defined in include/linux/can.h: + +.. code-block:: C + + struct canfd_frame { + canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ + __u8 len; /* frame payload length in byte (0 .. 64) */ + __u8 flags; /* additional flags for CAN FD */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ + __u8 data[64] __attribute__((aligned(8))); + }; + +The struct canfd_frame and the existing struct can_frame have the can_id, +the payload length and the payload data at the same offset inside their +structures. This allows to handle the different structures very similar. +When the content of a struct can_frame is copied into a struct canfd_frame +all structure elements can be used as-is - only the data[] becomes extended. + +When introducing the struct canfd_frame it turned out that the data length +code (DLC) of the struct can_frame was used as a length information as the +length and the DLC has a 1:1 mapping in the range of 0 .. 8. To preserve +the easy handling of the length information the canfd_frame.len element +contains a plain length value from 0 .. 64. So both canfd_frame.len and +can_frame.can_dlc are equal and contain a length information and no DLC. +For details about the distinction of CAN and CAN FD capable devices and +the mapping to the bus-relevant data length code (DLC), see :ref:`socketcan-can-fd-driver`. + +The length of the two CAN(FD) frame structures define the maximum transfer +unit (MTU) of the CAN(FD) network interface and skbuff data length. Two +definitions are specified for CAN specific MTUs in include/linux/can.h: + +.. code-block:: C + + #define CAN_MTU (sizeof(struct can_frame)) == 16 => 'legacy' CAN frame + #define CANFD_MTU (sizeof(struct canfd_frame)) == 72 => CAN FD frame + + +.. _socketcan-raw-sockets: + +RAW Protocol Sockets with can_filters (SOCK_RAW) +------------------------------------------------ + +Using CAN_RAW sockets is extensively comparable to the commonly +known access to CAN character devices. To meet the new possibilities +provided by the multi user SocketCAN approach, some reasonable +defaults are set at RAW socket binding time: + +- The filters are set to exactly one filter receiving everything +- The socket only receives valid data frames (=> no error message frames) +- The loopback of sent CAN frames is enabled (see :ref:`socketcan-local-loopback2`) +- The socket does not receive its own sent frames (in loopback mode) + +These default settings may be changed before or after binding the socket. +To use the referenced definitions of the socket options for CAN_RAW +sockets, include . + + +.. _socketcan-rawfilter: + +RAW socket option CAN_RAW_FILTER +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The reception of CAN frames using CAN_RAW sockets can be controlled +by defining 0 .. n filters with the CAN_RAW_FILTER socket option. + +The CAN filter structure is defined in include/linux/can.h: + +.. code-block:: C + + struct can_filter { + canid_t can_id; + canid_t can_mask; + }; + +A filter matches, when: + +.. code-block:: C + + & mask == can_id & mask + +which is analogous to known CAN controllers hardware filter semantics. +The filter can be inverted in this semantic, when the CAN_INV_FILTER +bit is set in can_id element of the can_filter structure. In +contrast to CAN controller hardware filters the user may set 0 .. n +receive filters for each open socket separately: + +.. code-block:: C + + struct can_filter rfilter[2]; + + rfilter[0].can_id = 0x123; + rfilter[0].can_mask = CAN_SFF_MASK; + rfilter[1].can_id = 0x200; + rfilter[1].can_mask = 0x700; + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter)); + +To disable the reception of CAN frames on the selected CAN_RAW socket: + +.. code-block:: C + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, NULL, 0); + +To set the filters to zero filters is quite obsolete as to not read +data causes the raw socket to discard the received CAN frames. But +having this 'send only' use-case we may remove the receive list in the +Kernel to save a little (really a very little!) CPU usage. + +CAN Filter Usage Optimisation +............................. + +The CAN filters are processed in per-device filter lists at CAN frame +reception time. To reduce the number of checks that need to be performed +while walking through the filter lists the CAN core provides an optimized +filter handling when the filter subscription focusses on a single CAN ID. + +For the possible 2048 SFF CAN identifiers the identifier is used as an index +to access the corresponding subscription list without any further checks. +For the 2^29 possible EFF CAN identifiers a 10 bit XOR folding is used as +hash function to retrieve the EFF table index. + +To benefit from the optimized filters for single CAN identifiers the +CAN_SFF_MASK or CAN_EFF_MASK have to be set into can_filter.mask together +with set CAN_EFF_FLAG and CAN_RTR_FLAG bits. A set CAN_EFF_FLAG bit in the +can_filter.mask makes clear that it matters whether a SFF or EFF CAN ID is +subscribed. E.g. in the example from above: + +.. code-block:: C + + rfilter[0].can_id = 0x123; + rfilter[0].can_mask = CAN_SFF_MASK; + +both SFF frames with CAN ID 0x123 and EFF frames with 0xXXXXX123 can pass. + +To filter for only 0x123 (SFF) and 0x12345678 (EFF) CAN identifiers the +filter has to be defined in this way to benefit from the optimized filters: + +.. code-block:: C + + struct can_filter rfilter[2]; + + rfilter[0].can_id = 0x123; + rfilter[0].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_SFF_MASK); + rfilter[1].can_id = 0x12345678 | CAN_EFF_FLAG; + rfilter[1].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_EFF_MASK); + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter)); + + +RAW Socket Option CAN_RAW_ERR_FILTER +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As described in :ref:`socketcan-network-problem-notifications` the CAN interface driver can generate so +called Error Message Frames that can optionally be passed to the user +application in the same way as other CAN frames. The possible +errors are divided into different error classes that may be filtered +using the appropriate error mask. To register for every possible +error condition CAN_ERR_MASK can be used as value for the error mask. +The values for the error mask are defined in linux/can/error.h: + +.. code-block:: C + + can_err_mask_t err_mask = ( CAN_ERR_TX_TIMEOUT | CAN_ERR_BUSOFF ); + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_ERR_FILTER, + &err_mask, sizeof(err_mask)); + + +RAW Socket Option CAN_RAW_LOOPBACK +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To meet multi user needs the local loopback is enabled by default +(see :ref:`socketcan-local-loopback1` for details). But in some embedded use-cases +(e.g. when only one application uses the CAN bus) this loopback +functionality can be disabled (separately for each socket): + +.. code-block:: C + + int loopback = 0; /* 0 = disabled, 1 = enabled (default) */ + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_LOOPBACK, &loopback, sizeof(loopback)); + + +RAW socket option CAN_RAW_RECV_OWN_MSGS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When the local loopback is enabled, all the sent CAN frames are +looped back to the open CAN sockets that registered for the CAN +frames' CAN-ID on this given interface to meet the multi user +needs. The reception of the CAN frames on the same socket that was +sending the CAN frame is assumed to be unwanted and therefore +disabled by default. This default behaviour may be changed on +demand: + +.. code-block:: C + + int recv_own_msgs = 1; /* 0 = disabled (default), 1 = enabled */ + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS, + &recv_own_msgs, sizeof(recv_own_msgs)); + + +.. _socketcan-rawfd: + +RAW Socket Option CAN_RAW_FD_FRAMES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +CAN FD support in CAN_RAW sockets can be enabled with a new socket option +CAN_RAW_FD_FRAMES which is off by default. When the new socket option is +not supported by the CAN_RAW socket (e.g. on older kernels), switching the +CAN_RAW_FD_FRAMES option returns the error -ENOPROTOOPT. + +Once CAN_RAW_FD_FRAMES is enabled the application can send both CAN frames +and CAN FD frames. OTOH the application has to handle CAN and CAN FD frames +when reading from the socket: + +.. code-block:: C + + CAN_RAW_FD_FRAMES enabled: CAN_MTU and CANFD_MTU are allowed + CAN_RAW_FD_FRAMES disabled: only CAN_MTU is allowed (default) + +Example: + +.. code-block:: C + + [ remember: CANFD_MTU == sizeof(struct canfd_frame) ] + + struct canfd_frame cfd; + + nbytes = read(s, &cfd, CANFD_MTU); + + if (nbytes == CANFD_MTU) { + printf("got CAN FD frame with length %d\n", cfd.len); + /* cfd.flags contains valid data */ + } else if (nbytes == CAN_MTU) { + printf("got legacy CAN frame with length %d\n", cfd.len); + /* cfd.flags is undefined */ + } else { + fprintf(stderr, "read: invalid CAN(FD) frame\n"); + return 1; + } + + /* the content can be handled independently from the received MTU size */ + + printf("can_id: %X data length: %d data: ", cfd.can_id, cfd.len); + for (i = 0; i < cfd.len; i++) + printf("%02X ", cfd.data[i]); + +When reading with size CANFD_MTU only returns CAN_MTU bytes that have +been received from the socket a legacy CAN frame has been read into the +provided CAN FD structure. Note that the canfd_frame.flags data field is +not specified in the struct can_frame and therefore it is only valid in +CANFD_MTU sized CAN FD frames. + +Implementation hint for new CAN applications: + +To build a CAN FD aware application use struct canfd_frame as basic CAN +data structure for CAN_RAW based applications. When the application is +executed on an older Linux kernel and switching the CAN_RAW_FD_FRAMES +socket option returns an error: No problem. You'll get legacy CAN frames +or CAN FD frames and can process them the same way. + +When sending to CAN devices make sure that the device is capable to handle +CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU. +The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall. + + +RAW socket option CAN_RAW_JOIN_FILTERS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The CAN_RAW socket can set multiple CAN identifier specific filters that +lead to multiple filters in the af_can.c filter processing. These filters +are indenpendent from each other which leads to logical OR'ed filters when +applied (see :ref:`socketcan-rawfilter`). + +This socket option joines the given CAN filters in the way that only CAN +frames are passed to user space that matched *all* given CAN filters. The +semantic for the applied filters is therefore changed to a logical AND. + +This is useful especially when the filterset is a combination of filters +where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or +CAN ID ranges from the incoming traffic. + + +RAW Socket Returned Message Flags +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When using recvmsg() call, the msg->msg_flags may contain following flags: + +MSG_DONTROUTE: + set when the received frame was created on the local host. + +MSG_CONFIRM: + set when the frame was sent via the socket it is received on. + This flag can be interpreted as a 'transmission confirmation' when the + CAN driver supports the echo of frames on driver level, see + :ref:`socketcan-local-loopback1` and :ref:`socketcan-local-loopback2`. + In order to receive such messages, CAN_RAW_RECV_OWN_MSGS must be set. + + +Broadcast Manager Protocol Sockets (SOCK_DGRAM) +----------------------------------------------- + +The Broadcast Manager protocol provides a command based configuration +interface to filter and send (e.g. cyclic) CAN messages in kernel space. + +Receive filters can be used to down sample frequent messages; detect events +such as message contents changes, packet length changes, and do time-out +monitoring of received messages. + +Periodic transmission tasks of CAN frames or a sequence of CAN frames can be +created and modified at runtime; both the message content and the two +possible transmit intervals can be altered. + +A BCM socket is not intended for sending individual CAN frames using the +struct can_frame as known from the CAN_RAW socket. Instead a special BCM +configuration message is defined. The basic BCM configuration message used +to communicate with the broadcast manager and the available operations are +defined in the linux/can/bcm.h include. The BCM message consists of a +message header with a command ('opcode') followed by zero or more CAN frames. +The broadcast manager sends responses to user space in the same form: + +.. code-block:: C + + struct bcm_msg_head { + __u32 opcode; /* command */ + __u32 flags; /* special flags */ + __u32 count; /* run 'count' times with ival1 */ + struct timeval ival1, ival2; /* count and subsequent interval */ + canid_t can_id; /* unique can_id for task */ + __u32 nframes; /* number of can_frames following */ + struct can_frame frames[0]; + }; + +The aligned payload 'frames' uses the same basic CAN frame structure defined +at the beginning of :ref:`socketcan-rawfd` and in the include/linux/can.h include. All +messages to the broadcast manager from user space have this structure. + +Note a CAN_BCM socket must be connected instead of bound after socket +creation (example without error checking): + +.. code-block:: C + + int s; + struct sockaddr_can addr; + struct ifreq ifr; + + s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM); + + strcpy(ifr.ifr_name, "can0"); + ioctl(s, SIOCGIFINDEX, &ifr); + + addr.can_family = AF_CAN; + addr.can_ifindex = ifr.ifr_ifindex; + + connect(s, (struct sockaddr *)&addr, sizeof(addr)); + + (..) + +The broadcast manager socket is able to handle any number of in flight +transmissions or receive filters concurrently. The different RX/TX jobs are +distinguished by the unique can_id in each BCM message. However additional +CAN_BCM sockets are recommended to communicate on multiple CAN interfaces. +When the broadcast manager socket is bound to 'any' CAN interface (=> the +interface index is set to zero) the configured receive filters apply to any +CAN interface unless the sendto() syscall is used to overrule the 'any' CAN +interface index. When using recvfrom() instead of read() to retrieve BCM +socket messages the originating CAN interface is provided in can_ifindex. + + +Broadcast Manager Operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The opcode defines the operation for the broadcast manager to carry out, +or details the broadcast managers response to several events, including +user requests. + +Transmit Operations (user space to broadcast manager): + +TX_SETUP: + Create (cyclic) transmission task. + +TX_DELETE: + Remove (cyclic) transmission task, requires only can_id. + +TX_READ: + Read properties of (cyclic) transmission task for can_id. + +TX_SEND: + Send one CAN frame. + +Transmit Responses (broadcast manager to user space): + +TX_STATUS: + Reply to TX_READ request (transmission task configuration). + +TX_EXPIRED: + Notification when counter finishes sending at initial interval + 'ival1'. Requires the TX_COUNTEVT flag to be set at TX_SETUP. + +Receive Operations (user space to broadcast manager): + +RX_SETUP: + Create RX content filter subscription. + +RX_DELETE: + Remove RX content filter subscription, requires only can_id. + +RX_READ: + Read properties of RX content filter subscription for can_id. + +Receive Responses (broadcast manager to user space): + +RX_STATUS: + Reply to RX_READ request (filter task configuration). + +RX_TIMEOUT: + Cyclic message is detected to be absent (timer ival1 expired). + +RX_CHANGED: + BCM message with updated CAN frame (detected content change). + Sent on first message received or on receipt of revised CAN messages. + + +Broadcast Manager Message Flags +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When sending a message to the broadcast manager the 'flags' element may +contain the following flag definitions which influence the behaviour: + +SETTIMER: + Set the values of ival1, ival2 and count + +STARTTIMER: + Start the timer with the actual values of ival1, ival2 + and count. Starting the timer leads simultaneously to emit a CAN frame. + +TX_COUNTEVT: + Create the message TX_EXPIRED when count expires + +TX_ANNOUNCE: + A change of data by the process is emitted immediately. + +TX_CP_CAN_ID: + Copies the can_id from the message header to each + subsequent frame in frames. This is intended as usage simplification. For + TX tasks the unique can_id from the message header may differ from the + can_id(s) stored for transmission in the subsequent struct can_frame(s). + +RX_FILTER_ID: + Filter by can_id alone, no frames required (nframes=0). + +RX_CHECK_DLC: + A change of the DLC leads to an RX_CHANGED. + +RX_NO_AUTOTIMER: + Prevent automatically starting the timeout monitor. + +RX_ANNOUNCE_RESUME: + If passed at RX_SETUP and a receive timeout occurred, a + RX_CHANGED message will be generated when the (cyclic) receive restarts. + +TX_RESET_MULTI_IDX: + Reset the index for the multiple frame transmission. + +RX_RTR_FRAME: + Send reply for RTR-request (placed in op->frames[0]). + + +Broadcast Manager Transmission Timers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Periodic transmission configurations may use up to two interval timers. +In this case the BCM sends a number of messages ('count') at an interval +'ival1', then continuing to send at another given interval 'ival2'. When +only one timer is needed 'count' is set to zero and only 'ival2' is used. +When SET_TIMER and START_TIMER flag were set the timers are activated. +The timer values can be altered at runtime when only SET_TIMER is set. + + +Broadcast Manager message sequence transmission +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Up to 256 CAN frames can be transmitted in a sequence in the case of a cyclic +TX task configuration. The number of CAN frames is provided in the 'nframes' +element of the BCM message head. The defined number of CAN frames are added +as array to the TX_SETUP BCM configuration message: + +.. code-block:: C + + /* create a struct to set up a sequence of four CAN frames */ + struct { + struct bcm_msg_head msg_head; + struct can_frame frame[4]; + } mytxmsg; + + (..) + mytxmsg.msg_head.nframes = 4; + (..) + + write(s, &mytxmsg, sizeof(mytxmsg)); + +With every transmission the index in the array of CAN frames is increased +and set to zero at index overflow. + + +Broadcast Manager Receive Filter Timers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The timer values ival1 or ival2 may be set to non-zero values at RX_SETUP. +When the SET_TIMER flag is set the timers are enabled: + +ival1: + Send RX_TIMEOUT when a received message is not received again within + the given time. When START_TIMER is set at RX_SETUP the timeout detection + is activated directly - even without a former CAN frame reception. + +ival2: + Throttle the received message rate down to the value of ival2. This + is useful to reduce messages for the application when the signal inside the + CAN frame is stateless as state changes within the ival2 periode may get + lost. + +Broadcast Manager Multiplex Message Receive Filter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To filter for content changes in multiplex message sequences an array of more +than one CAN frames can be passed in a RX_SETUP configuration message. The +data bytes of the first CAN frame contain the mask of relevant bits that +have to match in the subsequent CAN frames with the received CAN frame. +If one of the subsequent CAN frames is matching the bits in that frame data +mark the relevant content to be compared with the previous received content. +Up to 257 CAN frames (multiplex filter bit mask CAN frame plus 256 CAN +filters) can be added as array to the TX_SETUP BCM configuration message: + +.. code-block:: C + + /* usually used to clear CAN frame data[] - beware of endian problems! */ + #define U64_DATA(p) (*(unsigned long long*)(p)->data) + + struct { + struct bcm_msg_head msg_head; + struct can_frame frame[5]; + } msg; + + msg.msg_head.opcode = RX_SETUP; + msg.msg_head.can_id = 0x42; + msg.msg_head.flags = 0; + msg.msg_head.nframes = 5; + U64_DATA(&msg.frame[0]) = 0xFF00000000000000ULL; /* MUX mask */ + U64_DATA(&msg.frame[1]) = 0x01000000000000FFULL; /* data mask (MUX 0x01) */ + U64_DATA(&msg.frame[2]) = 0x0200FFFF000000FFULL; /* data mask (MUX 0x02) */ + U64_DATA(&msg.frame[3]) = 0x330000FFFFFF0003ULL; /* data mask (MUX 0x33) */ + U64_DATA(&msg.frame[4]) = 0x4F07FC0FF0000000ULL; /* data mask (MUX 0x4F) */ + + write(s, &msg, sizeof(msg)); + + +Broadcast Manager CAN FD Support +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The programming API of the CAN_BCM depends on struct can_frame which is +given as array directly behind the bcm_msg_head structure. To follow this +schema for the CAN FD frames a new flag 'CAN_FD_FRAME' in the bcm_msg_head +flags indicates that the concatenated CAN frame structures behind the +bcm_msg_head are defined as struct canfd_frame: + +.. code-block:: C + + struct { + struct bcm_msg_head msg_head; + struct canfd_frame frame[5]; + } msg; + + msg.msg_head.opcode = RX_SETUP; + msg.msg_head.can_id = 0x42; + msg.msg_head.flags = CAN_FD_FRAME; + msg.msg_head.nframes = 5; + (..) + +When using CAN FD frames for multiplex filtering the MUX mask is still +expected in the first 64 bit of the struct canfd_frame data section. + + +Connected Transport Protocols (SOCK_SEQPACKET) +---------------------------------------------- + +(to be written) + + +Unconnected Transport Protocols (SOCK_DGRAM) +-------------------------------------------- + +(to be written) + + +.. _socketcan-core-module: + +SocketCAN Core Module +===================== + +The SocketCAN core module implements the protocol family +PF_CAN. CAN protocol modules are loaded by the core module at +runtime. The core module provides an interface for CAN protocol +modules to subscribe needed CAN IDs (see :ref:`socketcan-receive-lists`). + + +can.ko Module Params +-------------------- + +- **stats_timer**: + To calculate the SocketCAN core statistics + (e.g. current/maximum frames per second) this 1 second timer is + invoked at can.ko module start time by default. This timer can be + disabled by using stattimer=0 on the module commandline. + +- **debug**: + (removed since SocketCAN SVN r546) + + +procfs content +-------------- + +As described in :ref:`socketcan-receive-lists` the SocketCAN core uses several filter +lists to deliver received CAN frames to CAN protocol modules. These +receive lists, their filters and the count of filter matches can be +checked in the appropriate receive list. All entries contain the +device and a protocol module identifier:: + + foo@bar:~$ cat /proc/net/can/rcvlist_all + + receive list 'rx_all': + (vcan3: no entry) + (vcan2: no entry) + (vcan1: no entry) + device can_id can_mask function userdata matches ident + vcan0 000 00000000 f88e6370 f6c6f400 0 raw + (any: no entry) + +In this example an application requests any CAN traffic from vcan0:: + + rcvlist_all - list for unfiltered entries (no filter operations) + rcvlist_eff - list for single extended frame (EFF) entries + rcvlist_err - list for error message frames masks + rcvlist_fil - list for mask/value filters + rcvlist_inv - list for mask/value filters (inverse semantic) + rcvlist_sff - list for single standard frame (SFF) entries + +Additional procfs files in /proc/net/can:: + + stats - SocketCAN core statistics (rx/tx frames, match ratios, ...) + reset_stats - manual statistic reset + version - prints the SocketCAN core version and the ABI version + + +Writing Own CAN Protocol Modules +-------------------------------- + +To implement a new protocol in the protocol family PF_CAN a new +protocol has to be defined in include/linux/can.h . +The prototypes and definitions to use the SocketCAN core can be +accessed by including include/linux/can/core.h . +In addition to functions that register the CAN protocol and the +CAN device notifier chain there are functions to subscribe CAN +frames received by CAN interfaces and to send CAN frames:: + + can_rx_register - subscribe CAN frames from a specific interface + can_rx_unregister - unsubscribe CAN frames from a specific interface + can_send - transmit a CAN frame (optional with local loopback) + +For details see the kerneldoc documentation in net/can/af_can.c or +the source code of net/can/raw.c or net/can/bcm.c . + + +CAN Network Drivers +=================== + +Writing a CAN network device driver is much easier than writing a +CAN character device driver. Similar to other known network device +drivers you mainly have to deal with: + +- TX: Put the CAN frame from the socket buffer to the CAN controller. +- RX: Put the CAN frame from the CAN controller to the socket buffer. + +See e.g. at Documentation/networking/netdevices.rst . The differences +for writing CAN network device driver are described below: + + +General Settings +---------------- + +.. code-block:: C + + dev->type = ARPHRD_CAN; /* the netdevice hardware type */ + dev->flags = IFF_NOARP; /* CAN has no arp */ + + dev->mtu = CAN_MTU; /* sizeof(struct can_frame) -> legacy CAN interface */ + + or alternative, when the controller supports CAN with flexible data rate: + dev->mtu = CANFD_MTU; /* sizeof(struct canfd_frame) -> CAN FD interface */ + +The struct can_frame or struct canfd_frame is the payload of each socket +buffer (skbuff) in the protocol family PF_CAN. + + +.. _socketcan-local-loopback2: + +Local Loopback of Sent Frames +----------------------------- + +As described in :ref:`socketcan-local-loopback1` the CAN network device driver should +support a local loopback functionality similar to the local echo +e.g. of tty devices. In this case the driver flag IFF_ECHO has to be +set to prevent the PF_CAN core from locally echoing sent frames +(aka loopback) as fallback solution:: + + dev->flags = (IFF_NOARP | IFF_ECHO); + + +CAN Controller Hardware Filters +------------------------------- + +To reduce the interrupt load on deep embedded systems some CAN +controllers support the filtering of CAN IDs or ranges of CAN IDs. +These hardware filter capabilities vary from controller to +controller and have to be identified as not feasible in a multi-user +networking approach. The use of the very controller specific +hardware filters could make sense in a very dedicated use-case, as a +filter on driver level would affect all users in the multi-user +system. The high efficient filter sets inside the PF_CAN core allow +to set different multiple filters for each socket separately. +Therefore the use of hardware filters goes to the category 'handmade +tuning on deep embedded systems'. The author is running a MPC603e +@133MHz with four SJA1000 CAN controllers from 2002 under heavy bus +load without any problems ... + + +The Virtual CAN Driver (vcan) +----------------------------- + +Similar to the network loopback devices, vcan offers a virtual local +CAN interface. A full qualified address on CAN consists of + +- a unique CAN Identifier (CAN ID) +- the CAN bus this CAN ID is transmitted on (e.g. can0) + +so in common use cases more than one virtual CAN interface is needed. + +The virtual CAN interfaces allow the transmission and reception of CAN +frames without real CAN controller hardware. Virtual CAN network +devices are usually named 'vcanX', like vcan0 vcan1 vcan2 ... +When compiled as a module the virtual CAN driver module is called vcan.ko + +Since Linux Kernel version 2.6.24 the vcan driver supports the Kernel +netlink interface to create vcan network devices. The creation and +removal of vcan network devices can be managed with the ip(8) tool:: + + - Create a virtual CAN network interface: + $ ip link add type vcan + + - Create a virtual CAN network interface with a specific name 'vcan42': + $ ip link add dev vcan42 type vcan + + - Remove a (virtual CAN) network interface 'vcan42': + $ ip link del vcan42 + + +The CAN Network Device Driver Interface +--------------------------------------- + +The CAN network device driver interface provides a generic interface +to setup, configure and monitor CAN network devices. The user can then +configure the CAN device, like setting the bit-timing parameters, via +the netlink interface using the program "ip" from the "IPROUTE2" +utility suite. The following chapter describes briefly how to use it. +Furthermore, the interface uses a common data structure and exports a +set of common functions, which all real CAN network device drivers +should use. Please have a look to the SJA1000 or MSCAN driver to +understand how to use them. The name of the module is can-dev.ko. + + +Netlink interface to set/get devices properties +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The CAN device must be configured via netlink interface. The supported +netlink message types are defined and briefly described in +"include/linux/can/netlink.h". CAN link support for the program "ip" +of the IPROUTE2 utility suite is available and it can be used as shown +below: + +Setting CAN device properties:: + + $ ip link set can0 type can help + Usage: ip link set DEVICE type can + [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] | + [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1 + phase-seg2 PHASE-SEG2 [ sjw SJW ] ] + + [ dbitrate BITRATE [ dsample-point SAMPLE-POINT] ] | + [ dtq TQ dprop-seg PROP_SEG dphase-seg1 PHASE-SEG1 + dphase-seg2 PHASE-SEG2 [ dsjw SJW ] ] + + [ loopback { on | off } ] + [ listen-only { on | off } ] + [ triple-sampling { on | off } ] + [ one-shot { on | off } ] + [ berr-reporting { on | off } ] + [ fd { on | off } ] + [ fd-non-iso { on | off } ] + [ presume-ack { on | off } ] + + [ restart-ms TIME-MS ] + [ restart ] + + Where: BITRATE := { 1..1000000 } + SAMPLE-POINT := { 0.000..0.999 } + TQ := { NUMBER } + PROP-SEG := { 1..8 } + PHASE-SEG1 := { 1..8 } + PHASE-SEG2 := { 1..8 } + SJW := { 1..4 } + RESTART-MS := { 0 | NUMBER } + +Display CAN device details and statistics:: + + $ ip -details -statistics link show can0 + 2: can0: mtu 16 qdisc pfifo_fast state UP qlen 10 + link/can + can state ERROR-ACTIVE restart-ms 100 + bitrate 125000 sample_point 0.875 + tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1 + sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1 + clock 8000000 + re-started bus-errors arbit-lost error-warn error-pass bus-off + 41 17457 0 41 42 41 + RX: bytes packets errors dropped overrun mcast + 140859 17608 17457 0 0 0 + TX: bytes packets errors dropped carrier collsns + 861 112 0 41 0 0 + +More info to the above output: + +"" + Shows the list of selected CAN controller modes: LOOPBACK, + LISTEN-ONLY, or TRIPLE-SAMPLING. + +"state ERROR-ACTIVE" + The current state of the CAN controller: "ERROR-ACTIVE", + "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED" + +"restart-ms 100" + Automatic restart delay time. If set to a non-zero value, a + restart of the CAN controller will be triggered automatically + in case of a bus-off condition after the specified delay time + in milliseconds. By default it's off. + +"bitrate 125000 sample-point 0.875" + Shows the real bit-rate in bits/sec and the sample-point in the + range 0.000..0.999. If the calculation of bit-timing parameters + is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the + bit-timing can be defined by setting the "bitrate" argument. + Optionally the "sample-point" can be specified. By default it's + 0.000 assuming CIA-recommended sample-points. + +"tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1" + Shows the time quanta in ns, propagation segment, phase buffer + segment 1 and 2 and the synchronisation jump width in units of + tq. They allow to define the CAN bit-timing in a hardware + independent format as proposed by the Bosch CAN 2.0 spec (see + chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf). + +"sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1 clock 8000000" + Shows the bit-timing constants of the CAN controller, here the + "sja1000". The minimum and maximum values of the time segment 1 + and 2, the synchronisation jump width in units of tq, the + bitrate pre-scaler and the CAN system clock frequency in Hz. + These constants could be used for user-defined (non-standard) + bit-timing calculation algorithms in user-space. + +"re-started bus-errors arbit-lost error-warn error-pass bus-off" + Shows the number of restarts, bus and arbitration lost errors, + and the state changes to the error-warning, error-passive and + bus-off state. RX overrun errors are listed in the "overrun" + field of the standard network statistics. + +Setting the CAN Bit-Timing +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The CAN bit-timing parameters can always be defined in a hardware +independent format as proposed in the Bosch CAN 2.0 specification +specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2" +and "sjw":: + + $ ip link set canX type can tq 125 prop-seg 6 \ + phase-seg1 7 phase-seg2 2 sjw 1 + +If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA +recommended CAN bit-timing parameters will be calculated if the bit- +rate is specified with the argument "bitrate":: + + $ ip link set canX type can bitrate 125000 + +Note that this works fine for the most common CAN controllers with +standard bit-rates but may *fail* for exotic bit-rates or CAN system +clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some +space and allows user-space tools to solely determine and set the +bit-timing parameters. The CAN controller specific bit-timing +constants can be used for that purpose. They are listed by the +following command:: + + $ ip -details link show can0 + ... + sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1 + + +Starting and Stopping the CAN Network Device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A CAN network device is started or stopped as usual with the command +"ifconfig canX up/down" or "ip link set canX up/down". Be aware that +you *must* define proper bit-timing parameters for real CAN devices +before you can start it to avoid error-prone default settings:: + + $ ip link set canX up type can bitrate 125000 + +A device may enter the "bus-off" state if too many errors occurred on +the CAN bus. Then no more messages are received or sent. An automatic +bus-off recovery can be enabled by setting the "restart-ms" to a +non-zero value, e.g.:: + + $ ip link set canX type can restart-ms 100 + +Alternatively, the application may realize the "bus-off" condition +by monitoring CAN error message frames and do a restart when +appropriate with the command:: + + $ ip link set canX type can restart + +Note that a restart will also create a CAN error message frame (see +also :ref:`socketcan-network-problem-notifications`). + + +.. _socketcan-can-fd-driver: + +CAN FD (Flexible Data Rate) Driver Support +------------------------------------------ + +CAN FD capable CAN controllers support two different bitrates for the +arbitration phase and the payload phase of the CAN FD frame. Therefore a +second bit timing has to be specified in order to enable the CAN FD bitrate. + +Additionally CAN FD capable CAN controllers support up to 64 bytes of +payload. The representation of this length in can_frame.can_dlc and +canfd_frame.len for userspace applications and inside the Linux network +layer is a plain value from 0 .. 64 instead of the CAN 'data length code'. +The data length code was a 1:1 mapping to the payload length in the legacy +CAN frames anyway. The payload length to the bus-relevant DLC mapping is +only performed inside the CAN drivers, preferably with the helper +functions can_dlc2len() and can_len2dlc(). + +The CAN netdevice driver capabilities can be distinguished by the network +devices maximum transfer unit (MTU):: + + MTU = 16 (CAN_MTU) => sizeof(struct can_frame) => 'legacy' CAN device + MTU = 72 (CANFD_MTU) => sizeof(struct canfd_frame) => CAN FD capable device + +The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall. +N.B. CAN FD capable devices can also handle and send legacy CAN frames. + +When configuring CAN FD capable CAN controllers an additional 'data' bitrate +has to be set. This bitrate for the data phase of the CAN FD frame has to be +at least the bitrate which was configured for the arbitration phase. This +second bitrate is specified analogue to the first bitrate but the bitrate +setting keywords for the 'data' bitrate start with 'd' e.g. dbitrate, +dsample-point, dsjw or dtq and similar settings. When a data bitrate is set +within the configuration process the controller option "fd on" can be +specified to enable the CAN FD mode in the CAN controller. This controller +option also switches the device MTU to 72 (CANFD_MTU). + +The first CAN FD specification presented as whitepaper at the International +CAN Conference 2012 needed to be improved for data integrity reasons. +Therefore two CAN FD implementations have to be distinguished today: + +- ISO compliant: The ISO 11898-1:2015 CAN FD implementation (default) +- non-ISO compliant: The CAN FD implementation following the 2012 whitepaper + +Finally there are three types of CAN FD controllers: + +1. ISO compliant (fixed) +2. non-ISO compliant (fixed, like the M_CAN IP core v3.0.1 in m_can.c) +3. ISO/non-ISO CAN FD controllers (switchable, like the PEAK PCAN-USB FD) + +The current ISO/non-ISO mode is announced by the CAN controller driver via +netlink and displayed by the 'ip' tool (controller option FD-NON-ISO). +The ISO/non-ISO-mode can be altered by setting 'fd-non-iso {on|off}' for +switchable CAN FD controllers only. + +Example configuring 500 kbit/s arbitration bitrate and 4 Mbit/s data bitrate:: + + $ ip link set can0 up type can bitrate 500000 sample-point 0.75 \ + dbitrate 4000000 dsample-point 0.8 fd on + $ ip -details link show can0 + 5: can0: mtu 72 qdisc pfifo_fast state UNKNOWN \ + mode DEFAULT group default qlen 10 + link/can promiscuity 0 + can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 + bitrate 500000 sample-point 0.750 + tq 50 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 1 + pcan_usb_pro_fd: tseg1 1..64 tseg2 1..16 sjw 1..16 brp 1..1024 \ + brp-inc 1 + dbitrate 4000000 dsample-point 0.800 + dtq 12 dprop-seg 7 dphase-seg1 8 dphase-seg2 4 dsjw 1 + pcan_usb_pro_fd: dtseg1 1..16 dtseg2 1..8 dsjw 1..4 dbrp 1..1024 \ + dbrp-inc 1 + clock 80000000 + +Example when 'fd-non-iso on' is added on this switchable CAN FD adapter:: + + can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 + + +Supported CAN Hardware +---------------------- + +Please check the "Kconfig" file in "drivers/net/can" to get an actual +list of the support CAN hardware. On the SocketCAN project website +(see :ref:`socketcan-resources`) there might be further drivers available, also for +older kernel versions. + + +.. _socketcan-resources: + +SocketCAN Resources +=================== + +The Linux CAN / SocketCAN project resources (project site / mailing list) +are referenced in the MAINTAINERS file in the Linux source tree. +Search for CAN NETWORK [LAYERS|DRIVERS]. + +Credits +======= + +- Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver) +- Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan) +- Jan Kizka (RT-SocketCAN core, Socket-API reconciliation) +- Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews, CAN device driver interface, MSCAN driver) +- Robert Schwebel (design reviews, PTXdist integration) +- Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers) +- Benedikt Spranger (reviews) +- Thomas Gleixner (LKML reviews, coding style, posting hints) +- Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver) +- Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003) +- Klaus Hitschler (PEAK driver integration) +- Uwe Koppe (CAN netdevices with PF_PACKET approach) +- Michael Schulze (driver layer loopback requirement, RT CAN drivers review) +- Pavel Pisa (Bit-timing calculation) +- Sascha Hauer (SJA1000 platform driver) +- Sebastian Haas (SJA1000 EMS PCI driver) +- Markus Plessing (SJA1000 EMS PCI driver) +- Per Dalen (SJA1000 Kvaser PCI driver) +- Sam Ravnborg (reviews, coding style, kbuild help) diff --git a/Documentation/networking/can_ucan_protocol.rst b/Documentation/networking/can_ucan_protocol.rst new file mode 100644 index 000000000..638ac1ee7 --- /dev/null +++ b/Documentation/networking/can_ucan_protocol.rst @@ -0,0 +1,332 @@ +================= +The UCAN Protocol +================= + +UCAN is the protocol used by the microcontroller-based USB-CAN +adapter that is integrated on System-on-Modules from Theobroma Systems +and that is also available as a standalone USB stick. + +The UCAN protocol has been designed to be hardware-independent. +It is modeled closely after how Linux represents CAN devices +internally. All multi-byte integers are encoded as Little Endian. + +All structures mentioned in this document are defined in +``drivers/net/can/usb/ucan.c``. + +USB Endpoints +============= + +UCAN devices use three USB endpoints: + +CONTROL endpoint + The driver sends device management commands on this endpoint + +IN endpoint + The device sends CAN data frames and CAN error frames + +OUT endpoint + The driver sends CAN data frames on the out endpoint + + +CONTROL Messages +================ + +UCAN devices are configured using vendor requests on the control pipe. + +To support multiple CAN interfaces in a single USB device all +configuration commands target the corresponding interface in the USB +descriptor. + +The driver uses ``ucan_ctrl_command_in/out`` and +``ucan_device_request_in`` to deliver commands to the device. + +Setup Packet +------------ + +================= ===================================================== +``bmRequestType`` Direction | Vendor | (Interface or Device) +``bRequest`` Command Number +``wValue`` Subcommand Number (16 Bit) or 0 if not used +``wIndex`` USB Interface Index (0 for device commands) +``wLength`` * Host to Device - Number of bytes to transmit + * Device to Host - Maximum Number of bytes to + receive. If the device send less. Commom ZLP + semantics are used. +================= ===================================================== + +Error Handling +-------------- + +The device indicates failed control commands by stalling the +pipe. + +Device Commands +--------------- + +UCAN_DEVICE_GET_FW_STRING +~~~~~~~~~~~~~~~~~~~~~~~~~ + +*Dev2Host; optional* + +Request the device firmware string. + + +Interface Commands +------------------ + +UCAN_COMMAND_START +~~~~~~~~~~~~~~~~~~ + +*Host2Dev; mandatory* + +Bring the CAN interface up. + +Payload Format + ``ucan_ctl_payload_t.cmd_start`` + +==== ============================ +mode or mask of ``UCAN_MODE_*`` +==== ============================ + +UCAN_COMMAND_STOP +~~~~~~~~~~~~~~~~~~ + +*Host2Dev; mandatory* + +Stop the CAN interface + +Payload Format + *empty* + +UCAN_COMMAND_RESET +~~~~~~~~~~~~~~~~~~ + +*Host2Dev; mandatory* + +Reset the CAN controller (including error counters) + +Payload Format + *empty* + +UCAN_COMMAND_GET +~~~~~~~~~~~~~~~~ + +*Host2Dev; mandatory* + +Get Information from the Device + +Subcommands +^^^^^^^^^^^ + +UCAN_COMMAND_GET_INFO + Request the device information structure ``ucan_ctl_payload_t.device_info``. + + See the ``device_info`` field for details, and + ``uapi/linux/can/netlink.h`` for an explanation of the + ``can_bittiming fields``. + + Payload Format + ``ucan_ctl_payload_t.device_info`` + +UCAN_COMMAND_GET_PROTOCOL_VERSION + + Request the device protocol version + ``ucan_ctl_payload_t.protocol_version``. The current protocol version is 3. + + Payload Format + ``ucan_ctl_payload_t.protocol_version`` + +.. note:: Devices that do not implement this command use the old + protocol version 1 + +UCAN_COMMAND_SET_BITTIMING +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*Host2Dev; mandatory* + +Setup bittiming by sending the structure +``ucan_ctl_payload_t.cmd_set_bittiming`` (see ``struct bittiming`` for +details) + +Payload Format + ``ucan_ctl_payload_t.cmd_set_bittiming``. + +UCAN_SLEEP/WAKE +~~~~~~~~~~~~~~~ + +*Host2Dev; optional* + +Configure sleep and wake modes. Not yet supported by the driver. + +UCAN_FILTER +~~~~~~~~~~~ + +*Host2Dev; optional* + +Setup hardware CAN filters. Not yet supported by the driver. + +Allowed interface commands +-------------------------- + +================== =================== ================== +Legal Device State Command New Device State +================== =================== ================== +stopped SET_BITTIMING stopped +stopped START started +started STOP or RESET stopped +stopped STOP or RESET stopped +started RESTART started +any GET *no change* +================== =================== ================== + +IN Message Format +================= + +A data packet on the USB IN endpoint contains one or more +``ucan_message_in`` values. If multiple messages are batched in a USB +data packet, the ``len`` field can be used to jump to the next +``ucan_message_in`` value (take care to sanity-check the ``len`` value +against the actual data size). + +.. _can_ucan_in_message_len: + +``len`` field +------------- + +Each ``ucan_message_in`` must be aligned to a 4-byte boundary (relative +to the start of the start of the data buffer). That means that there +may be padding bytes between multiple ``ucan_message_in`` values: + +.. code:: + + +----------------------------+ < 0 + | | + | struct ucan_message_in | + | | + +----------------------------+ < len + [padding] + +----------------------------+ < round_up(len, 4) + | | + | struct ucan_message_in | + | | + +----------------------------+ + [...] + +``type`` field +-------------- + +The ``type`` field specifies the type of the message. + +UCAN_IN_RX +~~~~~~~~~~ + +``subtype`` + zero + +Data received from the CAN bus (ID + payload). + +UCAN_IN_TX_COMPLETE +~~~~~~~~~~~~~~~~~~~ + +``subtype`` + zero + +The CAN device has sent a message to the CAN bus. It answers with a +list of tuples . + +The echo-id identifies the frame from (echos the id from a previous +UCAN_OUT_TX message). The flag indicates the result of the +transmission. Whereas a set Bit 0 indicates success. All other bits +are reserved and set to zero. + +Flow Control +------------ + +When receiving CAN messages there is no flow control on the USB +buffer. The driver has to handle inbound message quickly enough to +avoid drops. I case the device buffer overflow the condition is +reported by sending corresponding error frames (see +:ref:`can_ucan_error_handling`) + + +OUT Message Format +================== + +A data packet on the USB OUT endpoint contains one or more ``struct +ucan_message_out`` values. If multiple messages are batched into one +data packet, the device uses the ``len`` field to jump to the next +ucan_message_out value. Each ucan_message_out must be aligned to 4 +bytes (relative to the start of the data buffer). The mechanism is +same as described in :ref:`can_ucan_in_message_len`. + +.. code:: + + +----------------------------+ < 0 + | | + | struct ucan_message_out | + | | + +----------------------------+ < len + [padding] + +----------------------------+ < round_up(len, 4) + | | + | struct ucan_message_out | + | | + +----------------------------+ + [...] + +``type`` field +-------------- + +In protocol version 3 only ``UCAN_OUT_TX`` is defined, others are used +only by legacy devices (protocol version 1). + +UCAN_OUT_TX +~~~~~~~~~~~ +``subtype`` + echo id to be replied within a CAN_IN_TX_COMPLETE message + +Transmit a CAN frame. (parameters: ``id``, ``data``) + +Flow Control +------------ + +When the device outbound buffers are full it starts sending *NAKs* on +the *OUT* pipe until more buffers are available. The driver stops the +queue when a certain threshold of out packets are incomplete. + +.. _can_ucan_error_handling: + +CAN Error Handling +================== + +If error reporting is turned on the device encodes errors into CAN +error frames (see ``uapi/linux/can/error.h``) and sends it using the +IN endpoint. The driver updates its error statistics and forwards +it. + +Although UCAN devices can suppress error frames completely, in Linux +the driver is always interested. Hence, the device is always started with +the ``UCAN_MODE_BERR_REPORT`` set. Filtering those messages for the +user space is done by the driver. + +Bus OFF +------- + +- The device does not recover from bus of automatically. +- Bus OFF is indicated by an error frame (see ``uapi/linux/can/error.h``) +- Bus OFF recovery is started by ``UCAN_COMMAND_RESTART`` +- Once Bus OFF recover is completed the device sends an error frame + indicating that it is on ERROR-ACTIVE state. +- During Bus OFF no frames are sent by the device. +- During Bus OFF transmission requests from the host are completed + immediately with the success bit left unset. + +Example Conversation +==================== + +#) Device is connected to USB +#) Host sends command ``UCAN_COMMAND_RESET``, subcmd 0 +#) Host sends command ``UCAN_COMMAND_GET``, subcmd ``UCAN_COMMAND_GET_INFO`` +#) Device sends ``UCAN_IN_DEVICE_INFO`` +#) Host sends command ``UCAN_OUT_SET_BITTIMING`` +#) Host sends command ``UCAN_COMMAND_START``, subcmd 0, mode ``UCAN_MODE_BERR_REPORT`` diff --git a/Documentation/networking/cdc_mbim.rst b/Documentation/networking/cdc_mbim.rst new file mode 100644 index 000000000..0048409c0 --- /dev/null +++ b/Documentation/networking/cdc_mbim.rst @@ -0,0 +1,355 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================================================== +cdc_mbim - Driver for CDC MBIM Mobile Broadband modems +====================================================== + +The cdc_mbim driver supports USB devices conforming to the "Universal +Serial Bus Communications Class Subclass Specification for Mobile +Broadband Interface Model" [1], which is a further development of +"Universal Serial Bus Communications Class Subclass Specifications for +Network Control Model Devices" [2] optimized for Mobile Broadband +devices, aka "3G/LTE modems". + + +Command Line Parameters +======================= + +The cdc_mbim driver has no parameters of its own. But the probing +behaviour for NCM 1.0 backwards compatible MBIM functions (an +"NCM/MBIM function" as defined in section 3.2 of [1]) is affected +by a cdc_ncm driver parameter: + +prefer_mbim +----------- +:Type: Boolean +:Valid Range: N/Y (0-1) +:Default Value: Y (MBIM is preferred) + +This parameter sets the system policy for NCM/MBIM functions. Such +functions will be handled by either the cdc_ncm driver or the cdc_mbim +driver depending on the prefer_mbim setting. Setting prefer_mbim=N +makes the cdc_mbim driver ignore these functions and lets the cdc_ncm +driver handle them instead. + +The parameter is writable, and can be changed at any time. A manual +unbind/bind is required to make the change effective for NCM/MBIM +functions bound to the "wrong" driver + + +Basic usage +=========== + +MBIM functions are inactive when unmanaged. The cdc_mbim driver only +provides a userspace interface to the MBIM control channel, and will +not participate in the management of the function. This implies that a +userspace MBIM management application always is required to enable a +MBIM function. + +Such userspace applications includes, but are not limited to: + + - mbimcli (included with the libmbim [3] library), and + - ModemManager [4] + +Establishing a MBIM IP session reequires at least these actions by the +management application: + + - open the control channel + - configure network connection settings + - connect to network + - configure IP interface + +Management application development +---------------------------------- +The driver <-> userspace interfaces are described below. The MBIM +control channel protocol is described in [1]. + + +MBIM control channel userspace ABI +================================== + +/dev/cdc-wdmX character device +------------------------------ +The driver creates a two-way pipe to the MBIM function control channel +using the cdc-wdm driver as a subdriver. The userspace end of the +control channel pipe is a /dev/cdc-wdmX character device. + +The cdc_mbim driver does not process or police messages on the control +channel. The channel is fully delegated to the userspace management +application. It is therefore up to this application to ensure that it +complies with all the control channel requirements in [1]. + +The cdc-wdmX device is created as a child of the MBIM control +interface USB device. The character device associated with a specific +MBIM function can be looked up using sysfs. For example:: + + bjorn@nemi:~$ ls /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc + cdc-wdm0 + + bjorn@nemi:~$ grep . /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc/cdc-wdm0/dev + 180:0 + + +USB configuration descriptors +----------------------------- +The wMaxControlMessage field of the CDC MBIM functional descriptor +limits the maximum control message size. The managament application is +responsible for negotiating a control message size complying with the +requirements in section 9.3.1 of [1], taking this descriptor field +into consideration. + +The userspace application can access the CDC MBIM functional +descriptor of a MBIM function using either of the two USB +configuration descriptor kernel interfaces described in [6] or [7]. + +See also the ioctl documentation below. + + +Fragmentation +------------- +The userspace application is responsible for all control message +fragmentation and defragmentaion, as described in section 9.5 of [1]. + + +/dev/cdc-wdmX write() +--------------------- +The MBIM control messages from the management application *must not* +exceed the negotiated control message size. + + +/dev/cdc-wdmX read() +-------------------- +The management application *must* accept control messages of up the +negotiated control message size. + + +/dev/cdc-wdmX ioctl() +--------------------- +IOCTL_WDM_MAX_COMMAND: Get Maximum Command Size +This ioctl returns the wMaxControlMessage field of the CDC MBIM +functional descriptor for MBIM devices. This is intended as a +convenience, eliminating the need to parse the USB descriptors from +userspace. + +:: + + #include + #include + #include + #include + #include + int main() + { + __u16 max; + int fd = open("/dev/cdc-wdm0", O_RDWR); + if (!ioctl(fd, IOCTL_WDM_MAX_COMMAND, &max)) + printf("wMaxControlMessage is %d\n", max); + } + + +Custom device services +---------------------- +The MBIM specification allows vendors to freely define additional +services. This is fully supported by the cdc_mbim driver. + +Support for new MBIM services, including vendor specified services, is +implemented entirely in userspace, like the rest of the MBIM control +protocol + +New services should be registered in the MBIM Registry [5]. + + + +MBIM data channel userspace ABI +=============================== + +wwanY network device +-------------------- +The cdc_mbim driver represents the MBIM data channel as a single +network device of the "wwan" type. This network device is initially +mapped to MBIM IP session 0. + + +Multiplexed IP sessions (IPS) +----------------------------- +MBIM allows multiplexing up to 256 IP sessions over a single USB data +channel. The cdc_mbim driver models such IP sessions as 802.1q VLAN +subdevices of the master wwanY device, mapping MBIM IP session Z to +VLAN ID Z for all values of Z greater than 0. + +The device maximum Z is given in the MBIM_DEVICE_CAPS_INFO structure +described in section 10.5.1 of [1]. + +The userspace management application is responsible for adding new +VLAN links prior to establishing MBIM IP sessions where the SessionId +is greater than 0. These links can be added by using the normal VLAN +kernel interfaces, either ioctl or netlink. + +For example, adding a link for a MBIM IP session with SessionId 3:: + + ip link add link wwan0 name wwan0.3 type vlan id 3 + +The driver will automatically map the "wwan0.3" network device to MBIM +IP session 3. + + +Device Service Streams (DSS) +---------------------------- +MBIM also allows up to 256 non-IP data streams to be multiplexed over +the same shared USB data channel. The cdc_mbim driver models these +sessions as another set of 802.1q VLAN subdevices of the master wwanY +device, mapping MBIM DSS session A to VLAN ID (256 + A) for all values +of A. + +The device maximum A is given in the MBIM_DEVICE_SERVICES_INFO +structure described in section 10.5.29 of [1]. + +The DSS VLAN subdevices are used as a practical interface between the +shared MBIM data channel and a MBIM DSS aware userspace application. +It is not intended to be presented as-is to an end user. The +assumption is that a userspace application initiating a DSS session +also takes care of the necessary framing of the DSS data, presenting +the stream to the end user in an appropriate way for the stream type. + +The network device ABI requires a dummy ethernet header for every DSS +data frame being transported. The contents of this header is +arbitrary, with the following exceptions: + + - TX frames using an IP protocol (0x0800 or 0x86dd) will be dropped + - RX frames will have the protocol field set to ETH_P_802_3 (but will + not be properly formatted 802.3 frames) + - RX frames will have the destination address set to the hardware + address of the master device + +The DSS supporting userspace management application is responsible for +adding the dummy ethernet header on TX and stripping it on RX. + +This is a simple example using tools commonly available, exporting +DssSessionId 5 as a pty character device pointed to by a /dev/nmea +symlink:: + + ip link add link wwan0 name wwan0.dss5 type vlan id 261 + ip link set dev wwan0.dss5 up + socat INTERFACE:wwan0.dss5,type=2 PTY:,echo=0,link=/dev/nmea + +This is only an example, most suitable for testing out a DSS +service. Userspace applications supporting specific MBIM DSS services +are expected to use the tools and programming interfaces required by +that service. + +Note that adding VLAN links for DSS sessions is entirely optional. A +management application may instead choose to bind a packet socket +directly to the master network device, using the received VLAN tags to +map frames to the correct DSS session and adding 18 byte VLAN ethernet +headers with the appropriate tag on TX. In this case using a socket +filter is recommended, matching only the DSS VLAN subset. This avoid +unnecessary copying of unrelated IP session data to userspace. For +example:: + + static struct sock_filter dssfilter[] = { + /* use special negative offsets to get VLAN tag */ + BPF_STMT(BPF_LD|BPF_B|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 1, 0, 6), /* true */ + + /* verify DSS VLAN range */ + BPF_STMT(BPF_LD|BPF_H|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG), + BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 256, 0, 4), /* 256 is first DSS VLAN */ + BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 512, 3, 0), /* 511 is last DSS VLAN */ + + /* verify ethertype */ + BPF_STMT(BPF_LD|BPF_H|BPF_ABS, 2 * ETH_ALEN), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, ETH_P_802_3, 0, 1), + + BPF_STMT(BPF_RET|BPF_K, (u_int)-1), /* accept */ + BPF_STMT(BPF_RET|BPF_K, 0), /* ignore */ + }; + + + +Tagged IP session 0 VLAN +------------------------ +As described above, MBIM IP session 0 is treated as special by the +driver. It is initially mapped to untagged frames on the wwanY +network device. + +This mapping implies a few restrictions on multiplexed IPS and DSS +sessions, which may not always be practical: + + - no IPS or DSS session can use a frame size greater than the MTU on + IP session 0 + - no IPS or DSS session can be in the up state unless the network + device representing IP session 0 also is up + +These problems can be avoided by optionally making the driver map IP +session 0 to a VLAN subdevice, similar to all other IP sessions. This +behaviour is triggered by adding a VLAN link for the magic VLAN ID +4094. The driver will then immediately start mapping MBIM IP session +0 to this VLAN, and will drop untagged frames on the master wwanY +device. + +Tip: It might be less confusing to the end user to name this VLAN +subdevice after the MBIM SessionID instead of the VLAN ID. For +example:: + + ip link add link wwan0 name wwan0.0 type vlan id 4094 + + +VLAN mapping +------------ + +Summarizing the cdc_mbim driver mapping described above, we have this +relationship between VLAN tags on the wwanY network device and MBIM +sessions on the shared USB data channel:: + + VLAN ID MBIM type MBIM SessionID Notes + --------------------------------------------------------- + untagged IPS 0 a) + 1 - 255 IPS 1 - 255 + 256 - 511 DSS 0 - 255 + 512 - 4093 b) + 4094 IPS 0 c) + + a) if no VLAN ID 4094 link exists, else dropped + b) unsupported VLAN range, unconditionally dropped + c) if a VLAN ID 4094 link exists, else dropped + + + + +References +========== + + 1) USB Implementers Forum, Inc. - "Universal Serial Bus + Communications Class Subclass Specification for Mobile Broadband + Interface Model", Revision 1.0 (Errata 1), May 1, 2013 + + - http://www.usb.org/developers/docs/devclass_docs/ + + 2) USB Implementers Forum, Inc. - "Universal Serial Bus + Communications Class Subclass Specifications for Network Control + Model Devices", Revision 1.0 (Errata 1), November 24, 2010 + + - http://www.usb.org/developers/docs/devclass_docs/ + + 3) libmbim - "a glib-based library for talking to WWAN modems and + devices which speak the Mobile Interface Broadband Model (MBIM) + protocol" + + - http://www.freedesktop.org/wiki/Software/libmbim/ + + 4) ModemManager - "a DBus-activated daemon which controls mobile + broadband (2G/3G/4G) devices and connections" + + - http://www.freedesktop.org/wiki/Software/ModemManager/ + + 5) "MBIM (Mobile Broadband Interface Model) Registry" + + - http://compliance.usb.org/mbim/ + + 6) "/sys/kernel/debug/usb/devices output format" + + - Documentation/driver-api/usb/usb.rst + + 7) "/sys/bus/usb/devices/.../descriptors" + + - Documentation/ABI/stable/sysfs-bus-usb diff --git a/Documentation/networking/checksum-offloads.rst b/Documentation/networking/checksum-offloads.rst new file mode 100644 index 000000000..69b23cf68 --- /dev/null +++ b/Documentation/networking/checksum-offloads.rst @@ -0,0 +1,143 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================= +Checksum Offloads +================= + + +Introduction +============ + +This document describes a set of techniques in the Linux networking stack to +take advantage of checksum offload capabilities of various NICs. + +The following technologies are described: + +* TX Checksum Offload +* LCO: Local Checksum Offload +* RCO: Remote Checksum Offload + +Things that should be documented here but aren't yet: + +* RX Checksum Offload +* CHECKSUM_UNNECESSARY conversion + + +TX Checksum Offload +=================== + +The interface for offloading a transmit checksum to a device is explained in +detail in comments near the top of include/linux/skbuff.h. + +In brief, it allows to request the device fill in a single ones-complement +checksum defined by the sk_buff fields skb->csum_start and skb->csum_offset. +The device should compute the 16-bit ones-complement checksum (i.e. the +'IP-style' checksum) from csum_start to the end of the packet, and fill in the +result at (csum_start + csum_offset). + +Because csum_offset cannot be negative, this ensures that the previous value of +the checksum field is included in the checksum computation, thus it can be used +to supply any needed corrections to the checksum (such as the sum of the +pseudo-header for UDP or TCP). + +This interface only allows a single checksum to be offloaded. Where +encapsulation is used, the packet may have multiple checksum fields in +different header layers, and the rest will have to be handled by another +mechanism such as LCO or RCO. + +CRC32c can also be offloaded using this interface, by means of filling +skb->csum_start and skb->csum_offset as described above, and setting +skb->csum_not_inet: see skbuff.h comment (section 'D') for more details. + +No offloading of the IP header checksum is performed; it is always done in +software. This is OK because when we build the IP header, we obviously have it +in cache, so summing it isn't expensive. It's also rather short. + +The requirements for GSO are more complicated, because when segmenting an +encapsulated packet both the inner and outer checksums may need to be edited or +recomputed for each resulting segment. See the skbuff.h comment (section 'E') +for more details. + +A driver declares its offload capabilities in netdev->hw_features; see +Documentation/networking/netdev-features.rst for more. Note that a device +which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start and +csum_offset given in the SKB; if it tries to deduce these itself in hardware +(as some NICs do) the driver should check that the values in the SKB match +those which the hardware will deduce, and if not, fall back to checksumming in +software instead (with skb_csum_hwoffload_help() or one of the +skb_checksum_help() / skb_crc32c_csum_help functions, as mentioned in +include/linux/skbuff.h). + +The stack should, for the most part, assume that checksum offload is supported +by the underlying device. The only place that should check is +validate_xmit_skb(), and the functions it calls directly or indirectly. That +function compares the offload features requested by the SKB (which may include +other offloads besides TX Checksum Offload) and, if they are not supported or +enabled on the device (determined by netdev->features), performs the +corresponding offload in software. In the case of TX Checksum Offload, that +means calling skb_csum_hwoffload_help(skb, features). + + +LCO: Local Checksum Offload +=========================== + +LCO is a technique for efficiently computing the outer checksum of an +encapsulated datagram when the inner checksum is due to be offloaded. + +The ones-complement sum of a correctly checksummed TCP or UDP packet is equal +to the complement of the sum of the pseudo header, because everything else gets +'cancelled out' by the checksum field. This is because the sum was +complemented before being written to the checksum field. + +More generally, this holds in any case where the 'IP-style' ones complement +checksum is used, and thus any checksum that TX Checksum Offload supports. + +That is, if we have set up TX Checksum Offload with a start/offset pair, we +know that after the device has filled in that checksum, the ones complement sum +from csum_start to the end of the packet will be equal to the complement of +whatever value we put in the checksum field beforehand. This allows us to +compute the outer checksum without looking at the payload: we simply stop +summing when we get to csum_start, then add the complement of the 16-bit word +at (csum_start + csum_offset). + +Then, when the true inner checksum is filled in (either by hardware or by +skb_checksum_help()), the outer checksum will become correct by virtue of the +arithmetic. + +LCO is performed by the stack when constructing an outer UDP header for an +encapsulation such as VXLAN or GENEVE, in udp_set_csum(). Similarly for the +IPv6 equivalents, in udp6_set_csum(). + +It is also performed when constructing an IPv4 GRE header, in +net/ipv4/ip_gre.c:build_header(). It is *not* currently performed when +constructing an IPv6 GRE header; the GRE checksum is computed over the whole +packet in net/ipv6/ip6_gre.c:ip6gre_xmit2(), but it should be possible to use +LCO here as IPv6 GRE still uses an IP-style checksum. + +All of the LCO implementations use a helper function lco_csum(), in +include/linux/skbuff.h. + +LCO can safely be used for nested encapsulations; in this case, the outer +encapsulation layer will sum over both its own header and the 'middle' header. +This does mean that the 'middle' header will get summed multiple times, but +there doesn't seem to be a way to avoid that without incurring bigger costs +(e.g. in SKB bloat). + + +RCO: Remote Checksum Offload +============================ + +RCO is a technique for eliding the inner checksum of an encapsulated datagram, +allowing the outer checksum to be offloaded. It does, however, involve a +change to the encapsulation protocols, which the receiver must also support. +For this reason, it is disabled by default. + +RCO is detailed in the following Internet-Drafts: + +* https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00 +* https://tools.ietf.org/html/draft-herbert-vxlan-rco-00 + +In Linux, RCO is implemented individually in each encapsulation protocol, and +most tunnel types have flags controlling its use. For instance, VXLAN has the +flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that RCO should be +used when transmitting to a given remote destination. diff --git a/Documentation/networking/dccp.rst b/Documentation/networking/dccp.rst new file mode 100644 index 000000000..91e5c33ba --- /dev/null +++ b/Documentation/networking/dccp.rst @@ -0,0 +1,219 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +DCCP protocol +============= + + +.. Contents + - Introduction + - Missing features + - Socket options + - Sysctl variables + - IOCTLs + - Other tunables + - Notes + + +Introduction +============ +Datagram Congestion Control Protocol (DCCP) is an unreliable, connection +oriented protocol designed to solve issues present in UDP and TCP, particularly +for real-time and multimedia (streaming) traffic. +It divides into a base protocol (RFC 4340) and pluggable congestion control +modules called CCIDs. Like pluggable TCP congestion control, at least one CCID +needs to be enabled in order for the protocol to function properly. In the Linux +implementation, this is the TCP-like CCID2 (RFC 4341). Additional CCIDs, such as +the TCP-friendly CCID3 (RFC 4342), are optional. +For a brief introduction to CCIDs and suggestions for choosing a CCID to match +given applications, see section 10 of RFC 4340. + +It has a base protocol and pluggable congestion control IDs (CCIDs). + +DCCP is a Proposed Standard (RFC 2026), and the homepage for DCCP as a protocol +is at http://www.ietf.org/html.charters/dccp-charter.html + + +Missing features +================ +The Linux DCCP implementation does not currently support all the features that are +specified in RFCs 4340...42. + +The known bugs are at: + + http://www.linuxfoundation.org/collaborate/workgroups/networking/todo#DCCP + +For more up-to-date versions of the DCCP implementation, please consider using +the experimental DCCP test tree; instructions for checking this out are on: +http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp_testing#Experimental_DCCP_source_tree + + +Socket options +============== +DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes +a policy ID as argument and can only be set before the connection (i.e. changes +during an established connection are not supported). Currently, two policies are +defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special, +and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an +u32 priority value as ancillary data to sendmsg(), where higher numbers indicate +a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to +be formatted using a cmsg(3) message header filled in as follows:: + + cmsg->cmsg_level = SOL_DCCP; + cmsg->cmsg_type = DCCP_SCM_PRIORITY; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); /* or CMSG_LEN(4) */ + +DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero +value is always interpreted as unbounded queue length. If different from zero, +the interpretation of this parameter depends on the current dequeuing policy +(see above): the "simple" policy will enforce a fixed queue size by returning +EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the +lowest-priority packet first. The default value for this parameter is +initialised from /proc/sys/net/dccp/default/tx_qlen. + +DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of +service codes (RFC 4340, sec. 8.1.2); if this socket option is not set, +the socket will fall back to 0 (which means that no meaningful service code +is present). On active sockets this is set before connect(); specifying more +than one code has no effect (all subsequent service codes are ignored). The +case is different for passive sockets, where multiple service codes (up to 32) +can be set before calling bind(). + +DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet +size (application payload size) in bytes, see RFC 4340, section 14. + +DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs +supported by the endpoint. The option value is an array of type uint8_t whose +size is passed as option length. The minimum array size is 4 elements, the +value returned in the optlen argument always reflects the true number of +built-in CCIDs. + +DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same +time, combining the operation of the next two socket options. This option is +preferable over the latter two, since often applications will use the same +type of CCID for both directions; and mixed use of CCIDs is not currently well +understood. This socket option takes as argument at least one uint8_t value, or +an array of uint8_t values, which must match available CCIDS (see above). CCIDs +must be registered on the socket before calling connect() or listen(). + +DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets +the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID. +Please note that the getsockopt argument type here is ``int``, not uint8_t. + +DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID. + +DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold +timewait state when closing the connection (RFC 4340, 8.3). The usual case is +that the closing server sends a CloseReq, whereupon the client holds timewait +state. When this boolean socket option is on, the server sends a Close instead +and will enter TIMEWAIT. This option must be set after accept() returns. + +DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV are used for setting the +partial checksum coverage (RFC 4340, sec. 9.2). The default is that checksums +always cover the entire packet and that only fully covered application data is +accepted by the receiver. Hence, when using this feature on the sender, it must +be enabled at the receiver, too with suitable choice of CsCov. + +DCCP_SOCKOPT_SEND_CSCOV sets the sender checksum coverage. Values in the + range 0..15 are acceptable. The default setting is 0 (full coverage), + values between 1..15 indicate partial coverage. + +DCCP_SOCKOPT_RECV_CSCOV is for the receiver and has a different meaning: it + sets a threshold, where again values 0..15 are acceptable. The default + of 0 means that all packets with a partial coverage will be discarded. + Values in the range 1..15 indicate that packets with minimally such a + coverage value are also acceptable. The higher the number, the more + restrictive this setting (see [RFC 4340, sec. 9.2.1]). Partial coverage + settings are inherited to the child socket after accept(). + +The following two options apply to CCID 3 exclusively and are getsockopt()-only. +In either case, a TFRC info struct (defined in ) is returned. + +DCCP_SOCKOPT_CCID_RX_INFO + Returns a ``struct tfrc_rx_info`` in optval; the buffer for optval and + optlen must be set to at least sizeof(struct tfrc_rx_info). + +DCCP_SOCKOPT_CCID_TX_INFO + Returns a ``struct tfrc_tx_info`` in optval; the buffer for optval and + optlen must be set to at least sizeof(struct tfrc_tx_info). + +On unidirectional connections it is useful to close the unused half-connection +via shutdown (SHUT_WR or SHUT_RD): this will reduce per-packet processing costs. + + +Sysctl variables +================ +Several DCCP default parameters can be managed by the following sysctls +(sysctl net.dccp.default or /proc/sys/net/dccp/default): + +request_retries + The number of active connection initiation retries (the number of + Requests minus one) before timing out. In addition, it also governs + the behaviour of the other, passive side: this variable also sets + the number of times DCCP repeats sending a Response when the initial + handshake does not progress from RESPOND to OPEN (i.e. when no Ack + is received after the initial Request). This value should be greater + than 0, suggested is less than 10. Analogue of tcp_syn_retries. + +retries1 + How often a DCCP Response is retransmitted until the listening DCCP + side considers its connecting peer dead. Analogue of tcp_retries1. + +retries2 + The number of times a general DCCP packet is retransmitted. This has + importance for retransmitted acknowledgments and feature negotiation, + data packets are never retransmitted. Analogue of tcp_retries2. + +tx_ccid = 2 + Default CCID for the sender-receiver half-connection. Depending on the + choice of CCID, the Send Ack Vector feature is enabled automatically. + +rx_ccid = 2 + Default CCID for the receiver-sender half-connection; see tx_ccid. + +seq_window = 100 + The initial sequence window (sec. 7.5.2) of the sender. This influences + the local ackno validity and the remote seqno validity windows (7.5.1). + Values in the range Wmin = 32 (RFC 4340, 7.5.2) up to 2^32-1 can be set. + +tx_qlen = 5 + The size of the transmit buffer in packets. A value of 0 corresponds + to an unbounded transmit buffer. + +sync_ratelimit = 125 ms + The timeout between subsequent DCCP-Sync packets sent in response to + sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit + of this parameter is milliseconds; a value of 0 disables rate-limiting. + + +IOCTLS +====== +FIONREAD + Works as in udp(7): returns in the ``int`` argument pointer the size of + the next pending datagram in bytes, or 0 when no datagram is pending. + +SIOCOUTQ + Returns the number of unsent data bytes in the socket send queue as ``int`` + into the buffer specified by the argument pointer. + +Other tunables +============== +Per-route rto_min support + CCID-2 supports the RTAX_RTO_MIN per-route setting for the minimum value + of the RTO timer. This setting can be modified via the 'rto_min' option + of iproute2; for example:: + + > ip route change 10.0.0.0/24 rto_min 250j dev wlan0 + > ip route add 10.0.0.254/32 rto_min 800j dev wlan0 + > ip route show dev wlan0 + + CCID-3 also supports the rto_min setting: it is used to define the lower + bound for the expiry of the nofeedback timer. This can be useful on LANs + with very low RTTs (e.g., loopback, Gbit ethernet). + + +Notes +===== +DCCP does not travel through NAT successfully at present on many boxes. This is +because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT +support for DCCP has been added. diff --git a/Documentation/networking/dctcp.rst b/Documentation/networking/dctcp.rst new file mode 100644 index 000000000..4cc8bb2da --- /dev/null +++ b/Documentation/networking/dctcp.rst @@ -0,0 +1,52 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================== +DCTCP (DataCenter TCP) +====================== + +DCTCP is an enhancement to the TCP congestion control algorithm for data +center networks and leverages Explicit Congestion Notification (ECN) in +the data center network to provide multi-bit feedback to the end hosts. + +To enable it on end hosts:: + + sysctl -w net.ipv4.tcp_congestion_control=dctcp + sysctl -w net.ipv4.tcp_ecn_fallback=0 (optional) + +All switches in the data center network running DCTCP must support ECN +marking and be configured for marking when reaching defined switch buffer +thresholds. The default ECN marking threshold heuristic for DCTCP on +switches is 20 packets (30KB) at 1Gbps, and 65 packets (~100KB) at 10Gbps, +but might need further careful tweaking. + +For more details, see below documents: + +Paper: + +The algorithm is further described in detail in the following two +SIGCOMM/SIGMETRICS papers: + + i) Mohammad Alizadeh, Albert Greenberg, David A. Maltz, Jitendra Padhye, + Parveen Patel, Balaji Prabhakar, Sudipta Sengupta, and Murari Sridharan: + + "Data Center TCP (DCTCP)", Data Center Networks session" + + Proc. ACM SIGCOMM, New Delhi, 2010. + + http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf + http://www.sigcomm.org/ccr/papers/2010/October/1851275.1851192 + +ii) Mohammad Alizadeh, Adel Javanmard, and Balaji Prabhakar: + + "Analysis of DCTCP: Stability, Convergence, and Fairness" + Proc. ACM SIGMETRICS, San Jose, 2011. + + http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf + +IETF informational draft: + + http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00 + +DCTCP site: + + http://simula.stanford.edu/~alizade/Site/DCTCP.html diff --git a/Documentation/networking/device_drivers/appletalk/cops.rst b/Documentation/networking/device_drivers/appletalk/cops.rst new file mode 100644 index 000000000..964ba8059 --- /dev/null +++ b/Documentation/networking/device_drivers/appletalk/cops.rst @@ -0,0 +1,80 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================== +The COPS LocalTalk Linux driver (cops.c) +======================================== + +By Jay Schulist + +This driver has two modes and they are: Dayna mode and Tangent mode. +Each mode corresponds with the type of card. It has been found +that there are 2 main types of cards and all other cards are +the same and just have different names or only have minor differences +such as more IO ports. As this driver is tested it will +become more clear exactly what cards are supported. + +Right now these cards are known to work with the COPS driver. The +LT-200 cards work in a somewhat more limited capacity than the +DL200 cards, which work very well and are in use by many people. + +TANGENT driver mode: + - Tangent ATB-II, Novell NL-1000, Daystar Digital LT-200 + +DAYNA driver mode: + - Dayna DL2000/DaynaTalk PC (Half Length), COPS LT-95, + - Farallon PhoneNET PC III, Farallon PhoneNET PC II + +Other cards possibly supported mode unknown though: + - Dayna DL2000 (Full length) + +The COPS driver defaults to using Dayna mode. To change the driver's +mode if you built a driver with dual support use board_type=1 or +board_type=2 for Dayna or Tangent with insmod. + +Operation/loading of the driver +=============================== + +Use modprobe like this: /sbin/modprobe cops.o (IO #) (IRQ #) +If you do not specify any options the driver will try and use the IO = 0x240, +IRQ = 5. As of right now I would only use IRQ 5 for the card, if autoprobing. + +To load multiple COPS driver Localtalk cards you can do one of the following:: + + insmod cops io=0x240 irq=5 + insmod -o cops2 cops io=0x260 irq=3 + +Or in lilo.conf put something like this:: + + append="ether=5,0x240,lt0 ether=3,0x260,lt1" + +Then bring up the interface with ifconfig. It will look something like this:: + + lt0 Link encap:UNSPEC HWaddr 00-00-00-00-00-00-00-F7-00-00-00-00-00-00-00-00 + inet addr:192.168.1.2 Bcast:192.168.1.255 Mask:255.255.255.0 + UP BROADCAST RUNNING NOARP MULTICAST MTU:600 Metric:1 + RX packets:0 errors:0 dropped:0 overruns:0 frame:0 + TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 coll:0 + +Netatalk Configuration +====================== + +You will need to configure atalkd with something like the following to make +it work with the cops.c driver. + +* For single LTalk card use:: + + dummy -seed -phase 2 -net 2000 -addr 2000.10 -zone "1033" + lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033" + +* For multiple cards, Ethernet and LocalTalk:: + + eth0 -seed -phase 2 -net 3000 -addr 3000.20 -zone "1033" + lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033" + +* For multiple LocalTalk cards, and an Ethernet card. + +* Order seems to matter here, Ethernet last:: + + lt0 -seed -phase 1 -net 1000 -addr 1000.10 -zone "LocalTalk1" + lt1 -seed -phase 1 -net 2000 -addr 2000.20 -zone "LocalTalk2" + eth0 -seed -phase 2 -net 3000 -addr 3000.30 -zone "EtherTalk" diff --git a/Documentation/networking/device_drivers/appletalk/index.rst b/Documentation/networking/device_drivers/appletalk/index.rst new file mode 100644 index 000000000..de7507f02 --- /dev/null +++ b/Documentation/networking/device_drivers/appletalk/index.rst @@ -0,0 +1,19 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +AppleTalk Device Drivers +======================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + cops + ltpc + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/networking/device_drivers/appletalk/ltpc.rst b/Documentation/networking/device_drivers/appletalk/ltpc.rst new file mode 100644 index 000000000..0ad197fd1 --- /dev/null +++ b/Documentation/networking/device_drivers/appletalk/ltpc.rst @@ -0,0 +1,144 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========== +LTPC Driver +=========== + +This is the ALPHA version of the ltpc driver. + +In order to use it, you will need at least version 1.3.3 of the +netatalk package, and the Apple or Farallon LocalTalk PC card. +There are a number of different LocalTalk cards for the PC; this +driver applies only to the one with the 65c02 processor chip on it. + +To include it in the kernel, select the CONFIG_LTPC switch in the +configuration dialog. You can also compile it as a module. + +While the driver will attempt to autoprobe the I/O port address, IRQ +line, and DMA channel of the card, this does not always work. For +this reason, you should be prepared to supply these parameters +yourself. (see "Card Configuration" below for how to determine or +change the settings on your card) + +When the driver is compiled into the kernel, you can add a line such +as the following to your /etc/lilo.conf:: + + append="ltpc=0x240,9,1" + +where the parameters (in order) are the port address, IRQ, and DMA +channel. The second and third values can be omitted, in which case +the driver will try to determine them itself. + +If you load the driver as a module, you can pass the parameters "io=", +"irq=", and "dma=" on the command line with insmod or modprobe, or add +them as options in a configuration file in /etc/modprobe.d/ directory:: + + alias lt0 ltpc # autoload the module when the interface is configured + options ltpc io=0x240 irq=9 dma=1 + +Before starting up the netatalk demons (perhaps in rc.local), you +need to add a line such as:: + + /sbin/ifconfig lt0 127.0.0.42 + +The address is unimportant - however, the card needs to be configured +with ifconfig so that Netatalk can find it. + +The appropriate netatalk configuration depends on whether you are +attached to a network that includes AppleTalk routers or not. If, +like me, you are simply connecting to your home Macintoshes and +printers, you need to set up netatalk to "seed". The way I do this +is to have the lines:: + + dummy -seed -phase 2 -net 2000 -addr 2000.26 -zone "1033" + lt0 -seed -phase 1 -net 1033 -addr 1033.27 -zone "1033" + +in my atalkd.conf. What is going on here is that I need to fool +netatalk into thinking that there are two AppleTalk interfaces +present; otherwise, it refuses to seed. This is a hack, and a more +permanent solution would be to alter the netatalk code. Also, make +sure you have the correct name for the dummy interface - If it's +compiled as a module, you will need to refer to it as "dummy0" or some +such. + +If you are attached to an extended AppleTalk network, with routers on +it, then you don't need to fool around with this -- the appropriate +line in atalkd.conf is:: + + lt0 -phase 1 + + +Card Configuration +================== + +The interrupts and so forth are configured via the dipswitch on the +board. Set the switches so as not to conflict with other hardware. + + Interrupts -- set at most one. If none are set, the driver uses + polled mode. Because the card was developed in the XT era, the + original documentation refers to IRQ2. Since you'll be running + this on an AT (or later) class machine, that really means IRQ9. + + === =========================================================== + SW1 IRQ 4 + SW2 IRQ 3 + SW3 IRQ 9 (2 in original card documentation only applies to XT) + === =========================================================== + + + DMA -- choose DMA 1 or 3, and set both corresponding switches. + + === ===== + SW4 DMA 3 + SW5 DMA 1 + SW6 DMA 3 + SW7 DMA 1 + === ===== + + + I/O address -- choose one. + + === ========= + SW8 220 / 240 + === ========= + + +IP +== + +Yes, it is possible to do IP over LocalTalk. However, you can't just +treat the LocalTalk device like an ordinary Ethernet device, even if +that's what it looks like to Netatalk. + +Instead, you follow the same procedure as for doing IP in EtherTalk. +See Documentation/networking/ipddp.rst for more information about the +kernel driver and userspace tools needed. + + +Bugs +==== + +IRQ autoprobing often doesn't work on a cold boot. To get around +this, either compile the driver as a module, or pass the parameters +for the card to the kernel as described above. + +Also, as usual, autoprobing is not recommended when you use the driver +as a module. (though it usually works at boot time, at least) + +Polled mode is *really* slow sometimes, but this seems to depend on +the configuration of the network. + +It may theoretically be possible to use two LTPC cards in the same +machine, but this is unsupported, so if you really want to do this, +you'll probably have to hack the initialization code a bit. + + +Thanks +====== + +Thanks to Alan Cox for helpful discussions early on in this +work, and to Denis Hainsworth for doing the bleeding-edge testing. + +Bradford Johnson + +Updated 11/09/1998 by David Huggins-Daines diff --git a/Documentation/networking/device_drivers/atm/cxacru-cf.py b/Documentation/networking/device_drivers/atm/cxacru-cf.py new file mode 100644 index 000000000..b41d29839 --- /dev/null +++ b/Documentation/networking/device_drivers/atm/cxacru-cf.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# Copyright 2009 Simon Arlott +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# Usage: cxacru-cf.py < cxacru-cf.bin +# Output: values string suitable for the sysfs adsl_config attribute +# +# Warning: cxacru-cf.bin with MD5 hash cdbac2689969d5ed5d4850f117702110 +# contains mis-aligned values which will stop the modem from being able +# to make a connection. If the first and last two bytes are removed then +# the values become valid, but the modulation will be forced to ANSI +# T1.413 only which may not be appropriate. +# +# The original binary format is a packed list of le32 values. + +import sys +import struct + +i = 0 +while True: + buf = sys.stdin.read(4) + + if len(buf) == 0: + break + elif len(buf) != 4: + sys.stdout.write("\n") + sys.stderr.write("Error: read {0} not 4 bytes\n".format(len(buf))) + sys.exit(1) + + if i > 0: + sys.stdout.write(" ") + sys.stdout.write("{0:x}={1}".format(i, struct.unpack("=, + separated by whitespace, e.g.: + + "1=0 a=5" + + - Up to 7 parameters at a time will be sent and the modem will restart + the ADSL connection when any value is set. These are logged for future + reference. + +* downstream_attenuation (dB) +* downstream_bits_per_frame +* downstream_rate (kbps) +* downstream_snr_margin (dB) + + - Downstream stats. + +* upstream_attenuation (dB) +* upstream_bits_per_frame +* upstream_rate (kbps) +* upstream_snr_margin (dB) +* transmitter_power (dBm/Hz) + + - Upstream stats. + +* downstream_crc_errors +* downstream_fec_errors +* downstream_hec_errors +* upstream_crc_errors +* upstream_fec_errors +* upstream_hec_errors + + - Error counts. + +* line_startable + + - Indicates that ADSL support on the device + is/can be enabled, see adsl_start. + +* line_status + + - "initialising" + - "down" + - "attempting to activate" + - "training" + - "channel analysis" + - "exchange" + - "waiting" + - "up" + + Changes between "down" and "attempting to activate" + if there is no signal. + +* link_status + + - "not connected" + - "connected" + - "lost" + +* mac_address + +* modulation + + - "" (when not connected) + - "ANSI T1.413" + - "ITU-T G.992.1 (G.DMT)" + - "ITU-T G.992.2 (G.LITE)" + +* startup_attempts + + - Count of total attempts to initialise ADSL. + +To enable/disable ADSL, the following can be written to the adsl_state file: + + - "start" + - "stop + - "restart" (stops, waits 1.5s, then starts) + - "poll" (used to resume status polling if it was disabled due to failure) + +Changes in adsl/line state are reported via kernel log messages:: + + [4942145.150704] ATM dev 0: ADSL state: running + [4942243.663766] ATM dev 0: ADSL line: down + [4942249.665075] ATM dev 0: ADSL line: attempting to activate + [4942253.654954] ATM dev 0: ADSL line: training + [4942255.666387] ATM dev 0: ADSL line: channel analysis + [4942259.656262] ATM dev 0: ADSL line: exchange + [2635357.696901] ATM dev 0: ADSL line: up (8128 kb/s down | 832 kb/s up) diff --git a/Documentation/networking/device_drivers/atm/fore200e.rst b/Documentation/networking/device_drivers/atm/fore200e.rst new file mode 100644 index 000000000..55df9ec09 --- /dev/null +++ b/Documentation/networking/device_drivers/atm/fore200e.rst @@ -0,0 +1,66 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================= +FORE Systems PCA-200E/SBA-200E ATM NIC driver +============================================= + +This driver adds support for the FORE Systems 200E-series ATM adapters +to the Linux operating system. It is based on the earlier PCA-200E driver +written by Uwe Dannowski. + +The driver simultaneously supports PCA-200E and SBA-200E adapters on +i386, alpha (untested), powerpc, sparc and sparc64 archs. + +The intent is to enable the use of different models of FORE adapters at the +same time, by hosts that have several bus interfaces (such as PCI+SBUS, +or PCI+EISA). + +Only PCI and SBUS devices are currently supported by the driver, but support +for other bus interfaces such as EISA should not be too hard to add. + + +Firmware Copyright Notice +------------------------- + +Please read the fore200e_firmware_copyright file present +in the linux/drivers/atm directory for details and restrictions. + + +Firmware Updates +---------------- + +The FORE Systems 200E-series driver is shipped with firmware data being +uploaded to the ATM adapters at system boot time or at module loading time. +The supplied firmware images should work with all adapters. + +However, if you encounter problems (the firmware doesn't start or the driver +is unable to read the PROM data), you may consider trying another firmware +version. Alternative binary firmware images can be found somewhere on the +ForeThought CD-ROM supplied with your adapter by FORE Systems. + +You can also get the latest firmware images from FORE Systems at +https://en.wikipedia.org/wiki/FORE_Systems. Register TACTics Online and go to +the 'software updates' pages. The firmware binaries are part of +the various ForeThought software distributions. + +Notice that different versions of the PCA-200E firmware exist, depending +on the endianness of the host architecture. The driver is shipped with +both little and big endian PCA firmware images. + +Name and location of the new firmware images can be set at kernel +configuration time: + +1. Copy the new firmware binary files (with .bin, .bin1 or .bin2 suffix) + to some directory, such as linux/drivers/atm. + +2. Reconfigure your kernel to set the new firmware name and location. + Expected pathnames are absolute or relative to the drivers/atm directory. + +3. Rebuild and re-install your kernel or your module. + + +Feedback +-------- + +Feedback is welcome. Please send success stories/bug reports/ +patches/improvement/comments/flames to . diff --git a/Documentation/networking/device_drivers/atm/index.rst b/Documentation/networking/device_drivers/atm/index.rst new file mode 100644 index 000000000..7b593f031 --- /dev/null +++ b/Documentation/networking/device_drivers/atm/index.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +Asynchronous Transfer Mode (ATM) Device Drivers +=============================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + cxacru + fore200e + iphase + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/networking/device_drivers/atm/iphase.rst b/Documentation/networking/device_drivers/atm/iphase.rst new file mode 100644 index 000000000..92d9b757d --- /dev/null +++ b/Documentation/networking/device_drivers/atm/iphase.rst @@ -0,0 +1,193 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +ATM (i)Chip IA Linux Driver Source +================================== + + READ ME FISRT + +-------------------------------------------------------------------------------- + + Read This Before You Begin! + +-------------------------------------------------------------------------------- + +Description +=========== + +This is the README file for the Interphase PCI ATM (i)Chip IA Linux driver +source release. + +The features and limitations of this driver are as follows: + + - A single VPI (VPI value of 0) is supported. + - Supports 4K VCs for the server board (with 512K control memory) and 1K + VCs for the client board (with 128K control memory). + - UBR, ABR and CBR service categories are supported. + - Only AAL5 is supported. + - Supports setting of PCR on the VCs. + - Multiple adapters in a system are supported. + - All variants of Interphase ATM PCI (i)Chip adapter cards are supported, + including x575 (OC3, control memory 128K , 512K and packet memory 128K, + 512K and 1M), x525 (UTP25) and x531 (DS3 and E3). See + http://www.iphase.com/ + for details. + - Only x86 platforms are supported. + - SMP is supported. + + +Before You Start +================ + + +Installation +------------ + +1. Installing the adapters in the system + + To install the ATM adapters in the system, follow the steps below. + + a. Login as root. + b. Shut down the system and power off the system. + c. Install one or more ATM adapters in the system. + d. Connect each adapter to a port on an ATM switch. The green 'Link' + LED on the front panel of the adapter will be on if the adapter is + connected to the switch properly when the system is powered up. + e. Power on and boot the system. + +2. [ Removed ] + +3. Rebuild kernel with ABR support + + [ a. and b. removed ] + + c. Reconfigure the kernel, choose the Interphase ia driver through "make + menuconfig" or "make xconfig". + d. Rebuild the kernel, loadable modules and the atm tools. + e. Install the new built kernel and modules and reboot. + +4. Load the adapter hardware driver (ia driver) if it is built as a module + + a. Login as root. + b. Change directory to /lib/modules//atm. + c. Run "insmod suni.o;insmod iphase.o" + The yellow 'status' LED on the front panel of the adapter will blink + while the driver is loaded in the system. + d. To verify that the 'ia' driver is loaded successfully, run the + following command:: + + cat /proc/atm/devices + + If the driver is loaded successfully, the output of the command will + be similar to the following lines:: + + Itf Type ESI/"MAC"addr AAL(TX,err,RX,err,drop) ... + 0 ia xxxxxxxxx 0 ( 0 0 0 0 0 ) 5 ( 0 0 0 0 0 ) + + You can also check the system log file /var/log/messages for messages + related to the ATM driver. + +5. Ia Driver Configuration + +5.1 Configuration of adapter buffers + The (i)Chip boards have 3 different packet RAM size variants: 128K, 512K and + 1M. The RAM size decides the number of buffers and buffer size. The default + size and number of buffers are set as following: + + ========= ======= ====== ====== ====== ====== ====== + Total Rx RAM Tx RAM Rx Buf Tx Buf Rx buf Tx buf + RAM size size size size size cnt cnt + ========= ======= ====== ====== ====== ====== ====== + 128K 64K 64K 10K 10K 6 6 + 512K 256K 256K 10K 10K 25 25 + 1M 512K 512K 10K 10K 51 51 + ========= ======= ====== ====== ====== ====== ====== + + These setting should work well in most environments, but can be + changed by typing the following command:: + + insmod /ia.o IA_RX_BUF= IA_RX_BUF_SZ= \ + IA_TX_BUF= IA_TX_BUF_SZ= + + Where: + + - RX_CNT = number of receive buffers in the range (1-128) + - RX_SIZE = size of receive buffers in the range (48-64K) + - TX_CNT = number of transmit buffers in the range (1-128) + - TX_SIZE = size of transmit buffers in the range (48-64K) + + 1. Transmit and receive buffer size must be a multiple of 4. + 2. Care should be taken so that the memory required for the + transmit and receive buffers is less than or equal to the + total adapter packet memory. + +5.2 Turn on ia debug trace + + When the ia driver is built with the CONFIG_ATM_IA_DEBUG flag, the driver + can provide more debug trace if needed. There is a bit mask variable, + IADebugFlag, which controls the output of the traces. You can find the bit + map of the IADebugFlag in iphase.h. + The debug trace can be turn on through the insmod command line option, for + example, "insmod iphase.o IADebugFlag=0xffffffff" can turn on all the debug + traces together with loading the driver. + +6. Ia Driver Test Using ttcp_atm and PVC + + For the PVC setup, the test machines can either be connected back-to-back or + through a switch. If connected through the switch, the switch must be + configured for the PVC(s). + + a. For UBR test: + + At the test machine intended to receive data, type:: + + ttcp_atm -r -a -s 0.100 + + At the other test machine, type:: + + ttcp_atm -t -a -s 0.100 -n 10000 + + Run "ttcp_atm -h" to display more options of the ttcp_atm tool. + b. For ABR test: + + It is the same as the UBR testing, but with an extra command option:: + + -Pabr:max_pcr= + + where: + + xxx = the maximum peak cell rate, from 170 - 353207. + + This option must be set on both the machines. + + c. For CBR test: + + It is the same as the UBR testing, but with an extra command option:: + + -Pcbr:max_pcr= + + where: + + xxx = the maximum peak cell rate, from 170 - 353207. + + This option may only be set on the transmit machine. + + +Outstanding Issues +================== + + + +Contact Information +------------------- + +:: + + Customer Support: + United States: Telephone: (214) 654-5555 + Fax: (214) 654-5500 + E-Mail: intouch@iphase.com + Europe: Telephone: 33 (0)1 41 15 44 00 + Fax: 33 (0)1 41 15 12 13 + World Wide Web: http://www.iphase.com + Anonymous FTP: ftp.iphase.com diff --git a/Documentation/networking/device_drivers/cable/index.rst b/Documentation/networking/device_drivers/cable/index.rst new file mode 100644 index 000000000..cce3c4392 --- /dev/null +++ b/Documentation/networking/device_drivers/cable/index.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +Cable Modem Device Drivers +========================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + sb1000 + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/networking/device_drivers/cable/sb1000.rst b/Documentation/networking/device_drivers/cable/sb1000.rst new file mode 100644 index 000000000..c8582ca40 --- /dev/null +++ b/Documentation/networking/device_drivers/cable/sb1000.rst @@ -0,0 +1,222 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================== +SB100 device driver +=================== + +sb1000 is a module network device driver for the General Instrument (also known +as NextLevel) SURFboard1000 internal cable modem board. This is an ISA card +which is used by a number of cable TV companies to provide cable modem access. +It's a one-way downstream-only cable modem, meaning that your upstream net link +is provided by your regular phone modem. + +This driver was written by Franco Venturi . He deserves +a great deal of thanks for this wonderful piece of code! + +Needed tools +============ + +Support for this device is now a part of the standard Linux kernel. The +driver source code file is drivers/net/sb1000.c. In addition to this +you will need: + +1. The "cmconfig" program. This is a utility which supplements "ifconfig" + to configure the cable modem and network interface (usually called "cm0"); + +2. Several PPP scripts which live in /etc/ppp to make connecting via your + cable modem easy. + + These utilities can be obtained from: + + http://www.jacksonville.net/~fventuri/ + + in Franco's original source code distribution .tar.gz file. Support for + the sb1000 driver can be found at: + + - http://web.archive.org/web/%2E/http://home.adelphia.net/~siglercm/sb1000.html + - http://web.archive.org/web/%2E/http://linuxpower.cx/~cable/ + + along with these utilities. + +3. The standard isapnp tools. These are necessary to configure your SB1000 + card at boot time (or afterwards by hand) since it's a PnP card. + + If you don't have these installed as a standard part of your Linux + distribution, you can find them at: + + http://www.roestock.demon.co.uk/isapnptools/ + + or check your Linux distribution binary CD or their web site. For help with + isapnp, pnpdump, or /etc/isapnp.conf, go to: + + http://www.roestock.demon.co.uk/isapnptools/isapnpfaq.html + +Using the driver +================ + +To make the SB1000 card work, follow these steps: + +1. Run ``make config``, or ``make menuconfig``, or ``make xconfig``, whichever + you prefer, in the top kernel tree directory to set up your kernel + configuration. Make sure to say "Y" to "Prompt for development drivers" + and to say "M" to the sb1000 driver. Also say "Y" or "M" to all the standard + networking questions to get TCP/IP and PPP networking support. + +2. **BEFORE** you build the kernel, edit drivers/net/sb1000.c. Make sure + to redefine the value of READ_DATA_PORT to match the I/O address used + by isapnp to access your PnP cards. This is the value of READPORT in + /etc/isapnp.conf or given by the output of pnpdump. + +3. Build and install the kernel and modules as usual. + +4. Boot your new kernel following the usual procedures. + +5. Set up to configure the new SB1000 PnP card by capturing the output + of "pnpdump" to a file and editing this file to set the correct I/O ports, + IRQ, and DMA settings for all your PnP cards. Make sure none of the settings + conflict with one another. Then test this configuration by running the + "isapnp" command with your new config file as the input. Check for + errors and fix as necessary. (As an aside, I use I/O ports 0x110 and + 0x310 and IRQ 11 for my SB1000 card and these work well for me. YMMV.) + Then save the finished config file as /etc/isapnp.conf for proper + configuration on subsequent reboots. + +6. Download the original file sb1000-1.1.2.tar.gz from Franco's site or one of + the others referenced above. As root, unpack it into a temporary directory + and do a ``make cmconfig`` and then ``install -c cmconfig /usr/local/sbin``. + Don't do ``make install`` because it expects to find all the utilities built + and ready for installation, not just cmconfig. + +7. As root, copy all the files under the ppp/ subdirectory in Franco's + tar file into /etc/ppp, being careful not to overwrite any files that are + already in there. Then modify ppp@gi-on to set the correct login name, + phone number, and frequency for the cable modem. Also edit pap-secrets + to specify your login name and password and any site-specific information + you need. + +8. Be sure to modify /etc/ppp/firewall to use ipchains instead of + the older ipfwadm commands from the 2.0.x kernels. There's a neat utility to + convert ipfwadm commands to ipchains commands: + + http://users.dhp.com/~whisper/ipfwadm2ipchains/ + + You may also wish to modify the firewall script to implement a different + firewalling scheme. + +9. Start the PPP connection via the script /etc/ppp/ppp@gi-on. You must be + root to do this. It's better to use a utility like sudo to execute + frequently used commands like this with root permissions if possible. If you + connect successfully the cable modem interface will come up and you'll see a + driver message like this at the console:: + + cm0: sb1000 at (0x110,0x310), csn 1, S/N 0x2a0d16d8, IRQ 11. + sb1000.c:v1.1.2 6/01/98 (fventuri@mediaone.net) + + The "ifconfig" command should show two new interfaces, ppp0 and cm0. + + The command "cmconfig cm0" will give you information about the cable modem + interface. + +10. Try pinging a site via ``ping -c 5 www.yahoo.com``, for example. You should + see packets received. + +11. If you can't get site names (like www.yahoo.com) to resolve into + IP addresses (like 204.71.200.67), be sure your /etc/resolv.conf file + has no syntax errors and has the right nameserver IP addresses in it. + If this doesn't help, try something like ``ping -c 5 204.71.200.67`` to + see if the networking is running but the DNS resolution is where the + problem lies. + +12. If you still have problems, go to the support web sites mentioned above + and read the information and documentation there. + +Common problems +=============== + +1. Packets go out on the ppp0 interface but don't come back on the cm0 + interface. It looks like I'm connected but I can't even ping any + numerical IP addresses. (This happens predominantly on Debian systems due + to a default boot-time configuration script.) + +Solution + As root ``echo 0 > /proc/sys/net/ipv4/conf/cm0/rp_filter`` so it + can share the same IP address as the ppp0 interface. Note that this + command should probably be added to the /etc/ppp/cablemodem script + *right*between* the "/sbin/ifconfig" and "/sbin/cmconfig" commands. + You may need to do this to /proc/sys/net/ipv4/conf/ppp0/rp_filter as well. + If you do this to /proc/sys/net/ipv4/conf/default/rp_filter on each reboot + (in rc.local or some such) then any interfaces can share the same IP + addresses. + +2. I get "unresolved symbol" error messages on executing ``insmod sb1000.o``. + +Solution + You probably have a non-matching kernel source tree and + /usr/include/linux and /usr/include/asm header files. Make sure you + install the correct versions of the header files in these two directories. + Then rebuild and reinstall the kernel. + +3. When isapnp runs it reports an error, and my SB1000 card isn't working. + +Solution + There's a problem with later versions of isapnp using the "(CHECK)" + option in the lines that allocate the two I/O addresses for the SB1000 card. + This first popped up on RH 6.0. Delete "(CHECK)" for the SB1000 I/O addresses. + Make sure they don't conflict with any other pieces of hardware first! Then + rerun isapnp and go from there. + +4. I can't execute the /etc/ppp/ppp@gi-on file. + +Solution + As root do ``chmod ug+x /etc/ppp/ppp@gi-on``. + +5. The firewall script isn't working (with 2.2.x and higher kernels). + +Solution + Use the ipfwadm2ipchains script referenced above to convert the + /etc/ppp/firewall script from the deprecated ipfwadm commands to ipchains. + +6. I'm getting *tons* of firewall deny messages in the /var/kern.log, + /var/messages, and/or /var/syslog files, and they're filling up my /var + partition!!! + +Solution + First, tell your ISP that you're receiving DoS (Denial of Service) + and/or portscanning (UDP connection attempts) attacks! Look over the deny + messages to figure out what the attack is and where it's coming from. Next, + edit /etc/ppp/cablemodem and make sure the ",nobroadcast" option is turned on + to the "cmconfig" command (uncomment that line). If you're not receiving these + denied packets on your broadcast interface (IP address xxx.yyy.zzz.255 + typically), then someone is attacking your machine in particular. Be careful + out there.... + +7. Everything seems to work fine but my computer locks up after a while + (and typically during a lengthy download through the cable modem)! + +Solution + You may need to add a short delay in the driver to 'slow down' the + SURFboard because your PC might not be able to keep up with the transfer rate + of the SB1000. To do this, it's probably best to download Franco's + sb1000-1.1.2.tar.gz archive and build and install sb1000.o manually. You'll + want to edit the 'Makefile' and look for the 'SB1000_DELAY' + define. Uncomment those 'CFLAGS' lines (and comment out the default ones) + and try setting the delay to something like 60 microseconds with: + '-DSB1000_DELAY=60'. Then do ``make`` and as root ``make install`` and try + it out. If it still doesn't work or you like playing with the driver, you may + try other numbers. Remember though that the higher the delay, the slower the + driver (which slows down the rest of the PC too when it is actively + used). Thanks to Ed Daiga for this tip! + +Credits +======= + +This README came from Franco Venturi's original README file which is +still supplied with his driver .tar.gz archive. I and all other sb1000 users +owe Franco a tremendous "Thank you!" Additional thanks goes to Carl Patten +and Ralph Bonnell who are now managing the Linux SB1000 web site, and to +the SB1000 users who reported and helped debug the common problems listed +above. + + + Clemmitt Sigler + csigler@vt.edu diff --git a/Documentation/networking/device_drivers/cellular/index.rst b/Documentation/networking/device_drivers/cellular/index.rst new file mode 100644 index 000000000..fc1812d3f --- /dev/null +++ b/Documentation/networking/device_drivers/cellular/index.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +Cellular Modem Device Drivers +============================= + +Contents: + +.. toctree:: + :maxdepth: 2 + + qualcomm/rmnet + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst new file mode 100644 index 000000000..70643b58d --- /dev/null +++ b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst @@ -0,0 +1,95 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============ +Rmnet Driver +============ + +1. Introduction +=============== + +rmnet driver is used for supporting the Multiplexing and aggregation +Protocol (MAP). This protocol is used by all recent chipsets using Qualcomm +Technologies, Inc. modems. + +This driver can be used to register onto any physical network device in +IP mode. Physical transports include USB, HSIC, PCIe and IP accelerator. + +Multiplexing allows for creation of logical netdevices (rmnet devices) to +handle multiple private data networks (PDN) like a default internet, tethering, +multimedia messaging service (MMS) or IP media subsystem (IMS). Hardware sends +packets with MAP headers to rmnet. Based on the multiplexer id, rmnet +routes to the appropriate PDN after removing the MAP header. + +Aggregation is required to achieve high data rates. This involves hardware +sending aggregated bunch of MAP frames. rmnet driver will de-aggregate +these MAP frames and send them to appropriate PDN's. + +2. Packet format +================ + +a. MAP packet (data / control) + +MAP header has the same endianness of the IP packet. + +Packet format:: + + Bit 0 1 2-7 8 - 15 16 - 31 + Function Command / Data Reserved Pad Multiplexer ID Payload length + Bit 32 - x + Function Raw Bytes + +Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command +or data packet. Control packet is used for transport level flow control. Data +packets are standard IP packets. + +Reserved bits are usually zeroed out and to be ignored by receiver. + +Padding is number of bytes to be added for 4 byte alignment if required by +hardware. + +Multiplexer ID is to indicate the PDN on which data has to be sent. + +Payload length includes the padding length but does not include MAP header +length. + +b. MAP packet (command specific):: + + Bit 0 1 2-7 8 - 15 16 - 31 + Function Command Reserved Pad Multiplexer ID Payload length + Bit 32 - 39 40 - 45 46 - 47 48 - 63 + Function Command name Reserved Command Type Reserved + Bit 64 - 95 + Function Transaction ID + Bit 96 - 127 + Function Command data + +Command 1 indicates disabling flow while 2 is enabling flow + +Command types + += ========================================== +0 for MAP command request +1 is to acknowledge the receipt of a command +2 is for unsupported commands +3 is for error during processing of commands += ========================================== + +c. Aggregation + +Aggregation is multiple MAP packets (can be data or command) delivered to +rmnet in a single linear skb. rmnet will process the individual +packets and either ACK the MAP command or deliver the IP packet to the +network stack as needed + +MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional padding.... + +MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad... + +3. Userspace configuration +========================== + +rmnet userspace configuration is done through netlink library librmnetctl +and command line utility rmnetcli. Utility is hosted in codeaurora forum git. +The driver uses rtnl_link_ops for communication. + +https://source.codeaurora.org/quic/la/platform/vendor/qcom-opensource/dataservices/tree/rmnetctl diff --git a/Documentation/networking/device_drivers/ethernet/3com/3c509.rst b/Documentation/networking/device_drivers/ethernet/3com/3c509.rst new file mode 100644 index 000000000..47f706bac --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/3com/3c509.rst @@ -0,0 +1,249 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================================= +Linux and the 3Com EtherLink III Series Ethercards (driver v1.18c and higher) +============================================================================= + +This file contains the instructions and caveats for v1.18c and higher versions +of the 3c509 driver. You should not use the driver without reading this file. + +release 1.0 + +28 February 2002 + +Current maintainer (corrections to): + David Ruggiero + +Introduction +============ + +The following are notes and information on using the 3Com EtherLink III series +ethercards in Linux. These cards are commonly known by the most widely-used +card's 3Com model number, 3c509. They are all 10mb/s ISA-bus cards and shouldn't +be (but sometimes are) confused with the similarly-numbered PCI-bus "3c905" +(aka "Vortex" or "Boomerang") series. Kernel support for the 3c509 family is +provided by the module 3c509.c, which has code to support all of the following +models: + + - 3c509 (original ISA card) + - 3c509B (later revision of the ISA card; supports full-duplex) + - 3c589 (PCMCIA) + - 3c589B (later revision of the 3c589; supports full-duplex) + - 3c579 (EISA) + +Large portions of this documentation were heavily borrowed from the guide +written the original author of the 3c509 driver, Donald Becker. The master +copy of that document, which contains notes on older versions of the driver, +currently resides on Scyld web server: http://www.scyld.com/. + + +Special Driver Features +======================= + +Overriding card settings + +The driver allows boot- or load-time overriding of the card's detected IOADDR, +IRQ, and transceiver settings, although this capability shouldn't generally be +needed except to enable full-duplex mode (see below). An example of the syntax +for LILO parameters for doing this:: + + ether=10,0x310,3,0x3c509,eth0 + +This configures the first found 3c509 card for IRQ 10, base I/O 0x310, and +transceiver type 3 (10base2). The flag "0x3c509" must be set to avoid conflicts +with other card types when overriding the I/O address. When the driver is +loaded as a module, only the IRQ may be overridden. For example, +setting two cards to IRQ10 and IRQ11 is done by using the irq module +option:: + + options 3c509 irq=10,11 + + +Full-duplex mode +================ + +The v1.18c driver added support for the 3c509B's full-duplex capabilities. +In order to enable and successfully use full-duplex mode, three conditions +must be met: + +(a) You must have a Etherlink III card model whose hardware supports full- +duplex operations. Currently, the only members of the 3c509 family that are +positively known to support full-duplex are the 3c509B (ISA bus) and 3c589B +(PCMCIA) cards. Cards without the "B" model designation do *not* support +full-duplex mode; these include the original 3c509 (no "B"), the original +3c589, the 3c529 (MCA bus), and the 3c579 (EISA bus). + +(b) You must be using your card's 10baseT transceiver (i.e., the RJ-45 +connector), not its AUI (thick-net) or 10base2 (thin-net/coax) interfaces. +AUI and 10base2 network cabling is physically incapable of full-duplex +operation. + +(c) Most importantly, your 3c509B must be connected to a link partner that is +itself full-duplex capable. This is almost certainly one of two things: a full- +duplex-capable Ethernet switch (*not* a hub), or a full-duplex-capable NIC on +another system that's connected directly to the 3c509B via a crossover cable. + +Full-duplex mode can be enabled using 'ethtool'. + +.. warning:: + + Extremely important caution concerning full-duplex mode + + Understand that the 3c509B's hardware's full-duplex support is much more + limited than that provide by more modern network interface cards. Although + at the physical layer of the network it fully supports full-duplex operation, + the card was designed before the current Ethernet auto-negotiation (N-way) + spec was written. This means that the 3c509B family ***cannot and will not + auto-negotiate a full-duplex connection with its link partner under any + circumstances, no matter how it is initialized***. If the full-duplex mode + of the 3c509B is enabled, its link partner will very likely need to be + independently _forced_ into full-duplex mode as well; otherwise various nasty + failures will occur - at the very least, you'll see massive numbers of packet + collisions. This is one of very rare circumstances where disabling auto- + negotiation and forcing the duplex mode of a network interface card or switch + would ever be necessary or desirable. + + +Available Transceiver Types +=========================== + +For versions of the driver v1.18c and above, the available transceiver types are: + +== ========================================================================= +0 transceiver type from EEPROM config (normally 10baseT); force half-duplex +1 AUI (thick-net / DB15 connector) +2 (undefined) +3 10base2 (thin-net == coax / BNC connector) +4 10baseT (RJ-45 connector); force half-duplex mode +8 transceiver type and duplex mode taken from card's EEPROM config settings +12 10baseT (RJ-45 connector); force full-duplex mode +== ========================================================================= + +Prior to driver version 1.18c, only transceiver codes 0-4 were supported. Note +that the new transceiver codes 8 and 12 are the *only* ones that will enable +full-duplex mode, no matter what the card's detected EEPROM settings might be. +This insured that merely upgrading the driver from an earlier version would +never automatically enable full-duplex mode in an existing installation; +it must always be explicitly enabled via one of these code in order to be +activated. + +The transceiver type can be changed using 'ethtool'. + + +Interpretation of error messages and common problems +---------------------------------------------------- + +Error Messages +^^^^^^^^^^^^^^ + +eth0: Infinite loop in interrupt, status 2011. +These are "mostly harmless" message indicating that the driver had too much +work during that interrupt cycle. With a status of 0x2011 you are receiving +packets faster than they can be removed from the card. This should be rare +or impossible in normal operation. Possible causes of this error report are: + + - a "green" mode enabled that slows the processor down when there is no + keyboard activity. + + - some other device or device driver hogging the bus or disabling interrupts. + Check /proc/interrupts for excessive interrupt counts. The timer tick + interrupt should always be incrementing faster than the others. + +No received packets +^^^^^^^^^^^^^^^^^^^ + +If a 3c509, 3c562 or 3c589 can successfully transmit packets, but never +receives packets (as reported by /proc/net/dev or 'ifconfig') you likely +have an interrupt line problem. Check /proc/interrupts to verify that the +card is actually generating interrupts. If the interrupt count is not +increasing you likely have a physical conflict with two devices trying to +use the same ISA IRQ line. The common conflict is with a sound card on IRQ10 +or IRQ5, and the easiest solution is to move the 3c509 to a different +interrupt line. If the device is receiving packets but 'ping' doesn't work, +you have a routing problem. + +Tx Carrier Errors Reported in /proc/net/dev +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +If an EtherLink III appears to transmit packets, but the "Tx carrier errors" +field in /proc/net/dev increments as quickly as the Tx packet count, you +likely have an unterminated network or the incorrect media transceiver selected. + +3c509B card is not detected on machines with an ISA PnP BIOS. +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +While the updated driver works with most PnP BIOS programs, it does not work +with all. This can be fixed by disabling PnP support using the 3Com-supplied +setup program. + +3c509 card is not detected on overclocked machines +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Increase the delay time in id_read_eeprom() from the current value, 500, +to an absurdly high value, such as 5000. + + +Decoding Status and Error Messages +---------------------------------- + + +The bits in the main status register are: + +===== ====================================== +value description +===== ====================================== +0x01 Interrupt latch +0x02 Tx overrun, or Rx underrun +0x04 Tx complete +0x08 Tx FIFO room available +0x10 A complete Rx packet has arrived +0x20 A Rx packet has started to arrive +0x40 The driver has requested an interrupt +0x80 Statistics counter nearly full +===== ====================================== + +The bits in the transmit (Tx) status word are: + +===== ============================================ +value description +===== ============================================ +0x02 Out-of-window collision. +0x04 Status stack overflow (normally impossible). +0x08 16 collisions. +0x10 Tx underrun (not enough PCI bus bandwidth). +0x20 Tx jabber. +0x40 Tx interrupt requested. +0x80 Status is valid (this should always be set). +===== ============================================ + + +When a transmit error occurs the driver produces a status message such as:: + + eth0: Transmit error, Tx status register 82 + +The two values typically seen here are: + +0x82 +^^^^ + +Out of window collision. This typically occurs when some other Ethernet +host is incorrectly set to full duplex on a half duplex network. + +0x88 +^^^^ + +16 collisions. This typically occurs when the network is exceptionally busy +or when another host doesn't correctly back off after a collision. If this +error is mixed with 0x82 errors it is the result of a host incorrectly set +to full duplex (see above). + +Both of these errors are the result of network problems that should be +corrected. They do not represent driver malfunction. + + +Revision history (this file) +============================ + +28Feb02 v1.0 DR New; major portions based on Becker original 3c509 docs + diff --git a/Documentation/networking/device_drivers/ethernet/3com/vortex.rst b/Documentation/networking/device_drivers/ethernet/3com/vortex.rst new file mode 100644 index 000000000..eab10fc6d --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/3com/vortex.rst @@ -0,0 +1,459 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================= +3Com Vortex device driver +========================= + +Andrew Morton + +30 April 2000 + + +This document describes the usage and errata of the 3Com "Vortex" device +driver for Linux, 3c59x.c. + +The driver was written by Donald Becker + +Don is no longer the prime maintainer of this version of the driver. +Please report problems to one or more of: + +- Andrew Morton +- Netdev mailing list +- Linux kernel mailing list + +Please note the 'Reporting and Diagnosing Problems' section at the end +of this file. + + +Since kernel 2.3.99-pre6, this driver incorporates the support for the +3c575-series Cardbus cards which used to be handled by 3c575_cb.c. + +This driver supports the following hardware: + + - 3c590 Vortex 10Mbps + - 3c592 EISA 10Mbps Demon/Vortex + - 3c597 EISA Fast Demon/Vortex + - 3c595 Vortex 100baseTx + - 3c595 Vortex 100baseT4 + - 3c595 Vortex 100base-MII + - 3c900 Boomerang 10baseT + - 3c900 Boomerang 10Mbps Combo + - 3c900 Cyclone 10Mbps TPO + - 3c900 Cyclone 10Mbps Combo + - 3c900 Cyclone 10Mbps TPC + - 3c900B-FL Cyclone 10base-FL + - 3c905 Boomerang 100baseTx + - 3c905 Boomerang 100baseT4 + - 3c905B Cyclone 100baseTx + - 3c905B Cyclone 10/100/BNC + - 3c905B-FX Cyclone 100baseFx + - 3c905C Tornado + - 3c920B-EMB-WNM (ATI Radeon 9100 IGP) + - 3c980 Cyclone + - 3c980C Python-T + - 3cSOHO100-TX Hurricane + - 3c555 Laptop Hurricane + - 3c556 Laptop Tornado + - 3c556B Laptop Hurricane + - 3c575 [Megahertz] 10/100 LAN CardBus + - 3c575 Boomerang CardBus + - 3CCFE575BT Cyclone CardBus + - 3CCFE575CT Tornado CardBus + - 3CCFE656 Cyclone CardBus + - 3CCFEM656B Cyclone+Winmodem CardBus + - 3CXFEM656C Tornado+Winmodem CardBus + - 3c450 HomePNA Tornado + - 3c920 Tornado + - 3c982 Hydra Dual Port A + - 3c982 Hydra Dual Port B + - 3c905B-T4 + - 3c920B-EMB-WNM Tornado + +Module parameters +================= + +There are several parameters which may be provided to the driver when +its module is loaded. These are usually placed in ``/etc/modprobe.d/*.conf`` +configuration files. Example:: + + options 3c59x debug=3 rx_copybreak=300 + +If you are using the PCMCIA tools (cardmgr) then the options may be +placed in /etc/pcmcia/config.opts:: + + module "3c59x" opts "debug=3 rx_copybreak=300" + + +The supported parameters are: + +debug=N + + Where N is a number from 0 to 7. Anything above 3 produces a lot + of output in your system logs. debug=1 is default. + +options=N1,N2,N3,... + + Each number in the list provides an option to the corresponding + network card. So if you have two 3c905's and you wish to provide + them with option 0x204 you would use:: + + options=0x204,0x204 + + The individual options are composed of a number of bitfields which + have the following meanings: + + Possible media type settings + + == ================================= + 0 10baseT + 1 10Mbs AUI + 2 undefined + 3 10base2 (BNC) + 4 100base-TX + 5 100base-FX + 6 MII (Media Independent Interface) + 7 Use default setting from EEPROM + 8 Autonegotiate + 9 External MII + 10 Use default setting from EEPROM + == ================================= + + When generating a value for the 'options' setting, the above media + selection values may be OR'ed (or added to) the following: + + ====== ============================================= + 0x8000 Set driver debugging level to 7 + 0x4000 Set driver debugging level to 2 + 0x0400 Enable Wake-on-LAN + 0x0200 Force full duplex mode. + 0x0010 Bus-master enable bit (Old Vortex cards only) + ====== ============================================= + + For example:: + + insmod 3c59x options=0x204 + + will force full-duplex 100base-TX, rather than allowing the usual + autonegotiation. + +global_options=N + + Sets the ``options`` parameter for all 3c59x NICs in the machine. + Entries in the ``options`` array above will override any setting of + this. + +full_duplex=N1,N2,N3... + + Similar to bit 9 of 'options'. Forces the corresponding card into + full-duplex mode. Please use this in preference to the ``options`` + parameter. + + In fact, please don't use this at all! You're better off getting + autonegotiation working properly. + +global_full_duplex=N1 + + Sets full duplex mode for all 3c59x NICs in the machine. Entries + in the ``full_duplex`` array above will override any setting of this. + +flow_ctrl=N1,N2,N3... + + Use 802.3x MAC-layer flow control. The 3com cards only support the + PAUSE command, which means that they will stop sending packets for a + short period if they receive a PAUSE frame from the link partner. + + The driver only allows flow control on a link which is operating in + full duplex mode. + + This feature does not appear to work on the 3c905 - only 3c905B and + 3c905C have been tested. + + The 3com cards appear to only respond to PAUSE frames which are + sent to the reserved destination address of 01:80:c2:00:00:01. They + do not honour PAUSE frames which are sent to the station MAC address. + +rx_copybreak=M + + The driver preallocates 32 full-sized (1536 byte) network buffers + for receiving. When a packet arrives, the driver has to decide + whether to leave the packet in its full-sized buffer, or to allocate + a smaller buffer and copy the packet across into it. + + This is a speed/space tradeoff. + + The value of rx_copybreak is used to decide when to make the copy. + If the packet size is less than rx_copybreak, the packet is copied. + The default value for rx_copybreak is 200 bytes. + +max_interrupt_work=N + + The driver's interrupt service routine can handle many receive and + transmit packets in a single invocation. It does this in a loop. + The value of max_interrupt_work governs how many times the interrupt + service routine will loop. The default value is 32 loops. If this + is exceeded the interrupt service routine gives up and generates a + warning message "eth0: Too much work in interrupt". + +hw_checksums=N1,N2,N3,... + + Recent 3com NICs are able to generate IPv4, TCP and UDP checksums + in hardware. Linux has used the Rx checksumming for a long time. + The "zero copy" patch which is planned for the 2.4 kernel series + allows you to make use of the NIC's DMA scatter/gather and transmit + checksumming as well. + + The driver is set up so that, when the zerocopy patch is applied, + all Tornado and Cyclone devices will use S/G and Tx checksums. + + This module parameter has been provided so you can override this + decision. If you think that Tx checksums are causing a problem, you + may disable the feature with ``hw_checksums=0``. + + If you think your NIC should be performing Tx checksumming and the + driver isn't enabling it, you can force the use of hardware Tx + checksumming with ``hw_checksums=1``. + + The driver drops a message in the logfiles to indicate whether or + not it is using hardware scatter/gather and hardware Tx checksums. + + Scatter/gather and hardware checksums provide considerable + performance improvement for the sendfile() system call, but a small + decrease in throughput for send(). There is no effect upon receive + efficiency. + +compaq_ioaddr=N, +compaq_irq=N, +compaq_device_id=N + + "Variables to work-around the Compaq PCI BIOS32 problem".... + +watchdog=N + + Sets the time duration (in milliseconds) after which the kernel + decides that the transmitter has become stuck and needs to be reset. + This is mainly for debugging purposes, although it may be advantageous + to increase this value on LANs which have very high collision rates. + The default value is 5000 (5.0 seconds). + +enable_wol=N1,N2,N3,... + + Enable Wake-on-LAN support for the relevant interface. Donald + Becker's ``ether-wake`` application may be used to wake suspended + machines. + + Also enables the NIC's power management support. + +global_enable_wol=N + + Sets enable_wol mode for all 3c59x NICs in the machine. Entries in + the ``enable_wol`` array above will override any setting of this. + +Media selection +--------------- + +A number of the older NICs such as the 3c590 and 3c900 series have +10base2 and AUI interfaces. + +Prior to January, 2001 this driver would autoeselect the 10base2 or AUI +port if it didn't detect activity on the 10baseT port. It would then +get stuck on the 10base2 port and a driver reload was necessary to +switch back to 10baseT. This behaviour could not be prevented with a +module option override. + +Later (current) versions of the driver _do_ support locking of the +media type. So if you load the driver module with + + modprobe 3c59x options=0 + +it will permanently select the 10baseT port. Automatic selection of +other media types does not occur. + + +Transmit error, Tx status register 82 +------------------------------------- + +This is a common error which is almost always caused by another host on +the same network being in full-duplex mode, while this host is in +half-duplex mode. You need to find that other host and make it run in +half-duplex mode or fix this host to run in full-duplex mode. + +As a last resort, you can force the 3c59x driver into full-duplex mode +with + + options 3c59x full_duplex=1 + +but this has to be viewed as a workaround for broken network gear and +should only really be used for equipment which cannot autonegotiate. + + +Additional resources +-------------------- + +Details of the device driver implementation are at the top of the source file. + +Additional documentation is available at Don Becker's Linux Drivers site: + + http://www.scyld.com/vortex.html + +Donald Becker's driver development site: + + http://www.scyld.com/network.html + +Donald's vortex-diag program is useful for inspecting the NIC's state: + + http://www.scyld.com/ethercard_diag.html + +Donald's mii-diag program may be used for inspecting and manipulating +the NIC's Media Independent Interface subsystem: + + http://www.scyld.com/ethercard_diag.html#mii-diag + +Donald's wake-on-LAN page: + + http://www.scyld.com/wakeonlan.html + +3Com's DOS-based application for setting up the NICs EEPROMs: + + ftp://ftp.3com.com/pub/nic/3c90x/3c90xx2.exe + + +Autonegotiation notes +--------------------- + + The driver uses a one-minute heartbeat for adapting to changes in + the external LAN environment if link is up and 5 seconds if link is down. + This means that when, for example, a machine is unplugged from a hubbed + 10baseT LAN plugged into a switched 100baseT LAN, the throughput + will be quite dreadful for up to sixty seconds. Be patient. + + Cisco interoperability note from Walter Wong : + + On a side note, adding HAS_NWAY seems to share a problem with the + Cisco 6509 switch. Specifically, you need to change the spanning + tree parameter for the port the machine is plugged into to 'portfast' + mode. Otherwise, the negotiation fails. This has been an issue + we've noticed for a while but haven't had the time to track down. + + Cisco switches (Jeff Busch ) + + My "standard config" for ports to which PC's/servers connect directly:: + + interface FastEthernet0/N + description machinename + load-interval 30 + spanning-tree portfast + + If autonegotiation is a problem, you may need to specify "speed + 100" and "duplex full" as well (or "speed 10" and "duplex half"). + + WARNING: DO NOT hook up hubs/switches/bridges to these + specially-configured ports! The switch will become very confused. + + +Reporting and diagnosing problems +--------------------------------- + +Maintainers find that accurate and complete problem reports are +invaluable in resolving driver problems. We are frequently not able to +reproduce problems and must rely on your patience and efforts to get to +the bottom of the problem. + +If you believe you have a driver problem here are some of the +steps you should take: + +- Is it really a driver problem? + + Eliminate some variables: try different cards, different + computers, different cables, different ports on the switch/hub, + different versions of the kernel or of the driver, etc. + +- OK, it's a driver problem. + + You need to generate a report. Typically this is an email to the + maintainer and/or netdev@vger.kernel.org. The maintainer's + email address will be in the driver source or in the MAINTAINERS file. + +- The contents of your report will vary a lot depending upon the + problem. If it's a kernel crash then you should refer to the + admin-guide/reporting-bugs.rst file. + + But for most problems it is useful to provide the following: + + - Kernel version, driver version + + - A copy of the banner message which the driver generates when + it is initialised. For example: + + eth0: 3Com PCI 3c905C Tornado at 0xa400, 00:50:da:6a:88:f0, IRQ 19 + 8K byte-wide RAM 5:3 Rx:Tx split, autoselect/Autonegotiate interface. + MII transceiver found at address 24, status 782d. + Enabling bus-master transmits and whole-frame receives. + + NOTE: You must provide the ``debug=2`` modprobe option to generate + a full detection message. Please do this:: + + modprobe 3c59x debug=2 + + - If it is a PCI device, the relevant output from 'lspci -vx', eg:: + + 00:09.0 Ethernet controller: 3Com Corporation 3c905C-TX [Fast Etherlink] (rev 74) + Subsystem: 3Com Corporation: Unknown device 9200 + Flags: bus master, medium devsel, latency 32, IRQ 19 + I/O ports at a400 [size=128] + Memory at db000000 (32-bit, non-prefetchable) [size=128] + Expansion ROM at [disabled] [size=128K] + Capabilities: [dc] Power Management version 2 + 00: b7 10 00 92 07 00 10 02 74 00 00 02 08 20 00 00 + 10: 01 a4 00 00 00 00 00 db 00 00 00 00 00 00 00 00 + 20: 00 00 00 00 00 00 00 00 00 00 00 00 b7 10 00 10 + 30: 00 00 00 00 dc 00 00 00 00 00 00 00 05 01 0a 0a + + - A description of the environment: 10baseT? 100baseT? + full/half duplex? switched or hubbed? + + - Any additional module parameters which you may be providing to the driver. + + - Any kernel logs which are produced. The more the merrier. + If this is a large file and you are sending your report to a + mailing list, mention that you have the logfile, but don't send + it. If you're reporting direct to the maintainer then just send + it. + + To ensure that all kernel logs are available, add the + following line to /etc/syslog.conf:: + + kern.* /var/log/messages + + Then restart syslogd with:: + + /etc/rc.d/init.d/syslog restart + + (The above may vary, depending upon which Linux distribution you use). + + - If your problem is reproducible then that's great. Try the + following: + + 1) Increase the debug level. Usually this is done via: + + a) modprobe driver debug=7 + b) In /etc/modprobe.d/driver.conf: + options driver debug=7 + + 2) Recreate the problem with the higher debug level, + send all logs to the maintainer. + + 3) Download you card's diagnostic tool from Donald + Becker's website . + Download mii-diag.c as well. Build these. + + a) Run 'vortex-diag -aaee' and 'mii-diag -v' when the card is + working correctly. Save the output. + + b) Run the above commands when the card is malfunctioning. Send + both sets of output. + +Finally, please be patient and be prepared to do some work. You may +end up working on this problem for a week or more as the maintainer +asks more questions, asks for more tests, asks for patches to be +applied, etc. At the end of it all, the problem may even remain +unresolved. diff --git a/Documentation/networking/device_drivers/ethernet/altera/altera_tse.rst b/Documentation/networking/device_drivers/ethernet/altera/altera_tse.rst new file mode 100644 index 000000000..7a7040072 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/altera/altera_tse.rst @@ -0,0 +1,286 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: + +======================================= +Altera Triple-Speed Ethernet MAC driver +======================================= + +Copyright |copy| 2008-2014 Altera Corporation + +This is the driver for the Altera Triple-Speed Ethernet (TSE) controllers +using the SGDMA and MSGDMA soft DMA IP components. The driver uses the +platform bus to obtain component resources. The designs used to test this +driver were built for a Cyclone(R) V SOC FPGA board, a Cyclone(R) V FPGA board, +and tested with ARM and NIOS processor hosts separately. The anticipated use +cases are simple communications between an embedded system and an external peer +for status and simple configuration of the embedded system. + +For more information visit www.altera.com and www.rocketboards.org. Support +forums for the driver may be found on www.rocketboards.org, and a design used +to test this driver may be found there as well. Support is also available from +the maintainer of this driver, found in MAINTAINERS. + +The Triple-Speed Ethernet, SGDMA, and MSGDMA components are all soft IP +components that can be assembled and built into an FPGA using the Altera +Quartus toolchain. Quartus 13.1 and 14.0 were used to build the design that +this driver was tested against. The sopc2dts tool is used to create the +device tree for the driver, and may be found at rocketboards.org. + +The driver probe function examines the device tree and determines if the +Triple-Speed Ethernet instance is using an SGDMA or MSGDMA component. The +probe function then installs the appropriate set of DMA routines to +initialize, setup transmits, receives, and interrupt handling primitives for +the respective configurations. + +The SGDMA component is to be deprecated in the near future (over the next 1-2 +years as of this writing in early 2014) in favor of the MSGDMA component. +SGDMA support is included for existing designs and reference in case a +developer wishes to support their own soft DMA logic and driver support. Any +new designs should not use the SGDMA. + +The SGDMA supports only a single transmit or receive operation at a time, and +therefore will not perform as well compared to the MSGDMA soft IP. Please +visit www.altera.com for known, documented SGDMA errata. + +Scatter-gather DMA is not supported by the SGDMA or MSGDMA at this time. +Scatter-gather DMA will be added to a future maintenance update to this +driver. + +Jumbo frames are not supported at this time. + +The driver limits PHY operations to 10/100Mbps, and has not yet been fully +tested for 1Gbps. This support will be added in a future maintenance update. + +1. Kernel Configuration +======================= + +The kernel configuration option is ALTERA_TSE: + + Device Drivers ---> Network device support ---> Ethernet driver support ---> + Altera Triple-Speed Ethernet MAC support (ALTERA_TSE) + +2. Driver parameters list +========================= + + - debug: message level (0: no output, 16: all); + - dma_rx_num: Number of descriptors in the RX list (default is 64); + - dma_tx_num: Number of descriptors in the TX list (default is 64). + +3. Command line options +======================= + +Driver parameters can be also passed in command line by using:: + + altera_tse=dma_rx_num:128,dma_tx_num:512 + +4. Driver information and notes +=============================== + +4.1. Transmit process +--------------------- +When the driver's transmit routine is called by the kernel, it sets up a +transmit descriptor by calling the underlying DMA transmit routine (SGDMA or +MSGDMA), and initiates a transmit operation. Once the transmit is complete, an +interrupt is driven by the transmit DMA logic. The driver handles the transmit +completion in the context of the interrupt handling chain by recycling +resource required to send and track the requested transmit operation. + +4.2. Receive process +-------------------- +The driver will post receive buffers to the receive DMA logic during driver +initialization. Receive buffers may or may not be queued depending upon the +underlying DMA logic (MSGDMA is able queue receive buffers, SGDMA is not able +to queue receive buffers to the SGDMA receive logic). When a packet is +received, the DMA logic generates an interrupt. The driver handles a receive +interrupt by obtaining the DMA receive logic status, reaping receive +completions until no more receive completions are available. + +4.3. Interrupt Mitigation +------------------------- +The driver is able to mitigate the number of its DMA interrupts +using NAPI for receive operations. Interrupt mitigation is not yet supported +for transmit operations, but will be added in a future maintenance release. + +4.4) Ethtool support +-------------------- +Ethtool is supported. Driver statistics and internal errors can be taken using: +ethtool -S ethX command. It is possible to dump registers etc. + +4.5) PHY Support +---------------- +The driver is compatible with PAL to work with PHY and GPHY devices. + +4.7) List of source files: +-------------------------- + - Kconfig + - Makefile + - altera_tse_main.c: main network device driver + - altera_tse_ethtool.c: ethtool support + - altera_tse.h: private driver structure and common definitions + - altera_msgdma.h: MSGDMA implementation function definitions + - altera_sgdma.h: SGDMA implementation function definitions + - altera_msgdma.c: MSGDMA implementation + - altera_sgdma.c: SGDMA implementation + - altera_sgdmahw.h: SGDMA register and descriptor definitions + - altera_msgdmahw.h: MSGDMA register and descriptor definitions + - altera_utils.c: Driver utility functions + - altera_utils.h: Driver utility function definitions + +5. Debug Information +==================== + +The driver exports debug information such as internal statistics, +debug information, MAC and DMA registers etc. + +A user may use the ethtool support to get statistics: +e.g. using: ethtool -S ethX (that shows the statistics counters) +or sees the MAC registers: e.g. using: ethtool -d ethX + +The developer can also use the "debug" module parameter to get +further debug information. + +6. Statistics Support +===================== + +The controller and driver support a mix of IEEE standard defined statistics, +RFC defined statistics, and driver or Altera defined statistics. The four +specifications containing the standard definitions for these statistics are +as follows: + + - IEEE 802.3-2012 - IEEE Standard for Ethernet. + - RFC 2863 found at http://www.rfc-editor.org/rfc/rfc2863.txt. + - RFC 2819 found at http://www.rfc-editor.org/rfc/rfc2819.txt. + - Altera Triple Speed Ethernet User Guide, found at http://www.altera.com + +The statistics supported by the TSE and the device driver are as follows: + +"tx_packets" is equivalent to aFramesTransmittedOK defined in IEEE 802.3-2012, +Section 5.2.2.1.2. This statistics is the count of frames that are successfully +transmitted. + +"rx_packets" is equivalent to aFramesReceivedOK defined in IEEE 802.3-2012, +Section 5.2.2.1.5. This statistic is the count of frames that are successfully +received. This count does not include any error packets such as CRC errors, +length errors, or alignment errors. + +"rx_crc_errors" is equivalent to aFrameCheckSequenceErrors defined in IEEE +802.3-2012, Section 5.2.2.1.6. This statistic is the count of frames that are +an integral number of bytes in length and do not pass the CRC test as the frame +is received. + +"rx_align_errors" is equivalent to aAlignmentErrors defined in IEEE 802.3-2012, +Section 5.2.2.1.7. This statistic is the count of frames that are not an +integral number of bytes in length and do not pass the CRC test as the frame is +received. + +"tx_bytes" is equivalent to aOctetsTransmittedOK defined in IEEE 802.3-2012, +Section 5.2.2.1.8. This statistic is the count of data and pad bytes +successfully transmitted from the interface. + +"rx_bytes" is equivalent to aOctetsReceivedOK defined in IEEE 802.3-2012, +Section 5.2.2.1.14. This statistic is the count of data and pad bytes +successfully received by the controller. + +"tx_pause" is equivalent to aPAUSEMACCtrlFramesTransmitted defined in IEEE +802.3-2012, Section 30.3.4.2. This statistic is a count of PAUSE frames +transmitted from the network controller. + +"rx_pause" is equivalent to aPAUSEMACCtrlFramesReceived defined in IEEE +802.3-2012, Section 30.3.4.3. This statistic is a count of PAUSE frames +received by the network controller. + +"rx_errors" is equivalent to ifInErrors defined in RFC 2863. This statistic is +a count of the number of packets received containing errors that prevented the +packet from being delivered to a higher level protocol. + +"tx_errors" is equivalent to ifOutErrors defined in RFC 2863. This statistic +is a count of the number of packets that could not be transmitted due to errors. + +"rx_unicast" is equivalent to ifInUcastPkts defined in RFC 2863. This +statistic is a count of the number of packets received that were not addressed +to the broadcast address or a multicast group. + +"rx_multicast" is equivalent to ifInMulticastPkts defined in RFC 2863. This +statistic is a count of the number of packets received that were addressed to +a multicast address group. + +"rx_broadcast" is equivalent to ifInBroadcastPkts defined in RFC 2863. This +statistic is a count of the number of packets received that were addressed to +the broadcast address. + +"tx_discards" is equivalent to ifOutDiscards defined in RFC 2863. This +statistic is the number of outbound packets not transmitted even though an +error was not detected. An example of a reason this might occur is to free up +internal buffer space. + +"tx_unicast" is equivalent to ifOutUcastPkts defined in RFC 2863. This +statistic counts the number of packets transmitted that were not addressed to +a multicast group or broadcast address. + +"tx_multicast" is equivalent to ifOutMulticastPkts defined in RFC 2863. This +statistic counts the number of packets transmitted that were addressed to a +multicast group. + +"tx_broadcast" is equivalent to ifOutBroadcastPkts defined in RFC 2863. This +statistic counts the number of packets transmitted that were addressed to a +broadcast address. + +"ether_drops" is equivalent to etherStatsDropEvents defined in RFC 2819. +This statistic counts the number of packets dropped due to lack of internal +controller resources. + +"rx_total_bytes" is equivalent to etherStatsOctets defined in RFC 2819. +This statistic counts the total number of bytes received by the controller, +including error and discarded packets. + +"rx_total_packets" is equivalent to etherStatsPkts defined in RFC 2819. +This statistic counts the total number of packets received by the controller, +including error, discarded, unicast, multicast, and broadcast packets. + +"rx_undersize" is equivalent to etherStatsUndersizePkts defined in RFC 2819. +This statistic counts the number of correctly formed packets received less +than 64 bytes long. + +"rx_oversize" is equivalent to etherStatsOversizePkts defined in RFC 2819. +This statistic counts the number of correctly formed packets greater than 1518 +bytes long. + +"rx_64_bytes" is equivalent to etherStatsPkts64Octets defined in RFC 2819. +This statistic counts the total number of packets received that were 64 octets +in length. + +"rx_65_127_bytes" is equivalent to etherStatsPkts65to127Octets defined in RFC +2819. This statistic counts the total number of packets received that were +between 65 and 127 octets in length inclusive. + +"rx_128_255_bytes" is equivalent to etherStatsPkts128to255Octets defined in +RFC 2819. This statistic is the total number of packets received that were +between 128 and 255 octets in length inclusive. + +"rx_256_511_bytes" is equivalent to etherStatsPkts256to511Octets defined in +RFC 2819. This statistic is the total number of packets received that were +between 256 and 511 octets in length inclusive. + +"rx_512_1023_bytes" is equivalent to etherStatsPkts512to1023Octets defined in +RFC 2819. This statistic is the total number of packets received that were +between 512 and 1023 octets in length inclusive. + +"rx_1024_1518_bytes" is equivalent to etherStatsPkts1024to1518Octets define +in RFC 2819. This statistic is the total number of packets received that were +between 1024 and 1518 octets in length inclusive. + +"rx_gte_1519_bytes" is a statistic defined specific to the behavior of the +Altera TSE. This statistics counts the number of received good and errored +frames between the length of 1519 and the maximum frame length configured +in the frm_length register. See the Altera TSE User Guide for More details. + +"rx_jabbers" is equivalent to etherStatsJabbers defined in RFC 2819. This +statistic is the total number of packets received that were longer than 1518 +octets, and had either a bad CRC with an integral number of octets (CRC Error) +or a bad CRC with a non-integral number of octets (Alignment Error). + +"rx_runts" is equivalent to etherStatsFragments defined in RFC 2819. This +statistic is the total number of packets received that were less than 64 octets +in length and had either a bad CRC with an integral number of octets (CRC +error) or a bad CRC with a non-integral number of octets (Alignment Error). diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst new file mode 100644 index 000000000..3561a8a29 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst @@ -0,0 +1,325 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================ +Linux kernel driver for Elastic Network Adapter (ENA) family +============================================================ + +Overview +======== + +ENA is a networking interface designed to make good use of modern CPU +features and system architectures. + +The ENA device exposes a lightweight management interface with a +minimal set of memory mapped registers and extendable command set +through an Admin Queue. + +The driver supports a range of ENA devices, is link-speed independent +(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has +a negotiated and extendable feature set. + +Some ENA devices support SR-IOV. This driver is used for both the +SR-IOV Physical Function (PF) and Virtual Function (VF) devices. + +ENA devices enable high speed and low overhead network traffic +processing by providing multiple Tx/Rx queue pairs (the maximum number +is advertised by the device via the Admin Queue), a dedicated MSI-X +interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation, +and CPU cacheline optimized data placement. + +The ENA driver supports industry standard TCP/IP offload features such +as checksum offload and TCP transmit segmentation offload (TSO). +Receive-side scaling (RSS) is supported for multi-core scaling. + +The ENA driver and its corresponding devices implement health +monitoring mechanisms such as watchdog, enabling the device and driver +to recover in a manner transparent to the application, as well as +debug logs. + +Some of the ENA devices support a working mode called Low-latency +Queue (LLQ), which saves several more microseconds. + +ENA Source Code Directory Structure +=================================== + +================= ====================================================== +ena_com.[ch] Management communication layer. This layer is + responsible for the handling all the management + (admin) communication between the device and the + driver. +ena_eth_com.[ch] Tx/Rx data path. +ena_admin_defs.h Definition of ENA management interface. +ena_eth_io_defs.h Definition of ENA data path interface. +ena_common_defs.h Common definitions for ena_com layer. +ena_regs_defs.h Definition of ENA PCI memory-mapped (MMIO) registers. +ena_netdev.[ch] Main Linux kernel driver. +ena_syfsfs.[ch] Sysfs files. +ena_ethtool.c ethtool callbacks. +ena_pci_id_tbl.h Supported device IDs. +================= ====================================================== + +Management Interface: +===================== + +ENA management interface is exposed by means of: + +- PCIe Configuration Space +- Device Registers +- Admin Queue (AQ) and Admin Completion Queue (ACQ) +- Asynchronous Event Notification Queue (AENQ) + +ENA device MMIO Registers are accessed only during driver +initialization and are not involved in further normal device +operation. + +AQ is used for submitting management commands, and the +results/responses are reported asynchronously through ACQ. + +ENA introduces a small set of management commands with room for +vendor-specific extensions. Most of the management operations are +framed in a generic Get/Set feature command. + +The following admin queue commands are supported: + +- Create I/O submission queue +- Create I/O completion queue +- Destroy I/O submission queue +- Destroy I/O completion queue +- Get feature +- Set feature +- Configure AENQ +- Get statistics + +Refer to ena_admin_defs.h for the list of supported Get/Set Feature +properties. + +The Asynchronous Event Notification Queue (AENQ) is a uni-directional +queue used by the ENA device to send to the driver events that cannot +be reported using ACQ. AENQ events are subdivided into groups. Each +group may have multiple syndromes, as shown below + +The events are: + + ==================== =============== + Group Syndrome + ==================== =============== + Link state change **X** + Fatal error **X** + Notification Suspend traffic + Notification Resume traffic + Keep-Alive **X** + ==================== =============== + +ACQ and AENQ share the same MSI-X vector. + +Keep-Alive is a special mechanism that allows monitoring of the +device's health. The driver maintains a watchdog (WD) handler which, +if fired, logs the current state and statistics then resets and +restarts the ENA device and driver. A Keep-Alive event is delivered by +the device every second. The driver re-arms the WD upon reception of a +Keep-Alive event. A missed Keep-Alive event causes the WD handler to +fire. + +Data Path Interface +=================== +I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx +SQ correspondingly). Each SQ has a completion queue (CQ) associated +with it. + +The SQs and CQs are implemented as descriptor rings in contiguous +physical memory. + +The ENA driver supports two Queue Operation modes for Tx SQs: + +- Regular mode + + * In this mode the Tx SQs reside in the host's memory. The ENA + device fetches the ENA Tx descriptors and packet data from host + memory. + +- Low Latency Queue (LLQ) mode or "push-mode". + + * In this mode the driver pushes the transmit descriptors and the + first 128 bytes of the packet directly to the ENA device memory + space. The rest of the packet payload is fetched by the + device. For this operation mode, the driver uses a dedicated PCI + device memory BAR, which is mapped with write-combine capability. + +The Rx SQs support only the regular mode. + +Note: Not all ENA devices support LLQ, and this feature is negotiated + with the device upon initialization. If the ENA device does not + support LLQ mode, the driver falls back to the regular mode. + +The driver supports multi-queue for both Tx and Rx. This has various +benefits: + +- Reduced CPU/thread/process contention on a given Ethernet interface. +- Cache miss rate on completion is reduced, particularly for data + cache lines that hold the sk_buff structures. +- Increased process-level parallelism when handling received packets. +- Increased data cache hit rate, by steering kernel processing of + packets to the CPU, where the application thread consuming the + packet is running. +- In hardware interrupt re-direction. + +Interrupt Modes +=============== +The driver assigns a single MSI-X vector per queue pair (for both Tx +and Rx directions). The driver assigns an additional dedicated MSI-X vector +for management (for ACQ and AENQ). + +Management interrupt registration is performed when the Linux kernel +probes the adapter, and it is de-registered when the adapter is +removed. I/O queue interrupt registration is performed when the Linux +interface of the adapter is opened, and it is de-registered when the +interface is closed. + +The management interrupt is named:: + + ena-mgmnt@pci: + +and for each queue pair, an interrupt is named:: + + -Tx-Rx- + +The ENA device operates in auto-mask and auto-clear interrupt +modes. That is, once MSI-X is delivered to the host, its Cause bit is +automatically cleared and the interrupt is masked. The interrupt is +unmasked by the driver after NAPI processing is complete. + +Interrupt Moderation +==================== +ENA driver and device can operate in conventional or adaptive interrupt +moderation mode. + +In conventional mode the driver instructs device to postpone interrupt +posting according to static interrupt delay value. The interrupt delay +value can be configured through ethtool(8). The following ethtool +parameters are supported by the driver: tx-usecs, rx-usecs + +In adaptive interrupt moderation mode the interrupt delay value is +updated by the driver dynamically and adjusted every NAPI cycle +according to the traffic nature. + +Adaptive coalescing can be switched on/off through ethtool(8) +adaptive_rx on|off parameter. + +More information about Adaptive Interrupt Moderation (DIM) can be found in +Documentation/networking/net_dim.rst + +RX copybreak +============ +The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK +and can be configured by the ETHTOOL_STUNABLE command of the +SIOCETHTOOL ioctl. + +SKB +=== +The driver-allocated SKB for frames received from Rx handling using +NAPI context. The allocation method depends on the size of the packet. +If the frame length is larger than rx_copybreak, napi_get_frags() +is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer +content is copied (by CPU) to the SKB, and the buffer is recycled. + +Statistics +========== +The user can obtain ENA device and driver statistics using ethtool. +The driver can collect regular or extended statistics (including +per-queue stats) from the device. + +In addition the driver logs the stats to syslog upon device reset. + +MTU +=== +The driver supports an arbitrarily large MTU with a maximum that is +negotiated with the device. The driver configures MTU using the +SetFeature command (ENA_ADMIN_MTU property). The user can change MTU +via ip(8) and similar legacy tools. + +Stateless Offloads +================== +The ENA driver supports: + +- TSO over IPv4/IPv6 +- TSO with ECN +- IPv4 header checksum offload +- TCP/UDP over IPv4/IPv6 checksum offloads + +RSS +=== +- The ENA device supports RSS that allows flexible Rx traffic + steering. +- Toeplitz and CRC32 hash functions are supported. +- Different combinations of L2/L3/L4 fields can be configured as + inputs for hash functions. +- The driver configures RSS settings using the AQ SetFeature command + (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and + ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG properties). +- If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash + function delivered in the Rx CQ descriptor is set in the received + SKB. +- The user can provide a hash key, hash function, and configure the + indirection table through ethtool(8). + +DATA PATH +========= +Tx +-- + +end_start_xmit() is called by the stack. This function does the following: + +- Maps data buffers (skb->data and frags). +- Populates ena_buf for the push buffer (if the driver and device are + in push mode.) +- Prepares ENA bufs for the remaining frags. +- Allocates a new request ID from the empty req_id ring. The request + ID is the index of the packet in the Tx info. This is used for + out-of-order TX completions. +- Adds the packet to the proper place in the Tx ring. +- Calls ena_com_prepare_tx(), an ENA communication layer that converts + the ena_bufs to ENA descriptors (and adds meta ENA descriptors as + needed.) + + * This function also copies the ENA descriptors and the push buffer + to the Device memory space (if in push mode.) + +- Writes doorbell to the ENA device. +- When the ENA device finishes sending the packet, a completion + interrupt is raised. +- The interrupt handler schedules NAPI. +- The ena_clean_tx_irq() function is called. This function handles the + completion descriptors generated by the ENA, with a single + completion descriptor per completed packet. + + * req_id is retrieved from the completion descriptor. The tx_info of + the packet is retrieved via the req_id. The data buffers are + unmapped and req_id is returned to the empty req_id ring. + * The function stops when the completion descriptors are completed or + the budget is reached. + +Rx +-- + +- When a packet is received from the ENA device. +- The interrupt handler schedules NAPI. +- The ena_clean_rx_irq() function is called. This function calls + ena_rx_pkt(), an ENA communication layer function, which returns the + number of descriptors used for a new unhandled packet, and zero if + no new packet is found. +- Then it calls the ena_clean_rx_irq() function. +- ena_eth_rx_skb() checks packet length: + + * If the packet is small (len < rx_copybreak), the driver allocates + a SKB for the new packet, and copies the packet payload into the + SKB data buffer. + + - In this way the original data buffer is not passed to the stack + and is reused for future Rx packets. + + * Otherwise the function unmaps the Rx buffer, then allocates the + new SKB structure and hooks the Rx buffer to the SKB frags. + +- The new SKB is updated with the necessary information (protocol, + checksum hw verify result, etc.), and then passed to the network + stack, using the NAPI interface function napi_gro_receive(). diff --git a/Documentation/networking/device_drivers/ethernet/aquantia/atlantic.rst b/Documentation/networking/device_drivers/ethernet/aquantia/atlantic.rst new file mode 100644 index 000000000..595ddef1c --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/aquantia/atlantic.rst @@ -0,0 +1,556 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +=============================== +Marvell(Aquantia) AQtion Driver +=============================== + +For the aQuantia Multi-Gigabit PCI Express Family of Ethernet Adapters + +.. Contents + + - Identifying Your Adapter + - Configuration + - Supported ethtool options + - Command Line Parameters + - Config file parameters + - Support + - License + +Identifying Your Adapter +======================== + +The driver in this release is compatible with AQC-100, AQC-107, AQC-108 +based ethernet adapters. + + +SFP+ Devices (for AQC-100 based adapters) +----------------------------------------- + +This release tested with passive Direct Attach Cables (DAC) and SFP+/LC +Optical Transceiver. + +Configuration +============= + +Viewing Link Messages +--------------------- + Link messages will not be displayed to the console if the distribution is + restricting system messages. In order to see network driver link messages on + your console, set dmesg to eight by entering the following:: + + dmesg -n 8 + + .. note:: + + This setting is not saved across reboots. + +Jumbo Frames +------------ + The driver supports Jumbo Frames for all adapters. Jumbo Frames support is + enabled by changing the MTU to a value larger than the default of 1500. + The maximum value for the MTU is 16000. Use the `ip` command to + increase the MTU size. For example:: + + ip link set mtu 16000 dev enp1s0 + +ethtool +------- + The driver utilizes the ethtool interface for driver configuration and + diagnostics, as well as displaying statistical information. The latest + ethtool version is required for this functionality. + +NAPI +---- + NAPI (Rx polling mode) is supported in the atlantic driver. + +Supported ethtool options +========================= + +Viewing adapter settings +------------------------ + + :: + + ethtool + + Output example:: + + Settings for enp1s0: + Supported ports: [ TP ] + Supported link modes: 100baseT/Full + 1000baseT/Full + 10000baseT/Full + 2500baseT/Full + 5000baseT/Full + Supported pause frame use: Symmetric + Supports auto-negotiation: Yes + Supported FEC modes: Not reported + Advertised link modes: 100baseT/Full + 1000baseT/Full + 10000baseT/Full + 2500baseT/Full + 5000baseT/Full + Advertised pause frame use: Symmetric + Advertised auto-negotiation: Yes + Advertised FEC modes: Not reported + Speed: 10000Mb/s + Duplex: Full + Port: Twisted Pair + PHYAD: 0 + Transceiver: internal + Auto-negotiation: on + MDI-X: Unknown + Supports Wake-on: g + Wake-on: d + Link detected: yes + + + .. note:: + + AQrate speeds (2.5/5 Gb/s) will be displayed only with linux kernels > 4.10. + But you can still use these speeds:: + + ethtool -s eth0 autoneg off speed 2500 + +Viewing adapter information +--------------------------- + + :: + + ethtool -i + + Output example:: + + driver: atlantic + version: 5.2.0-050200rc5-generic-kern + firmware-version: 3.1.78 + expansion-rom-version: + bus-info: 0000:01:00.0 + supports-statistics: yes + supports-test: no + supports-eeprom-access: no + supports-register-dump: yes + supports-priv-flags: no + + +Viewing Ethernet adapter statistics +----------------------------------- + + :: + + ethtool -S + + Output example:: + + NIC statistics: + InPackets: 13238607 + InUCast: 13293852 + InMCast: 52 + InBCast: 3 + InErrors: 0 + OutPackets: 23703019 + OutUCast: 23704941 + OutMCast: 67 + OutBCast: 11 + InUCastOctects: 213182760 + OutUCastOctects: 22698443 + InMCastOctects: 6600 + OutMCastOctects: 8776 + InBCastOctects: 192 + OutBCastOctects: 704 + InOctects: 2131839552 + OutOctects: 226938073 + InPacketsDma: 95532300 + OutPacketsDma: 59503397 + InOctetsDma: 1137102462 + OutOctetsDma: 2394339518 + InDroppedDma: 0 + Queue[0] InPackets: 23567131 + Queue[0] OutPackets: 20070028 + Queue[0] InJumboPackets: 0 + Queue[0] InLroPackets: 0 + Queue[0] InErrors: 0 + Queue[1] InPackets: 45428967 + Queue[1] OutPackets: 11306178 + Queue[1] InJumboPackets: 0 + Queue[1] InLroPackets: 0 + Queue[1] InErrors: 0 + Queue[2] InPackets: 3187011 + Queue[2] OutPackets: 13080381 + Queue[2] InJumboPackets: 0 + Queue[2] InLroPackets: 0 + Queue[2] InErrors: 0 + Queue[3] InPackets: 23349136 + Queue[3] OutPackets: 15046810 + Queue[3] InJumboPackets: 0 + Queue[3] InLroPackets: 0 + Queue[3] InErrors: 0 + +Interrupt coalescing support +---------------------------- + + ITR mode, TX/RX coalescing timings could be viewed with:: + + ethtool -c + + and changed with:: + + ethtool -C tx-usecs rx-usecs + + To disable coalescing:: + + ethtool -C tx-usecs 0 rx-usecs 0 tx-max-frames 1 tx-max-frames 1 + +Wake on LAN support +------------------- + + WOL support by magic packet:: + + ethtool -s wol g + + To disable WOL:: + + ethtool -s wol d + +Set and check the driver message level +-------------------------------------- + + Set message level + + :: + + ethtool -s msglvl + + Level values: + + ====== ============================= + 0x0001 general driver status. + 0x0002 hardware probing. + 0x0004 link state. + 0x0008 periodic status check. + 0x0010 interface being brought down. + 0x0020 interface being brought up. + 0x0040 receive error. + 0x0080 transmit error. + 0x0200 interrupt handling. + 0x0400 transmit completion. + 0x0800 receive completion. + 0x1000 packet contents. + 0x2000 hardware status. + 0x4000 Wake-on-LAN status. + ====== ============================= + + By default, the level of debugging messages is set 0x0001(general driver status). + + Check message level + + :: + + ethtool | grep "Current message level" + + If you want to disable the output of messages:: + + ethtool -s msglvl 0 + +RX flow rules (ntuple filters) +------------------------------ + + There are separate rules supported, that applies in that order: + + 1. 16 VLAN ID rules + 2. 16 L2 EtherType rules + 3. 8 L3/L4 5-Tuple rules + + + The driver utilizes the ethtool interface for configuring ntuple filters, + via ``ethtool -N ``. + + To enable or disable the RX flow rules:: + + ethtool -K ethX ntuple + + When disabling ntuple filters, all the user programed filters are + flushed from the driver cache and hardware. All needed filters must + be re-added when ntuple is re-enabled. + + Because of the fixed order of the rules, the location of filters is also fixed: + + - Locations 0 - 15 for VLAN ID filters + - Locations 16 - 31 for L2 EtherType filters + - Locations 32 - 39 for L3/L4 5-tuple filters (locations 32, 36 for IPv6) + + The L3/L4 5-tuple (protocol, source and destination IP address, source and + destination TCP/UDP/SCTP port) is compared against 8 filters. For IPv4, up to + 8 source and destination addresses can be matched. For IPv6, up to 2 pairs of + addresses can be supported. Source and destination ports are only compared for + TCP/UDP/SCTP packets. + + To add a filter that directs packet to queue 5, use + ``<-N|-U|--config-nfc|--config-ntuple>`` switch:: + + ethtool -N flow-type udp4 src-ip 10.0.0.1 dst-ip 10.0.0.2 src-port 2000 dst-port 2001 action 5 + + - action is the queue number. + - loc is the rule number. + + For ``flow-type ip4|udp4|tcp4|sctp4|ip6|udp6|tcp6|sctp6`` you must set the loc + number within 32 - 39. + For ``flow-type ip4|udp4|tcp4|sctp4|ip6|udp6|tcp6|sctp6`` you can set 8 rules + for traffic IPv4 or you can set 2 rules for traffic IPv6. Loc number traffic + IPv6 is 32 and 36. + At the moment you can not use IPv4 and IPv6 filters at the same time. + + Example filter for IPv6 filter traffic:: + + sudo ethtool -N flow-type tcp6 src-ip 2001:db8:0:f101::1 dst-ip 2001:db8:0:f101::2 action 1 loc 32 + sudo ethtool -N flow-type ip6 src-ip 2001:db8:0:f101::2 dst-ip 2001:db8:0:f101::5 action -1 loc 36 + + Example filter for IPv4 filter traffic:: + + sudo ethtool -N flow-type udp4 src-ip 10.0.0.4 dst-ip 10.0.0.7 src-port 2000 dst-port 2001 loc 32 + sudo ethtool -N flow-type tcp4 src-ip 10.0.0.3 dst-ip 10.0.0.9 src-port 2000 dst-port 2001 loc 33 + sudo ethtool -N flow-type ip4 src-ip 10.0.0.6 dst-ip 10.0.0.4 loc 34 + + If you set action -1, then all traffic corresponding to the filter will be discarded. + + The maximum value action is 31. + + + The VLAN filter (VLAN id) is compared against 16 filters. + VLAN id must be accompanied by mask 0xF000. That is to distinguish VLAN filter + from L2 Ethertype filter with UserPriority since both User Priority and VLAN ID + are passed in the same 'vlan' parameter. + + To add a filter that directs packets from VLAN 2001 to queue 5:: + + ethtool -N flow-type ip4 vlan 2001 m 0xF000 action 1 loc 0 + + + L2 EtherType filters allows filter packet by EtherType field or both EtherType + and User Priority (PCP) field of 802.1Q. + UserPriority (vlan) parameter must be accompanied by mask 0x1FFF. That is to + distinguish VLAN filter from L2 Ethertype filter with UserPriority since both + User Priority and VLAN ID are passed in the same 'vlan' parameter. + + To add a filter that directs IP4 packess of priority 3 to queue 3:: + + ethtool -N flow-type ether proto 0x800 vlan 0x600 m 0x1FFF action 3 loc 16 + + To see the list of filters currently present:: + + ethtool <-u|-n|--show-nfc|--show-ntuple> + + Rules may be deleted from the table itself. This is done using:: + + sudo ethtool <-N|-U|--config-nfc|--config-ntuple> delete + + - loc is the rule number to be deleted. + + Rx filters is an interface to load the filter table that funnels all flow + into queue 0 unless an alternative queue is specified using "action". In that + case, any flow that matches the filter criteria will be directed to the + appropriate queue. RX filters is supported on all kernels 2.6.30 and later. + +RSS for UDP +----------- + + Currently, NIC does not support RSS for fragmented IP packets, which leads to + incorrect working of RSS for fragmented UDP traffic. To disable RSS for UDP the + RX Flow L3/L4 rule may be used. + + Example:: + + ethtool -N eth0 flow-type udp4 action 0 loc 32 + +UDP GSO hardware offload +------------------------ + + UDP GSO allows to boost UDP tx rates by offloading UDP headers allocation + into hardware. A special userspace socket option is required for this, + could be validated with /kernel/tools/testing/selftests/net/:: + + udpgso_bench_tx -u -4 -D 10.0.1.1 -s 6300 -S 100 + + Will cause sending out of 100 byte sized UDP packets formed from single + 6300 bytes user buffer. + + UDP GSO is configured by:: + + ethtool -K eth0 tx-udp-segmentation on + +Private flags (testing) +----------------------- + + Atlantic driver supports private flags for hardware custom features:: + + $ ethtool --show-priv-flags ethX + + Private flags for ethX: + DMASystemLoopback : off + PKTSystemLoopback : off + DMANetworkLoopback : off + PHYInternalLoopback: off + PHYExternalLoopback: off + + Example:: + + $ ethtool --set-priv-flags ethX DMASystemLoopback on + + DMASystemLoopback: DMA Host loopback. + PKTSystemLoopback: Packet buffer host loopback. + DMANetworkLoopback: Network side loopback on DMA block. + PHYInternalLoopback: Internal loopback on Phy. + PHYExternalLoopback: External loopback on Phy (with loopback ethernet cable). + + +Command Line Parameters +======================= +The following command line parameters are available on atlantic driver: + +aq_itr -Interrupt throttling mode +--------------------------------- +Accepted values: 0, 1, 0xFFFF + +Default value: 0xFFFF + +====== ============================================================== +0 Disable interrupt throttling. +1 Enable interrupt throttling and use specified tx and rx rates. +0xFFFF Auto throttling mode. Driver will choose the best RX and TX + interrupt throtting settings based on link speed. +====== ============================================================== + +aq_itr_tx - TX interrupt throttle rate +-------------------------------------- + +Accepted values: 0 - 0x1FF + +Default value: 0 + +TX side throttling in microseconds. Adapter will setup maximum interrupt delay +to this value. Minimum interrupt delay will be a half of this value + +aq_itr_rx - RX interrupt throttle rate +-------------------------------------- + +Accepted values: 0 - 0x1FF + +Default value: 0 + +RX side throttling in microseconds. Adapter will setup maximum interrupt delay +to this value. Minimum interrupt delay will be a half of this value + +.. note:: + + ITR settings could be changed in runtime by ethtool -c means (see below) + +Config file parameters +====================== + +For some fine tuning and performance optimizations, +some parameters can be changed in the {source_dir}/aq_cfg.h file. + +AQ_CFG_RX_PAGEORDER +------------------- + +Default value: 0 + +RX page order override. Thats a power of 2 number of RX pages allocated for +each descriptor. Received descriptor size is still limited by +AQ_CFG_RX_FRAME_MAX. + +Increasing pageorder makes page reuse better (actual on iommu enabled systems). + +AQ_CFG_RX_REFILL_THRES +---------------------- + +Default value: 32 + +RX refill threshold. RX path will not refill freed descriptors until the +specified number of free descriptors is observed. Larger values may help +better page reuse but may lead to packet drops as well. + +AQ_CFG_VECS_DEF +--------------- + +Number of queues + +Valid Range: 0 - 8 (up to AQ_CFG_VECS_MAX) + +Default value: 8 + +Notice this value will be capped by the number of cores available on the system. + +AQ_CFG_IS_RSS_DEF +----------------- + +Enable/disable Receive Side Scaling + +This feature allows the adapter to distribute receive processing +across multiple CPU-cores and to prevent from overloading a single CPU core. + +Valid values + +== ======== +0 disabled +1 enabled +== ======== + +Default value: 1 + +AQ_CFG_NUM_RSS_QUEUES_DEF +------------------------- + +Number of queues for Receive Side Scaling + +Valid Range: 0 - 8 (up to AQ_CFG_VECS_DEF) + +Default value: AQ_CFG_VECS_DEF + +AQ_CFG_IS_LRO_DEF +----------------- + +Enable/disable Large Receive Offload + +This offload enables the adapter to coalesce multiple TCP segments and indicate +them as a single coalesced unit to the OS networking subsystem. + +The system consumes less energy but it also introduces more latency in packets +processing. + +Valid values + +== ======== +0 disabled +1 enabled +== ======== + +Default value: 1 + +AQ_CFG_TX_CLEAN_BUDGET +---------------------- + +Maximum descriptors to cleanup on TX at once. + +Default value: 256 + +After the aq_cfg.h file changed the driver must be rebuilt to take effect. + +Support +======= + +If an issue is identified with the released source code on the supported +kernel with a supported adapter, email the specific information related +to the issue to aqn_support@marvell.com + +License +======= + +aQuantia Corporation Network Driver + +Copyright |copy| 2014 - 2019 aQuantia Corporation. + +This program is free software; you can redistribute it and/or modify it +under the terms and conditions of the GNU General Public License, +version 2, as published by the Free Software Foundation. diff --git a/Documentation/networking/device_drivers/ethernet/chelsio/cxgb.rst b/Documentation/networking/device_drivers/ethernet/chelsio/cxgb.rst new file mode 100644 index 000000000..435dce5fa --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/chelsio/cxgb.rst @@ -0,0 +1,393 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +============================================= +Chelsio N210 10Gb Ethernet Network Controller +============================================= + +Driver Release Notes for Linux + +Version 2.1.1 + +June 20, 2005 + +.. Contents + + INTRODUCTION + FEATURES + PERFORMANCE + DRIVER MESSAGES + KNOWN ISSUES + SUPPORT + + +Introduction +============ + + This document describes the Linux driver for Chelsio 10Gb Ethernet Network + Controller. This driver supports the Chelsio N210 NIC and is backward + compatible with the Chelsio N110 model 10Gb NICs. + + +Features +======== + +Adaptive Interrupts (adaptive-rx) +--------------------------------- + + This feature provides an adaptive algorithm that adjusts the interrupt + coalescing parameters, allowing the driver to dynamically adapt the latency + settings to achieve the highest performance during various types of network + load. + + The interface used to control this feature is ethtool. Please see the + ethtool manpage for additional usage information. + + By default, adaptive-rx is disabled. + To enable adaptive-rx:: + + ethtool -C adaptive-rx on + + To disable adaptive-rx, use ethtool:: + + ethtool -C adaptive-rx off + + After disabling adaptive-rx, the timer latency value will be set to 50us. + You may set the timer latency after disabling adaptive-rx:: + + ethtool -C rx-usecs + + An example to set the timer latency value to 100us on eth0:: + + ethtool -C eth0 rx-usecs 100 + + You may also provide a timer latency value while disabling adaptive-rx:: + + ethtool -C adaptive-rx off rx-usecs + + If adaptive-rx is disabled and a timer latency value is specified, the timer + will be set to the specified value until changed by the user or until + adaptive-rx is enabled. + + To view the status of the adaptive-rx and timer latency values:: + + ethtool -c + + +TCP Segmentation Offloading (TSO) Support +----------------------------------------- + + This feature, also known as "large send", enables a system's protocol stack + to offload portions of outbound TCP processing to a network interface card + thereby reducing system CPU utilization and enhancing performance. + + The interface used to control this feature is ethtool version 1.8 or higher. + Please see the ethtool manpage for additional usage information. + + By default, TSO is enabled. + To disable TSO:: + + ethtool -K tso off + + To enable TSO:: + + ethtool -K tso on + + To view the status of TSO:: + + ethtool -k + + +Performance +=========== + + The following information is provided as an example of how to change system + parameters for "performance tuning" an what value to use. You may or may not + want to change these system parameters, depending on your server/workstation + application. Doing so is not warranted in any way by Chelsio Communications, + and is done at "YOUR OWN RISK". Chelsio will not be held responsible for loss + of data or damage to equipment. + + Your distribution may have a different way of doing things, or you may prefer + a different method. These commands are shown only to provide an example of + what to do and are by no means definitive. + + Making any of the following system changes will only last until you reboot + your system. You may want to write a script that runs at boot-up which + includes the optimal settings for your system. + + Setting PCI Latency Timer:: + + setpci -d 1425:: + +* 0x0c.l=0x0000F800 + + Disabling TCP timestamp:: + + sysctl -w net.ipv4.tcp_timestamps=0 + + Disabling SACK:: + + sysctl -w net.ipv4.tcp_sack=0 + + Setting large number of incoming connection requests:: + + sysctl -w net.ipv4.tcp_max_syn_backlog=3000 + + Setting maximum receive socket buffer size:: + + sysctl -w net.core.rmem_max=1024000 + + Setting maximum send socket buffer size:: + + sysctl -w net.core.wmem_max=1024000 + + Set smp_affinity (on a multiprocessor system) to a single CPU:: + + echo 1 > /proc/irq//smp_affinity + + Setting default receive socket buffer size:: + + sysctl -w net.core.rmem_default=524287 + + Setting default send socket buffer size:: + + sysctl -w net.core.wmem_default=524287 + + Setting maximum option memory buffers:: + + sysctl -w net.core.optmem_max=524287 + + Setting maximum backlog (# of unprocessed packets before kernel drops):: + + sysctl -w net.core.netdev_max_backlog=300000 + + Setting TCP read buffers (min/default/max):: + + sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000" + + Setting TCP write buffers (min/pressure/max):: + + sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000" + + Setting TCP buffer space (min/pressure/max):: + + sysctl -w net.ipv4.tcp_mem="10000000 10000000 10000000" + + TCP window size for single connections: + + The receive buffer (RX_WINDOW) size must be at least as large as the + Bandwidth-Delay Product of the communication link between the sender and + receiver. Due to the variations of RTT, you may want to increase the buffer + size up to 2 times the Bandwidth-Delay Product. Reference page 289 of + "TCP/IP Illustrated, Volume 1, The Protocols" by W. Richard Stevens. + + At 10Gb speeds, use the following formula:: + + RX_WINDOW >= 1.25MBytes * RTT(in milliseconds) + Example for RTT with 100us: RX_WINDOW = (1,250,000 * 0.1) = 125,000 + + RX_WINDOW sizes of 256KB - 512KB should be sufficient. + + Setting the min, max, and default receive buffer (RX_WINDOW) size:: + + sysctl -w net.ipv4.tcp_rmem=" " + + TCP window size for multiple connections: + The receive buffer (RX_WINDOW) size may be calculated the same as single + connections, but should be divided by the number of connections. The + smaller window prevents congestion and facilitates better pacing, + especially if/when MAC level flow control does not work well or when it is + not supported on the machine. Experimentation may be necessary to attain + the correct value. This method is provided as a starting point for the + correct receive buffer size. + + Setting the min, max, and default receive buffer (RX_WINDOW) size is + performed in the same manner as single connection. + + +Driver Messages +=============== + + The following messages are the most common messages logged by syslog. These + may be found in /var/log/messages. + + Driver up:: + + Chelsio Network Driver - version 2.1.1 + + NIC detected:: + + eth#: Chelsio N210 1x10GBaseX NIC (rev #), PCIX 133MHz/64-bit + + Link up:: + + eth#: link is up at 10 Gbps, full duplex + + Link down:: + + eth#: link is down + + +Known Issues +============ + + These issues have been identified during testing. The following information + is provided as a workaround to the problem. In some cases, this problem is + inherent to Linux or to a particular Linux Distribution and/or hardware + platform. + + 1. Large number of TCP retransmits on a multiprocessor (SMP) system. + + On a system with multiple CPUs, the interrupt (IRQ) for the network + controller may be bound to more than one CPU. This will cause TCP + retransmits if the packet data were to be split across different CPUs + and re-assembled in a different order than expected. + + To eliminate the TCP retransmits, set smp_affinity on the particular + interrupt to a single CPU. You can locate the interrupt (IRQ) used on + the N110/N210 by using ifconfig:: + + ifconfig | grep Interrupt + + Set the smp_affinity to a single CPU:: + + echo 1 > /proc/irq//smp_affinity + + It is highly suggested that you do not run the irqbalance daemon on your + system, as this will change any smp_affinity setting you have applied. + The irqbalance daemon runs on a 10 second interval and binds interrupts + to the least loaded CPU determined by the daemon. To disable this daemon:: + + chkconfig --level 2345 irqbalance off + + By default, some Linux distributions enable the kernel feature, + irqbalance, which performs the same function as the daemon. To disable + this feature, add the following line to your bootloader:: + + noirqbalance + + Example using the Grub bootloader:: + + title Red Hat Enterprise Linux AS (2.4.21-27.ELsmp) + root (hd0,0) + kernel /vmlinuz-2.4.21-27.ELsmp ro root=/dev/hda3 noirqbalance + initrd /initrd-2.4.21-27.ELsmp.img + + 2. After running insmod, the driver is loaded and the incorrect network + interface is brought up without running ifup. + + When using 2.4.x kernels, including RHEL kernels, the Linux kernel + invokes a script named "hotplug". This script is primarily used to + automatically bring up USB devices when they are plugged in, however, + the script also attempts to automatically bring up a network interface + after loading the kernel module. The hotplug script does this by scanning + the ifcfg-eth# config files in /etc/sysconfig/network-scripts, looking + for HWADDR=. + + If the hotplug script does not find the HWADDRR within any of the + ifcfg-eth# files, it will bring up the device with the next available + interface name. If this interface is already configured for a different + network card, your new interface will have incorrect IP address and + network settings. + + To solve this issue, you can add the HWADDR= key to the + interface config file of your network controller. + + To disable this "hotplug" feature, you may add the driver (module name) + to the "blacklist" file located in /etc/hotplug. It has been noted that + this does not work for network devices because the net.agent script + does not use the blacklist file. Simply remove, or rename, the net.agent + script located in /etc/hotplug to disable this feature. + + 3. Transport Protocol (TP) hangs when running heavy multi-connection traffic + on an AMD Opteron system with HyperTransport PCI-X Tunnel chipset. + + If your AMD Opteron system uses the AMD-8131 HyperTransport PCI-X Tunnel + chipset, you may experience the "133-Mhz Mode Split Completion Data + Corruption" bug identified by AMD while using a 133Mhz PCI-X card on the + bus PCI-X bus. + + AMD states, "Under highly specific conditions, the AMD-8131 PCI-X Tunnel + can provide stale data via split completion cycles to a PCI-X card that + is operating at 133 Mhz", causing data corruption. + + AMD's provides three workarounds for this problem, however, Chelsio + recommends the first option for best performance with this bug: + + For 133Mhz secondary bus operation, limit the transaction length and + the number of outstanding transactions, via BIOS configuration + programming of the PCI-X card, to the following: + + Data Length (bytes): 1k + + Total allowed outstanding transactions: 2 + + Please refer to AMD 8131-HT/PCI-X Errata 26310 Rev 3.08 August 2004, + section 56, "133-MHz Mode Split Completion Data Corruption" for more + details with this bug and workarounds suggested by AMD. + + It may be possible to work outside AMD's recommended PCI-X settings, try + increasing the Data Length to 2k bytes for increased performance. If you + have issues with these settings, please revert to the "safe" settings + and duplicate the problem before submitting a bug or asking for support. + + .. note:: + + The default setting on most systems is 8 outstanding transactions + and 2k bytes data length. + + 4. On multiprocessor systems, it has been noted that an application which + is handling 10Gb networking can switch between CPUs causing degraded + and/or unstable performance. + + If running on an SMP system and taking performance measurements, it + is suggested you either run the latest netperf-2.4.0+ or use a binding + tool such as Tim Hockin's procstate utilities (runon) + . + + Binding netserver and netperf (or other applications) to particular + CPUs will have a significant difference in performance measurements. + You may need to experiment which CPU to bind the application to in + order to achieve the best performance for your system. + + If you are developing an application designed for 10Gb networking, + please keep in mind you may want to look at kernel functions + sched_setaffinity & sched_getaffinity to bind your application. + + If you are just running user-space applications such as ftp, telnet, + etc., you may want to try the runon tool provided by Tim Hockin's + procstate utility. You could also try binding the interface to a + particular CPU: runon 0 ifup eth0 + + +Support +======= + + If you have problems with the software or hardware, please contact our + customer support team via email at support@chelsio.com or check our website + at http://www.chelsio.com + +------------------------------------------------------------------------------- + +:: + + Chelsio Communications + 370 San Aleso Ave. + Suite 100 + Sunnyvale, CA 94085 + http://www.chelsio.com + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License, version 2, as +published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +THIS SOFTWARE IS PROVIDED ``AS IS`` AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +Copyright |copy| 2003-2005 Chelsio Communications. All rights reserved. diff --git a/Documentation/networking/device_drivers/ethernet/cirrus/cs89x0.rst b/Documentation/networking/device_drivers/ethernet/cirrus/cs89x0.rst new file mode 100644 index 000000000..e5c283940 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/cirrus/cs89x0.rst @@ -0,0 +1,647 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================================ +Cirrus Logic LAN CS8900/CS8920 Ethernet Adapters +================================================ + +.. note:: + + This document was contributed by Cirrus Logic for kernel 2.2.5. This version + has been updated for 2.3.48 by Andrew Morton. + + Still, this is too outdated! A major cleanup is needed here. + +Cirrus make a copy of this driver available at their website, as +described below. In general, you should use the driver version which +comes with your Linux distribution. + + +Linux Network Interface Driver ver. 2.00 + + +.. TABLE OF CONTENTS + + 1.0 CIRRUS LOGIC LAN CS8900/CS8920 ETHERNET ADAPTERS + 1.1 Product Overview + 1.2 Driver Description + 1.2.1 Driver Name + 1.2.2 File in the Driver Package + 1.3 System Requirements + 1.4 Licensing Information + + 2.0 ADAPTER INSTALLATION and CONFIGURATION + 2.1 CS8900-based Adapter Configuration + 2.2 CS8920-based Adapter Configuration + + 3.0 LOADING THE DRIVER AS A MODULE + + 4.0 COMPILING THE DRIVER + 4.1 Compiling the Driver as a Loadable Module + 4.2 Compiling the driver to support memory mode + 4.3 Compiling the driver to support Rx DMA + + 5.0 TESTING AND TROUBLESHOOTING + 5.1 Known Defects and Limitations + 5.2 Testing the Adapter + 5.2.1 Diagnostic Self-Test + 5.2.2 Diagnostic Network Test + 5.3 Using the Adapter's LEDs + 5.4 Resolving I/O Conflicts + + 6.0 TECHNICAL SUPPORT + 6.1 Contacting Cirrus Logic's Technical Support + 6.2 Information Required Before Contacting Technical Support + 6.3 Obtaining the Latest Driver Version + 6.4 Current maintainer + 6.5 Kernel boot parameters + + +1. Cirrus Logic LAN CS8900/CS8920 Ethernet Adapters +=================================================== + + +1.1. Product Overview +===================== + +The CS8900-based ISA Ethernet Adapters from Cirrus Logic follow +IEEE 802.3 standards and support half or full-duplex operation in ISA bus +computers on 10 Mbps Ethernet networks. The adapters are designed for operation +in 16-bit ISA or EISA bus expansion slots and are available in +10BaseT-only or 3-media configurations (10BaseT, 10Base2, and AUI for 10Base-5 +or fiber networks). + +CS8920-based adapters are similar to the CS8900-based adapter with additional +features for Plug and Play (PnP) support and Wakeup Frame recognition. As +such, the configuration procedures differ somewhat between the two types of +adapters. Refer to the "Adapter Configuration" section for details on +configuring both types of adapters. + + +1.2. Driver Description +======================= + +The CS8900/CS8920 Ethernet Adapter driver for Linux supports the Linux +v2.3.48 or greater kernel. It can be compiled directly into the kernel +or loaded at run-time as a device driver module. + +1.2.1 Driver Name: cs89x0 + +1.2.2 Files in the Driver Archive: + +The files in the driver at Cirrus' website include: + + =================== ==================================================== + readme.txt this file + build batch file to compile cs89x0.c. + cs89x0.c driver C code + cs89x0.h driver header file + cs89x0.o pre-compiled module (for v2.2.5 kernel) + config/Config.in sample file to include cs89x0 driver in the kernel. + config/Makefile sample file to include cs89x0 driver in the kernel. + config/Space.c sample file to include cs89x0 driver in the kernel. + =================== ==================================================== + + + +1.3. System Requirements +------------------------ + +The following hardware is required: + + * Cirrus Logic LAN (CS8900/20-based) Ethernet ISA Adapter + + * IBM or IBM-compatible PC with: + * An 80386 or higher processor + * 16 bytes of contiguous IO space available between 210h - 370h + * One available IRQ (5,10,11,or 12 for the CS8900, 3-7,9-15 for CS8920). + + * Appropriate cable (and connector for AUI, 10BASE-2) for your network + topology. + +The following software is required: + +* LINUX kernel version 2.3.48 or higher + + * CS8900/20 Setup Utility (DOS-based) + + * LINUX kernel sources for your kernel (if compiling into kernel) + + * GNU Toolkit (gcc and make) v2.6 or above (if compiling into kernel + or a module) + + + +1.4. Licensing Information +-------------------------- + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, version 1. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +For a full copy of the GNU General Public License, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + + +2. Adapter Installation and Configuration +========================================= + +Both the CS8900 and CS8920-based adapters can be configured using parameters +stored in an on-board EEPROM. You must use the DOS-based CS8900/20 Setup +Utility if you want to change the adapter's configuration in EEPROM. + +When loading the driver as a module, you can specify many of the adapter's +configuration parameters on the command-line to override the EEPROM's settings +or for interface configuration when an EEPROM is not used. (CS8920-based +adapters must use an EEPROM.) See Section 3.0 LOADING THE DRIVER AS A MODULE. + +Since the CS8900/20 Setup Utility is a DOS-based application, you must install +and configure the adapter in a DOS-based system using the CS8900/20 Setup +Utility before installation in the target LINUX system. (Not required if +installing a CS8900-based adapter and the default configuration is acceptable.) + + +2.1. CS8900-based Adapter Configuration +--------------------------------------- + +CS8900-based adapters shipped from Cirrus Logic have been configured +with the following "default" settings:: + + Operation Mode: Memory Mode + IRQ: 10 + Base I/O Address: 300 + Memory Base Address: D0000 + Optimization: DOS Client + Transmission Mode: Half-duplex + BootProm: None + Media Type: Autodetect (3-media cards) or + 10BASE-T (10BASE-T only adapter) + +You should only change the default configuration settings if conflicts with +another adapter exists. To change the adapter's configuration, run the +CS8900/20 Setup Utility. + + +2.2. CS8920-based Adapter Configuration +--------------------------------------- + +CS8920-based adapters are shipped from Cirrus Logic configured as Plug +and Play (PnP) enabled. However, since the cs89x0 driver does NOT +support PnP, you must install the CS8920 adapter in a DOS-based PC and +run the CS8900/20 Setup Utility to disable PnP and configure the +adapter before installation in the target Linux system. Failure to do +this will leave the adapter inactive and the driver will be unable to +communicate with the adapter. + +:: + + **************************************************************** + * CS8920-BASED ADAPTERS: * + * * + * CS8920-BASED ADAPTERS ARE PLUG and PLAY ENABLED BY DEFAULT. * + * THE CS89X0 DRIVER DOES NOT SUPPORT PnP. THEREFORE, YOU MUST * + * RUN THE CS8900/20 SETUP UTILITY TO DISABLE PnP SUPPORT AND * + * TO ACTIVATE THE ADAPTER. * + **************************************************************** + + + + +3. Loading the Driver as a Module +================================= + +If the driver is compiled as a loadable module, you can load the driver module +with the 'modprobe' command. Many of the adapter's configuration parameters can +be specified as command-line arguments to the load command. This facility +provides a means to override the EEPROM's settings or for interface +configuration when an EEPROM is not used. + +Example:: + + insmod cs89x0.o io=0x200 irq=0xA media=aui + +This example loads the module and configures the adapter to use an IO port base +address of 200h, interrupt 10, and use the AUI media connection. The following +configuration options are available on the command line:: + + io=### - specify IO address (200h-360h) + irq=## - specify interrupt level + use_dma=1 - Enable DMA + dma=# - specify dma channel (Driver is compiled to support + Rx DMA only) + dmasize=# (16 or 64) - DMA size 16K or 64K. Default value is set to 16. + media=rj45 - specify media type + or media=bnc + or media=aui + or media=auto + duplex=full - specify forced half/full/autonegotiate duplex + or duplex=half + or duplex=auto + debug=# - debug level (only available if the driver was compiled + for debugging) + +**Notes:** + +a) If an EEPROM is present, any specified command-line parameter + will override the corresponding configuration value stored in + EEPROM. + +b) The "io" parameter must be specified on the command-line. + +c) The driver's hardware probe routine is designed to avoid + writing to I/O space until it knows that there is a cs89x0 + card at the written addresses. This could cause problems + with device probing. To avoid this behaviour, add one + to the ``io=`` module parameter. This doesn't actually change + the I/O address, but it is a flag to tell the driver + to partially initialise the hardware before trying to + identify the card. This could be dangerous if you are + not sure that there is a cs89x0 card at the provided address. + + For example, to scan for an adapter located at IO base 0x300, + specify an IO address of 0x301. + +d) The "duplex=auto" parameter is only supported for the CS8920. + +e) The minimum command-line configuration required if an EEPROM is + not present is: + + io + irq + media type (no autodetect) + +f) The following additional parameters are CS89XX defaults (values + used with no EEPROM or command-line argument). + + * DMA Burst = enabled + * IOCHRDY Enabled = enabled + * UseSA = enabled + * CS8900 defaults to half-duplex if not specified on command-line + * CS8920 defaults to autoneg if not specified on command-line + * Use reset defaults for other config parameters + * dma_mode = 0 + +g) You can use ifconfig to set the adapter's Ethernet address. + +h) Many Linux distributions use the 'modprobe' command to load + modules. This program uses the '/etc/conf.modules' file to + determine configuration information which is passed to a driver + module when it is loaded. All the configuration options which are + described above may be placed within /etc/conf.modules. + + For example:: + + > cat /etc/conf.modules + ... + alias eth0 cs89x0 + options cs89x0 io=0x0200 dma=5 use_dma=1 + ... + + In this example we are telling the module system that the + ethernet driver for this machine should use the cs89x0 driver. We + are asking 'modprobe' to pass the 'io', 'dma' and 'use_dma' + arguments to the driver when it is loaded. + +i) Cirrus recommend that the cs89x0 use the ISA DMA channels 5, 6 or + 7. You will probably find that other DMA channels will not work. + +j) The cs89x0 supports DMA for receiving only. DMA mode is + significantly more efficient. Flooding a 400 MHz Celeron machine + with large ping packets consumes 82% of its CPU capacity in non-DMA + mode. With DMA this is reduced to 45%. + +k) If your Linux kernel was compiled with inbuilt plug-and-play + support you will be able to find information about the cs89x0 card + with the command:: + + cat /proc/isapnp + +l) If during DMA operation you find erratic behavior or network data + corruption you should use your PC's BIOS to slow the EISA bus clock. + +m) If the cs89x0 driver is compiled directly into the kernel + (non-modular) then its I/O address is automatically determined by + ISA bus probing. The IRQ number, media options, etc are determined + from the card's EEPROM. + +n) If the cs89x0 driver is compiled directly into the kernel, DMA + mode may be selected by providing the kernel with a boot option + 'cs89x0_dma=N' where 'N' is the desired DMA channel number (5, 6 or 7). + + Kernel boot options may be provided on the LILO command line:: + + LILO boot: linux cs89x0_dma=5 + + or they may be placed in /etc/lilo.conf:: + + image=/boot/bzImage-2.3.48 + append="cs89x0_dma=5" + label=linux + root=/dev/hda5 + read-only + + The DMA Rx buffer size is hardwired to 16 kbytes in this mode. + (64k mode is not available). + + +4. Compiling the Driver +======================= + +The cs89x0 driver can be compiled directly into the kernel or compiled into +a loadable device driver module. + +Just use the standard way to configure the driver and compile the Kernel. + + +4.1. Compiling the Driver to Support Rx DMA +------------------------------------------- + +The compile-time optionality for DMA was removed in the 2.3 kernel +series. DMA support is now unconditionally part of the driver. It is +enabled by the 'use_dma=1' module option. + + +5. Testing and Troubleshooting +============================== + +5.1. Known Defects and Limitations +---------------------------------- + +Refer to the RELEASE.TXT file distributed as part of this archive for a list of +known defects, driver limitations, and work arounds. + + +5.2. Testing the Adapter +------------------------ + +Once the adapter has been installed and configured, the diagnostic option of +the CS8900/20 Setup Utility can be used to test the functionality of the +adapter and its network connection. Use the diagnostics 'Self Test' option to +test the functionality of the adapter with the hardware configuration you have +assigned. You can use the diagnostics 'Network Test' to test the ability of the +adapter to communicate across the Ethernet with another PC equipped with a +CS8900/20-based adapter card (it must also be running the CS8900/20 Setup +Utility). + +.. note:: + + The Setup Utility's diagnostics are designed to run in a + DOS-only operating system environment. DO NOT run the diagnostics + from a DOS or command prompt session under Windows 95, Windows NT, + OS/2, or other operating system. + +To run the diagnostics tests on the CS8900/20 adapter: + + 1. Boot DOS on the PC and start the CS8900/20 Setup Utility. + + 2. The adapter's current configuration is displayed. Hit the ENTER key to + get to the main menu. + + 4. Select 'Diagnostics' (ALT-G) from the main menu. + * Select 'Self-Test' to test the adapter's basic functionality. + * Select 'Network Test' to test the network connection and cabling. + + +5.2.1. Diagnostic Self-test +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The diagnostic self-test checks the adapter's basic functionality as well as +its ability to communicate across the ISA bus based on the system resources +assigned during hardware configuration. The following tests are performed: + + * IO Register Read/Write Test + + The IO Register Read/Write test insures that the CS8900/20 can be + accessed in IO mode, and that the IO base address is correct. + + * Shared Memory Test + + The Shared Memory test insures the CS8900/20 can be accessed in memory + mode and that the range of memory addresses assigned does not conflict + with other devices in the system. + + * Interrupt Test + + The Interrupt test insures there are no conflicts with the assigned IRQ + signal. + + * EEPROM Test + + The EEPROM test insures the EEPROM can be read. + + * Chip RAM Test + + The Chip RAM test insures the 4K of memory internal to the CS8900/20 is + working properly. + + * Internal Loop-back Test + + The Internal Loop Back test insures the adapter's transmitter and + receiver are operating properly. If this test fails, make sure the + adapter's cable is connected to the network (check for LED activity for + example). + + * Boot PROM Test + + The Boot PROM test insures the Boot PROM is present, and can be read. + Failure indicates the Boot PROM was not successfully read due to a + hardware problem or due to a conflicts on the Boot PROM address + assignment. (Test only applies if the adapter is configured to use the + Boot PROM option.) + +Failure of a test item indicates a possible system resource conflict with +another device on the ISA bus. In this case, you should use the Manual Setup +option to reconfigure the adapter by selecting a different value for the system +resource that failed. + + +5.2.2. Diagnostic Network Test +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Diagnostic Network Test verifies a working network connection by +transferring data between two CS8900/20 adapters installed in different PCs +on the same network. (Note: the diagnostic network test should not be run +between two nodes across a router.) + +This test requires that each of the two PCs have a CS8900/20-based adapter +installed and have the CS8900/20 Setup Utility running. The first PC is +configured as a Responder and the other PC is configured as an Initiator. +Once the Initiator is started, it sends data frames to the Responder which +returns the frames to the Initiator. + +The total number of frames received and transmitted are displayed on the +Initiator's display, along with a count of the number of frames received and +transmitted OK or in error. The test can be terminated anytime by the user at +either PC. + +To setup the Diagnostic Network Test: + + 1. Select a PC with a CS8900/20-based adapter and a known working network + connection to act as the Responder. Run the CS8900/20 Setup Utility + and select 'Diagnostics -> Network Test -> Responder' from the main + menu. Hit ENTER to start the Responder. + + 2. Return to the PC with the CS8900/20-based adapter you want to test and + start the CS8900/20 Setup Utility. + + 3. From the main menu, Select 'Diagnostic -> Network Test -> Initiator'. + Hit ENTER to start the test. + +You may stop the test on the Initiator at any time while allowing the Responder +to continue running. In this manner, you can move to additional PCs and test +them by starting the Initiator on another PC without having to stop/start the +Responder. + + + +5.3. Using the Adapter's LEDs +----------------------------- + +The 2 and 3-media adapters have two LEDs visible on the back end of the board +located near the 10Base-T connector. + +Link Integrity LED: A "steady" ON of the green LED indicates a valid 10Base-T +connection. (Only applies to 10Base-T. The green LED has no significance for +a 10Base-2 or AUI connection.) + +TX/RX LED: The yellow LED lights briefly each time the adapter transmits or +receives data. (The yellow LED will appear to "flicker" on a typical network.) + + +5.4. Resolving I/O Conflicts +---------------------------- + +An IO conflict occurs when two or more adapter use the same ISA resource (IO +address, memory address or IRQ). You can usually detect an IO conflict in one +of four ways after installing and or configuring the CS8900/20-based adapter: + + 1. The system does not boot properly (or at all). + + 2. The driver cannot communicate with the adapter, reporting an "Adapter + not found" error message. + + 3. You cannot connect to the network or the driver will not load. + + 4. If you have configured the adapter to run in memory mode but the driver + reports it is using IO mode when loading, this is an indication of a + memory address conflict. + +If an IO conflict occurs, run the CS8900/20 Setup Utility and perform a +diagnostic self-test. Normally, the ISA resource in conflict will fail the +self-test. If so, reconfigure the adapter selecting another choice for the +resource in conflict. Run the diagnostics again to check for further IO +conflicts. + +In some cases, such as when the PC will not boot, it may be necessary to remove +the adapter and reconfigure it by installing it in another PC to run the +CS8900/20 Setup Utility. Once reinstalled in the target system, run the +diagnostics self-test to ensure the new configuration is free of conflicts +before loading the driver again. + +When manually configuring the adapter, keep in mind the typical ISA system +resource usage as indicated in the tables below. + +:: + + I/O Address Device IRQ Device + ----------- -------- --- -------- + 200-20F Game I/O adapter 3 COM2, Bus Mouse + 230-23F Bus Mouse 4 COM1 + 270-27F LPT3: third parallel port 5 LPT2 + 2F0-2FF COM2: second serial port 6 Floppy Disk controller + 320-32F Fixed disk controller 7 LPT1 + 8 Real-time Clock + 9 EGA/VGA display adapter + 12 Mouse (PS/2) + Memory Address Device 13 Math Coprocessor + -------------- --------------------- 14 Hard Disk controller + A000-BFFF EGA Graphics Adapter + A000-C7FF VGA Graphics Adapter + B000-BFFF Mono Graphics Adapter + B800-BFFF Color Graphics Adapter + E000-FFFF AT BIOS + + + + +6. Technical Support +==================== + +6.1. Contacting Cirrus Logic's Technical Support +------------------------------------------------ + +Cirrus Logic's CS89XX Technical Application Support can be reached at:: + + Telephone :(800) 888-5016 (from inside U.S. and Canada) + :(512) 442-7555 (from outside the U.S. and Canada) + Fax :(512) 912-3871 + Email :ethernet@crystal.cirrus.com + WWW :http://www.cirrus.com + + +6.2. Information Required before Contacting Technical Support +------------------------------------------------------------- + +Before contacting Cirrus Logic for technical support, be prepared to provide as +Much of the following information as possible. + +1.) Adapter type (CRD8900, CDB8900, CDB8920, etc.) + +2.) Adapter configuration + + * IO Base, Memory Base, IO or memory mode enabled, IRQ, DMA channel + * Plug and Play enabled/disabled (CS8920-based adapters only) + * Configured for media auto-detect or specific media type (which type). + +3.) PC System's Configuration + + * Plug and Play system (yes/no) + * BIOS (make and version) + * System make and model + * CPU (type and speed) + * System RAM + * SCSI Adapter + +4.) Software + + * CS89XX driver and version + * Your network operating system and version + * Your system's OS version + * Version of all protocol support files + +5.) Any Error Message displayed. + + + +6.3 Obtaining the Latest Driver Version +--------------------------------------- + +You can obtain the latest CS89XX drivers and support software from Cirrus Logic's +Web site. You can also contact Cirrus Logic's Technical Support (email: +ethernet@crystal.cirrus.com) and request that you be registered for automatic +software-update notification. + +Cirrus Logic maintains a web page at http://www.cirrus.com with the +latest drivers and technical publications. + + +6.4. Current maintainer +----------------------- + +In February 2000 the maintenance of this driver was assumed by Andrew +Morton. + +6.5 Kernel module parameters +---------------------------- + +For use in embedded environments with no cs89x0 EEPROM, the kernel boot +parameter ``cs89x0_media=`` has been implemented. Usage is:: + + cs89x0_media=rj45 or + cs89x0_media=aui or + cs89x0_media=bnc diff --git a/Documentation/networking/device_drivers/ethernet/davicom/dm9000.rst b/Documentation/networking/device_drivers/ethernet/davicom/dm9000.rst new file mode 100644 index 000000000..d5458da01 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/davicom/dm9000.rst @@ -0,0 +1,171 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================== +DM9000 Network driver +===================== + +Copyright 2008 Simtec Electronics, + + Ben Dooks + + +Introduction +------------ + +This file describes how to use the DM9000 platform-device based network driver +that is contained in the files drivers/net/dm9000.c and drivers/net/dm9000.h. + +The driver supports three DM9000 variants, the DM9000E which is the first chip +supported as well as the newer DM9000A and DM9000B devices. It is currently +maintained and tested by Ben Dooks, who should be CC: to any patches for this +driver. + + +Defining the platform device +---------------------------- + +The minimum set of resources attached to the platform device are as follows: + + 1) The physical address of the address register + 2) The physical address of the data register + 3) The IRQ line the device's interrupt pin is connected to. + +These resources should be specified in that order, as the ordering of the +two address regions is important (the driver expects these to be address +and then data). + +An example from arch/arm/mach-s3c2410/mach-bast.c is:: + + static struct resource bast_dm9k_resource[] = { + [0] = { + .start = S3C2410_CS5 + BAST_PA_DM9000, + .end = S3C2410_CS5 + BAST_PA_DM9000 + 3, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = S3C2410_CS5 + BAST_PA_DM9000 + 0x40, + .end = S3C2410_CS5 + BAST_PA_DM9000 + 0x40 + 0x3f, + .flags = IORESOURCE_MEM, + }, + [2] = { + .start = IRQ_DM9000, + .end = IRQ_DM9000, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, + } + }; + + static struct platform_device bast_device_dm9k = { + .name = "dm9000", + .id = 0, + .num_resources = ARRAY_SIZE(bast_dm9k_resource), + .resource = bast_dm9k_resource, + }; + +Note the setting of the IRQ trigger flag in bast_dm9k_resource[2].flags, +as this will generate a warning if it is not present. The trigger from +the flags field will be passed to request_irq() when registering the IRQ +handler to ensure that the IRQ is setup correctly. + +This shows a typical platform device, without the optional configuration +platform data supplied. The next example uses the same resources, but adds +the optional platform data to pass extra configuration data:: + + static struct dm9000_plat_data bast_dm9k_platdata = { + .flags = DM9000_PLATF_16BITONLY, + }; + + static struct platform_device bast_device_dm9k = { + .name = "dm9000", + .id = 0, + .num_resources = ARRAY_SIZE(bast_dm9k_resource), + .resource = bast_dm9k_resource, + .dev = { + .platform_data = &bast_dm9k_platdata, + } + }; + +The platform data is defined in include/linux/dm9000.h and described below. + + +Platform data +------------- + +Extra platform data for the DM9000 can describe the IO bus width to the +device, whether or not an external PHY is attached to the device and +the availability of an external configuration EEPROM. + +The flags for the platform data .flags field are as follows: + +DM9000_PLATF_8BITONLY + + The IO should be done with 8bit operations. + +DM9000_PLATF_16BITONLY + + The IO should be done with 16bit operations. + +DM9000_PLATF_32BITONLY + + The IO should be done with 32bit operations. + +DM9000_PLATF_EXT_PHY + + The chip is connected to an external PHY. + +DM9000_PLATF_NO_EEPROM + + This can be used to signify that the board does not have an + EEPROM, or that the EEPROM should be hidden from the user. + +DM9000_PLATF_SIMPLE_PHY + + Switch to using the simpler PHY polling method which does not + try and read the MII PHY state regularly. This is only available + when using the internal PHY. See the section on link state polling + for more information. + + The config symbol DM9000_FORCE_SIMPLE_PHY_POLL, Kconfig entry + "Force simple NSR based PHY polling" allows this flag to be + forced on at build time. + + +PHY Link state polling +---------------------- + +The driver keeps track of the link state and informs the network core +about link (carrier) availability. This is managed by several methods +depending on the version of the chip and on which PHY is being used. + +For the internal PHY, the original (and currently default) method is +to read the MII state, either when the status changes if we have the +necessary interrupt support in the chip or every two seconds via a +periodic timer. + +To reduce the overhead for the internal PHY, there is now the option +of using the DM9000_FORCE_SIMPLE_PHY_POLL config, or DM9000_PLATF_SIMPLE_PHY +platform data option to read the summary information without the +expensive MII accesses. This method is faster, but does not print +as much information. + +When using an external PHY, the driver currently has to poll the MII +link status as there is no method for getting an interrupt on link change. + + +DM9000A / DM9000B +----------------- + +These chips are functionally similar to the DM9000E and are supported easily +by the same driver. The features are: + + 1) Interrupt on internal PHY state change. This means that the periodic + polling of the PHY status may be disabled on these devices when using + the internal PHY. + + 2) TCP/UDP checksum offloading, which the driver does not currently support. + + +ethtool +------- + +The driver supports the ethtool interface for access to the driver +state information, the PHY state and the EEPROM. diff --git a/Documentation/networking/device_drivers/ethernet/dec/de4x5.rst b/Documentation/networking/device_drivers/ethernet/dec/de4x5.rst new file mode 100644 index 000000000..e03e9c631 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/dec/de4x5.rst @@ -0,0 +1,189 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================== +DEC EtherWORKS Ethernet De4x5 cards +=================================== + + Originally, this driver was written for the Digital Equipment + Corporation series of EtherWORKS Ethernet cards: + + - DE425 TP/COAX EISA + - DE434 TP PCI + - DE435 TP/COAX/AUI PCI + - DE450 TP/COAX/AUI PCI + - DE500 10/100 PCI Fasternet + + but it will now attempt to support all cards which conform to the + Digital Semiconductor SROM Specification. The driver currently + recognises the following chips: + + - DC21040 (no SROM) + - DC21041[A] + - DC21140[A] + - DC21142 + - DC21143 + + So far the driver is known to work with the following cards: + + - KINGSTON + - Linksys + - ZNYX342 + - SMC8432 + - SMC9332 (w/new SROM) + - ZNYX31[45] + - ZNYX346 10/100 4 port (can act as a 10/100 bridge!) + + The driver has been tested on a relatively busy network using the DE425, + DE434, DE435 and DE500 cards and benchmarked with 'ttcp': it transferred + 16M of data to a DECstation 5000/200 as follows:: + + TCP UDP + TX RX TX RX + DE425 1030k 997k 1170k 1128k + DE434 1063k 995k 1170k 1125k + DE435 1063k 995k 1170k 1125k + DE500 1063k 998k 1170k 1125k in 10Mb/s mode + + All values are typical (in kBytes/sec) from a sample of 4 for each + measurement. Their error is +/-20k on a quiet (private) network and also + depend on what load the CPU has. + +---------------------------------------------------------------------------- + + The ability to load this driver as a loadable module has been included + and used extensively during the driver development (to save those long + reboot sequences). Loadable module support under PCI and EISA has been + achieved by letting the driver autoprobe as if it were compiled into the + kernel. Do make sure you're not sharing interrupts with anything that + cannot accommodate interrupt sharing! + + To utilise this ability, you have to do 8 things: + + 0) have a copy of the loadable modules code installed on your system. + 1) copy de4x5.c from the /linux/drivers/net directory to your favourite + temporary directory. + 2) for fixed autoprobes (not recommended), edit the source code near + line 5594 to reflect the I/O address you're using, or assign these when + loading by:: + + insmod de4x5 io=0xghh where g = bus number + hh = device number + + .. note:: + + autoprobing for modules is now supported by default. You may just + use:: + + insmod de4x5 + + to load all available boards. For a specific board, still use + the 'io=?' above. + 3) compile de4x5.c, but include -DMODULE in the command line to ensure + that the correct bits are compiled (see end of source code). + 4) if you are wanting to add a new card, goto 5. Otherwise, recompile a + kernel with the de4x5 configuration turned off and reboot. + 5) insmod de4x5 [io=0xghh] + 6) run the net startup bits for your new eth?? interface(s) manually + (usually /etc/rc.inet[12] at boot time). + 7) enjoy! + + To unload a module, turn off the associated interface(s) + 'ifconfig eth?? down' then 'rmmod de4x5'. + + Automedia detection is included so that in principle you can disconnect + from, e.g. TP, reconnect to BNC and things will still work (after a + pause while the driver figures out where its media went). My tests + using ping showed that it appears to work.... + + By default, the driver will now autodetect any DECchip based card. + Should you have a need to restrict the driver to DIGITAL only cards, you + can compile with a DEC_ONLY define, or if loading as a module, use the + 'dec_only=1' parameter. + + I've changed the timing routines to use the kernel timer and scheduling + functions so that the hangs and other assorted problems that occurred + while autosensing the media should be gone. A bonus for the DC21040 + auto media sense algorithm is that it can now use one that is more in + line with the rest (the DC21040 chip doesn't have a hardware timer). + The downside is the 1 'jiffies' (10ms) resolution. + + IEEE 802.3u MII interface code has been added in anticipation that some + products may use it in the future. + + The SMC9332 card has a non-compliant SROM which needs fixing - I have + patched this driver to detect it because the SROM format used complies + to a previous DEC-STD format. + + I have removed the buffer copies needed for receive on Intels. I cannot + remove them for Alphas since the Tulip hardware only does longword + aligned DMA transfers and the Alphas get alignment traps with non + longword aligned data copies (which makes them really slow). No comment. + + I have added SROM decoding routines to make this driver work with any + card that supports the Digital Semiconductor SROM spec. This will help + all cards running the dc2114x series chips in particular. Cards using + the dc2104x chips should run correctly with the basic driver. I'm in + debt to for the testing and feedback that helped get + this feature working. So far we have tested KINGSTON, SMC8432, SMC9332 + (with the latest SROM complying with the SROM spec V3: their first was + broken), ZNYX342 and LinkSys. ZNYX314 (dual 21041 MAC) and ZNYX 315 + (quad 21041 MAC) cards also appear to work despite their incorrectly + wired IRQs. + + I have added a temporary fix for interrupt problems when some SCSI cards + share the same interrupt as the DECchip based cards. The problem occurs + because the SCSI card wants to grab the interrupt as a fast interrupt + (runs the service routine with interrupts turned off) vs. this card + which really needs to run the service routine with interrupts turned on. + This driver will now add the interrupt service routine as a fast + interrupt if it is bounced from the slow interrupt. THIS IS NOT A + RECOMMENDED WAY TO RUN THE DRIVER and has been done for a limited time + until people sort out their compatibility issues and the kernel + interrupt service code is fixed. YOU SHOULD SEPARATE OUT THE FAST + INTERRUPT CARDS FROM THE SLOW INTERRUPT CARDS to ensure that they do not + run on the same interrupt. PCMCIA/CardBus is another can of worms... + + Finally, I think I have really fixed the module loading problem with + more than one DECchip based card. As a side effect, I don't mess with + the device structure any more which means that if more than 1 card in + 2.0.x is installed (4 in 2.1.x), the user will have to edit + linux/drivers/net/Space.c to make room for them. Hence, module loading + is the preferred way to use this driver, since it doesn't have this + limitation. + + Where SROM media detection is used and full duplex is specified in the + SROM, the feature is ignored unless lp->params.fdx is set at compile + time OR during a module load (insmod de4x5 args='eth??:fdx' [see + below]). This is because there is no way to automatically detect full + duplex links except through autonegotiation. When I include the + autonegotiation feature in the SROM autoconf code, this detection will + occur automatically for that case. + + Command line arguments are now allowed, similar to passing arguments + through LILO. This will allow a per adapter board set up of full duplex + and media. The only lexical constraints are: the board name (dev->name) + appears in the list before its parameters. The list of parameters ends + either at the end of the parameter list or with another board name. The + following parameters are allowed: + + ========= =============================================== + fdx for full duplex + autosense to set the media/speed; with the following + sub-parameters: + TP, TP_NW, BNC, AUI, BNC_AUI, 100Mb, 10Mb, AUTO + ========= =============================================== + + Case sensitivity is important for the sub-parameters. They *must* be + upper case. Examples:: + + insmod de4x5 args='eth1:fdx autosense=BNC eth0:autosense=100Mb'. + + For a compiled in driver, in linux/drivers/net/CONFIG, place e.g.:: + + DE4X5_OPTS = -DDE4X5_PARM='"eth0:fdx autosense=AUI eth2:autosense=TP"' + + Yes, I know full duplex isn't permissible on BNC or AUI; they're just + examples. By default, full duplex is turned off and AUTO is the default + autosense setting. In reality, I expect only the full duplex option to + be used. Note the use of single quotes in the two examples above and the + lack of commas to separate items. diff --git a/Documentation/networking/device_drivers/ethernet/dec/dmfe.rst b/Documentation/networking/device_drivers/ethernet/dec/dmfe.rst new file mode 100644 index 000000000..c4cf809ca --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/dec/dmfe.rst @@ -0,0 +1,71 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================== +Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver for Linux +============================================================== + +Note: This driver doesn't have a maintainer. + + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + + +This driver provides kernel support for Davicom DM9102(A)/DM9132/DM9801 ethernet cards ( CNET +10/100 ethernet cards uses Davicom chipset too, so this driver supports CNET cards too ).If you +didn't compile this driver as a module, it will automatically load itself on boot and print a +line similar to:: + + dmfe: Davicom DM9xxx net driver, version 1.36.4 (2002-01-17) + +If you compiled this driver as a module, you have to load it on boot.You can load it with command:: + + insmod dmfe + +This way it will autodetect the device mode.This is the suggested way to load the module.Or you can pass +a mode= setting to module while loading, like:: + + insmod dmfe mode=0 # Force 10M Half Duplex + insmod dmfe mode=1 # Force 100M Half Duplex + insmod dmfe mode=4 # Force 10M Full Duplex + insmod dmfe mode=5 # Force 100M Full Duplex + +Next you should configure your network interface with a command similar to:: + + ifconfig eth0 172.22.3.18 + ^^^^^^^^^^^ + Your IP Address + +Then you may have to modify the default routing table with command:: + + route add default eth0 + + +Now your ethernet card should be up and running. + + +TODO: + +- Implement pci_driver::suspend() and pci_driver::resume() power management methods. +- Check on 64 bit boxes. +- Check and fix on big endian boxes. +- Test and make sure PCI latency is now correct for all cases. + + +Authors: + +Sten Wang : Original Author + +Contributors: + +- Marcelo Tosatti +- Alan Cox +- Jeff Garzik +- Vojtech Pavlik diff --git a/Documentation/networking/device_drivers/ethernet/dlink/dl2k.rst b/Documentation/networking/device_drivers/ethernet/dlink/dl2k.rst new file mode 100644 index 000000000..ccdb5d0d7 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/dlink/dl2k.rst @@ -0,0 +1,314 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================================================= +D-Link DL2000-based Gigabit Ethernet Adapter Installation +========================================================= + +May 23, 2002 + +.. Contents + + - Compatibility List + - Quick Install + - Compiling the Driver + - Installing the Driver + - Option parameter + - Configuration Script Sample + - Troubleshooting + + +Compatibility List +================== + +Adapter Support: + +- D-Link DGE-550T Gigabit Ethernet Adapter. +- D-Link DGE-550SX Gigabit Ethernet Adapter. +- D-Link DL2000-based Gigabit Ethernet Adapter. + + +The driver support Linux kernel 2.4.7 later. We had tested it +on the environments below. + + . Red Hat v6.2 (update kernel to 2.4.7) + . Red Hat v7.0 (update kernel to 2.4.7) + . Red Hat v7.1 (kernel 2.4.7) + . Red Hat v7.2 (kernel 2.4.7-10) + + +Quick Install +============= +Install linux driver as following command:: + + 1. make all + 2. insmod dl2k.ko + 3. ifconfig eth0 up 10.xxx.xxx.xxx netmask 255.0.0.0 + ^^^^^^^^^^^^^^^\ ^^^^^^^^\ + IP NETMASK + +Now eth0 should active, you can test it by "ping" or get more information by +"ifconfig". If tested ok, continue the next step. + +4. ``cp dl2k.ko /lib/modules/`uname -r`/kernel/drivers/net`` +5. Add the following line to /etc/modprobe.d/dl2k.conf:: + + alias eth0 dl2k + +6. Run ``depmod`` to updated module indexes. +7. Run ``netconfig`` or ``netconf`` to create configuration script ifcfg-eth0 + located at /etc/sysconfig/network-scripts or create it manually. + + [see - Configuration Script Sample] +8. Driver will automatically load and configure at next boot time. + +Compiling the Driver +==================== +In Linux, NIC drivers are most commonly configured as loadable modules. +The approach of building a monolithic kernel has become obsolete. The driver +can be compiled as part of a monolithic kernel, but is strongly discouraged. +The remainder of this section assumes the driver is built as a loadable module. +In the Linux environment, it is a good idea to rebuild the driver from the +source instead of relying on a precompiled version. This approach provides +better reliability since a precompiled driver might depend on libraries or +kernel features that are not present in a given Linux installation. + +The 3 files necessary to build Linux device driver are dl2k.c, dl2k.h and +Makefile. To compile, the Linux installation must include the gcc compiler, +the kernel source, and the kernel headers. The Linux driver supports Linux +Kernels 2.4.7. Copy the files to a directory and enter the following command +to compile and link the driver: + +CD-ROM drive +------------ + +:: + + [root@XXX /] mkdir cdrom + [root@XXX /] mount -r -t iso9660 -o conv=auto /dev/cdrom /cdrom + [root@XXX /] cd root + [root@XXX /root] mkdir dl2k + [root@XXX /root] cd dl2k + [root@XXX dl2k] cp /cdrom/linux/dl2k.tgz /root/dl2k + [root@XXX dl2k] tar xfvz dl2k.tgz + [root@XXX dl2k] make all + +Floppy disc drive +----------------- + +:: + + [root@XXX /] cd root + [root@XXX /root] mkdir dl2k + [root@XXX /root] cd dl2k + [root@XXX dl2k] mcopy a:/linux/dl2k.tgz /root/dl2k + [root@XXX dl2k] tar xfvz dl2k.tgz + [root@XXX dl2k] make all + +Installing the Driver +===================== + +Manual Installation +------------------- + + Once the driver has been compiled, it must be loaded, enabled, and bound + to a protocol stack in order to establish network connectivity. To load a + module enter the command:: + + insmod dl2k.o + + or:: + + insmod dl2k.o ; add parameter + +--------------------------------------------------------- + + example:: + + insmod dl2k.o media=100mbps_hd + + or:: + + insmod dl2k.o media=3 + + or:: + + insmod dl2k.o media=3,2 ; for 2 cards + +--------------------------------------------------------- + + Please reference the list of the command line parameters supported by + the Linux device driver below. + + The insmod command only loads the driver and gives it a name of the form + eth0, eth1, etc. To bring the NIC into an operational state, + it is necessary to issue the following command:: + + ifconfig eth0 up + + Finally, to bind the driver to the active protocol (e.g., TCP/IP with + Linux), enter the following command:: + + ifup eth0 + + Note that this is meaningful only if the system can find a configuration + script that contains the necessary network information. A sample will be + given in the next paragraph. + + The commands to unload a driver are as follows:: + + ifdown eth0 + ifconfig eth0 down + rmmod dl2k.o + + The following are the commands to list the currently loaded modules and + to see the current network configuration:: + + lsmod + ifconfig + + +Automated Installation +---------------------- + This section describes how to install the driver such that it is + automatically loaded and configured at boot time. The following description + is based on a Red Hat 6.0/7.0 distribution, but it can easily be ported to + other distributions as well. + +Red Hat v6.x/v7.x +----------------- + 1. Copy dl2k.o to the network modules directory, typically + /lib/modules/2.x.x-xx/net or /lib/modules/2.x.x/kernel/drivers/net. + 2. Locate the boot module configuration file, most commonly in the + /etc/modprobe.d/ directory. Add the following lines:: + + alias ethx dl2k + options dl2k + + where ethx will be eth0 if the NIC is the only ethernet adapter, eth1 if + one other ethernet adapter is installed, etc. Refer to the table in the + previous section for the list of optional parameters. + 3. Locate the network configuration scripts, normally the + /etc/sysconfig/network-scripts directory, and create a configuration + script named ifcfg-ethx that contains network information. + 4. Note that for most Linux distributions, Red Hat included, a configuration + utility with a graphical user interface is provided to perform steps 2 + and 3 above. + + +Parameter Description +===================== +You can install this driver without any additional parameter. However, if you +are going to have extensive functions then it is necessary to set extra +parameter. Below is a list of the command line parameters supported by the +Linux device +driver. + + +=============================== ============================================== +mtu=packet_size Specifies the maximum packet size. default + is 1500. + +media=media_type Specifies the media type the NIC operates at. + autosense Autosensing active media. + + =========== ========================= + 10mbps_hd 10Mbps half duplex. + 10mbps_fd 10Mbps full duplex. + 100mbps_hd 100Mbps half duplex. + 100mbps_fd 100Mbps full duplex. + 1000mbps_fd 1000Mbps full duplex. + 1000mbps_hd 1000Mbps half duplex. + 0 Autosensing active media. + 1 10Mbps half duplex. + 2 10Mbps full duplex. + 3 100Mbps half duplex. + 4 100Mbps full duplex. + 5 1000Mbps half duplex. + 6 1000Mbps full duplex. + =========== ========================= + + By default, the NIC operates at autosense. + 1000mbps_fd and 1000mbps_hd types are only + available for fiber adapter. + +vlan=n Specifies the VLAN ID. If vlan=0, the + Virtual Local Area Network (VLAN) function is + disable. + +jumbo=[0|1] Specifies the jumbo frame support. If jumbo=1, + the NIC accept jumbo frames. By default, this + function is disabled. + Jumbo frame usually improve the performance + int gigabit. + This feature need jumbo frame compatible + remote. + +rx_coalesce=m Number of rx frame handled each interrupt. +rx_timeout=n Rx DMA wait time for an interrupt. + If set rx_coalesce > 0, hardware only assert + an interrupt for m frames. Hardware won't + assert rx interrupt until m frames received or + reach timeout of n * 640 nano seconds. + Set proper rx_coalesce and rx_timeout can + reduce congestion collapse and overload which + has been a bottleneck for high speed network. + + For example, rx_coalesce=10 rx_timeout=800. + that is, hardware assert only 1 interrupt + for 10 frames received or timeout of 512 us. + +tx_coalesce=n Number of tx frame handled each interrupt. + Set n > 1 can reduce the interrupts + congestion usually lower performance of + high speed network card. Default is 16. + +tx_flow=[1|0] Specifies the Tx flow control. If tx_flow=0, + the Tx flow control disable else driver + autodetect. +rx_flow=[1|0] Specifies the Rx flow control. If rx_flow=0, + the Rx flow control enable else driver + autodetect. +=============================== ============================================== + + +Configuration Script Sample +=========================== +Here is a sample of a simple configuration script:: + + DEVICE=eth0 + USERCTL=no + ONBOOT=yes + POOTPROTO=none + BROADCAST=207.200.5.255 + NETWORK=207.200.5.0 + NETMASK=255.255.255.0 + IPADDR=207.200.5.2 + + +Troubleshooting +=============== +Q1. Source files contain ^ M behind every line. + + Make sure all files are Unix file format (no LF). Try the following + shell command to convert files:: + + cat dl2k.c | col -b > dl2k.tmp + mv dl2k.tmp dl2k.c + + OR:: + + cat dl2k.c | tr -d "\r" > dl2k.tmp + mv dl2k.tmp dl2k.c + +Q2: Could not find header files (``*.h``)? + + To compile the driver, you need kernel header files. After + installing the kernel source, the header files are usually located in + /usr/src/linux/include, which is the default include directory configured + in Makefile. For some distributions, there is a copy of header files in + /usr/src/include/linux and /usr/src/include/asm, that you can change the + INCLUDEDIR in Makefile to /usr/include without installing kernel source. + + Note that RH 7.0 didn't provide correct header files in /usr/include, + including those files will make a wrong version driver. + diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa.rst new file mode 100644 index 000000000..241c6c6f6 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa.rst @@ -0,0 +1,269 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================== +The QorIQ DPAA Ethernet Driver +============================== + +Authors: +- Madalin Bucur +- Camelia Groza + +.. Contents + + - DPAA Ethernet Overview + - DPAA Ethernet Supported SoCs + - Configuring DPAA Ethernet in your kernel + - DPAA Ethernet Frame Processing + - DPAA Ethernet Features + - DPAA IRQ Affinity and Receive Side Scaling + - Debugging + +DPAA Ethernet Overview +====================== + +DPAA stands for Data Path Acceleration Architecture and it is a +set of networking acceleration IPs that are available on several +generations of SoCs, both on PowerPC and ARM64. + +The Freescale DPAA architecture consists of a series of hardware blocks +that support Ethernet connectivity. The Ethernet driver depends upon the +following drivers in the Linux kernel: + + - Peripheral Access Memory Unit (PAMU) (* needed only for PPC platforms) + drivers/iommu/fsl_* + - Frame Manager (FMan) + drivers/net/ethernet/freescale/fman + - Queue Manager (QMan), Buffer Manager (BMan) + drivers/soc/fsl/qbman + +A simplified view of the dpaa_eth interfaces mapped to FMan MACs:: + + dpaa_eth /eth0\ ... /ethN\ + driver | | | | + ------------- ---- ----------- ---- ------------- + -Ports / Tx Rx \ ... / Tx Rx \ + FMan | | | | + -MACs | MAC0 | | MACN | + / dtsec0 \ ... / dtsecN \ (or tgec) + / \ / \(or memac) + --------- -------------- --- -------------- --------- + FMan, FMan Port, FMan SP, FMan MURAM drivers + --------------------------------------------------------- + FMan HW blocks: MURAM, MACs, Ports, SP + --------------------------------------------------------- + +The dpaa_eth relation to the QMan, BMan and FMan:: + + ________________________________ + dpaa_eth / eth0 \ + driver / \ + --------- -^- -^- -^- --- --------- + QMan driver / \ / \ / \ \ / | BMan | + |Rx | |Rx | |Tx | |Tx | | driver | + --------- |Dfl| |Err| |Cnf| |FQs| | | + QMan HW |FQ | |FQ | |FQs| | | | | + / \ / \ / \ \ / | | + --------- --- --- --- -v- --------- + | FMan QMI | | + | FMan HW FMan BMI | BMan HW | + ----------------------- -------- + +where the acronyms used above (and in the code) are: + +=============== =========================================================== +DPAA Data Path Acceleration Architecture +FMan DPAA Frame Manager +QMan DPAA Queue Manager +BMan DPAA Buffers Manager +QMI QMan interface in FMan +BMI BMan interface in FMan +FMan SP FMan Storage Profiles +MURAM Multi-user RAM in FMan +FQ QMan Frame Queue +Rx Dfl FQ default reception FQ +Rx Err FQ Rx error frames FQ +Tx Cnf FQ Tx confirmation FQs +Tx FQs transmission frame queues +dtsec datapath three speed Ethernet controller (10/100/1000 Mbps) +tgec ten gigabit Ethernet controller (10 Gbps) +memac multirate Ethernet MAC (10/100/1000/10000) +=============== =========================================================== + +DPAA Ethernet Supported SoCs +============================ + +The DPAA drivers enable the Ethernet controllers present on the following SoCs: + +PPC +- P1023 +- P2041 +- P3041 +- P4080 +- P5020 +- P5040 +- T1023 +- T1024 +- T1040 +- T1042 +- T2080 +- T4240 +- B4860 + +ARM +- LS1043A +- LS1046A + +Configuring DPAA Ethernet in your kernel +======================================== + +To enable the DPAA Ethernet driver, the following Kconfig options are required:: + + # common for arch/arm64 and arch/powerpc platforms + CONFIG_FSL_DPAA=y + CONFIG_FSL_FMAN=y + CONFIG_FSL_DPAA_ETH=y + CONFIG_FSL_XGMAC_MDIO=y + + # for arch/powerpc only + CONFIG_FSL_PAMU=y + + # common options needed for the PHYs used on the RDBs + CONFIG_VITESSE_PHY=y + CONFIG_REALTEK_PHY=y + CONFIG_AQUANTIA_PHY=y + +DPAA Ethernet Frame Processing +============================== + +On Rx, buffers for the incoming frames are retrieved from the buffers found +in the dedicated interface buffer pool. The driver initializes and seeds these +with one page buffers. + +On Tx, all transmitted frames are returned to the driver through Tx +confirmation frame queues. The driver is then responsible for freeing the +buffers. In order to do this properly, a backpointer is added to the buffer +before transmission that points to the skb. When the buffer returns to the +driver on a confirmation FQ, the skb can be correctly consumed. + +DPAA Ethernet Features +====================== + +Currently the DPAA Ethernet driver enables the basic features required for +a Linux Ethernet driver. The support for advanced features will be added +gradually. + +The driver has Rx and Tx checksum offloading for UDP and TCP. Currently the Rx +checksum offload feature is enabled by default and cannot be controlled through +ethtool. Also, rx-flow-hash and rx-hashing was added. The addition of RSS +provides a big performance boost for the forwarding scenarios, allowing +different traffic flows received by one interface to be processed by different +CPUs in parallel. + +The driver has support for multiple prioritized Tx traffic classes. Priorities +range from 0 (lowest) to 3 (highest). These are mapped to HW workqueues with +strict priority levels. Each traffic class contains NR_CPU TX queues. By +default, only one traffic class is enabled and the lowest priority Tx queues +are used. Higher priority traffic classes can be enabled with the mqprio +qdisc. For example, all four traffic classes are enabled on an interface with +the following command. Furthermore, skb priority levels are mapped to traffic +classes as follows: + + * priorities 0 to 3 - traffic class 0 (low priority) + * priorities 4 to 7 - traffic class 1 (medium-low priority) + * priorities 8 to 11 - traffic class 2 (medium-high priority) + * priorities 12 to 15 - traffic class 3 (high priority) + +:: + + tc qdisc add dev root handle 1: \ + mqprio num_tc 4 map 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 hw 1 + +DPAA IRQ Affinity and Receive Side Scaling +========================================== + +Traffic coming on the DPAA Rx queues or on the DPAA Tx confirmation +queues is seen by the CPU as ingress traffic on a certain portal. +The DPAA QMan portal interrupts are affined each to a certain CPU. +The same portal interrupt services all the QMan portal consumers. + +By default the DPAA Ethernet driver enables RSS, making use of the +DPAA FMan Parser and Keygen blocks to distribute traffic on 128 +hardware frame queues using a hash on IP v4/v6 source and destination +and L4 source and destination ports, in present in the received frame. +When RSS is disabled, all traffic received by a certain interface is +received on the default Rx frame queue. The default DPAA Rx frame +queues are configured to put the received traffic into a pool channel +that allows any available CPU portal to dequeue the ingress traffic. +The default frame queues have the HOLDACTIVE option set, ensuring that +traffic bursts from a certain queue are serviced by the same CPU. +This ensures a very low rate of frame reordering. A drawback of this +is that only one CPU at a time can service the traffic received by a +certain interface when RSS is not enabled. + +To implement RSS, the DPAA Ethernet driver allocates an extra set of +128 Rx frame queues that are configured to dedicated channels, in a +round-robin manner. The mapping of the frame queues to CPUs is now +hardcoded, there is no indirection table to move traffic for a certain +FQ (hash result) to another CPU. The ingress traffic arriving on one +of these frame queues will arrive at the same portal and will always +be processed by the same CPU. This ensures intra-flow order preservation +and workload distribution for multiple traffic flows. + +RSS can be turned off for a certain interface using ethtool, i.e.:: + + # ethtool -N fm1-mac9 rx-flow-hash tcp4 "" + +To turn it back on, one needs to set rx-flow-hash for tcp4/6 or udp4/6:: + + # ethtool -N fm1-mac9 rx-flow-hash udp4 sfdn + +There is no independent control for individual protocols, any command +run for one of tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 is +going to control the rx-flow-hashing for all protocols on that interface. + +Besides using the FMan Keygen computed hash for spreading traffic on the +128 Rx FQs, the DPAA Ethernet driver also sets the skb hash value when +the NETIF_F_RXHASH feature is on (active by default). This can be turned +on or off through ethtool, i.e.:: + + # ethtool -K fm1-mac9 rx-hashing off + # ethtool -k fm1-mac9 | grep hash + receive-hashing: off + # ethtool -K fm1-mac9 rx-hashing on + Actual changes: + receive-hashing: on + # ethtool -k fm1-mac9 | grep hash + receive-hashing: on + +Please note that Rx hashing depends upon the rx-flow-hashing being on +for that interface - turning off rx-flow-hashing will also disable the +rx-hashing (without ethtool reporting it as off as that depends on the +NETIF_F_RXHASH feature flag). + +Debugging +========= + +The following statistics are exported for each interface through ethtool: + + - interrupt count per CPU + - Rx packets count per CPU + - Tx packets count per CPU + - Tx confirmed packets count per CPU + - Tx S/G frames count per CPU + - Tx error count per CPU + - Rx error count per CPU + - Rx error count per type + - congestion related statistics: + + - congestion status + - time spent in congestion + - number of time the device entered congestion + - dropped packets count per cause + +The driver also exports the following information in sysfs: + + - the FQ IDs for each FQ type + /sys/devices/platform/soc/.fman/.ethernet/dpaa-ethernet./net/fm-mac/fqids + + - the ID of the buffer pool in use + /sys/devices/platform/soc/.fman/.ethernet/dpaa-ethernet./net/fm-mac/bpids diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver.rst new file mode 100644 index 000000000..c50fd4663 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver.rst @@ -0,0 +1,160 @@ +.. include:: + +DPAA2 DPIO (Data Path I/O) Overview +=================================== + +:Copyright: |copy| 2016-2018 NXP + +This document provides an overview of the Freescale DPAA2 DPIO +drivers + +Introduction +============ + +A DPAA2 DPIO (Data Path I/O) is a hardware object that provides +interfaces to enqueue and dequeue frames to/from network interfaces +and other accelerators. A DPIO also provides hardware buffer +pool management for network interfaces. + +This document provides an overview the Linux DPIO driver, its +subcomponents, and its APIs. + +See +Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst +for a general overview of DPAA2 and the general DPAA2 driver architecture +in Linux. + +Driver Overview +--------------- + +The DPIO driver is bound to DPIO objects discovered on the fsl-mc bus and +provides services that: + + A. allow other drivers, such as the Ethernet driver, to enqueue and dequeue + frames for their respective objects + B. allow drivers to register callbacks for data availability notifications + when data becomes available on a queue or channel + C. allow drivers to manage hardware buffer pools + +The Linux DPIO driver consists of 3 primary components-- + DPIO object driver-- fsl-mc driver that manages the DPIO object + + DPIO service-- provides APIs to other Linux drivers for services + + QBman portal interface-- sends portal commands, gets responses:: + + fsl-mc other + bus drivers + | | + +---+----+ +------+-----+ + |DPIO obj| |DPIO service| + | driver |---| (DPIO) | + +--------+ +------+-----+ + | + +------+-----+ + | QBman | + | portal i/f | + +------------+ + | + hardware + + +The diagram below shows how the DPIO driver components fit with the other +DPAA2 Linux driver components:: + + +------------+ + | OS Network | + | Stack | + +------------+ +------------+ + | Allocator |. . . . . . . | Ethernet | + |(DPMCP,DPBP)| | (DPNI) | + +-.----------+ +---+---+----+ + . . ^ | + . . | | dequeue> + +-------------+ . | | + | DPRC driver | . +--------+ +------------+ + | (DPRC) | . . |DPIO obj| |DPIO service| + +----------+--+ | driver |-| (DPIO) | + | +--------+ +------+-----+ + | +------|-----+ + | | QBman | + +----+--------------+ | portal i/f | + | MC-bus driver | +------------+ + | | | + | /soc/fsl-mc | | + +-------------------+ | + | + =========================================|=========|======================== + +-+--DPIO---|-----------+ + | | | + | QBman Portal | + +-----------------------+ + + ============================================================================ + + +DPIO Object Driver (dpio-driver.c) +---------------------------------- + + The dpio-driver component registers with the fsl-mc bus to handle objects of + type "dpio". The implementation of probe() handles basic initialization + of the DPIO including mapping of the DPIO regions (the QBman SW portal) + and initializing interrupts and registering irq handlers. The dpio-driver + registers the probed DPIO with dpio-service. + +DPIO service (dpio-service.c, dpaa2-io.h) +------------------------------------------ + + The dpio service component provides queuing, notification, and buffers + management services to DPAA2 drivers, such as the Ethernet driver. A system + will typically allocate 1 DPIO object per CPU to allow queuing operations + to happen simultaneously across all CPUs. + + Notification handling + dpaa2_io_service_register() + + dpaa2_io_service_deregister() + + dpaa2_io_service_rearm() + + Queuing + dpaa2_io_service_pull_fq() + + dpaa2_io_service_pull_channel() + + dpaa2_io_service_enqueue_fq() + + dpaa2_io_service_enqueue_qd() + + dpaa2_io_store_create() + + dpaa2_io_store_destroy() + + dpaa2_io_store_next() + + Buffer pool management + dpaa2_io_service_release() + + dpaa2_io_service_acquire() + +QBman portal interface (qbman-portal.c) +--------------------------------------- + + The qbman-portal component provides APIs to do the low level hardware + bit twiddling for operations such as: + + - initializing Qman software portals + - building and sending portal commands + - portal interrupt configuration and processing + + The qbman-portal APIs are not public to other drivers, and are + only used by dpio-service. + +Other (dpaa2-fd.h, dpaa2-global.h) +---------------------------------- + + Frame descriptor and scatter-gather definitions and the APIs used to + manipulate them are defined in dpaa2-fd.h. + + Dequeue result struct and parsing APIs are defined in dpaa2-global.h. diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver.rst new file mode 100644 index 000000000..682f3986c --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver.rst @@ -0,0 +1,186 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +=============================== +DPAA2 Ethernet driver +=============================== + +:Copyright: |copy| 2017-2018 NXP + +This file provides documentation for the Freescale DPAA2 Ethernet driver. + +Supported Platforms +=================== +This driver provides networking support for Freescale DPAA2 SoCs, e.g. +LS2080A, LS2088A, LS1088A. + + +Architecture Overview +===================== +Unlike regular NICs, in the DPAA2 architecture there is no single hardware block +representing network interfaces; instead, several separate hardware resources +concur to provide the networking functionality: + +- network interfaces +- queues, channels +- buffer pools +- MAC/PHY + +All hardware resources are allocated and configured through the Management +Complex (MC) portals. MC abstracts most of these resources as DPAA2 objects +and exposes ABIs through which they can be configured and controlled. A few +hardware resources, like queues, do not have a corresponding MC object and +are treated as internal resources of other objects. + +For a more detailed description of the DPAA2 architecture and its object +abstractions see +*Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst*. + +Each Linux net device is built on top of a Datapath Network Interface (DPNI) +object and uses Buffer Pools (DPBPs), I/O Portals (DPIOs) and Concentrators +(DPCONs). + +Configuration interface:: + + ----------------------- + | DPAA2 Ethernet Driver | + ----------------------- + . . . + . . . + . . . . . . . . . . . . + . . . + . . . + ---------- ---------- ----------- + | DPBP API | | DPNI API | | DPCON API | + ---------- ---------- ----------- + . . . software + ======= . ========== . ============ . =================== + . . . hardware + ------------------------------------------ + | MC hardware portals | + ------------------------------------------ + . . . + . . . + ------ ------ ------- + | DPBP | | DPNI | | DPCON | + ------ ------ ------- + +The DPNIs are network interfaces without a direct one-on-one mapping to PHYs. +DPBPs represent hardware buffer pools. Packet I/O is performed in the context +of DPCON objects, using DPIO portals for managing and communicating with the +hardware resources. + +Datapath (I/O) interface:: + + ----------------------------------------------- + | DPAA2 Ethernet Driver | + ----------------------------------------------- + | ^ ^ | | + | | | | | + enqueue| dequeue| data | dequeue| seed | + (Tx) | (Rx, TxC)| avail.| request| buffers| + | | notify| | | + | | | | | + V | | V V + ----------------------------------------------- + | DPIO Driver | + ----------------------------------------------- + | | | | | software + | | | | | ================ + | | | | | hardware + ----------------------------------------------- + | I/O hardware portals | + ----------------------------------------------- + | ^ ^ | | + | | | | | + | | | V | + V | ================ V + ---------------------- | ------------- + queues ---------------------- | | Buffer pool | + ---------------------- | ------------- + ======================= + Channel + +Datapath I/O (DPIO) portals provide enqueue and dequeue services, data +availability notifications and buffer pool management. DPIOs are shared between +all DPAA2 objects (and implicitly all DPAA2 kernel drivers) that work with data +frames, but must be affine to the CPUs for the purpose of traffic distribution. + +Frames are transmitted and received through hardware frame queues, which can be +grouped in channels for the purpose of hardware scheduling. The Ethernet driver +enqueues TX frames on egress queues and after transmission is complete a TX +confirmation frame is sent back to the CPU. + +When frames are available on ingress queues, a data availability notification +is sent to the CPU; notifications are raised per channel, so even if multiple +queues in the same channel have available frames, only one notification is sent. +After a channel fires a notification, is must be explicitly rearmed. + +Each network interface can have multiple Rx, Tx and confirmation queues affined +to CPUs, and one channel (DPCON) for each CPU that services at least one queue. +DPCONs are used to distribute ingress traffic to different CPUs via the cores' +affine DPIOs. + +The role of hardware buffer pools is storage of ingress frame data. Each network +interface has a privately owned buffer pool which it seeds with kernel allocated +buffers. + + +DPNIs are decoupled from PHYs; a DPNI can be connected to a PHY through a DPMAC +object or to another DPNI through an internal link, but the connection is +managed by MC and completely transparent to the Ethernet driver. + +:: + + --------- --------- --------- + | eth if1 | | eth if2 | | eth ifn | + --------- --------- --------- + . . . + . . . + . . . + --------------------------- + | DPAA2 Ethernet Driver | + --------------------------- + . . . + . . . + . . . + ------ ------ ------ ------- + | DPNI | | DPNI | | DPNI | | DPMAC |----+ + ------ ------ ------ ------- | + | | | | | + | | | | ----- + =========== ================== | PHY | + ----- + +Creating a Network Interface +============================ +A net device is created for each DPNI object probed on the MC bus. Each DPNI has +a number of properties which determine the network interface configuration +options and associated hardware resources. + +DPNI objects (and the other DPAA2 objects needed for a network interface) can be +added to a container on the MC bus in one of two ways: statically, through a +Datapath Layout Binary file (DPL) that is parsed by MC at boot time; or created +dynamically at runtime, via the DPAA2 objects APIs. + + +Features & Offloads +=================== +Hardware checksum offloading is supported for TCP and UDP over IPv4/6 frames. +The checksum offloads can be independently configured on RX and TX through +ethtool. + +Hardware offload of unicast and multicast MAC filtering is supported on the +ingress path and permanently enabled. + +Scatter-gather frames are supported on both RX and TX paths. On TX, SG support +is configurable via ethtool; on RX it is always enabled. + +The DPAA2 hardware can process jumbo Ethernet frames of up to 10K bytes. + +The Ethernet driver defines a static flow hashing scheme that distributes +traffic based on a 5-tuple key: src IP, dst IP, IP proto, L4 src port, +L4 dst port. No user configuration is supported for now. + +Hardware specific statistics for the network interface as well as some +non-standard driver stats can be consulted through ethtool -S option. diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst new file mode 100644 index 000000000..ee40fcc5d --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst @@ -0,0 +1,11 @@ +=================== +DPAA2 Documentation +=================== + +.. toctree:: + :maxdepth: 1 + + overview + dpio-driver + ethernet-driver + mac-phy-support diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support.rst new file mode 100644 index 000000000..51e6624fb --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support.rst @@ -0,0 +1,191 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +======================= +DPAA2 MAC / PHY support +======================= + +:Copyright: |copy| 2019 NXP + +Overview +-------- + +The DPAA2 MAC / PHY support consists of a set of APIs that help DPAA2 network +drivers (dpaa2-eth, dpaa2-ethsw) interract with the PHY library. + +DPAA2 Software Architecture +--------------------------- + +Among other DPAA2 objects, the fsl-mc bus exports DPNI objects (abstracting a +network interface) and DPMAC objects (abstracting a MAC). The dpaa2-eth driver +probes on the DPNI object and connects to and configures a DPMAC object with +the help of phylink. + +Data connections may be established between a DPNI and a DPMAC, or between two +DPNIs. Depending on the connection type, the netif_carrier_[on/off] is handled +directly by the dpaa2-eth driver or by phylink. + +.. code-block:: none + + Sources of abstracted link state information presented by the MC firmware + + +--------------------------------------+ + +------------+ +---------+ | xgmac_mdio | + | net_device | | phylink |--| +-----+ +-----+ +-----+ +-----+ | + +------------+ +---------+ | | PHY | | PHY | | PHY | | PHY | | + | | | +-----+ +-----+ +-----+ +-----+ | + +------------------------------------+ | External MDIO bus | + | dpaa2-eth | +--------------------------------------+ + +------------------------------------+ + | | Linux + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + | | MC firmware + | /| V + +----------+ / | +----------+ + | | / | | | + | | | | | | + | DPNI |<------| |<------| DPMAC | + | | | | | | + | | \ |<---+ | | + +----------+ \ | | +----------+ + \| | + | + +--------------------------------------+ + | MC firmware polling MAC PCS for link | + | +-----+ +-----+ +-----+ +-----+ | + | | PCS | | PCS | | PCS | | PCS | | + | +-----+ +-----+ +-----+ +-----+ | + | Internal MDIO bus | + +--------------------------------------+ + + +Depending on an MC firmware configuration setting, each MAC may be in one of two modes: + +- DPMAC_LINK_TYPE_FIXED: the link state management is handled exclusively by + the MC firmware by polling the MAC PCS. Without the need to register a + phylink instance, the dpaa2-eth driver will not bind to the connected dpmac + object at all. + +- DPMAC_LINK_TYPE_PHY: The MC firmware is left waiting for link state update + events, but those are in fact passed strictly between the dpaa2-mac (based on + phylink) and its attached net_device driver (dpaa2-eth, dpaa2-ethsw), + effectively bypassing the firmware. + +Implementation +-------------- + +At probe time or when a DPNI's endpoint is dynamically changed, the dpaa2-eth +is responsible to find out if the peer object is a DPMAC and if this is the +case, to integrate it with PHYLINK using the dpaa2_mac_connect() API, which +will do the following: + + - look up the device tree for PHYLINK-compatible of binding (phy-handle) + - will create a PHYLINK instance associated with the received net_device + - connect to the PHY using phylink_of_phy_connect() + +The following phylink_mac_ops callback are implemented: + + - .validate() will populate the supported linkmodes with the MAC capabilities + only when the phy_interface_t is RGMII_* (at the moment, this is the only + link type supported by the driver). + + - .mac_config() will configure the MAC in the new configuration using the + dpmac_set_link_state() MC firmware API. + + - .mac_link_up() / .mac_link_down() will update the MAC link using the same + API described above. + +At driver unbind() or when the DPNI object is disconnected from the DPMAC, the +dpaa2-eth driver calls dpaa2_mac_disconnect() which will, in turn, disconnect +from the PHY and destroy the PHYLINK instance. + +In case of a DPNI-DPMAC connection, an 'ip link set dev eth0 up' would start +the following sequence of operations: + +(1) phylink_start() called from .dev_open(). +(2) The .mac_config() and .mac_link_up() callbacks are called by PHYLINK. +(3) In order to configure the HW MAC, the MC Firmware API + dpmac_set_link_state() is called. +(4) The firmware will eventually setup the HW MAC in the new configuration. +(5) A netif_carrier_on() call is made directly from PHYLINK on the associated + net_device. +(6) The dpaa2-eth driver handles the LINK_STATE_CHANGE irq in order to + enable/disable Rx taildrop based on the pause frame settings. + +.. code-block:: none + + +---------+ +---------+ + | PHYLINK |-------------->| eth0 | + +---------+ (5) +---------+ + (1) ^ | + | | + | v (2) + +-----------------------------------+ + | dpaa2-eth | + +-----------------------------------+ + | ^ (6) + | | + v (3) | + +---------+---------------+---------+ + | DPMAC | | DPNI | + +---------+ +---------+ + | MC Firmware | + +-----------------------------------+ + | + | + v (4) + +-----------------------------------+ + | HW MAC | + +-----------------------------------+ + +In case of a DPNI-DPNI connection, a usual sequence of operations looks like +the following: + +(1) ip link set dev eth0 up +(2) The dpni_enable() MC API called on the associated fsl_mc_device. +(3) ip link set dev eth1 up +(4) The dpni_enable() MC API called on the associated fsl_mc_device. +(5) The LINK_STATE_CHANGED irq is received by both instances of the dpaa2-eth + driver because now the operational link state is up. +(6) The netif_carrier_on() is called on the exported net_device from + link_state_update(). + +.. code-block:: none + + +---------+ +---------+ + | eth0 | | eth1 | + +---------+ +---------+ + | ^ ^ | + | | | | + (1) v | (6) (6) | v (3) + +---------+ +---------+ + |dpaa2-eth| |dpaa2-eth| + +---------+ +---------+ + | ^ ^ | + | | | | + (2) v | (5) (5) | v (4) + +---------+---------------+---------+ + | DPNI | | DPNI | + +---------+ +---------+ + | MC Firmware | + +-----------------------------------+ + + +Exported API +------------ + +Any DPAA2 driver that drivers endpoints of DPMAC objects should service its +_EVENT_ENDPOINT_CHANGED irq and connect/disconnect from the associated DPMAC +when necessary using the below listed API:: + + - int dpaa2_mac_connect(struct dpaa2_mac *mac); + - void dpaa2_mac_disconnect(struct dpaa2_mac *mac); + +A phylink integration is necessary only when the partner DPMAC is not of TYPE_FIXED. +One can check for this condition using the below API:: + + - bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev,struct fsl_mc_io *mc_io); + +Before connection to a MAC, the caller must allocate and populate the +dpaa2_mac structure with the associated net_device, a pointer to the MC portal +to be used and the actual fsl_mc_device structure of the DPMAC. diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst new file mode 100644 index 000000000..d638b5a8a --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst @@ -0,0 +1,405 @@ +.. include:: + +========================================================= +DPAA2 (Data Path Acceleration Architecture Gen2) Overview +========================================================= + +:Copyright: |copy| 2015 Freescale Semiconductor Inc. +:Copyright: |copy| 2018 NXP + +This document provides an overview of the Freescale DPAA2 architecture +and how it is integrated into the Linux kernel. + +Introduction +============ + +DPAA2 is a hardware architecture designed for high-speeed network +packet processing. DPAA2 consists of sophisticated mechanisms for +processing Ethernet packets, queue management, buffer management, +autonomous L2 switching, virtual Ethernet bridging, and accelerator +(e.g. crypto) sharing. + +A DPAA2 hardware component called the Management Complex (or MC) manages the +DPAA2 hardware resources. The MC provides an object-based abstraction for +software drivers to use the DPAA2 hardware. +The MC uses DPAA2 hardware resources such as queues, buffer pools, and +network ports to create functional objects/devices such as network +interfaces, an L2 switch, or accelerator instances. +The MC provides memory-mapped I/O command interfaces (MC portals) +which DPAA2 software drivers use to operate on DPAA2 objects. + +The diagram below shows an overview of the DPAA2 resource management +architecture:: + + +--------------------------------------+ + | OS | + | DPAA2 drivers | + | | | + +-----------------------------|--------+ + | + | (create,discover,connect + | config,use,destroy) + | + DPAA2 | + +------------------------| mc portal |-+ + | | | + | +- - - - - - - - - - - - -V- - -+ | + | | | | + | | Management Complex (MC) | | + | | | | + | +- - - - - - - - - - - - - - - -+ | + | | + | Hardware Hardware | + | Resources Objects | + | --------- ------- | + | -queues -DPRC | + | -buffer pools -DPMCP | + | -Eth MACs/ports -DPIO | + | -network interface -DPNI | + | profiles -DPMAC | + | -queue portals -DPBP | + | -MC portals ... | + | ... | + | | + +--------------------------------------+ + + +The MC mediates operations such as create, discover, +connect, configuration, and destroy. Fast-path operations +on data, such as packet transmit/receive, are not mediated by +the MC and are done directly using memory mapped regions in +DPIO objects. + +Overview of DPAA2 Objects +========================= + +The section provides a brief overview of some key DPAA2 objects. +A simple scenario is described illustrating the objects involved +in creating a network interfaces. + +DPRC (Datapath Resource Container) +---------------------------------- + +A DPRC is a container object that holds all the other +types of DPAA2 objects. In the example diagram below there +are 8 objects of 5 types (DPMCP, DPIO, DPBP, DPNI, and DPMAC) +in the container. + +:: + + +---------------------------------------------------------+ + | DPRC | + | | + | +-------+ +-------+ +-------+ +-------+ +-------+ | + | | DPMCP | | DPIO | | DPBP | | DPNI | | DPMAC | | + | +-------+ +-------+ +-------+ +---+---+ +---+---+ | + | | DPMCP | | DPIO | | + | +-------+ +-------+ | + | | DPMCP | | + | +-------+ | + | | + +---------------------------------------------------------+ + +From the point of view of an OS, a DPRC behaves similar to a plug and +play bus, like PCI. DPRC commands can be used to enumerate the contents +of the DPRC, discover the hardware objects present (including mappable +regions and interrupts). + +:: + + DPRC.1 (bus) + | + +--+--------+-------+-------+-------+ + | | | | | + DPMCP.1 DPIO.1 DPBP.1 DPNI.1 DPMAC.1 + DPMCP.2 DPIO.2 + DPMCP.3 + +Hardware objects can be created and destroyed dynamically, providing +the ability to hot plug/unplug objects in and out of the DPRC. + +A DPRC has a mappable MMIO region (an MC portal) that can be used +to send MC commands. It has an interrupt for status events (like +hotplug). +All objects in a container share the same hardware "isolation context". +This means that with respect to an IOMMU the isolation granularity +is at the DPRC (container) level, not at the individual object +level. + +DPRCs can be defined statically and populated with objects +via a config file passed to the MC when firmware starts it. + +DPAA2 Objects for an Ethernet Network Interface +----------------------------------------------- + +A typical Ethernet NIC is monolithic-- the NIC device contains TX/RX +queuing mechanisms, configuration mechanisms, buffer management, +physical ports, and interrupts. DPAA2 uses a more granular approach +utilizing multiple hardware objects. Each object provides specialized +functions. Groups of these objects are used by software to provide +Ethernet network interface functionality. This approach provides +efficient use of finite hardware resources, flexibility, and +performance advantages. + +The diagram below shows the objects needed for a simple +network interface configuration on a system with 2 CPUs. + +:: + + +---+---+ +---+---+ + CPU0 CPU1 + +---+---+ +---+---+ + | | + +---+---+ +---+---+ + DPIO DPIO + +---+---+ +---+---+ + \ / + \ / + \ / + +---+---+ + DPNI --- DPBP,DPMCP + +---+---+ + | + | + +---+---+ + DPMAC + +---+---+ + | + port/PHY + +Below the objects are described. For each object a brief description +is provided along with a summary of the kinds of operations the object +supports and a summary of key resources of the object (MMIO regions +and IRQs). + +DPMAC (Datapath Ethernet MAC) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Represents an Ethernet MAC, a hardware device that connects to an Ethernet +PHY and allows physical transmission and reception of Ethernet frames. + +- MMIO regions: none +- IRQs: DPNI link change +- commands: set link up/down, link config, get stats, + IRQ config, enable, reset + +DPNI (Datapath Network Interface) +Contains TX/RX queues, network interface configuration, and RX buffer pool +configuration mechanisms. The TX/RX queues are in memory and are identified +by queue number. + +- MMIO regions: none +- IRQs: link state +- commands: port config, offload config, queue config, + parse/classify config, IRQ config, enable, reset + +DPIO (Datapath I/O) +~~~~~~~~~~~~~~~~~~~ +Provides interfaces to enqueue and dequeue +packets and do hardware buffer pool management operations. The DPAA2 +architecture separates the mechanism to access queues (the DPIO object) +from the queues themselves. The DPIO provides an MMIO interface to +enqueue/dequeue packets. To enqueue something a descriptor is written +to the DPIO MMIO region, which includes the target queue number. +There will typically be one DPIO assigned to each CPU. This allows all +CPUs to simultaneously perform enqueue/dequeued operations. DPIOs are +expected to be shared by different DPAA2 drivers. + +- MMIO regions: queue operations, buffer management +- IRQs: data availability, congestion notification, buffer + pool depletion +- commands: IRQ config, enable, reset + +DPBP (Datapath Buffer Pool) +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Represents a hardware buffer pool. + +- MMIO regions: none +- IRQs: none +- commands: enable, reset + +DPMCP (Datapath MC Portal) +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Provides an MC command portal. +Used by drivers to send commands to the MC to manage +objects. + +- MMIO regions: MC command portal +- IRQs: command completion +- commands: IRQ config, enable, reset + +Object Connections +================== +Some objects have explicit relationships that must +be configured: + +- DPNI <--> DPMAC +- DPNI <--> DPNI +- DPNI <--> L2-switch-port + + A DPNI must be connected to something such as a DPMAC, + another DPNI, or L2 switch port. The DPNI connection + is made via a DPRC command. + +:: + + +-------+ +-------+ + | DPNI | | DPMAC | + +---+---+ +---+---+ + | | + +==========+ + +- DPNI <--> DPBP + + A network interface requires a 'buffer pool' (DPBP + object) which provides a list of pointers to memory + where received Ethernet data is to be copied. The + Ethernet driver configures the DPBPs associated with + the network interface. + +Interrupts +========== +All interrupts generated by DPAA2 objects are message +interrupts. At the hardware level message interrupts +generated by devices will normally have 3 components-- +1) a non-spoofable 'device-id' expressed on the hardware +bus, 2) an address, 3) a data value. + +In the case of DPAA2 devices/objects, all objects in the +same container/DPRC share the same 'device-id'. +For ARM-based SoC this is the same as the stream ID. + + +DPAA2 Linux Drivers Overview +============================ + +This section provides an overview of the Linux kernel drivers for +DPAA2-- 1) the bus driver and associated "DPAA2 infrastructure" +drivers and 2) functional object drivers (such as Ethernet). + +As described previously, a DPRC is a container that holds the other +types of DPAA2 objects. It is functionally similar to a plug-and-play +bus controller. +Each object in the DPRC is a Linux "device" and is bound to a driver. +The diagram below shows the Linux drivers involved in a networking +scenario and the objects bound to each driver. A brief description +of each driver follows. + +:: + + +------------+ + | OS Network | + | Stack | + +------------+ +------------+ + | Allocator |. . . . . . . | Ethernet | + |(DPMCP,DPBP)| | (DPNI) | + +-.----------+ +---+---+----+ + . . ^ | + . . | | dequeue> + +-------------+ . | | + | DPRC driver | . +---+---V----+ +---------+ + | (DPRC) | . . . . . .| DPIO driver| | MAC | + +----------+--+ | (DPIO) | | (DPMAC) | + | +------+-----+ +-----+---+ + | | | + | | | + +--------+----------+ | +--+---+ + | MC-bus driver | | | PHY | + | | | |driver| + | /bus/fsl-mc | | +--+---+ + +-------------------+ | | + | | + ========================= HARDWARE =========|=================|====== + DPIO | + | | + DPNI---DPBP | + | | + DPMAC | + | | + PHY ---------------+ + ============================================|======================== + +A brief description of each driver is provided below. + +MC-bus driver +------------- +The MC-bus driver is a platform driver and is probed from a +node in the device tree (compatible "fsl,qoriq-mc") passed in by boot +firmware. It is responsible for bootstrapping the DPAA2 kernel +infrastructure. +Key functions include: + +- registering a new bus type named "fsl-mc" with the kernel, + and implementing bus call-backs (e.g. match/uevent/dev_groups) +- implementing APIs for DPAA2 driver registration and for device + add/remove +- creates an MSI IRQ domain +- doing a 'device add' to expose the 'root' DPRC, in turn triggering + a bind of the root DPRC to the DPRC driver + +The binding for the MC-bus device-tree node can be consulted at +*Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt*. +The sysfs bind/unbind interfaces for the MC-bus can be consulted at +*Documentation/ABI/testing/sysfs-bus-fsl-mc*. + +DPRC driver +----------- +The DPRC driver is bound to DPRC objects and does runtime management +of a bus instance. It performs the initial bus scan of the DPRC +and handles interrupts for container events such as hot plug by +re-scanning the DPRC. + +Allocator +--------- +Certain objects such as DPMCP and DPBP are generic and fungible, +and are intended to be used by other drivers. For example, +the DPAA2 Ethernet driver needs: + +- DPMCPs to send MC commands, to configure network interfaces +- DPBPs for network buffer pools + +The allocator driver registers for these allocatable object types +and those objects are bound to the allocator when the bus is probed. +The allocator maintains a pool of objects that are available for +allocation by other DPAA2 drivers. + +DPIO driver +----------- +The DPIO driver is bound to DPIO objects and provides services that allow +other drivers such as the Ethernet driver to enqueue and dequeue data for +their respective objects. +Key services include: + +- data availability notifications +- hardware queuing operations (enqueue and dequeue of data) +- hardware buffer pool management + +To transmit a packet the Ethernet driver puts data on a queue and +invokes a DPIO API. For receive, the Ethernet driver registers +a data availability notification callback. To dequeue a packet +a DPIO API is used. +There is typically one DPIO object per physical CPU for optimum +performance, allowing different CPUs to simultaneously enqueue +and dequeue data. + +The DPIO driver operates on behalf of all DPAA2 drivers +active in the kernel-- Ethernet, crypto, compression, +etc. + +Ethernet driver +--------------- +The Ethernet driver is bound to a DPNI and implements the kernel +interfaces needed to connect the DPAA2 network interface to +the network stack. +Each DPNI corresponds to a Linux network interface. + +MAC driver +---------- +An Ethernet PHY is an off-chip, board specific component and is managed +by the appropriate PHY driver via an mdio bus. The MAC driver +plays a role of being a proxy between the PHY driver and the +MC. It does this proxy via the MC commands to a DPMAC object. +If the PHY driver signals a link change, the MAC driver notifies +the MC via a DPMAC command. If a network interface is brought +up or down, the MC notifies the DPMAC driver via an interrupt and +the driver can take appropriate action. diff --git a/Documentation/networking/device_drivers/ethernet/freescale/gianfar.rst b/Documentation/networking/device_drivers/ethernet/freescale/gianfar.rst new file mode 100644 index 000000000..9c4a91d38 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/freescale/gianfar.rst @@ -0,0 +1,51 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +The Gianfar Ethernet Driver +=========================== + +:Author: Andy Fleming +:Updated: 2005-07-28 + + +Checksum Offloading +=================== + +The eTSEC controller (first included in parts from late 2005 like +the 8548) has the ability to perform TCP, UDP, and IP checksums +in hardware. The Linux kernel only offloads the TCP and UDP +checksums (and always performs the pseudo header checksums), so +the driver only supports checksumming for TCP/IP and UDP/IP +packets. Use ethtool to enable or disable this feature for RX +and TX. + +VLAN +==== + +In order to use VLAN, please consult Linux documentation on +configuring VLANs. The gianfar driver supports hardware insertion and +extraction of VLAN headers, but not filtering. Filtering will be +done by the kernel. + +Multicasting +============ + +The gianfar driver supports using the group hash table on the +TSEC (and the extended hash table on the eTSEC) for multicast +filtering. On the eTSEC, the exact-match MAC registers are used +before the hash tables. See Linux documentation on how to join +multicast groups. + +Padding +======= + +The gianfar driver supports padding received frames with 2 bytes +to align the IP header to a 16-byte boundary, when supported by +hardware. + +Ethtool +======= + +The gianfar driver supports the use of ethtool for many +configuration options. You must run ethtool only on currently +open interfaces. See ethtool documentation for details. diff --git a/Documentation/networking/device_drivers/ethernet/google/gve.rst b/Documentation/networking/device_drivers/ethernet/google/gve.rst new file mode 100644 index 000000000..793693cef --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/google/gve.rst @@ -0,0 +1,123 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +============================================================== +Linux kernel driver for Compute Engine Virtual Ethernet (gve): +============================================================== + +Supported Hardware +=================== +The GVE driver binds to a single PCI device id used by the virtual +Ethernet device found in some Compute Engine VMs. + ++--------------+----------+---------+ +|Field | Value | Comments| ++==============+==========+=========+ +|Vendor ID | `0x1AE0` | Google | ++--------------+----------+---------+ +|Device ID | `0x0042` | | ++--------------+----------+---------+ +|Sub-vendor ID | `0x1AE0` | Google | ++--------------+----------+---------+ +|Sub-device ID | `0x0058` | | ++--------------+----------+---------+ +|Revision ID | `0x0` | | ++--------------+----------+---------+ +|Device Class | `0x200` | Ethernet| ++--------------+----------+---------+ + +PCI Bars +======== +The gVNIC PCI device exposes three 32-bit memory BARS: +- Bar0 - Device configuration and status registers. +- Bar1 - MSI-X vector table +- Bar2 - IRQ, RX and TX doorbells + +Device Interactions +=================== +The driver interacts with the device in the following ways: + - Registers + - A block of MMIO registers + - See gve_register.h for more detail + - Admin Queue + - See description below + - Reset + - At any time the device can be reset + - Interrupts + - See supported interrupts below + - Transmit and Receive Queues + - See description below + +Registers +--------- +All registers are MMIO and big endian. + +The registers are used for initializing and configuring the device as well as +querying device status in response to management interrupts. + +Admin Queue (AQ) +---------------- +The Admin Queue is a PAGE_SIZE memory block, treated as an array of AQ +commands, used by the driver to issue commands to the device and set up +resources.The driver and the device maintain a count of how many commands +have been submitted and executed. To issue AQ commands, the driver must do +the following (with proper locking): + +1) Copy new commands into next available slots in the AQ array +2) Increment its counter by he number of new commands +3) Write the counter into the GVE_ADMIN_QUEUE_DOORBELL register +4) Poll the ADMIN_QUEUE_EVENT_COUNTER register until it equals + the value written to the doorbell, or until a timeout. + +The device will update the status field in each AQ command reported as +executed through the ADMIN_QUEUE_EVENT_COUNTER register. + +Device Resets +------------- +A device reset is triggered by writing 0x0 to the AQ PFN register. +This causes the device to release all resources allocated by the +driver, including the AQ itself. + +Interrupts +---------- +The following interrupts are supported by the driver: + +Management Interrupt +~~~~~~~~~~~~~~~~~~~~ +The management interrupt is used by the device to tell the driver to +look at the GVE_DEVICE_STATUS register. + +The handler for the management irq simply queues the service task in +the workqueue to check the register and acks the irq. + +Notification Block Interrupts +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The notification block interrupts are used to tell the driver to poll +the queues associated with that interrupt. + +The handler for these irqs schedule the napi for that block to run +and poll the queues. + +Traffic Queues +-------------- +gVNIC's queues are composed of a descriptor ring and a buffer and are +assigned to a notification block. + +The descriptor rings are power-of-two-sized ring buffers consisting of +fixed-size descriptors. They advance their head pointer using a __be32 +doorbell located in Bar2. The tail pointers are advanced by consuming +descriptors in-order and updating a __be32 counter. Both the doorbell +and the counter overflow to zero. + +Each queue's buffers must be registered in advance with the device as a +queue page list, and packet data can only be put in those pages. + +Transmit +~~~~~~~~ +gve maps the buffers for transmit rings into a FIFO and copies the packets +into the FIFO before sending them to the NIC. + +Receive +~~~~~~~ +The buffers for receive rings are put into a data ring that is the same +length as the descriptor ring and the head and tail pointers advance over +the rings together. diff --git a/Documentation/networking/device_drivers/ethernet/huawei/hinic.rst b/Documentation/networking/device_drivers/ethernet/huawei/hinic.rst new file mode 100644 index 000000000..867ac8f4e --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/huawei/hinic.rst @@ -0,0 +1,128 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================ +Linux Kernel Driver for Huawei Intelligent NIC(HiNIC) family +============================================================ + +Overview: +========= +HiNIC is a network interface card for the Data Center Area. + +The driver supports a range of link-speed devices (10GbE, 25GbE, 40GbE, etc.). +The driver supports also a negotiated and extendable feature set. + +Some HiNIC devices support SR-IOV. This driver is used for Physical Function +(PF). + +HiNIC devices support MSI-X interrupt vector for each Tx/Rx queue and +adaptive interrupt moderation. + +HiNIC devices support also various offload features such as checksum offload, +TCP Transmit Segmentation Offload(TSO), Receive-Side Scaling(RSS) and +LRO(Large Receive Offload). + + +Supported PCI vendor ID/device IDs: +=================================== + +19e5:1822 - HiNIC PF + + +Driver Architecture and Source Code: +==================================== + +hinic_dev - Implement a Logical Network device that is independent from +specific HW details about HW data structure formats. + +hinic_hwdev - Implement the HW details of the device and include the components +for accessing the PCI NIC. + +hinic_hwdev contains the following components: +=============================================== + +HW Interface: +============= + +The interface for accessing the pci device (DMA memory and PCI BARs). +(hinic_hw_if.c, hinic_hw_if.h) + +Configuration Status Registers Area that describes the HW Registers on the +configuration and status BAR0. (hinic_hw_csr.h) + +MGMT components: +================ + +Asynchronous Event Queues(AEQs) - The event queues for receiving messages from +the MGMT modules on the cards. (hinic_hw_eqs.c, hinic_hw_eqs.h) + +Application Programmable Interface commands(API CMD) - Interface for sending +MGMT commands to the card. (hinic_hw_api_cmd.c, hinic_hw_api_cmd.h) + +Management (MGMT) - the PF to MGMT channel that uses API CMD for sending MGMT +commands to the card and receives notifications from the MGMT modules on the +card by AEQs. Also set the addresses of the IO CMDQs in HW. +(hinic_hw_mgmt.c, hinic_hw_mgmt.h) + +IO components: +============== + +Completion Event Queues(CEQs) - The completion Event Queues that describe IO +tasks that are finished. (hinic_hw_eqs.c, hinic_hw_eqs.h) + +Work Queues(WQ) - Contain the memory and operations for use by CMD queues and +the Queue Pairs. The WQ is a Memory Block in a Page. The Block contains +pointers to Memory Areas that are the Memory for the Work Queue Elements(WQEs). +(hinic_hw_wq.c, hinic_hw_wq.h) + +Command Queues(CMDQ) - The queues for sending commands for IO management and is +used to set the QPs addresses in HW. The commands completion events are +accumulated on the CEQ that is configured to receive the CMDQ completion events. +(hinic_hw_cmdq.c, hinic_hw_cmdq.h) + +Queue Pairs(QPs) - The HW Receive and Send queues for Receiving and Transmitting +Data. (hinic_hw_qp.c, hinic_hw_qp.h, hinic_hw_qp_ctxt.h) + +IO - de/constructs all the IO components. (hinic_hw_io.c, hinic_hw_io.h) + +HW device: +========== + +HW device - de/constructs the HW Interface, the MGMT components on the +initialization of the driver and the IO components on the case of Interface +UP/DOWN Events. (hinic_hw_dev.c, hinic_hw_dev.h) + + +hinic_dev contains the following components: +=============================================== + +PCI ID table - Contains the supported PCI Vendor/Device IDs. +(hinic_pci_tbl.h) + +Port Commands - Send commands to the HW device for port management +(MAC, Vlan, MTU, ...). (hinic_port.c, hinic_port.h) + +Tx Queues - Logical Tx Queues that use the HW Send Queues for transmit. +The Logical Tx queue is not dependent on the format of the HW Send Queue. +(hinic_tx.c, hinic_tx.h) + +Rx Queues - Logical Rx Queues that use the HW Receive Queues for receive. +The Logical Rx queue is not dependent on the format of the HW Receive Queue. +(hinic_rx.c, hinic_rx.h) + +hinic_dev - de/constructs the Logical Tx and Rx Queues. +(hinic_main.c, hinic_dev.h) + + +Miscellaneous +============= + +Common functions that are used by HW and Logical Device. +(hinic_common.c, hinic_common.h) + + +Support +======= + +If an issue is identified with the released source code on the supported kernel +with a supported adapter, email the specific information related to the issue to +aviad.krawczyk@huawei.com. diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst new file mode 100644 index 000000000..cbb75a181 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -0,0 +1,60 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +Ethernet Device Drivers +======================= + +Device drivers for Ethernet and Ethernet-based virtual function devices. + +Contents: + +.. toctree:: + :maxdepth: 2 + + 3com/3c509 + 3com/vortex + amazon/ena + altera/altera_tse + aquantia/atlantic + chelsio/cxgb + cirrus/cs89x0 + dlink/dl2k + davicom/dm9000 + dec/de4x5 + dec/dmfe + freescale/dpaa + freescale/dpaa2/index + freescale/gianfar + google/gve + huawei/hinic + intel/e100 + intel/e1000 + intel/e1000e + intel/fm10k + intel/igb + intel/igbvf + intel/ixgb + intel/ixgbe + intel/ixgbevf + intel/i40e + intel/iavf + intel/ice + marvell/octeontx2 + mellanox/mlx5 + microsoft/netvsc + neterion/s2io + neterion/vxge + netronome/nfp + pensando/ionic + smsc/smc9 + stmicro/stmmac + ti/cpsw + ti/cpsw_switchdev + ti/tlan + toshiba/spider_net + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/networking/device_drivers/ethernet/intel/e100.rst b/Documentation/networking/device_drivers/ethernet/intel/e100.rst new file mode 100644 index 000000000..3d4a9ba21 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/intel/e100.rst @@ -0,0 +1,188 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +============================================================= +Linux Base Driver for the Intel(R) PRO/100 Family of Adapters +============================================================= + +June 1, 2018 + +Contents +======== + +- In This Release +- Identifying Your Adapter +- Building and Installation +- Driver Configuration Parameters +- Additional Configurations +- Known Issues +- Support + + +In This Release +=============== + +This file describes the Linux Base Driver for the Intel(R) PRO/100 Family of +Adapters. This driver includes support for Itanium(R)2-based systems. + +For questions related to hardware requirements, refer to the documentation +supplied with your Intel PRO/100 adapter. + +The following features are now available in supported kernels: + - Native VLANs + - Channel Bonding (teaming) + - SNMP + +Channel Bonding documentation can be found in the Linux kernel source: +/Documentation/networking/bonding.rst + + +Identifying Your Adapter +======================== + +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +https://www.intel.com/support + +Driver Configuration Parameters +=============================== + +The default value for each parameter is generally the recommended setting, +unless otherwise noted. + +Rx Descriptors: + Number of receive descriptors. A receive descriptor is a data + structure that describes a receive buffer and its attributes to the network + controller. The data in the descriptor is used by the controller to write + data from the controller to host memory. In the 3.x.x driver the valid range + for this parameter is 64-256. The default value is 256. This parameter can be + changed using the command:: + + ethtool -G eth? rx n + + Where n is the number of desired Rx descriptors. + +Tx Descriptors: + Number of transmit descriptors. A transmit descriptor is a data + structure that describes a transmit buffer and its attributes to the network + controller. The data in the descriptor is used by the controller to read + data from the host memory to the controller. In the 3.x.x driver the valid + range for this parameter is 64-256. The default value is 128. This parameter + can be changed using the command:: + + ethtool -G eth? tx n + + Where n is the number of desired Tx descriptors. + +Speed/Duplex: + The driver auto-negotiates the link speed and duplex settings by + default. The ethtool utility can be used as follows to force speed/duplex.:: + + ethtool -s eth? autoneg off speed {10|100} duplex {full|half} + + NOTE: setting the speed/duplex to incorrect values will cause the link to + fail. + +Event Log Message Level: + The driver uses the message level flag to log events + to syslog. The message level can be set at driver load time. It can also be + set using the command:: + + ethtool -s eth? msglvl n + + +Additional Configurations +========================= + +Configuring the Driver on Different Distributions +------------------------------------------------- + +Configuring a network driver to load properly when the system is started +is distribution dependent. Typically, the configuration process involves +adding an alias line to `/etc/modprobe.d/*.conf` as well as editing other +system startup scripts and/or configuration files. Many popular Linux +distributions ship with tools to make these changes for you. To learn +the proper way to configure a network device for your system, refer to +your distribution documentation. If during this process you are asked +for the driver or module name, the name for the Linux Base Driver for +the Intel PRO/100 Family of Adapters is e100. + +As an example, if you install the e100 driver for two PRO/100 adapters +(eth0 and eth1), add the following to a configuration file in +/etc/modprobe.d/:: + + alias eth0 e100 + alias eth1 e100 + +Viewing Link Messages +--------------------- + +In order to see link messages and other Intel driver information on your +console, you must set the dmesg level up to six. This can be done by +entering the following on the command line before loading the e100 +driver:: + + dmesg -n 6 + +If you wish to see all messages issued by the driver, including debug +messages, set the dmesg level to eight. + +NOTE: This setting is not saved across reboots. + +ethtool +------- + +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The ethtool +version 1.6 or later is required for this functionality. + +The latest release of ethtool can be found from +https://www.kernel.org/pub/software/network/ethtool/ + +Enabling Wake on LAN (WoL) +-------------------------- +WoL is provided through the ethtool utility. For instructions on +enabling WoL with ethtool, refer to the ethtool man page. WoL will be +enabled on the system during the next shut down or reboot. For this +driver version, in order to enable WoL, the e100 driver must be loaded +when shutting down or rebooting the system. + +NAPI +---- + +NAPI (Rx polling mode) is supported in the e100 driver. + +See https://wiki.linuxfoundation.org/networking/napi for more +information on NAPI. + +Multiple Interfaces on Same Ethernet Broadcast Network +------------------------------------------------------ + +Due to the default ARP behavior on Linux, it is not possible to have one +system on two IP networks in the same Ethernet broadcast domain +(non-partitioned switch) behave as expected. All Ethernet interfaces +will respond to IP traffic for any IP address assigned to the system. +This results in unbalanced receive traffic. + +If you have multiple interfaces in a server, either turn on ARP +filtering by + +(1) entering:: + + echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter + + (this only works if your kernel's version is higher than 2.4.5), or + +(2) installing the interfaces in separate broadcast domains (either + in different switches or in a switch partitioned to VLANs). + + +Support +======= +For general information, go to the Intel support website at: +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: +http://sourceforge.net/projects/e1000 +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/device_drivers/ethernet/intel/e1000.rst b/Documentation/networking/device_drivers/ethernet/intel/e1000.rst new file mode 100644 index 000000000..4aaae0f7d --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/intel/e1000.rst @@ -0,0 +1,463 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +========================================================== +Linux Base Driver for Intel(R) Ethernet Network Connection +========================================================== + +Intel Gigabit Linux driver. +Copyright(c) 1999 - 2013 Intel Corporation. + +Contents +======== + +- Identifying Your Adapter +- Command Line Parameters +- Speed and Duplex Configuration +- Additional Configurations +- Support + +Identifying Your Adapter +======================== + +For more information on how to identify your adapter, go to the Adapter & +Driver ID Guide at: + + http://support.intel.com/support/go/network/adapter/idguide.htm + +For the latest Intel network drivers for Linux, refer to the following +website. In the search field, enter your adapter name or type, or use the +networking link on the left to search for your adapter: + + http://support.intel.com/support/go/network/adapter/home.htm + +Command Line Parameters +======================= + +The default value for each parameter is generally the recommended setting, +unless otherwise noted. + +NOTES: + For more information about the AutoNeg, Duplex, and Speed + parameters, see the "Speed and Duplex Configuration" section in + this document. + + For more information about the InterruptThrottleRate, + RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay + parameters, see the application note at: + http://www.intel.com/design/network/applnots/ap450.htm + +AutoNeg +------- + +(Supported only on adapters with copper connections) + +:Valid Range: 0x01-0x0F, 0x20-0x2F +:Default Value: 0x2F + +This parameter is a bit-mask that specifies the speed and duplex settings +advertised by the adapter. When this parameter is used, the Speed and +Duplex parameters must not be specified. + +NOTE: + Refer to the Speed and Duplex section of this readme for more + information on the AutoNeg parameter. + +Duplex +------ + +(Supported only on adapters with copper connections) + +:Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full) +:Default Value: 0 + +This defines the direction in which data is allowed to flow. Can be +either one or two-directional. If both Duplex and the link partner are +set to auto-negotiate, the board auto-detects the correct duplex. If the +link partner is forced (either full or half), Duplex defaults to half- +duplex. + +FlowControl +----------- + +:Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) +:Default Value: Reads flow control settings from the EEPROM + +This parameter controls the automatic generation(Tx) and response(Rx) +to Ethernet PAUSE frames. + +InterruptThrottleRate +--------------------- + +(not supported on Intel(R) 82542, 82543 or 82544-based adapters) + +:Valid Range: + 0,1,3,4,100-100000 (0=off, 1=dynamic, 3=dynamic conservative, + 4=simplified balancing) +:Default Value: 3 + +The driver can limit the amount of interrupts per second that the adapter +will generate for incoming packets. It does this by writing a value to the +adapter that is based on the maximum amount of interrupts that the adapter +will generate per second. + +Setting InterruptThrottleRate to a value greater or equal to 100 +will program the adapter to send out a maximum of that many interrupts +per second, even if more packets have come in. This reduces interrupt +load on the system and can lower CPU utilization under heavy load, +but will increase latency as packets are not processed as quickly. + +The default behaviour of the driver previously assumed a static +InterruptThrottleRate value of 8000, providing a good fallback value for +all traffic types,but lacking in small packet performance and latency. +The hardware can handle many more small packets per second however, and +for this reason an adaptive interrupt moderation algorithm was implemented. + +Since 7.3.x, the driver has two adaptive modes (setting 1 or 3) in which +it dynamically adjusts the InterruptThrottleRate value based on the traffic +that it receives. After determining the type of incoming traffic in the last +timeframe, it will adjust the InterruptThrottleRate to an appropriate value +for that traffic. + +The algorithm classifies the incoming traffic every interval into +classes. Once the class is determined, the InterruptThrottleRate value is +adjusted to suit that traffic type the best. There are three classes defined: +"Bulk traffic", for large amounts of packets of normal size; "Low latency", +for small amounts of traffic and/or a significant percentage of small +packets; and "Lowest latency", for almost completely small packets or +minimal traffic. + +In dynamic conservative mode, the InterruptThrottleRate value is set to 4000 +for traffic that falls in class "Bulk traffic". If traffic falls in the "Low +latency" or "Lowest latency" class, the InterruptThrottleRate is increased +stepwise to 20000. This default mode is suitable for most applications. + +For situations where low latency is vital such as cluster or +grid computing, the algorithm can reduce latency even more when +InterruptThrottleRate is set to mode 1. In this mode, which operates +the same as mode 3, the InterruptThrottleRate will be increased stepwise to +70000 for traffic in class "Lowest latency". + +In simplified mode the interrupt rate is based on the ratio of TX and +RX traffic. If the bytes per second rate is approximately equal, the +interrupt rate will drop as low as 2000 interrupts per second. If the +traffic is mostly transmit or mostly receive, the interrupt rate could +be as high as 8000. + +Setting InterruptThrottleRate to 0 turns off any interrupt moderation +and may improve small packet latency, but is generally not suitable +for bulk throughput traffic. + +NOTE: + InterruptThrottleRate takes precedence over the TxAbsIntDelay and + RxAbsIntDelay parameters. In other words, minimizing the receive + and/or transmit absolute delays does not force the controller to + generate more interrupts than what the Interrupt Throttle Rate + allows. + +CAUTION: + If you are using the Intel(R) PRO/1000 CT Network Connection + (controller 82547), setting InterruptThrottleRate to a value + greater than 75,000, may hang (stop transmitting) adapters + under certain network conditions. If this occurs a NETDEV + WATCHDOG message is logged in the system event log. In + addition, the controller is automatically reset, restoring + the network connection. To eliminate the potential for the + hang, ensure that InterruptThrottleRate is set no greater + than 75,000 and is not set to 0. + +NOTE: + When e1000 is loaded with default settings and multiple adapters + are in use simultaneously, the CPU utilization may increase non- + linearly. In order to limit the CPU utilization without impacting + the overall throughput, we recommend that you load the driver as + follows:: + + modprobe e1000 InterruptThrottleRate=3000,3000,3000 + + This sets the InterruptThrottleRate to 3000 interrupts/sec for + the first, second, and third instances of the driver. The range + of 2000 to 3000 interrupts per second works on a majority of + systems and is a good starting point, but the optimal value will + be platform-specific. If CPU utilization is not a concern, use + RX_POLLING (NAPI) and default driver settings. + +RxDescriptors +------------- + +:Valid Range: + - 48-256 for 82542 and 82543-based adapters + - 48-4096 for all other supported adapters +:Default Value: 256 + +This value specifies the number of receive buffer descriptors allocated +by the driver. Increasing this value allows the driver to buffer more +incoming packets, at the expense of increased system memory utilization. + +Each descriptor is 16 bytes. A receive buffer is also allocated for each +descriptor and can be either 2048, 4096, 8192, or 16384 bytes, depending +on the MTU setting. The maximum MTU size is 16110. + +NOTE: + MTU designates the frame size. It only needs to be set for Jumbo + Frames. Depending on the available system resources, the request + for a higher number of receive descriptors may be denied. In this + case, use a lower number. + +RxIntDelay +---------- + +:Valid Range: 0-65535 (0=off) +:Default Value: 0 + +This value delays the generation of receive interrupts in units of 1.024 +microseconds. Receive interrupt reduction can improve CPU efficiency if +properly tuned for specific network traffic. Increasing this value adds +extra latency to frame reception and can end up decreasing the throughput +of TCP traffic. If the system is reporting dropped receives, this value +may be set too high, causing the driver to run out of available receive +descriptors. + +CAUTION: + When setting RxIntDelay to a value other than 0, adapters may + hang (stop transmitting) under certain network conditions. If + this occurs a NETDEV WATCHDOG message is logged in the system + event log. In addition, the controller is automatically reset, + restoring the network connection. To eliminate the potential + for the hang ensure that RxIntDelay is set to 0. + +RxAbsIntDelay +------------- + +(This parameter is supported only on 82540, 82545 and later adapters.) + +:Valid Range: 0-65535 (0=off) +:Default Value: 128 + +This value, in units of 1.024 microseconds, limits the delay in which a +receive interrupt is generated. Useful only if RxIntDelay is non-zero, +this value ensures that an interrupt is generated after the initial +packet is received within the set amount of time. Proper tuning, +along with RxIntDelay, may improve traffic throughput in specific network +conditions. + +Speed +----- + +(This parameter is supported only on adapters with copper connections.) + +:Valid Settings: 0, 10, 100, 1000 +:Default Value: 0 (auto-negotiate at all supported speeds) + +Speed forces the line speed to the specified value in megabits per second +(Mbps). If this parameter is not specified or is set to 0 and the link +partner is set to auto-negotiate, the board will auto-detect the correct +speed. Duplex should also be set when Speed is set to either 10 or 100. + +TxDescriptors +------------- + +:Valid Range: + - 48-256 for 82542 and 82543-based adapters + - 48-4096 for all other supported adapters +:Default Value: 256 + +This value is the number of transmit descriptors allocated by the driver. +Increasing this value allows the driver to queue more transmits. Each +descriptor is 16 bytes. + +NOTE: + Depending on the available system resources, the request for a + higher number of transmit descriptors may be denied. In this case, + use a lower number. + +TxIntDelay +---------- + +:Valid Range: 0-65535 (0=off) +:Default Value: 8 + +This value delays the generation of transmit interrupts in units of +1.024 microseconds. Transmit interrupt reduction can improve CPU +efficiency if properly tuned for specific network traffic. If the +system is reporting dropped transmits, this value may be set too high +causing the driver to run out of available transmit descriptors. + +TxAbsIntDelay +------------- + +(This parameter is supported only on 82540, 82545 and later adapters.) + +:Valid Range: 0-65535 (0=off) +:Default Value: 32 + +This value, in units of 1.024 microseconds, limits the delay in which a +transmit interrupt is generated. Useful only if TxIntDelay is non-zero, +this value ensures that an interrupt is generated after the initial +packet is sent on the wire within the set amount of time. Proper tuning, +along with TxIntDelay, may improve traffic throughput in specific +network conditions. + +XsumRX +------ + +(This parameter is NOT supported on the 82542-based adapter.) + +:Valid Range: 0-1 +:Default Value: 1 + +A value of '1' indicates that the driver should enable IP checksum +offload for received packets (both UDP and TCP) to the adapter hardware. + +Copybreak +--------- + +:Valid Range: 0-xxxxxxx (0=off) +:Default Value: 256 +:Usage: modprobe e1000.ko copybreak=128 + +Driver copies all packets below or equaling this size to a fresh RX +buffer before handing it up the stack. + +This parameter is different than other parameters, in that it is a +single (not 1,1,1 etc.) parameter applied to all driver instances and +it is also available during runtime at +/sys/module/e1000/parameters/copybreak + +SmartPowerDownEnable +-------------------- + +:Valid Range: 0-1 +:Default Value: 0 (disabled) + +Allows PHY to turn off in lower power states. The user can turn off +this parameter in supported chipsets. + +Speed and Duplex Configuration +============================== + +Three keywords are used to control the speed and duplex configuration. +These keywords are Speed, Duplex, and AutoNeg. + +If the board uses a fiber interface, these keywords are ignored, and the +fiber interface board only links at 1000 Mbps full-duplex. + +For copper-based boards, the keywords interact as follows: + +- The default operation is auto-negotiate. The board advertises all + supported speed and duplex combinations, and it links at the highest + common speed and duplex mode IF the link partner is set to auto-negotiate. + +- If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps + is advertised (The 1000BaseT spec requires auto-negotiation.) + +- If Speed = 10 or 100, then both Speed and Duplex should be set. Auto- + negotiation is disabled, and the AutoNeg parameter is ignored. Partner + SHOULD also be forced. + +The AutoNeg parameter is used when more control is required over the +auto-negotiation process. It should be used when you wish to control which +speed and duplex combinations are advertised during the auto-negotiation +process. + +The parameter may be specified as either a decimal or hexadecimal value as +determined by the bitmap below. + +============== ====== ====== ======= ======= ====== ====== ======= ====== +Bit position 7 6 5 4 3 2 1 0 +Decimal Value 128 64 32 16 8 4 2 1 +Hex value 80 40 20 10 8 4 2 1 +Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10 +Duplex Full Full Half Full Half +============== ====== ====== ======= ======= ====== ====== ======= ====== + +Some examples of using AutoNeg:: + + modprobe e1000 AutoNeg=0x01 (Restricts autonegotiation to 10 Half) + modprobe e1000 AutoNeg=1 (Same as above) + modprobe e1000 AutoNeg=0x02 (Restricts autonegotiation to 10 Full) + modprobe e1000 AutoNeg=0x03 (Restricts autonegotiation to 10 Half or 10 Full) + modprobe e1000 AutoNeg=0x04 (Restricts autonegotiation to 100 Half) + modprobe e1000 AutoNeg=0x05 (Restricts autonegotiation to 10 Half or 100 + Half) + modprobe e1000 AutoNeg=0x020 (Restricts autonegotiation to 1000 Full) + modprobe e1000 AutoNeg=32 (Same as above) + +Note that when this parameter is used, Speed and Duplex must not be specified. + +If the link partner is forced to a specific speed and duplex, then this +parameter should not be used. Instead, use the Speed and Duplex parameters +previously mentioned to force the adapter to the same speed and duplex. + +Additional Configurations +========================= + +Jumbo Frames +------------ + + Jumbo Frames support is enabled by changing the MTU to a value larger than + the default of 1500. Use the ifconfig command to increase the MTU size. + For example:: + + ifconfig eth mtu 9000 up + + This setting is not saved across reboots. It can be made permanent if + you add:: + + MTU=9000 + + to the file /etc/sysconfig/network-scripts/ifcfg-eth. This example + applies to the Red Hat distributions; other distributions may store this + setting in a different location. + +Notes: + Degradation in throughput performance may be observed in some Jumbo frames + environments. If this is observed, increasing the application's socket buffer + size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help. + See the specific application manual and /usr/src/linux*/Documentation/ + networking/ip-sysctl.txt for more details. + + - The maximum MTU setting for Jumbo Frames is 16110. This value coincides + with the maximum Jumbo Frames size of 16128. + + - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in + poor performance or loss of link. + + - Adapters based on the Intel(R) 82542 and 82573V/E controller do not + support Jumbo Frames. These correspond to the following product names:: + + Intel(R) PRO/1000 Gigabit Server Adapter + Intel(R) PRO/1000 PM Network Connection + +ethtool +------- + + The driver utilizes the ethtool interface for driver configuration and + diagnostics, as well as displaying statistical information. The ethtool + version 1.6 or later is required for this functionality. + + The latest release of ethtool can be found from + https://www.kernel.org/pub/software/network/ethtool/ + +Enabling Wake on LAN (WoL) +-------------------------- + + WoL is configured through the ethtool utility. + + WoL will be enabled on the system during the next shut down or reboot. + For this driver version, in order to enable WoL, the e1000 driver must be + loaded when shutting down or rebooting the system. + +Support +======= + +For general information, go to the Intel support website at: + + http://support.intel.com + +or the Intel Wired Networking project hosted by Sourceforge at: + + http://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on the supported +kernel with a supported adapter, email the specific information related +to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst b/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst new file mode 100644 index 000000000..f49cd370e --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst @@ -0,0 +1,383 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +===================================================== +Linux Driver for Intel(R) Ethernet Network Connection +===================================================== + +Intel Gigabit Linux driver. +Copyright(c) 2008-2018 Intel Corporation. + +Contents +======== + +- Identifying Your Adapter +- Command Line Parameters +- Additional Configurations +- Support + + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +https://www.intel.com/support + + +Command Line Parameters +======================= +If the driver is built as a module, the following optional parameters are used +by entering them on the command line with the modprobe command using this +syntax:: + + modprobe e1000e [