269 lines
7.3 KiB
Plaintext
269 lines
7.3 KiB
Plaintext
NOTE: Ethertap is now an obsolete facility, and is scheduled
|
|
to be removed in the 2.5.x kernel series. Those writing
|
|
applications using ethertap should convert their code to
|
|
use the TUN/TAP driver instead, see 'tuntap.txt' in this
|
|
directory for more details. -DaveM
|
|
|
|
Ethertap programming mini-HOWTO
|
|
-------------------------------
|
|
|
|
The ethertap driver was written by Jay Schulist <jschlst@samba.org>,
|
|
you should contact him for further information. This document was written by
|
|
bert hubert <bert.hubert@netherlabs.nl>. Updates are welcome.
|
|
|
|
What ethertap can do for you
|
|
----------------------------
|
|
|
|
Ethertap allows you to easily run your own network stack from userspace.
|
|
Tunnels can benefit greatly from this. You can also use it to do network
|
|
experiments. The alternative would be to use a raw socket to send data and
|
|
use libpcap to receive it. Using ethertap saves you this multiplicity and
|
|
also does ARP for you if you want.
|
|
|
|
The more technical blurb:
|
|
|
|
Ethertap provides packet reception and transmission for user space programs.
|
|
It can be viewed as a simple Ethernet device which, instead of receiving
packets from a network wire, receives them from user space.
|
|
|
|
Ethertap can be used for anything from AppleTalk to IPX to even building
|
|
bridging tunnels. It also has many other general purpose uses.
|
|
|
|
Configuring your kernel
|
|
-----------------------
|
|
|
|
Firstly, you need this in Code maturity level options:
|
|
|
|
#
|
|
# Code maturity level options
|
|
#
|
|
CONFIG_EXPERIMENTAL=y
|
|
|
|
Then you need Netlink support:
|
|
|
|
CONFIG_NETLINK=y
|
|
|
|
This allows the kernel to exchange data with userspace applications. There
|
|
are two ways of doing this. The new way works with netlink sockets, and I
|
|
have no experience with that yet. ANK uses it in his excellent iproute2
|
|
package, see for example rtmon.c. iproute2 can be found on
|
|
ftp://ftp.tux.org/pub/net/ip-routing/iproute2*
|
|
|
|
The new way is described, in part, in netlink(7), available on
|
|
http://www.europe.redhat.com/documentation/man-pages/man7/netlink.7.php3
|
|
|
|
There is also a Netlink-HOWTO, available on http://snafu.freedom.org/linux2.2/docs/netlink-HOWTO.html
|
|
Sadly I know of no code using ethertap with this new interface.
|
|
|
|
The older way works by opening character special files with major number 36.
|
|
Enable this with:
|
|
|
|
CONFIG_NETLINK_DEV=m
|
|
|
|
Please be advised that this support is going to be dropped sometime in the
|
|
future!
|
|
|
|
Then finally in the Network Devices section,
|
|
|
|
CONFIG_ETHERTAP=m
|
|
|
|
You can include it directly in the kernel if you want, of course, no need
|
|
for modules.
|
|
|
|
Setting it all up
|
|
-----------------
|
|
|
|
First we need to create the /dev/tap0 device node:
|
|
|
|
# mknod /dev/tap0 c 36 16
|
|
# mknod /dev/tap1 c 36 17
|
|
(etc)
|
|
|
|
Load the relevant modules (ethertap.o, netlink_dev.o, perhaps netlink.o),
|
|
and bring up your tap0 device:
|
|
|
|
# ifconfig tap0 10.0.0.123 up
|
|
|
|
Now your device is up and running, you can ping it as well. This is what
|
|
confused me to no end, because nothing is connected to our ethertap as yet,
|
|
how is it that we can ping it?
|
|
|
|
It turns out that the ethertap is just like a regular network interface -
|
|
even when it's down you can ping it. We need to route stuff to it:
|
|
|
|
# route add -host 10.0.0.124 gw 10.0.0.123
|
|
|
|
Now we can read /dev/tap0 and when we ping 10.0.0.124 from our
|
|
localhost, output should appear on the screen.
|
|
|
|
# cat /dev/tap0
|
|
:ßVU:9````````````````````````þýþET@?'
|
|
|
|
|
|
Getting this to work from other hosts
|
|
-------------------------------------
|
|
|
|
For this to work, you often need proxy ARP.
|
|
|
|
# echo 1 > /proc/sys/net/ipv4/conf/eth0/proxy_arp
|
|
|
|
eth0 here stands for the interface that connects to 'other hosts'.
|
|
|
|
Chances are that you are trying this on a non-routing desktop computer, so
|
|
you need to enable ip forwarding:
|
|
|
|
# echo 1 > /proc/sys/net/ipv4/ip_forward
|
|
|
|
You should now be able to ping 10.0.0.124 from other hosts on your
|
|
10.0.0.0/8 subnet. If you are using public ip space, it should work from
|
|
everywhere.
|
|
|
|
ARP
|
|
---
|
|
|
|
If we were to take things very literally, your tcp/ip pseudo stack would
|
|
also have to implement ARP and MAC addresses. This is often a bit silly as
|
|
the ethertap device is a figment of our imagination anyway. However, should
|
|
you want to go 'all the way', you can add the 'arp' flag to ifconfig:
|
|
|
|
# ifconfig tap0 10.0.0.123 up arp
|
|
|
|
This may also be useful when implementing a bridge, which needs to bridge
|
|
ARP packets as well.
|
|
|
|
The sample program below will no longer work then, because it does not
|
|
implement ARP.
|
|
|
|
Sample program
|
|
--------------
|
|
|
|
A sample program is included somewhere in the bowels of the netfilter
|
|
source. I've extracted this program and list it here. It implements a very
|
|
tiny part of the IP stack and can respond to any pings it receives. It gets
|
|
confused if it receives ARP, as it tries to parse it by treating it as an IP
|
|
packet.
|
|
|
|
/* Simple program to listen to /dev/tap0 and reply to pings. */
|
|
#include <fcntl.h>
|
|
#include <netinet/ip.h>
|
|
#include <netinet/ip_icmp.h>
|
|
#if defined(__GLIBC__) && (__GLIBC__ == 2)
|
|
#include <netinet/tcp.h>
|
|
#include <netinet/udp.h>
|
|
#else
|
|
#include <linux/tcp.h>
|
|
#include <linux/udp.h>
|
|
#endif
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <errno.h>
|
|
#include <unistd.h>
|
|
|
|
/*
 * Compute a 16-bit ones'-complement sum over a buffer.
 *
 *   buffer   start of the data to sum; it does not have to be 2-byte aligned
 *   len      number of bytes to sum
 *   prevsum  partial sum to combine with this buffer (0 when starting fresh)
 *
 * The data is consumed as 16-bit words in memory (native) byte order.  A
 * trailing odd byte is treated as the first byte of a word whose second
 * byte is zero.
 *
 * Returns the fully folded sum, NOT complemented; the caller applies `~'
 * to turn it into a checksum.
 */
u_int16_t csum_partial(void *buffer, unsigned int len, u_int16_t prevsum)
{
	const unsigned char *p = buffer;
	u_int32_t sum = prevsum;
	u_int16_t word;

	while (len > 1) {
		/* memcpy keeps the read valid for unaligned buffers. */
		memcpy(&word, p, sizeof(word));
		sum += word;
		/*
		 * Fold early once the top bit is set so that very long
		 * buffers cannot overflow the 32-bit accumulator and
		 * silently drop carries.
		 */
		if (sum & 0x80000000)
			sum = (sum & 0xFFFF) + (sum >> 16);
		p += 2;
		len -= 2;
	}

	if (len) {
		/* Odd trailing byte: pad with a zero byte to a full word. */
		word = 0;
		memcpy(&word, p, 1);
		sum += word;
	}

	/*
	 * End-around carry.  A single fold can itself produce a carry
	 * (e.g. 0x1FFFF -> 0x10000), so repeat until none is left.
	 */
	while (sum >> 16)
		sum = (sum & 0xFFFF) + (sum >> 16);

	return (u_int16_t)sum;
}
|
|
|
|
int main()
|
|
{
|
|
int fd, len;
|
|
union {
|
|
struct {
|
|
char etherhdr[16];
|
|
struct iphdr ip;
|
|
} fmt;
|
|
unsigned char raw[65536];
|
|
} u;
|
|
|
|
fd = open("/dev/tap0", O_RDWR);
|
|
if (fd < 0) {
|
|
perror("Opening `/dev/tap0'");
|
|
return 1;
|
|
}
|
|
|
|
/* u.fmt.ip.ihl in host order! Film at 11. */
|
|
while ((len = read(fd, &u, sizeof(u))) > 0) {
|
|
u_int32_t tmp;
|
|
struct icmphdr *icmp
|
|
= (void *)((u_int32_t *)&u.fmt.ip + u.fmt.ip.ihl );
|
|
struct tcphdr *tcp = (void *)icmp;
|
|
struct udphdr *udp = (void *)icmp;
|
|
|
|
fprintf(stderr, "SRC = %u.%u.%u.%u DST = %u.%u.%u.%u\n",
|
|
(ntohl(u.fmt.ip.saddr) >> 24) & 0xFF,
|
|
(ntohl(u.fmt.ip.saddr) >> 16) & 0xFF,
|
|
(ntohl(u.fmt.ip.saddr) >> 8) & 0xFF,
|
|
(ntohl(u.fmt.ip.saddr) >> 0) & 0xFF,
|
|
(ntohl(u.fmt.ip.daddr) >> 24) & 0xFF,
|
|
(ntohl(u.fmt.ip.daddr) >> 16) & 0xFF,
|
|
(ntohl(u.fmt.ip.daddr) >> 8) & 0xFF,
|
|
(ntohl(u.fmt.ip.daddr) >> 0) & 0xFF);
|
|
|
|
switch (u.fmt.ip.protocol) {
|
|
case IPPROTO_ICMP:
|
|
if (icmp->type == ICMP_ECHO) {
|
|
fprintf(stderr, "PONG! (iphdr = %u bytes)\n",
|
|
(unsigned int)((char *)icmp
|
|
- (char *)&u.fmt.ip));
|
|
|
|
/* Turn it around */
|
|
tmp = u.fmt.ip.saddr;
|
|
u.fmt.ip.saddr = u.fmt.ip.daddr;
|
|
u.fmt.ip.daddr = tmp;
|
|
|
|
icmp->type = ICMP_ECHOREPLY;
|
|
icmp->checksum = 0;
|
|
icmp->checksum
|
|
= ~csum_partial(icmp,
|
|
ntohs(u.fmt.ip.tot_len)
|
|
- u.fmt.ip.ihl*4, 0);
|
|
|
|
{
|
|
unsigned int i;
|
|
for (i = 44;
|
|
i < ntohs(u.fmt.ip.tot_len); i++){
|
|
printf("%u:0x%02X ", i,
|
|
((unsigned char *)
|
|
&u.fmt.ip)[i]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
write(fd, &u, len);
|
|
}
|
|
break;
|
|
case IPPROTO_TCP:
|
|
fprintf(stderr, "TCP: %u -> %u\n", ntohs(tcp->source),
|
|
ntohs(tcp->dest));
|
|
break;
|
|
|
|
case IPPROTO_UDP:
|
|
fprintf(stderr, "UDP: %u -> %u\n", ntohs(udp->source),
|
|
ntohs(udp->dest));
|
|
break;
|
|
}
|
|
}
|
|
if (len < 0)
|
|
perror("Reading from `/dev/tap0'");
|
|
else fprintf(stderr, "Empty read from `/dev/tap0'");
|
|
return len < 0 ? 1 : 0;
|
|
}
|
|
|