(2006-08-06) rescue-bootcd

2006-08-06 00:00:00 +02:00
parent 2f796b816a
commit decb062d20
21091 changed files with 7076462 additions and 0 deletions


@@ -0,0 +1,17 @@
#
# Makefile for the Linux networking core.
#
obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += flow.o dev.o ethtool.o dev_mcast.o dst.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o
obj-$(CONFIG_SYSFS) += net-sysfs.o
obj-$(CONFIG_NETFILTER) += netfilter.o
obj-$(CONFIG_NET_DIVERT) += dv.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NET_RADIO) += wireless.o
obj-$(CONFIG_NETPOLL) += netpoll.o


@@ -0,0 +1,495 @@
/*
* SUCS NET3:
*
* Generic datagram handling routines. These are generic for all
* protocols. Possibly a generic IP version on top of these would
* make sense. Not tonight however 8-).
* This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
* NetROM layer all have identical poll code and mostly
* identical recvmsg() code. So we share it here. The poll was
* shared before but buried in udp.c so I moved it.
*
* Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old
* udp.c code)
*
* Fixes:
* Alan Cox : NULL return from skb_peek_copy()
* understood
* Alan Cox : Rewrote skb_read_datagram to avoid the
* skb_peek_copy stuff.
* Alan Cox : Added support for SOCK_SEQPACKET.
* IPX can no longer use the SO_TYPE hack
* but AX.25 now works right, and SPX is
* feasible.
* Alan Cox : Fixed write poll of non IP protocol
* crash.
* Florian La Roche: Changed for my new skbuff handling.
* Darryl Miles : Fixed non-blocking SOCK_SEQPACKET.
* Linus Torvalds : BSD semantic fixes.
* Alan Cox : Datagram iovec handling
* Darryl Miles : Fixed non-blocking SOCK_STREAM.
* Alan Cox : POSIXisms
* Pete Wyckoff : Unconnected accept() fix.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/tcp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/checksum.h>
/*
* Is a socket 'connection oriented' ?
*/
static inline int connection_based(struct sock *sk)
{
return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}
/*
* Wait for a packet..
*/
static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
{
int error;
DEFINE_WAIT(wait);
prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
/* Socket errors? */
error = sock_error(sk);
if (error)
goto out_err;
if (!skb_queue_empty(&sk->sk_receive_queue))
goto out;
/* Socket shut down? */
if (sk->sk_shutdown & RCV_SHUTDOWN)
goto out_noerr;
/* Sequenced packets can come disconnected.
* If so we report the problem
*/
error = -ENOTCONN;
if (connection_based(sk) &&
!(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
goto out_err;
/* handle signals */
if (signal_pending(current))
goto interrupted;
error = 0;
*timeo_p = schedule_timeout(*timeo_p);
out:
finish_wait(sk->sk_sleep, &wait);
return error;
interrupted:
error = sock_intr_errno(*timeo_p);
out_err:
*err = error;
goto out;
out_noerr:
*err = 0;
error = 1;
goto out;
}
/**
* skb_recv_datagram - Receive a datagram skbuff
* @sk - socket
* @flags - MSG_ flags
* @noblock - blocking operation?
* @err - error code returned
*
* Get a datagram skbuff, understands the peeking, nonblocking wakeups
* and possible races. This replaces identical code in packet, raw and
* udp, as well as IPX, AX.25 and AppleTalk. It also finally fixes
* the long standing peek and read race for datagram sockets. If you
* alter this routine remember it must be re-entrant.
*
* This function does not itself lock the socket; the returned skb must
* be handed back with skb_free_datagram() once the caller is done
* with it.
*
* * It does not lock socket since today. This function is
* * free of race conditions. This measure should/can improve
* * significantly datagram socket latencies at high loads,
* * when data copying to user space takes lots of time.
* * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
* * 8) Great win.)
* * --ANK (980729)
*
* The order of the tests when we find no data waiting are specified
* quite explicitly by POSIX 1003.1g, don't change them without having
* the standard around please.
*/
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
int noblock, int *err)
{
struct sk_buff *skb;
long timeo;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
*/
int error = sock_error(sk);
if (error)
goto no_packet;
timeo = sock_rcvtimeo(sk, noblock);
do {
/* Again only user level code calls this function, so nothing
* interrupt level will suddenly eat the receive_queue.
*
* Look at current nfs client by the way...
* However, this function was correct in any case. 8)
*/
if (flags & MSG_PEEK) {
unsigned long cpu_flags;
spin_lock_irqsave(&sk->sk_receive_queue.lock,
cpu_flags);
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
atomic_inc(&skb->users);
spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
cpu_flags);
} else
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb)
return skb;
/* User doesn't want to wait */
error = -EAGAIN;
if (!timeo)
goto no_packet;
} while (!wait_for_packet(sk, err, &timeo));
return NULL;
no_packet:
*err = error;
return NULL;
}
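/*
 * Illustration only (not part of the original file): the usual calling
 * pattern in a 2.6-era datagram protocol's recvmsg(), loosely modelled
 * on udp.c; the function name and error handling are a hedged sketch,
 * not code from this commit.
 */
#if 0 /* example */
static int example_recvmsg(struct sock *sk, struct msghdr *msg,
			   size_t len, int noblock, int flags)
{
	struct sk_buff *skb;
	int err, copied;

	/* Blocks (up to sk_rcvtimeo) unless noblock/MSG_DONTWAIT is set */
	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		return err;

	/* Short reads truncate the datagram instead of failing */
	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	skb_free_datagram(sk, skb);
	return err ? err : copied;
}
#endif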
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
}
/*
* Copy a datagram to a linear buffer.
*/
int skb_copy_datagram(const struct sk_buff *skb, int offset, char __user *to, int size)
{
struct iovec iov = {
.iov_base = to,
.iov_len = size,
};
return skb_copy_datagram_iovec(skb, offset, &iov, size);
}
/**
* skb_copy_datagram_iovec - Copy a datagram to an iovec.
* @skb - buffer to copy
* @offset - offset in the buffer to start copying from
* @iovec - io vector to copy to
* @len - amount of data to copy from buffer to iovec
*
* Note: the iovec is modified during the copy.
*/
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
struct iovec *to, int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
if (memcpy_toiovec(to, skb->data + offset, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
}
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
BUG_TRAP(start <= offset + len);
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end - offset) > 0) {
int err;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
struct page *page = frag->page;
if (copy > len)
copy = len;
vaddr = kmap(page);
err = memcpy_toiovec(to, vaddr + frag->page_offset +
offset - start, copy);
kunmap(page);
if (err)
goto fault;
if (!(len -= copy))
return 0;
offset += copy;
}
start = end;
}
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *list = skb_shinfo(skb)->frag_list;
for (; list; list = list->next) {
int end;
BUG_TRAP(start <= offset + len);
end = start + list->len;
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
if (skb_copy_datagram_iovec(list,
offset - start,
to, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
}
start = end;
}
}
if (!len)
return 0;
fault:
return -EFAULT;
}
int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
u8 __user *to, int len, unsigned int *csump)
{
int start = skb_headlen(skb);
int pos = 0;
int i, copy = start - offset;
/* Copy header. */
if (copy > 0) {
int err = 0;
if (copy > len)
copy = len;
*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
*csump, &err);
if (err)
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
to += copy;
pos = copy;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
BUG_TRAP(start <= offset + len);
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end - offset) > 0) {
unsigned int csum2;
int err = 0;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
struct page *page = frag->page;
if (copy > len)
copy = len;
vaddr = kmap(page);
csum2 = csum_and_copy_to_user(vaddr +
frag->page_offset +
offset - start,
to, copy, 0, &err);
kunmap(page);
if (err)
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if (!(len -= copy))
return 0;
offset += copy;
to += copy;
pos += copy;
}
start = end;
}
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *list = skb_shinfo(skb)->frag_list;
for (; list; list = list->next) {
int end;
BUG_TRAP(start <= offset + len);
end = start + list->len;
if ((copy = end - offset) > 0) {
unsigned int csum2 = 0;
if (copy > len)
copy = len;
if (skb_copy_and_csum_datagram(list,
offset - start,
to, copy,
&csum2))
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if ((len -= copy) == 0)
return 0;
offset += copy;
to += copy;
pos += copy;
}
start = end;
}
}
if (!len)
return 0;
fault:
return -EFAULT;
}
/**
* skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
* @skb - skbuff
* @hlen - hardware length
* @iovec - io vector
*
* Caller _must_ check that skb will fit to this iovec.
*
* Returns: 0 - success.
* -EINVAL - checksum failure.
* -EFAULT - fault during copy. Beware, in this case iovec
* can be modified!
*/
int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb,
int hlen, struct iovec *iov)
{
unsigned int csum;
int chunk = skb->len - hlen;
/* Skip filled elements.
* Pretty silly, look at memcpy_toiovec, though 8)
*/
while (!iov->iov_len)
iov++;
if (iov->iov_len < chunk) {
if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk + hlen,
skb->csum)))
goto csum_error;
if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
goto fault;
} else {
csum = csum_partial(skb->data, hlen, skb->csum);
if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
chunk, &csum))
goto fault;
if ((unsigned short)csum_fold(csum))
goto csum_error;
iov->iov_len -= chunk;
iov->iov_base += chunk;
}
return 0;
csum_error:
return -EINVAL;
fault:
return -EFAULT;
}
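/*
 * Illustration only (hedged sketch of the udp.c pattern): when the
 * device has already verified the checksum, the plain copy suffices;
 * otherwise the copy and the checksum fold happen in one pass, and
 * -EINVAL from the call below signals a corrupt datagram.
 */
#if 0 /* example */
	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
					      msg->msg_iov, copied);
	else
		err = skb_copy_and_csum_datagram_iovec(skb,
						       sizeof(struct udphdr),
						       msg->msg_iov);
#endif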
/**
* datagram_poll - generic datagram poll
* @file - file struct
* @sock - socket
* @wait - poll table
*
* Datagram poll: Again totally generic. This also handles
* sequenced packet sockets providing the socket receive queue
* is only ever holding data ready to receive.
*
* Note: when you _don't_ use this routine for this protocol,
* and you use a different write policy from sock_writeable()
* then please supply your own write_space callback.
*/
unsigned int datagram_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
unsigned int mask;
poll_wait(file, sk->sk_sleep, wait);
mask = 0;
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR;
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN))
mask |= POLLIN | POLLRDNORM;
/* Connection-based need to check for termination and startup */
if (connection_based(sk)) {
if (sk->sk_state == TCP_CLOSE)
mask |= POLLHUP;
/* connection hasn't started yet? */
if (sk->sk_state == TCP_SYN_SENT)
return mask;
}
/* writable? */
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
return mask;
}
EXPORT_SYMBOL(datagram_poll);
EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
EXPORT_SYMBOL(skb_copy_datagram);
EXPORT_SYMBOL(skb_copy_datagram_iovec);
EXPORT_SYMBOL(skb_free_datagram);
EXPORT_SYMBOL(skb_recv_datagram);
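/*
 * Illustration only (hedged): how the exported helpers are typically
 * wired into a protocol's proto_ops in this era (AppleTalk's DDP uses
 * datagram_poll directly, for instance); example_dgram_ops and
 * example_sock_recvmsg are invented names.
 */
#if 0 /* example */
static struct proto_ops example_dgram_ops = {
	.family  = PF_INET,
	.owner   = THIS_MODULE,
	.poll    = datagram_poll,	  /* the shared implementation above */
	.recvmsg = example_sock_recvmsg,  /* wraps a routine built on
					     skb_recv_datagram() */
	/* remaining ops supplied by the protocol */
};
#endif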

File diff suppressed because it is too large.


@@ -0,0 +1,299 @@
/*
* Linux NET3: Multicast List maintenance.
*
* Authors:
* Tim Kordas <tjk@nostromo.eeap.cwru.edu>
* Richard Underwood <richard@wuzz.demon.co.uk>
*
* Stir fried together from the IP multicast and CAP patches above
* Alan Cox <Alan.Cox@linux.org>
*
* Fixes:
* Alan Cox : Update the device on a real delete
* rather than any time but...
* Alan Cox : IFF_ALLMULTI support.
* Alan Cox : New format set_multicast_list() calls.
* Gleb Natapov : Remove dev_mc_lock.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
/*
* Device multicast list maintenance.
*
* This is used both by IP and by the user level maintenance functions.
* Unlike BSD we maintain a usage count on a given multicast address so
* that a casual user application can add/delete multicasts used by
* protocols without doing damage to the protocols when it deletes the
* entries. It also helps IP as it tracks overlapping maps.
*
* Device mc lists are changed by BH at least if IPv6 is enabled,
* so they must be BH protected.
*
* We block accesses to device mc filters with dev->xmit_lock.
*/
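/*
 * Illustration only (hedged): the usage counting in practice - two
 * independent users may register the same address, and the NIC filter
 * only changes on the first add and the last delete. 01:00:5e:00:00:01
 * is the MAC address of the all-hosts IPv4 multicast group.
 */
#if 0 /* example */
	static u8 allhosts[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };

	dev_mc_add(dev, allhosts, ETH_ALEN, 0);	   /* users=1, filter uploaded */
	dev_mc_add(dev, allhosts, ETH_ALEN, 0);	   /* users=2, NIC untouched   */
	dev_mc_delete(dev, allhosts, ETH_ALEN, 0); /* users=1, still listening */
	dev_mc_delete(dev, allhosts, ETH_ALEN, 0); /* users=0, entry removed   */
#endif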
/*
* Update the multicast list into the physical NIC controller.
*/
static void __dev_mc_upload(struct net_device *dev)
{
/* Don't do anything till we up the interface
* [dev_open will call this function so the list will
* stay sane]
*/
if (!(dev->flags&IFF_UP))
return;
/*
* Devices with no set multicast or which have been
* detached don't get set.
*/
if (dev->set_multicast_list == NULL ||
!netif_device_present(dev))
return;
dev->set_multicast_list(dev);
}
void dev_mc_upload(struct net_device *dev)
{
spin_lock_bh(&dev->xmit_lock);
__dev_mc_upload(dev);
spin_unlock_bh(&dev->xmit_lock);
}
/*
* Delete a device level multicast
*/
int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
{
int err = 0;
struct dev_mc_list *dmi, **dmip;
spin_lock_bh(&dev->xmit_lock);
for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
/*
* Find the entry we want to delete. The device could
* have variable length entries so check these too.
*/
if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
alen == dmi->dmi_addrlen) {
if (glbl) {
int old_glbl = dmi->dmi_gusers;
dmi->dmi_gusers = 0;
if (old_glbl == 0)
break;
}
if (--dmi->dmi_users)
goto done;
/*
* Last user. So delete the entry.
*/
*dmip = dmi->next;
dev->mc_count--;
kfree(dmi);
/*
* We have altered the list, so the card
* loaded filter is now wrong. Fix it
*/
__dev_mc_upload(dev);
spin_unlock_bh(&dev->xmit_lock);
return 0;
}
}
err = -ENOENT;
done:
spin_unlock_bh(&dev->xmit_lock);
return err;
}
/*
* Add a device level multicast
*/
int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
{
int err = 0;
struct dev_mc_list *dmi, *dmi1;
dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
spin_lock_bh(&dev->xmit_lock);
for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
dmi->dmi_addrlen == alen) {
if (glbl) {
int old_glbl = dmi->dmi_gusers;
dmi->dmi_gusers = 1;
if (old_glbl)
goto done;
}
dmi->dmi_users++;
goto done;
}
}
if ((dmi = dmi1) == NULL) {
spin_unlock_bh(&dev->xmit_lock);
return -ENOMEM;
}
memcpy(dmi->dmi_addr, addr, alen);
dmi->dmi_addrlen = alen;
dmi->next = dev->mc_list;
dmi->dmi_users = 1;
dmi->dmi_gusers = glbl ? 1 : 0;
dev->mc_list = dmi;
dev->mc_count++;
__dev_mc_upload(dev);
spin_unlock_bh(&dev->xmit_lock);
return 0;
done:
spin_unlock_bh(&dev->xmit_lock);
if (dmi1)
kfree(dmi1);
return err;
}
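/*
 * Illustration only (hedged): how IPv4 multicast typically reaches this
 * layer - map the group address to its link-level form and register it;
 * ip_eth_mc_map() is the helper from <net/ip.h>, and "group" stands in
 * for a network-order group address.
 */
#if 0 /* example */
	char mac[ETH_ALEN];

	ip_eth_mc_map(group, mac);		/* 224.x.y.z -> 01:00:5e:xx:xx:xx */
	dev_mc_add(dev, mac, ETH_ALEN, 0);	/* start receiving that group */
#endif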
/*
* Discard multicast list when a device is downed
*/
void dev_mc_discard(struct net_device *dev)
{
spin_lock_bh(&dev->xmit_lock);
while (dev->mc_list != NULL) {
struct dev_mc_list *tmp = dev->mc_list;
dev->mc_list = tmp->next;
if (tmp->dmi_users > tmp->dmi_gusers)
printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
kfree(tmp);
}
dev->mc_count = 0;
spin_unlock_bh(&dev->xmit_lock);
}
#ifdef CONFIG_PROC_FS
static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
{
struct net_device *dev;
loff_t off = 0;
read_lock(&dev_base_lock);
for (dev = dev_base; dev; dev = dev->next) {
if (off++ == *pos)
return dev;
}
return NULL;
}
static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct net_device *dev = v;
++*pos;
return dev->next;
}
static void dev_mc_seq_stop(struct seq_file *seq, void *v)
{
read_unlock(&dev_base_lock);
}
static int dev_mc_seq_show(struct seq_file *seq, void *v)
{
struct dev_mc_list *m;
struct net_device *dev = v;
spin_lock_bh(&dev->xmit_lock);
for (m = dev->mc_list; m; m = m->next) {
int i;
seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
dev->name, m->dmi_users, m->dmi_gusers);
for (i = 0; i < m->dmi_addrlen; i++)
seq_printf(seq, "%02x", m->dmi_addr[i]);
seq_putc(seq, '\n');
}
spin_unlock_bh(&dev->xmit_lock);
return 0;
}
static struct seq_operations dev_mc_seq_ops = {
.start = dev_mc_seq_start,
.next = dev_mc_seq_next,
.stop = dev_mc_seq_stop,
.show = dev_mc_seq_show,
};
static int dev_mc_seq_open(struct inode *inode, struct file *file)
{
return seq_open(file, &dev_mc_seq_ops);
}
static struct file_operations dev_mc_seq_fops = {
.owner = THIS_MODULE,
.open = dev_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif
void __init dev_mcast_init(void)
{
proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
}
EXPORT_SYMBOL(dev_mc_add);
EXPORT_SYMBOL(dev_mc_delete);
EXPORT_SYMBOL(dev_mc_upload);


@@ -0,0 +1,278 @@
/*
* net/core/dst.c Protocol independent destination cache.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/sched.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>
#include <net/dst.h>
/* Locking strategy:
* 1) Garbage collection state of dead destination cache
* entries is protected by dst_lock.
* 2) GC is run only from BH context, and is the only remover
* of entries.
* 3) Entries are added to the garbage list from both BH
* and non-BH context, so local BH disabling is needed.
* 4) All operations modify state, so a spinlock is used.
*/
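/*
 * Illustration only (hedged): the refcount contract this GC relies on.
 * Fast paths pin an entry before use and unpin it afterwards; entries
 * freed while still referenced are parked on dst_garbage_list by
 * __dst_free() and reaped by dst_run_gc() once __refcnt reaches zero.
 */
#if 0 /* example */
	dst_hold(dst);		/* take a reference for the skb */
	skb->dst = dst;
	/* ... transmit ... */
	dst_release(skb->dst);	/* dropped again on the kfree_skb() path */
#endif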
static struct dst_entry *dst_garbage_list;
#if RT_CACHE_DEBUG >= 2
static atomic_t dst_total = ATOMIC_INIT(0);
#endif
static spinlock_t dst_lock = SPIN_LOCK_UNLOCKED;
static unsigned long dst_gc_timer_expires;
static unsigned long dst_gc_timer_inc = DST_GC_MAX;
static void dst_run_gc(unsigned long);
static void ___dst_free(struct dst_entry * dst);
static struct timer_list dst_gc_timer =
TIMER_INITIALIZER(dst_run_gc, DST_GC_MIN, 0);
static void dst_run_gc(unsigned long dummy)
{
int delayed = 0;
struct dst_entry * dst, **dstp;
if (!spin_trylock(&dst_lock)) {
mod_timer(&dst_gc_timer, jiffies + HZ/10);
return;
}
del_timer(&dst_gc_timer);
dstp = &dst_garbage_list;
while ((dst = *dstp) != NULL) {
if (atomic_read(&dst->__refcnt)) {
dstp = &dst->next;
delayed++;
continue;
}
*dstp = dst->next;
dst = dst_destroy(dst);
if (dst) {
/* NOHASH and still referenced. Unless it is already
* on gc list, invalidate it and add to gc list.
*
* Note: this is temporary. Actually, NOHASH dst's
* must be obsoleted when parent is obsoleted.
* But we do not have state "obsoleted, but
* referenced by parent", so it is right.
*/
if (dst->obsolete > 1)
continue;
___dst_free(dst);
dst->next = *dstp;
*dstp = dst;
dstp = &dst->next;
}
}
if (!dst_garbage_list) {
dst_gc_timer_inc = DST_GC_MAX;
goto out;
}
if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
dst_gc_timer_expires = DST_GC_MAX;
dst_gc_timer_inc += DST_GC_INC;
dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
#if RT_CACHE_DEBUG >= 2
printk("dst_total: %d/%d %ld\n",
atomic_read(&dst_total), delayed, dst_gc_timer_expires);
#endif
add_timer(&dst_gc_timer);
out:
spin_unlock(&dst_lock);
}
static int dst_discard_in(struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
static int dst_discard_out(struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
void * dst_alloc(struct dst_ops * ops)
{
struct dst_entry * dst;
if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
if (ops->gc())
return NULL;
}
dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC);
if (!dst)
return NULL;
memset(dst, 0, ops->entry_size);
atomic_set(&dst->__refcnt, 0);
dst->ops = ops;
dst->lastuse = jiffies;
dst->path = dst;
dst->input = dst_discard_in;
dst->output = dst_discard_out;
#if RT_CACHE_DEBUG >= 2
atomic_inc(&dst_total);
#endif
atomic_inc(&ops->entries);
return dst;
}
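/*
 * Illustration only (hedged): a protocol hands dst_alloc() its own
 * dst_ops, the way ipv4 routing does for struct rtable; every name
 * below is invented for the example.
 */
#if 0 /* example */
static struct dst_ops example_dst_ops = {
	.family		= AF_INET,
	.gc		= example_gc,	/* invoked when entries > gc_thresh */
	.gc_thresh	= 1024,
	.entry_size	= sizeof(struct example_route),
	/* .kmem_cachep created with kmem_cache_create() at init time */
};

	struct example_route *rt = dst_alloc(&example_dst_ops);
	if (!rt)
		return NULL;	/* allocator or gc refused */
#endif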
static void ___dst_free(struct dst_entry * dst)
{
/* The first case (dev==NULL) is required, when
protocol module is unloaded.
*/
if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
dst->input = dst_discard_in;
dst->output = dst_discard_out;
}
dst->obsolete = 2;
}
void __dst_free(struct dst_entry * dst)
{
spin_lock_bh(&dst_lock);
___dst_free(dst);
dst->next = dst_garbage_list;
dst_garbage_list = dst;
if (dst_gc_timer_inc > DST_GC_INC) {
dst_gc_timer_inc = DST_GC_INC;
dst_gc_timer_expires = DST_GC_MIN;
mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
}
spin_unlock_bh(&dst_lock);
}
struct dst_entry *dst_destroy(struct dst_entry * dst)
{
struct dst_entry *child;
struct neighbour *neigh;
struct hh_cache *hh;
again:
neigh = dst->neighbour;
hh = dst->hh;
child = dst->child;
dst->hh = NULL;
if (hh && atomic_dec_and_test(&hh->hh_refcnt))
kfree(hh);
if (neigh) {
dst->neighbour = NULL;
neigh_release(neigh);
}
atomic_dec(&dst->ops->entries);
if (dst->ops->destroy)
dst->ops->destroy(dst);
if (dst->dev)
dev_put(dst->dev);
#if RT_CACHE_DEBUG >= 2
atomic_dec(&dst_total);
#endif
kmem_cache_free(dst->ops->kmem_cachep, dst);
dst = child;
if (dst) {
if (atomic_dec_and_test(&dst->__refcnt)) {
/* We were real parent of this dst, so kill child. */
if (dst->flags&DST_NOHASH)
goto again;
} else {
/* Child is still referenced, return it for freeing. */
if (dst->flags&DST_NOHASH)
return dst;
/* Child is still in his hash table */
}
}
return NULL;
}
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
* this mistake in 2.3, but we have no choice
* now. _It_ _is_ _explicit_ _deliberate_
* _race_ _condition_.
*
* Commented and originally written by Alexey.
*/
static void dst_ifdown(struct dst_entry *dst, int unregister)
{
struct net_device *dev = dst->dev;
if (!unregister) {
dst->input = dst_discard_in;
dst->output = dst_discard_out;
}
do {
if (unregister) {
dst->dev = &loopback_dev;
dev_hold(&loopback_dev);
dev_put(dev);
if (dst->neighbour && dst->neighbour->dev == dev) {
dst->neighbour->dev = &loopback_dev;
dev_put(dev);
dev_hold(&loopback_dev);
}
}
if (dst->ops->ifdown)
dst->ops->ifdown(dst, unregister);
} while ((dst = dst->child) && dst->flags & DST_NOHASH &&
dst->dev == dev);
}
static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
struct dst_entry *dst;
switch (event) {
case NETDEV_UNREGISTER:
case NETDEV_DOWN:
spin_lock_bh(&dst_lock);
for (dst = dst_garbage_list; dst; dst = dst->next) {
if (dst->dev == dev)
dst_ifdown(dst, event != NETDEV_DOWN);
}
spin_unlock_bh(&dst_lock);
break;
}
return NOTIFY_DONE;
}
struct notifier_block dst_dev_notifier = {
.notifier_call = dst_dev_event,
};
void __init dst_init(void)
{
register_netdevice_notifier(&dst_dev_notifier);
}
EXPORT_SYMBOL(__dst_free);
EXPORT_SYMBOL(dst_alloc);
EXPORT_SYMBOL(dst_destroy);


@@ -0,0 +1,555 @@
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Generic frame diversion
*
* Authors:
* Benoit LOCHER: initial integration within the kernel with support for ethernet
* Dave Miller: improvement on the code (correctness, performance and source files)
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <net/dst.h>
#include <net/arp.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/checksum.h>
#include <linux/divert.h>
#include <linux/sockios.h>
const char sysctl_divert_version[32] = "0.46"; /* Current version */
static int __init dv_init(void)
{
return 0;
}
module_init(dv_init);
/*
* Allocate a divert_blk for a device. This must be an ethernet nic.
*/
int alloc_divert_blk(struct net_device *dev)
{
int alloc_size = (sizeof(struct divert_blk) + 3) & ~3;
if (dev->type == ARPHRD_ETHER) {
printk(KERN_DEBUG "divert: allocating divert_blk for %s\n",
dev->name);
dev->divert = (struct divert_blk *)
kmalloc(alloc_size, GFP_KERNEL);
if (dev->divert == NULL) {
printk(KERN_DEBUG "divert: unable to allocate divert_blk for %s\n",
dev->name);
return -ENOMEM;
} else {
memset(dev->divert, 0, sizeof(struct divert_blk));
}
dev_hold(dev);
} else {
printk(KERN_DEBUG "divert: not allocating divert_blk for non-ethernet device %s\n",
dev->name);
dev->divert = NULL;
}
return 0;
}
/*
* Free a divert_blk allocated by the above function, if it was
* allocated on that device.
*/
void free_divert_blk(struct net_device *dev)
{
if (dev->divert) {
kfree(dev->divert);
dev->divert = NULL;
dev_put(dev);
printk(KERN_DEBUG "divert: freeing divert_blk for %s\n",
dev->name);
} else {
printk(KERN_DEBUG "divert: no divert_blk to free, %s not ethernet\n",
dev->name);
}
}
/*
* Adds a tcp/udp (source or dest) port to an array
*/
static int add_port(u16 ports[], u16 port)
{
int i;
if (port == 0)
return -EINVAL;
/* Storing directly in network format for performance,
* thanks Dave :)
*/
port = htons(port);
for (i = 0; i < MAX_DIVERT_PORTS; i++) {
if (ports[i] == port)
return -EALREADY;
}
for (i = 0; i < MAX_DIVERT_PORTS; i++) {
if (ports[i] == 0) {
ports[i] = port;
return 0;
}
}
return -ENOBUFS;
}
/*
* Removes a port from an array tcp/udp (source or dest)
*/
static int remove_port(u16 ports[], u16 port)
{
int i;
if (port == 0)
return -EINVAL;
/* Storing directly in network format for performance,
* thanks Dave !
*/
port = htons(port);
for (i = 0; i < MAX_DIVERT_PORTS; i++) {
if (ports[i] == port) {
ports[i] = 0;
return 0;
}
}
return -EINVAL;
}
/* Some basic sanity checks on the arguments passed to divert_ioctl() */
static int check_args(struct divert_cf *div_cf, struct net_device **dev)
{
char devname[32];
int ret;
if (dev == NULL)
return -EFAULT;
/* GETVERSION: all other args are unused */
if (div_cf->cmd == DIVCMD_GETVERSION)
return 0;
/* Network device index should reasonably be between 0 and 1000 :) */
if (div_cf->dev_index < 0 || div_cf->dev_index > 1000)
return -EINVAL;
/* Let's try to find the ifname */
sprintf(devname, "eth%d", div_cf->dev_index);
*dev = dev_get_by_name(devname);
/* dev should NOT be null */
if (*dev == NULL)
return -EINVAL;
ret = 0;
/* user issuing the ioctl must be a super one :) */
if (!capable(CAP_SYS_ADMIN)) {
ret = -EPERM;
goto out;
}
/* Device must have a divert_blk member NOT null */
if ((*dev)->divert == NULL)
ret = -EINVAL;
out:
dev_put(*dev);
return ret;
}
/*
* control function of the diverter
*/
#define DVDBG(a) \
printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a))
int divert_ioctl(unsigned int cmd, struct divert_cf __user *arg)
{
struct divert_cf div_cf;
struct divert_blk *div_blk;
struct net_device *dev;
int ret;
switch (cmd) {
case SIOCGIFDIVERT:
DVDBG("SIOCGIFDIVERT, copy_from_user");
if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
return -EFAULT;
DVDBG("before check_args");
ret = check_args(&div_cf, &dev);
if (ret)
return ret;
DVDBG("after checkargs");
div_blk = dev->divert;
DVDBG("befre switch()");
switch (div_cf.cmd) {
case DIVCMD_GETSTATUS:
/* Now, just give the user the raw divert block
* for him to play with :)
*/
if (copy_to_user(div_cf.arg1.ptr, dev->divert,
sizeof(struct divert_blk)))
return -EFAULT;
break;
case DIVCMD_GETVERSION:
DVDBG("GETVERSION: checking ptr");
if (div_cf.arg1.ptr == NULL)
return -EINVAL;
DVDBG("GETVERSION: copying data to userland");
if (copy_to_user(div_cf.arg1.ptr,
sysctl_divert_version, 32))
return -EFAULT;
DVDBG("GETVERSION: data copied");
break;
default:
return -EINVAL;
}
break;
case SIOCSIFDIVERT:
if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
return -EFAULT;
ret = check_args(&div_cf, &dev);
if (ret)
return ret;
div_blk = dev->divert;
switch(div_cf.cmd) {
case DIVCMD_RESET:
div_blk->divert = 0;
div_blk->protos = DIVERT_PROTO_NONE;
memset(div_blk->tcp_dst, 0,
MAX_DIVERT_PORTS * sizeof(u16));
memset(div_blk->tcp_src, 0,
MAX_DIVERT_PORTS * sizeof(u16));
memset(div_blk->udp_dst, 0,
MAX_DIVERT_PORTS * sizeof(u16));
memset(div_blk->udp_src, 0,
MAX_DIVERT_PORTS * sizeof(u16));
return 0;
case DIVCMD_DIVERT:
switch(div_cf.arg1.int32) {
case DIVARG1_ENABLE:
if (div_blk->divert)
return -EALREADY;
div_blk->divert = 1;
break;
case DIVARG1_DISABLE:
if (!div_blk->divert)
return -EALREADY;
div_blk->divert = 0;
break;
default:
return -EINVAL;
}
break;
case DIVCMD_IP:
switch(div_cf.arg1.int32) {
case DIVARG1_ENABLE:
if (div_blk->protos & DIVERT_PROTO_IP)
return -EALREADY;
div_blk->protos |= DIVERT_PROTO_IP;
break;
case DIVARG1_DISABLE:
if (!(div_blk->protos & DIVERT_PROTO_IP))
return -EALREADY;
div_blk->protos &= ~DIVERT_PROTO_IP;
break;
default:
return -EINVAL;
}
break;
case DIVCMD_TCP:
switch(div_cf.arg1.int32) {
case DIVARG1_ENABLE:
if (div_blk->protos & DIVERT_PROTO_TCP)
return -EALREADY;
div_blk->protos |= DIVERT_PROTO_TCP;
break;
case DIVARG1_DISABLE:
if (!(div_blk->protos & DIVERT_PROTO_TCP))
return -EALREADY;
div_blk->protos &= ~DIVERT_PROTO_TCP;
break;
default:
return -EINVAL;
}
break;
case DIVCMD_TCPDST:
switch(div_cf.arg1.int32) {
case DIVARG1_ADD:
return add_port(div_blk->tcp_dst,
div_cf.arg2.uint16);
case DIVARG1_REMOVE:
return remove_port(div_blk->tcp_dst,
div_cf.arg2.uint16);
default:
return -EINVAL;
}
break;
case DIVCMD_TCPSRC:
switch(div_cf.arg1.int32) {
case DIVARG1_ADD:
return add_port(div_blk->tcp_src,
div_cf.arg2.uint16);
case DIVARG1_REMOVE:
return remove_port(div_blk->tcp_src,
div_cf.arg2.uint16);
default:
return -EINVAL;
}
break;
case DIVCMD_UDP:
switch(div_cf.arg1.int32) {
case DIVARG1_ENABLE:
if (div_blk->protos & DIVERT_PROTO_UDP)
return -EALREADY;
div_blk->protos |= DIVERT_PROTO_UDP;
break;
case DIVARG1_DISABLE:
if (!(div_blk->protos & DIVERT_PROTO_UDP))
return -EALREADY;
div_blk->protos &= ~DIVERT_PROTO_UDP;
break;
default:
return -EINVAL;
}
break;
case DIVCMD_UDPDST:
switch(div_cf.arg1.int32) {
case DIVARG1_ADD:
return add_port(div_blk->udp_dst,
div_cf.arg2.uint16);
case DIVARG1_REMOVE:
return remove_port(div_blk->udp_dst,
div_cf.arg2.uint16);
default:
return -EINVAL;
}
break;
case DIVCMD_UDPSRC:
switch(div_cf.arg1.int32) {
case DIVARG1_ADD:
return add_port(div_blk->udp_src,
div_cf.arg2.uint16);
case DIVARG1_REMOVE:
return remove_port(div_blk->udp_src,
div_cf.arg2.uint16);
default:
return -EINVAL;
}
break;
case DIVCMD_ICMP:
switch(div_cf.arg1.int32) {
case DIVARG1_ENABLE:
if (div_blk->protos & DIVERT_PROTO_ICMP)
return -EALREADY;
div_blk->protos |= DIVERT_PROTO_ICMP;
break;
case DIVARG1_DISABLE:
if (!(div_blk->protos & DIVERT_PROTO_ICMP))
return -EALREADY;
div_blk->protos &= ~DIVERT_PROTO_ICMP;
break;
default:
return -EINVAL;
}
break;
default:
return -EINVAL;
}
break;
default:
return -EINVAL;
}
return 0;
}
/*
* Check if packet should have its dest mac address set to the box itself
* for diversion
*/
#define ETH_DIVERT_FRAME(skb) \
memcpy(eth_hdr(skb), skb->dev->dev_addr, ETH_ALEN); \
skb->pkt_type=PACKET_HOST
void divert_frame(struct sk_buff *skb)
{
struct ethhdr *eth = eth_hdr(skb);
struct iphdr *iph;
struct tcphdr *tcph;
struct udphdr *udph;
struct divert_blk *divert = skb->dev->divert;
int i, src, dst;
unsigned char *skb_data_end = skb->data + skb->len;
/* Packet is already aimed at us, return */
if (!memcmp(eth, skb->dev->dev_addr, ETH_ALEN))
return;
/* proto is not IP, do nothing */
if (eth->h_proto != htons(ETH_P_IP))
return;
/* Divert all IP frames ? */
if (divert->protos & DIVERT_PROTO_IP) {
ETH_DIVERT_FRAME(skb);
return;
}
/* Check for possible (maliciously) malformed IP frame (thanks Dave) */
iph = (struct iphdr *) skb->data;
if (((iph->ihl<<2)+(unsigned char*)(iph)) >= skb_data_end) {
printk(KERN_INFO "divert: malformed IP packet !\n");
return;
}
switch (iph->protocol) {
/* Divert all ICMP frames ? */
case IPPROTO_ICMP:
if (divert->protos & DIVERT_PROTO_ICMP) {
ETH_DIVERT_FRAME(skb);
return;
}
break;
/* Divert all TCP frames ? */
case IPPROTO_TCP:
if (divert->protos & DIVERT_PROTO_TCP) {
ETH_DIVERT_FRAME(skb);
return;
}
/* Check for possible (maliciously) malformed IP
* frame (thanx Dave)
*/
tcph = (struct tcphdr *)
(((unsigned char *)iph) + (iph->ihl<<2));
if (((unsigned char *)(tcph+1)) >= skb_data_end) {
printk(KERN_INFO "divert: malformed TCP packet !\n");
return;
}
/* Divert some tcp dst/src ports only ?*/
for (i = 0; i < MAX_DIVERT_PORTS; i++) {
dst = divert->tcp_dst[i];
src = divert->tcp_src[i];
if ((dst && dst == tcph->dest) ||
(src && src == tcph->source)) {
ETH_DIVERT_FRAME(skb);
return;
}
}
break;
/* Divert all UDP frames ? */
case IPPROTO_UDP:
if (divert->protos & DIVERT_PROTO_UDP) {
ETH_DIVERT_FRAME(skb);
return;
}
/* Check for possible (maliciously) malformed IP
* packet (thanks Dave)
*/
udph = (struct udphdr *)
(((unsigned char *)iph) + (iph->ihl<<2));
if (((unsigned char *)(udph+1)) >= skb_data_end) {
printk(KERN_INFO
"divert: malformed UDP packet !\n");
return;
}
/* Divert some udp dst/src ports only ? */
for (i = 0; i < MAX_DIVERT_PORTS; i++) {
dst = divert->udp_dst[i];
src = divert->udp_src[i];
if ((dst && dst == udph->dest) ||
(src && src == udph->source)) {
ETH_DIVERT_FRAME(skb);
return;
}
}
break;
}
}
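/*
 * Illustration only (hedged): a userland sketch of driving the
 * divert_ioctl() handler above; struct divert_cf and the DIVCMD_,
 * DIVARG1_ constants come from <linux/divert.h>, and sock_fd stands
 * for any open socket descriptor.
 */
#if 0 /* example (userspace) */
	struct divert_cf cf = {
		.cmd	     = DIVCMD_TCPDST,
		.arg1.int32  = DIVARG1_ADD,
		.arg2.uint16 = 80,	/* divert frames for TCP dest port 80 */
		.dev_index   = 0,	/* check_args() turns this into "eth0" */
	};

	if (ioctl(sock_fd, SIOCSIFDIVERT, &cf) < 0)
		perror("SIOCSIFDIVERT");
#endif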


@@ -0,0 +1,819 @@
/*
* net/core/ethtool.c - Ethtool ioctl handler
* Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
*
* This file is where we call all the ethtool_ops commands to get
* the information ethtool needs. We fall back to calling do_ioctl()
* for drivers which haven't been converted to ethtool_ops yet.
*
* It's GPL, stupid.
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <asm/uaccess.h>
/*
* Some useful ethtool_ops methods that are device independent.
* If we find that all drivers want to do the same thing here,
* we can turn these into dev_() function calls.
*/
u32 ethtool_op_get_link(struct net_device *dev)
{
return netif_carrier_ok(dev) ? 1 : 0;
}
u32 ethtool_op_get_tx_csum(struct net_device *dev)
{
return (dev->features & NETIF_F_IP_CSUM) != 0;
}
int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
{
if (data)
dev->features |= NETIF_F_IP_CSUM;
else
dev->features &= ~NETIF_F_IP_CSUM;
return 0;
}
u32 ethtool_op_get_sg(struct net_device *dev)
{
return (dev->features & NETIF_F_SG) != 0;
}
int ethtool_op_set_sg(struct net_device *dev, u32 data)
{
if (data)
dev->features |= NETIF_F_SG;
else
dev->features &= ~NETIF_F_SG;
return 0;
}
u32 ethtool_op_get_tso(struct net_device *dev)
{
return (dev->features & NETIF_F_TSO) != 0;
}
int ethtool_op_set_tso(struct net_device *dev, u32 data)
{
if (data)
dev->features |= NETIF_F_TSO;
else
dev->features &= ~NETIF_F_TSO;
return 0;
}
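/*
 * Illustration only (hedged): a driver satisfied with the generic
 * behaviour simply points its ethtool_ops at the helpers above;
 * example_get_drvinfo is an invented callback name.
 */
#if 0 /* example */
static struct ethtool_ops example_ethtool_ops = {
	.get_drvinfo = example_get_drvinfo,
	.get_link    = ethtool_op_get_link,
	.get_tx_csum = ethtool_op_get_tx_csum,
	.set_tx_csum = ethtool_op_set_tx_csum,
	.get_sg      = ethtool_op_get_sg,
	.set_sg      = ethtool_op_set_sg,
};

	dev->ethtool_ops = &example_ethtool_ops;
#endif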
/* Handlers for each ethtool command */
static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_cmd cmd = { ETHTOOL_GSET };
int err;
if (!dev->ethtool_ops->get_settings)
return -EOPNOTSUPP;
err = dev->ethtool_ops->get_settings(dev, &cmd);
if (err < 0)
return err;
if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
return -EFAULT;
return 0;
}
static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_cmd cmd;
if (!dev->ethtool_ops->set_settings)
return -EOPNOTSUPP;
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
return dev->ethtool_ops->set_settings(dev, &cmd);
}
static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
{
struct ethtool_drvinfo info;
struct ethtool_ops *ops = dev->ethtool_ops;
if (!ops->get_drvinfo)
return -EOPNOTSUPP;
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GDRVINFO;
ops->get_drvinfo(dev, &info);
if (ops->self_test_count)
info.testinfo_len = ops->self_test_count(dev);
if (ops->get_stats_count)
info.n_stats = ops->get_stats_count(dev);
if (ops->get_regs_len)
info.regdump_len = ops->get_regs_len(dev);
if (ops->get_eeprom_len)
info.eedump_len = ops->get_eeprom_len(dev);
if (copy_to_user(useraddr, &info, sizeof(info)))
return -EFAULT;
return 0;
}
static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
{
struct ethtool_regs regs;
struct ethtool_ops *ops = dev->ethtool_ops;
void *regbuf;
int reglen, ret;
if (!ops->get_regs || !ops->get_regs_len)
return -EOPNOTSUPP;
if (copy_from_user(&regs, useraddr, sizeof(regs)))
return -EFAULT;
reglen = ops->get_regs_len(dev);
if (regs.len > reglen)
regs.len = reglen;
regbuf = kmalloc(reglen, GFP_USER);
if (!regbuf)
return -ENOMEM;
ops->get_regs(dev, &regs, regbuf);
ret = -EFAULT;
if (copy_to_user(useraddr, &regs, sizeof(regs)))
goto out;
useraddr += offsetof(struct ethtool_regs, data);
if (copy_to_user(useraddr, regbuf, regs.len))
goto out;
ret = 0;
out:
kfree(regbuf);
return ret;
}
static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
{
struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
if (!dev->ethtool_ops->get_wol)
return -EOPNOTSUPP;
dev->ethtool_ops->get_wol(dev, &wol);
if (copy_to_user(useraddr, &wol, sizeof(wol)))
return -EFAULT;
return 0;
}
static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
{
struct ethtool_wolinfo wol;
if (!dev->ethtool_ops->set_wol)
return -EOPNOTSUPP;
if (copy_from_user(&wol, useraddr, sizeof(wol)))
return -EFAULT;
return dev->ethtool_ops->set_wol(dev, &wol);
}
static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GMSGLVL };
if (!dev->ethtool_ops->get_msglevel)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_msglevel(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
if (!dev->ethtool_ops->set_msglevel)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
dev->ethtool_ops->set_msglevel(dev, edata.data);
return 0;
}
static int ethtool_nway_reset(struct net_device *dev)
{
if (!dev->ethtool_ops->nway_reset)
return -EOPNOTSUPP;
return dev->ethtool_ops->nway_reset(dev);
}
static int ethtool_get_link(struct net_device *dev, void __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GLINK };
if (!dev->ethtool_ops->get_link)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_link(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
if (!ops->get_eeprom || !ops->get_eeprom_len)
return -EOPNOTSUPP;
if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
return -EFAULT;
/* Check for wrap and zero */
if (eeprom.offset + eeprom.len <= eeprom.offset)
return -EINVAL;
/* Check for exceeding total eeprom len */
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;
data = kmalloc(eeprom.len, GFP_USER);
if (!data)
return -ENOMEM;
ret = -EFAULT;
if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
goto out;
ret = ops->get_eeprom(dev, &eeprom, data);
if (ret)
goto out;
ret = -EFAULT;
if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
goto out;
if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
goto out;
ret = 0;
out:
kfree(data);
return ret;
}
static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
if (!ops->set_eeprom || !ops->get_eeprom_len)
return -EOPNOTSUPP;
if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
return -EFAULT;
/* Check for wrap and zero */
if (eeprom.offset + eeprom.len <= eeprom.offset)
return -EINVAL;
/* Check for exceeding total eeprom len */
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;
data = kmalloc(eeprom.len, GFP_USER);
if (!data)
return -ENOMEM;
ret = -EFAULT;
if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
goto out;
ret = ops->set_eeprom(dev, &eeprom, data);
if (ret)
goto out;
if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
ret = -EFAULT;
out:
kfree(data);
return ret;
}
static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
{
struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
if (!dev->ethtool_ops->get_coalesce)
return -EOPNOTSUPP;
dev->ethtool_ops->get_coalesce(dev, &coalesce);
if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
return -EFAULT;
return 0;
}
static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
{
struct ethtool_coalesce coalesce;
if (!dev->ethtool_ops->set_coalesce)
return -EOPNOTSUPP;
if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
return -EFAULT;
return dev->ethtool_ops->set_coalesce(dev, &coalesce);
}
static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
if (!dev->ethtool_ops->get_ringparam)
return -EOPNOTSUPP;
dev->ethtool_ops->get_ringparam(dev, &ringparam);
if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
return -EFAULT;
return 0;
}
static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_ringparam ringparam;
if (!dev->ethtool_ops->set_ringparam)
return -EOPNOTSUPP;
if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
return -EFAULT;
return dev->ethtool_ops->set_ringparam(dev, &ringparam);
}
static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
if (!dev->ethtool_ops->get_pauseparam)
return -EOPNOTSUPP;
dev->ethtool_ops->get_pauseparam(dev, &pauseparam);
if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
return -EFAULT;
return 0;
}
static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_pauseparam pauseparam;
if (!dev->ethtool_ops->set_pauseparam)
return -EOPNOTSUPP;
if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
return -EFAULT;
return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
}
static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GRXCSUM };
if (!dev->ethtool_ops->get_rx_csum)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_rx_csum(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
if (!dev->ethtool_ops->set_rx_csum)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
dev->ethtool_ops->set_rx_csum(dev, edata.data);
return 0;
}
static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GTXCSUM };
if (!dev->ethtool_ops->get_tx_csum)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_tx_csum(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int __ethtool_set_sg(struct net_device *dev, u32 data)
{
int err;
if (!data && dev->ethtool_ops->set_tso) {
err = dev->ethtool_ops->set_tso(dev, 0);
if (err)
return err;
}
return dev->ethtool_ops->set_sg(dev, data);
}
static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
int err;
if (!dev->ethtool_ops->set_tx_csum)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
if (!edata.data && dev->ethtool_ops->set_sg) {
err = __ethtool_set_sg(dev, 0);
if (err)
return err;
}
return dev->ethtool_ops->set_tx_csum(dev, edata.data);
}
static int ethtool_get_sg(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GSG };
if (!dev->ethtool_ops->get_sg)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_sg(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
if (!dev->ethtool_ops->set_sg)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
if (edata.data &&
!(dev->features & (NETIF_F_IP_CSUM |
NETIF_F_NO_CSUM |
NETIF_F_HW_CSUM)))
return -EINVAL;
return __ethtool_set_sg(dev, edata.data);
}
static int ethtool_get_tso(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GTSO };
if (!dev->ethtool_ops->get_tso)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_tso(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
if (!dev->ethtool_ops->set_tso)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
if (edata.data && !(dev->features & NETIF_F_SG))
return -EINVAL;
return dev->ethtool_ops->set_tso(dev, edata.data);
}
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
{
struct ethtool_test test;
struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
int ret;
if (!ops->self_test || !ops->self_test_count)
return -EOPNOTSUPP;
if (copy_from_user(&test, useraddr, sizeof(test)))
return -EFAULT;
test.len = ops->self_test_count(dev);
data = kmalloc(test.len * sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
ops->self_test(dev, &test, data);
ret = -EFAULT;
if (copy_to_user(useraddr, &test, sizeof(test)))
goto out;
useraddr += sizeof(test);
if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
goto out;
ret = 0;
out:
kfree(data);
return ret;
}
static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gstrings gstrings;
struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
if (!ops->get_strings)
return -EOPNOTSUPP;
if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
return -EFAULT;
switch (gstrings.string_set) {
case ETH_SS_TEST:
if (!ops->self_test_count)
return -EOPNOTSUPP;
gstrings.len = ops->self_test_count(dev);
break;
case ETH_SS_STATS:
if (!ops->get_stats_count)
return -EOPNOTSUPP;
gstrings.len = ops->get_stats_count(dev);
break;
default:
return -EINVAL;
}
data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
if (!data)
return -ENOMEM;
ops->get_strings(dev, gstrings.string_set, data);
ret = -EFAULT;
if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
goto out;
useraddr += sizeof(gstrings);
if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
goto out;
ret = 0;
out:
kfree(data);
return ret;
}
static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
{
struct ethtool_value id;
if (!dev->ethtool_ops->phys_id)
return -EOPNOTSUPP;
if (copy_from_user(&id, useraddr, sizeof(id)))
return -EFAULT;
return dev->ethtool_ops->phys_id(dev, id.data);
}
static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
{
struct ethtool_stats stats;
struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
int ret;
if (!ops->get_ethtool_stats || !ops->get_stats_count)
return -EOPNOTSUPP;
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
stats.n_stats = ops->get_stats_count(dev);
data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
ops->get_ethtool_stats(dev, &stats, data);
ret = -EFAULT;
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
goto out;
ret = 0;
out:
kfree(data);
return ret;
}
/* The main entry point in this file. Called from net/core/dev.c */
int dev_ethtool(struct ifreq *ifr)
{
struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
void __user *useraddr = ifr->ifr_data;
u32 ethcmd;
int rc;
/*
* XXX: This can be pushed down into the ethtool_* handlers that
* need it. Keep existing behaviour for the moment.
*/
if (!capable(CAP_NET_ADMIN))
return -EPERM;
if (!dev || !netif_device_present(dev))
return -ENODEV;
if (!dev->ethtool_ops)
goto ioctl;
if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
return -EFAULT;
if (dev->ethtool_ops->begin)
if ((rc = dev->ethtool_ops->begin(dev)) < 0)
return rc;
switch (ethcmd) {
case ETHTOOL_GSET:
rc = ethtool_get_settings(dev, useraddr);
break;
case ETHTOOL_SSET:
rc = ethtool_set_settings(dev, useraddr);
break;
case ETHTOOL_GDRVINFO:
rc = ethtool_get_drvinfo(dev, useraddr);
break;
case ETHTOOL_GREGS:
rc = ethtool_get_regs(dev, useraddr);
break;
case ETHTOOL_GWOL:
rc = ethtool_get_wol(dev, useraddr);
break;
case ETHTOOL_SWOL:
rc = ethtool_set_wol(dev, useraddr);
break;
case ETHTOOL_GMSGLVL:
rc = ethtool_get_msglevel(dev, useraddr);
break;
case ETHTOOL_SMSGLVL:
rc = ethtool_set_msglevel(dev, useraddr);
break;
case ETHTOOL_NWAY_RST:
rc = ethtool_nway_reset(dev);
break;
case ETHTOOL_GLINK:
rc = ethtool_get_link(dev, useraddr);
break;
case ETHTOOL_GEEPROM:
rc = ethtool_get_eeprom(dev, useraddr);
break;
case ETHTOOL_SEEPROM:
rc = ethtool_set_eeprom(dev, useraddr);
break;
case ETHTOOL_GCOALESCE:
rc = ethtool_get_coalesce(dev, useraddr);
break;
case ETHTOOL_SCOALESCE:
rc = ethtool_set_coalesce(dev, useraddr);
break;
case ETHTOOL_GRINGPARAM:
rc = ethtool_get_ringparam(dev, useraddr);
break;
case ETHTOOL_SRINGPARAM:
rc = ethtool_set_ringparam(dev, useraddr);
break;
case ETHTOOL_GPAUSEPARAM:
rc = ethtool_get_pauseparam(dev, useraddr);
break;
case ETHTOOL_SPAUSEPARAM:
rc = ethtool_set_pauseparam(dev, useraddr);
break;
case ETHTOOL_GRXCSUM:
rc = ethtool_get_rx_csum(dev, useraddr);
break;
case ETHTOOL_SRXCSUM:
rc = ethtool_set_rx_csum(dev, useraddr);
break;
case ETHTOOL_GTXCSUM:
rc = ethtool_get_tx_csum(dev, useraddr);
break;
case ETHTOOL_STXCSUM:
rc = ethtool_set_tx_csum(dev, useraddr);
break;
case ETHTOOL_GSG:
rc = ethtool_get_sg(dev, useraddr);
break;
case ETHTOOL_SSG:
rc = ethtool_set_sg(dev, useraddr);
break;
case ETHTOOL_GTSO:
rc = ethtool_get_tso(dev, useraddr);
break;
case ETHTOOL_STSO:
rc = ethtool_set_tso(dev, useraddr);
break;
case ETHTOOL_TEST:
rc = ethtool_self_test(dev, useraddr);
break;
case ETHTOOL_GSTRINGS:
rc = ethtool_get_strings(dev, useraddr);
break;
case ETHTOOL_PHYS_ID:
rc = ethtool_phys_id(dev, useraddr);
break;
case ETHTOOL_GSTATS:
rc = ethtool_get_stats(dev, useraddr);
break;
default:
rc = -EOPNOTSUPP;
}
if (dev->ethtool_ops->complete)
dev->ethtool_ops->complete(dev);
return rc;
ioctl:
if (dev->do_ioctl)
return dev->do_ioctl(dev, ifr, SIOCETHTOOL);
return -EOPNOTSUPP;
}
EXPORT_SYMBOL(dev_ethtool);
EXPORT_SYMBOL(ethtool_op_get_link);
EXPORT_SYMBOL(ethtool_op_get_sg);
EXPORT_SYMBOL(ethtool_op_get_tso);
EXPORT_SYMBOL(ethtool_op_get_tx_csum);
EXPORT_SYMBOL(ethtool_op_set_sg);
EXPORT_SYMBOL(ethtool_op_set_tso);
EXPORT_SYMBOL(ethtool_op_set_tx_csum);
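/*
 * Illustration only (hedged): the userland side of dev_ethtool() - the
 * command struct rides behind ifr_data and is dispatched on its leading
 * cmd word; fd stands for any AF_INET socket and "eth0" is an example
 * interface name.
 */
#if 0 /* example (userspace) */
	struct ethtool_value ev = { .cmd = ETHTOOL_GLINK };
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
	ifr.ifr_data = (char *)&ev;

	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("link is %s\n", ev.data ? "up" : "down");
#endif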


@@ -0,0 +1,432 @@
/*
* Linux Socket Filter - Kernel level socket filtering
*
* Author:
* Jay Schulist <jschlst@samba.org>
*
* Based on the design of:
* - The Berkeley Packet Filter
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Andi Kleen - Fix a few bad bugs and races.
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/filter.h>
/* No hurry in this branch */
static u8 *load_pointer(struct sk_buff *skb, int k)
{
u8 *ptr = NULL;
if (k >= SKF_NET_OFF)
ptr = skb->nh.raw + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
ptr = skb->mac.raw + k - SKF_LL_OFF;
if (ptr >= skb->head && ptr < skb->tail)
return ptr;
return NULL;
}
/**
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
* @flen: length of filter
*
* Decode and apply filter instructions to the skb->data.
* Return length to keep, 0 for none. skb is the data we are
* filtering, filter is the array of filter instructions, and
* len is the number of filter blocks in the array.
*/
int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
unsigned char *data = skb->data;
/* len is UNSIGNED. Byte wide insns relies only on implicit
type casts to prevent reading arbitrary memory locations.
*/
unsigned int len = skb->len-skb->data_len;
struct sock_filter *fentry; /* We walk down these */
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
int k;
int pc;
/*
* Process array of filter instructions.
*/
for (pc = 0; pc < flen; pc++) {
fentry = &filter[pc];
switch (fentry->code) {
case BPF_ALU|BPF_ADD|BPF_X:
A += X;
continue;
case BPF_ALU|BPF_ADD|BPF_K:
A += fentry->k;
continue;
case BPF_ALU|BPF_SUB|BPF_X:
A -= X;
continue;
case BPF_ALU|BPF_SUB|BPF_K:
A -= fentry->k;
continue;
case BPF_ALU|BPF_MUL|BPF_X:
A *= X;
continue;
case BPF_ALU|BPF_MUL|BPF_K:
A *= fentry->k;
continue;
case BPF_ALU|BPF_DIV|BPF_X:
if (X == 0)
return 0;
A /= X;
continue;
case BPF_ALU|BPF_DIV|BPF_K:
if (fentry->k == 0)
return 0;
A /= fentry->k;
continue;
case BPF_ALU|BPF_AND|BPF_X:
A &= X;
continue;
case BPF_ALU|BPF_AND|BPF_K:
A &= fentry->k;
continue;
case BPF_ALU|BPF_OR|BPF_X:
A |= X;
continue;
case BPF_ALU|BPF_OR|BPF_K:
A |= fentry->k;
continue;
case BPF_ALU|BPF_LSH|BPF_X:
A <<= X;
continue;
case BPF_ALU|BPF_LSH|BPF_K:
A <<= fentry->k;
continue;
case BPF_ALU|BPF_RSH|BPF_X:
A >>= X;
continue;
case BPF_ALU|BPF_RSH|BPF_K:
A >>= fentry->k;
continue;
case BPF_ALU|BPF_NEG:
A = -A;
continue;
case BPF_JMP|BPF_JA:
pc += fentry->k;
continue;
case BPF_JMP|BPF_JGT|BPF_K:
pc += (A > fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JGE|BPF_K:
pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JEQ|BPF_K:
pc += (A == fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JSET|BPF_K:
pc += (A & fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JGT|BPF_X:
pc += (A > X) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JGE|BPF_X:
pc += (A >= X) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JEQ|BPF_X:
pc += (A == X) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JSET|BPF_X:
pc += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_LD|BPF_W|BPF_ABS:
k = fentry->k;
load_w:
if (k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) {
A = ntohl(*(u32*)&data[k]);
continue;
}
if (k < 0) {
u8 *ptr;
if (k >= SKF_AD_OFF)
break;
ptr = load_pointer(skb, k);
if (ptr) {
A = ntohl(*(u32*)ptr);
continue;
}
} else {
u32 _tmp, *p;
p = skb_header_pointer(skb, k, 4, &_tmp);
if (p != NULL) {
A = ntohl(*p);
continue;
}
}
return 0;
case BPF_LD|BPF_H|BPF_ABS:
k = fentry->k;
load_h:
if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) {
A = ntohs(*(u16*)&data[k]);
continue;
}
if (k < 0) {
u8 *ptr;
if (k >= SKF_AD_OFF)
break;
ptr = load_pointer(skb, k);
if (ptr) {
A = ntohs(*(u16*)ptr);
continue;
}
} else {
u16 _tmp, *p;
p = skb_header_pointer(skb, k, 2, &_tmp);
if (p != NULL) {
A = ntohs(*p);
continue;
}
}
return 0;
case BPF_LD|BPF_B|BPF_ABS:
k = fentry->k;
load_b:
if (k >= 0 && (unsigned int)k < len) {
A = data[k];
continue;
}
if (k < 0) {
u8 *ptr;
if (k >= SKF_AD_OFF)
break;
ptr = load_pointer(skb, k);
if (ptr) {
A = *ptr;
continue;
}
} else {
u8 _tmp, *p;
p = skb_header_pointer(skb, k, 1, &_tmp);
if (p != NULL) {
A = *p;
continue;
}
}
return 0;
case BPF_LD|BPF_W|BPF_LEN:
A = len;
continue;
case BPF_LDX|BPF_W|BPF_LEN:
X = len;
continue;
case BPF_LD|BPF_W|BPF_IND:
k = X + fentry->k;
goto load_w;
case BPF_LD|BPF_H|BPF_IND:
k = X + fentry->k;
goto load_h;
case BPF_LD|BPF_B|BPF_IND:
k = X + fentry->k;
goto load_b;
case BPF_LDX|BPF_B|BPF_MSH:
if (fentry->k >= len)
return 0;
X = (data[fentry->k] & 0xf) << 2;
continue;
case BPF_LD|BPF_IMM:
A = fentry->k;
continue;
case BPF_LDX|BPF_IMM:
X = fentry->k;
continue;
case BPF_LD|BPF_MEM:
A = mem[fentry->k];
continue;
case BPF_LDX|BPF_MEM:
X = mem[fentry->k];
continue;
case BPF_MISC|BPF_TAX:
X = A;
continue;
case BPF_MISC|BPF_TXA:
A = X;
continue;
case BPF_RET|BPF_K:
return ((unsigned int)fentry->k);
case BPF_RET|BPF_A:
return ((unsigned int)A);
case BPF_ST:
mem[fentry->k] = A;
continue;
case BPF_STX:
mem[fentry->k] = X;
continue;
default:
/* Invalid instruction counts as RET */
return 0;
}
/*
 * Handle ancillary data, which is impossible
 * (or very difficult) to obtain by parsing the packet contents.
*/
switch (k-SKF_AD_OFF) {
case SKF_AD_PROTOCOL:
A = htons(skb->protocol);
continue;
case SKF_AD_PKTTYPE:
A = skb->pkt_type;
continue;
case SKF_AD_IFINDEX:
A = skb->dev->ifindex;
continue;
default:
return 0;
}
}
return 0;
}
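/*
 * Illustrative sketch (not part of the original file): a minimal
 * four-instruction program run through sk_run_filter(). It keeps the
 * first 96 bytes of ARP frames and drops everything else. ETH_P_ARP
 * (0x0806, from <linux/if_ether.h>) is assumed; initializers are in
 * struct sock_filter order: code, jt, jf, k.
 *
 *	struct sock_filter prog[] = {
 *		{ BPF_LD|BPF_H|BPF_ABS,  0, 0, 12 },        // A = ethertype
 *		{ BPF_JMP|BPF_JEQ|BPF_K, 0, 1, ETH_P_ARP }, // ARP? fall through : skip
 *		{ BPF_RET|BPF_K,         0, 0, 96 },        // keep 96 bytes
 *		{ BPF_RET|BPF_K,         0, 0, 0  },        // drop
 *	};
 *	unsigned int keep = sk_run_filter(skb, prog, 4);
 */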
/**
* sk_chk_filter - verify socket filter code
* @filter: filter to verify
* @flen: length of filter
*
 * Check the user's filter code. If we let some ugly
 * filter code slip through, kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and no backward jumps. It must end with a RET
 * instruction.
*
* Returns 0 if the rule set is legal or a negative errno code if not.
*/
int sk_chk_filter(struct sock_filter *filter, int flen)
{
struct sock_filter *ftest;
int pc;
if (((unsigned int)flen >= (~0U / sizeof(struct sock_filter))) || flen == 0)
return -EINVAL;
/* check the filter code now */
for (pc = 0; pc < flen; pc++) {
/* all jumps are forward as they are not signed */
ftest = &filter[pc];
if (BPF_CLASS(ftest->code) == BPF_JMP) {
/* but they mustn't jump off the end */
if (BPF_OP(ftest->code) == BPF_JA) {
/*
* Note, the large ftest->k might cause loops.
* Compare this with conditional jumps below,
* where offsets are limited. --ANK (981016)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return -EINVAL;
} else {
/* for conditionals both must be safe */
if (pc + ftest->jt +1 >= flen ||
pc + ftest->jf +1 >= flen)
return -EINVAL;
}
}
/* check that memory operations use valid addresses. */
if (ftest->k >= BPF_MEMWORDS) {
/* but it might not be a memory operation... */
switch (ftest->code) {
case BPF_ST:
case BPF_STX:
case BPF_LD|BPF_MEM:
case BPF_LDX|BPF_MEM:
return -EINVAL;
}
}
}
/*
 * The program must end with a return. We don't care where it
 * jumped within the script (it's always forward) but in the end
 * it _will_ hit this.
*/
return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
}
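/*
 * Illustrative sketch (not from the original file): two programs and
 * the verdicts sk_chk_filter() gives them under the rules above.
 *
 *	struct sock_filter no_ret[] = {
 *		{ BPF_LD|BPF_W|BPF_LEN, 0, 0, 0 },  // A = len, but no RET
 *	};
 *	sk_chk_filter(no_ret, 1);       // -EINVAL: last insn is not BPF_RET
 *
 *	struct sock_filter jump_off_end[] = {
 *		{ BPF_JMP|BPF_JEQ|BPF_K, 1, 0, 0 }, // jt lands past the end
 *		{ BPF_RET|BPF_K,         0, 0, 0 },
 *	};
 *	sk_chk_filter(jump_off_end, 2); // -EINVAL: pc + jt + 1 >= flen
 */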
/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
* @sk: the socket to use
*
* Attach the user's filter code. We first run some sanity checks on
* it to make sure it does not explode on us later. If an error
* occurs or there is insufficient memory for the filter a negative
* errno code is returned. On success the return is zero.
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct sk_filter *fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
int err;
/* Make sure the new filter is present and of a sane size. */
if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
return -EINVAL;
fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
if (!fp)
return -ENOMEM;
if (copy_from_user(fp->insns, fprog->filter, fsize)) {
sock_kfree_s(sk, fp, fsize+sizeof(*fp));
return -EFAULT;
}
atomic_set(&fp->refcnt, 1);
fp->len = fprog->len;
err = sk_chk_filter(fp->insns, fp->len);
if (!err) {
struct sk_filter *old_fp;
spin_lock_bh(&sk->sk_lock.slock);
old_fp = sk->sk_filter;
sk->sk_filter = fp;
spin_unlock_bh(&sk->sk_lock.slock);
fp = old_fp;
}
if (fp)
sk_filter_release(sk, fp);
return err;
}
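/*
 * Illustrative sketch (not from the original file): from userspace this
 * path is reached via setsockopt() with SO_ATTACH_FILTER, e.g. on a
 * packet socket, with prog being a struct sock_filter array like the
 * ARP filter sketched earlier:
 *
 *	struct sock_fprog fprog = { .len = 4, .filter = prog };
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
 *		   &fprog, sizeof(fprog));
 */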
EXPORT_SYMBOL(sk_chk_filter);
EXPORT_SYMBOL(sk_run_filter);

View File

@@ -0,0 +1,371 @@
/* flow.c: Generic flow cache.
*
* Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
* Copyright (C) 2003 David S. Miller (davem@redhat.com)
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/percpu.h>
#include <linux/bitops.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <net/flow.h>
#include <asm/atomic.h>
#include <asm/semaphore.h>
struct flow_cache_entry {
struct flow_cache_entry *next;
u16 family;
u8 dir;
struct flowi key;
u32 genid;
void *object;
atomic_t *object_ref;
};
atomic_t flow_cache_genid = ATOMIC_INIT(0);
static u32 flow_hash_shift;
#define flow_hash_size (1 << flow_hash_shift)
static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
#define flow_table(cpu) (per_cpu(flow_tables, cpu))
static kmem_cache_t *flow_cachep;
static int flow_lwm, flow_hwm;
struct flow_percpu_info {
int hash_rnd_recalc;
u32 hash_rnd;
int count;
} ____cacheline_aligned;
static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
#define flow_hash_rnd_recalc(cpu) \
(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
#define flow_hash_rnd(cpu) \
(per_cpu(flow_hash_info, cpu).hash_rnd)
#define flow_count(cpu) \
(per_cpu(flow_hash_info, cpu).count)
static struct timer_list flow_hash_rnd_timer;
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
struct flow_flush_info {
atomic_t cpuleft;
struct completion completion;
};
static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
static void flow_cache_new_hashrnd(unsigned long arg)
{
int i;
for_each_cpu(i)
flow_hash_rnd_recalc(i) = 1;
flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
add_timer(&flow_hash_rnd_timer);
}
static void __flow_cache_shrink(int cpu, int shrink_to)
{
struct flow_cache_entry *fle, **flp;
int i;
for (i = 0; i < flow_hash_size; i++) {
int k = 0;
flp = &flow_table(cpu)[i];
while ((fle = *flp) != NULL && k < shrink_to) {
k++;
flp = &fle->next;
}
while ((fle = *flp) != NULL) {
*flp = fle->next;
if (fle->object)
atomic_dec(fle->object_ref);
kmem_cache_free(flow_cachep, fle);
flow_count(cpu)--;
}
}
}
static void flow_cache_shrink(int cpu)
{
int shrink_to = flow_lwm / flow_hash_size;
__flow_cache_shrink(cpu, shrink_to);
}
static void flow_new_hash_rnd(int cpu)
{
get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
flow_hash_rnd_recalc(cpu) = 0;
__flow_cache_shrink(cpu, 0);
}
static u32 flow_hash_code(struct flowi *key, int cpu)
{
u32 *k = (u32 *) key;
return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
(flow_hash_size - 1));
}
#if (BITS_PER_LONG == 64)
typedef u64 flow_compare_t;
#else
typedef u32 flow_compare_t;
#endif
extern void flowi_is_missized(void);
/* I hear what you're saying, use memcmp. But memcmp cannot make
* important assumptions that we can here, such as alignment and
* constant size.
*/
static int flow_key_compare(struct flowi *key1, struct flowi *key2)
{
flow_compare_t *k1, *k1_lim, *k2;
const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
if (sizeof(struct flowi) % sizeof(flow_compare_t))
flowi_is_missized();
k1 = (flow_compare_t *) key1;
k1_lim = k1 + n_elem;
k2 = (flow_compare_t *) key2;
do {
if (*k1++ != *k2++)
return 1;
} while (k1 < k1_lim);
return 0;
}
void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver)
{
struct flow_cache_entry *fle, **head;
unsigned int hash;
int cpu;
local_bh_disable();
cpu = smp_processor_id();
fle = NULL;
/* Packet really early in init? Making flow_cache_init a
* pre-smp initcall would solve this. --RR */
if (!flow_table(cpu))
goto nocache;
if (flow_hash_rnd_recalc(cpu))
flow_new_hash_rnd(cpu);
hash = flow_hash_code(key, cpu);
head = &flow_table(cpu)[hash];
for (fle = *head; fle; fle = fle->next) {
if (fle->family == family &&
fle->dir == dir &&
flow_key_compare(key, &fle->key) == 0) {
if (fle->genid == atomic_read(&flow_cache_genid)) {
void *ret = fle->object;
if (ret)
atomic_inc(fle->object_ref);
local_bh_enable();
return ret;
}
break;
}
}
if (!fle) {
if (flow_count(cpu) > flow_hwm)
flow_cache_shrink(cpu);
fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
if (fle) {
fle->next = *head;
*head = fle;
fle->family = family;
fle->dir = dir;
memcpy(&fle->key, key, sizeof(*key));
fle->object = NULL;
flow_count(cpu)++;
}
}
nocache:
{
void *obj;
atomic_t *obj_ref;
resolver(key, family, dir, &obj, &obj_ref);
if (fle) {
fle->genid = atomic_read(&flow_cache_genid);
if (fle->object)
atomic_dec(fle->object_ref);
fle->object = obj;
fle->object_ref = obj_ref;
if (obj)
atomic_inc(fle->object_ref);
}
local_bh_enable();
return obj;
}
}
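/*
 * Illustrative sketch (not from the original file): the main caller of
 * flow_cache_lookup() is the xfrm (IPsec) policy layer. A resolver has
 * the flow_resolve_t signature and fills in the object plus its
 * refcount; my_resolver below is a placeholder name:
 *
 *	static void my_resolver(struct flowi *key, u16 family, u8 dir,
 *				void **objp, atomic_t **obj_refp)
 *	{
 *		// look up or build the object for this key,
 *		// set *objp and *obj_refp
 *	}
 *
 *	obj = flow_cache_lookup(&fl, AF_INET, FLOW_DIR_OUT, my_resolver);
 */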
static void flow_cache_flush_tasklet(unsigned long data)
{
struct flow_flush_info *info = (void *)data;
int i;
int cpu;
cpu = smp_processor_id();
for (i = 0; i < flow_hash_size; i++) {
struct flow_cache_entry *fle;
fle = flow_table(cpu)[i];
for (; fle; fle = fle->next) {
unsigned genid = atomic_read(&flow_cache_genid);
if (!fle->object || fle->genid == genid)
continue;
fle->object = NULL;
atomic_dec(fle->object_ref);
}
}
if (atomic_dec_and_test(&info->cpuleft))
complete(&info->completion);
}
static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
static void flow_cache_flush_per_cpu(void *data)
{
struct flow_flush_info *info = data;
int cpu;
struct tasklet_struct *tasklet;
cpu = smp_processor_id();
tasklet = flow_flush_tasklet(cpu);
tasklet->data = (unsigned long)info;
tasklet_schedule(tasklet);
}
void flow_cache_flush(void)
{
struct flow_flush_info info;
static DECLARE_MUTEX(flow_flush_sem);
/* Don't want cpus going down or up during this. */
lock_cpu_hotplug();
down(&flow_flush_sem);
atomic_set(&info.cpuleft, num_online_cpus());
init_completion(&info.completion);
local_bh_disable();
smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
flow_cache_flush_tasklet((unsigned long)&info);
local_bh_enable();
wait_for_completion(&info.completion);
up(&flow_flush_sem);
unlock_cpu_hotplug();
}
static void __devinit flow_cache_cpu_prepare(int cpu)
{
struct tasklet_struct *tasklet;
unsigned long order;
for (order = 0;
(PAGE_SIZE << order) <
(sizeof(struct flow_cache_entry *)*flow_hash_size);
order++)
/* NOTHING */;
flow_table(cpu) = (struct flow_cache_entry **)
__get_free_pages(GFP_KERNEL, order);
if (!flow_table(cpu))
panic("NET: failed to allocate flow cache order %lu\n", order);
memset(flow_table(cpu), 0, PAGE_SIZE << order);
flow_hash_rnd_recalc(cpu) = 1;
flow_count(cpu) = 0;
tasklet = flow_flush_tasklet(cpu);
tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
}
#ifdef CONFIG_HOTPLUG_CPU
static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
if (action == CPU_DEAD)
__flow_cache_shrink((unsigned long)hcpu, 0);
return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
static int __init flow_cache_init(void)
{
int i;
flow_cachep = kmem_cache_create("flow_cache",
sizeof(struct flow_cache_entry),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
if (!flow_cachep)
panic("NET: failed to allocate flow cache slab\n");
flow_hash_shift = 10;
flow_lwm = 2 * flow_hash_size;
flow_hwm = 4 * flow_hash_size;
init_timer(&flow_hash_rnd_timer);
flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
add_timer(&flow_hash_rnd_timer);
for_each_cpu(i)
flow_cache_cpu_prepare(i);
hotcpu_notifier(flow_cache_cpu, 0);
return 0;
}
module_init(flow_cache_init);
EXPORT_SYMBOL(flow_cache_genid);
EXPORT_SYMBOL(flow_cache_lookup);

View File

@@ -0,0 +1,250 @@
/*
* net/sched/gen_estimator.c Simple rate estimator.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* Changes:
 * Jamal Hadi Salim - moved it to net/core and reshuffled
 * names to make it usable in the general net subsystem.
*/
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/sock.h>
#include <net/gen_stats.h>
/*
This code is NOT intended to be used for statistics collection,
its purpose is to provide a base for statistical multiplexing
for controlled load service.
If you need only statistics, run a user level daemon which
periodically reads byte counters.
Unfortunately, rate estimation is not a very easy task.
E.g. I did not find a simple way to estimate the current peak rate
and even failed to formulate the problem 8)8)
So I preferred not to build an estimator into the scheduler,
but to run this task separately.
Ideally, it should be a kernel thread (or threads), but for now it runs
from timers, which puts an apparent upper bound on the number of rated
flows, has minimal overhead when that number is small, but is enough
to handle controlled load service and sets of aggregates.
We measure rate over A=(1<<interval) seconds and evaluate EWMA:
avrate = avrate*(1-W) + rate*W
where W is chosen as negative power of 2: W = 2^(-ewma_log)
The resulting time constant is:
T = A/(-ln(1-W))
NOTES.
* The stored value for avbps is scaled by 2^5, so that maximal
rate is ~1Gbit, avpps is scaled by 2^10.
* Minimal interval is HZ/4=250msec (it is the greatest common divisor
for HZ=100 and HZ=1024 8)), maximal interval
is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
are too expensive, longer ones can be implemented
at user level painlessly.
*/
#define EST_MAX_INTERVAL 5
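/*
 * Worked example of the fixed-point arithmetic (added for clarity, with
 * assumed numbers): for idx = interval = 2 the timer fires every
 * (HZ<<2)/4 = HZ jiffies, i.e. once per second. If 1,000,000 bytes
 * arrive in that second, rate = 1000000<<(7-2) = 32,000,000, which is
 * bytes/sec pre-scaled by 2^5. With ewma_log = 2 and avbps starting at
 * 0, avbps += (32000000 - 0)>>2 = 8,000,000, so the reported rate is
 * (8000000+0xF)>>5 = 250,000 bytes/sec after the first tick, converging
 * toward 1,000,000 on later ticks.
 */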
struct gen_estimator
{
struct gen_estimator *next;
struct gnet_stats_basic *bstats;
struct gnet_stats_rate_est *rate_est;
spinlock_t *stats_lock;
unsigned interval;
int ewma_log;
u64 last_bytes;
u32 last_packets;
u32 avpps;
u32 avbps;
};
struct gen_estimator_head
{
struct timer_list timer;
struct gen_estimator *list;
};
static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
/* Estimator array lock */
static rwlock_t est_lock = RW_LOCK_UNLOCKED;
static void est_timer(unsigned long arg)
{
int idx = (int)arg;
struct gen_estimator *e;
read_lock(&est_lock);
for (e = elist[idx].list; e; e = e->next) {
u64 nbytes;
u32 npackets;
u32 rate;
spin_lock(e->stats_lock);
nbytes = e->bstats->bytes;
npackets = e->bstats->packets;
rate = (nbytes - e->last_bytes)<<(7 - idx);
e->last_bytes = nbytes;
e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
e->rate_est->bps = (e->avbps+0xF)>>5;
rate = (npackets - e->last_packets)<<(12 - idx);
e->last_packets = npackets;
e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
e->rate_est->pps = (e->avpps+0x1FF)>>10;
spin_unlock(e->stats_lock);
}
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
read_unlock(&est_lock);
}
/**
* gen_new_estimator - create a new rate estimator
* @bstats: basic statistics
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
* @opt: rate estimator configuration TLV
*
* Creates a new rate estimator with &bstats as source and &rate_est
* as destination. A new timer with the interval specified in the
* configuration TLV is created. Upon each interval, the latest statistics
* will be read from &bstats and the estimated rate will be stored in
 * &rate_est with the statistics lock grabbed during this period.
*
* Returns 0 on success or a negative error code.
*/
int gen_new_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
{
struct gen_estimator *est;
struct gnet_estimator *parm = RTA_DATA(opt);
if (RTA_PAYLOAD(opt) < sizeof(*parm))
return -EINVAL;
if (parm->interval < -2 || parm->interval > 3)
return -EINVAL;
est = kmalloc(sizeof(*est), GFP_KERNEL);
if (est == NULL)
return -ENOBUFS;
memset(est, 0, sizeof(*est));
est->interval = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
est->ewma_log = parm->ewma_log;
est->last_bytes = bstats->bytes;
est->avbps = rate_est->bps<<5;
est->last_packets = bstats->packets;
est->avpps = rate_est->pps<<10;
est->next = elist[est->interval].list;
if (est->next == NULL) {
init_timer(&elist[est->interval].timer);
elist[est->interval].timer.data = est->interval;
elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
elist[est->interval].timer.function = est_timer;
add_timer(&elist[est->interval].timer);
}
write_lock_bh(&est_lock);
elist[est->interval].list = est;
write_unlock_bh(&est_lock);
return 0;
}
/**
* gen_kill_estimator - remove a rate estimator
* @bstats: basic statistics
* @rate_est: rate estimator statistics
*
* Removes the rate estimator specified by &bstats and &rate_est
* and deletes the timer.
*/
void gen_kill_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est)
{
int idx;
struct gen_estimator *est, **pest;
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
int killed = 0;
pest = &elist[idx].list;
while ((est=*pest) != NULL) {
if (est->rate_est != rate_est || est->bstats != bstats) {
pest = &est->next;
continue;
}
write_lock_bh(&est_lock);
*pest = est->next;
write_unlock_bh(&est_lock);
kfree(est);
killed++;
}
if (killed && elist[idx].list == NULL)
del_timer(&elist[idx].timer);
}
}
/**
 * gen_replace_estimator - replace rate estimator configuration
* @bstats: basic statistics
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
* @opt: rate estimator configuration TLV
*
* Replaces the configuration of a rate estimator by calling
* gen_kill_estimator() and gen_new_estimator().
*
* Returns 0 on success or a negative error code.
*/
int
gen_replace_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock,
struct rtattr *opt)
{
gen_kill_estimator(bstats, rate_est);
return gen_new_estimator(bstats, rate_est, stats_lock, opt);
}
EXPORT_SYMBOL(gen_kill_estimator);
EXPORT_SYMBOL(gen_new_estimator);
EXPORT_SYMBOL(gen_replace_estimator);

View File

@@ -0,0 +1,221 @@
/*
* net/core/gen_stats.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Thomas Graf <tgraf@suug.ch>
* Jamal Hadi Salim
* Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* See Documentation/networking/gen_stats.txt
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/socket.h>
#include <linux/rtnetlink.h>
#include <linux/gen_stats.h>
#include <net/gen_stats.h>
static inline int
gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
{
RTA_PUT(d->skb, type, size, buf);
return 0;
rtattr_failure:
spin_unlock_bh(d->lock);
return -1;
}
/**
* gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
* @skb: socket buffer to put statistics TLVs into
* @type: TLV type for top level statistic TLV
* @tc_stats_type: TLV type for backward compatibility struct tc_stats TLV
* @xstats_type: TLV type for backward compatibility xstats TLV
* @lock: statistics lock
* @d: dumping handle
*
 * Initializes the dumping handle, grabs the statistics lock and appends
 * an empty TLV header to the socket buffer for use as a container for all
 * other statistics TLVs.
*
* The dumping handle is marked to be in backward compatibility mode telling
* all gnet_stats_copy_XXX() functions to fill a local copy of struct tc_stats.
*
* Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
int xstats_type, spinlock_t *lock, struct gnet_dump *d)
{
spin_lock_bh(lock);
d->lock = lock;
d->tail = (struct rtattr *) skb->tail;
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
d->xstats = NULL;
if (d->compat_tc_stats)
memset(&d->tc_stats, 0, sizeof(d->tc_stats));
return gnet_stats_copy(d, type, NULL, 0);
}
/**
 * gnet_stats_start_copy - start dumping procedure
* @skb: socket buffer to put statistics TLVs into
* @type: TLV type for top level statistic TLV
* @lock: statistics lock
* @d: dumping handle
*
 * Initializes the dumping handle, grabs the statistics lock and appends
 * an empty TLV header to the socket buffer for use as a container for all
 * other statistics TLVs.
*
* Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
struct gnet_dump *d)
{
return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
}
/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
* @d: dumping handle
* @b: basic statistics
*
* Appends the basic statistics to the top level TLV created by
* gnet_stats_start_copy().
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b)
{
if (d->compat_tc_stats) {
d->tc_stats.bytes = b->bytes;
d->tc_stats.packets = b->packets;
}
return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b));
}
/**
* gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
* @d: dumping handle
* @r: rate estimator statistics
*
* Appends the rate estimator statistics to the top level TLV created by
* gnet_stats_start_copy().
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
{
if (d->compat_tc_stats) {
d->tc_stats.bps = r->bps;
d->tc_stats.pps = r->pps;
}
return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
}
/**
* gnet_stats_copy_queue - copy queue statistics into statistics TLV
* @d: dumping handle
* @q: queue statistics
*
* Appends the queue statistics to the top level TLV created by
* gnet_stats_start_copy().
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
{
if (d->compat_tc_stats) {
d->tc_stats.drops = q->drops;
d->tc_stats.qlen = q->qlen;
d->tc_stats.backlog = q->backlog;
d->tc_stats.overlimits = q->overlimits;
}
return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q));
}
/**
* gnet_stats_copy_app - copy application specific statistics into statistics TLV
* @d: dumping handle
* @st: application specific statistics data
* @len: length of data
*
 * Appends the application specific statistics to the top level TLV created by
* gnet_stats_start_copy() and remembers the data for XSTATS if the dumping
* handle is in backward compatibility mode.
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
{
if (d->compat_xstats)
d->xstats = (struct rtattr *) d->skb->tail;
return gnet_stats_copy(d, TCA_STATS_APP, st, len);
}
/**
* gnet_stats_finish_copy - finish dumping procedure
* @d: dumping handle
*
* Corrects the length of the top level TLV to include all TLVs added
* by gnet_stats_copy_XXX() calls. Adds the backward compatibility TLVs
* if gnet_stats_start_copy_compat() was used and releases the statistics
* lock.
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_finish_copy(struct gnet_dump *d)
{
d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
if (d->compat_tc_stats)
if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
sizeof(d->tc_stats)) < 0)
return -1;
if (d->compat_xstats && d->xstats) {
if (gnet_stats_copy(d, d->compat_xstats, RTA_DATA(d->xstats),
RTA_PAYLOAD(d->xstats)) < 0)
return -1;
}
spin_unlock_bh(d->lock);
return 0;
}
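/*
 * Illustrative sketch (not from the original file) of the full dumping
 * sequence as a qdisc dump might use it; sch, stats_lock, TCA_STATS2,
 * TCA_STATS and TCA_XSTATS are stand-ins for the caller's own handles
 * and TLV types:
 *
 *	struct gnet_dump d;
 *	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
 *					 TCA_XSTATS, stats_lock, &d) < 0 ||
 *	    gnet_stats_copy_basic(&d, &sch->bstats) < 0 ||
 *	    gnet_stats_copy_rate_est(&d, &sch->rate_est) < 0 ||
 *	    gnet_stats_copy_queue(&d, &sch->qstats) < 0 ||
 *	    gnet_stats_finish_copy(&d) < 0)
 *		goto rtattr_failure;
 */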
EXPORT_SYMBOL(gnet_stats_start_copy);
EXPORT_SYMBOL(gnet_stats_start_copy_compat);
EXPORT_SYMBOL(gnet_stats_copy_basic);
EXPORT_SYMBOL(gnet_stats_copy_rate_est);
EXPORT_SYMBOL(gnet_stats_copy_queue);
EXPORT_SYMBOL(gnet_stats_copy_app);
EXPORT_SYMBOL(gnet_stats_finish_copy);

View File

@@ -0,0 +1,262 @@
/*
* iovec manipulation routines.
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
* Andrew Lunn : Errors in iovec copying.
* Pedro Roque : Added memcpy_fromiovecend and
* csum_..._fromiovecend.
* Andi Kleen : fixed error handling for 2.1
* Alexey Kuznetsov: 2.1 optimisations
* Andi Kleen : Fix csum*fromiovecend for IPv6.
*/
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
#include <net/checksum.h>
#include <net/sock.h>
/*
 * Verify iovec. The caller must ensure that the kernel iovec is big
 * enough to hold the message's iovec array.
*
* Save time not doing verify_area. copy_*_user will make this work
* in any case.
*/
int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
{
int size, err, ct;
if (m->msg_namelen) {
if (mode == VERIFY_READ) {
err = move_addr_to_kernel(m->msg_name, m->msg_namelen,
address);
if (err < 0)
return err;
}
m->msg_name = address;
} else {
m->msg_name = NULL;
}
size = m->msg_iovlen * sizeof(struct iovec);
if (copy_from_user(iov, m->msg_iov, size))
return -EFAULT;
m->msg_iov = iov;
err = 0;
for (ct = 0; ct < m->msg_iovlen; ct++) {
err += iov[ct].iov_len;
/*
* Goal is not to verify user data, but to prevent returning
* negative value, which is interpreted as errno.
* Overflow is still possible, but it is harmless.
*/
if (err < 0)
return -EMSGSIZE;
}
return err;
}
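/*
 * Illustrative sketch (not from the original file): for a recvmsg()
 * style call with two user iovecs of 100 and 200 bytes, verify_iovec()
 * copies the array into kiov, repoints m->msg_iov at it, and returns
 * the total payload size; kiov and address are sized as in the socket
 * layer's usual buffers:
 *
 *	struct iovec kiov[UIO_FASTIOV];
 *	char address[MAX_SOCK_ADDR];
 *	int total = verify_iovec(msg, kiov, address, VERIFY_WRITE);
 *	// total == 300 here; -EMSGSIZE if the lengths wrapped negative
 */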
/*
* Copy kernel to iovec. Returns -EFAULT on error.
*
* Note: this modifies the original iovec.
*/
int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
{
while (len > 0) {
if (iov->iov_len) {
int copy = min_t(unsigned int, iov->iov_len, len);
if (copy_to_user(iov->iov_base, kdata, copy))
return -EFAULT;
kdata += copy;
len -= copy;
iov->iov_len -= copy;
iov->iov_base += copy;
}
iov++;
}
return 0;
}
/*
* In kernel copy to iovec. Returns -EFAULT on error.
*
* Note: this modifies the original iovec.
*/
void memcpy_tokerneliovec(struct iovec *iov, unsigned char *kdata, int len)
{
while (len > 0) {
if (iov->iov_len) {
int copy = min_t(unsigned int, iov->iov_len, len);
memcpy(iov->iov_base, kdata, copy);
kdata += copy;
len -= copy;
iov->iov_len -= copy;
iov->iov_base += copy;
}
iov++;
}
}
/*
* Copy iovec to kernel. Returns -EFAULT on error.
*
* Note: this modifies the original iovec.
*/
int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
{
while (len > 0) {
if (iov->iov_len) {
int copy = min_t(unsigned int, len, iov->iov_len);
if (copy_from_user(kdata, iov->iov_base, copy))
return -EFAULT;
len -= copy;
kdata += copy;
iov->iov_base += copy;
iov->iov_len -= copy;
}
iov++;
}
return 0;
}
/*
* For use with ip_build_xmit
*/
int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
int len)
{
/* Skip over the finished iovecs */
while (offset >= iov->iov_len) {
offset -= iov->iov_len;
iov++;
}
while (len > 0) {
u8 __user *base = iov->iov_base + offset;
int copy = min_t(unsigned int, len, iov->iov_len - offset);
offset = 0;
if (copy_from_user(kdata, base, copy))
return -EFAULT;
len -= copy;
kdata += copy;
iov++;
}
return 0;
}
/*
 * And now for the all-in-one: copy and checksum from a user iovec
 * directly to a datagram.
 * All calls to csum_partial except the last must be on 32-bit chunks.
*
* ip_build_xmit must ensure that when fragmenting only the last
* call to this function will be unaligned also.
*/
int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
int offset, unsigned int len, int *csump)
{
int csum = *csump;
int partial_cnt = 0, err = 0;
/* Skip over the finished iovecs */
while (offset >= iov->iov_len) {
offset -= iov->iov_len;
iov++;
}
while (len > 0) {
u8 __user *base = iov->iov_base + offset;
int copy = min_t(unsigned int, len, iov->iov_len - offset);
offset = 0;
/* There is a remnant from previous iov. */
if (partial_cnt) {
int par_len = 4 - partial_cnt;
/* iov component is too short ... */
if (par_len > copy) {
if (copy_from_user(kdata, base, copy))
goto out_fault;
kdata += copy;
base += copy;
partial_cnt += copy;
len -= copy;
iov++;
if (len)
continue;
*csump = csum_partial(kdata - partial_cnt,
partial_cnt, csum);
goto out;
}
if (copy_from_user(kdata, base, par_len))
goto out_fault;
csum = csum_partial(kdata - partial_cnt, 4, csum);
kdata += par_len;
base += par_len;
copy -= par_len;
len -= par_len;
partial_cnt = 0;
}
if (len > copy) {
partial_cnt = copy % 4;
if (partial_cnt) {
copy -= partial_cnt;
if (copy_from_user(kdata + copy, base + copy,
partial_cnt))
goto out_fault;
}
}
if (copy) {
csum = csum_and_copy_from_user(base, kdata, copy,
csum, &err);
if (err)
goto out;
}
len -= copy + partial_cnt;
kdata += copy + partial_cnt;
iov++;
}
*csump = csum;
out:
return err;
out_fault:
err = -EFAULT;
goto out;
}
EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
EXPORT_SYMBOL(memcpy_fromiovec);
EXPORT_SYMBOL(memcpy_fromiovecend);
EXPORT_SYMBOL(memcpy_toiovec);
EXPORT_SYMBOL(memcpy_tokerneliovec);

View File

@@ -0,0 +1,137 @@
/*
* Linux network device link state notification
*
* Author:
* Stefan Rompf <sux@loplof.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <asm/types.h>
enum lw_bits {
LW_RUNNING = 0,
LW_SE_USED
};
static unsigned long linkwatch_flags;
static unsigned long linkwatch_nextevent;
static void linkwatch_event(void *dummy);
static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL);
static LIST_HEAD(lweventlist);
static spinlock_t lweventlist_lock = SPIN_LOCK_UNLOCKED;
struct lw_event {
struct list_head list;
struct net_device *dev;
};
/* Avoid kmalloc() for most systems */
static struct lw_event singleevent;
/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
{
LIST_HEAD(head);
struct list_head *n, *next;
spin_lock_irq(&lweventlist_lock);
list_splice_init(&lweventlist, &head);
spin_unlock_irq(&lweventlist_lock);
list_for_each_safe(n, next, &head) {
struct lw_event *event = list_entry(n, struct lw_event, list);
struct net_device *dev = event->dev;
if (event == &singleevent) {
clear_bit(LW_SE_USED, &linkwatch_flags);
} else {
kfree(event);
}
/* We are about to handle this device,
* so new events can be accepted
*/
clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
if (dev->flags & IFF_UP) {
netdev_state_change(dev);
}
dev_put(dev);
}
}
static void linkwatch_event(void *dummy)
{
/* Limit the number of linkwatch events to one
* per second so that a runaway driver does not
* cause a storm of messages on the netlink
* socket
*/
linkwatch_nextevent = jiffies + HZ;
clear_bit(LW_RUNNING, &linkwatch_flags);
rtnl_shlock();
linkwatch_run_queue();
rtnl_shunlock();
}
void linkwatch_fire_event(struct net_device *dev)
{
if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
unsigned long flags;
struct lw_event *event;
if (test_and_set_bit(LW_SE_USED, &linkwatch_flags)) {
event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC);
if (unlikely(event == NULL)) {
clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
return;
}
} else {
event = &singleevent;
}
dev_hold(dev);
event->dev = dev;
spin_lock_irqsave(&lweventlist_lock, flags);
list_add_tail(&event->list, &lweventlist);
spin_unlock_irqrestore(&lweventlist_lock, flags);
if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) {
unsigned long thisevent = jiffies;
if (thisevent >= linkwatch_nextevent) {
schedule_work(&linkwatch_work);
} else {
schedule_delayed_work(&linkwatch_work, linkwatch_nextevent - thisevent);
}
}
}
}
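/*
 * Illustrative sketch (not from the original file): drivers normally do
 * not call linkwatch_fire_event() directly; they flip the carrier state
 * and the netif_carrier_*() helpers fire the event on a state change:
 *
 *	// in a driver's link-state interrupt handler
 *	if (link_up)
 *		netif_carrier_on(dev);   // fires linkwatch_fire_event(dev)
 *	else
 *		netif_carrier_off(dev);
 */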
EXPORT_SYMBOL(linkwatch_fire_event);

File diff suppressed because it is too large

View File

@@ -0,0 +1,461 @@
/*
* net-sysfs.c - network device class and attributes
*
* Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/wireless.h>
#define to_class_dev(obj) container_of(obj,struct class_device,kobj)
#define to_net_dev(class) container_of(class, struct net_device, class_dev)
static const char fmt_hex[] = "%#x\n";
static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static inline int dev_isalive(const struct net_device *dev)
{
return dev->reg_state == NETREG_REGISTERED;
}
/* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct class_device *cd, char *buf,
ssize_t (*format)(const struct net_device *, char *))
{
struct net_device *net = to_net_dev(cd);
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
if (dev_isalive(net))
ret = (*format)(net, buf);
read_unlock(&dev_base_lock);
return ret;
}
/* generate a show function for simple field */
#define NETDEVICE_SHOW(field, format_string) \
static ssize_t format_##field(const struct net_device *net, char *buf) \
{ \
return sprintf(buf, format_string, net->field); \
} \
static ssize_t show_##field(struct class_device *cd, char *buf) \
{ \
return netdev_show(cd, buf, format_##field); \
}
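/*
 * For clarity (added note): NETDEVICE_SHOW(mtu, fmt_dec) expands to
 *
 *	static ssize_t format_mtu(const struct net_device *net, char *buf)
 *	{
 *		return sprintf(buf, "%d\n", net->mtu);
 *	}
 *	static ssize_t show_mtu(struct class_device *cd, char *buf)
 *	{
 *		return netdev_show(cd, buf, format_mtu);
 *	}
 *
 * so each field gets a locked, liveness-checked sysfs show routine.
 */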
/* use same locking and permission rules as SIF* ioctl's */
static ssize_t netdev_store(struct class_device *dev,
const char *buf, size_t len,
int (*set)(struct net_device *, unsigned long))
{
struct net_device *net = to_net_dev(dev);
char *endp;
unsigned long new;
int ret = -EINVAL;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
new = simple_strtoul(buf, &endp, 0);
if (endp == buf)
goto err;
rtnl_lock();
if (dev_isalive(net)) {
if ((ret = (*set)(net, new)) == 0)
ret = len;
}
rtnl_unlock();
err:
return ret;
}
/* generate a read-only network device class attribute */
#define NETDEVICE_ATTR(field, format_string) \
NETDEVICE_SHOW(field, format_string) \
static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL)
NETDEVICE_ATTR(addr_len, fmt_dec);
NETDEVICE_ATTR(iflink, fmt_dec);
NETDEVICE_ATTR(ifindex, fmt_dec);
NETDEVICE_ATTR(features, fmt_hex);
NETDEVICE_ATTR(type, fmt_dec);
/* use same locking rules as GIFHWADDR ioctl's */
static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
{
int i;
char *cp = buf;
for (i = 0; i < len; i++)
cp += sprintf(cp, "%02x%c", addr[i],
i == (len - 1) ? '\n' : ':');
return cp - buf;
}
static ssize_t show_address(struct class_device *dev, char *buf)
{
struct net_device *net = to_net_dev(dev);
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
if (dev_isalive(net))
ret = format_addr(buf, net->dev_addr, net->addr_len);
read_unlock(&dev_base_lock);
return ret;
}
static ssize_t show_broadcast(struct class_device *dev, char *buf)
{
struct net_device *net = to_net_dev(dev);
if (dev_isalive(net))
return format_addr(buf, net->broadcast, net->addr_len);
return -EINVAL;
}
static ssize_t show_carrier(struct class_device *dev, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
if (netif_running(netdev)) {
return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
}
return -EINVAL;
}
static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
static CLASS_DEVICE_ATTR(broadcast, S_IRUGO, show_broadcast, NULL);
static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
/* read-write attributes */
NETDEVICE_SHOW(mtu, fmt_dec);
static int change_mtu(struct net_device *net, unsigned long new_mtu)
{
return dev_set_mtu(net, (int) new_mtu);
}
static ssize_t store_mtu(struct class_device *dev, const char *buf, size_t len)
{
return netdev_store(dev, buf, len, change_mtu);
}
static CLASS_DEVICE_ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu);
NETDEVICE_SHOW(flags, fmt_hex);
static int change_flags(struct net_device *net, unsigned long new_flags)
{
return dev_change_flags(net, (unsigned) new_flags);
}
static ssize_t store_flags(struct class_device *dev, const char *buf, size_t len)
{
return netdev_store(dev, buf, len, change_flags);
}
static CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags);
NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
{
net->tx_queue_len = new_len;
return 0;
}
static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, size_t len)
{
return netdev_store(dev, buf, len, change_tx_queue_len);
}
static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
store_tx_queue_len);
static struct class_device_attribute *net_class_attributes[] = {
&class_device_attr_ifindex,
&class_device_attr_iflink,
&class_device_attr_addr_len,
&class_device_attr_tx_queue_len,
&class_device_attr_features,
&class_device_attr_mtu,
&class_device_attr_flags,
&class_device_attr_type,
&class_device_attr_address,
&class_device_attr_broadcast,
&class_device_attr_carrier,
NULL
};
/* Show a given attribute in the statistics group */
static ssize_t netstat_show(const struct class_device *cd, char *buf,
unsigned long offset)
{
struct net_device *dev = to_net_dev(cd);
struct net_device_stats *stats;
ssize_t ret = -EINVAL;
if (offset > sizeof(struct net_device_stats) ||
offset % sizeof(unsigned long) != 0)
WARN_ON(1);
read_lock(&dev_base_lock);
if (dev_isalive(dev) && dev->get_stats &&
(stats = (*dev->get_stats)(dev)))
ret = sprintf(buf, fmt_ulong,
*(unsigned long *)(((u8 *) stats) + offset));
read_unlock(&dev_base_lock);
return ret;
}
/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name) \
static ssize_t show_##name(struct class_device *cd, char *buf) \
{ \
return netstat_show(cd, buf, \
offsetof(struct net_device_stats, name)); \
} \
static CLASS_DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
NETSTAT_ENTRY(rx_bytes);
NETSTAT_ENTRY(tx_bytes);
NETSTAT_ENTRY(rx_errors);
NETSTAT_ENTRY(tx_errors);
NETSTAT_ENTRY(rx_dropped);
NETSTAT_ENTRY(tx_dropped);
NETSTAT_ENTRY(multicast);
NETSTAT_ENTRY(collisions);
NETSTAT_ENTRY(rx_length_errors);
NETSTAT_ENTRY(rx_over_errors);
NETSTAT_ENTRY(rx_crc_errors);
NETSTAT_ENTRY(rx_frame_errors);
NETSTAT_ENTRY(rx_fifo_errors);
NETSTAT_ENTRY(rx_missed_errors);
NETSTAT_ENTRY(tx_aborted_errors);
NETSTAT_ENTRY(tx_carrier_errors);
NETSTAT_ENTRY(tx_fifo_errors);
NETSTAT_ENTRY(tx_heartbeat_errors);
NETSTAT_ENTRY(tx_window_errors);
NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
static struct attribute *netstat_attrs[] = {
&class_device_attr_rx_packets.attr,
&class_device_attr_tx_packets.attr,
&class_device_attr_rx_bytes.attr,
&class_device_attr_tx_bytes.attr,
&class_device_attr_rx_errors.attr,
&class_device_attr_tx_errors.attr,
&class_device_attr_rx_dropped.attr,
&class_device_attr_tx_dropped.attr,
&class_device_attr_multicast.attr,
&class_device_attr_collisions.attr,
&class_device_attr_rx_length_errors.attr,
&class_device_attr_rx_over_errors.attr,
&class_device_attr_rx_crc_errors.attr,
&class_device_attr_rx_frame_errors.attr,
&class_device_attr_rx_fifo_errors.attr,
&class_device_attr_rx_missed_errors.attr,
&class_device_attr_tx_aborted_errors.attr,
&class_device_attr_tx_carrier_errors.attr,
&class_device_attr_tx_fifo_errors.attr,
&class_device_attr_tx_heartbeat_errors.attr,
&class_device_attr_tx_window_errors.attr,
&class_device_attr_rx_compressed.attr,
&class_device_attr_tx_compressed.attr,
NULL
};
static struct attribute_group netstat_group = {
.name = "statistics",
.attrs = netstat_attrs,
};
#ifdef WIRELESS_EXT
/* helper function that does all the locking etc for wireless stats */
static ssize_t wireless_show(struct class_device *cd, char *buf,
ssize_t (*format)(const struct iw_statistics *,
char *))
{
struct net_device *dev = to_net_dev(cd);
const struct iw_statistics *iw;
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
if (dev_isalive(dev) && dev->get_wireless_stats
&& (iw = dev->get_wireless_stats(dev)) != NULL)
ret = (*format)(iw, buf);
read_unlock(&dev_base_lock);
return ret;
}
/* show function template for wireless fields */
#define WIRELESS_SHOW(name, field, format_string) \
static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \
{ \
return sprintf(buf, format_string, iw->field); \
} \
static ssize_t show_iw_##name(struct class_device *cd, char *buf) \
{ \
return wireless_show(cd, buf, format_iw_##name); \
} \
static CLASS_DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL)
WIRELESS_SHOW(status, status, fmt_hex);
WIRELESS_SHOW(link, qual.qual, fmt_dec);
WIRELESS_SHOW(level, qual.level, fmt_dec);
WIRELESS_SHOW(noise, qual.noise, fmt_dec);
WIRELESS_SHOW(nwid, discard.nwid, fmt_dec);
WIRELESS_SHOW(crypt, discard.code, fmt_dec);
WIRELESS_SHOW(fragment, discard.fragment, fmt_dec);
WIRELESS_SHOW(misc, discard.misc, fmt_dec);
WIRELESS_SHOW(retries, discard.retries, fmt_dec);
WIRELESS_SHOW(beacon, miss.beacon, fmt_dec);
static struct attribute *wireless_attrs[] = {
&class_device_attr_status.attr,
&class_device_attr_link.attr,
&class_device_attr_level.attr,
&class_device_attr_noise.attr,
&class_device_attr_nwid.attr,
&class_device_attr_crypt.attr,
&class_device_attr_fragment.attr,
&class_device_attr_retries.attr,
&class_device_attr_misc.attr,
&class_device_attr_beacon.attr,
NULL
};
static struct attribute_group wireless_group = {
.name = "wireless",
.attrs = wireless_attrs,
};
#endif
#ifdef CONFIG_HOTPLUG
static int netdev_hotplug(struct class_device *cd, char **envp,
int num_envp, char *buf, int size)
{
struct net_device *dev = to_net_dev(cd);
int i = 0;
int n;
/* pass interface in env to hotplug. */
envp[i++] = buf;
n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
buf += n;
size -= n;
if ((size <= 0) || (i >= num_envp))
return -ENOMEM;
envp[i] = NULL;
return 0;
}
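/*
 * Added note: for eth0 this leaves envp[0] pointing at the string
 * "INTERFACE=eth0", which the userspace hotplug agent (e.g.
 * /sbin/hotplug) reads from its environment.
 */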
#endif
/*
* netdev_release -- destroy and free a dead device.
* Called when last reference to class_device kobject is gone.
*/
static void netdev_release(struct class_device *cd)
{
struct net_device *dev
= container_of(cd, struct net_device, class_dev);
BUG_ON(dev->reg_state != NETREG_RELEASED);
kfree((char *)dev - dev->padded);
}
static struct class net_class = {
.name = "net",
.release = netdev_release,
#ifdef CONFIG_HOTPLUG
.hotplug = netdev_hotplug,
#endif
};
void netdev_unregister_sysfs(struct net_device * net)
{
struct class_device * class_dev = &(net->class_dev);
if (net->get_stats)
sysfs_remove_group(&class_dev->kobj, &netstat_group);
#ifdef WIRELESS_EXT
if (net->get_wireless_stats)
sysfs_remove_group(&class_dev->kobj, &wireless_group);
#endif
class_device_del(class_dev);
}
/* Create sysfs entries for network device. */
int netdev_register_sysfs(struct net_device *net)
{
struct class_device *class_dev = &(net->class_dev);
int i;
struct class_device_attribute *attr;
int ret;
class_dev->class = &net_class;
class_dev->class_data = net;
strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE);
if ((ret = class_device_register(class_dev)))
goto out;
for (i = 0; (attr = net_class_attributes[i]) != NULL; i++) {
if ((ret = class_device_create_file(class_dev, attr)))
goto out_unreg;
}
if (net->get_stats &&
(ret = sysfs_create_group(&class_dev->kobj, &netstat_group)))
goto out_unreg;
#ifdef WIRELESS_EXT
if (net->get_wireless_stats &&
(ret = sysfs_create_group(&class_dev->kobj, &wireless_group)))
goto out_cleanup;
return 0;
out_cleanup:
if (net->get_stats)
sysfs_remove_group(&class_dev->kobj, &netstat_group);
#else
return 0;
#endif
out_unreg:
printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n",
net->name, ret);
class_device_unregister(class_dev);
out:
return ret;
}
int netdev_sysfs_init(void)
{
return class_register(&net_class);
}

View File

@@ -0,0 +1,842 @@
/* netfilter.c: look after the filters for various protocols.
* Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
*
* Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
* way.
*
* Rusty Russell (C)2000 -- This code is GPL.
*
* February 2000: Modified by James Morris to have 1 queue per protocol.
* 15-Mar-2000: Added NF_REPEAT --RR.
* 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/route.h>
#include <linux/ip.h>
/* In this code, we can be waiting indefinitely for userspace to
* service a packet if a hook returns NF_QUEUE. We could keep a count
* of skbuffs queued for userspace, and not deregister a hook unless
* this is zero, but that sucks. Now, we simply check when the
* packets come back: if the hook is gone, the packet is discarded. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...) printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif
/* Sockopts only registered and called from user context, so
net locking would be overkill. Also, [gs]etsockopt calls may
sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
static LIST_HEAD(nf_sockopts);
static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED;
/*
 * A queue handler may be registered for each protocol. Each is protected by
 * a long-term mutex. The handler must provide an outfn() to accept packets
* for queueing and must reinject all packets it receives, no matter what.
*/
static struct nf_queue_handler_t {
nf_queue_outfn_t outfn;
void *data;
} queue_handler[NPROTO];
static rwlock_t queue_handler_lock = RW_LOCK_UNLOCKED;
int nf_register_hook(struct nf_hook_ops *reg)
{
struct list_head *i;
spin_lock_bh(&nf_hook_lock);
list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
if (reg->priority < ((struct nf_hook_ops *)i)->priority)
break;
}
list_add_rcu(&reg->list, i->prev);
spin_unlock_bh(&nf_hook_lock);
synchronize_net();
return 0;
}
void nf_unregister_hook(struct nf_hook_ops *reg)
{
spin_lock_bh(&nf_hook_lock);
list_del_rcu(&reg->list);
spin_unlock_bh(&nf_hook_lock);
synchronize_net();
}
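/*
 * Illustrative sketch (not from the original file): a module hooks into
 * IPv4 PRE_ROUTING; my_hook and my_ops are placeholder names. The hook
 * function has the nf_hookfn signature walked by nf_iterate() below:
 *
 *	static unsigned int my_hook(unsigned int hooknum,
 *				    struct sk_buff **pskb,
 *				    const struct net_device *in,
 *				    const struct net_device *out,
 *				    int (*okfn)(struct sk_buff *))
 *	{
 *		return NF_ACCEPT;	// pass every packet through
 *	}
 *
 *	static struct nf_hook_ops my_ops = {
 *		.hook     = my_hook,
 *		.owner    = THIS_MODULE,
 *		.pf       = PF_INET,
 *		.hooknum  = NF_IP_PRE_ROUTING,
 *		.priority = NF_IP_PRI_FIRST,
 *	};
 *	nf_register_hook(&my_ops);
 */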
/* Do exclusive ranges overlap? */
static inline int overlap(int min1, int max1, int min2, int max2)
{
return max1 > min2 && min1 < max2;
}
/* Functions to register sockopt ranges (exclusive). */
int nf_register_sockopt(struct nf_sockopt_ops *reg)
{
struct list_head *i;
int ret = 0;
if (down_interruptible(&nf_sockopt_mutex) != 0)
return -EINTR;
list_for_each(i, &nf_sockopts) {
struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
if (ops->pf == reg->pf
&& (overlap(ops->set_optmin, ops->set_optmax,
reg->set_optmin, reg->set_optmax)
|| overlap(ops->get_optmin, ops->get_optmax,
reg->get_optmin, reg->get_optmax))) {
NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
ops->set_optmin, ops->set_optmax,
ops->get_optmin, ops->get_optmax,
reg->set_optmin, reg->set_optmax,
reg->get_optmin, reg->get_optmax);
ret = -EBUSY;
goto out;
}
}
list_add(&reg->list, &nf_sockopts);
out:
up(&nf_sockopt_mutex);
return ret;
}
void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
{
/* No point being interruptible: we're probably in cleanup_module() */
restart:
down(&nf_sockopt_mutex);
if (reg->use != 0) {
/* To be woken by nf_sockopt call... */
/* FIXME: Stuart Young's name appears gratuitously. */
set_current_state(TASK_UNINTERRUPTIBLE);
reg->cleanup_task = current;
up(&nf_sockopt_mutex);
schedule();
goto restart;
}
list_del(&reg->list);
up(&nf_sockopt_mutex);
}
#ifdef CONFIG_NETFILTER_DEBUG
#include <net/ip.h>
#include <net/tcp.h>
#include <linux/netfilter_ipv4.h>
static void debug_print_hooks_ip(unsigned int nf_debug)
{
if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
printk("PRE_ROUTING ");
nf_debug ^= (1 << NF_IP_PRE_ROUTING);
}
if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
printk("LOCAL_IN ");
nf_debug ^= (1 << NF_IP_LOCAL_IN);
}
if (nf_debug & (1 << NF_IP_FORWARD)) {
printk("FORWARD ");
nf_debug ^= (1 << NF_IP_FORWARD);
}
if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
printk("LOCAL_OUT ");
nf_debug ^= (1 << NF_IP_LOCAL_OUT);
}
if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
printk("POST_ROUTING ");
nf_debug ^= (1 << NF_IP_POST_ROUTING);
}
if (nf_debug)
printk("Crap bits: 0x%04X", nf_debug);
printk("\n");
}
void nf_dump_skb(int pf, struct sk_buff *skb)
{
printk("skb: pf=%i %s dev=%s len=%u\n",
pf,
skb->sk ? "(owned)" : "(unowned)",
skb->dev ? skb->dev->name : "(no dev)",
skb->len);
switch (pf) {
case PF_INET: {
const struct iphdr *ip = skb->nh.iph;
__u32 *opt = (__u32 *) (ip + 1);
int opti;
__u16 src_port = 0, dst_port = 0;
if (ip->protocol == IPPROTO_TCP
|| ip->protocol == IPPROTO_UDP) {
struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
src_port = ntohs(tcp->source);
dst_port = ntohs(tcp->dest);
}
printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
" L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
ip->protocol, NIPQUAD(ip->saddr),
src_port, NIPQUAD(ip->daddr),
dst_port,
ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
ntohs(ip->frag_off), ip->ttl);
for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
printk(" O=0x%8.8X", *opt++);
printk("\n");
}
}
}
void nf_debug_ip_local_deliver(struct sk_buff *skb)
{
/* If it's a loopback packet, it must have come through
* NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
* NF_IP_LOCAL_IN. Otherwise, must have gone through
* NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
if (!skb->dev) {
printk("ip_local_deliver: skb->dev is NULL.\n");
}
else if (strcmp(skb->dev->name, "lo") == 0) {
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING)
| (1 << NF_IP_PRE_ROUTING)
| (1 << NF_IP_LOCAL_IN))) {
printk("ip_local_deliver: bad loopback skb: ");
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
}
else {
if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
| (1<<NF_IP_LOCAL_IN))) {
printk("ip_local_deliver: bad non-lo skb: ");
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
}
}
void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
{
if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING))) {
printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
newskb);
debug_print_hooks_ip(newskb->nf_debug);
nf_dump_skb(PF_INET, newskb);
}
/* Clear to avoid confusing input check */
newskb->nf_debug = 0;
}
void nf_debug_ip_finish_output2(struct sk_buff *skb)
{
/* If it's owned, it must have gone through the
* NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
* Otherwise, must have gone through
* NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
*/
if (skb->sk) {
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING))) {
printk("ip_finish_output: bad owned skb = %p: ", skb);
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
} else {
if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
| (1 << NF_IP_FORWARD)
| (1 << NF_IP_POST_ROUTING))) {
/* Fragments, entunnelled packets, TCP RSTs
generated by ipt_REJECT will have no
owners, but still may be local */
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING))){
printk("ip_finish_output:"
" bad unowned skb = %p: ",skb);
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
}
}
}
#endif /*CONFIG_NETFILTER_DEBUG*/
/* Call get/setsockopt() */
static int nf_sockopt(struct sock *sk, int pf, int val,
char __user *opt, int *len, int get)
{
struct list_head *i;
struct nf_sockopt_ops *ops;
int ret;
if (down_interruptible(&nf_sockopt_mutex) != 0)
return -EINTR;
list_for_each(i, &nf_sockopts) {
ops = (struct nf_sockopt_ops *)i;
if (ops->pf == pf) {
if (get) {
if (val >= ops->get_optmin
&& val < ops->get_optmax) {
ops->use++;
up(&nf_sockopt_mutex);
ret = ops->get(sk, val, opt, len);
goto out;
}
} else {
if (val >= ops->set_optmin
&& val < ops->set_optmax) {
ops->use++;
up(&nf_sockopt_mutex);
ret = ops->set(sk, val, opt, *len);
goto out;
}
}
}
}
up(&nf_sockopt_mutex);
return -ENOPROTOOPT;
out:
down(&nf_sockopt_mutex);
ops->use--;
if (ops->cleanup_task)
wake_up_process(ops->cleanup_task);
up(&nf_sockopt_mutex);
return ret;
}
int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
int len)
{
return nf_sockopt(sk, pf, val, opt, &len, 0);
}
int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
{
return nf_sockopt(sk, pf, val, opt, len, 1);
}
static unsigned int nf_iterate(struct list_head *head,
struct sk_buff **skb,
int hook,
const struct net_device *indev,
const struct net_device *outdev,
struct list_head **i,
int (*okfn)(struct sk_buff *),
int hook_thresh)
{
/*
* The caller must not block between calls to this
* function because of risk of continuing from deleted element.
*/
list_for_each_continue_rcu(*i, head) {
struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
if (hook_thresh > elem->priority)
continue;
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
switch (elem->hook(hook, skb, indev, outdev, okfn)) {
case NF_QUEUE:
return NF_QUEUE;
case NF_STOLEN:
return NF_STOLEN;
case NF_DROP:
return NF_DROP;
case NF_REPEAT:
*i = (*i)->prev;
break;
#ifdef CONFIG_NETFILTER_DEBUG
case NF_ACCEPT:
break;
default:
NFDEBUG("Evil return from %p(%u).\n",
elem->hook, hook);
#endif
}
}
return NF_ACCEPT;
}
int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
int ret;
write_lock_bh(&queue_handler_lock);
if (queue_handler[pf].outfn)
ret = -EBUSY;
else {
queue_handler[pf].outfn = outfn;
queue_handler[pf].data = data;
ret = 0;
}
write_unlock_bh(&queue_handler_lock);
return ret;
}
/* The caller must flush their queue before this */
int nf_unregister_queue_handler(int pf)
{
write_lock_bh(&queue_handler_lock);
queue_handler[pf].outfn = NULL;
queue_handler[pf].data = NULL;
write_unlock_bh(&queue_handler_lock);
return 0;
}
/*
* Any packet that leaves via this function must come back
* through nf_reinject().
*/
static int nf_queue(struct sk_buff *skb,
struct list_head *elem,
int pf, unsigned int hook,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *))
{
int status;
struct nf_info *info;
#ifdef CONFIG_BRIDGE_NETFILTER
struct net_device *physindev = NULL;
struct net_device *physoutdev = NULL;
#endif
/* QUEUE == DROP if no one is waiting, to be safe. */
read_lock(&queue_handler_lock);
if (!queue_handler[pf].outfn) {
read_unlock(&queue_handler_lock);
kfree_skb(skb);
return 1;
}
info = kmalloc(sizeof(*info), GFP_ATOMIC);
if (!info) {
if (net_ratelimit())
printk(KERN_ERR "OOM queueing packet %p\n",
skb);
read_unlock(&queue_handler_lock);
kfree_skb(skb);
return 1;
}
*info = (struct nf_info) {
(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
/* If it's going away, ignore hook. */
if (!try_module_get(info->elem->owner)) {
read_unlock(&queue_handler_lock);
kfree(info);
return 0;
}
/* Bump dev refs so they don't vanish while packet is out */
if (indev) dev_hold(indev);
if (outdev) dev_hold(outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
if (skb->nf_bridge) {
physindev = skb->nf_bridge->physindev;
if (physindev) dev_hold(physindev);
physoutdev = skb->nf_bridge->physoutdev;
if (physoutdev) dev_hold(physoutdev);
}
#endif
status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
read_unlock(&queue_handler_lock);
if (status < 0) {
/* James M doesn't say fuck enough. */
if (indev) dev_put(indev);
if (outdev) dev_put(outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
if (physindev) dev_put(physindev);
if (physoutdev) dev_put(physoutdev);
#endif
module_put(info->elem->owner);
kfree(info);
kfree_skb(skb);
return 1;
}
return 1;
}
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *),
int hook_thresh)
{
struct list_head *elem;
unsigned int verdict;
int ret = 0;
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
#ifdef CONFIG_NETFILTER_DEBUG
if (skb->nf_debug & (1 << hook)) {
printk("nf_hook: hook %i already set.\n", hook);
nf_dump_skb(pf, skb);
}
skb->nf_debug |= (1 << hook);
#endif
elem = &nf_hooks[pf][hook];
next_hook:
verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
outdev, &elem, okfn, hook_thresh);
if (verdict == NF_QUEUE) {
NFDEBUG("nf_hook: Verdict = QUEUE.\n");
if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn))
goto next_hook;
}
switch (verdict) {
case NF_ACCEPT:
ret = okfn(skb);
break;
case NF_DROP:
kfree_skb(skb);
ret = -EPERM;
break;
}
rcu_read_unlock();
return ret;
}
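/*
 * Callers normally reach nf_hook_slow() through the NF_HOOK() wrapper in
 * <linux/netfilter.h> rather than directly; a typical IPv4 output path
 * looks roughly like this (sketch, hook point and okfn are examples):
 *
 *	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, dev, dst_output);
 *
 * The wrapper passes INT_MIN as hook_thresh, so every registered hook
 * gets to see the packet, matching the INT_MIN restart used by
 * nf_reinject() below.
 */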
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
unsigned int verdict)
{
struct list_head *elem = &info->elem->list;
struct list_head *i;
rcu_read_lock();
/* Release those devices we held, or Alexey will kill me. */
if (info->indev) dev_put(info->indev);
if (info->outdev) dev_put(info->outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
if (skb->nf_bridge) {
if (skb->nf_bridge->physindev)
dev_put(skb->nf_bridge->physindev);
if (skb->nf_bridge->physoutdev)
dev_put(skb->nf_bridge->physoutdev);
}
#endif
/* Drop reference to owner of hook which queued us. */
module_put(info->elem->owner);
list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
if (i == elem)
break;
}
if (elem == &nf_hooks[info->pf][info->hook]) {
/* The module which sent it to userspace is gone. */
NFDEBUG("%s: module disappeared, dropping packet.\n",
__FUNCTION__);
verdict = NF_DROP;
}
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT) {
elem = elem->prev;
verdict = NF_ACCEPT;
}
if (verdict == NF_ACCEPT) {
next_hook:
verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
&skb, info->hook,
info->indev, info->outdev, &elem,
info->okfn, INT_MIN);
}
switch (verdict) {
case NF_ACCEPT:
info->okfn(skb);
break;
case NF_QUEUE:
if (!nf_queue(skb, elem, info->pf, info->hook,
info->indev, info->outdev, info->okfn))
goto next_hook;
break;
}
rcu_read_unlock();
if (verdict == NF_DROP)
kfree_skb(skb);
kfree(info);
return;
}
#ifdef CONFIG_INET
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff **pskb)
{
struct iphdr *iph = (*pskb)->nh.iph;
struct rtable *rt;
struct flowi fl = {};
struct dst_entry *odst;
unsigned int hh_len;
/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
* packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
*/
if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
fl.nl_u.ip4_u.daddr = iph->daddr;
fl.nl_u.ip4_u.saddr = iph->saddr;
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
#ifdef CONFIG_IP_ROUTE_FWMARK
fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
#endif
fl.proto = iph->protocol;
if (ip_route_output_key(&rt, &fl) != 0)
return -1;
/* Drop old route. */
dst_release((*pskb)->dst);
(*pskb)->dst = &rt->u.dst;
} else {
/* non-local src, find valid iif to satisfy
* rp-filter when calling ip_route_input. */
fl.nl_u.ip4_u.daddr = iph->saddr;
if (ip_route_output_key(&rt, &fl) != 0)
return -1;
odst = (*pskb)->dst;
if (ip_route_input(*pskb, iph->daddr, iph->saddr,
RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
dst_release(&rt->u.dst);
return -1;
}
dst_release(&rt->u.dst);
dst_release(odst);
}
if ((*pskb)->dst->error)
return -1;
/* Change in oif may mean change in hh_len. */
hh_len = (*pskb)->dst->dev->hard_header_len;
if (skb_headroom(*pskb) < hh_len) {
struct sk_buff *nskb;
nskb = skb_realloc_headroom(*pskb, hh_len);
if (!nskb)
return -1;
if ((*pskb)->sk)
skb_set_owner_w(nskb, (*pskb)->sk);
kfree_skb(*pskb);
*pskb = nskb;
}
return 0;
}
EXPORT_SYMBOL(ip_route_me_harder);
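/*
 * Caller sketch (illustrative): a target that rewrites addresses on
 * LOCAL_OUT re-routes the packet afterwards; new_daddr is an assumed
 * value from the target's own logic.
 *
 *	iph->daddr = new_daddr;
 *	ip_send_check(iph);
 *	if (ip_route_me_harder(pskb) != 0)
 *		return NF_DROP;
 *
 * A failed re-route leaves the skb without a usable dst, so dropping is
 * the only safe verdict.
 */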
int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
{
struct sk_buff *nskb;
unsigned int iplen;
if (writable_len > (*pskb)->len)
return 0;
/* Not exclusive use of packet? Must copy. */
if (skb_shared(*pskb) || skb_cloned(*pskb))
goto copy_skb;
/* Alexey says IP hdr is always modifiable and linear, so ok. */
if (writable_len <= (*pskb)->nh.iph->ihl*4)
return 1;
iplen = writable_len - (*pskb)->nh.iph->ihl*4;
/* DaveM says protocol headers are also modifiable. */
switch ((*pskb)->nh.iph->protocol) {
case IPPROTO_TCP: {
struct tcphdr _hdr, *hp;
hp = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
sizeof(_hdr), &_hdr);
if (hp == NULL)
goto copy_skb;
if (writable_len <= (*pskb)->nh.iph->ihl*4 + hp->doff*4)
goto pull_skb;
goto copy_skb;
}
case IPPROTO_UDP:
if (writable_len<=(*pskb)->nh.iph->ihl*4+sizeof(struct udphdr))
goto pull_skb;
goto copy_skb;
case IPPROTO_ICMP:
if (writable_len
<= (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr))
goto pull_skb;
goto copy_skb;
/* Insert other cases here as desired */
}
copy_skb:
nskb = skb_copy(*pskb, GFP_ATOMIC);
if (!nskb)
return 0;
BUG_ON(skb_is_nonlinear(nskb));
/* Rest of kernel will get very unhappy if we pass it a
suddenly-orphaned skbuff */
if ((*pskb)->sk)
skb_set_owner_w(nskb, (*pskb)->sk);
kfree_skb(*pskb);
*pskb = nskb;
return 1;
pull_skb:
return pskb_may_pull(*pskb, writable_len);
}
EXPORT_SYMBOL(skb_ip_make_writable);
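/*
 * Usage sketch (illustrative): before mangling a TCP header, a module
 * makes the IP plus TCP bytes writable. On success the skb may have been
 * copied, so header pointers must be re-read through *pskb:
 *
 *	if (!skb_ip_make_writable(pskb,
 *				  (*pskb)->nh.iph->ihl*4 +
 *				  sizeof(struct tcphdr)))
 *		return NF_DROP;
 *	iph = (*pskb)->nh.iph;
 */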
#endif /*CONFIG_INET*/
/* Internal logging interface, which relies on the real
LOG target modules */
#define NF_LOG_PREFIXLEN 128
static nf_logfn *nf_logging[NPROTO]; /* = NULL */
static int reported = 0;
static spinlock_t nf_log_lock = SPIN_LOCK_UNLOCKED;
int nf_log_register(int pf, nf_logfn *logfn)
{
int ret = -EBUSY;
/* Any setup of logging members must be done before
* substituting pointer. */
spin_lock(&nf_log_lock);
if (!nf_logging[pf]) {
rcu_assign_pointer(nf_logging[pf], logfn);
ret = 0;
}
spin_unlock(&nf_log_lock);
return ret;
}
void nf_log_unregister(int pf, nf_logfn *logfn)
{
spin_lock(&nf_log_lock);
if (nf_logging[pf] == logfn)
nf_logging[pf] = NULL;
spin_unlock(&nf_log_lock);
/* Give time to concurrent readers. */
synchronize_net();
}
void nf_log_packet(int pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const char *fmt, ...)
{
va_list args;
char prefix[NF_LOG_PREFIXLEN];
nf_logfn *logfn;
rcu_read_lock();
logfn = rcu_dereference(nf_logging[pf]);
if (logfn) {
va_start(args, fmt);
vsnprintf(prefix, sizeof(prefix), fmt, args);
va_end(args);
/* We must read logging before nf_logfn[pf] */
logfn(hooknum, skb, in, out, prefix);
} else if (!reported) {
printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
"no backend logging module loaded in!\n");
reported++;
}
rcu_read_unlock();
}
EXPORT_SYMBOL(nf_log_register);
EXPORT_SYMBOL(nf_log_unregister);
EXPORT_SYMBOL(nf_log_packet);
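/*
 * Usage sketch (illustrative): a backend such as a LOG target module
 * registers one nf_logfn per protocol family, after which hook code can
 * log through nf_log_packet(). The backend function name is an
 * assumption.
 *
 *	static nf_logfn my_ipv4_logfn;
 *	nf_log_register(PF_INET, &my_ipv4_logfn);
 *
 *	nf_log_packet(PF_INET, hooknum, skb, in, out,
 *		      "my-module: dropped: ");
 */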
/* This does not belong here, but locally generated errors need it if
   connection tracking is in use: without this, the connection may not be
   in the hash table, and hence manufactured ICMP or RST packets will not
   be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
{
void (*attach)(struct sk_buff *, struct sk_buff *);
if (skb->nfct && (attach = ip_ct_attach) != NULL) {
mb(); /* Just to be sure: must be read before executing this */
attach(new, skb);
}
}
void __init netfilter_init(void)
{
int i, h;
for (i = 0; i < NPROTO; i++) {
for (h = 0; h < NF_MAX_HOOKS; h++)
INIT_LIST_HEAD(&nf_hooks[i][h]);
}
}
EXPORT_SYMBOL(ip_ct_attach);
EXPORT_SYMBOL(nf_ct_attach);
EXPORT_SYMBOL(nf_getsockopt);
EXPORT_SYMBOL(nf_hook_slow);
EXPORT_SYMBOL(nf_hooks);
EXPORT_SYMBOL(nf_register_hook);
EXPORT_SYMBOL(nf_register_queue_handler);
EXPORT_SYMBOL(nf_register_sockopt);
EXPORT_SYMBOL(nf_reinject);
EXPORT_SYMBOL(nf_setsockopt);
EXPORT_SYMBOL(nf_unregister_hook);
EXPORT_SYMBOL(nf_unregister_queue_handler);
EXPORT_SYMBOL(nf_unregister_sockopt);

View File

@@ -0,0 +1,681 @@
/*
* Common framework for low-level network console, dump, and debugger code
*
* Sep 8 2003 Matt Mackall <mpm@selenic.com>
*
* based on the netconsole code from:
*
* Copyright (C) 2001 Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2002 Red Hat, Inc.
*/
#include <linux/smp_lock.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
/*
* We maintain a small pool of fully-sized skbs, to make sure the
* message gets out even in extreme OOM situations.
*/
#define MAX_SKBS 32
#define MAX_UDP_CHUNK 1460
static spinlock_t skb_list_lock = SPIN_LOCK_UNLOCKED;
static int nr_skbs;
static struct sk_buff *skbs;
static spinlock_t rx_list_lock = SPIN_LOCK_UNLOCKED;
static LIST_HEAD(rx_list);
static atomic_t trapped;
spinlock_t netpoll_poll_lock = SPIN_LOCK_UNLOCKED;
#define NETPOLL_RX_ENABLED 1
#define NETPOLL_RX_DROP 2
#define MAX_SKB_SIZE \
(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
sizeof(struct iphdr) + sizeof(struct ethhdr))
static void zap_completion_queue(void);
static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
unsigned short ulen, u32 saddr, u32 daddr)
{
if (uh->check == 0)
return 0;
if (skb->ip_summed == CHECKSUM_HW)
return csum_tcpudp_magic(
saddr, daddr, ulen, IPPROTO_UDP, skb->csum);
skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
}
void netpoll_poll(struct netpoll *np)
{
/*
* In cases where there is bi-directional communications, reading
* only one message at a time can lead to packets being dropped by
* the network adapter, forcing superfluous retries and possibly
* timeouts. Thus, we set our budget to a more reasonable value.
*/
int budget = 16;
unsigned long flags;
if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
return;
/* Process pending work on NIC */
np->dev->poll_controller(np->dev);
/* If scheduling is stopped, tickle NAPI bits */
spin_lock_irqsave(&netpoll_poll_lock, flags);
if (np->dev->poll &&
test_bit(__LINK_STATE_RX_SCHED, &np->dev->state)) {
np->dev->netpoll_rx |= NETPOLL_RX_DROP;
atomic_inc(&trapped);
np->dev->poll(np->dev, &budget);
atomic_dec(&trapped);
np->dev->netpoll_rx &= ~NETPOLL_RX_DROP;
}
spin_unlock_irqrestore(&netpoll_poll_lock, flags);
zap_completion_queue();
}
static void refill_skbs(void)
{
struct sk_buff *skb;
unsigned long flags;
spin_lock_irqsave(&skb_list_lock, flags);
while (nr_skbs < MAX_SKBS) {
skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
if (!skb)
break;
skb->next = skbs;
skbs = skb;
nr_skbs++;
}
spin_unlock_irqrestore(&skb_list_lock, flags);
}
static void zap_completion_queue(void)
{
unsigned long flags;
struct softnet_data *sd = &get_cpu_var(softnet_data);
if (sd->completion_queue) {
struct sk_buff *clist;
local_irq_save(flags);
clist = sd->completion_queue;
sd->completion_queue = NULL;
local_irq_restore(flags);
while (clist != NULL) {
struct sk_buff *skb = clist;
clist = clist->next;
__kfree_skb(skb);
}
}
put_cpu_var(softnet_data);
}
static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
{
int once = 1, count = 0;
unsigned long flags;
struct sk_buff *skb = NULL;
zap_completion_queue();
repeat:
if (nr_skbs < MAX_SKBS)
refill_skbs();
skb = alloc_skb(len, GFP_ATOMIC);
	if (!skb) {
		spin_lock_irqsave(&skb_list_lock, flags);
		skb = skbs;
		/* Only unlink and account when the pool actually had an skb;
		 * dereferencing a NULL head here would crash. */
		if (skb) {
			skbs = skb->next;
			skb->next = NULL;
			nr_skbs--;
		}
		spin_unlock_irqrestore(&skb_list_lock, flags);
	}
if(!skb) {
count++;
if (once && (count == 1000000)) {
printk("out of netpoll skbs!\n");
once = 0;
}
netpoll_poll(np);
goto repeat;
}
atomic_set(&skb->users, 1);
skb_reserve(skb, reserve);
return skb;
}
void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
int status;
repeat:
if(!np || !np->dev || !netif_running(np->dev)) {
__kfree_skb(skb);
return;
}
spin_lock(&np->dev->xmit_lock);
np->dev->xmit_lock_owner = smp_processor_id();
/*
* network drivers do not expect to be called if the queue is
* stopped.
*/
if (netif_queue_stopped(np->dev)) {
np->dev->xmit_lock_owner = -1;
spin_unlock(&np->dev->xmit_lock);
netpoll_poll(np);
goto repeat;
}
status = np->dev->hard_start_xmit(skb, np->dev);
np->dev->xmit_lock_owner = -1;
spin_unlock(&np->dev->xmit_lock);
/* transmit busy */
if(status) {
netpoll_poll(np);
goto repeat;
}
}
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
int total_len, eth_len, ip_len, udp_len;
struct sk_buff *skb;
struct udphdr *udph;
struct iphdr *iph;
struct ethhdr *eth;
udp_len = len + sizeof(*udph);
ip_len = eth_len = udp_len + sizeof(*iph);
total_len = eth_len + ETH_HLEN;
skb = find_skb(np, total_len, total_len - len);
if (!skb)
return;
memcpy(skb->data, msg, len);
skb->len += len;
udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
udph->source = htons(np->local_port);
udph->dest = htons(np->remote_port);
udph->len = htons(udp_len);
udph->check = 0;
iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
/* iph->version = 4; iph->ihl = 5; */
put_unaligned(0x45, (unsigned char *)iph);
iph->tos = 0;
put_unaligned(htons(ip_len), &(iph->tot_len));
iph->id = 0;
iph->frag_off = 0;
iph->ttl = 64;
iph->protocol = IPPROTO_UDP;
iph->check = 0;
put_unaligned(htonl(np->local_ip), &(iph->saddr));
put_unaligned(htonl(np->remote_ip), &(iph->daddr));
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
eth->h_proto = htons(ETH_P_IP);
memcpy(eth->h_source, np->local_mac, 6);
memcpy(eth->h_dest, np->remote_mac, 6);
netpoll_send_skb(np, skb);
}
static void arp_reply(struct sk_buff *skb)
{
struct arphdr *arp;
unsigned char *arp_ptr;
int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
u32 sip, tip;
struct sk_buff *send_skb;
unsigned long flags;
struct list_head *p;
struct netpoll *np = NULL;
spin_lock_irqsave(&rx_list_lock, flags);
list_for_each(p, &rx_list) {
np = list_entry(p, struct netpoll, rx_list);
if ( np->dev == skb->dev )
break;
np = NULL;
}
spin_unlock_irqrestore(&rx_list_lock, flags);
if (!np) return;
/* No arp on this interface */
if (skb->dev->flags & IFF_NOARP)
return;
if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
(2 * skb->dev->addr_len) +
(2 * sizeof(u32)))))
return;
skb->h.raw = skb->nh.raw = skb->data;
arp = skb->nh.arph;
if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
arp->ar_pro != htons(ETH_P_IP) ||
arp->ar_op != htons(ARPOP_REQUEST))
return;
arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
memcpy(&sip, arp_ptr, 4);
arp_ptr += 4 + skb->dev->addr_len;
memcpy(&tip, arp_ptr, 4);
/* Should we ignore arp? */
if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
return;
size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
LL_RESERVED_SPACE(np->dev));
if (!send_skb)
return;
send_skb->nh.raw = send_skb->data;
arp = (struct arphdr *) skb_put(send_skb, size);
send_skb->dev = skb->dev;
send_skb->protocol = htons(ETH_P_ARP);
/* Fill the device header for the ARP frame */
if (np->dev->hard_header &&
np->dev->hard_header(send_skb, skb->dev, ptype,
np->remote_mac, np->local_mac,
send_skb->len) < 0) {
kfree_skb(send_skb);
return;
}
/*
* Fill out the arp protocol part.
*
* we only support ethernet device type,
* which (according to RFC 1390) should always equal 1 (Ethernet).
*/
arp->ar_hrd = htons(np->dev->type);
arp->ar_pro = htons(ETH_P_IP);
arp->ar_hln = np->dev->addr_len;
arp->ar_pln = 4;
arp->ar_op = htons(type);
arp_ptr=(unsigned char *)(arp + 1);
memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
arp_ptr += np->dev->addr_len;
memcpy(arp_ptr, &tip, 4);
arp_ptr += 4;
memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
arp_ptr += np->dev->addr_len;
memcpy(arp_ptr, &sip, 4);
netpoll_send_skb(np, send_skb);
}
int netpoll_rx(struct sk_buff *skb)
{
int proto, len, ulen;
struct iphdr *iph;
struct udphdr *uh;
struct netpoll *np;
struct list_head *p;
unsigned long flags;
if (skb->dev->type != ARPHRD_ETHER)
goto out;
/* check if netpoll clients need ARP */
if (skb->protocol == __constant_htons(ETH_P_ARP) &&
atomic_read(&trapped)) {
arp_reply(skb);
return 1;
}
proto = ntohs(eth_hdr(skb)->h_proto);
if (proto != ETH_P_IP)
goto out;
if (skb->pkt_type == PACKET_OTHERHOST)
goto out;
if (skb_shared(skb))
goto out;
iph = (struct iphdr *)skb->data;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;
if (iph->ihl < 5 || iph->version != 4)
goto out;
if (!pskb_may_pull(skb, iph->ihl*4))
goto out;
if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
goto out;
len = ntohs(iph->tot_len);
if (skb->len < len || len < iph->ihl*4)
goto out;
if (iph->protocol != IPPROTO_UDP)
goto out;
len -= iph->ihl*4;
uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
ulen = ntohs(uh->len);
if (ulen != len)
goto out;
if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
goto out;
spin_lock_irqsave(&rx_list_lock, flags);
list_for_each(p, &rx_list) {
np = list_entry(p, struct netpoll, rx_list);
if (np->dev && np->dev != skb->dev)
continue;
if (np->local_ip && np->local_ip != ntohl(iph->daddr))
continue;
if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
continue;
if (np->local_port && np->local_port != ntohs(uh->dest))
continue;
spin_unlock_irqrestore(&rx_list_lock, flags);
if (np->rx_hook)
np->rx_hook(np, ntohs(uh->source),
(char *)(uh+1),
ulen - sizeof(struct udphdr));
return 1;
}
spin_unlock_irqrestore(&rx_list_lock, flags);
out:
return atomic_read(&trapped);
}
int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
if(*cur != '@') {
if ((delim = strchr(cur, '@')) == NULL)
goto parse_failed;
*delim=0;
np->local_port=simple_strtol(cur, NULL, 10);
cur=delim;
}
cur++;
printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
if(*cur != '/') {
if ((delim = strchr(cur, '/')) == NULL)
goto parse_failed;
*delim=0;
np->local_ip=ntohl(in_aton(cur));
cur=delim;
printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
np->name, HIPQUAD(np->local_ip));
}
cur++;
if ( *cur != ',') {
/* parse out dev name */
if ((delim = strchr(cur, ',')) == NULL)
goto parse_failed;
*delim=0;
strlcpy(np->dev_name, cur, sizeof(np->dev_name));
cur=delim;
}
cur++;
printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
if ( *cur != '@' ) {
/* dst port */
if ((delim = strchr(cur, '@')) == NULL)
goto parse_failed;
*delim=0;
np->remote_port=simple_strtol(cur, NULL, 10);
cur=delim;
}
cur++;
printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
/* dst ip */
if ((delim = strchr(cur, '/')) == NULL)
goto parse_failed;
*delim=0;
np->remote_ip=ntohl(in_aton(cur));
cur=delim+1;
printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
np->name, HIPQUAD(np->remote_ip));
	if (*cur != 0) {
		/* MAC address: six colon-separated hex octets */
		int i;
		for (i = 0; i < 5; i++) {
			if ((delim = strchr(cur, ':')) == NULL)
				goto parse_failed;
			*delim = 0;
			np->remote_mac[i] = simple_strtol(cur, NULL, 16);
			cur = delim + 1;
		}
		np->remote_mac[5] = simple_strtol(cur, NULL, 16);
	}
printk(KERN_INFO "%s: remote ethernet address "
"%02x:%02x:%02x:%02x:%02x:%02x\n",
np->name,
np->remote_mac[0],
np->remote_mac[1],
np->remote_mac[2],
np->remote_mac[3],
np->remote_mac[4],
np->remote_mac[5]);
return 0;
parse_failed:
printk(KERN_INFO "%s: couldn't parse config at %s!\n",
np->name, cur);
return -1;
}
int netpoll_setup(struct netpoll *np)
{
struct net_device *ndev = NULL;
struct in_device *in_dev;
if (np->dev_name)
ndev = dev_get_by_name(np->dev_name);
if (!ndev) {
printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
np->name, np->dev_name);
return -1;
}
if (!ndev->poll_controller) {
printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
np->name, np->dev_name);
goto release;
}
if (!netif_running(ndev)) {
unsigned short oflags;
unsigned long atmost, atleast;
printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
np->name, np->dev_name);
oflags = ndev->flags;
rtnl_shlock();
if (dev_change_flags(ndev, oflags | IFF_UP) < 0) {
printk(KERN_ERR "%s: failed to open %s\n",
np->name, np->dev_name);
rtnl_shunlock();
goto release;
}
rtnl_shunlock();
atleast = jiffies + HZ/10;
atmost = jiffies + 10*HZ;
while (!netif_carrier_ok(ndev)) {
if (time_after(jiffies, atmost)) {
printk(KERN_NOTICE
"%s: timeout waiting for carrier\n",
np->name);
break;
}
cond_resched();
}
if (time_before(jiffies, atleast)) {
printk(KERN_NOTICE "%s: carrier detect appears flaky,"
" waiting 10 seconds\n",
np->name);
while (time_before(jiffies, atmost))
cond_resched();
}
}
if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
memcpy(np->local_mac, ndev->dev_addr, 6);
if (!np->local_ip) {
rcu_read_lock();
in_dev = __in_dev_get(ndev);
if (!in_dev || !in_dev->ifa_list) {
rcu_read_unlock();
printk(KERN_ERR "%s: no IP address for %s, aborting\n",
np->name, np->dev_name);
goto release;
}
np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
rcu_read_unlock();
printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
np->name, HIPQUAD(np->local_ip));
}
np->dev = ndev;
if(np->rx_hook) {
unsigned long flags;
np->dev->netpoll_rx = NETPOLL_RX_ENABLED;
spin_lock_irqsave(&rx_list_lock, flags);
list_add(&np->rx_list, &rx_list);
spin_unlock_irqrestore(&rx_list_lock, flags);
}
return 0;
release:
dev_put(ndev);
return -1;
}
void netpoll_cleanup(struct netpoll *np)
{
if (np->rx_hook) {
unsigned long flags;
spin_lock_irqsave(&rx_list_lock, flags);
list_del(&np->rx_list);
spin_unlock_irqrestore(&rx_list_lock, flags);
}
if (np->dev)
np->dev->netpoll_rx = 0;
dev_put(np->dev);
np->dev = NULL;
}
int netpoll_trap(void)
{
return atomic_read(&trapped);
}
void netpoll_set_trap(int trap)
{
if (trap)
atomic_inc(&trapped);
else
atomic_dec(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_send_skb);
EXPORT_SYMBOL(netpoll_send_udp);
EXPORT_SYMBOL(netpoll_poll);
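/*
 * Usage sketch (illustrative): a client such as netconsole fills in a
 * struct netpoll, optionally parses a config string of the form
 * port@local_ip/dev,port@remote_ip/AA:BB:CC:DD:EE:FF (which must be
 * writable, since parsing inserts NULs), and then transmits with
 * netpoll_send_udp(). All field values below are examples.
 *
 *	static struct netpoll np = {
 *		.name		= "mylog",
 *		.dev_name	= "eth0",
 *		.local_port	= 6665,
 *		.remote_port	= 6666,
 *		.remote_mac	= { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
 *	};
 *
 *	if (netpoll_parse_options(&np, config) == 0 &&
 *	    netpoll_setup(&np) == 0)
 *		netpoll_send_udp(&np, msg, strlen(msg));
 */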

File diff suppressed because it is too large

View File

@@ -0,0 +1,685 @@
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Routing netlink socket interface: protocol independent part.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
* Vitaly E. Lavrov RTA_OK arithmetics was wrong.
*/
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/security.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/string.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/udp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
DECLARE_MUTEX(rtnl_sem);
void rtnl_lock(void)
{
rtnl_shlock();
}
void rtnl_unlock(void)
{
rtnl_shunlock();
netdev_run_todo();
}
int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
{
memset(tb, 0, sizeof(struct rtattr*)*maxattr);
while (RTA_OK(rta, len)) {
unsigned flavor = rta->rta_type;
if (flavor && flavor <= maxattr)
tb[flavor-1] = rta;
rta = RTA_NEXT(rta, len);
}
return 0;
}
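/*
 * Usage sketch (illustrative): callers point rtattr_parse() at the
 * attribute area of a message and then index the table by (type - 1),
 * e.g. for a link message carried in nlh:
 *
 *	struct ifinfomsg *ifm = NLMSG_DATA(nlh);
 *	struct rtattr *tb[IFLA_MAX];
 *	u32 mtu;
 *
 *	rtattr_parse(tb, IFLA_MAX, IFLA_RTA(ifm),
 *		     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifm)));
 *	if (tb[IFLA_MTU - 1])
 *		mtu = *(u32 *)RTA_DATA(tb[IFLA_MTU - 1]);
 */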
struct sock *rtnl;
struct rtnetlink_link * rtnetlink_links[NPROTO];
static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
{
NLMSG_LENGTH(sizeof(struct ifinfomsg)),
NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
NLMSG_LENGTH(sizeof(struct rtmsg)),
NLMSG_LENGTH(sizeof(struct ndmsg)),
NLMSG_LENGTH(sizeof(struct rtmsg)),
NLMSG_LENGTH(sizeof(struct tcmsg)),
NLMSG_LENGTH(sizeof(struct tcmsg)),
NLMSG_LENGTH(sizeof(struct tcmsg)),
NLMSG_LENGTH(sizeof(struct tcamsg))
};
static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
{
IFLA_MAX,
IFA_MAX,
RTA_MAX,
NDA_MAX,
RTA_MAX,
TCA_MAX,
TCA_MAX,
TCA_MAX,
TCAA_MAX
};
void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
{
struct rtattr *rta;
int size = RTA_LENGTH(attrlen);
rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
rta->rta_type = attrtype;
rta->rta_len = size;
memcpy(RTA_DATA(rta), data, attrlen);
}
int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
{
int err = 0;
NETLINK_CB(skb).dst_groups = group;
if (echo)
atomic_inc(&skb->users);
netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
if (echo)
err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
return err;
}
int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
{
struct rtattr *mx = (struct rtattr*)skb->tail;
int i;
RTA_PUT(skb, RTA_METRICS, 0, NULL);
for (i=0; i<RTAX_MAX; i++) {
if (metrics[i])
RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
}
mx->rta_len = skb->tail - (u8*)mx;
if (mx->rta_len == RTA_LENGTH(0))
skb_trim(skb, (u8*)mx - skb->data);
return 0;
rtattr_failure:
skb_trim(skb, (u8*)mx - skb->data);
return -1;
}
static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change)
{
struct ifinfomsg *r;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
r = NLMSG_DATA(nlh);
r->ifi_family = AF_UNSPEC;
r->ifi_type = dev->type;
r->ifi_index = dev->ifindex;
r->ifi_flags = dev_get_flags(dev);
r->ifi_change = change;
RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
if (1) {
u32 txqlen = dev->tx_queue_len;
RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen);
}
if (1) {
u32 weight = dev->weight;
RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight);
}
if (1) {
struct rtnl_link_ifmap map = {
.mem_start = dev->mem_start,
.mem_end = dev->mem_end,
.base_addr = dev->base_addr,
.irq = dev->irq,
.dma = dev->dma,
.port = dev->if_port,
};
RTA_PUT(skb, IFLA_MAP, sizeof(map), &map);
}
if (dev->addr_len) {
RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
}
if (1) {
u32 mtu = dev->mtu;
RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
}
if (dev->ifindex != dev->iflink) {
u32 iflink = dev->iflink;
RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink);
}
if (dev->qdisc_sleeping)
RTA_PUT(skb, IFLA_QDISC,
strlen(dev->qdisc_sleeping->ops->id) + 1,
dev->qdisc_sleeping->ops->id);
if (dev->master) {
u32 master = dev->master->ifindex;
RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master);
}
if (dev->get_stats) {
unsigned long *stats = (unsigned long*)dev->get_stats(dev);
if (stats) {
struct rtattr *a;
__u32 *s;
int i;
int n = sizeof(struct rtnl_link_stats)/4;
a = __RTA_PUT(skb, IFLA_STATS, n*4);
s = RTA_DATA(a);
for (i=0; i<n; i++)
s[i] = stats[i];
}
}
nlh->nlmsg_len = skb->tail - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->args[0];
struct net_device *dev;
read_lock(&dev_base_lock);
for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
continue;
if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
break;
}
read_unlock(&dev_base_lock);
cb->args[0] = idx;
return skb->len;
}
static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct ifinfomsg *ifm = NLMSG_DATA(nlh);
struct rtattr **ida = arg;
struct net_device *dev;
int err, send_addr_notify = 0;
dev = dev_get_by_index(ifm->ifi_index);
if (!dev)
return -ENODEV;
err = -EINVAL;
if (ifm->ifi_flags)
dev_change_flags(dev, ifm->ifi_flags);
if (ida[IFLA_MAP - 1]) {
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;
if (!dev->set_config) {
err = -EOPNOTSUPP;
goto out;
}
if (!netif_device_present(dev)) {
err = -ENODEV;
goto out;
}
if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map)))
goto out;
u_map = RTA_DATA(ida[IFLA_MAP - 1]);
k_map.mem_start = (unsigned long) u_map->mem_start;
k_map.mem_end = (unsigned long) u_map->mem_end;
k_map.base_addr = (unsigned short) u_map->base_addr;
k_map.irq = (unsigned char) u_map->irq;
k_map.dma = (unsigned char) u_map->dma;
k_map.port = (unsigned char) u_map->port;
err = dev->set_config(dev, &k_map);
if (err)
goto out;
}
if (ida[IFLA_ADDRESS - 1]) {
if (!dev->set_mac_address) {
err = -EOPNOTSUPP;
goto out;
}
if (!netif_device_present(dev)) {
err = -ENODEV;
goto out;
}
if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
goto out;
err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1]));
if (err)
goto out;
send_addr_notify = 1;
}
if (ida[IFLA_BROADCAST - 1]) {
if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len))
goto out;
memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]),
dev->addr_len);
send_addr_notify = 1;
}
if (ida[IFLA_MTU - 1]) {
if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
goto out;
err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1])));
if (err)
goto out;
}
if (ida[IFLA_TXQLEN - 1]) {
if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
goto out;
dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1]));
}
if (ida[IFLA_WEIGHT - 1]) {
if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
goto out;
dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1]));
}
if (ida[IFLA_IFNAME - 1]) {
char ifname[IFNAMSIZ];
if (ida[IFLA_IFNAME - 1]->rta_len > RTA_LENGTH(sizeof(ifname)))
goto out;
memset(ifname, 0, sizeof(ifname));
memcpy(ifname, RTA_DATA(ida[IFLA_IFNAME - 1]),
RTA_PAYLOAD(ida[IFLA_IFNAME - 1]));
ifname[IFNAMSIZ - 1] = '\0';
err = dev_change_name(dev, ifname);
if (err)
goto out;
}
err = 0;
out:
if (send_addr_notify)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
dev_put(dev);
return err;
}
static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->family;
if (s_idx == 0)
s_idx = 1;
for (idx=1; idx<NPROTO; idx++) {
int type = cb->nlh->nlmsg_type-RTM_BASE;
if (idx < s_idx || idx == PF_PACKET)
continue;
if (rtnetlink_links[idx] == NULL ||
rtnetlink_links[idx][type].dumpit == NULL)
continue;
if (idx > s_idx)
memset(&cb->args[0], 0, sizeof(cb->args));
if (rtnetlink_links[idx][type].dumpit(skb, cb))
break;
}
cb->family = idx;
return skb->len;
}
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
{
struct sk_buff *skb;
int size = NLMSG_SPACE(sizeof(struct ifinfomsg) +
sizeof(struct rtnl_link_ifmap) +
sizeof(struct rtnl_link_stats) + 128);
skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return;
if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) {
kfree_skb(skb);
return;
}
NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL);
}
static int rtnetlink_done(struct netlink_callback *cb)
{
return 0;
}
/* Protected by RTNL semaphore. */
static struct rtattr **rta_buf;
static int rtattr_max;
/* Process one rtnetlink message. */
static __inline__ int
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
struct rtnetlink_link *link;
struct rtnetlink_link *link_tab;
int sz_idx, kind;
int min_len;
int family;
int type;
int err;
/* Only requests are handled by kernel now */
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
type = nlh->nlmsg_type;
	/* A control message: ignore it */
if (type < RTM_BASE)
return 0;
/* Unknown message: reply with EINVAL */
if (type > RTM_MAX)
goto err_inval;
type -= RTM_BASE;
/* All the messages must have at least 1 byte length */
if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
return 0;
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
if (family >= NPROTO) {
*errp = -EAFNOSUPPORT;
return -1;
}
link_tab = rtnetlink_links[family];
if (link_tab == NULL)
link_tab = rtnetlink_links[PF_UNSPEC];
link = &link_tab[type];
sz_idx = type>>2;
kind = type&3;
if (kind != 2 && security_netlink_recv(skb)) {
*errp = -EPERM;
return -1;
}
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
u32 rlen;
if (link->dumpit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->dumpit == NULL)
goto err_inval;
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
link->dumpit,
rtnetlink_done)) != 0) {
return -1;
}
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
skb_pull(skb, rlen);
return -1;
}
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
min_len = rtm_min[sz_idx];
if (nlh->nlmsg_len < min_len)
goto err_inval;
if (nlh->nlmsg_len > min_len) {
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
while (RTA_OK(attr, attrlen)) {
unsigned flavor = attr->rta_type;
if (flavor) {
if (flavor > rta_max[sz_idx])
goto err_inval;
rta_buf[flavor-1] = attr;
}
attr = RTA_NEXT(attr, attrlen);
}
}
if (link->doit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->doit == NULL)
goto err_inval;
err = link->doit(skb, nlh, (void *)&rta_buf[0]);
*errp = err;
return err;
err_inval:
*errp = -EINVAL;
return -1;
}
/*
* Process one packet of messages.
* Malformed skbs with wrong lengths of messages are discarded silently.
*/
static inline int rtnetlink_rcv_skb(struct sk_buff *skb)
{
int err;
struct nlmsghdr * nlh;
while (skb->len >= NLMSG_SPACE(0)) {
u32 rlen;
nlh = (struct nlmsghdr *)skb->data;
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
return 0;
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
if (rtnetlink_rcv_msg(skb, nlh, &err)) {
			/* Not an error, but we must interrupt processing
			 * here. Note that in this case we do not pull the
			 * message from the skb; it will be processed later.
			 */
if (err == 0)
return -1;
netlink_ack(skb, nlh, err);
} else if (nlh->nlmsg_flags&NLM_F_ACK)
netlink_ack(skb, nlh, 0);
skb_pull(skb, rlen);
}
return 0;
}
/*
 * rtnetlink input queue processing routine:
 * - try to acquire the shared lock; if that fails, defer processing.
 * - feed skbs to rtnetlink_rcv_skb until it refuses a message, which
 *   happens when a dump has started or acquisition of the exclusive
 *   lock has failed.
 */
static void rtnetlink_rcv(struct sock *sk, int len)
{
do {
struct sk_buff *skb;
if (rtnl_shlock_nowait())
return;
while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
if (rtnetlink_rcv_skb(skb)) {
if (skb->len)
skb_queue_head(&sk->sk_receive_queue,
skb);
else
kfree_skb(skb);
break;
}
kfree_skb(skb);
}
up(&rtnl_sem);
netdev_run_todo();
} while (rtnl && rtnl->sk_receive_queue.qlen);
}
static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
{
[RTM_GETLINK - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
[RTM_SETLINK - RTM_BASE] = { .doit = do_setlink },
[RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
[RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
[RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add },
[RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete },
[RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info }
};
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
switch (event) {
case NETDEV_UNREGISTER:
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
break;
case NETDEV_REGISTER:
rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
break;
case NETDEV_UP:
case NETDEV_DOWN:
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
break;
case NETDEV_CHANGE:
case NETDEV_GOING_DOWN:
break;
default:
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
break;
}
return NOTIFY_DONE;
}
static struct notifier_block rtnetlink_dev_notifier = {
.notifier_call = rtnetlink_event,
};
void __init rtnetlink_init(void)
{
int i;
rtattr_max = 0;
for (i = 0; i < ARRAY_SIZE(rta_max); i++)
if (rta_max[i] > rtattr_max)
rtattr_max = rta_max[i];
rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
if (!rta_buf)
panic("rtnetlink_init: cannot allocate rta_buf\n");
rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
}
EXPORT_SYMBOL(__rta_fill);
EXPORT_SYMBOL(rtattr_parse);
EXPORT_SYMBOL(rtnetlink_dump_ifinfo);
EXPORT_SYMBOL(rtnetlink_links);
EXPORT_SYMBOL(rtnetlink_put_metrics);
EXPORT_SYMBOL(rtnl);
EXPORT_SYMBOL(rtnl_lock);
EXPORT_SYMBOL(rtnl_sem);
EXPORT_SYMBOL(rtnl_unlock);
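/*
 * Usage sketch (illustrative): a protocol installs its own handler table
 * the same way rtnetlink_init() installs link_rtnetlink_table above for
 * PF_UNSPEC and PF_PACKET. The handler names are assumptions.
 *
 *	static struct rtnetlink_link my_table[RTM_MAX - RTM_BASE + 1] = {
 *		[RTM_NEWROUTE - RTM_BASE] = { .doit   = my_newroute   },
 *		[RTM_GETROUTE - RTM_BASE] = { .dumpit = my_dumproutes },
 *	};
 *
 *	rtnetlink_links[PF_INET] = my_table;
 */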

View File

@@ -0,0 +1,291 @@
/* scm.c - Socket level control messages processing.
*
* Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* Alignment and value checking mods by Craig Metz
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/security.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/compat.h>
#include <net/scm.h>
/*
 * Only allow a user to send credentials that they could have set with
 * setu(g)id.
 */
static __inline__ int scm_check_creds(struct ucred *creds)
{
if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
((creds->uid == current->uid || creds->uid == current->euid ||
creds->uid == current->suid) || capable(CAP_SETUID)) &&
((creds->gid == current->gid || creds->gid == current->egid ||
creds->gid == current->sgid) || capable(CAP_SETGID))) {
return 0;
}
return -EPERM;
}
static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
{
int *fdp = (int*)CMSG_DATA(cmsg);
struct scm_fp_list *fpl = *fplp;
struct file **fpp;
int i, num;
num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
if (num <= 0)
return 0;
if (num > SCM_MAX_FD)
return -EINVAL;
if (!fpl)
{
fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
if (!fpl)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
}
fpp = &fpl->fp[fpl->count];
if (fpl->count + num > SCM_MAX_FD)
return -EINVAL;
/*
* Verify the descriptors and increment the usage count.
*/
for (i=0; i< num; i++)
{
int fd = fdp[i];
struct file *file;
if (fd < 0 || !(file = fget(fd)))
return -EBADF;
*fpp++ = file;
fpl->count++;
}
return num;
}
void __scm_destroy(struct scm_cookie *scm)
{
struct scm_fp_list *fpl = scm->fp;
int i;
if (fpl) {
scm->fp = NULL;
for (i=fpl->count-1; i>=0; i--)
fput(fpl->fp[i]);
kfree(fpl);
}
}
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
struct cmsghdr *cmsg;
int err;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
{
err = -EINVAL;
/* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
		/* The first check was omitted in <= 2.2.5. The reasoning was
		   that the parser checks cmsg_len in any case, so the
		   additional check would be duplicated work.
		   But if cmsg_level is not SOL_SOCKET, we do not check
		   for a too-short ancillary data object at all! Oops.
		   OK, let's add it...
		 */
if (!CMSG_OK(msg, cmsg))
goto error;
if (cmsg->cmsg_level != SOL_SOCKET)
continue;
switch (cmsg->cmsg_type)
{
case SCM_RIGHTS:
err=scm_fp_copy(cmsg, &p->fp);
if (err<0)
goto error;
break;
case SCM_CREDENTIALS:
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
goto error;
memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
err = scm_check_creds(&p->creds);
if (err)
goto error;
break;
default:
goto error;
}
}
if (p->fp && !p->fp->count)
{
kfree(p->fp);
p->fp = NULL;
}
return 0;
error:
scm_destroy(p);
return err;
}
int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
{
struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control;
struct cmsghdr cmhdr;
int cmlen = CMSG_LEN(len);
int err;
if (MSG_CMSG_COMPAT & msg->msg_flags)
return put_cmsg_compat(msg, level, type, len, data);
if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
msg->msg_flags |= MSG_CTRUNC;
return 0; /* XXX: return error? check spec. */
}
if (msg->msg_controllen < cmlen) {
msg->msg_flags |= MSG_CTRUNC;
cmlen = msg->msg_controllen;
}
cmhdr.cmsg_level = level;
cmhdr.cmsg_type = type;
cmhdr.cmsg_len = cmlen;
err = -EFAULT;
if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
goto out;
if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
goto out;
cmlen = CMSG_SPACE(len);
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
err = 0;
out:
return err;
}
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control;
int fdmax = 0;
int fdnum = scm->fp->count;
struct file **fp = scm->fp->fp;
int __user *cmfptr;
int err = 0, i;
if (MSG_CMSG_COMPAT & msg->msg_flags) {
scm_detach_fds_compat(msg, scm);
return;
}
if (msg->msg_controllen > sizeof(struct cmsghdr))
fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
/ sizeof(int));
if (fdnum < fdmax)
fdmax = fdnum;
for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
{
int new_fd;
err = security_file_receive(fp[i]);
if (err)
break;
err = get_unused_fd();
if (err < 0)
break;
new_fd = err;
err = put_user(new_fd, cmfptr);
if (err) {
put_unused_fd(new_fd);
break;
}
/* Bump the usage count and install the file. */
get_file(fp[i]);
fd_install(new_fd, fp[i]);
}
if (i > 0)
{
int cmlen = CMSG_LEN(i*sizeof(int));
if (!err)
err = put_user(SOL_SOCKET, &cm->cmsg_level);
if (!err)
err = put_user(SCM_RIGHTS, &cm->cmsg_type);
if (!err)
err = put_user(cmlen, &cm->cmsg_len);
if (!err) {
cmlen = CMSG_SPACE(i*sizeof(int));
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
}
}
if (i < fdnum || (fdnum && fdmax <= 0))
msg->msg_flags |= MSG_CTRUNC;
/*
* All of the files that fit in the message have had their
* usage counts incremented, so we just free the list.
*/
__scm_destroy(scm);
}
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
{
struct scm_fp_list *new_fpl;
int i;
if (!fpl)
return NULL;
new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
if (new_fpl) {
for (i=fpl->count-1; i>=0; i--)
get_file(fpl->fp[i]);
memcpy(new_fpl, fpl, sizeof(*fpl));
}
return new_fpl;
}
EXPORT_SYMBOL(__scm_destroy);
EXPORT_SYMBOL(__scm_send);
EXPORT_SYMBOL(put_cmsg);
EXPORT_SYMBOL(scm_detach_fds);
EXPORT_SYMBOL(scm_fp_dup);
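/*
 * Userspace counterpart sketch (illustrative): the SCM_RIGHTS path above
 * is driven by sendmsg() on an AF_UNIX socket with a control message
 * carrying file descriptors. The data iovec (at least one byte of
 * payload is needed in practice) is omitted for brevity.
 *
 *	char buf[CMSG_SPACE(sizeof(int))];
 *	struct msghdr msg = { .msg_control = buf,
 *			      .msg_controllen = sizeof(buf) };
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type  = SCM_RIGHTS;
 *	cmsg->cmsg_len   = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *	sendmsg(sock, &msg, 0);
 */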

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,287 @@
/*
* SUCS NET3:
*
* Generic stream handling routines. These are generic for most
* protocols. Even IP. Tonight 8-).
* This is used because TCP, LLC (others too) layer all have mostly
* identical sendmsg() and recvmsg() code.
* So we (will) share it here.
*
* Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
* (from old tcp.c code)
* Alan Cox <alan@redhat.com> (Borrowed comments 8-))
*/
#include <linux/module.h>
#include <linux/net.h>
#include <linux/signal.h>
#include <linux/tcp.h>
#include <linux/wait.h>
#include <net/sock.h>
/**
* sk_stream_write_space - stream socket write_space callback.
 * @sk - socket
 *
 * Called when write space becomes available on a stream socket: clears
 * the NOSPACE flag and wakes up writers sleeping or polling on the socket.
*/
void sk_stream_write_space(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
clear_bit(SOCK_NOSPACE, &sock->flags);
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible(sk->sk_sleep);
if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(sock, 2, POLL_OUT);
}
}
EXPORT_SYMBOL(sk_stream_write_space);
/**
* sk_stream_wait_connect - Wait for a socket to get into the connected state
* @sk - sock to wait on
* @timeo_p - for how long to wait
*
* Must be called with the socket locked.
*/
int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
{
struct task_struct *tsk = current;
DEFINE_WAIT(wait);
	while (1) {
		int done;
		if (sk->sk_err)
			return sock_error(sk);
		if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
			return -EPIPE;
		if (!*timeo_p)
			return -EAGAIN;
		if (signal_pending(tsk))
			return sock_intr_errno(*timeo_p);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		sk->sk_write_pending++;
		done = sk_wait_event(sk, timeo_p,
				     !((1 << sk->sk_state) &
				       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
		/* Leave the wait queue and drop the pending count on every
		 * pass, including the one that succeeds; breaking out before
		 * finish_wait() would leak both. */
		finish_wait(sk->sk_sleep, &wait);
		sk->sk_write_pending--;
		if (done)
			break;
	}
return 0;
}
EXPORT_SYMBOL(sk_stream_wait_connect);
/**
* sk_stream_closing - Return 1 if we still have things to send in our buffers.
* @sk - socket to verify
*/
static inline int sk_stream_closing(struct sock *sk)
{
return (1 << sk->sk_state) &
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
}
void sk_stream_wait_close(struct sock *sk, long timeout)
{
if (timeout) {
DEFINE_WAIT(wait);
do {
prepare_to_wait(sk->sk_sleep, &wait,
TASK_INTERRUPTIBLE);
if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
break;
} while (!signal_pending(current) && timeout);
finish_wait(sk->sk_sleep, &wait);
}
}
EXPORT_SYMBOL(sk_stream_wait_close);
/**
* sk_stream_wait_memory - Wait for more memory for a socket
* @sk - socket to wait for memory
* @timeo_p - for how long
*/
int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
{
int err = 0;
long vm_wait = 0;
long current_timeo = *timeo_p;
DEFINE_WAIT(wait);
if (sk_stream_memory_free(sk))
current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
while (1) {
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
if (!*timeo_p)
goto do_nonblock;
if (signal_pending(current))
goto do_interrupted;
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
if (sk_stream_memory_free(sk) && !vm_wait)
break;
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk->sk_write_pending++;
sk_wait_event(sk, &current_timeo, sk_stream_memory_free(sk) &&
vm_wait);
sk->sk_write_pending--;
if (vm_wait) {
vm_wait -= current_timeo;
current_timeo = *timeo_p;
if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
(current_timeo -= vm_wait) < 0)
current_timeo = 0;
vm_wait = 0;
}
*timeo_p = current_timeo;
}
out:
finish_wait(sk->sk_sleep, &wait);
return err;
do_error:
err = -EPIPE;
goto out;
do_nonblock:
err = -EAGAIN;
goto out;
do_interrupted:
err = sock_intr_errno(*timeo_p);
goto out;
}
EXPORT_SYMBOL(sk_stream_wait_memory);
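/*
 * Usage sketch (illustrative): a sendmsg() implementation waits here in
 * its main loop, in the style of tcp_sendmsg(), when the write queue is
 * out of space:
 *
 *	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 *	while (data_left) {
 *		if (!sk_stream_memory_free(sk)) {
 *			err = sk_stream_wait_memory(sk, &timeo);
 *			if (err)
 *				goto out_err;
 *		}
 *		...copy data into the write queue...
 *	}
 */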
void sk_stream_rfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
sk->sk_forward_alloc += skb->truesize;
}
EXPORT_SYMBOL(sk_stream_rfree);
int sk_stream_error(struct sock *sk, int flags, int err)
{
if (err == -EPIPE)
err = sock_error(sk) ? : -EPIPE;
if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
send_sig(SIGPIPE, current, 0);
return err;
}
EXPORT_SYMBOL(sk_stream_error);
void __sk_stream_mem_reclaim(struct sock *sk)
{
if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM) {
atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
sk->sk_prot->memory_allocated);
sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
if (*sk->sk_prot->memory_pressure &&
(atomic_read(sk->sk_prot->memory_allocated) <
sk->sk_prot->sysctl_mem[0]))
*sk->sk_prot->memory_pressure = 0;
}
}
EXPORT_SYMBOL(__sk_stream_mem_reclaim);
int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
{
int amt = sk_stream_pages(size);
sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
atomic_add(amt, sk->sk_prot->memory_allocated);
/* Under limit. */
if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
if (*sk->sk_prot->memory_pressure)
*sk->sk_prot->memory_pressure = 0;
return 1;
}
/* Over hard limit. */
if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
sk->sk_prot->enter_memory_pressure();
goto suppress_allocation;
}
/* Under pressure. */
if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
sk->sk_prot->enter_memory_pressure();
if (kind) {
if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
return 1;
} else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
return 1;
if (!*sk->sk_prot->memory_pressure ||
sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
sk_stream_pages(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
sk->sk_forward_alloc))
return 1;
suppress_allocation:
if (!kind) {
sk_stream_moderate_sndbuf(sk);
/* Fail only if socket is _under_ its sndbuf.
* In this case we cannot block, so that we have to fail.
*/
if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
return 1;
}
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
atomic_sub(amt, sk->sk_prot->memory_allocated);
return 0;
}
EXPORT_SYMBOL(sk_stream_mem_schedule);
void sk_stream_kill_queues(struct sock *sk)
{
/* First the read buffer. */
__skb_queue_purge(&sk->sk_receive_queue);
/* Next, the error queue. */
__skb_queue_purge(&sk->sk_error_queue);
/* Next, the write queue. */
BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
sk_stream_mem_reclaim(sk);
BUG_TRAP(!sk->sk_wmem_queued);
BUG_TRAP(!sk->sk_forward_alloc);
	/* It is _impossible_ for the backlog to contain anything
	 * when we get here. All user references to this socket
	 * have gone away; only the net layer can touch it.
	 */
}
EXPORT_SYMBOL(sk_stream_kill_queues);

View File

@@ -0,0 +1,182 @@
/* -*- linux-c -*-
* sysctl_net_core.c: sysctl interface to net core subsystem.
*
* Begun April 1, 1996, Mike Shaver.
* Added /proc/sys/net/core directory entry (empty =) ). [MS]
*/
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/config.h>
#include <linux/module.h>
#ifdef CONFIG_SYSCTL
extern int netdev_max_backlog;
extern int weight_p;
extern int no_cong_thresh;
extern int no_cong;
extern int lo_cong;
extern int mod_cong;
extern int netdev_fastroute;
extern int net_msg_cost;
extern int net_msg_burst;
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
extern int sysctl_core_destroy_delay;
extern int sysctl_optmem_max;
extern int sysctl_somaxconn;
#ifdef CONFIG_NET_DIVERT
extern char sysctl_divert_version[];
#endif /* CONFIG_NET_DIVERT */
/*
* This strdup() is used for creating copies of network
* device names to be handed over to sysctl.
*/
char *net_sysctl_strdup(const char *s)
{
char *rv = kmalloc(strlen(s)+1, GFP_KERNEL);
if (rv)
strcpy(rv, s);
return rv;
}
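/*
 * Usage sketch (illustrative): per-device sysctl code duplicates the
 * interface name so its ctl_table keeps a stable procname; the structure
 * and field names here are assumptions.
 *
 *	t->dev_name = net_sysctl_strdup(dev->name);
 *	if (!t->dev_name)
 *		goto free;
 *	t->table[0].procname = t->dev_name;
 */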
ctl_table core_table[] = {
#ifdef CONFIG_NET
{
.ctl_name = NET_CORE_WMEM_MAX,
.procname = "wmem_max",
.data = &sysctl_wmem_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_RMEM_MAX,
.procname = "rmem_max",
.data = &sysctl_rmem_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_WMEM_DEFAULT,
.procname = "wmem_default",
.data = &sysctl_wmem_default,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_RMEM_DEFAULT,
.procname = "rmem_default",
.data = &sysctl_rmem_default,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_DEV_WEIGHT,
.procname = "dev_weight",
.data = &weight_p,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_MAX_BACKLOG,
.procname = "netdev_max_backlog",
.data = &netdev_max_backlog,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_NO_CONG_THRESH,
.procname = "no_cong_thresh",
.data = &no_cong_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_NO_CONG,
.procname = "no_cong",
.data = &no_cong,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_LO_CONG,
.procname = "lo_cong",
.data = &lo_cong,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_MOD_CONG,
.procname = "mod_cong",
.data = &mod_cong,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_MSG_COST,
.procname = "message_cost",
.data = &net_msg_cost,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
.strategy = &sysctl_jiffies,
},
{
.ctl_name = NET_CORE_MSG_BURST,
.procname = "message_burst",
.data = &net_msg_burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_CORE_OPTMEM_MAX,
.procname = "optmem_max",
.data = &sysctl_optmem_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
#ifdef CONFIG_NET_DIVERT
{
.ctl_name = NET_CORE_DIVERT_VERSION,
.procname = "divert_version",
.data = (void *)sysctl_divert_version,
.maxlen = 32,
.mode = 0444,
.proc_handler = &proc_dostring
},
#endif /* CONFIG_NET_DIVERT */
#endif /* CONFIG_NET */
{
.ctl_name = NET_CORE_SOMAXCONN,
.procname = "somaxconn",
.data = &sysctl_somaxconn,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{ .ctl_name = 0 }
};
EXPORT_SYMBOL(net_sysctl_strdup);
#endif

View File

@@ -0,0 +1,155 @@
/*
 * Generic address resolution entity
*
* Authors:
* net_random Alan Cox
* net_ratelimit Andy Kleen
*
* Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <asm/system.h>
#include <asm/uaccess.h>
/*
This is a maximally equidistributed combined Tausworthe generator
based on code from GNU Scientific Library 1.5 (30 Jun 2004)
x_n = (s1_n ^ s2_n ^ s3_n)
s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19))
s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
The period of this generator is about 2^88.
From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
This is available on the net from L'Ecuyer's home page,
http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
There is an erratum in the paper "Tables of Maximally
Equidistributed Combined LFSR Generators", Mathematics of
Computation, 68, 225 (1999), 261--269:
http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
... the k_j most significant bits of z_j must be non-
zero, for each j. (Note: this restriction also applies to the
computer code given in [4], but was mistakenly not mentioned in
that paper.)
This affects the seeding procedure by imposing the requirement
s1 > 1, s2 > 7, s3 > 15.
*/
struct nrnd_state {
u32 s1, s2, s3;
};
static DEFINE_PER_CPU(struct nrnd_state, net_rand_state);
static u32 __net_random(struct nrnd_state *state)
{
#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
return (state->s1 ^ state->s2 ^ state->s3);
}
static void __net_srandom(struct nrnd_state *state, unsigned long s)
{
if (s == 0)
s = 1; /* default seed is 1 */
#define LCG(n) (69069 * n)
state->s1 = LCG(s);
state->s2 = LCG(state->s1);
state->s3 = LCG(state->s2);
/* "warm it up" */
__net_random(state);
__net_random(state);
__net_random(state);
__net_random(state);
__net_random(state);
__net_random(state);
}
unsigned long net_random(void)
{
unsigned long r;
struct nrnd_state *state = &get_cpu_var(net_rand_state);
r = __net_random(state);
put_cpu_var(state);
return r;
}
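/*
 * Usage sketch (illustrative): net_random() provides cheap jitter, not
 * cryptographic randomness; sk_stream_wait_memory() earlier in this
 * commit uses it exactly this way:
 *
 *	current_timeo = (net_random() % (HZ / 5)) + 2;
 */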
void net_srandom(unsigned long entropy)
{
struct nrnd_state *state = &get_cpu_var(net_rand_state);
__net_srandom(state, state->s1^entropy);
put_cpu_var(state);
}
void __init net_random_init(void)
{
int i;
for (i = 0; i < NR_CPUS; i++) {
struct nrnd_state *state = &per_cpu(net_rand_state,i);
__net_srandom(state, i+jiffies);
}
}
static int net_random_reseed(void)
{
int i;
unsigned long seed[NR_CPUS];
get_random_bytes(seed, sizeof(seed));
for (i = 0; i < NR_CPUS; i++) {
struct nrnd_state *state = &per_cpu(net_rand_state,i);
__net_srandom(state, seed[i]);
}
return 0;
}
late_initcall(net_random_reseed);
int net_msg_cost = 5*HZ;
int net_msg_burst = 10;
/*
* All net warning printk()s should be guarded by this function.
*/
int net_ratelimit(void)
{
return __printk_ratelimit(net_msg_cost, net_msg_burst);
}
EXPORT_SYMBOL(net_random);
EXPORT_SYMBOL(net_ratelimit);
EXPORT_SYMBOL(net_srandom);

File diff suppressed because it is too large