--- //depot/vendor/freebsd/src/sys/conf/files 2008/12/25 07:40:15 +++ //depot/user/rwatson/netisr/src/sys/conf/files 2008/12/29 20:11:36 @@ -2188,6 +2188,7 @@ net/mppcc.c optional netgraph_mppc_compression net/mppcd.c optional netgraph_mppc_compression net/netisr.c standard +net/netisr2.c optional netisr2 net/ppp_deflate.c optional ppp_deflate net/ppp_tty.c optional ppp net/pfil.c optional ether | inet --- //depot/vendor/freebsd/src/sys/conf/options 2008/12/25 07:40:15 +++ //depot/user/rwatson/netisr/src/sys/conf/options 2008/12/29 20:11:36 @@ -404,6 +404,7 @@ MROUTING opt_mrouting.h NCP NETATALK opt_atalk.h +NETISR2 opt_netisr.h NFSLOCKD PPP_BSDCOMP opt_ppp.h PPP_DEFLATE opt_ppp.h --- //depot/vendor/freebsd/src/sys/i386/conf/GENERIC 2008/12/02 19:15:16 +++ //depot/user/rwatson/netisr/src/sys/i386/conf/GENERIC 2008/12/15 10:06:37 @@ -28,6 +28,8 @@ makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols +options BREAK_TO_DEBUGGER + options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking --- //depot/vendor/freebsd/src/sys/kern/kern_mbuf.c 2008/12/18 16:00:17 +++ //depot/user/rwatson/netisr/src/sys/kern/kern_mbuf.c 2008/12/29 20:11:36 @@ -420,6 +420,7 @@ m->m_pkthdr.csum_data = 0; m->m_pkthdr.tso_segsz = 0; m->m_pkthdr.ether_vtag = 0; + m->m_pkthdr.affinity = MBUF_AFFINITY_NONE; SLIST_INIT(&m->m_pkthdr.tags); #ifdef MAC /* If the label init fails, fail the alloc */ @@ -644,6 +645,7 @@ m->m_pkthdr.csum_data = 0; m->m_pkthdr.tso_segsz = 0; m->m_pkthdr.ether_vtag = 0; + m->m_pkthdr.affinity = MBUF_AFFINITY_NONE; SLIST_INIT(&m->m_pkthdr.tags); #ifdef MAC /* If the label init fails, fail the alloc */ --- //depot/vendor/freebsd/src/sys/net/if_ethersubr.c 2008/12/17 10:52:03 +++ //depot/user/rwatson/netisr/src/sys/net/if_ethersubr.c 2008/12/17 13:55:52 @@ -36,6 +36,7 @@ #include "opt_ipx.h" #include "opt_mac.h" #include "opt_netgraph.h" +#include "opt_netisr.h" #include "opt_carp.h" #include 
"opt_mbuf_profiling.h" @@ -56,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -799,8 +801,13 @@ case ETHERTYPE_IP: if ((m = ip_fastforward(m)) == NULL) return; +#ifdef NETISR2 + netisr2_dispatch(NETISR_IP, m); + return; +#else isr = NETISR_IP; break; +#endif case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { @@ -808,9 +815,14 @@ m_freem(m); return; } +#ifdef NETISR2 + netisr2_dispatch(NETISR_ARP, m); + return; +#else isr = NETISR_ARP; break; #endif +#endif #ifdef IPX case ETHERTYPE_IPX: if (ef_inputp && ef_inputp(ifp, eh, m) == 0) --- //depot/vendor/freebsd/src/sys/net/if_loop.c 2008/12/02 21:40:16 +++ //depot/user/rwatson/netisr/src/sys/net/if_loop.c 2008/12/15 10:06:37 @@ -38,6 +38,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipx.h" +#include "opt_netisr.h" #include #include @@ -55,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -281,9 +283,23 @@ switch (af) { #ifdef INET case AF_INET: +#ifdef NETISR2 + ifp->if_ipackets++; + ifp->if_ibytes += m->m_pkthdr.len; + + /* + * XXXRW: Is this the right place to do this? 
+ */ + if (m->m_pkthdr.affinity != MBUF_AFFINITY_NONE) + netisr2_queue_cpu(NETISR_IP, m, m->m_pkthdr.affinity); + else + netisr2_queue(NETISR_IP, m); + return (0); +#else isr = NETISR_IP; break; #endif +#endif #ifdef INET6 case AF_INET6: m->m_flags |= M_LOOP; --- //depot/vendor/freebsd/src/sys/netinet/if_ether.c 2008/12/24 05:35:14 +++ //depot/user/rwatson/netisr/src/sys/netinet/if_ether.c 2008/12/29 20:11:36 @@ -40,6 +40,7 @@ #include "opt_inet.h" #include "opt_mac.h" +#include "opt_netisr.h" #include "opt_carp.h" #include @@ -59,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -801,5 +803,8 @@ arpintrq.ifq_maxlen = 50; mtx_init(&arpintrq.ifq_mtx, "arp_inq", NULL, MTX_DEF); netisr_register(NETISR_ARP, arpintr, &arpintrq, 0); +#ifdef NETISR2 + netisr2_register(NETISR_ARP, arpintr, NULL, NULL, "arp", 50); +#endif } SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0); --- //depot/vendor/freebsd/src/sys/netinet/in_pcb.c 2008/12/17 13:00:15 +++ //depot/user/rwatson/netisr/src/sys/netinet/in_pcb.c 2008/12/17 13:55:52 @@ -38,6 +38,7 @@ #include "opt_ipsec.h" #include "opt_inet6.h" #include "opt_mac.h" +#include "opt_netisr.h" #include #include @@ -62,6 +63,8 @@ #include #include +#include +#include #include #include @@ -173,7 +176,19 @@ "Minimum time to keep sequental port " "allocation before switching to a random one"); +#ifdef NETISR2 /* + * Configure affinity strategy for loopback connections; if true, we use an + * inverse affinity for transmit/receive CPUs in order to try to place the + * sending and receiving ends of a loopback socket on different CPUs. + * Otherwise, we try to place them on the same CPU. + */ +int inpaffinity_inverse = 1; +SYSCTL_INT(_net_inet, OID_AUTO, affinity_inverse, CTLFLAG_RW, + &inpaffinity_inverse, 0, "Inverse affinity for send/receive"); +#endif + +/* * in_pcb.c: manage the Protocol Control Blocks. 
* * NOTE: It is assumed that most of these functions will be called with @@ -248,6 +263,60 @@ return (error); } +/* + * An inpcb has had its tuple updated, so update its affinity to match where + * the work will be assigned. + * + * XXXRW: This is surely not the right idea, but it is *an* idea. + * + * XXXRW: inp_affinity includes ports for non-TCP sockets, but netisr2 + * affinity doesn't since we aren't assured access to the port numbers as a + * result of fragmentation. + * + * XXXRW: Make sure the local vs. foreign ports are aligned with netisr2. + * + * XXXRW: In the future, we might want an INP_AFFINITY flag to indicate that + * the user has requested an affinity, and so we shouldn't overwrite it with + * an automatically generated one. + */ +static void +in_pcbaffinity(struct inpcb *inp) +{ + + INP_WLOCK_ASSERT(inp); + +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + inp->inp_recvaffinity = 0; + inp->inp_sendaffinity = 0; + } else { +#endif /* INET6 */ + /* + * We maintain two affinities: + * + * inp_recvaffinity -- the preferred CPU on which to run + * various tasks associated with receive on the socket, such + * as netisr2 processing of inbound packets. + * + * inp_sendaffinity -- the preferred CPU on which to schedule + * processing after it leaves the protocol code. I.e., the + * netisr2 work thread or interface transmit queue. 
+ */ +#ifdef NETISR2 + inp->inp_recvaffinity = ip_affinity_port(inp->inp_faddr, + inp->inp_fport, inp->inp_laddr, inp->inp_lport); + if (inpaffinity_inverse) + inp->inp_sendaffinity = ip_affinity_port( + inp->inp_laddr, inp->inp_lport, inp->inp_faddr, + inp->inp_fport); + else + inp->inp_sendaffinity = inp->inp_recvaffinity; +#else + inp->inp_recvaffinity = inp->inp_sendaffinity = 0; +#endif + } +} + int in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { @@ -269,6 +338,7 @@ inp->inp_lport = 0; return (EAGAIN); } + in_pcbaffinity(inp); if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); @@ -1106,6 +1176,10 @@ { struct inpcb *inp, *inp_temp; + /* + * We acquire a write lock on pcbinfo here because TCP will need on, + * but for UDP we could be using a read lock. + */ INP_INFO_WLOCK(pcbinfo); LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { INP_WLOCK(inp); @@ -1474,6 +1548,8 @@ INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); + in_pcbaffinity(inp); + #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; --- //depot/vendor/freebsd/src/sys/netinet/in_pcb.h 2008/12/17 13:05:14 +++ //depot/user/rwatson/netisr/src/sys/netinet/in_pcb.h 2008/12/17 13:55:52 @@ -162,6 +162,8 @@ struct socket *inp_socket; /* (i) back pointer to socket */ struct ucred *inp_cred; /* (c) cache of socket cred */ u_int32_t inp_flow; /* (i) IPv6 flow information */ + u_int inp_recvaffinity; /* (i) Receive CPU affinity. */ + u_int inp_sendaffinity; /* (i) Send CPU affinity. */ int inp_flags; /* (i) generic IP/datagram flags */ u_char inp_vflag; /* (i) IP version flag (v4/v6) */ u_char inp_ip_ttl; /* (i) time to live proto */ --- //depot/vendor/freebsd/src/sys/netinet/ip_divert.c 2008/12/10 23:15:16 +++ //depot/user/rwatson/netisr/src/sys/netinet/ip_divert.c 2008/12/15 10:06:37 @@ -544,6 +544,7 @@ inp = sotoinpcb(so); KASSERT(inp != NULL, ("div_shutdown: inp == NULL")); + /* XXXRW: Perhaps INP_RLOCK? 
*/ INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); --- //depot/vendor/freebsd/src/sys/netinet/ip_dummynet.c 2008/12/10 23:15:16 +++ //depot/user/rwatson/netisr/src/sys/netinet/ip_dummynet.c 2008/12/15 10:06:37 @@ -31,6 +31,7 @@ #define DUMMYNET_DEBUG #include "opt_inet6.h" +#include "opt_netisr.h" /* * This module implements IP dummynet, a bandwidth limiter/delay emulator @@ -74,6 +75,7 @@ #include #include #include +#include #include #include #include @@ -896,7 +898,11 @@ ip = mtod(m, struct ip *); ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); +#ifdef NETISR2 + netisr2_dispatch(NETISR_IP, m); +#else netisr_dispatch(NETISR_IP, m); +#endif break; #ifdef INET6 case DN_TO_IP6_IN: --- //depot/vendor/freebsd/src/sys/netinet/ip_input.c 2008/12/15 06:20:16 +++ //depot/user/rwatson/netisr/src/sys/netinet/ip_input.c 2008/12/15 10:17:04 @@ -37,6 +37,7 @@ #include "opt_ipstealth.h" #include "opt_ipsec.h" #include "opt_mac.h" +#include "opt_netisr.h" #include "opt_carp.h" #include @@ -62,6 +63,7 @@ #include #include #include +#include #include #include @@ -80,6 +82,7 @@ #ifdef IPSEC #include #endif /* IPSEC */ +#include #include @@ -223,7 +226,73 @@ static void ip_freef(struct ipqhead *, struct ipq *); +#ifdef NETISR2 /* + * Select a CPU affinity for an inbound packet without reference to any + * stateful CPU affinity in the inpcb. Used for initial work placement as + * looking up the inpcb affinity would currently be too expensive. + * + * XXXRW: This trade-off may change, especially as tcbinfo lock contention + * is reduced. + * + * XXXRW: We should probably be updating global IP stats when we drop here. + * + * XXXRW: A lot of this is identical to the first bit of ip_input() -- is + * reproducing it a good idea? 
+ */ +int ip_print_hash; +static struct mbuf * +ip_lookup_cpu(struct mbuf *m, u_int *cpuidp, u_int *strengthp) +{ + struct ip *ip; + + M_ASSERTPKTHDR(m); + + if (m->m_pkthdr.len < sizeof(struct ip)) { + m_freem(m); + return (NULL); + } + + if (m->m_len < sizeof (struct ip) && + (m = m_pullup(m, sizeof (struct ip))) == NULL) + return (NULL); + + ip = mtod(m, struct ip *); + + if (ip->ip_v != IPVERSION) { + m_freem(m); + return (NULL); + } + + /* + * Where protocols can provide a more specific CPU affinity based on + * information not visible to IP, do that. + */ + switch (ip->ip_p) { + case IPPROTO_TCP: + if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) + break; + return (tcp4_lookup_cpu(m, cpuidp, strengthp)); + } + + /* + * Where there isn't a more specific protocol handler, just use the + * IP addresses. + */ + *cpuidp = ip_affinity(ip->ip_src, ip->ip_dst); + + /* + * Allow direct dispatch on any CPU, as we require source ordering, + * but not strict execution of TCP on only one CPU for a connection. + */ + *strengthp = NETISR2_AFFINITY_WEAK; + if (ip_print_hash) + printf("Assigning packet cpuid %u\n", *cpuidp); + return (m); +} +#endif + +/* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. 
*/ @@ -319,6 +388,10 @@ ipintrq.ifq_maxlen = ipqmaxlen; mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF); netisr_register(NETISR_IP, ip_input, &ipintrq, 0); +#ifdef NETISR2 + netisr2_register(NETISR_IP, ip_input, ip_lookup_cpu, NULL, "ip", + ipqmaxlen); +#endif } void --- //depot/vendor/freebsd/src/sys/netinet/ip_output.c 2008/12/15 06:20:16 +++ //depot/user/rwatson/netisr/src/sys/netinet/ip_output.c 2008/12/15 10:17:04 @@ -37,6 +37,7 @@ #include "opt_mac.h" #include "opt_mbuf_stress_test.h" #include "opt_mpath.h" +#include "opt_netisr.h" #include #include @@ -54,6 +55,7 @@ #include #include +#include #include #include #ifdef RADIX_MPATH @@ -150,6 +152,12 @@ ip = mtod(m, struct ip *); /* + * Propagate transmit affinity to mbuf header. + */ + if (inp != NULL) + m->m_pkthdr.affinity = inp->inp_sendaffinity; + + /* * Fill in IP header. If we are not allowing fragmentation, * then the ip_id field is meaningless, but we don't set it * to zero. Doing so causes various problems when devices along @@ -437,6 +445,11 @@ sendit: #ifdef IPSEC + /* + * XXXRW: Check that it's OK with IPSEC that we may be passing down + * a read lock on inpcb. Is IPSEC using the inp lock to protect its + * own data? + */ switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) { case 1: goto bad; @@ -455,6 +468,10 @@ if (!PFIL_HOOKED(&inet_pfil_hook)) goto passout; + /* + * XXXRW: Confirm that pfil users aren't piggy-backing on the inpcb + * lock to protect data. + */ /* Run through list of hooks for output packets. */ odst.s_addr = ip->ip_dst.s_addr; error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp); @@ -479,7 +496,11 @@ m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; +#ifdef NETISR2 + error = netisr2_queue(NETISR_IP, m); +#else error = netisr_queue(NETISR_IP, m); +#endif goto done; } else goto again; /* Redo the routing table lookup. 
#ifdef _NET_NETISR2_H_
/*
 * Derive a flow identifier from the source and destination IPv4 addresses
 * only.  Any stateful affinity requested by the socket owner is ignored.
 *
 * XXXRW: Nothing suggests that this is a good hash, but at least it is
 * asymmetric.
 */
static __inline u_int
ip_flowid(struct in_addr sip, struct in_addr dip)
{
	u_int src_mix;

	/*
	 * Combine the source address shifted left by one with its bit 31
	 * moved down to bit 0 (a one-bit rotation if s_addr is 32 bits wide),
	 * so that swapping source and destination changes the result.
	 */
	src_mix = (sip.s_addr << 1) ^ (sip.s_addr >> 31);
	return (src_mix ^ dip.s_addr);
}

/*
 * Map the address-only flow identifier onto a CPU ID via netisr2.
 */
static __inline u_int
ip_affinity(struct in_addr sip, struct in_addr dip)
{
	u_int flowid;

	flowid = ip_flowid(sip, dip);
	return (netisr2_flowid2cpuid(flowid));
}

/*
 * Derive a flow identifier from source and destination IPv4 addresses and
 * port numbers.  Any stateful affinity requested by the socket owner is
 * ignored.  The ports are mixed in exactly as passed by the caller.
 *
 * XXXRW: Nothing suggests that this is a good hash, but at least it is
 * asymmetric.
 *
 * XXXRW: This should be aligned with the RSS hash algorithm and parameters
 * used on 10gbps cards so as to arrange for an alignment of affinity.
 */
static __inline u_int
ip_flowid_port(struct in_addr sip, u_short sport, struct in_addr dip,
    u_short dport)
{

	/*
	 * Start from the address-only flow ID, then fold in the source port
	 * and the destination port shifted left by one.
	 */
	return (ip_flowid(sip, dip) ^ sport ^ (dport << 1));
}

/*
 * Map the address-and-port flow identifier onto a CPU ID via netisr2.
 */
static __inline u_int
ip_affinity_port(struct in_addr sip, u_short sport, struct in_addr dip,
    u_short dport)
{
	u_int flowid;

	flowid = ip_flowid_port(sip, sport, dip, dport);
	return (netisr2_flowid2cpuid(flowid));
}
#endif /* _NET_NETISR2_H_ */
+ */ +struct mbuf * +tcp4_lookup_cpu(struct mbuf *m, u_int *cpuidp, u_int *strengthp) +{ + struct tcphdr *th; + struct ip *ip; + int hlen; + + KASSERT(m->m_len >= sizeof(*ip), ("tcp4_lookup_cpud: short")); + ip = mtod(m, struct ip *); + KASSERT(ip->ip_p == IPPROTO_TCP, ("tcp4_lookup_cpu: not TCP")); + hlen = ip->ip_hl << 2; + if (hlen < sizeof(struct ip)) { + m_freem(m); + return (NULL); + } + if (hlen + sizeof(*th) > m->m_len) { + if ((m = m_pullup(m, hlen + sizeof(*th))) == NULL) + return (NULL); + ip = mtod(m, struct ip *); + } + th = (struct tcphdr *)((caddr_t)ip + hlen); + *cpuidp = ip_affinity_port(ip->ip_src, th->th_sport, ip->ip_dst, + th->th_dport); + + /* + * Allow TCP to run on any CPU as long as it is source-ordered, since + * we prefer direct dispatch to migration overhead. + */ + *strengthp = NETISR2_AFFINITY_WEAK; + return (m); +} +#endif + /* * TCP input handling is split into multiple parts: * tcp6_input is a thin wrapper around tcp_input for the extended @@ -653,6 +696,7 @@ if (thflags & TH_SYN) tcp_dooptions(&to, optp, optlen, TO_SYN); + /* * NB: tcp_twcheck unlocks the INP and frees the mbuf. */ @@ -661,6 +705,7 @@ INP_INFO_WUNLOCK(&V_tcbinfo); return; } + /* * The TCPCB may no longer exist if the connection is winding * down or it is in the CLOSED state. Either way we drop the @@ -1240,6 +1285,20 @@ TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) { /* + * By definition, the header-predicted case won't transition + * states, so we shouldn't hold a global write lock here. + * + * XXXRW: Due to tcp_read_locking, temporarily disable + * assertion. + */ +#if 0 + INP_INFO_RLOCK_ASSERT(&tcbinfo); + KASSERT(ti_locked == TI_RLOCKED, + ("tcp_do_segment: header prediction ti_locked %d", + ti_locked)); +#endif + + /* * If last ACK falls within this segment's sequence numbers, * record the timestamp. * NOTE that the test is modified according to the latest @@ -1265,6 +1324,9 @@ TAILQ_EMPTY(&tp->snd_holes)))) { /* * This is a pure ack for outstanding data. 
+ * + * XXXRW: temporarily tolerate a write lock + * here. */ if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); @@ -2908,7 +2970,7 @@ char *cp = mtod(m, caddr_t) + cnt; struct tcpcb *tp = sototcpcb(so); - INP_WLOCK_ASSERT(tp->t_inpcb); + INP_LOCK_ASSERT(tp->t_inpcb); tp->t_iobc = *cp; tp->t_oobflags |= TCPOOB_HAVEDATA; @@ -3055,7 +3117,7 @@ const size_t min_protoh = sizeof(struct tcpiphdr); #endif - INP_WLOCK_ASSERT(tp->t_inpcb); + INP_LOCK_ASSERT(tp->t_inpcb); /* Initialize. */ #ifdef INET6 @@ -3376,7 +3438,7 @@ tcp_seq onxt = tp->snd_nxt; u_long ocwnd = tp->snd_cwnd; - INP_WLOCK_ASSERT(tp->t_inpcb); + INP_LOCK_ASSERT(tp->t_inpcb); tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; --- //depot/vendor/freebsd/src/sys/netinet/tcp_subr.c 2008/12/17 13:00:15 +++ //depot/user/rwatson/netisr/src/sys/netinet/tcp_subr.c 2008/12/17 13:55:52 @@ -426,7 +426,10 @@ { struct tcphdr *th = (struct tcphdr *)tcp_ptr; - INP_WLOCK_ASSERT(inp); + /* + * XXXRW: A read lock here is fine, but that won't ever happen. + */ + INP_LOCK_ASSERT(inp); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { @@ -620,7 +623,7 @@ * Packet is associated with a socket, so allow the * label of the response to reflect the socket label. 
*/ - INP_WLOCK_ASSERT(inp); + INP_LOCK_ASSERT(inp); mac_inpcb_create_mbuf(inp, m); } else { /* --- //depot/vendor/freebsd/src/sys/netinet/tcp_syncache.c 2008/12/17 13:00:15 +++ //depot/user/rwatson/netisr/src/sys/netinet/tcp_syncache.c 2008/12/17 13:55:52 @@ -37,6 +37,7 @@ #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_mac.h" +#include "opt_netisr.h" #include #include @@ -59,6 +60,8 @@ #include #include +#include +#include #include #include @@ -1413,6 +1416,12 @@ htons(tlen + optlen - hlen + IPPROTO_TCP)); m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); +#ifdef NETISR2 + m->m_pkthdr.affinity = ip_affinity_port(ip->ip_src, + th->th_sport, ip->ip_dst, th->th_dport); +#else + m->m_pkthdr.affinity = 0; +#endif error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL); } return (error); --- //depot/vendor/freebsd/src/sys/netinet/tcp_var.h 2008/12/13 19:15:13 +++ //depot/user/rwatson/netisr/src/sys/netinet/tcp_var.h 2008/12/15 10:06:37 @@ -617,6 +617,8 @@ u_long tcp_hc_getmtu(struct in_conninfo *); void tcp_hc_updatemtu(struct in_conninfo *, u_long); void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *); +struct mbuf *tcp4_lookup_cpu(struct mbuf *m, u_int *cpuidp, + u_int *strengthp); extern struct pr_usrreqs tcp_usrreqs; extern u_long tcp_sendspace; --- //depot/vendor/freebsd/src/sys/netinet/udp_usrreq.c 2008/12/10 23:15:16 +++ //depot/user/rwatson/netisr/src/sys/netinet/udp_usrreq.c 2008/12/15 10:06:37 @@ -1268,6 +1268,9 @@ { struct inpcb *inp; + /* + * XXXRW: Possibly could be rlock. 
+ */ inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_shutdown: inp == NULL")); INP_WLOCK(inp); --- //depot/vendor/freebsd/src/sys/sys/mbuf.h 2008/11/22 08:50:14 +++ //depot/user/rwatson/netisr/src/sys/sys/mbuf.h 2008/12/15 10:06:37 @@ -42,6 +42,7 @@ #ifdef WITNESS #include #endif +#include /* UINT_MAX */ #endif /* @@ -124,6 +125,9 @@ u_int16_t tso_segsz; /* TSO segment size */ u_int16_t ether_vtag; /* Ethernet 802.1p+q vlan tag */ SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ + +#define MBUF_AFFINITY_NONE UINT_MAX + u_int affinity; /* CPU/queue to process on */ }; /* --- //depot/vendor/freebsd/src/sys/sys/pcpu.h 2008/08/19 20:00:16 +++ //depot/user/rwatson/netisr/src/sys/sys/pcpu.h 2008/11/04 10:50:52 @@ -65,6 +65,7 @@ struct thread *pc_idlethread; /* Idle thread */ struct thread *pc_fpcurthread; /* Fp state owner */ struct thread *pc_deadthread; /* Zombie thread or NULL */ + struct thread *pc_netisr2; /* netisr2 thread. */ struct pcb *pc_curpcb; /* Current pcb */ uint64_t pc_switchtime; int pc_switchticks;