--- //depot/vendor/freebsd/src/sys/netinet/in_pcb.c 2008/12/02 21:40:16 +++ //depot/user/rwatson/udp/src/sys/netinet/in_pcb.c 2008/12/07 13:08:51 @@ -1,7 +1,7 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993, 1995 * The Regents of the University of California. - * Copyright (c) 2007 Robert N. M. Watson + * Copyright (c) 2007-2008 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -238,6 +238,7 @@ #endif INP_WLOCK(inp); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; + inp->inp_refcount = 1; /* Reference from the inpcbinfo */ #if defined(IPSEC) || defined(MAC) out: @@ -872,14 +873,10 @@ } /* - * Historically, in_pcbdetach() included the functionality now found in - * in_pcbfree() and in_pcbdrop(). They are now broken out to reflect the - * more complex life cycle of TCP. - * - * in_pcbdetach() is responsibe for disconnecting the socket from an inpcb. + * in_pcbdetach() is responsible for disassociating a socket from an inpcb. * For most protocols, this will be invoked immediately prior to calling - * in_pcbfree(). However, for TCP the inpcb may significantly outlive the - * socket, in which case in_pcbfree() may be deferred. + * in_pcbfree(). However, with TCP the inpcb may significantly outlive the + * socket, in which case in_pcbfree() is deferred. */ void in_pcbdetach(struct inpcb *inp) @@ -892,15 +889,19 @@ } /* - * in_pcbfree() is responsible for freeing an already-detached inpcb, as well - * as removing it from any global inpcb lists it might be on. + * in_pcbfree_internal() frees an inpcb that has been detached from its + * socket, and whose reference count has reached 0. It will also remove the + * inpcb from any global lists it might remain on. 
*/ -void -in_pcbfree(struct inpcb *inp) +static void +in_pcbfree_internal(struct inpcb *inp) { struct inpcbinfo *ipi = inp->inp_pcbinfo; - KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); + KASSERT(inp->inp_socket == NULL, + ("%s: inp_socket != NULL", __func__)); + KASSERT(inp->inp_refcount == 0, + ("%s: refcount !0", __func__)); INP_INFO_WLOCK_ASSERT(ipi); INP_WLOCK_ASSERT(inp); @@ -932,6 +933,77 @@ } /* + * in_pcbref() bumps the reference count on an inpcb in order to maintain + * stability of an inpcb pointer despite the inpcb lock being released. This + * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded, + * but where the inpcb lock is already held. + * + * While the inpcb will not be freed, releasing the inpcb lock means that the + * connection's state may change, so the caller should be careful to + * revalidate any cached state on reacquiring the lock. Drop the reference + * using in_pcbrele(). + */ +void +in_pcbref(struct inpcb *inp) +{ + + INP_WLOCK_ASSERT(inp); + + KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); + + inp->inp_refcount++; +} + +/* + * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to + * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we + * return 1 if this dropped the last reference and the inpcb was freed, or 0 + * if the inpcb remains valid, in which case the inpcb lock is still held. + */ +int +in_pcbrele(struct inpcb *inp) +{ +#ifdef INVARIANTS + struct inpcbinfo *ipi = inp->inp_pcbinfo; +#endif + + KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); + + INP_INFO_WLOCK_ASSERT(ipi); + INP_WLOCK_ASSERT(inp); + + inp->inp_refcount--; + if (inp->inp_refcount > 0) + return (0); + in_pcbfree_internal(inp); + return (1); +} + +/* + * Unconditionally schedule an inpcb to be freed by decrementing its + * reference count, which should occur only after the inpcb has been detached + * from its socket. 
If another thread holds a temporary reference (acquired + * using in_pcbref()) then the free is deferred until that reference is + * released using in_pcbrele(), but the inpcb is still unlocked. + */ +void +in_pcbfree(struct inpcb *inp) +{ +#ifdef INVARIANTS + struct inpcbinfo *ipi = inp->inp_pcbinfo; +#endif + + KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", + __func__)); + + INP_INFO_WLOCK_ASSERT(ipi); + INP_WLOCK_ASSERT(inp); + + if (!in_pcbrele(inp)) + INP_WUNLOCK(inp); +} + +/* * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and * port reservation, and preventing it from being returned by inpcb lookups. * --- //depot/vendor/freebsd/src/sys/netinet/in_pcb.h 2008/11/19 09:45:46 +++ //depot/user/rwatson/udp/src/sys/netinet/in_pcb.h 2008/11/26 09:51:19 @@ -170,6 +170,7 @@ u_char inp_ip_p; /* (c) protocol proto */ u_char inp_ip_minttl; /* (i) minimum TTL or drop */ uint32_t inp_ispare1; /* (x) connection id / queue id */ + u_int inp_refcount; /* (i) refcount */ void *inp_pspare[2]; /* (x) rtentry / general use */ /* Local and foreign ports, local and foreign addr. 
*/ @@ -478,7 +483,9 @@ struct in_addr, u_int, int, struct ifnet *); void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr, int, struct inpcb *(*)(struct inpcb *, int)); +void in_pcbref(struct inpcb *); void in_pcbrehash(struct inpcb *); +int in_pcbrele(struct inpcb *); void in_pcbsetsolabel(struct socket *so); int in_getpeeraddr(struct socket *so, struct sockaddr **nam); int in_getsockaddr(struct socket *so, struct sockaddr **nam); --- //depot/vendor/freebsd/src/sys/netinet/tcp_input.c 2008/12/02 21:40:16 +++ //depot/user/rwatson/udp/src/sys/netinet/tcp_input.c 2008/12/07 16:28:52 @@ -166,6 +166,30 @@ CTLFLAG_RW, tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer"); +int tcp_read_locking = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, read_locking, CTLFLAG_RW, + &tcp_read_locking, 0, "Enable read locking strategy"); + +int tcp_rlock_atfirst; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, rlock_atfirst, CTLFLAG_RD, + &tcp_rlock_atfirst, 0, ""); + +int tcp_wlock_atfirst; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_wlock_atfirst, CTLFLAG_RD, + &tcp_wlock_atfirst, 0, ""); + +int tcp_wlock_upgraded; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, wlock_upgraded, CTLFLAG_RD, + &tcp_wlock_upgraded, 0, ""); + +int tcp_wlock_relocked; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, wlock_relocked, CTLFLAG_RD, + &tcp_wlock_relocked, 0, ""); + +int tcp_wlock_looped; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, wlock_looped, CTLFLAG_RD, + &tcp_wlock_looped, 0, ""); + #ifdef VIMAGE_GLOBALS struct inpcbhead tcb; struct inpcbinfo tcbinfo; @@ -174,7 +198,8 @@ static void tcp_dooptions(struct tcpopt *, u_char *, int, int); static void tcp_do_segment(struct mbuf *, struct tcphdr *, - struct socket *, struct tcpcb *, int, int, uint8_t); + struct socket *, struct tcpcb *, int, int, uint8_t, + int); static void tcp_dropwithreset(struct mbuf *, struct tcphdr *, struct tcpcb *, int, int); static void tcp_pulloutofband(struct socket *, @@ -298,6 +323,10 @@ #endif struct tcpopt to; /* options in this segment */ char *s 
= NULL; /* address and port logging */ + int ti_locked; +#define TI_UNLOCKED 1 +#define TI_RLOCKED 2 +#define TI_WLOCKED 3 #ifdef TCPDEBUG /* @@ -450,11 +479,34 @@ drop_hdrlen = off0 + off; /* - * Locate pcb for segment. + * Locate pcb for segment, which requires a lock on tcbinfo. + * Optimistically acquire a global read lock unless header flags + * necessarily imply a state change. There are two cases where we + * might discover later we need a write lock despite the flags: ACKs + * moving a connection out of the syncache, and ACKs relating to a + * connection in TIMEWAIT. */ - INP_INFO_WLOCK(&V_tcbinfo); + if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || + tcp_read_locking == 0) { + INP_INFO_WLOCK(&V_tcbinfo); + ti_locked = TI_WLOCKED; + tcp_wlock_atfirst++; + } else { + INP_INFO_RLOCK(&V_tcbinfo); + ti_locked = TI_RLOCKED; + tcp_rlock_atfirst++; + } + findpcb: - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); +#ifdef INVARIANTS + if (ti_locked == TI_RLOCKED) + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + else if (ti_locked == TI_WLOCKED) + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + else + panic("%s: findpcb ti_locked %d\n", __func__, ti_locked); +#endif + #ifdef IPFIREWALL_FORWARD /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. @@ -561,14 +613,46 @@ } /* - * A previous connection in TIMEWAIT state is supposed to catch - * stray or duplicate segments arriving late. If this segment - * was a legitimate new connection attempt the old INPCB gets - * removed and we can try again to find a listening socket. + * A previous connection in TIMEWAIT state is supposed to catch stray + * or duplicate segments arriving late. If this segment was a + * legitimate new connection attempt the old INPCB gets removed and + * we can try again to find a listening socket. + * + * At this point, due to earlier optimism, we may hold a read lock on + * the inpcbinfo, rather than a write lock. 
If so, we need to + * upgrade, or if that fails, acquire a reference on the inpcb, drop + * all locks, acquire a global write lock, and then re-acquire the + * inpcb lock. We may at that point discover that another thread has + * tried to free the inpcb, in which case we need to loop back and + * try to find a new inpcb to deliver to. */ if (inp->inp_vflag & INP_TIMEWAIT) { + KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, + ("%s: INP_TIMEWAIT ti_locked %d", __func__, ti_locked)); + if (ti_locked == TI_RLOCKED) { + if (rw_try_upgrade(&V_tcbinfo.ipi_lock) == 0) { + in_pcbref(inp); + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_WLOCK(&V_tcbinfo); + ti_locked = TI_WLOCKED; + INP_WLOCK(inp); + if (in_pcbrele(inp)) { + tcp_wlock_looped++; + inp = NULL; + goto findpcb; + } + tcp_wlock_relocked++; + } else { + ti_locked = TI_WLOCKED; + tcp_wlock_upgraded++; + } + } + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + if (thflags & TH_SYN) tcp_dooptions(&to, optp, optlen, TO_SYN); + /* * NB: tcp_twcheck unlocks the INP and frees the mbuf. */ @@ -588,6 +673,40 @@ goto dropwithreset; } + /* + * We've identified a valid inpcb, but it could be that we need an + * inpcbinfo write lock and have only a read lock. In this case, + * attempt to upgrade/relock using the same strategy as the TIMEWAIT + * case above. 
+ */ + if (tp->t_state != TCPS_ESTABLISHED || + (thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || + tcp_read_locking == 0) { + KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, + ("%s: upgrade check ti_locked %d", __func__, ti_locked)); + + if (ti_locked == TI_RLOCKED) { + if (rw_try_upgrade(&V_tcbinfo.ipi_lock) == 0) { + in_pcbref(inp); + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_WLOCK(&V_tcbinfo); + ti_locked = TI_WLOCKED; + INP_WLOCK(inp); + if (in_pcbrele(inp)) { + tcp_wlock_looped++; + inp = NULL; + goto findpcb; + } + tcp_wlock_relocked++; + } else { + ti_locked = TI_WLOCKED; + tcp_wlock_upgraded++; + } + } + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + } + #ifdef MAC INP_WLOCK_ASSERT(inp); if (mac_inpcb_check_deliver(inp, m)) @@ -700,7 +819,7 @@ * the mbuf chain and unlocks the inpcb. */ tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, - iptos); + iptos, ti_locked); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return; } @@ -900,13 +1019,18 @@ * state. tcp_do_segment() always consumes the mbuf chain, unlocks * the inpcb, and unlocks pcbinfo. 
*/ - tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos); + tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return; dropwithreset: - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + else if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + else + panic("%s: dropwithreset ti_locked %d", __func__, ti_locked); + ti_locked = TI_UNLOCKED; if (inp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); @@ -917,10 +1041,16 @@ goto drop; dropunlock: - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + else if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + else + panic("%s: dropunlock ti_locked %d", __func__, ti_locked); + ti_locked = TI_UNLOCKED; + if (inp != NULL) INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); drop: INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); @@ -932,11 +1062,11 @@ static void tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, - struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) + struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, + int ti_locked) { INIT_VNET_INET(tp->t_vnet); int thflags, acked, ourfinisacked, needoutput = 0; - int headlocked = 1; int rstreason, todrop, win; u_long tiwin; struct tcpopt to; @@ -952,7 +1082,35 @@ #endif thflags = th->th_flags; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + /* + * If this is either a state-changing packet or current state isn't + * established, we require a write lock on tcbinfo. Otherwise, we + * allow either a read lock or a write lock, as we may have acquired + * a write lock due to a race. + * + * Require a global write lock for SYN/FIN/RST segments or + * non-established connections; otherwise accept either a read or + * write lock, as we may have conservatively acquired a write lock in + * certain cases in tcp_input() (is this still true?). 
Currently we + * will never enter with no lock, so we try to drop it quickly in the + * common pure ack/pure data cases. + */ + if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || + tp->t_state != TCPS_ESTABLISHED) { + KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for " + "SYN/FIN/RST/!EST", __func__, ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + } else { +#ifdef INVARIANTS + if (ti_locked == TI_RLOCKED) + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + else if (ti_locked == TI_WLOCKED) + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + else + panic("%s: ti_locked %d for EST", __func__, + ti_locked); +#endif + } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", __func__)); @@ -1106,14 +1263,20 @@ !IN_FASTRECOVERY(tp) && (to.to_flags & TOF_SACK) == 0 && TAILQ_EMPTY(&tp->snd_holes)))) { - KASSERT(headlocked, - ("%s: headlocked", __func__)); - INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; /* * This is a pure ack for outstanding data. */ + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + else if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + else + panic("%s: ti_locked %d on pure ACK", + __func__, ti_locked); + ti_locked = TI_UNLOCKED; + ++V_tcpstat.tcps_predack; + /* * "bad retransmit" recovery. */ @@ -1200,14 +1363,20 @@ tlen <= sbspace(&so->so_rcv)) { int newsize = 0; /* automatic sockbuf scaling */ - KASSERT(headlocked, ("%s: headlocked", __func__)); - INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; /* - * This is a pure, in-sequence data packet - * with nothing on the reassembly queue and - * we have enough buffer space to take it. + * This is a pure, in-sequence data packet with + * nothing on the reassembly queue and we have enough + * buffer space to take it. 
*/ + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + else if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + else + panic("%s: ti_locked %d on pure data " + "segment", __func__, ti_locked); + ti_locked = TI_UNLOCKED; + /* Clean receiver SACK report if present */ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) tcp_clean_sackreport(tp); @@ -1434,8 +1603,9 @@ tp->t_state = TCPS_SYN_RECEIVED; } - KASSERT(headlocked, ("%s: trimthenstep6: head not locked", - __func__)); + KASSERT(ti_locked == TI_WLOCKED, + ("trimthenstep6: ti_locked %d", ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -1563,17 +1733,23 @@ case TCPS_CLOSE_WAIT: so->so_error = ECONNRESET; close: + KASSERT(ti_locked == TI_WLOCKED, + ("tcp_do_segment: TH_RST 1 ti_locked %d", + ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + tp->t_state = TCPS_CLOSED; V_tcpstat.tcps_drops++; - KASSERT(headlocked, ("%s: trimthenstep6: " - "tcp_close: head not locked", __func__)); tp = tcp_close(tp); break; case TCPS_CLOSING: case TCPS_LAST_ACK: - KASSERT(headlocked, ("%s: trimthenstep6: " - "tcp_close.2: head not locked", __func__)); + KASSERT(ti_locked == TI_WLOCKED, + ("tcp_do_segment: TH_RST 2 ti_locked %d", + ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + tp = tcp_close(tp); break; } @@ -1678,8 +1854,10 @@ tp->t_state > TCPS_CLOSE_WAIT && tlen) { char *s; - KASSERT(headlocked, ("%s: trimthenstep6: tcp_close.3: head " - "not locked", __func__)); + KASSERT(ti_locked == TI_WLOCKED, ("%s: SS_NOFDEREF && " + "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket " "was closed, sending RST and removing tcpcb\n", @@ -1751,8 +1929,10 @@ * error and we send an RST and drop the connection. 
*/ if (thflags & TH_SYN) { - KASSERT(headlocked, ("%s: tcp_drop: trimthenstep6: " - "head not locked", __func__)); + KASSERT(ti_locked == TI_WLOCKED, + ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + tp = tcp_drop(tp, ECONNRESET); rstreason = BANDLIM_UNLIMITED; goto drop; @@ -2039,8 +2219,9 @@ } process_ACK: - KASSERT(headlocked, ("%s: process_ACK: head not locked", - __func__)); + INP_INFO_LOCK_ASSERT(&V_tcbinfo); + KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, + ("tcp_input: process_ACK ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); acked = th->th_ack - tp->snd_una; @@ -2197,11 +2378,9 @@ */ case TCPS_CLOSING: if (ourfinisacked) { - KASSERT(headlocked, ("%s: process_ACK: " - "head not locked", __func__)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; m_freem(m); return; } @@ -2215,8 +2394,7 @@ */ case TCPS_LAST_ACK: if (ourfinisacked) { - KASSERT(headlocked, ("%s: process_ACK: " - "tcp_close: head not locked", __func__)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); goto drop; } @@ -2225,7 +2403,9 @@ } step6: - KASSERT(headlocked, ("%s: step6: head not locked", __func__)); + INP_INFO_LOCK_ASSERT(&V_tcbinfo); + KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, + ("tcp_do_segment: step6 ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -2311,7 +2491,9 @@ tp->rcv_up = tp->rcv_nxt; } dodata: /* XXX */ - KASSERT(headlocked, ("%s: dodata: head not locked", __func__)); + INP_INFO_LOCK_ASSERT(&V_tcbinfo); + KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, + ("tcp_do_segment: dodata ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -2430,15 +2612,25 @@ * standard timers. 
*/ case TCPS_FIN_WAIT_2: - KASSERT(headlocked == 1, ("%s: dodata: " - "TCP_FIN_WAIT_2: head not locked", __func__)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + KASSERT(ti_locked == TI_WLOCKED, ("%s: dodata " + "TCP_FIN_WAIT_2 ti_locked: %d", __func__, + ti_locked)); + tcp_twstart(tp); INP_INFO_WUNLOCK(&V_tcbinfo); return; } } - INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + else if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + else + panic("%s: dodata epilogue ti_locked %d", __func__, + ti_locked); + ti_locked = TI_UNLOCKED; + #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, @@ -2452,10 +2644,11 @@ (void) tcp_output(tp); check_delack: - KASSERT(headlocked == 0, ("%s: check_delack: head locked", - __func__)); + KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", + __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); + if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); @@ -2464,7 +2657,9 @@ return; dropafterack: - KASSERT(headlocked, ("%s: dropafterack: head not locked", __func__)); + KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, + ("tcp_do_segment: dropafterack ti_locked %d", ti_locked)); + /* * Generate an ACK dropping incoming segment if it occupies * sequence space, where the ACK reflects our state. 
@@ -2491,8 +2686,15 @@ tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif - KASSERT(headlocked, ("%s: headlocked should be 1", __func__)); - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + else if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + else + panic("%s: dropafterack epilogue ti_locked %d", __func__, + ti_locked); + ti_locked = TI_UNLOCKED; + tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); INP_WUNLOCK(tp->t_inpcb); @@ -2500,8 +2702,13 @@ return; dropwithreset: - KASSERT(headlocked, ("%s: dropwithreset: head not locked", __func__)); - INP_INFO_WUNLOCK(&V_tcbinfo); + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + else if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); + else + panic("%s: dropwithreset ti_locked %d", __func__, ti_locked); + ti_locked = TI_UNLOCKED; if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); @@ -2511,6 +2718,16 @@ return; drop: + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + else if (ti_locked == TI_WLOCKED) + INP_INFO_WUNLOCK(&V_tcbinfo); +#ifdef INVARIANTS + else + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); +#endif + ti_locked = TI_UNLOCKED; + /* * Drop space held by incoming segment and return. */ @@ -2521,8 +2738,6 @@ #endif if (tp != NULL) INP_WUNLOCK(tp->t_inpcb); - if (headlocked) - INP_INFO_WUNLOCK(&V_tcbinfo); m_freem(m); }