--- //depot/vendor/freebsd/src/sys/netinet/udp_usrreq.c 2008/04/17 21:41:23 +++ //depot/user/rwatson/netisr/src/sys/netinet/udp_usrreq.c 2008/04/18 23:09:59 @@ -195,7 +195,7 @@ struct sockaddr_in6 udp_in6; #endif - INP_WLOCK_ASSERT(inp); + INP_RLOCK_ASSERT(inp); #ifdef IPSEC /* Check AH/ESP integrity. */ @@ -217,6 +217,11 @@ if (inp->inp_vflag & INP_IPV6) { int savedflags; + /* + * XXXRW: we have an rlock so this is bad. Should + * be passing a flag to ip6_savecontrol rather than + * changing the inpcb flag. + */ savedflags = inp->inp_flags; inp->inp_flags &= ~INP_UNMAPPABLEOPTS; ip6_savecontrol(inp, n, &opts); @@ -412,7 +417,7 @@ inp->inp_fport != uh->uh_sport) continue; - INP_WLOCK(inp); + INP_RLOCK(inp); /* * Handle socket delivery policy for any-source @@ -469,7 +474,7 @@ } } if (blocked != 0) { - INP_WUNLOCK(inp); + INP_RUNLOCK(inp); continue; } } @@ -480,7 +485,7 @@ if (n != NULL) udp_append(last, ip, n, iphlen + sizeof(struct udphdr), &udp_in); - INP_WUNLOCK(last); + INP_RUNLOCK(last); } last = inp; /* @@ -507,7 +512,7 @@ } udp_append(last, ip, m, iphlen + sizeof(struct udphdr), &udp_in); - INP_WUNLOCK(last); + INP_RUNLOCK(last); INP_INFO_RUNLOCK(&udbinfo); return; } @@ -546,17 +551,17 @@ /* * Check the minimum TTL for socket. */ - INP_WLOCK(inp); + INP_RLOCK(inp); if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) goto badheadlocked; udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in); - INP_WUNLOCK(inp); + INP_RUNLOCK(inp); INP_INFO_RUNLOCK(&udbinfo); return; badheadlocked: if (inp) - INP_WUNLOCK(inp); + INP_RUNLOCK(inp); INP_INFO_RUNLOCK(&udbinfo); badunlocked: m_freem(m); @@ -570,7 +575,11 @@ udp_notify(struct inpcb *inp, int errno) { - INP_WLOCK_ASSERT(inp); + /* + * XXXRW: Could be rlock assert, but in_pcbnotifyall() acquires a + * wlock due to TCP. + */ + INP_LOCK_ASSERT(inp); inp->inp_socket->so_error = errno; sorwakeup(inp->inp_socket); @@ -612,11 +621,11 @@ inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, 0, NULL); if (inp != NULL) { - INP_WLOCK(inp); + INP_RLOCK(inp); if (inp->inp_socket != NULL) { udp_notify(inp, inetctlerrmap[cmd]); } - INP_WUNLOCK(inp); + INP_RUNLOCK(inp); } INP_INFO_RUNLOCK(&udbinfo); } else @@ -674,11 +683,11 @@ INP_INFO_RLOCK(&udbinfo); for (inp = LIST_FIRST(udbinfo.ipi_listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { - INP_WLOCK(inp); + INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) inp_list[i++] = inp; - INP_WUNLOCK(inp); + INP_RUNLOCK(inp); } INP_INFO_RUNLOCK(&udbinfo); n = i; @@ -686,7 +695,7 @@ error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; - INP_WLOCK(inp); + INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; bzero(&xi, sizeof(xi)); @@ -696,10 +705,10 @@ if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); xi.xi_inp.inp_gencnt = inp->inp_gencnt; - INP_WUNLOCK(inp); + INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else - INP_WUNLOCK(inp); + INP_RUNLOCK(inp); } if (!error) { /* @@ -837,12 +846,33 @@ return (error); } - if (src.sin_family == AF_INET || addr != NULL) { + /* + * We may enter binding/connecting routines and manipulate global + * address lists if a source address is requested explicitly, or if + * an explicit destination is requested and a connect has not yet + * been performed. In these cases, acquire the global lock. + * + * XXXRW: In some of these cases, if may be sufficient to acquire a + * read lock? + * + * XXXRW: This isn't right, as we are looking at the inp values + * before acquiring the pcbinfo lock. In the future, we'll want to + * detect this race and handle it... + */ + if (addr != NULL && (inp->inp_laddr.s_addr == INADDR_ANY && + inp->inp_lport == 0)) { INP_INFO_WLOCK(&udbinfo); + INP_WLOCK(inp); + unlock_udbinfo = 2; + } else if ((addr != NULL && (inp->inp_laddr.s_addr == INADDR_ANY || + inp->inp_lport == 0)) || src.sin_family == AF_INET) { + INP_INFO_RLOCK(&udbinfo); + INP_RLOCK(inp); unlock_udbinfo = 1; - } else + } else { unlock_udbinfo = 0; - INP_WLOCK(inp); + INP_RLOCK(inp); + } #ifdef MAC mac_inpcb_create_mbuf(inp, m); @@ -856,6 +886,7 @@ laddr = inp->inp_laddr; lport = inp->inp_lport; if (src.sin_family == AF_INET) { + INP_INFO_LOCK_ASSERT(&udbinfo); if ((lport == 0) || (laddr.s_addr == INADDR_ANY && src.sin_addr.s_addr == INADDR_ANY)) { @@ -868,35 +899,76 @@ goto release; } + /* + * If a UDP socket has been connected, then a local address/port will + * have been selected and bound. + * + * If a UDP socket has not been connected to, then an explicit + * destination address must be used, in which case local address/port + * may not have been selected and bound. + */ if (addr) { + if (inp->inp_faddr.s_addr != INADDR_ANY) { + error = EISCONN; + goto release; + } + + /* + * Jail may rewrite the destination address, so let it do + * that before we use it. + */ sin = (struct sockaddr_in *)addr; if (jailed(td->td_ucred)) prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr); - if (inp->inp_faddr.s_addr != INADDR_ANY) { - error = EISCONN; - goto release; - } - error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport, - &faddr.s_addr, &fport, NULL, td->td_ucred); - if (error) - goto release; - /* Commit the local port if newly assigned. */ - if (inp->inp_laddr.s_addr == INADDR_ANY && + /* + * If local address or port hasn't yet been selected, do that + * now. Once a port is selected, we commit the binding back + * to the socket; we also commit the binding of the address + * if in jail. + */ + if (inp->inp_laddr.s_addr == INADDR_ANY || inp->inp_lport == 0) { + INP_INFO_LOCK_ASSERT(&udbinfo); + error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, + &lport, &faddr.s_addr, &fport, NULL, + td->td_ucred); + if (error) + goto release; + /* - * Remember addr if jailed, to prevent rebinding. + * XXXRW: Why not commit the port if the address is + * !INADDR_ANY? */ - if (jailed(td->td_ucred)) - inp->inp_laddr = laddr; - inp->inp_lport = lport; - if (in_pcbinshash(inp) != 0) { - inp->inp_lport = 0; - error = EAGAIN; - goto release; + /* Commit the local port if newly assigned. */ + if (inp->inp_laddr.s_addr == INADDR_ANY && + inp->inp_lport == 0) { + INP_INFO_WLOCK_ASSERT(&udbinfo); + INP_WLOCK_ASSERT(inp); + /* + * Remember addr if jailed, to prevent + * rebinding. + */ + if (jailed(td->td_ucred)) + inp->inp_laddr = laddr; + inp->inp_lport = lport; + if (in_pcbinshash(inp) != 0) { + inp->inp_lport = 0; + error = EAGAIN; + goto release; + } + inp->inp_flags |= INP_ANONPORT; } - inp->inp_flags |= INP_ANONPORT; + } else { + /* + * XXXRW: Is it OK that we haven't checked for a + * colliding binding for these foreign address and + * port combined with the existing binding for local + * address and port? + */ + faddr = sin->sin_addr; + fport = sin->sin_port; } } else { faddr = inp->inp_faddr; @@ -969,17 +1041,27 @@ ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ udpstat.udps_opackets++; - if (unlock_udbinfo) + if (unlock_udbinfo == 2) INP_INFO_WUNLOCK(&udbinfo); + else if (unlock_udbinfo == 1) + INP_INFO_RUNLOCK(&udbinfo); error = ip_output(m, inp->inp_options, NULL, ipflags, inp->inp_moptions, inp); - INP_WUNLOCK(inp); + if (unlock_udbinfo == 2) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); return (error); release: - INP_WUNLOCK(inp); - if (unlock_udbinfo) + if (unlock_udbinfo == 2) { + INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); + } else if (unlock_udbinfo == 1) { + INP_RUNLOCK(inp); + INP_INFO_RUNLOCK(&udbinfo); + } else + INP_RUNLOCK(inp); m_freem(m); return (error); } @@ -1146,6 +1228,9 @@ { struct inpcb *inp; + /* + * XXXRW: Possibly could be rlock. + */ inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_shutdown: inp == NULL")); INP_WLOCK(inp);