Index: uipc_usrreq.c =================================================================== --- uipc_usrreq.c (revision 189501) +++ uipc_usrreq.c (working copy) @@ -1,7 +1,7 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. - * Copyright (c) 2004-2008 Robert N. M. Watson + * Copyright (c) 2004-2009 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -99,13 +99,19 @@ #include +/* + * Locking key: + * (l) Locked using list lock + * (g) Locked using global linkage lock + */ + static uma_zone_t unp_zone; -static unp_gen_t unp_gencnt; -static u_int unp_count; /* Count of local sockets. */ +static unp_gen_t unp_gencnt; /* (l) */ +static u_int unp_count; /* (l) Count of local sockets. */ static ino_t unp_ino; /* Prototype for fake inode numbers. */ -static int unp_rights; /* File descriptors in flight. */ -static struct unp_head unp_shead; /* List of local stream sockets. */ -static struct unp_head unp_dhead; /* List of local datagram sockets. */ +static int unp_rights; /* (g) File descriptors in flight. */ +static struct unp_head unp_shead; /* (l) List of stream sockets. */ +static struct unp_head unp_dhead; /* (l) List of datagram sockets. */ static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; @@ -152,14 +158,21 @@ /*- * Locking and synchronization: * - * The global UNIX domain socket rwlock (unp_global_rwlock) protects all - * global variables, including the linked lists tracking the set of allocated - * UNIX domain sockets. The global rwlock also serves to prevent deadlock - * when more than one PCB lock is acquired at a time (i.e., during - * connect()). Finally, the global rwlock protects uncounted references from - * vnodes to sockets bound to those vnodes: to safely dereference the - * v_socket pointer, the global rwlock must be held while a full reference is - * acquired. 
+ * Three types of locks are defined in the local domain socket + * implementation: a global list mutex, a global linkage rwlock, and per- + * unpcb mutexes. Of the global locks, the list lock protects the socket + * count, global generation number, and stream/datagram global lists. The + * linkage lock allows following the inter-unpcb connection pointers without + * acquiring per-unpcb locks, as well as preventing deadlock when more than + * one per-unpcb lock is acquired at a time. + * + * Two global UNIX domain socket locks exist: a mutex protecting the global + * lists of all sockets, and an rwlock protecting remaining variables and + * inter-socket linkage. The latter lock also serves to prevent deadlock + * when more than one PCB lock is acquired at a time (i.e., during connect()). + * Finally, the linkage lock protects uncounted references from vnodes to + * sockets bound to those vnodes: to safely dereference the v_socket pointer, + * the linkage rwlock must be held while a full reference is acquired. * * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer, * allocated in pru_attach() and freed in pru_detach(). The validity of that @@ -197,25 +210,27 @@ * to perform namei() and other file system operations. 
*/ static struct rwlock unp_global_rwlock; +static struct mtx unp_list_lock; -#define UNP_GLOBAL_LOCK_INIT() rw_init(&unp_global_rwlock, \ +#define UNP_LINK_LOCK_INIT() rw_init(&unp_global_rwlock, \ "unp_global_rwlock") -#define UNP_GLOBAL_LOCK_ASSERT() rw_assert(&unp_global_rwlock, \ +#define UNP_LINK_LOCK_ASSERT() rw_assert(&unp_global_rwlock, \ RA_LOCKED) -#define UNP_GLOBAL_UNLOCK_ASSERT() rw_assert(&unp_global_rwlock, \ +#define UNP_LINK_UNLOCK_ASSERT() rw_assert(&unp_global_rwlock, \ RA_UNLOCKED) -#define UNP_GLOBAL_WLOCK() rw_wlock(&unp_global_rwlock) -#define UNP_GLOBAL_WUNLOCK() rw_wunlock(&unp_global_rwlock) -#define UNP_GLOBAL_WLOCK_ASSERT() rw_assert(&unp_global_rwlock, \ +#define UNP_LINK_RLOCK() rw_rlock(&unp_global_rwlock) +#define UNP_LINK_RUNLOCK() rw_runlock(&unp_global_rwlock) +#define UNP_LINK_WLOCK() rw_wlock(&unp_global_rwlock) +#define UNP_LINK_WUNLOCK() rw_wunlock(&unp_global_rwlock) +#define UNP_LINK_WLOCK_ASSERT() rw_assert(&unp_global_rwlock, \ RA_WLOCKED) -#define UNP_GLOBAL_WOWNED() rw_wowned(&unp_global_rwlock) -#define UNP_GLOBAL_RLOCK() rw_rlock(&unp_global_rwlock) -#define UNP_GLOBAL_RUNLOCK() rw_runlock(&unp_global_rwlock) -#define UNP_GLOBAL_RLOCK_ASSERT() rw_assert(&unp_global_rwlock, \ - RA_RLOCKED) +#define UNP_LIST_LOCK_INIT() mtx_init(&unp_list_lock, \ + "unp_list_lock", NULL, MTX_DEF) +#define UNP_LIST_LOCK() mtx_lock(&unp_list_lock) +#define UNP_LIST_UNLOCK() mtx_unlock(&unp_list_lock) #define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ "unp_mtx", "unp_mtx", \ @@ -285,7 +300,7 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { @@ -294,7 +309,7 @@ UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); } static int @@ -311,7 +326,7 @@ KASSERT(unp != NULL, ("uipc_accept: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); - 
UNP_GLOBAL_RLOCK(); + UNP_LINK_RLOCK(); unp2 = unp->unp_conn; if (unp2 != NULL && unp2->unp_addr != NULL) { UNP_PCB_LOCK(unp2); @@ -322,7 +337,7 @@ sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); } - UNP_GLOBAL_RUNLOCK(); + UNP_LINK_RUNLOCK(); return (0); } @@ -331,7 +346,7 @@ { u_long sendspace, recvspace; struct unpcb *unp; - int error, locked; + int error; KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { @@ -362,22 +377,12 @@ so->so_pcb = unp; unp->unp_refcount = 1; - /* - * uipc_attach() may be called indirectly from within the UNIX domain - * socket code via sonewconn() in unp_connect(). Since rwlocks can - * not be recursed, we do the closest thing. - */ - locked = 0; - if (!UNP_GLOBAL_WOWNED()) { - UNP_GLOBAL_WLOCK(); - locked = 1; - } + UNP_LIST_LOCK(); unp->unp_gencnt = ++unp_gencnt; unp_count++; LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead : &unp_shead, unp, unp_link); - if (locked) - UNP_GLOBAL_WUNLOCK(); + UNP_LIST_UNLOCK(); return (0); } @@ -474,14 +479,14 @@ ASSERT_VOP_ELOCKED(vp, "uipc_bind"); soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; unp->unp_addr = soun; unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); VOP_UNLOCK(vp, 0); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); @@ -503,9 +508,9 @@ int error; KASSERT(td == curthread, ("uipc_connect: td != curthread")); - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); error = unp_connect(so, nam, td); - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); return (error); } @@ -517,7 +522,7 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_close: unp == NULL")); - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { @@ -526,7 +531,7 @@ UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); - UNP_GLOBAL_WUNLOCK(); + 
UNP_LINK_WUNLOCK(); } static int @@ -535,7 +540,7 @@ struct unpcb *unp, *unp2; int error; - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); unp = so1->so_pcb; KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); UNP_PCB_LOCK(unp); @@ -545,7 +550,7 @@ error = unp_connect2(so1, so2, PRU_CONNECT2); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); return (error); } @@ -560,12 +565,13 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); + UNP_LIST_LOCK(); UNP_PCB_LOCK(unp); - LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; --unp_count; + UNP_LIST_UNLOCK(); /* * XXXRW: Should assert vp->v_socket == so. @@ -593,7 +599,7 @@ UNP_PCB_UNLOCK(ref); } local_unp_rights = unp_rights; - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); unp->unp_socket->so_pcb = NULL; saved_unp_addr = unp->unp_addr; unp->unp_addr = NULL; @@ -625,7 +631,7 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { @@ -634,7 +640,7 @@ UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); return (0); } @@ -768,9 +774,9 @@ if (control != NULL && (error = unp_internalize(&control, td))) goto release; if ((nam != NULL) || (flags & PRUS_EOF)) - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); else - UNP_GLOBAL_RLOCK(); + UNP_LINK_RLOCK(); switch (so->so_type) { case SOCK_DGRAM: { @@ -778,7 +784,7 @@ unp2 = unp->unp_conn; if (nam != NULL) { - UNP_GLOBAL_WLOCK_ASSERT(); + UNP_LINK_WLOCK_ASSERT(); if (unp2 != NULL) { error = EISCONN; break; @@ -819,7 +825,7 @@ error = ENOBUFS; } if (nam != NULL) { - UNP_GLOBAL_WLOCK_ASSERT(); + UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); @@ -831,7 +837,7 @@ case SOCK_STREAM: if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { - UNP_GLOBAL_WLOCK_ASSERT(); + 
UNP_LINK_WLOCK_ASSERT(); error = unp_connect(so, nam, td); if (error) break; /* XXX */ @@ -915,9 +921,9 @@ } if ((nam != NULL) || (flags & PRUS_EOF)) - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); else - UNP_GLOBAL_RUNLOCK(); + UNP_LINK_RUNLOCK(); if (control != NULL && error != 0) unp_dispose(control); @@ -940,7 +946,7 @@ KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); sb->st_blksize = so->so_snd.sb_hiwat; - UNP_GLOBAL_RLOCK(); + UNP_LINK_RLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (so->so_type == SOCK_STREAM && unp2 != NULL) { @@ -952,7 +958,7 @@ unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; sb->st_ino = unp->unp_ino; UNP_PCB_UNLOCK(unp); - UNP_GLOBAL_RUNLOCK(); + UNP_LINK_RUNLOCK(); return (0); } @@ -964,12 +970,12 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); return (0); } @@ -1139,7 +1145,7 @@ char buf[SOCK_MAXADDRLEN]; struct sockaddr *sa; - UNP_GLOBAL_WLOCK_ASSERT(); + UNP_LINK_WLOCK_ASSERT(); unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); @@ -1155,7 +1161,7 @@ UNP_PCB_UNLOCK(unp); return (EALREADY); } - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); unp->unp_flags |= UNP_CONNECTING; UNP_PCB_UNLOCK(unp); @@ -1194,7 +1200,7 @@ * Lock global lock for two reasons: make sure v_socket is stable, * and to protect simultaneous locking of multiple pcbs. */ - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); so2 = vp->v_socket; if (so2 == NULL) { error = ECONNREFUSED; @@ -1272,7 +1278,7 @@ UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); bad2: - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); if (vfslocked) /* * Giant has been previously acquired. 
This means filesystem @@ -1284,7 +1290,7 @@ vput(vp); VFS_UNLOCK_GIANT(vfslocked); free(sa, M_SONAME); - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp->unp_flags &= ~UNP_CONNECTING; UNP_PCB_UNLOCK(unp); @@ -1302,7 +1308,7 @@ unp2 = sotounpcb(so2); KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); - UNP_GLOBAL_WLOCK_ASSERT(); + UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); @@ -1339,7 +1345,7 @@ KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL")); - UNP_GLOBAL_WLOCK_ASSERT(); + UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); @@ -1399,10 +1405,10 @@ * OK, now we're committed to doing something. */ xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); - UNP_GLOBAL_RLOCK(); + UNP_LIST_LOCK(); gencnt = unp_gencnt; n = unp_count; - UNP_GLOBAL_RUNLOCK(); + UNP_LIST_UNLOCK(); xug->xug_len = sizeof *xug; xug->xug_count = n; @@ -1416,7 +1422,7 @@ unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); - UNP_GLOBAL_RLOCK(); + UNP_LIST_LOCK(); for (unp = LIST_FIRST(head), i = 0; unp && i < n; unp = LIST_NEXT(unp, unp_link)) { UNP_PCB_LOCK(unp); @@ -1431,7 +1437,7 @@ } UNP_PCB_UNLOCK(unp); } - UNP_GLOBAL_RUNLOCK(); + UNP_LIST_UNLOCK(); n = i; /* In case we lost some during malloc. 
*/ error = 0; @@ -1499,7 +1505,7 @@ struct unpcb *unp2; struct socket *so; - UNP_GLOBAL_WLOCK_ASSERT(); + UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; @@ -1516,7 +1522,7 @@ struct socket *so = unp->unp_socket; struct unpcb *unp2; - UNP_GLOBAL_WLOCK_ASSERT(); + UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); so->so_error = errno; @@ -1556,7 +1562,7 @@ int f; u_int newlen; - UNP_GLOBAL_UNLOCK_ASSERT(); + UNP_LINK_UNLOCK_ASSERT(); error = 0; if (controlp != NULL) /* controlp == NULL => free control messages */ @@ -1666,7 +1672,8 @@ LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); TASK_INIT(&unp_gc_task, 0, unp_gc, NULL); - UNP_GLOBAL_LOCK_INIT(); + UNP_LINK_LOCK_INIT(); + UNP_LIST_LOCK_INIT(); } static int @@ -1686,7 +1693,7 @@ int error, oldfds; u_int newlen; - UNP_GLOBAL_UNLOCK_ASSERT(); + UNP_LINK_UNLOCK_ASSERT(); error = 0; *controlp = NULL; @@ -1880,14 +1887,14 @@ { struct unpcb *unp; - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); if ((unp = fptounp(fp)) != NULL) { unp->unp_file = fp; unp->unp_msgcount++; } fhold(fp); unp_rights++; - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); } static void @@ -1895,11 +1902,11 @@ { struct unpcb *unp; - UNP_GLOBAL_WLOCK(); + UNP_LINK_WLOCK(); if ((unp = fptounp(fp)) != NULL) unp->unp_msgcount--; unp_rights--; - UNP_GLOBAL_WUNLOCK(); + UNP_LINK_WUNLOCK(); } /* @@ -1987,7 +1994,7 @@ int i; unp_taskcount++; - UNP_GLOBAL_RLOCK(); + UNP_LIST_LOCK(); /* * First clear all gc flags from previous runs. */ @@ -2008,7 +2015,7 @@ LIST_FOREACH(unp, *head, unp_link) unp_gc_process(unp); } while (unp_marked); - UNP_GLOBAL_RUNLOCK(); + UNP_LIST_UNLOCK(); if (unp_unreachable == 0) return; @@ -2022,7 +2029,7 @@ * Iterate looking for sockets which have been specifically marked * as as unreachable and store them locally. 
*/ - UNP_GLOBAL_RLOCK(); + UNP_LIST_LOCK(); for (i = 0, head = heads; *head != NULL; head++) LIST_FOREACH(unp, *head, unp_link) if (unp->unp_gcflag & UNPGC_DEAD) { @@ -2033,7 +2040,7 @@ KASSERT(i <= unp_unreachable, ("unp_gc: incorrect unreachable count.")); } - UNP_GLOBAL_RUNLOCK(); + UNP_LIST_UNLOCK(); /* * Now flush all sockets, free'ing rights. This will free the Index: subr_witness.c =================================================================== --- subr_witness.c (revision 189501) +++ subr_witness.c (working copy) @@ -522,6 +522,8 @@ /* * UNIX Domain Sockets */ + { "unp_global_rwlock", &lock_class_rw }, + { "unp_list_lock", &lock_class_mtx_sleep }, { "unp", &lock_class_mtx_sleep }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL },