Add a new socket option, IP_SUBSET, which allows the socket to match only a subset of UDP datagrams otherwise received on the socket by specifying a number of parts to break the binding into, and which specific part is desired. This allows constructs like the following: bzero(&sin, sizeof(sin)); sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_addr.s_addr = htonl(INADDR_ANY); sin.sin_port = htons(5000); bzero(&is, sizeof(is)); is.is_strategy = IP_SUBSET_STRATEGY_DEFAULT; is.is_count = SOCK_ARRAY_LEN; optval = 1; for (i = 0; i < SOCK_ARRAY_LEN; i++) { sock_array[i] = socket(PF_INET, SOCK_DGRAM, 0); if (sock_array[i] < 0) err(-1, "socket %d", i); if (setsockopt(sock_array[i], SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(optval)) < 0) err(-1, "socket %d setsockopt SO_REUSEPORT", i); if (bind(sock_array[i], (struct sockaddr *)&sin, sizeof(sin)) < 0) err(-1, "socket %d bind", i); is.is_member = i; if (setsockopt(sock_array[i], IPPROTO_UDP, IP_SUBSET, &is, sizeof(is)) < 0) err(-1, "socket %d setsockopt IP_SUBSET", i); } Notice that SO_REUSEPORT is required in order to allow otherwise colliding UDP IP/port bindings to coexist. Currently a very poor hash is used to place work based on the IP/port tuple, and something significantly better might well be preferred. However, what's here is probably still useful for prototyping purposes. Once sockets are set up, they behave as otherwise normal UDP sockets, so all the normal advice applies: bind the specific IP and port for each socket so as to avoid the stack having to figure out what source IP/port to use each time, etc. 
Index: netinet/udp_var.h =================================================================== --- netinet/udp_var.h (revision 185413) +++ netinet/udp_var.h (working copy) @@ -103,6 +103,7 @@ extern int udp_log_in_vain; void udp_ctlinput(int, struct sockaddr *, void *); +int udp_ctloutput(struct socket *so, struct sockopt *sopt); void udp_init(void); void udp_input(struct mbuf *, int); struct inpcb *udp_notify(struct inpcb *inp, int errno); Index: netinet/in.h =================================================================== --- netinet/in.h (revision 185413) +++ netinet/in.h (working copy) @@ -486,6 +486,17 @@ #define MCAST_BLOCK_SOURCE 84 /* block a source */ #define MCAST_UNBLOCK_SOURCE 85 /* unblock a source */ +/* Binding subsets. */ +#define IP_SUBSET 86 /* get/set binding subset */ + +struct ip_subset { + u_int is_strategy; + u_int is_count; + u_int is_member; +}; + +#define IP_SUBSET_STRATEGY_DEFAULT 0 + /* * Defaults and limits for options */ Index: netinet/in_pcb.c =================================================================== --- netinet/in_pcb.c (revision 185413) +++ netinet/in_pcb.c (working copy) @@ -1222,11 +1222,20 @@ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif - if (inp->inp_faddr.s_addr == faddr.s_addr && - inp->inp_laddr.s_addr == laddr.s_addr && - inp->inp_fport == fport && - inp->inp_lport == lport) - return (inp); + if (inp->inp_faddr.s_addr != faddr.s_addr || + inp->inp_laddr.s_addr != laddr.s_addr || + inp->inp_fport != fport || + inp->inp_lport != lport) + continue; + if (inp->inp_subset_count) { + /* + * XXXRW: This is not the hash you are looking for. 
+ */ + if ((faddr.s_addr ^ laddr.s_addr ^ fport ^ lport) % + inp->inp_subset_count != inp->inp_subset_member) + continue; + } + return (inp); } /* @@ -1250,16 +1259,29 @@ if (ifp && ifp->if_type == IFT_FAITH && (inp->inp_flags & INP_FAITH) == 0) continue; - if (inp->inp_laddr.s_addr == laddr.s_addr) - return (inp); - else if (inp->inp_laddr.s_addr == INADDR_ANY) { + if (inp->inp_laddr.s_addr != laddr.s_addr && + inp->inp_laddr.s_addr != INADDR_ANY) + continue; + if (inp->inp_subset_count) { + /* + * XXXRW: This is not the hash you + * are looking for. + */ + if ((faddr.s_addr ^ laddr.s_addr ^ + fport ^ lport) % + inp->inp_subset_count != + inp->inp_subset_member) + continue; + } + if (inp->inp_laddr.s_addr == INADDR_ANY) { #ifdef INET6 if (inp->inp_vflag & INP_IPV6PROTO) local_wild_mapped = inp; else #endif local_wild = inp; - } + } else /* inp_laddr.s_addr == laddr.s_addr */ + return (inp); } } #ifdef INET6 Index: netinet/in_pcb.h =================================================================== --- netinet/in_pcb.h (revision 185413) +++ netinet/in_pcb.h (working copy) @@ -207,6 +207,9 @@ } inp_depend6; LIST_ENTRY(inpcb) inp_portlist; /* (i/p) */ struct inpcbport *inp_phd; /* (i/p) head of this list */ + u_int inp_subset_strategy; + u_int inp_subset_count; + u_int inp_subset_member; #define inp_zero_size offsetof(struct inpcb, inp_gencnt) inp_gen_t inp_gencnt; /* (c) generation count of this instance */ struct rwlock inp_lock; Index: netinet/in_proto.c =================================================================== --- netinet/in_proto.c (revision 185413) +++ netinet/in_proto.c (working copy) @@ -122,7 +122,7 @@ .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = udp_input, .pr_ctlinput = udp_ctlinput, - .pr_ctloutput = ip_ctloutput, + .pr_ctloutput = udp_ctloutput, .pr_init = udp_init, .pr_usrreqs = &udp_usrreqs }, Index: netinet/udp_usrreq.c =================================================================== --- netinet/udp_usrreq.c (revision 185413) +++ 
netinet/udp_usrreq.c (working copy) @@ -643,6 +643,55 @@ udp_notify); } +int +udp_ctloutput(struct socket *so, struct sockopt *sopt) +{ + INIT_VNET_INET(so->so_vnet); + struct ip_subset is; + struct inpcb *inp; + int error; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("udp_ctloutput: inp == NULL")); + + if (sopt->sopt_level != IPPROTO_UDP) + return (ip_ctloutput(so, sopt)); + + switch (sopt->sopt_dir) { + case SOPT_GET: + switch (sopt->sopt_name) { + case IP_SUBSET: + bzero(&is, sizeof(is)); + INP_RLOCK(inp); + is.is_strategy = inp->inp_subset_strategy; + is.is_count = inp->inp_subset_count; + is.is_member = inp->inp_subset_member; + INP_RUNLOCK(inp); + return (sooptcopyout(sopt, &is, sizeof(is))); + } + break; + + case SOPT_SET: + switch (sopt->sopt_name) { + case IP_SUBSET: + error = sooptcopyin(sopt, &is, sizeof(is), + sizeof(is)); + if (error) + return (error); + if (is.is_strategy != IP_SUBSET_STRATEGY_DEFAULT) + return (EINVAL); + INP_WLOCK(inp); + inp->inp_subset_strategy = is.is_strategy; + inp->inp_subset_count = is.is_count; + inp->inp_subset_member = is.is_member; + INP_WUNLOCK(inp); + return (0); + } + break; + } + return (ENOPROTOOPT); +} + static int udp_pcblist(SYSCTL_HANDLER_ARGS) {