From 2da62906b1e298695e1bb725927041cd59942c98 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 14 Mar 2015 21:13:46 -0400 Subject: [net] drop 'size' argument of sock_recvmsg() all callers have it equal to msg_data_left(msg). Signed-off-by: Al Viro --- net/socket.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index 5f77a8e93830bd..956426e347af9f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -709,17 +709,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, - size_t size, int flags) + int flags) { - return sock->ops->recvmsg(sock, msg, size, flags); + return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags); } -int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, - int flags) +int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) { - int err = security_socket_recvmsg(sock, msg, size, flags); + int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags); - return err ?: sock_recvmsg_nosec(sock, msg, size, flags); + return err ?: sock_recvmsg_nosec(sock, msg, flags); } EXPORT_SYMBOL(sock_recvmsg); @@ -746,7 +745,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size); set_fs(KERNEL_DS); - result = sock_recvmsg(sock, msg, size, flags); + result = sock_recvmsg(sock, msg, flags); set_fs(oldfs); return result; } @@ -796,7 +795,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!iov_iter_count(to)) /* Match SYS5 behaviour */ return 0; - res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags); + res = sock_recvmsg(sock, &msg, msg.msg_flags); *to = msg.msg_iter; return res; } @@ -1696,7 +1695,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iocb = NULL; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags); + err = sock_recvmsg(sock, &msg, flags); if (err >= 0 && addr != NULL) { err2 = move_addr_to_user(&address, @@ -2073,7 +2072,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; unsigned long cmsg_ptr; - int total_len, len; + int len; ssize_t err; /* kernel mode address */ @@ -2091,7 +2090,6 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov); if (err < 0) return err; - total_len = iov_iter_count(&msg_sys->msg_iter); cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); @@ -2101,8 +2099,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, - total_len, flags); + err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags); if (err < 0) goto out_freeiov; len = err; -- cgit 1.2.3-korg From c14ac9451c34832554db33386a4393be8bba3a7b Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sat, 2 Apr 2016 23:08:12 -0400 Subject: sock: enable timestamping using control messages Currently, SOL_TIMESTAMPING can only be enabled using setsockopt. This is very costly when users want to sample writes to gather tx timestamps. Add support for enabling SO_TIMESTAMPING via control messages by using tsflags added in `struct sockcm_cookie` (added in the previous patches in this series) to set the tx_flags of the last skb created in a sendmsg. With this patch, the timestamp recording bits in tx_flags of the skbuff is overridden if SO_TIMESTAMPING is passed in a cmsg. Please note that this is only effective for overriding the recording timestamps flags. Users should enable timestamp reporting (e.g., SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID) using socket options and then should ask for SOF_TIMESTAMPING_TX_* using control messages per sendmsg to sample timestamps for each write. Signed-off-by: Soheil Hassas Yeganeh Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 ++- include/net/ipv6.h | 6 ++++-- include/net/sock.h | 10 ++++++---- net/can/raw.c | 2 +- net/ipv4/ping.c | 5 +++-- net/ipv4/raw.c | 11 ++++++----- net/ipv4/tcp.c | 20 +++++++++++++++----- net/ipv4/udp.c | 7 ++++--- net/ipv6/icmp.c | 6 ++++-- net/ipv6/ip6_output.c | 15 +++++++++------ net/ipv6/ping.c | 3 ++- net/ipv6/raw.c | 5 ++--- net/ipv6/udp.c | 7 ++++--- net/l2tp/l2tp_ip6.c | 2 +- net/packet/af_packet.c | 30 +++++++++++++++++++++++++----- net/socket.c | 10 +++++----- 16 files changed, 93 insertions(+), 49 deletions(-) (limited to 'net/socket.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 510e90a6bb2618..9abc36bf77eae3 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -861,7 +861,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; if (skb->sk && sk_fullsock(skb->sk)) { - sock_tx_timestamp(skb->sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags, + &skb_shinfo(skb)->tx_flags); sw_tx_timestamp(skb); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d0aeb97aec5d2b..55ee1eb7d026df 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -867,7 +867,8 @@ int ip6_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag); + struct rt6_info *rt, unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc); int ip6_push_pending_frames(struct sock *sk); @@ -884,7 +885,8 @@ struct sk_buff *ip6_make_skb(struct sock *sk, void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, - unsigned int flags, int dontfrag); + unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc); static inline struct sk_buff *ip6_finish_skb(struct sock *sk) { diff --git a/include/net/sock.h b/include/net/sock.h index af012da5e608d1..e91b87f54f99b3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2057,19 +2057,21 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, sk->sk_stamp = skb->tstamp; } -void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags); +void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @sk: socket sending this packet + * @tsflags: timestamping flags to use * @tx_flags: completed with instructions for time stamping * * Note : callers should take care of initial *tx_flags value (usually 0) */ -static inline void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) +static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags, + __u8 *tx_flags) { - if (unlikely(sk->sk_tsflags)) - __sock_tx_timestamp(sk, tx_flags); + if (unlikely(tsflags)) + __sock_tx_timestamp(tsflags, tx_flags); if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; } diff --git a/net/can/raw.c b/net/can/raw.c index 2e67b1423cd32d..972c187d40abb0 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -755,7 +755,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (err < 0) goto free_skb; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sk->sk_tsflags, &skb_shinfo(skb)->tx_flags); skb->dev = dev; skb->sk = sk; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 670639bf9f7eb4..66ddcb60519a16 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -737,6 +737,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* no remote port */ } + ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; @@ -744,8 +745,6 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.ttl = 0; ipc.tos = -1; - sock_tx_timestamp(sk, &ipc.tx_flags); - if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); if (unlikely(err)) { @@ -768,6 +767,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rcu_read_unlock(); } + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); + saddr = ipc.addr; ipc.addr = faddr = daddr; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 088ce665fc7bc6..438f50c1a6765c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -339,8 +339,8 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, struct msghdr *msg, size_t length, - struct rtable **rtp, - unsigned int flags) + struct rtable **rtp, unsigned int flags, + const struct sockcm_cookie *sockc) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); @@ -379,7 +379,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->ip_summed = CHECKSUM_NONE; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); skb->transport_header = skb->network_header; err = -EFAULT; @@ -540,6 +540,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) daddr = inet->inet_daddr; } + ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; @@ -638,10 +639,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (inet->hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, - &rt, msg->msg_flags); + &rt, msg->msg_flags, &ipc.sockc); else { - sock_tx_timestamp(sk, &ipc.tx_flags); + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); if (!ipc.addr) ipc.addr = fl4.daddr; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ce3c9eb901a0de..4d73858991afd0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -428,13 +428,13 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); -static void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb) +static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) { - if (sk->sk_tsflags) { + if (sk->sk_tsflags || tsflags) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - sock_tx_timestamp(sk, &shinfo->tx_flags); + sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags); if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP); @@ -959,7 +959,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, offset += copy; size -= copy; if (!size) { - tcp_tx_timestamp(sk, skb); + tcp_tx_timestamp(sk, sk->sk_tsflags, skb); goto out; } @@ -1079,6 +1079,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; + struct sockcm_cookie sockc; int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; bool sg; @@ -1121,6 +1122,15 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) /* 'common' sending to sendq */ } + sockc.tsflags = sk->sk_tsflags; + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) { + err = -EINVAL; + goto out_err; + } + } + /* This should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -1239,7 +1249,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) copied += copy; if (!msg_data_left(msg)) { - tcp_tx_timestamp(sk, skb); + tcp_tx_timestamp(sk, sockc.tsflags, skb); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bccb4e11047ab3..45ff590661f4bd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1027,12 +1027,11 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) */ connected = 1; } - ipc.addr = inet->inet_saddr; + ipc.sockc.tsflags = sk->sk_tsflags; + ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - sock_tx_timestamp(sk, &ipc.tx_flags); - if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); if (unlikely(err)) { @@ -1059,6 +1058,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) saddr = ipc.addr; ipc.addr = faddr = daddr; + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); + if (ipc.opt && ipc.opt->opt.srr) { if (!daddr) return -EINVAL; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 0a37ddc7af5157..6b573ebe49deff 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; + struct sockcm_cookie sockc_unused = {0}; int iif = 0; int addr_type = 0; int len; @@ -527,7 +528,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, (struct rt6_info *)dst, - MSG_DONTWAIT, np->dontfrag); + MSG_DONTWAIT, np->dontfrag, &sockc_unused); if (err) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); @@ -566,6 +567,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) int hlimit; u8 tclass; u32 mark = IP6_REPLY_MARK(net, skb->mark); + struct sockcm_cookie sockc_unused = {0}; saddr = &ipv6_hdr(skb)->daddr; @@ -617,7 +619,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, - np->dontfrag); + np->dontfrag, &sockc_unused); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9428345d3a0787..612f3d138bf05e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1258,7 +1258,8 @@ static int __ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - unsigned int flags, int dontfrag) + unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc) { struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; @@ -1329,7 +1330,7 @@ static int __ip6_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { - sock_tx_timestamp(sk, &tx_flags); + sock_tx_timestamp(sk, sockc->tsflags, &tx_flags); if (tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = sk->sk_tskey++; @@ -1565,7 +1566,8 @@ int ip6_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag) + struct rt6_info *rt, unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -1593,7 +1595,8 @@ int ip6_append_data(struct sock *sk, return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, &np->cork, sk_page_frag(sk), getfrag, - from, length, transhdrlen, flags, dontfrag); + from, length, transhdrlen, flags, dontfrag, + sockc); } EXPORT_SYMBOL_GPL(ip6_append_data); @@ -1752,7 +1755,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, - int dontfrag) + int dontfrag, const struct sockcm_cookie *sockc) { struct inet_cork_full cork; struct inet6_cork v6_cork; @@ -1779,7 +1782,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, - flags, dontfrag); + flags, dontfrag, sockc); if (err) { __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); return ERR_PTR(err); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index c382db7a2e7319..da1cff79e447fd 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -62,6 +62,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct dst_entry *dst; struct rt6_info *rt; struct pingfakehdr pfh; + struct sockcm_cookie junk = {0}; pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); @@ -144,7 +145,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) err = ip6_append_data(sk, ping_getfrag, &pfh, len, 0, hlimit, np->tclass, NULL, &fl6, rt, - MSG_DONTWAIT, np->dontfrag); + MSG_DONTWAIT, np->dontfrag, &junk); if (err) { ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index f175ec0a97ce9c..b07ce21983aaf1 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -822,8 +822,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (fl6.flowi6_oif == 0) fl6.flowi6_oif = sk->sk_bound_dev_if; - sockc.tsflags = 0; - + sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); @@ -901,7 +900,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, dontfrag, &sockc); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2a787af421637c..b772a7641fbdd8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1248,7 +1248,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; - sockc.tsflags = 0; + sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; @@ -1324,7 +1324,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, dontfrag, &sockc); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) err = udp_v6_send_skb(skb, &fl6); @@ -1351,7 +1351,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag, + &sockc); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 4f29a4a0f36062..1a38f20b1ca620 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -627,7 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, dontfrag, &sockc_unused); if (err) ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1ecfa710ca9803..0007e23202e4eb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1837,6 +1837,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); struct sk_buff *skb = NULL; struct net_device *dev; + struct sockcm_cookie sockc; __be16 proto = 0; int err; int extra_len = 0; @@ -1925,12 +1926,21 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, goto out_unlock; } + sockc.tsflags = 0; + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) { + err = -EINVAL; + goto out_unlock; + } + } + skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2486,7 +2496,8 @@ static int packet_snd_vnet_gso(struct sk_buff *skb, static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, void *data, int tp_len, - __be16 proto, unsigned char *addr, int hlen, int copylen) + __be16 proto, unsigned char *addr, int hlen, int copylen, + const struct sockcm_cookie *sockc) { union tpacket_uhdr ph; int to_write, offset, len, nr_frags, len_max; @@ -2500,7 +2511,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; - sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); skb_shinfo(skb)->destructor_arg = ph.raw; skb_reserve(skb, hlen); @@ -2624,6 +2635,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) struct sk_buff *skb; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; + struct sockcm_cookie sockc; __be16 proto; int err, reserve = 0; void *ph; @@ -2655,6 +2667,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); } + sockc.tsflags = 0; + if (msg->msg_controllen) { + err = sock_cmsg_send(&po->sk, msg, &sockc); + if (unlikely(err)) + goto out; + } + err = -ENXIO; if (unlikely(dev == NULL)) goto out; @@ -2712,7 +2731,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) goto out_status; } tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, - addr, hlen, copylen); + addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && !po->has_vnet_hdr && @@ -2851,6 +2870,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + sockc.tsflags = 0; sockc.mark = sk->sk_mark; if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); @@ -2908,7 +2928,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { diff --git a/net/socket.c b/net/socket.c index 5f77a8e93830bd..979d3146b081d7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -587,20 +587,20 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) +void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) { u8 flags = *tx_flags; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE) + if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) flags |= SKBTX_HW_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) + if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) flags |= SKBTX_SW_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED) + if (tsflags & SOF_TIMESTAMPING_TX_SCHED) flags |= SKBTX_SCHED_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK) + if (tsflags & SOF_TIMESTAMPING_TX_ACK) flags |= SKBTX_ACK_TSTAMP; *tx_flags = flags; -- cgit 1.2.3-korg From 1e1d04e678cf72442f57ce82803c7a407769135f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 5 Apr 2016 17:10:15 +0200 Subject: net: introduce lockdep_is_held and update various places to use it The socket is either locked if we hold the slock spin_lock for lock_sock_fast and unlock_sock_fast or we own the lock (sk_lock.owned != 0). Check for this and at the same time improve that the current thread/cpu is really holding the lock. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/sock.h | 12 ++++++++++-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/cipso_ipv4.c | 3 ++- net/ipv4/ip_sockglue.c | 4 ++-- net/ipv4/tcp_ipv4.c | 8 +++----- net/ipv6/ipv6_sockglue.c | 6 ++++-- net/ipv6/tcp_ipv6.c | 2 +- net/socket.c | 2 +- 10 files changed, 26 insertions(+), 17 deletions(-) (limited to 'net/socket.c') diff --git a/include/net/sock.h b/include/net/sock.h index 91cee51086dc47..eb2d7c3e120b25 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1360,6 +1360,14 @@ do { \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) +static bool lockdep_sock_is_held(const struct sock *csk) +{ + struct sock *sk = (struct sock *)csk; + + return lockdep_is_held(&sk->sk_lock) || + lockdep_is_held(&sk->sk_lock.slock); +} + void lock_sock_nested(struct sock *sk, int subclass); static inline void lock_sock(struct sock *sk) @@ -1598,8 +1606,8 @@ static inline void sk_rethink_txhash(struct sock *sk) static inline struct dst_entry * __sk_dst_get(struct sock *sk) { - return rcu_dereference_check(sk->sk_dst_cache, sock_owned_by_user(sk) || - lockdep_is_held(&sk->sk_lock.slock)); + return rcu_dereference_check(sk->sk_dst_cache, + lockdep_sock_is_held(sk)); } static inline struct dst_entry * diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6438c5a7efc411..f6d183f8f33222 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -62,7 +62,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = daddr = usin->sin_addr.s_addr; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet_opt != NULL && inet_opt->opt.srr) { if (daddr == 0) return -EINVAL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 71bf1deba4c5ed..8ceb3cebcad4b6 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -868,7 +868,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a38b9910af6014..8217cd22f921d1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1107,7 +1107,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct ip_options_rcu *inet_opt; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index bdb2a07ec363b7..40d6b87713a132 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1933,7 +1933,8 @@ int cipso_v4_sock_setattr(struct sock *sk, sk_inet = inet_sk(sk); - old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk)); + old = rcu_dereference_protected(sk_inet->inet_opt, + lockdep_sock_is_held(sk)); if (sk_inet->is_icsk) { sk_conn = inet_csk(sk); if (old) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 1b7c0776c805b5..89b5f3bd669436 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -642,7 +642,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (err) break; old = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet->is_icsk) { struct inet_connection_sock *icsk = inet_csk(sk); #if IS_ENABLED(CONFIG_IPV6) @@ -1302,7 +1302,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, struct ip_options_rcu *inet_opt; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); opt->optlen = 0; if (inet_opt) memcpy(optbuf, &inet_opt->opt, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 456ff3d6a13223..f4f2a0a3849d3d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -157,7 +157,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = daddr = usin->sin_addr.s_addr; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet_opt && inet_opt->opt.srr) { if (!daddr) return -EINVAL; @@ -882,8 +882,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, /* caller either holds rcu_read_lock() or socket lock */ md5sig = rcu_dereference_check(tp->md5sig_info, - sock_owned_by_user(sk) || - lockdep_is_held((spinlock_t *)&sk->sk_lock.slock)); + lockdep_sock_is_held(sk)); if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) @@ -928,8 +927,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk) || - lockdep_is_held(&sk->sk_lock.slock)); + lockdep_sock_is_held(sk)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a5557d22f89ea9..4ff4b29894ebfe 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -407,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) break; - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, + lockdep_sock_is_held(sk)); opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); @@ -1124,7 +1125,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct ipv6_txoptions *opt; lock_sock(sk); - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, + lockdep_sock_is_held(sk)); len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7cde1b6fdda3fc..0e621bc1ae11c8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -234,7 +234,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/socket.c b/net/socket.c index 979d3146b081d7..afa3c347071735 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1046,7 +1046,7 @@ static int sock_fasync(int fd, struct file *filp, int on) return -EINVAL; lock_sock(sk); - wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); + wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk)); fasync_helper(fd, filp, on, &wq->fasync_list); if (!wq->fasync_list) -- cgit 1.2.3-korg From 0a2cf20c3fb62ad4717276b5303bf831f7b29d54 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Wed, 27 Apr 2016 23:39:01 -0400 Subject: tcp: remove SKBTX_ACK_TSTAMP since it is redundant The SKBTX_ACK_TSTAMP flag is set in skb_shinfo->tx_flags when the timestamp of the TCP acknowledgement should be reported on error queue. Since accessing skb_shinfo is likely to incur a cache-line miss at the time of receiving the ack, the txstamp_ack bit was added in tcp_skb_cb, which is set iff the SKBTX_ACK_TSTAMP flag is set for an skb. This makes SKBTX_ACK_TSTAMP flag redundant. Remove the SKBTX_ACK_TSTAMP and instead use the txstamp_ack bit everywhere. Note that this frees one bit in shinfo->tx_flags. Signed-off-by: Soheil Hassas Yeganeh Acked-by: Martin KaFai Lau Suggested-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +----- net/ipv4/tcp.c | 5 +++-- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_output.c | 17 +++++++++++------ net/socket.c | 3 --- 5 files changed, 16 insertions(+), 18 deletions(-) (limited to 'net/socket.c') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a1ce63979ad887..c84a5a1078c552 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -382,14 +382,10 @@ enum { /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, - - /* generate software timestamp on peer data acknowledgment */ - SKBTX_ACK_TSTAMP = 1 << 7, }; #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ - SKBTX_SCHED_TSTAMP | \ - SKBTX_ACK_TSTAMP) + SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 53890a730ff43a..91993782a94797 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -435,9 +435,10 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags); - if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) + if (tsflags & SOF_TIMESTAMPING_TX_ACK) + tcb->txstamp_ack = 1; + if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; - tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP); } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d5239c283cb5e..70c370b937621b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3087,8 +3087,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, return; shinfo = skb_shinfo(skb); - if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && - !before(shinfo->tskey, prior_snd_una) && + if (!before(shinfo->tskey, prior_snd_una) && before(shinfo->tskey, tcp_sk(sk)->snd_una)) __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b1b2045ac3a965..b3a31b4df57c6c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1111,11 +1111,17 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de tcp_verify_left_out(tp); } +static bool tcp_has_tx_tstamp(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->txstamp_ack || + (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP); +} + static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) { struct skb_shared_info *shinfo = skb_shinfo(skb); - if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) && + if (unlikely(tcp_has_tx_tstamp(skb)) && !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) { struct skb_shared_info *shinfo2 = skb_shinfo(skb2); u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP; @@ -2446,13 +2452,12 @@ u32 __tcp_select_window(struct sock *sk) void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb) { - const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb); - u8 tsflags = next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; - - if (unlikely(tsflags)) { + if (unlikely(tcp_has_tx_tstamp(next_skb))) { + const struct skb_shared_info *next_shinfo = + skb_shinfo(next_skb); struct skb_shared_info *shinfo = skb_shinfo(skb); - shinfo->tx_flags |= tsflags; + shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; shinfo->tskey = next_shinfo->tskey; TCP_SKB_CB(skb)->txstamp_ack |= TCP_SKB_CB(next_skb)->txstamp_ack; diff --git a/net/socket.c b/net/socket.c index 5dbb0bbe12a7d7..7789d79609dd1a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -600,9 +600,6 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) if (tsflags & SOF_TIMESTAMPING_TX_SCHED) flags |= SKBTX_SCHED_TSTAMP; - if (tsflags & SOF_TIMESTAMPING_TX_ACK) - flags |= SKBTX_ACK_TSTAMP; - *tx_flags = flags; } EXPORT_SYMBOL(__sock_tx_timestamp); -- cgit 1.2.3-korg