aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-11-03 15:41:12 -0400
committerDavid S. Miller <davem@davemloft.net>2016-11-03 15:41:12 -0400
commita799126864986076e576d7bec549ae055eaa4490 (patch)
tree89cb7c09e7b2a523a0fa8f266aae125fc163db93
parent1b99e5e854d4de2d795c4dbed7cc59c65ae372fe (diff)
parentdbd1759e6a9c4596d8609897c414da603da80914 (diff)
downloadnet-a799126864986076e576d7bec549ae055eaa4490.tar.gz
Merge branch 'ip-recvfragsize-cmsg'
Willem de Bruijn says: ==================== ip: add RECVFRAGSIZE cmsg On IP datagrams and raw sockets, when packets arrive fragmented, expose the largest received fragment size through a new cmsg. Protocols implemented on top of these sockets may use this, for instance, to inform peers to lower MSS on platforms that silently allow send calls to exceed PMTU and cause fragmentation. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/ipv6.h5
-rw-r--r--include/net/inet_sock.h1
-rw-r--r--include/uapi/linux/in.h1
-rw-r--r--include/uapi/linux/in6.h1
-rw-r--r--net/ipv4/ip_sockglue.c26
-rw-r--r--net/ipv6/datagram.c5
-rw-r--r--net/ipv6/ipv6_sockglue.c8
-rw-r--r--net/ipv6/reassembly.c7
8 files changed, 51 insertions, 3 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ca1ad9ebbc92b7..1afb6e8d35c353 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -229,8 +229,9 @@ struct ipv6_pinfo {
rxflow:1,
rxtclass:1,
rxpmtu:1,
- rxorigdstaddr:1;
- /* 2 bits hole */
+ rxorigdstaddr:1,
+ recvfragsize:1;
+ /* 1 bits hole */
} bits;
__u16 all;
} rxopt;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 236a81034fefec..c9cff977a7fb2c 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -228,6 +228,7 @@ struct inet_sock {
#define IP_CMSG_PASSSEC BIT(5)
#define IP_CMSG_ORIGDSTADDR BIT(6)
#define IP_CMSG_CHECKSUM BIT(7)
+#define IP_CMSG_RECVFRAGSIZE BIT(8)
/**
* sk_to_full_sk - Access to a full socket
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index eaf94919291aaf..4e557f4e95538a 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -117,6 +117,7 @@ struct in_addr {
#define IP_NODEFRAG 22
#define IP_CHECKSUM 23
#define IP_BIND_ADDRESS_NO_PORT 24
+#define IP_RECVFRAGSIZE 25
/* IP_MTU_DISCOVER values */
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index b39ea4f2e701d2..46444f8fbee4ee 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -283,6 +283,7 @@ struct in6_flowlabel_req {
#define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR
#define IPV6_TRANSPARENT 75
#define IPV6_UNICAST_IF 76
+#define IPV6_RECVFRAGSIZE 77
/*
* Multicast Routing:
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b8a2d63d1fb82f..ecbaae200131a8 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -97,6 +97,17 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
}
+static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
+{
+ int val;
+
+ if (IPCB(skb)->frag_max_size == 0)
+ return;
+
+ val = IPCB(skb)->frag_max_size;
+ put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
+}
+
static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
int tlen, int offset)
{
@@ -218,6 +229,9 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
if (flags & IP_CMSG_CHECKSUM)
ip_cmsg_recv_checksum(msg, skb, tlen, offset);
+
+ if (flags & IP_CMSG_RECVFRAGSIZE)
+ ip_cmsg_recv_fragsize(msg, skb);
}
EXPORT_SYMBOL(ip_cmsg_recv_offset);
@@ -614,6 +628,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_MULTICAST_LOOP:
case IP_RECVORIGDSTADDR:
case IP_CHECKSUM:
+ case IP_RECVFRAGSIZE:
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
@@ -726,6 +741,14 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
}
break;
+ case IP_RECVFRAGSIZE:
+ if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
+ goto e_inval;
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
+ break;
case IP_TOS: /* This sets both TOS and Precedence */
if (sk->sk_type == SOCK_STREAM) {
val &= ~INET_ECN_MASK;
@@ -1357,6 +1380,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_CHECKSUM:
val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
break;
+ case IP_RECVFRAGSIZE:
+ val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
+ break;
case IP_TOS:
val = inet->tos;
break;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 37874e2f30edf9..620c79a0130a77 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -715,6 +715,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
}
}
+ if (np->rxopt.bits.recvfragsize && opt->frag_max_size) {
+ int val = opt->frag_max_size;
+
+ put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val);
+ }
}
void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 636ec56f5f5028..6c126780fcf2be 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -868,6 +868,10 @@ pref_skip_coa:
np->autoflowlabel = valbool;
retv = 0;
break;
+ case IPV6_RECVFRAGSIZE:
+ np->rxopt.bits.recvfragsize = valbool;
+ retv = 0;
+ break;
}
release_sock(sk);
@@ -1310,6 +1314,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->autoflowlabel;
break;
+ case IPV6_RECVFRAGSIZE:
+ val = np->rxopt.bits.recvfragsize;
+ break;
+
default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 3815e8505ed2a3..e1da5b888cc490 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -211,7 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
{
struct sk_buff *prev, *next;
struct net_device *dev;
- int offset, end;
+ int offset, end, fragsize;
struct net *net = dev_net(skb_dst(skb)->dev);
u8 ecn;
@@ -336,6 +336,10 @@ found:
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.net, skb->truesize);
+ fragsize = -skb_network_offset(skb) + skb->len;
+ if (fragsize > fq->q.max_size)
+ fq->q.max_size = fragsize;
+
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
*/
@@ -495,6 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->nhoff = nhoff;
IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
+ IP6CB(head)->frag_max_size = fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
skb_postpush_rcsum(head, skb_network_header(head),