rfc9293xml2.original.xml   rfc9293.xml 
<?xml version="1.0" encoding="UTF-8"?> <?xml version='1.0' encoding='utf-8'?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ <!DOCTYPE rfc [
<!-- A set of on-line citation libraries are maintained on the xml2rfc web site. <!ENTITY nbsp "&#160;">
The next line defines an entity named RFC2629, which contains the necessary <!ENTITY zwsp "&#8203;">
XML <!ENTITY nbhy "&#8209;">
for the reference element, and is used much later in the file. This XML co <!ENTITY wj "&#8288;">
ntains an
anchor (also RFC2629) which can be used to cross-reference this item in the
text.
You can also use local file names instead of a URI. The environment variab
le
XML_LIBRARY provides a search path of directories to look at to locate a
relative path name for the file. There has to be one entity for each item t
o be
referenced. -->
<!ENTITY RFC0791 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.0791.xml">
<!ENTITY RFC0793 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.0793.xml">
<!ENTITY RFC0879 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.0879.xml">
<!ENTITY RFC0896 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.0896.xml">
<!ENTITY RFC1011 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.1011.xml">
<!ENTITY RFC1122 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.1122.xml">
<!ENTITY RFC1191 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.1191.xml">
<!ENTITY RFC1349 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.1349.xml">
<!ENTITY RFC1644 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.1644.xml">
<!ENTITY RFC2018 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.2018.xml">
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.2119.xml">
<!ENTITY RFC2474 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.2474.xml">
<!ENTITY RFC2525 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.2525.xml">
<!ENTITY RFC2675 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.2675.xml">
<!ENTITY RFC2873 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.2873.xml">
<!ENTITY RFC2883 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.2883.xml">
<!ENTITY RFC2914 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.2914.xml">
<!ENTITY RFC2923 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.2923.xml">
<!ENTITY RFC3168 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.3168.xml">
<!ENTITY RFC3449 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.3449.xml">
<!ENTITY RFC3465 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.3465.xml">
<!ENTITY RFC4727 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.4727.xml">
<!ENTITY RFC4821 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.4821.xml">
<!ENTITY RFC4953 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.4953.xml">
<!ENTITY RFC4987 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.4987.xml">
<!ENTITY RFC5033 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.5033.xml">
<!ENTITY RFC5044 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.5044.xml">
<!ENTITY RFC5461 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.5461.xml">
<!ENTITY RFC5570 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.5570.xml">
<!ENTITY RFC5681 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.5681.xml">
<!ENTITY RFC5795 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.5795.xml">
<!ENTITY RFC5925 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.5925.xml">
<!ENTITY RFC5961 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.5961.xml">
<!ENTITY RFC6093 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.6093.xml">
<!ENTITY RFC6191 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.6191.xml">
<!ENTITY RFC6298 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.6298.xml">
<!ENTITY RFC6429 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.6429.xml">
<!ENTITY RFC6528 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.6528.xml">
<!ENTITY RFC6633 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.6633.xml">
<!ENTITY RFC6691 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.6691.xml">
<!ENTITY RFC6864 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.6864.xml">
<!ENTITY RFC6994 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.6994.xml">
<!ENTITY RFC7094 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.7094.xml">
<!ENTITY RFC7323 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.7323.xml">
<!ENTITY RFC7413 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.7413.xml">
<!ENTITY RFC7414 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.7414.xml">
<!ENTITY RFC7657 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.7657.xml">
<!ENTITY RFC8087 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8087.xml">
<!ENTITY RFC8095 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8095.xml">
<!ENTITY RFC8174 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8174.xml">
<!ENTITY RFC8200 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8200.xml">
<!ENTITY RFC8201 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8201.xml">
<!ENTITY RFC8303 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8303.xml">
<!ENTITY RFC8504 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8504.xml">
<!ENTITY RFC8546 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8546.xml">
<!ENTITY RFC8548 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8548.xml">
<!ENTITY RFC8558 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8558.xml">
<!ENTITY RFC8684 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8684.xml">
<!ENTITY RFC8961 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.8961.xml">
<!ENTITY RFC9000 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.9000.xml">
<!ENTITY RFC9065 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC
.9065.xml">
<!-- There is also a library of current Internet Draft citations. It isn't a go
od idea to
actually use one for the template because it might have disappeared when yo
u come to test
this template. This is the form of the entity definition
&lt;!ENTITY I-D.mrose-writing-rfcs SYSTEM
"http://xml.resource.org/public/rfc/bibxml3/reference.I-D.mrose-writing-rfc
s.xml">
corresponding to a draft filename draft-mrose-writing-rfcs-nn.txt. The cita
tion will be
to the most recent draft in the sequence, and is updated roughly hourly on
the web site.
For working group drafts, the same principle applies: file name starts draf
t-ietf-wgname-..
and entity file is reference.I-D.ietf-wgname-... The corresponding entity
name is
I-D.ietf-wgname-... (I-D.mrose-writing-rfcs for the other example). Of cou
rse this doesn't
change when the draft version changes.
-->
<!ENTITY I-D.gont-tcpm-tcp-seccomp-prec SYSTEM "https://xml2rfc.tools.ietf.org/p
ublic/rfc/bibxml3/reference.I-D.draft-gont-tcpm-tcp-seccomp-prec-00.xml">
<!ENTITY I-D.gont-tcpm-tcp-seq-validation SYSTEM "https://xml2rfc.tools.ietf.org
/public/rfc/bibxml3/reference.I-D.draft-gont-tcpm-tcp-seq-validation-04.xml">
<!ENTITY I-D.ietf-tcpm-tcp-edo SYSTEM "https://xml2rfc.tools.ietf.org/public/rfc
/bibxml3/reference.I-D.draft-ietf-tcpm-tcp-edo-10.xml">
<!ENTITY I-D.mcquistin-augmented-ascii-diagrams SYSTEM "https://xml2rfc.tools.ie
tf.org/public/rfc/bibxml3/reference.I-D.draft-mcquistin-augmented-ascii-diagrams
-08.xml">
<!ENTITY I-D.iab-use-it-or-lose-it SYSTEM "https://xml2rfc.tools.ietf.org/public
/rfc/bibxml3/reference.I-D.draft-iab-use-it-or-lose-it-02.xml">
<!-- Fudge for XMLmind which doesn't have this built in -->
<!ENTITY nbsp "&#160;">
]> ]>
<rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="std" obsoletes="793, 8
<!-- Extra statement used by XSLT processors to control the output style. --> 79, 2873, 6093, 6429, 6528, 6691" updates="1011, 1122, 5961" ipr="pre5378Trust20
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> 0902" number="9293" docName="draft-ietf-tcpm-rfc793bis-28" submissionType="IETF"
consensus="true" xml:lang="en" tocInclude="true" tocDepth="3" symRefs="false" s
<!-- Processing Instructions can be placed here but if you are editing ortRefs="true" version="3">
with XMLmind (and maybe other XML editors) they are better placed
after the rfc element start tag as shown below. -->
<!-- Information about the document.
category values: std, bcp, info, exp, and historic
For Internet-Drafts, specify attribute "ipr".
(ipr values are: full3667, noModification3667, noDerivatives3667),
Also for Internet-Drafts, can specify values for
attributes "docName" and, if relevant, "iprExtract". Note
that the value for iprExtract is the anchor attribute
value of a section (such as a MIB specification) that can be
extracted for separate publication, and is only
useful when the value of "ipr" is not "full3667". -->
<rfc
category="std"
obsoletes="793, 879, 2873, 6093, 6429, 6528, 6691"
updates="5961, 1011, 1122"
ipr="pre5378Trust200902"
docName="draft-ietf-tcpm-rfc793bis-28" >
<!-- Processing Instructions- PIs (for a complete list and description,
see file http://xml.resource.org/authoring/README.html and below... --
>
<!-- Some of the more generally applicable PIs that most I-Ds might want to
use -->
<!-- Try to enforce the ID-nits conventions and DTD validity -->
<?rfc strict="yes" ?>
<!-- Items used when reviewing the document -->
<?rfc comments="no" ?> <!-- Controls display of <cref> elements -->
<?rfc inline="no" ?> <!-- When no, put comments at end in comments sectio
n,
otherwise, put inline -->
<?rfc editing="no" ?> <!-- When yes, insert editing marks: editing marks c
onsist of a
string such as <29> printed in the blank line a
t the
beginning of each paragraph of text. -->
<!-- Create Table of Contents (ToC) and set some options for it.
Note the ToC may be omitted for very short documents, but idnits insists
on a ToC
if the document has more than 15 pages. -->
<?rfc toc="yes"?>
<?rfc tocompact="yes"?> <!-- If "yes" eliminates blank lines before main sect
ion entries. -->
<?rfc tocdepth="3"?> <!-- Sets the number of levels of sections/subsection
s... in ToC -->
<!-- Choose the options for the references.
Some like symbolic tags in the references (and citations) and others pr
efer
numbers. The RFC Editor always uses symbolic tags.
The tags used are the anchor attributes of the references. -->
<?rfc symrefs="no"?>
<?rfc sortrefs="yes" ?> <!-- If "yes", causes the references to be sorted in
order of tags.
This doesn't have any effect unless symrefs is
"yes" also. -->
<!-- These two save paper: Just setting compact to "yes" makes savings by no
t starting each
main section on a new page but does not omit the blank lines between li
st items.
If subcompact is also "yes" the blank lines between list items are also
omitted. -->
<?rfc compact="yes" ?>
<?rfc subcompact="no" ?>
<!-- end of list of popular I-D processing instructions -->
<!-- ***** FRONT MATTER ***** -->
<front> <front>
<!-- The abbreviated title is used in the page header - it is only necessary <title abbrev="TCP">Transmission Control Protocol (TCP)</title>
if the <seriesInfo name="STD" value="7" />
full title is longer than 42 characters --> <seriesInfo name="RFC" value="9293" />
<title abbrev="TCP Specification">Transmission Control Protocol (TCP) Specif <author fullname="Wesley M. Eddy" initials="W." surname="Eddy" role="editor"
ication</title> >
<!-- add 'role="editor"' below for the editors if appropriate -->
<author
fullname="Wesley M. Eddy"
initials="W."
surname="Eddy"
role="editor">
<!-- abbrev not needed but can be used for the header
if the full organization name is too long -->
<organization abbrev="MTI Systems">MTI Systems</organization> <organization abbrev="MTI Systems">MTI Systems</organization>
<address> <address>
<postal> <postal>
<!-- I've omitted my street address here --> <country>United States of America</country>
<street/> </postal>
<city/>
<!--
The IETF seems to meet once a year in Minneapolis,
so that's practically my US address. If so, I would
add the following elements:
<region>MN</region>
<code>55403</code>
However, if I lived in France, the <code> comes before the city.
xml2rfc
preserves the order of <city>, <region>, <code> and <country> el
ements in
output so that they can reflect any possible national scheme
-->
<!-- The country element is supposed to contain an ISO3166 two l
etter country
code. -->
<country>US</country>
</postal>
<email>wes@mti-systems.com</email> <email>wes@mti-systems.com</email>
<!--
If I had a phone, fax machine, and a URI, I could add the following:
<phone>+1-408-555-1234</phone>
<facsimile>+1-555-911-9111</facsimile>
<uri>http://www.example.com/</uri>
-->
</address>
</author>
<!--
<author
fullname="Andre Oppermann"
initials="A."
surname="Oppermann">
<organization>FreeBSD</organization>
<address>
<email>andre@freebsd.org</email>
</address> </address>
</author> </author>
<date year="2022"/> <!-- month="March" is no longer necessary <date year="2022" month="August"/>
note also, day="30" is optional -->
<!-- WARNING: If the month and year are the current ones, xml2rfc will fill
in the day for
you. If only the year is specified, xml2rfc will fill in the current da
y and month
irrespective of the day. This silliness should be fixed in v1.31. -->
<!-- Meta-data Declarations -->
<!-- Notice the use of &amp; as an escape for & which would otherwise
start an entity reference, whereas we want a literal &. -->
<area>Transport</area> <area>Transport</area>
<workgroup>TCPM</workgroup>
<!-- WG name at the upperleft corner of the doc, <keyword>TCP</keyword>
IETF fine for individual submissions. You can also <keyword>TCPM</keyword>
omit this element in which case it defaults to "Network Working Group" <keyword>transport layer</keyword>
- <keyword>internet transport</keyword>
a hangover from the ancient history of the IETF! -->
<workgroup>Internet Engineering Task Force</workgroup>
<!-- The DTD allows multiple area and workgroup elements but only the first
one has any
effect on output. -->
<!-- You can add <keyword/> elements here. They will be incorporated into H
TML output
files in a meta tag but they have no effect on text or nroff output. --
>
<abstract> <abstract>
<t>This document specifies the Transmission Control Protocol (TCP). TCP <t>This document specifies the Transmission Control Protocol (TCP). TCP i
is an important transport layer protocol in the Internet protocol stack, and ha s an important transport-layer protocol in the Internet protocol stack, and it h
s continuously evolved over decades of use and growth of the Internet. Over thi as continuously evolved over decades of use and growth of the Internet. Over th
s time, a number of changes have been made to TCP as it was specified in RFC 793 is time, a number of changes have been made to TCP as it was specified in RFC 79
, though these have only been documented in a piecemeal fashion. This document 3, though these have only been documented in a piecemeal fashion. This document
collects and brings those changes together with the protocol specification from collects and brings those changes together with the protocol specification from
RFC 793. This document obsoletes RFC 793, as well as RFCs 879, 2873, 6093, 6429 RFC 793. This document obsoletes RFC 793, as well as RFCs 879, 2873, 6093, 642
, 6528, and 6691 that updated parts of RFC 793. It updates RFCs 1011 and 1122, 9, 6528, and 6691 that updated parts of RFC 793. It updates RFCs 1011 and 1122,
and should be considered as a replacement for the portions of those document dea and it should be considered as a replacement for the portions of those document
ling with TCP requirements. It also updates RFC 5961 by adding a small clarific s dealing with TCP requirements. It also updates RFC 5961 by adding a small cla
ation in reset handling while in the SYN-RECEIVED state. The TCP header control rification in reset handling while in the SYN-RECEIVED state. The TCP header co
bits from RFC 793 have also been updated based on RFC 3168.</t> ntrol bits from RFC 793 have also been updated based on RFC 3168.</t>
<t>RFC EDITOR NOTE: If approved for publication as an RFC, this should b
e marked additionally as "STD: 7" and replace RFC 793 in that role.</t>
</abstract> </abstract>
</front>
</front> <middle>
<section numbered="true" toc="default">
<middle> <name>Purpose and Scope</name>
<section title="Purpose and Scope"> <t>
<t> In 1981, <xref target="RFC0793" format="default">RFC 793</xref> was rele
In 1981, <xref target="RFC0793">RFC 793</xref> was released, documenting ased, documenting the Transmission Control Protocol (TCP) and replacing earlier
the Transmission Control Protocol (TCP), and replacing earlier specifications f published specifications for TCP.
or TCP that had been published in the past. </t>
</t> <t>
<t> Since then, TCP has been widely implemented, and it has been used as a t
Since then, TCP has been widely implemented, and has been used as a tran ransport protocol for numerous applications on the Internet.
sport protocol for numerous applications on the Internet. </t>
</t> <t>
<t> For several decades, RFC 793 plus a number of other documents have combi
For several decades, RFC 793 plus a number of other documents have combi ned to serve as the core specification for TCP <xref target="RFC7414" format="de
ned to serve as the core specification for TCP <xref target="RFC7414"></xref>. fault"/>. Over time, a number of errata have been filed against RFC 793. There
Over time, a number of errata have been filed against RFC 793. There have also have also been deficiencies found and resolved in security, performance, and ma
been deficiencies found and resolved in security, performance, and many other as ny other aspects. The number of enhancements has grown over time across many se
pects. The number of enhancements has grown over time across many separate docu parate documents. These were never accumulated together into a comprehensive up
ments. These were never accumulated together into a comprehensive update to the date to the base specification.
base specification. </t>
</t> <t>
<t> The purpose of this document is to bring together all of the IETF Standa
The purpose of this document is to bring together all of the IETF Standa rds Track changes and other clarifications that have been made to the base TCP f
rds Track changes and other clarifications that have been made to the base TCP f unctional specification (RFC 793) and to unify them into an updated version of t
unctional specification and unify them into an updated version of RFC 793. he specification.
</t> </t>
<t> <t>
Some companion documents are referenced for important algorithms that are Some companion documents are referenced for important algorithms that are
used by TCP (e.g. for congestion control), but have not been completely include used by TCP (e.g., for congestion control) but have not been completely include
d in this document. This is a conscious choice, as this base specification can d in this document. This is a conscious choice, as this base specification can
be used with multiple additional algorithms that are developed and incorporated be used with multiple additional algorithms that are developed and incorporated
separately. This document focuses on the common basis all TCP implementations mu separately. This document focuses on the common basis that all TCP implementatio
st support in order to interoperate. Since some additional TCP features have be ns must support in order to interoperate. Since some additional TCP features ha
come quite complicated themselves (e.g. advanced loss recovery and congestion co ve become quite complicated themselves (e.g., advanced loss recovery and congest
ntrol), future companion documents may attempt to similarly bring these together ion control), future companion documents may attempt to similarly bring these to
. gether.
</t> </t>
<t> <t>
In addition to the protocol specification that describes the TCP segment In addition to the protocol specification that describes the TCP segment
format, generation, and processing rules that are to be implemented in code, RF format, generation, and processing rules that are to be implemented in code, RF
C 793 and other updates also contain informative and descriptive text for reader C 793 and other updates also contain informative and descriptive text for reader
s to understand aspects of the protocol design and operation. This document doe s to understand aspects of the protocol design and operation. This document doe
s not attempt to alter or update this informative text, and is focused only on u s not attempt to alter or update this informative text and is focused only on up
pdating the normative protocol specification. This document preserves reference dating the normative protocol specification. This document preserves references
s to the documentation containing the important explanations and rationale, wher to the documentation containing the important explanations and rationale, where
e appropriate. appropriate.
</t> </t>
<t> <t>
This document is intended to be useful both in checking existing TCP implementations for conformance purposes, as well as in writing new implementations. This document is intended to be useful both in checking existing TCP implementations for conformance purposes, as well as in writing new implementations.
</t> </t>
</section> </section>
<section title="Introduction"> <section numbered="true" toc="default">
<t>RFC 793 contains a discussion of the TCP design goals and provides ex <name>Introduction</name>
amples of its operation, including examples of connection establishment, connect <t>RFC 793 contains a discussion of the TCP design goals and provides exam
ion termination, and packet retransmission to repair losses. ples of its operation, including examples of connection establishment, connectio
</t> n termination, and packet retransmission to repair losses.
<t> </t>
This document describes the basic functionality expected in modern TCP i <t>
mplementations, and replaces the protocol specification in RFC 793. It does not This document describes the basic functionality expected in modern TCP i
replicate or attempt to update the introduction and philosophy content in Secti mplementations and replaces the protocol specification in RFC 793. It does not
ons 1 and 2 of RFC 793. Other documents are referenced to provide explanation o replicate or attempt to update the introduction and philosophy content in Sectio
f the theory of operation, rationale, and detailed discussion of design decision ns 1 and 2 of RFC 793. Other documents are referenced to provide explanations o
s. This document only focuses on the normative behavior of the protocol. f the theory of operation, rationale, and detailed discussion of design decision
</t> s. This document only focuses on the normative behavior of the protocol.
<t> </t>
The &quot;TCP Roadmap&quot; <xref target="RFC7414"/> provides a more exte <t>
nsive guide to the RFCs that define TCP and describe various important algorithm The "TCP Roadmap" <xref target="RFC7414" format="default"/> provides a mo
s. The TCP Roadmap contains sections on strongly encouraged enhancements that im re extensive guide to the RFCs that define TCP and describe various important al
prove performance and other aspects of TCP beyond the basic operation specified gorithms. The TCP Roadmap contains sections on strongly encouraged enhancements
in this document. As one example, implementing congestion control (e.g. <xref t that improve performance and other aspects of TCP beyond the basic operation spe
arget="RFC5681"/>) is a TCP requirement, but is a complex topic on its own, and cified in this document. As one example, implementing congestion control (e.g.,
not described in detail in this document, as there are many options and possibil <xref target="RFC5681" format="default"/>) is a TCP requirement, but it is a co
ities that do not impact basic interoperability. Similarly, most TCP implementa mplex topic on its own and not described in detail in this document, as there ar
tions today include the high-performance extensions in <xref target="RFC7323"/>, e many options and possibilities that do not impact basic interoperability. Sim
but these are not strictly required or discussed in this document. Multipath c ilarly, most TCP implementations today include the high-performance extensions i
onsiderations for TCP are also specified separately in <xref target="RFC8684"/>. n <xref target="RFC7323" format="default"/>, but these are not strictly required
</t> or discussed in this document. Multipath considerations for TCP are also speci
fied separately in <xref target="RFC8684" format="default"/>.
</t>
<t>
A list of changes from RFC 793 is contained in <xref target="changes" fo
rmat="default"/>.
</t>
<section numbered="true" toc="default">
<name>Requirements Language</name>
<t> <t>
A list of changes from RFC 793 is contained in <xref target="changes"/>.
</t>
<section title="Requirements Language"> The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", "<bcp14>REQUIRED
<t> </bcp14>",
"<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<b
The key words &quot;MUST&quot;, &quot;MUST NOT&quot;, &quot;REQUIRED&quot;, cp14>SHOULD
&quot;SHALL&quot;, &quot;SHALL NOT&quot;, &quot;SHOULD&quot;, &quot;SHOULD NOT</bcp14>", "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>",
NOT&quot;, &quot;RECOMMENDED&quot;, &quot;NOT RECOMMENDED&quot;, "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are to be
&quot;MAY&quot;, and &quot;OPTIONAL&quot; in this document are to be interpreted as described in BCP&nbsp;14 <xref target="RFC2119"/> <xref target="R
interpreted as described in BCP 14 <xref target="RFC2119"/><xref FC8174"/> when, and only when, they appear in all capitals, as shown
target="RFC8174"/> when, and only when, they appear in all capitals, as shown
here. here.
</t> </t>
<t> <t>
Each use of RFC 2119 keywords in the document is individually labeled and Each use of RFC 2119 keywords in the document is individually labeled and
referenced in <xref target="reqs"/> that summarizes implementation referenced in <xref target="reqs" format="default"/>, which summarizes implementation
requirements. requirements.
</t> </t>
<t> <t>
Sentences using &quot;MUST&quot; are labeled as &quot;MUST-X&quot; with X being Sentences using "<bcp14>MUST</bcp14>" are labeled as "MUST-X" with X being
a numeric identifier enabling the requirement to be located easily when a numeric identifier enabling the requirement to be located easily when
referenced from <xref target="reqs"/>. referenced from <xref target="reqs" format="default"/>.
</t> </t>
<t> <t>
Similarly, sentences using &quot;SHOULD&quot; are labeled with Similarly, sentences using "<bcp14>SHOULD</bcp14>" are labeled with
&quot;SHLD-X&quot;, &quot;MAY&quot; with &quot;MAY-X&quot;, and "SHLD-X", "<bcp14>MAY</bcp14>" with "MAY-X", and
&quot;RECOMMENDED&quot; with &quot;REC-X&quot;. "<bcp14>RECOMMENDED</bcp14>" with "REC-X".
</t> </t>
<t> <t>
For the purposes of this labeling, &quot;SHOULD NOT&quot; and &quot;MUST For the purposes of this labeling, "<bcp14>SHOULD NOT</bcp14>" and "<bcp14>MUST
NOT&quot; are labeled the same as &quot;SHOULD&quot; and &quot;MUST&quot; NOT</bcp14>" are labeled the same as "<bcp14>SHOULD</bcp14>" and "<bcp14>MUST</b
cp14>"
instances. instances.
</t> </t>
</section>
</section> <section numbered="true" toc="default">
<name>Key TCP Concepts</name>
<section title="Key TCP Concepts"> <t>TCP provides a reliable, in-order, byte-stream service to application
s.</t>
<t>TCP provides a reliable, in-order, byte-stream service to applications.</t> <t>The application byte-stream is conveyed over the network via TCP segm
ents,
<t>The application byte-stream is conveyed over the network via TCP segments,
with each TCP segment sent as an Internet Protocol (IP) datagram.</t> with each TCP segment sent as an Internet Protocol (IP) datagram.</t>
<t>TCP reliability consists of detecting packet losses (via sequence num
<t>TCP reliability consists of detecting packet losses (via sequence numbers) bers)
and errors (via per-segment checksums), as well as correction and errors (via per-segment checksums), as well as correction
via retransmission.</t> via retransmission.</t>
<t>TCP supports unicast delivery of data. There are anycast application
<t>TCP supports unicast delivery of data. Anycast applications exist that s that
successfully use TCP without modifications, though there is some risk of can successfully use TCP without modifications, though there is some risk of
instability due to changes of lower-layer forwarding behavior <xref target="RFC7 instability due to changes of lower-layer forwarding behavior <xref target="RFC7
094"/>.</t> 094" format="default"/>.</t>
<t>TCP is connection oriented, though it does not inherently include a l
<t>TCP is connection-oriented, though does not inherently include a liveness iveness
detection capability.</t> detection capability.</t>
<t>Data flow is supported bidirectionally over TCP connections, though
<t>Data flow is supported bidirectionally over TCP connections, though
applications are free to send data only unidirectionally, if they so applications are free to send data only unidirectionally, if they so
choose.</t> choose.</t>
<t>TCP uses port numbers to identify application services and to multipl
<t>TCP uses port numbers to identify application services and to multiplex ex
distinct flows between hosts.</t> distinct flows between hosts.</t>
<t>A more detailed description of TCP features compared to other transpo
<t>A more detailed description of TCP features compared to other transport rt
protocols can be found in Section 3.1 of <xref target="RFC8095"/>. Further protocols can be found in <xref target="RFC8095" section="3.1" sectionFormat="of
" format="default"/>. Further
description of the motivations for developing TCP and its role in the Internet description of the motivations for developing TCP and its role in the Internet
protocol stack can be found in Section 2 of <xref target="RFC0793"/> and earlier versions protocol stack can be found in <xref target="RFC0793" section="2" sectionFormat= "of" format="default"/> and earlier versions
of the TCP specification.</t> of the TCP specification.</t>
</section>
</section>
</section> </section>
<section numbered="true" toc="default">
<section title="Functional Specification"> <name>Functional Specification</name>
<section numbered="true" toc="default">
<section title="Header Format"> <name>Header Format</name>
<t>
<t>
TCP segments are sent as internet datagrams. The Internet Protocol (IP) TCP segments are sent as internet datagrams. The Internet Protocol (IP)
header carries several information fields, including the source and header carries several information fields, including the source and
destination host addresses <xref target="RFC0791"/> <xref target="RFC8200"/>. destination host addresses <xref target="RFC0791" format="default"/> <xref tar
A TCP header follows the IP get="RFC8200" format="default"/>. A TCP header follows the IP
headers, supplying information specific to the TCP protocol. This headers, supplying information specific to TCP. This
division allows for the existence of host level protocols other than division allows for the existence of host-level protocols other than
TCP. In early development of the Internet suite of protocols, the IP header f TCP. In the early development of the Internet suite of protocols, the IP head
ields had been a part of TCP. er fields had been a part of TCP.
</t> </t>
<t> <t>
This document describes the TCP protocol. The TCP protocol uses TCP Headers. This document describes TCP, which uses TCP headers.
</t> </t>
<t>A TCP Header, followed by any user data in the segment, is formatted as follo <t>A TCP header, followed by any user data in the segment, is formatted
ws, using the style from <xref target="I-D.mcquistin-augmented-ascii-diagrams"/> as follows, using the style from <xref target="I-D.mcquistin-augmented-ascii-dia
:</t> grams" format="default"/>:</t>
<figure anchor="header_format" title="TCP Header Format"> <figure anchor="header_format">
<artwork> <name>TCP Header Format</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
0 1 2 3 0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Source Port | Destination Port | | Source Port | Destination Port |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Sequence Number | | Sequence Number |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Acknowledgment Number | | Acknowledgment Number |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Data | |C|E|U|A|P|R|S|F| | | Data | |C|E|U|A|P|R|S|F| |
skipping to change at line 399 skipping to change at line 180
| Checksum | Urgent Pointer | | Checksum | Urgent Pointer |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| [Options] | | [Options] |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| : | :
: Data : : Data :
: | : |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Note that one tick mark represents one bit position. Note that one tick mark represents one bit position.
</artwork> ]]></artwork>
</figure> </figure>
<t> <t>
where: where:
<list style="hanging" hangIndent="2"> </t>
<t hangText="Source Port: 16 bits."> <dl newline="false" spacing="normal" indent="2">
<vspace /> <dt>Source Port:</dt>
<vspace /> <dd>
<t>
16 bits
</t>
<t>
The source port number. The source port number.
</t> </t>
<t hangText="Destination Port: 16 bits."> </dd>
<vspace /> <dt>Destination Port:</dt>
<vspace /> <dd>
<t>
16 bits
</t>
<t>
The destination port number. The destination port number.
</t> </t>
<t hangText="Sequence Number: 32 bits."> </dd>
<vspace /> <dt>Sequence Number:</dt>
<vspace /> <dd>
<t>
32 bits
</t>
<t>
The sequence number of the first data octet in this segment (except The sequence number of the first data octet in this segment (except
when the SYN flag is set). If SYN is set the sequence number is the when the SYN flag is set). If SYN is set, the sequence number is the
initial sequence number (ISN) and the first data octet is ISN+1. initial sequence number (ISN) and the first data octet is ISN+1.
</t> </t>
<t hangText="Acknowledgment Number: 32 bits."> </dd>
<vspace /> <dt>Acknowledgment Number:</dt>
<vspace /> <dd>
<t>
32 bits
</t>
<t>
If the ACK control bit is set, this field contains the value of the If the ACK control bit is set, this field contains the value of the
next sequence number the sender of the segment is expecting to next sequence number the sender of the segment is expecting to
receive. Once a connection is established, this is always sent. receive. Once a connection is established, this is always sent.
</t> </t>
<t hangText="Data Offset (DOffset): 4 bits."> </dd>
<vspace /> <dt>Data Offset (DOffset):</dt>
<vspace /> <dd>
The number of 32 bit words in the TCP Header. This indicates where <t>
4 bits
</t>
<t>
The number of 32-bit words in the TCP header. This indicates where
the data begins. The TCP header (even one including options) is an the data begins. The TCP header (even one including options) is an
integer multiple of 32 bits long. integer multiple of 32 bits long.
</t> </t>
<t hangText="Reserved (Rsrvd): 4 bits."> </dd>
<vspace /> <dt>Reserved (Rsrvd):</dt>
<vspace /> <dd>
A set of control bits reserved for future use. Must be zero in generated se <t>
gments and must be ignored in received segments, if corresponding future feature 4 bits
s are unimplemented by the sending or receiving host. </t>
</t> <t>
<t> A set of control bits reserved for future use. Must be zero in generated se
The control bits are also known as &quot;flags&quot;. Assignment is managed gments and must be ignored in received segments if the corresponding future feat
by IANA from the &quot;TCP Header Flags&quot; registry <xref target="header-flag ures are not implemented by the sending or receiving host.
s-registry"/>. The currently assigned control bits are CWR, ECE, URG, ACK, PSH, </t>
RST, SYN, and FIN. </dd>
</t> <dt>Control bits:</dt>
<t hangText="CWR: 1 bit."> <dd>
<vspace /> <t>
<vspace /> The control bits are also known as "flags". Assignment is managed by IANA fr
Congestion Window Reduced (see <xref target="RFC3168"/>). om the "TCP Header Flags" registry <xref target="TCP-parameters-registry" format
</t> ="default"/>. The currently assigned control bits are CWR, ECE, URG, ACK, PSH,
<t hangText="ECE: 1 bit."> RST, SYN, and FIN.
<vspace /> </t>
<vspace /> <dl newline="false" spacing="normal" indent="4">
ECN-Echo (see <xref target="RFC3168"/>). <dt>CWR:</dt>
</t> <dd>
<t hangText="URG: 1 bit."> <t>
<vspace /> 1 bit
<vspace /> </t>
Urgent Pointer field is significant. <t>
</t> Congestion Window Reduced (see <xref target="RFC3168" format="default"/>).
<t hangText="ACK: 1 bit."> </t>
<vspace /> </dd>
<vspace /> <dt>ECE:</dt>
<dd>
<t>
1 bit
</t>
<t>
ECN-Echo (see <xref target="RFC3168" format="default"/>).
</t>
</dd>
<dt>URG:</dt>
<dd>
<t>
1 bit
</t>
<t>
Urgent pointer field is significant.
</t>
</dd>
<dt>ACK:</dt>
<dd>
<t>
1 bit
</t>
<t>
Acknowledgment field is significant. Acknowledgment field is significant.
</t> </t>
<t hangText="PSH: 1 bit."> </dd>
<vspace /> <dt>PSH:</dt>
<vspace /> <dd>
Push Function (see the Send Call description in <xref target="user-api"/>). <t>
</t> 1 bit
<t hangText="RST: 1 bit."> </t>
<vspace /> <t>
<vspace /> Push function (see the Send Call description in <xref target="user-api" form
at="default"/>).
</t>
</dd>
<dt>RST:</dt>
<dd>
<t>
1 bit
</t>
<t>
Reset the connection. Reset the connection.
</t> </t>
<t hangText="SYN: 1 bit."> </dd>
<vspace /> <dt>SYN:</dt>
<vspace /> <dd>
<t>
1 bit
</t>
<t>
Synchronize sequence numbers. Synchronize sequence numbers.
</t> </t>
<t hangText="FIN: 1 bit."> </dd>
<vspace /> <dt>FIN:</dt>
<vspace /> <dd>
<t>
1 bit
</t>
<t>
No more data from sender. No more data from sender.
</t> </t>
<t hangText="Window: 16 bits."> </dd>
<vspace /> </dl>
<vspace /> </dd>
<dt>Window:</dt>
<dd>
<t>
16 bits
</t>
<t>
The number of data octets beginning with the one indicated in the The number of data octets beginning with the one indicated in the
acknowledgment field that the sender of this segment is willing to acknowledgment field that the sender of this segment is willing to
accept. The value is shifted when the Window Scaling extension is used accept. The value is shifted when the window scaling extension is used
<xref target="RFC7323"/>. <xref target="RFC7323" format="default"/>.
<vspace /> </t>
<vspace /> <t>
The window size MUST be treated as an unsigned number, or else The window size <bcp14>MUST</bcp14> be treated as an unsigned number, or els
e
large window sizes will appear like negative windows and TCP will large window sizes will appear like negative windows and TCP will
not work (MUST-1). It is RECOMMENDED that implementations will reserve not work (MUST-1). It is <bcp14>RECOMMENDED</bcp14> that implementations wi ll reserve
32-bit fields for the send and receive window sizes in the connection 32-bit fields for the send and receive window sizes in the connection
record and do all window computations with 32 bits (REC-1). record and do all window computations with 32 bits (REC-1).
</t> </t>
<t hangText="Checksum: 16 bits."> </dd>
<vspace /> <dt>Checksum:</dt>
<vspace /> <dd>
The checksum field is the 16 bit ones' complement of the ones' <t>
complement sum of all 16 bit words in the header and text. The checksum com 16 bits
putation needs to ensure the 16-bit alignment of the data being summed. If a </t>
<t>
The checksum field is the 16-bit ones' complement of the ones'
complement sum of all 16-bit words in the header and text. The checksum com
putation needs to ensure the 16-bit alignment of the data being summed. If a
segment contains an odd number of header and text octets, alignment can be a chieved by segment contains an odd number of header and text octets, alignment can be a chieved by
padding the last octet with zeros on its right to padding the last octet with zeros on its right to
form a 16 bit word for checksum purposes. The pad is not form a 16-bit word for checksum purposes. The pad is not
transmitted as part of the segment. While computing the checksum, transmitted as part of the segment. While computing the checksum,
the checksum field itself is replaced with zeros. the checksum field itself is replaced with zeros.
</t> </t>
<t> <t>
The checksum also covers a pseudo header (<xref target="v4pseudo"/>) concept The checksum also covers a pseudo-header (<xref target="v4pseudo" format="de
ually prefixed to the TCP fault"/>) conceptually prefixed to the TCP
header. The pseudo header is 96 bits for IPv4 and 320 bits for IPv6. header. The pseudo-header is 96 bits for IPv4 and 320 bits for IPv6.
Including the pseudo header in the checksum gives the TCP connection Including the pseudo-header in the checksum gives the TCP connection
protection against misrouted segments. This information is carried in IP he aders protection against misrouted segments. This information is carried in IP he aders
and is transferred across the TCP/Network interface in the arguments or and is transferred across the TCP/network interface in the arguments or
results of calls by the TCP implementation on the IP layer. results of calls by the TCP implementation on the IP layer.
<figure anchor="v4pseudo" title="IPv4 Pseudo Header"><artwork> </t>
<figure anchor="v4pseudo">
<name>IPv4 Pseudo-header</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
+--------+--------+--------+--------+ +--------+--------+--------+--------+
| Source Address | | Source Address |
+--------+--------+--------+--------+ +--------+--------+--------+--------+
| Destination Address | | Destination Address |
+--------+--------+--------+--------+ +--------+--------+--------+--------+
| zero | PTCL | TCP Length | | zero | PTCL | TCP Length |
+--------+--------+--------+--------+ +--------+--------+--------+--------+
</artwork></figure> ]]></artwork>
<list style="hanging" hangIndent="2"> </figure>
<t hangText="Pseudo header components for IPv4:"> <dl newline="true" spacing="normal" indent="2">
<vspace /> <dt>Pseudo-header components for IPv4:</dt>
<vspace /> <dd>
<list> <dl newline="false" spacing="normal">
<t>Source Address: the IPv4 source address in network byte order</t> <dt>Source Address:</dt>
<t>Destination Address: the IPv4 destination address in network byte order< <dd>the IPv4 source address in network byte order</dd>
/t> <dt>Destination Address:</dt>
<t>zero: bits set to zero</t> <dd>the IPv4 destination address in network byte order</dd>
<t>PTCL: the protocol number from the IP header</t> <dt>zero:</dt>
<t>TCP Length: <dd>bits set to zero</dd>
<dt>PTCL:</dt>
<dd>the protocol number from the IP header</dd>
<dt>TCP Length:</dt>
<dd>
the TCP header length plus the data length in the TCP header length plus the data length in
octets (this is not an explicitly transmitted quantity, but is octets (this is not an explicitly transmitted quantity but is
computed), and it does not count the 12 octets of the pseudo computed), and it does not count the 12 octets of the pseudo-header.</dd>
header.</t> </dl>
</list> </dd>
</t> </dl>
<t> <t>
For IPv6, the pseudo header is defined in Section 8.1 of RFC 8200 <xref t For IPv6, the pseudo-header is defined in Section <xref target="RFC8200"
arget="RFC8200"/>, and contains the IPv6 Source Address and Destination Address, section="8.1" sectionFormat="bare" format="default"/> of RFC 8200 <xref target="
an Upper Layer Packet Length (a 32-bit value otherwise equivalent to TCP Length RFC8200" format="default"/> and contains the IPv6 Source Address and Destination
in the IPv4 pseudo header), three bytes of zero-padding, and a Next Header valu Address, an Upper-Layer Packet Length (a 32-bit value otherwise equivalent to T
e (differing from the IPv6 header value in the case of extension headers present CP Length in the IPv4 pseudo-header), three bytes of zero padding, and a Next He
in between IPv6 and TCP). ader value, which differs from the IPv6 header value if there are extension head
ers present between IPv6 and TCP.
</t> </t>
<t> <t>
The TCP checksum is never optional. The sender MUST generate it (MUST-2) The TCP checksum is never optional. The sender <bcp14>MUST</bcp14> gener
and the receiver MUST check it (MUST-3). ate it (MUST-2)
and the receiver <bcp14>MUST</bcp14> check it (MUST-3).
</t> </t>
</list> </dd>
</t> <dt>Urgent Pointer:</dt>
<dd>
<t hangText="Urgent Pointer: 16 bits."> <t>
<vspace /> 16 bits
<vspace /> </t>
<t>
This field communicates the current value of the urgent pointer as a This field communicates the current value of the urgent pointer as a
positive offset from the sequence number in this segment. The positive offset from the sequence number in this segment. The
urgent pointer points to the sequence number of the octet following the urge nt data. This field is only to be interpreted in segments with urgent pointer points to the sequence number of the octet following the urge nt data. This field is only to be interpreted in segments with
the URG control bit set. the URG control bit set.
</t> </t>
<t hangText="Options: [TCP Option]; size(Options) == (DOffset-5)*32; present o </dd>
nly when DOffset > 5. Note that this size expression also includes any padding <dt>Options:</dt>
trailing the actual options present."> <dd>
<vspace /> <t>
<vspace /> [TCP Option]; size(Options) == (DOffset-5)*32; present only when DOffset &gt
; 5.
Note that this size expression also includes any padding trailing the actual
options present.
</t>
<t>
Options may occupy space at the end of the TCP header and are a Options may occupy space at the end of the TCP header and are a
multiple of 8 bits in length. All options are included in the multiple of 8 bits in length. All options are included in the
checksum. An option may begin on any octet boundary. There are two checksum. An option may begin on any octet boundary. There are two
cases for the format of an option: cases for the format of an option:
<list> </t>
<t>Case 1: A single octet of option-kind.</t> <dl newline="false" spacing="normal">
<dt>Case 1:</dt>
<t>Case 2: An octet of option-kind (Kind), an octet of option-length, and <dd>A single octet of option-kind.</dd>
the actual option-data octets.</t> <dt>Case 2:</dt>
</list> <dd>An octet of option-kind (Kind), an octet of option-length, and
</t> the actual option-data octets.</dd>
<t> </dl>
<t>
The option-length counts the two octets of option-kind and The option-length counts the two octets of option-kind and
option-length as well as the option-data octets. option-length as well as the option-data octets.
</t> </t>
<t> <t>
Note that the list of options may be shorter than the data offset Note that the list of options may be shorter than the Data Offset
field might imply. The content of the header beyond the field might imply. The content of the header beyond the
End-of-Option option MUST be header padding of zeros (MUST-69). End of Option List Option <bcp14>MUST</bcp14> be header padding of zeros (MU ST-69).
</t> </t>
<t> <t>
The list of all currently defined options is managed by IANA <xref target="T The list of all currently defined options is managed by IANA <xref target="T
CP-parameters-registry"/>, and each option is defined in other RFCs, as indicate CP-parameters-registry" format="default"/>, and each option is defined in other
d there. That set includes experimental options that can be extended to support RFCs, as indicated there. That set includes experimental options that can be ex
multiple concurrent usages <xref target="RFC6994"/>.</t> tended to support multiple concurrent usages <xref target="RFC6994" format="defa
<t> ult"/>.</t>
A given TCP implementation can support any currently defined options, but the <t>
following options MUST be supported (MUST-4 - note Maximum Segment Size option A given TCP implementation can support any currently defined options, but the
support is also part of MUST-19 in <xref target="pmtud"/>):</t> following options <bcp14>MUST</bcp14> be supported (MUST-4 -- note Maximum Segm
<t> ent Size Option support is also part of MUST-14 in <xref target="mss" format="de
<figure><artwork> fault"/>):</t>
Kind Length Meaning <table>
---- ------ ------- <name>Mandatory Option Set</name>
0 - End of option list. <thead>
1 - No-Operation. <tr>
2 4 Maximum Segment Size. <th>Kind</th>
</artwork></figure></t> <th>Length</th>
<t> <th>Meaning</th>
These options are specified in detail in <xref target="Option-Definitions" / </tr>
>.<vspace/><vspace/> </thead>
A TCP implementation MUST be able to receive a TCP option in any segment (MU <tbody>
ST-5).<vspace /><vspace /> <tr>
A TCP implementation MUST (MUST-6) ignore without error any TCP option it do <td>0</td>
es not <td>-</td>
<td>End of Option List Option.</td>
</tr>
<tr>
<td>1</td>
<td>-</td>
<td>No-Operation.</td>
</tr>
<tr>
<td>2</td>
<td>4</td>
<td>Maximum Segment Size.</td>
</tr>
</tbody>
</table>
<t>
These options are specified in detail in <xref target="Option-Definitions" f
ormat="default"/>.</t>
<t>
A TCP implementation <bcp14>MUST</bcp14> be able to receive a TCP Option in
any segment (MUST-5).</t>
<t>
A TCP implementation <bcp14>MUST</bcp14> (MUST-6) ignore without error any T
CP Option it does not
implement, assuming that the option has a length field. All implement, assuming that the option has a length field. All
TCP options except End of option list and No-Operation MUST have length fiel TCP Options except End of Option List Option (EOL) and No-Operation (NOP) <b
ds, including all future options (MUST-68). cp14>MUST</bcp14> have length fields, including all future options (MUST-68).
TCP implementations MUST be prepared to handle an illegal option length TCP implementations <bcp14>MUST</bcp14> be prepared to handle an illegal opt
ion length
(e.g., zero); a suggested procedure is to (e.g., zero); a suggested procedure is to
reset the connection and log the error cause (MUST-7). reset the connection and log the error cause (MUST-7).
</t> </t>
<t>Note: There is ongoing work to extend the space available for TCP O
<t>Note: There is ongoing work to extend the space available for TCP options, ptions, such as <xref target="I-D.ietf-tcpm-tcp-edo" format="default"/>.</t>
such as <xref target="I-D.ietf-tcpm-tcp-edo"/>.</t> </dd>
<dt>Data:</dt>
<t hangText="Data: variable length."> <dd>
<vspace /> <t>
<vspace /> variable length
</t>
<t>
User data carried by the TCP segment. User data carried by the TCP segment.
</t> </t>
</list> </dd>
</t> </dl>
</section> </section>
<section title="Specific Option Definitions" anchor="Option-Definitions"> <section anchor="Option-Definitions" numbered="true" toc="default">
<t> <name>Specific Option Definitions</name>
A TCP Option, in the mandatory option set, is one of: an End of Option L <t>
ist Option, a No-Operation Option, or a Maximum Segment Size Option. A TCP Option, in the mandatory option set, is one of an End of Option Li
</t> st Option, a No-Operation Option, or a Maximum Segment Size Option.
<t>An End of Option List Option is formatted as follows:</t> </t>
<figure><artwork> <t>An End of Option List Option is formatted as follows:</t>
<artwork name="" type="" align="left" alt=""><![CDATA[
0 0
0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
| 0 | | 0 |
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
</artwork></figure> ]]></artwork>
<t> <t>
where: where:
<list style="hanging" hangIndent="2"> </t>
<t hangText="Kind: 1 byte; Kind == 0."> <dl newline="false" spacing="normal" indent="2">
<vspace /> <dt>Kind:</dt>
<vspace /> <dd>
<t>
1 byte; Kind == 0.
</t>
<t>
This option code indicates the end of the option list. This This option code indicates the end of the option list. This
might not coincide with the end of the TCP header according to might not coincide with the end of the TCP header according to
the Data Offset field. This is used at the end of all options, the Data Offset field. This is used at the end of all options,
not the end of each option, and need only be used if the end of not the end of each option, and need only be used if the end of
the options would not otherwise coincide with the end of the TCP the options would not otherwise coincide with the end of the TCP
header. header.
</t> </t>
</list> </dd>
</t> </dl>
<t>A No-Operation Option is formatted as follows:</t>
<t>A No-Operation Option is formatted as follows:</t> <artwork name="" type="" align="left" alt=""><![CDATA[
<figure><artwork>
0 0
0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
| 1 | | 1 |
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
</artwork></figure> ]]></artwork>
<t>where: <t>where:
<list style="hanging" hangIndent="2"> </t>
<t hangText="Kind: 1 byte; Kind == 1."> <dl newline="false" spacing="normal" indent="2">
<vspace /> <dt>Kind:</dt>
<vspace /> <dd>
<t>
1 byte; Kind == 1.
</t>
<t>
This option code can be used between options, for example, to This option code can be used between options, for example, to
align the beginning of a subsequent option on a word boundary. align the beginning of a subsequent option on a word boundary.
There is no guarantee that senders will use this option, so There is no guarantee that senders will use this option, so
receivers MUST be prepared to process options even if they do receivers <bcp14>MUST</bcp14> be prepared to process options even if the y do
not begin on a word boundary (MUST-64). not begin on a word boundary (MUST-64).
</t> </t>
</list> </dd>
</t> </dl>
<t>A Maximum Segment Size Option is formatted as follows:</t>
<t>A Maximum Segment Size Option is formatted as follows:</t> <artwork name="" type="" align="left" alt=""><![CDATA[
<figure><artwork>
0 1 2 3 0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| 2 | Length | Maximum Segment Size (MSS) | | 2 | Length | Maximum Segment Size (MSS) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
</artwork></figure> ]]></artwork>
<t>where: <t>where:
<list style="hanging" hangIndent="2"> </t>
<t hangText="Kind: 1 byte; Kind == 2."> <dl newline="false" spacing="normal" indent="2">
<vspace /> <dt>Kind:</dt>
<vspace /> <dd>
<t>
1 byte; Kind == 2.
</t>
<t>
If this option is present, then it communicates the maximum If this option is present, then it communicates the maximum
receive segment size at the TCP endpoint that sends this segment. receive segment size at the TCP endpoint that sends this segment.
This value is limited by the IP reassembly limit. This field may be sen t in the initial connection request This value is limited by the IP reassembly limit. This field may be sen t in the initial connection request
(i.e., in segments with the SYN control bit set) and MUST NOT (i.e., in segments with the SYN control bit set) and <bcp14>MUST NOT</bc p14>
be sent in other segments (MUST-65). If this be sent in other segments (MUST-65). If this
option is not used, any segment size is allowed. option is not used, any segment size is allowed.
A more complete description of this option is provided in <xref target=" A more complete description of this option is provided in <xref target="
mss"/>. mss" format="default"/>.
</t> </t>
<t hangText="Length: 1 byte; Length == 4."> </dd>
<vspace /> <dt>Length:</dt>
<vspace /> <dd>
<t>
1 byte; Length == 4.
</t>
<t>
Length of the option in bytes. Length of the option in bytes.
</t> </t>
<t hangText="Maximum Segment Size (MSS): 2 bytes."> </dd>
<vspace /> <dt>Maximum Segment Size (MSS):</dt>
<vspace /> <dd>
<t>
2 bytes.
</t>
<t>
The maximum receive segment size at the TCP endpoint that sends this seg ment. The maximum receive segment size at the TCP endpoint that sends this seg ment.
</t> </t>
</list> </dd>
</t> </dl>
<section numbered="true" toc="default">
<section title="Other Common Options"> <name>Other Common Options</name>
<t> <t>
Additional RFCs define some other commonly used options that are recommended to Additional RFCs define some other commonly used options that are recommended to
implement for high performance, but not necessary for basic TCP interoperability implement for high performance but are not necessary for basic TCP interoperabil
. These are the TCP Selective Acknowledgement (SACK) option <xref target="RFC20 ity. These are the TCP Selective Acknowledgment (SACK) Option <xref target="RFC
18"/><xref target="RFC2883"/>, TCP Timestamp (TS) option <xref target="RFC7323"/ 2018" format="default"/> <xref target="RFC2883" format="default"/>, TCP Timestam
>, and TCP Window Scaling (WS) option <xref target="RFC7323"/>. p (TS) Option <xref target="RFC7323" format="default"/>, and TCP Window Scale (W
</t> S) Option <xref target="RFC7323" format="default"/>.
</section> </t>
</section>
<section title="Experimental TCP Options"> <section numbered="true" toc="default">
<t> <name>Experimental TCP Options</name>
Experimental TCP option values are defined in <xref target="RFC4727"/>, and <xre <t>
f target="RFC6994"/> describes the current recommended usage for these experimen Experimental TCP Option values are defined in <xref target="RFC4727" format="def
tal values. ault"/>, and <xref target="RFC6994" format="default"/> describes the current rec
</t> ommended usage for these experimental values.
</section> </t>
</section> </section>
<section title="TCP Terminology Overview"> </section>
<t> <section numbered="true" toc="default">
This section includes an overview of key terms needed to understand the detailed <name>TCP Terminology Overview</name>
protocol operation in the rest of the document. There is a glossary of terms i <t>
n <xref target="glossary"/>. This section includes an overview of key terms needed to understand the detailed
protocol operation in the rest of the document. There is a glossary of terms i
n <xref target="glossary" format="default"/>.
</t> </t>
<section title="Key Connection State Variables"> <section numbered="true" toc="default">
<t> <name>Key Connection State Variables</name>
Before we can discuss very much about the operation of the TCP implementation <t>
we need Before we can discuss the operation of the TCP implementation in detail, we ne
ed
to introduce some detailed terminology. The maintenance of a TCP to introduce some detailed terminology. The maintenance of a TCP
connection requires maintaining state for several variables. We conceive connection requires maintaining state for several variables. We conceive
of these variables being stored in a connection record called a of these variables being stored in a connection record called a
Transmission Control Block or TCB. Among the variables stored in the Transmission Control Block or TCB. Among the variables stored in the
TCB are the local and remote IP addresses and port numbers, the IP security le TCB are the local and remote IP addresses and port numbers, the IP security le
vel and compartment vel, and compartment
of the connection (see <xref target="seccomp"/>), pointers to the user's send of the connection (see <xref target="seccomp" format="default"/>), pointers to
and receive the user's send and receive
buffers, pointers to the retransmit queue and to the current segment. buffers, pointers to the retransmit queue and to the current segment.
In addition, several variables relating to the send and receive In addition, several variables relating to the send and receive
sequence numbers are stored in the TCB. sequence numbers are stored in the TCB.
</t> </t>
<t><figure><artwork> <table>
Send Sequence Variables: <name>Send Sequence Variables</name>
<thead>
SND.UNA - send unacknowledged <tr>
SND.NXT - send next <th>Variable</th>
SND.WND - send window <th>Description</th>
SND.UP - send urgent pointer </tr>
SND.WL1 - segment sequence number used for last window update </thead>
SND.WL2 - segment acknowledgment number used for last window <tbody>
update <tr>
ISS - initial send sequence number <td>SND.UNA</td>
<td>send unacknowledged</td>
Receive Sequence Variables: </tr>
<tr>
RCV.NXT - receive next <td>SND.NXT</td>
RCV.WND - receive window <td>send next</td>
RCV.UP - receive urgent pointer </tr>
IRS - initial receive sequence number <tr>
</artwork></figure></t> <td>SND.WND</td>
<t> <td>send window</td>
</tr>
<tr>
<td>SND.UP</td>
<td>send urgent pointer</td>
</tr>
<tr>
<td>SND.WL1</td>
<td>segment sequence number used for last window update</td>
</tr>
<tr>
<td>SND.WL2</td>
<td>segment acknowledgment number used for last window update</t
d>
</tr>
<tr>
<td>ISS</td>
<td>initial send sequence number</td>
</tr>
</tbody>
</table>
<table>
<name>Receive Sequence Variables</name>
<thead>
<tr>
<th>Variable</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>RCV.NXT</td>
<td>receive next</td>
</tr>
<tr>
<td>RCV.WND</td>
<td>receive window</td>
</tr>
<tr>
<td>RCV.UP</td>
<td>receive urgent pointer</td>
</tr>
<tr>
<td>IRS</td>
<td>initial receive sequence number</td>
</tr>
</tbody>
</table>
<t>
The following diagrams may help to relate some of these variables to The following diagrams may help to relate some of these variables to
the sequence space. the sequence space.
</t> </t>
<figure anchor="send_seq_space" title="Send Sequence Space"> <figure anchor="send_seq_space">
<artwork> <name>Send Sequence Space</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
1 2 3 4 1 2 3 4
----------|----------|----------|---------- ----------|----------|----------|----------
SND.UNA SND.NXT SND.UNA SND.UNA SND.NXT SND.UNA
+SND.WND +SND.WND
1 - old sequence numbers that have been acknowledged 1 - old sequence numbers that have been acknowledged
2 - sequence numbers of unacknowledged data 2 - sequence numbers of unacknowledged data
3 - sequence numbers allowed for new data transmission 3 - sequence numbers allowed for new data transmission
4 - future sequence numbers that are not yet allowed 4 - future sequence numbers that are not yet allowed
</artwork> ]]></artwork>
</figure> </figure>
<t> <t>
The send window is the portion of the sequence space labeled 3 in The send window is the portion of the sequence space labeled 3 in
<xref target="send_seq_space" />. <xref target="send_seq_space" format="default"/>.
</t> </t>
<figure anchor="recv_seq_space" title="Receive Sequence Space"> <figure anchor="recv_seq_space">
<artwork> <name>Receive Sequence Space</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
1 2 3 1 2 3
----------|----------|---------- ----------|----------|----------
RCV.NXT RCV.NXT RCV.NXT RCV.NXT
+RCV.WND +RCV.WND
1 - old sequence numbers that have been acknowledged 1 - old sequence numbers that have been acknowledged
2 - sequence numbers allowed for new reception 2 - sequence numbers allowed for new reception
3 - future sequence numbers that are not yet allowed 3 - future sequence numbers that are not yet allowed
</artwork> ]]></artwork>
</figure> </figure>
<t> <t>
The receive window is the portion of the sequence space labeled 2 in The receive window is the portion of the sequence space labeled 2 in
<xref target="recv_seq_space" />. <xref target="recv_seq_space" format="default"/>.
</t> </t>
<t> <t>
There are also some variables used frequently in the discussion that There are also some variables used frequently in the discussion that
take their values from the fields of the current segment. take their values from the fields of the current segment.
</t> </t>
<t>Current Segment Variables: <table>
<figure><artwork> <name>Current Segment Variables</name>
SEG.SEQ - segment sequence number <thead>
SEG.ACK - segment acknowledgment number <tr>
SEG.LEN - segment length <th>Variable</th>
SEG.WND - segment window <th>Description</th>
SEG.UP - segment urgent pointer </tr>
</artwork></figure> </thead>
</t> <tbody>
</section> <tr>
<td>SEG.SEQ</td>
<section title="State Machine Overview"> <td>segment sequence number</td>
<t> </tr>
<tr>
<td>SEG.ACK</td>
<td>segment acknowledgment number</td>
</tr>
<tr>
<td>SEG.LEN</td>
<td>segment length</td>
</tr>
<tr>
<td>SEG.WND</td>
<td>segment window</td>
</tr>
<tr>
<td>SEG.UP</td>
<td>segment urgent pointer</td>
</tr>
</tbody>
</table>
</section>
<section numbered="true" toc="default">
<name>State Machine Overview</name>
<t>
A connection progresses through a series of states during its A connection progresses through a series of states during its
lifetime. The states are: LISTEN, SYN-SENT, SYN-RECEIVED, lifetime. The states are: LISTEN, SYN-SENT, SYN-RECEIVED,
ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK,
TIME-WAIT, and the fictional state CLOSED. CLOSED is fictional TIME-WAIT, and the fictional state CLOSED. CLOSED is fictional
because it represents the state when there is no TCB, and therefore, because it represents the state when there is no TCB, and therefore,
no connection. Briefly the meanings of the states are: no connection. Briefly the meanings of the states are:
</t> </t>
<t><list> <dl>
<t>LISTEN - represents waiting for a connection request from any remote <dt>LISTEN -</dt><dd>represents waiting for a connection request fro
TCP peer and port.</t> m any remote
TCP peer and port.</dd>
<t>SYN-SENT - represents waiting for a matching connection request <dt>SYN-SENT -</dt><dd>represents waiting for a matching connection
after having sent a connection request.</t> request
after having sent a connection request.</dd>
<t>SYN-RECEIVED - represents waiting for a confirming connection <dt>SYN-RECEIVED -</dt><dd>represents waiting for a confirming conne
ction
request acknowledgment after having both received and sent a request acknowledgment after having both received and sent a
connection request.</t> connection request.</dd>
<dt>ESTABLISHED -</dt><dd>represents an open connection, data receiv
<t>ESTABLISHED - represents an open connection, data received can be ed can be
delivered to the user. The normal state for the data transfer phase delivered to the user. The normal state for the data transfer phase
of the connection.</t> of the connection.</dd>
<dt>FIN-WAIT-1 -</dt><dd>represents waiting for a connection termina
<t>FIN-WAIT-1 - represents waiting for a connection termination request tion request
from the remote TCP peer, or an acknowledgment of the connection from the remote TCP peer, or an acknowledgment of the connection
termination request previously sent.</t> termination request previously sent.</dd>
<dt>FIN-WAIT-2 -</dt><dd>represents waiting for a connection termina
<t>FIN-WAIT-2 - represents waiting for a connection termination request tion request
from the remote TCP peer.</t> from the remote TCP peer.</dd>
<dt>CLOSE-WAIT -</dt><dd>represents waiting for a connection termina
<t>CLOSE-WAIT - represents waiting for a connection termination request tion request
from the local user.</t> from the local user.</dd>
<dt>CLOSING -</dt><dd>represents waiting for a connection terminatio
<t>CLOSING - represents waiting for a connection termination request n request
acknowledgment from the remote TCP peer.</t> acknowledgment from the remote TCP peer.</dd>
<dt>LAST-ACK -</dt><dd>represents waiting for an acknowledgment of t
<t>LAST-ACK - represents waiting for an acknowledgment of the he
connection termination request previously sent to the remote TCP peer connection termination request previously sent to the remote TCP peer
(this termination request sent to the remote TCP peer already included an acknowledgment (this termination request sent to the remote TCP peer already included an acknowledgment
of the termination request sent from the remote TCP peer).</t> of the termination request sent from the remote TCP peer).</dd>
<dt>TIME-WAIT -</dt><dd>represents waiting for enough time to pass t
<t>TIME-WAIT - represents waiting for enough time to pass to be sure o be sure
the remote TCP peer received the acknowledgment of its connection the remote TCP peer received the acknowledgment of its connection
termination request, and to avoid new connections being impacted by delayed termination request and to avoid new connections being impacted by delayed
segments from previous connections.</t> segments from previous connections.</dd>
<dt>CLOSED -</dt><dd>represents no connection state at all.</dd>
<t>CLOSED - represents no connection state at all.</t> </dl>
</list></t> <t>
<t>
A TCP connection progresses from one state to another in response to A TCP connection progresses from one state to another in response to
events. The events are the user calls, OPEN, SEND, RECEIVE, CLOSE, events. The events are the user calls, OPEN, SEND, RECEIVE, CLOSE,
ABORT, and STATUS; the incoming segments, particularly those ABORT, and STATUS; the incoming segments, particularly those
containing the SYN, ACK, RST and FIN flags; and timeouts. containing the SYN, ACK, RST, and FIN flags; and timeouts.
</t> </t>
<t> <t>
The OPEN call specifies The OPEN call specifies
whether connection establishment is to be actively pursued, or to whether connection establishment is to be actively pursued, or to
be passively waited for. be passively waited for.
</t> </t>
<t> <t>
A passive OPEN request means that the process wants to accept incoming A passive OPEN request means that the process wants to accept incoming
connection requests, in contrast to an active OPEN attempting to initiate a connection. connection requests, in contrast to an active OPEN attempting to initiate a connection.
</t> </t>
<t> <t>
The state diagram in <xref target="conn_states" /> illustrates only state chan The state diagram in <xref target="conn_states" format="default"/> illustrates
ges, together only state changes, together
with the causing events and resulting actions, but addresses neither with the causing events and resulting actions, but addresses neither
error conditions nor actions that are not connected with state error conditions nor actions that are not connected with state
changes. In a later section, more detail is offered with respect to changes. In a later section, more detail is offered with respect to
the reaction of the TCP implementation to events. Some state names are abbreviated or hyphenated differently in the diagram from how they appear elsewhere in the document. the reaction of the TCP implementation to events. Some state names are abbreviated or hyphenated differently in the diagram from how they appear elsewhere in the document.
</t> </t>
<t> <dl>
NOTA BENE: This diagram is only a summary and must not be taken as <dt>
NOTA BENE:</dt><dd>This diagram is only a summary and must not be taken as
the total specification. Many details are not included. the total specification. Many details are not included.
</t> </dd>
<figure anchor="conn_states" title="TCP Connection State Diagram"> </dl>
<artwork> <figure anchor="conn_states">
<name>TCP Connection State Diagram</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
+---------+ ---------\ active OPEN +---------+ ---------\ active OPEN
| CLOSED | \ ----------- | CLOSED | \ -----------
+---------+&lt;---------\ \ create TCB +---------+<---------\ \ create TCB
| ^ \ \ snd SYN | ^ \ \ snd SYN
passive OPEN | | CLOSE \ \ passive OPEN | | CLOSE \ \
------------ | | ---------- \ \ ------------ | | ---------- \ \
create TCB | | delete TCB \ \ create TCB | | delete TCB \ \
V | \ \ V | \ \
rcv RST (note 1) +---------+ CLOSE | \ rcv RST (note 1) +---------+ CLOSE | \
-------------------->| LISTEN | ---------- | | -------------------->| LISTEN | ---------- | |
/ +---------+ delete TCB | | / +---------+ delete TCB | |
/ rcv SYN | | SEND | | / rcv SYN | | SEND | |
/ ----------- | | ------- | V / ----------- | | ------- | V
+--------+ snd SYN,ACK / \ snd SYN +--------+ +--------+ snd SYN,ACK / \ snd SYN +--------+
| |&lt;----------------- ------------------>| | | |<----------------- ------------------>| |
| SYN | rcv SYN | SYN | | SYN | rcv SYN | SYN |
| RCVD |&lt;-----------------------------------------------| SENT | | RCVD |<-----------------------------------------------| SENT |
| | snd SYN,ACK | | | | snd SYN,ACK | |
| |------------------ -------------------| | | |------------------ -------------------| |
+--------+ rcv ACK of SYN \ / rcv SYN,ACK +--------+ +--------+ rcv ACK of SYN \ / rcv SYN,ACK +--------+
| -------------- | | ----------- | -------------- | | -----------
| x | | snd ACK | x | | snd ACK
| V V | V V
| CLOSE +---------+ | CLOSE +---------+
| ------- | ESTAB | | ------- | ESTAB |
| snd FIN +---------+ | snd FIN +---------+
| CLOSE | | rcv FIN | CLOSE | | rcv FIN
V ------- | | ------- V ------- | | -------
+---------+ snd FIN / \ snd ACK +---------+ +---------+ snd FIN / \ snd ACK +---------+
| FIN |&lt;---------------- ------------------>| CLOSE | | FIN |<---------------- ------------------>| CLOSE |
| WAIT-1 |------------------ | WAIT | | WAIT-1 |------------------ | WAIT |
+---------+ rcv FIN \ +---------+ +---------+ rcv FIN \ +---------+
| rcv ACK of FIN ------- | CLOSE | | rcv ACK of FIN ------- | CLOSE |
| -------------- snd ACK | ------- | | -------------- snd ACK | ------- |
V x V snd FIN V V x V snd FIN V
+---------+ +---------+ +---------+ +---------+ +---------+ +---------+
|FINWAIT-2| | CLOSING | | LAST-ACK| |FINWAIT-2| | CLOSING | | LAST-ACK|
+---------+ +---------+ +---------+ +---------+ +---------+ +---------+
| rcv ACK of FIN | rcv ACK of FIN | | rcv ACK of FIN | rcv ACK of FIN |
| rcv FIN -------------- | Timeout=2MSL -------------- | | rcv FIN -------------- | Timeout=2MSL -------------- |
| ------- x V ------------ x V | ------- x V ------------ x V
\ snd ACK +---------+delete TCB +---------+ \ snd ACK +---------+delete TCB +---------+
-------------------->|TIME-WAIT|------------------->| CLOSED | -------------------->|TIME-WAIT|------------------->| CLOSED |
+---------+ +---------+ +---------+ +---------+
</artwork> ]]></artwork>
</figure> </figure>
<t>The following notes apply to <xref target="conn_states"/>: <t>The following notes apply to <xref target="conn_states" format="def
<list> ault"/>:
<t>
Note 1: The transition from SYN-RECEIVED to LISTEN on receiving a RST is
conditional on having reached SYN-RECEIVED after a passive open.
</t> </t>
<t> <dl>
Note 2: The figure omits a transition from FIN-WAIT-1 to TIME-WAIT if <dt>
Note 1:</dt><dd>The transition from SYN-RECEIVED to LISTEN on receiving a RST is
conditional on having reached SYN-RECEIVED after a passive OPEN.
</dd>
<dt>
Note 2:</dt><dd>The figure omits a transition from FIN-WAIT-1 to TIME-WAIT if
a FIN is received and the local FIN is also acknowledged. a FIN is received and the local FIN is also acknowledged.
</t> </dd>
<t> <dt>
Note 3: A RST can be sent from any state with a corresponding transition to TIME Note 3:</dt><dd>A RST can be sent from any state with a corresponding transition
-WAIT (see <xref target="FTY99"/> for rationale). These transitions are not exp to TIME-WAIT (see <xref target="FTY99" format="default"/> for rationale). Thes
licitly shown, otherwise the diagram would become very difficult to read. Simil e transitions are not explicitly shown; otherwise, the diagram would become very
arly, receipt of a RST from any state results in a transition to LISTEN or CLOSE difficult to read. Similarly, receipt of a RST from any state results in a tra
D, though this is also omitted from the diagram for legibility. nsition to LISTEN or CLOSED, though this is also omitted from the diagram for le
</t> gibility.
</list></t> </dd>
</section> </dl>
</section> </section>
<section title="Sequence Numbers"> </section>
<t> <section anchor="sequence-numbers" numbered="true" toc="default">
<name>Sequence Numbers</name>
<t>
A fundamental notion in the design is that every octet of data sent A fundamental notion in the design is that every octet of data sent
over a TCP connection has a sequence number. Since every octet is over a TCP connection has a sequence number. Since every octet is
sequenced, each of them can be acknowledged. The acknowledgment sequenced, each of them can be acknowledged. The acknowledgment
mechanism employed is cumulative so that an acknowledgment of sequence mechanism employed is cumulative so that an acknowledgment of sequence
number X indicates that all octets up to but not including X have been number X indicates that all octets up to but not including X have been
received. This mechanism allows for straight-forward duplicate received. This mechanism allows for straightforward duplicate
detection in the presence of retransmission. Numbering of octets detection in the presence of retransmission. The numbering scheme of octets
within a segment is that the first data octet immediately following within a segment is as follows: the first data octet immediately following
the header is the lowest numbered, and the following octets are the header is the lowest numbered, and the following octets are
numbered consecutively. numbered consecutively.
</t> </t>
<t> <t>
It is essential to remember that the actual sequence number space is It is essential to remember that the actual sequence number space is
finite, though large. This space ranges from 0 to 2**32 - 1. finite, though large. This space ranges from 0 to 2<sup>32</sup> - 1.
Since the space is finite, all arithmetic dealing with sequence Since the space is finite, all arithmetic dealing with sequence
numbers must be performed modulo 2**32. This unsigned arithmetic numbers must be performed modulo 2<sup>32</sup>. This unsigned arithmetic
preserves the relationship of sequence numbers as they cycle from preserves the relationship of sequence numbers as they cycle from
2**32 - 1 to 0 again. There are some subtleties to computer modulo 2<sup>32</sup> - 1 to 0 again. There are some subtleties to computer modulo
arithmetic, so great care should be taken in programming the arithmetic, so great care should be taken in programming the
comparison of such values. The symbol "=&lt;" means "less than or equal" comparison of such values. The symbol "=&lt;" means "less than or equal"
(modulo 2**32). (modulo 2<sup>32</sup>).
</t> </t>
<t> <t>
The typical kinds of sequence number comparisons that the TCP implementation must The typical kinds of sequence number comparisons that the TCP implementation must
perform include: perform include:
</t> </t>
<t><list> <ol type="(%c)" spacing="normal">
<t>(a) Determining that an acknowledgment refers to some sequence <li>Determining that an acknowledgment refers to some sequence
number sent but not yet acknowledged.</t> number sent but not yet acknowledged.</li>
<li>Determining that all sequence numbers occupied by a segment
<t>(b) Determining that all sequence numbers occupied by a segment
have been acknowledged (e.g., to remove the segment from a have been acknowledged (e.g., to remove the segment from a
retransmission queue).</t> retransmission queue).</li>
<li>Determining that an incoming segment contains sequence numbers
<t>(c) Determining that an incoming segment contains sequence numbers
that are expected (i.e., that the segment "overlaps" the that are expected (i.e., that the segment "overlaps" the
receive window).</t> receive window).</li>
</list></t> </ol>
<t> <t>
In response to sending data the TCP endpoint will receive acknowledgments. Th In response to sending data, the TCP endpoint will receive acknowledgments. T
e he
following comparisons are needed to process the acknowledgments. following comparisons are needed to process the acknowledgments:
</t> </t>
<t><list> <t indent="3">
<t>SND.UNA = oldest unacknowledged sequence number</t> SND.UNA = oldest unacknowledged sequence number
</t>
<t>SND.NXT = next sequence number to be sent</t> <t indent="3">
SND.NXT = next sequence number to be sent
<t>SEG.ACK = acknowledgment from the receiving TCP peer (next sequence </t>
number expected by the receiving TCP peer)</t> <t indent="3">
SEG.ACK = acknowledgment from the receiving TCP peer (next sequence
<t>SEG.SEQ = first sequence number of a segment</t> number expected by the receiving TCP peer)
</t>
<t>SEG.LEN = the number of octets occupied by the data in the segment <t indent="3">
(counting SYN and FIN)</t> SEG.SEQ = first sequence number of a segment
</t>
<t>SEG.SEQ+SEG.LEN-1 = last sequence number of a segment</t> <t indent="3">
</list></t> SEG.LEN = the number of octets occupied by the data in the segment
<t> (counting SYN and FIN)
A new acknowledgment (called an "acceptable ack"), is one for which </t>
<t indent="3">
SEG.SEQ+SEG.LEN-1 = last sequence number of a segment
</t>
<t>
A new acknowledgment (called an "acceptable ack") is one for which
the inequality below holds: the inequality below holds:
</t> </t>
<t><list> <t indent="3">
<t>SND.UNA &lt; SEG.ACK =&lt; SND.NXT</t> SND.UNA &lt; SEG.ACK =&lt; SND.NXT
</list></t> </t>
<t> <t>
A segment on the retransmission queue is fully acknowledged if the sum A segment on the retransmission queue is fully acknowledged if the sum
of its sequence number and length is less or equal than the of its sequence number and length is less than or equal to the
acknowledgment value in the incoming segment. acknowledgment value in the incoming segment.
</t> </t>
<t> <t>
When data is received the following comparisons are needed: When data is received, the following comparisons are needed:
</t> </t>
<t><list> <t indent="3">
<t>RCV.NXT = next sequence number expected on an incoming segment, and RCV.NXT = next sequence number expected on an incoming segment, and
is the left or lower edge of the receive window</t> is the left or lower edge of the receive window
</t>
<t>RCV.NXT+RCV.WND-1 = last sequence number expected on an incoming <t indent="3">
segment, and is the right or upper edge of the receive window</t> RCV.NXT+RCV.WND-1 = last sequence number expected on an incoming
segment, and is the right or upper edge of the receive window
<t>SEG.SEQ = first sequence number occupied by the incoming segment</t> </t>
<t indent="3">
<t>SEG.SEQ+SEG.LEN-1 = last sequence number occupied by the incoming SEG.SEQ = first sequence number occupied by the incoming segment
segment</t> </t>
</list></t> <t indent="3">
<t> SEG.SEQ+SEG.LEN-1 = last sequence number occupied by the incoming
segment
</t>
<t>
A segment is judged to occupy a portion of valid receive sequence A segment is judged to occupy a portion of valid receive sequence
space if space if
</t> </t>
<t><list> <t indent="3">
<t>RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND</t> RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND
</list></t> </t>
<t>
<t>
or or
</t> </t>
<t><list> <t indent="3">
<t>RCV.NXT =&lt; SEG.SEQ+SEG.LEN-1 &lt; RCV.NXT+RCV.WND</t> RCV.NXT =&lt; SEG.SEQ+SEG.LEN-1 &lt; RCV.NXT+RCV.WND
</list></t> </t>
<t> <t>
The first part of this test checks to see if the beginning of the The first part of this test checks to see if the beginning of the
segment falls in the window, the second part of the test checks to see segment falls in the window, the second part of the test checks to see
if the end of the segment falls in the window; if the segment passes if the end of the segment falls in the window; if the segment passes
either part of the test it contains data in the window. either part of the test, it contains data in the window.
</t> </t>
<t> <t>
Actually, it is a little more complicated than this. Due to zero Actually, it is a little more complicated than this. Due to zero
windows and zero length segments, we have four cases for the windows and zero-length segments, we have four cases for the
acceptability of an incoming segment: acceptability of an incoming segment:
</t> </t>
<t><figure><artwork> <table>
Segment Receive Test <name>Segment Acceptability Tests</name>
Length Window <thead>
------- ------- ------------------------------------------- <tr>
<th>Segment Length</th>
0 0 SEG.SEQ = RCV.NXT <th>Receive Window</th>
<th>Test</th>
0 >0 RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND </tr>
</thead>
>0 0 not acceptable <tbody>
<tr>
>0 >0 RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND <td>0</td>
or RCV.NXT =&lt; SEG.SEQ+SEG.LEN-1 &lt; RCV.NXT+RCV.WND <td>0</td>
</artwork></figure></t> <td>SEG.SEQ = RCV.NXT</td>
<t> </tr>
<tr>
<td>0</td>
<td>&gt;0</td>
<td>RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND</td>
</tr>
<tr>
<td>&gt;0</td>
<td>0</td>
<td>not acceptable</td>
</tr>
<tr>
<td>&gt;0</td>
<td>&gt;0</td>
<td>
<t>RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND</t>
<t>or</t>
<t>RCV.NXT =&lt; SEG.SEQ+SEG.LEN-1 &lt; RCV.NXT+RCV.WND</t>
</td>
</tr>
</tbody>
</table>
<t>
Note that when the receive window is zero no segments should be Note that when the receive window is zero no segments should be
acceptable except ACK segments. Thus, it is possible for a TCP implementation to acceptable except ACK segments. Thus, it is possible for a TCP implementation to
maintain a zero receive window while transmitting data and receiving maintain a zero receive window while transmitting data and receiving
ACKs. A TCP receiver MUST ACKs. A TCP receiver <bcp14>MUST</bcp14>
process the RST and URG fields of all incoming segments, even when the receive window is zero (MUST-66). process the RST and URG fields of all incoming segments, even when the receive window is zero (MUST-66).
</t> </t>
<t> <t>
We have taken advantage of the numbering scheme to protect certain We have taken advantage of the numbering scheme to protect certain
control information as well. This is achieved by implicitly including control information as well. This is achieved by implicitly including
some control flags in the sequence space so they can be retransmitted some control flags in the sequence space so they can be retransmitted
and acknowledged without confusion (i.e., one and only one copy of the and acknowledged without confusion (i.e., one and only one copy of the
control will be acted upon). Control information is not physically control will be acted upon). Control information is not physically
carried in the segment data space. Consequently, we must adopt rules carried in the segment data space. Consequently, we must adopt rules
for implicitly assigning sequence numbers to control. The SYN and FIN for implicitly assigning sequence numbers to control. The SYN and FIN
are the only controls requiring this protection, and these controls are the only controls requiring this protection, and these controls
are used only at connection opening and closing. For sequence number are used only at connection opening and closing. For sequence number
purposes, the SYN is considered to occur before the first actual data purposes, the SYN is considered to occur before the first actual data
octet of the segment in which it occurs, while the FIN is considered octet of the segment in which it occurs, while the FIN is considered
to occur after the last actual data octet in a segment in which it to occur after the last actual data octet in a segment in which it
occurs. The segment length (SEG.LEN) includes both data and sequence occurs. The segment length (SEG.LEN) includes both data and sequence
space-occupying controls. When a SYN is present then SEG.SEQ is the space-occupying controls. When a SYN is present, then SEG.SEQ is the
sequence number of the SYN. sequence number of the SYN.
</t> </t>
<section title="Initial Sequence Number Selection"> <section numbered="true" toc="default">
<t> <name>Initial Sequence Number Selection</name>
<t>
A connection is defined by a pair of A connection is defined by a pair of
sockets. Connections can be reused. New instances of a connection will be referred to as sockets. Connections can be reused. New instances of a connection will be referred to as
incarnations of the connection. The problem that arises from this is incarnations of the connection. The problem that arises from this is
-- &quot;how does the TCP implementation identify duplicate segments from prev -- "how does the TCP implementation identify duplicate segments from previous
ious incarnations of the connection?" This problem becomes apparent if the
incarnations of the connection?&quot; This problem becomes apparent if the
connection is being opened and closed in quick succession, or if the connection is being opened and closed in quick succession, or if the
connection breaks with loss of memory and is then reestablished. connection breaks with loss of memory and is then reestablished.
To support this, the TIME-WAIT state limits the rate of connection reuse, To support this, the TIME-WAIT state limits the rate of connection reuse,
while the initial sequence number selection described below further protects while the initial sequence number selection described below further protects
against ambiguity about what incarnation of a connection an incoming packet against ambiguity about which incarnation of a connection an incoming packet
corresponds to. corresponds to.
</t> </t>
<t> <t>
To avoid confusion we must prevent segments from one incarnation of a To avoid confusion, we must prevent segments from one incarnation of a
connection from being used while the same sequence numbers may still connection from being used while the same sequence numbers may still
be present in the network from an earlier incarnation. We want to be present in the network from an earlier incarnation. We want to
assure this, even if a TCP endpoint loses all knowledge of the assure this even if a TCP endpoint loses all knowledge of the
sequence numbers it has been using. When new connections are created, sequence numbers it has been using. When new connections are created,
an initial sequence number (ISN) generator is employed that selects a an initial sequence number (ISN) generator is employed that selects a
new 32 bit ISN. There are security issues that result if an off-path new 32-bit ISN. There are security issues that result if an off-path
attacker is able to predict or guess ISN values <xref target="RFC6528"/>. attacker is able to predict or guess ISN values <xref target="RFC6528" format=
"default"/>.
</t> </t>
<t> <t>
TCP Initial Sequence Numbers are generated from a number sequence that TCP initial sequence numbers are generated from a number sequence that
monotonically increases until it wraps, known loosely as a &quot;clock&quot;. monotonically increases until it wraps, known loosely as a "clock".
This clock is a 32-bit counter that typically increments at least once every This clock is a 32-bit counter that typically increments at least once every
roughly 4 microseconds, although it is neither assumed to be realtime nor roughly 4 microseconds, although it is neither assumed to be realtime nor
precise, and need not persist across reboots. The clock component is intended precise, and need not persist across reboots. The clock component is intended
to ensure that with a Maximum Segment Lifetime (MSL), generated ISNs will be to ensure that with a Maximum Segment Lifetime (MSL), generated ISNs will be
unique, since it cycles approximately every 4.55 hours, which is much longer unique since it cycles approximately every 4.55 hours, which is much longer
than the MSL. than the MSL. Please note that for modern networks that support high data
rates where the connection might start and quickly advance sequence numbers to
overlap within the MSL, it is recommended to implement the Timestamp Option as
mentioned later in <xref target="tcp_quiet_time_concept"/>.
</t> </t>
<t> <t>
A TCP implementation MUST use the above type of &quot;clock&quot; for clock-dr A TCP implementation <bcp14>MUST</bcp14> use the above type of "clock" for clo
iven selection of initial sequence numbers (MUST-8), and ck-driven selection of initial sequence numbers (MUST-8), and
SHOULD generate its Initial Sequence Numbers with the expression: <bcp14>SHOULD</bcp14> generate its initial sequence numbers with the expressio
n:
</t> </t>
<t> <t>
ISN = M + F(localip, localport, remoteip, remoteport, secretkey) ISN = M + F(localip, localport, remoteip, remoteport, secretkey)
</t> </t>
<t> <t>
where M is the 4 microsecond timer, and F() is a pseudorandom where M is the 4 microsecond timer, and F() is a pseudorandom
function (PRF) of the connection's identifying parameters ("localip, localport, remoteip, remoteport") and a secret key ("secretkey") (SHLD-1). F() MUST NOT be computable from the outside (MUST-9), or an attacker could still guess at sequence numbers from the ISN used for some other connection. The PRF could be implemented as a cryptographic hash of the concatenation of the TCP connection parameters and some secret data. For discussion of the selection of a specific hash algorithm and management of the secret key data, please see Section 3 of <xref target="RFC6528"/>. function (PRF) of the connection's identifying parameters ("localip, localport, remoteip, remoteport") and a secret key ("secretkey") (SHLD-1). F() <bcp14>MUST NOT</bcp14> be computable from the outside (MUST-9), or an attacker could still guess at sequence numbers from the ISN used for some other connection. The PRF could be implemented as a cryptographic hash of the concatenation of the TCP connection parameters and some secret data. For discussion of the selection of a specific hash algorithm and management of the secret key data, please see <xref target="RFC6528" section="3" sectionFormat="of" format="default"/>.
</t> </t>
<t>
<t>
For each connection there is a send sequence number and a receive For each connection there is a send sequence number and a receive
sequence number. The initial send sequence number (ISS) is chosen by sequence number. The initial send sequence number (ISS) is chosen by
the data sending TCP peer, and the initial receive sequence number (IRS) is the data sending TCP peer, and the initial receive sequence number (IRS) is
learned during the connection establishing procedure. learned during the connection-establishing procedure.
</t> </t>
<t> <t>
For a connection to be established or initialized, the two TCP peers must For a connection to be established or initialized, the two TCP peers must
synchronize on each other's initial sequence numbers. This is done in synchronize on each other's initial sequence numbers. This is done in
an exchange of connection establishing segments carrying a control bit an exchange of connection-establishing segments carrying a control bit
called "SYN" (for synchronize) and the initial sequence numbers. As a called "SYN" (for synchronize) and the initial sequence numbers. As a
shorthand, segments carrying the SYN bit are also called "SYNs". shorthand, segments carrying the SYN bit are also called "SYNs".
Hence, the solution requires a suitable mechanism for picking an Hence, the solution requires a suitable mechanism for picking an
initial sequence number and a slightly involved handshake to exchange initial sequence number and a slightly involved handshake to exchange
the ISNs. the ISNs.
</t> </t>
<t> <t>
The synchronization requires each side to send its own initial The synchronization requires each side to send its own initial
sequence number and to receive a confirmation of it in acknowledgment sequence number and to receive a confirmation of it in acknowledgment
from the remote TCP peer. Each side must also receive the remote peer's from the remote TCP peer. Each side must also receive the remote peer's
initial sequence number and send a confirming acknowledgment. initial sequence number and send a confirming acknowledgment.
</t> </t>
<t><figure><artwork> <artwork name="" type="" align="left" alt=""><![CDATA[
1) A --&gt; B SYN my sequence number is X 1) A --> B SYN my sequence number is X
2) A &lt;-- B ACK your sequence number is X 2) A <-- B ACK your sequence number is X
3) A &lt;-- B SYN my sequence number is Y 3) A <-- B SYN my sequence number is Y
4) A --&gt; B ACK your sequence number is Y 4) A --> B ACK your sequence number is Y
</artwork></figure></t> ]]></artwork>
<t> <t>
Because steps 2 and 3 can be combined in a single message, this is Because steps 2 and 3 can be combined in a single message, this is
called the three-way (or three message) handshake (3WHS). called the three-way (or three message) handshake (3WHS).
</t> </t>
<t> <t>
A 3WHS is necessary because sequence numbers are not A 3WHS is necessary because sequence numbers are not
tied to a global clock in the network, and TCP implementations may have different tied to a global clock in the network, and TCP implementations may have different
mechanisms for picking the ISNs. The receiver of the first SYN has mechanisms for picking the ISNs. The receiver of the first SYN has
no way of knowing whether the segment was an old one or not, no way of knowing whether the segment was an old one or not,
unless it remembers the last sequence number used on the connection unless it remembers the last sequence number used on the connection
(which is not always possible), and so it must ask the sender to (which is not always possible), and so it must ask the sender to
verify this SYN. The three-way handshake and the advantages of a verify this SYN. The three-way handshake and the advantages of a
clock-driven scheme for ISN selection are discussed in <xref target="DS78"/>. clock-driven scheme for ISN selection are discussed in <xref target="DS78" for mat="default"/>.
</t> </t>
</section> </section>
<section title="Knowing When to Keep Quiet"> <section numbered="true" toc="default">
<t> <name>Knowing When to Keep Quiet</name>
<t>
A theoretical problem exists where data could be corrupted due to confusion A theoretical problem exists where data could be corrupted due to confusion
between old segments in the network and new ones after a host reboots, if the between old segments in the network and new ones after a host reboots if the
same port numbers and sequence space are reused. The &quot;Quiet Time&quot; same port numbers and sequence space are reused. The "quiet time"
concept discussed below addresses this and the discussion of it is included concept discussed below addresses this, and the discussion of it is included
for situations where it might be relevant, although it is not felt to be for situations where it might be relevant, although it is not felt to be
necessary in most current implementations. The problem was more relevant necessary in most current implementations. The problem was more relevant
earlier in the history of TCP. In practical use on the Internet today, the earlier in the history of TCP. In practical use on the Internet today, the
error-prone conditions are sufficiently unlikely that it is felt safe to error-prone conditions are sufficiently unlikely that it is safe to
ignore. Reasons why it is now negligible include: (a) ISS and ephemeral port ignore. Reasons why it is now negligible include: (a) ISS and ephemeral port
randomization have reduced likelihood of reuse of port numbers and sequence numbers randomization have reduced likelihood of reuse of port numbers and sequence numbers
after reboots, (b) the effective MSL of the Internet has declined as links after reboots, (b) the effective MSL of the Internet has declined as links
have become faster, and (c) reboots often take longer than an MSL anyway. have become faster, and (c) reboots often take longer than an MSL anyway.
</t> </t>
<t> <t>
To be sure that a TCP implementation does not create a segment carrying a To be sure that a TCP implementation does not create a segment carrying a
sequence number that may be duplicated by an old segment remaining in the sequence number that may be duplicated by an old segment remaining in the
network, the TCP endpoint must keep quiet for an MSL before assigning any network, the TCP endpoint must keep quiet for an MSL before assigning any
sequence numbers upon starting up or recovering from a situation where memory sequence numbers upon starting up or recovering from a situation where memory
of sequence numbers in use was lost. For this specification the MSL is taken of sequence numbers in use was lost. For this specification the MSL is taken
to be 2 minutes. This is an engineering choice, and may be changed if to be 2 minutes. This is an engineering choice, and may be changed if
experience indicates it is desirable to do so. Note that if a TCP endpoint experience indicates it is desirable to do so. Note that if a TCP endpoint
is reinitialized in some sense, yet retains its memory of sequence numbers in is reinitialized in some sense, yet retains its memory of sequence numbers in
use, then it need not wait at all; it must only be sure to use sequence use, then it need not wait at all; it must only be sure to use sequence
numbers larger than those recently used. numbers larger than those recently used.
</t> </t>
</section> </section>
<section title="The TCP Quiet Time Concept"> <section anchor="tcp_quiet_time_concept" numbered="true" toc="default">
<t> <name>The TCP Quiet Time Concept</name>
<t>
Hosts that for any reason lose Hosts that for any reason lose
knowledge of the last sequence numbers transmitted on knowledge of the last sequence numbers transmitted on
each active (i.e., not closed) connection shall delay emitting any each active (i.e., not closed) connection shall delay emitting any
TCP segments for at least the agreed MSL TCP segments for at least the agreed MSL
in the internet system that the host is a part of. In the in the internet system that the host is a part of. In the
paragraphs below, an explanation for this specification is given. paragraphs below, an explanation for this specification is given.
TCP implementors may violate the "quiet time" restriction, but only TCP implementers may violate the "quiet time" restriction, but only
at the risk of causing some old data to be accepted as new or new at the risk of causing some old data to be accepted as new or new
data rejected as old duplicated data by some receivers in the internet data rejected as old duplicated data by some receivers in the internet
system. system.
</t> </t>
<t> <t>
TCP endpoints consume sequence number space each time a segment is formed and TCP endpoints consume sequence number space each time a segment is formed and
entered into the network output queue at a source host. The entered into the network output queue at a source host. The
duplicate detection and sequencing algorithm in the TCP protocol duplicate detection and sequencing algorithm in TCP
relies on the unique binding of segment data to sequence space to relies on the unique binding of segment data to sequence space to
the extent that sequence numbers will not cycle through all 2**32 the extent that sequence numbers will not cycle through all 2<sup>32</sup>
values before the segment data bound to those sequence numbers has values before the segment data bound to those sequence numbers has
been delivered and acknowledged by the receiver and all duplicate been delivered and acknowledged by the receiver and all duplicate
copies of the segments have "drained" from the internet. Without copies of the segments have "drained" from the internet. Without
such an assumption, two distinct TCP segments could conceivably be such an assumption, two distinct TCP segments could conceivably be
assigned the same or overlapping sequence numbers, causing confusion assigned the same or overlapping sequence numbers, causing confusion
at the receiver as to which data is new and which is old. Remember at the receiver as to which data is new and which is old. Remember
that each segment is bound to as many consecutive sequence numbers that each segment is bound to as many consecutive sequence numbers
as there are octets of data and SYN or FIN flags in the segment. as there are octets of data and SYN or FIN flags in the segment.
</t> </t>
<t> <t>
Under normal conditions, TCP implementations keep track of the next sequence number Under normal conditions, TCP implementations keep track of the next sequence number
to emit and the oldest awaiting acknowledgment so as to avoid to emit and the oldest awaiting acknowledgment so as to avoid
mistakenly using a sequence number over before its first use has mistakenly reusing a sequence number before its first use has
been acknowledged. This alone does not guarantee that old duplicate been acknowledged. This alone does not guarantee that old duplicate
data is drained from the net, so the sequence space has been made data is drained from the net, so the sequence space has been made
large to reduce the probability that a wandering duplicate will large to reduce the probability that a wandering duplicate will
cause trouble upon arrival. At 2 megabits/sec. it takes 4.5 hours cause trouble upon arrival. At 2 megabits/sec., it takes 4.5 hours
to use up 2**32 octets of sequence space. Since the maximum segment to use up 2<sup>32</sup> octets of sequence space. Since the maximum segment
lifetime in the net is not likely to exceed a few tens of seconds, lifetime in the net is not likely to exceed a few tens of seconds,
this is deemed ample protection for foreseeable nets, even if data this is deemed ample protection for foreseeable nets, even if data
rates escalate to 10s of megabits/sec. At 100 megabits/sec, the rates escalate to 10s of megabits/sec. At 100 megabits/sec., the
cycle time is 5.4 minutes, which may be a little short, but still cycle time is 5.4 minutes, which may be a little short but still
within reason. Much higher data rates are possible today, with implications within reason. Much higher data rates are possible today, with implications
described in the final paragraph of this subsection. described in the final paragraph of this subsection.
</t> </t>
<t> <t>
The basic duplicate detection and sequencing algorithm in TCP can be The basic duplicate detection and sequencing algorithm in TCP can be
defeated, however, if a source TCP endpoint does not have any memory of the defeated, however, if a source TCP endpoint does not have any memory of the
sequence numbers it last used on a given connection. For example, if sequence numbers it last used on a given connection. For example, if
the TCP implementation were to start all connections with sequence number 0, then the TCP implementation were to start all connections with sequence number 0, then
upon the host rebooting, a TCP peer might re-form an earlier upon the host rebooting, a TCP peer might re-form an earlier
connection (possibly after half-open connection resolution) and emit connection (possibly after half-open connection resolution) and emit
packets with sequence numbers identical to or overlapping with packets with sequence numbers identical to or overlapping with
packets still in the network, which were emitted on an earlier packets still in the network, which were emitted on an earlier
incarnation of the same connection. In the absence of knowledge incarnation of the same connection. In the absence of knowledge
about the sequence numbers used on a particular connection, the TCP about the sequence numbers used on a particular connection, the TCP
specification recommends that the source delay for MSL seconds specification recommends that the source delay for MSL seconds
before emitting segments on the connection, to allow time for before emitting segments on the connection, to allow time for
segments from the earlier connection incarnation to drain from the segments from the earlier connection incarnation to drain from the
system. system.
</t> </t>
<t> <t>
Even hosts that can remember the time of day and used it to select Even hosts that can remember the time of day and use it to select
initial sequence number values are not immune from this problem initial sequence number values are not immune from this problem
(i.e., even if time of day is used to select an initial sequence (i.e., even if time of day is used to select an initial sequence
number for each new connection incarnation). number for each new connection incarnation).
</t> </t>
<t> <t>
Suppose, for example, that a connection is opened starting with Suppose, for example, that a connection is opened starting with
sequence number S. Suppose that this connection is not used much sequence number S. Suppose that this connection is not used much
and that eventually the initial sequence number function (ISN(t)) and that eventually the initial sequence number function (ISN(t))
takes on a value equal to the sequence number, say S1, of the last takes on a value equal to the sequence number, say S1, of the last
segment sent by this TCP endpoint on a particular connection. Now suppose, segment sent by this TCP endpoint on a particular connection. Now suppose,
at this instant, the host reboots and establishes a new at this instant, the host reboots and establishes a new
incarnation of the connection. The initial sequence number chosen is incarnation of the connection. The initial sequence number chosen is
S1 = ISN(t) -- last used sequence number on old incarnation of S1 = ISN(t) -- last used sequence number on old incarnation of
connection! If the recovery occurs quickly enough, any old connection! If the recovery occurs quickly enough, any old
duplicates in the net bearing sequence numbers in the neighborhood duplicates in the net bearing sequence numbers in the neighborhood
of S1 may arrive and be treated as new packets by the receiver of of S1 may arrive and be treated as new packets by the receiver of
the new incarnation of the connection. the new incarnation of the connection.
</t> </t>
<t> <t>
The problem is that the recovering host may not know for how long it The problem is that the recovering host may not know for how long it
was down between rebooting nor does it know whether there are still old duplicates in was down between rebooting nor does it know whether there are still old duplicates in
the system from earlier connection incarnations. the system from earlier connection incarnations.
</t> </t>
<t> <t>
One way to deal with this problem is to deliberately delay emitting One way to deal with this problem is to deliberately delay emitting
segments for one MSL after recovery from a reboot - this is the &quot;quiet segments for one MSL after recovery from a reboot -- this is the "quiet
time&quot; specification. Hosts that prefer to avoid waiting and are time" specification. Hosts that prefer to avoid waiting and are
willing to risk possible confusion of old and new packets at a given willing to risk possible confusion of old and new packets at a given
destination may choose not to wait for the &quot;quiet time&quot;. destination may choose not to wait for the "quiet time".
Implementors may provide TCP users with the ability to select on a Implementers may provide TCP users with the ability to select on a
connection by connection basis whether to wait after a reboot, or may connection-by-connection basis whether to wait after a reboot, or may
informally implement the &quot;quiet time&quot; for all connections. informally implement the "quiet time" for all connections.
Obviously, even where a user selects to &quot;wait,&quot; this is not Obviously, even where a user selects to "wait", this is not
necessary after the host has been &quot;up&quot; for at least MSL seconds. necessary after the host has been "up" for at least MSL seconds.
</t> </t>
<t> <t>
To summarize: every segment emitted occupies one or more sequence To summarize: every segment emitted occupies one or more sequence
numbers in the sequence space, the numbers occupied by a segment are numbers in the sequence space, and the numbers occupied by a segment are
&quot;busy&quot; or &quot;in use&quot; until MSL seconds have passed, upon rebooting a "busy" or "in use" until MSL seconds have passed. Upon rebooting, a
block of space-time is occupied by the octets and SYN or FIN flags of any potentially still in-flight block of space-time is occupied by the octets and SYN or FIN flags of any potentially still in-flight
segments, and if a new connection is started too soon and uses any of the segments. If a new connection is started too soon and uses any of the
sequence numbers in the space-time footprint of those potentially still in-flight segments of sequence numbers in the space-time footprint of those potentially still in-flight segments of
the previous connection incarnation, there is a potential sequence the previous connection incarnation, there is a potential sequence
number overlap area that could cause confusion at the receiver. number overlap area that could cause confusion at the receiver.
</t> </t>
<t> <t>
High performance cases will have shorter cycle times than those in the High-performance cases will have shorter cycle times than those in the
megabits per second that the base TCP design described above considers. megabits per second that the base TCP design described above considers.
At 1 Gbps, the cycle time is 34 seconds, only 3 seconds at 10 Gbps, and At 1 Gbps, the cycle time is 34 seconds, only 3 seconds at 10 Gbps, and
around a third of a second at 100 Gbps. In these higher performance cases, around a third of a second at 100 Gbps. In these higher-performance cases,
TCP Timestamp options and Protection Against Wrapped Sequences (PAWS) <xref TCP Timestamp Options and Protection Against Wrapped Sequences (PAWS) <xref
target="RFC7323"/> provide the needed capability to detect and discard old target="RFC7323" format="default"/> provide the needed capability to detect and
discard old
duplicates. duplicates.
</t> </t>
</section> </section>
</section> </section>
<section title="Establishing a connection"> <section numbered="true" toc="default">
<t> <name>Establishing a Connection</name>
The &quot;three-way handshake&quot; is the procedure used to establish a <t>
The "three-way handshake" is the procedure used to establish a
connection. This procedure normally is initiated by one TCP peer and connection. This procedure normally is initiated by one TCP peer and
responded to by another TCP peer. The procedure also works if two TCP peers responded to by another TCP peer. The procedure also works if two TCP peers
simultaneously initiate the procedure. When simultaneous open simultaneously initiate the procedure. When simultaneous open
occurs, each TCP peer receives a "SYN" segment that carries no occurs, each TCP peer receives a SYN segment that carries no
acknowledgment after it has sent a &quot;SYN&quot;. Of course, the arrival of acknowledgment after it has sent a SYN. Of course, the arrival of
an old duplicate &quot;SYN&quot; segment can potentially make it appear, to the an old duplicate SYN segment can potentially make it appear, to the
recipient, that a simultaneous connection initiation is in progress. recipient, that a simultaneous connection initiation is in progress.
Proper use of &quot;reset&quot; segments can disambiguate these cases. Proper use of "reset" segments can disambiguate these cases.
</t> </t>
<t> <t>
Several examples of connection initiation follow. Although these Several examples of connection initiation follow. Although these
examples do not show connection synchronization using data-carrying examples do not show connection synchronization using data-carrying
segments, this is perfectly legitimate, so long as the receiving TCP endpoint segments, this is perfectly legitimate, so long as the receiving TCP endpoint
doesn't deliver the data to the user until it is clear the data is doesn't deliver the data to the user until it is clear the data is
valid (e.g., the data is buffered at the receiver until the valid (e.g., the data is buffered at the receiver until the
connection reaches the ESTABLISHED state, given that the three-way handshake connection reaches the ESTABLISHED state, given that the three-way handshake
reduces the possibility of false connections). It is reduces the possibility of false connections). It is
a trade-off between memory and messages to provide a trade-off between memory and messages to provide
information for this checking. information for this checking.
</t> </t>
<t> <t>
The simplest 3WHS is shown in <xref target="handshake" />. The The simplest 3WHS is shown in <xref target="handshake" format="default"/>. The
figures should be interpreted in the following way. Each line is figures should be interpreted in the following way. Each line is
numbered for reference purposes. Right arrows (--&gt;) indicate numbered for reference purposes. Right arrows (--&gt;) indicate
departure of a TCP segment from TCP peer A to TCP peer B, or arrival of a departure of a TCP segment from TCP Peer A to TCP Peer B or arrival of a
segment at B from A. Left arrows (&lt;--), indicate the reverse. segment at B from A. Left arrows (&lt;--) indicate the reverse.
Ellipsis (...) indicates a segment that is still in the network Ellipses (...) indicate a segment that is still in the network
(delayed). (delayed).
Comments appear in parentheses. TCP connection states represent the state AFTER Comments appear in parentheses. TCP connection states represent the state AFTER
the departure or arrival of the segment (whose contents are shown in the departure or arrival of the segment (whose contents are shown in
the center of each line). Segment contents are shown in abbreviated the center of each line). Segment contents are shown in abbreviated
form, with sequence number, control flags, and ACK field. Other form, with sequence number, control flags, and ACK field. Other
fields such as window, addresses, lengths, and text have been left out fields such as window, addresses, lengths, and text have been left out
in the interest of clarity. in the interest of clarity.
</t> </t>
<figure anchor="handshake" title="Basic 3-Way Handshake for Connection Synchroni <figure anchor="handshake">
zation"> <name>Basic Three-Way Handshake for Connection Synchronization</name>
<artwork> <artwork name="" type="" align="left" alt=""><![CDATA[
TCP Peer A TCP Peer B TCP Peer A TCP Peer B
1. CLOSED LISTEN 1. CLOSED LISTEN
2. SYN-SENT --> &lt;SEQ=100>&lt;CTL=SYN&gt; --&gt; SYN-RECEIVED 2. SYN-SENT --> <SEQ=100><CTL=SYN> --> SYN-RECEIVED
3. ESTABLISHED <-- &lt;SEQ=300&gt;&lt;ACK=101&gt;&lt;CTL=SYN,ACK&gt; &lt;-- SYN-RECEIVED 3. ESTABLISHED <-- <SEQ=300><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED
4. ESTABLISHED --> &lt;SEQ=101&gt;&lt;ACK=301&gt;&lt;CTL=ACK&gt; --&gt; ESTABLISHED 4. ESTABLISHED --> <SEQ=101><ACK=301><CTL=ACK> --> ESTABLISHED
5. ESTABLISHED --&gt; &lt;SEQ=101&gt;&lt;ACK=301&gt;&lt;CTL=ACK&gt;&lt;DATA&gt; 5. ESTABLISHED --> <SEQ=101><ACK=301><CTL=ACK><DATA> --> ESTABLISHED
--&gt; ESTABLISHED ]]></artwork>
</artwork> </figure>
</figure> <t>
<t> In line 2 of <xref target="handshake" format="default"/>, TCP Peer A begins by
In line 2 of <xref target="handshake" />, TCP Peer A begins by sending a SYN s sending a SYN segment
egment
indicating that it will use sequence numbers starting with sequence indicating that it will use sequence numbers starting with sequence
number 100. In line 3, TCP Peer B sends a SYN and acknowledges the SYN it number 100. In line 3, TCP Peer B sends a SYN and acknowledges the SYN it
received from TCP Peer A. Note that the acknowledgment field indicates TCP Peer received from TCP Peer A. Note that the acknowledgment field indicates TCP Peer
B is now expecting to hear sequence 101, acknowledging the SYN that B is now expecting to hear sequence 101, acknowledging the SYN that
occupied sequence 100. occupied sequence 100.
</t> </t>
<t> <t>
At line 4, TCP Peer A responds with an empty segment containing an ACK for At line 4, TCP Peer A responds with an empty segment containing an ACK for
TCP Peer B's SYN; and in line 5, TCP Peer A sends some data. Note that the TCP Peer B's SYN; and in line 5, TCP Peer A sends some data. Note that the
sequence number of the segment in line 5 is the same as in line 4 sequence number of the segment in line 5 is the same as in line 4
because the ACK does not occupy sequence number space (if it did, we because the ACK does not occupy sequence number space (if it did, we
would wind up ACKing ACKs!). would wind up ACKing ACKs!).
</t> </t>
<t> <t>
Simultaneous initiation is only slightly more complex, as is shown in Simultaneous initiation is only slightly more complex, as is shown in
<xref target="simul_connect" />. Each TCP peer's connection state cycles from CLOSED to SYN-SENT to SYN-RECEIVED to ESTABLISHED. <xref target="simul_connect" format="default"/>. Each TCP peer's connection state cycles from CLOSED to SYN-SENT to SYN-RECEIVED to ESTABLISHED.
</t> </t>
<figure anchor="simul_connect" title="Simultaneous Connection Synchronization"> <figure anchor="simul_connect">
<artwork> <name>Simultaneous Connection Synchronization</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
TCP Peer A TCP Peer B TCP Peer A TCP Peer B
1. CLOSED CLOSED 1. CLOSED CLOSED
2. SYN-SENT --> &lt;SEQ=100&gt;&lt;CTL=SYN&gt; ... 2. SYN-SENT --> <SEQ=100><CTL=SYN> ...
3. SYN-RECEIVED <-- &lt;SEQ=300&gt;&lt;CTL=SYN&gt; &lt;-- SYN-SENT 3. SYN-RECEIVED <-- <SEQ=300><CTL=SYN> <-- SYN-SENT
4. ... <SEQ=100>&lt;CTL=SYN&gt; --&gt; SYN-RECEIVED 4. ... <SEQ=100><CTL=SYN> --> SYN-RECEIVED
5. SYN-RECEIVED --> <SEQ=100>&lt;ACK=301>&lt;CTL=SYN,ACK&gt; ... 5. SYN-RECEIVED --> <SEQ=100><ACK=301><CTL=SYN,ACK> ...
6. ESTABLISHED <-- &lt;SEQ=300&gt;&lt;ACK=101&gt;&lt;CTL=SYN,ACK&gt; &lt;-- SYN-RECEIVED 6. ESTABLISHED <-- <SEQ=300><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED
7. ... &lt;SEQ=100&gt;&lt;ACK=301&gt;&lt;CTL=SYN,ACK&gt; --&gt; ES 7. ... <SEQ=100><ACK=301><CTL=SYN,ACK> --> ESTABLISHED
TABLISHED ]]></artwork>
</artwork> </figure>
</figure> <t>
<t> A TCP implementation <bcp14>MUST</bcp14> support simultaneous open attempts (MUST-10).
A TCP implementation MUST support simultaneous open attempts (MUST-10).
</t> </t>
<t> <t>
Note that a TCP implementation MUST keep track of whether a Note that a TCP implementation <bcp14>MUST</bcp14> keep track of whether a
connection has reached SYN-RECEIVED state as the result of a connection has reached SYN-RECEIVED state as the result of a
passive OPEN or an active OPEN (MUST-11). passive OPEN or an active OPEN (MUST-11).
</t> </t>
<t> <t>
The principal reason for the three-way handshake is to prevent old The principal reason for the three-way handshake is to prevent old
duplicate connection initiations from causing confusion. To deal with duplicate connection initiations from causing confusion. To deal with
this, a special control message, reset, is specified. If the this, a special control message, reset, is specified. If the
receiving TCP peer is in a non-synchronized state (i.e., SYN-SENT, receiving TCP peer is in a non-synchronized state (i.e., SYN-SENT,
SYN-RECEIVED), it returns to LISTEN on receiving an acceptable reset. SYN-RECEIVED), it returns to LISTEN on receiving an acceptable reset.
If the TCP peer is in one of the synchronized states (ESTABLISHED, If the TCP peer is in one of the synchronized states (ESTABLISHED,
FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), it FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), it
aborts the connection and informs its user. We discuss this latter aborts the connection and informs its user. We discuss this latter
case under "half-open" connections below. case under "half-open" connections below.
</t> </t>
<figure anchor="dup_syn" title="Recovery from Old Duplicate SYN"> <figure anchor="dup_syn">
<artwork> <name>Recovery from Old Duplicate SYN</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
TCP Peer A TCP Peer B TCP Peer A TCP Peer B
1. CLOSED LISTEN 1. CLOSED LISTEN
2. SYN-SENT --> <SEQ=100>&lt;CTL=SYN> ... 2. SYN-SENT --> <SEQ=100>&lt;CTL=SYN> ...
3. (duplicate) ... <SEQ=90>&lt;CTL=SYN> --> SYN-RECEIVED 3. (duplicate) ... <SEQ=90>&lt;CTL=SYN> --> SYN-RECEIVED
4. SYN-SENT <-- &lt;SEQ=300>&lt;ACK=91>&lt;CTL=SYN,ACK> &lt;-- SYN-RECEIVED 4. SYN-SENT <-- <SEQ=300><ACK=91><CTL=SYN,ACK> <-- SYN-RECEIVED
5. SYN-SENT --> <SEQ=91>&lt;CTL=RST> --> LISTEN 5. SYN-SENT --> <SEQ=91>&lt;CTL=RST> --> LISTEN
6. ... <SEQ=100>&lt;CTL=SYN> --> SYN-RECEIVED 6. ... <SEQ=100>&lt;CTL=SYN> --> SYN-RECEIVED
7. ESTABLISHED <-- &lt;SEQ=400>&lt;ACK=101>&lt;CTL=SYN,ACK> &lt;-- SYN-RECEIVED 7. ESTABLISHED <-- <SEQ=400><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED
8. ESTABLISHED --> &lt;SEQ=101>&lt;ACK=401>&lt;CTL=ACK> --> ESTABLISHED 8. ESTABLISHED --> <SEQ=101><ACK=401><CTL=ACK> --> ESTABLISHED
</artwork> ]]></artwork>
</figure> </figure>
<t> <t>
As a simple example of recovery from old duplicates, consider As a simple example of recovery from old duplicates, consider
<xref target="dup_syn" />. At line 3, an old duplicate SYN arrives at TCP Peer B. TCP Peer B <xref target="dup_syn" format="default"/>. At line 3, an old duplicate SYN arrives at TCP Peer B. TCP Peer B
cannot tell that this is an old duplicate, so it responds normally cannot tell that this is an old duplicate, so it responds normally
(line 4). TCP Peer A detects that the ACK field is incorrect and returns a (line 4). TCP Peer A detects that the ACK field is incorrect and returns a
RST (reset) with its SEQ field selected to make the segment RST (reset) with its SEQ field selected to make the segment
believable. TCP Peer B, on receiving the RST, returns to the LISTEN state. believable. TCP Peer B, on receiving the RST, returns to the LISTEN state.
When the original SYN finally arrives at line 6, the When the original SYN finally arrives at line 6, the
synchronization proceeds normally. If the SYN at line 6 had arrived synchronization proceeds normally. If the SYN at line 6 had arrived
before the RST, a more complex exchange might have occurred with RST's before the RST, a more complex exchange might have occurred with RSTs
sent in both directions. sent in both directions.
</t> </t>
<section title="Half-Open Connections and Other Anomalies"> <section numbered="true" toc="default">
<t> <name>Half-Open Connections and Other Anomalies</name>
An established connection is said to be &quot;half-open&quot; if one of the <t>
An established connection is said to be "half-open" if one of the
TCP peers has closed or aborted the connection at its end without the TCP peers has closed or aborted the connection at its end without the
knowledge of the other, or if the two ends of the connection have knowledge of the other, or if the two ends of the connection have
become desynchronized owing to a failure or reboot that resulted in loss of become desynchronized owing to a failure or reboot that resulted in loss of
memory. Such connections will automatically become reset if an memory. Such connections will automatically become reset if an
attempt is made to send data in either direction. However, half-open attempt is made to send data in either direction. However, half-open
connections are expected to be unusual. connections are expected to be unusual.
</t> </t>
<t> <t>
If at site A the connection no longer exists, then an attempt by the If at site A the connection no longer exists, then an attempt by the
user at site B to send any data on it will result in the site B TCP endpoint user at site B to send any data on it will result in the site B TCP endpoint
receiving a reset control message. Such a message indicates to the receiving a reset control message. Such a message indicates to the
site B TCP endpoint that something is wrong, and it is expected to abort the site B TCP endpoint that something is wrong, and it is expected to abort the
connection. connection.
</t> </t>
<t> <t>
Assume that two user processes A and B are communicating with one Assume that two user processes A and B are communicating with one
another when a failure or reboot occurs causing loss of memory to A's TCP implementation. another when a failure or reboot occurs causing loss of memory to A's TCP implementation.
Depending on the operating system supporting A's TCP implementation, it is likely Depending on the operating system supporting A's TCP implementation, it is likely
that some error recovery mechanism exists. When the TCP endpoint is up again, that some error recovery mechanism exists. When the TCP endpoint is up again,
A is likely to start again from the beginning or from a recovery A is likely to start again from the beginning or from a recovery
point. As a result, A will probably try to OPEN the connection again point. As a result, A will probably try to OPEN the connection again
or try to SEND on the connection it believes open. In the latter or try to SEND on the connection it believes open. In the latter
case, it receives the error message &quot;connection not open&quot; from the case, it receives the error message "connection not open" from the
local (A's) TCP implementation. In an attempt to establish the connection, A's TCP implementation local (A's) TCP implementation. In an attempt to establish the connection, A's TCP implementation
will send a segment containing SYN. This scenario leads to the will send a segment containing SYN. This scenario leads to the
example shown in <xref target="half_open" />. After TCP Peer A reboots, the u example shown in <xref target="half_open" format="default"/>. After TCP Peer
ser attempts to A reboots, the user attempts to
re-open the connection. TCP Peer B, in the meantime, thinks the connection reopen the connection. TCP Peer B, in the meantime, thinks the connection
is open. is open.
</t> </t>
<figure anchor="half_open" title="Half-Open Connection Discovery"> <figure anchor="half_open">
<artwork> <name>Half-Open Connection Discovery</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
TCP Peer A TCP Peer B TCP Peer A TCP Peer B
1. (REBOOT) (send 300,receive 100) 1. (REBOOT) (send 300,receive 100)
2. CLOSED ESTABLISHED 2. CLOSED ESTABLISHED
3. SYN-SENT --> <SEQ=400>&lt;CTL=SYN> --> (??) 3. SYN-SENT --> <SEQ=400>&lt;CTL=SYN> --> (??)
4. (!!) <-- &lt;SEQ=300>&lt;ACK=100>&lt;CTL=ACK> &lt;-- ESTABLISHED 4. (!!) <-- <SEQ=300><ACK=100><CTL=ACK> <-- ESTABLISHED
5. SYN-SENT --> <SEQ=100>&lt;CTL=RST> --> (Abort!!) 5. SYN-SENT --> <SEQ=100>&lt;CTL=RST> --> (Abort!!)
6. SYN-SENT CLOSED 6. SYN-SENT CLOSED
7. SYN-SENT --> &lt;SEQ=400>&lt;CTL=SYN> --> 7. SYN-SENT --> <SEQ=400><CTL=SYN> -->
</artwork> ]]></artwork>
</figure> </figure>
<t> <t>
When the SYN arrives at line 3, TCP Peer B, being in a synchronized state, When the SYN arrives at line 3, TCP Peer B, being in a synchronized state,
and the incoming segment outside the window, responds with an and the incoming segment outside the window, responds with an
acknowledgment indicating what sequence it next expects to hear (ACK acknowledgment indicating what sequence it next expects to hear (ACK
100). TCP Peer A sees that this segment does not acknowledge anything it 100). TCP Peer A sees that this segment does not acknowledge anything it
sent and, being unsynchronized, sends a reset (RST) because it has sent and, being unsynchronized, sends a reset (RST) because it has
detected a half-open connection. TCP Peer B aborts at line 5. TCP Peer A will detected a half-open connection. TCP Peer B aborts at line 5. TCP Peer A will
continue to try to establish the connection; the problem is now continue to try to establish the connection; the problem is now
reduced to the basic 3-way handshake of <xref target="handshake" />. reduced to the basic three-way handshake of <xref target="handshake" format="default"/>.
</t> </t>
<t> <t>
An interesting alternative case occurs when TCP Peer A reboots and TCP Peer B An interesting alternative case occurs when TCP Peer A reboots and TCP Peer B
tries to send data on what it thinks is a synchronized connection. tries to send data on what it thinks is a synchronized connection.
This is illustrated in <xref target="crash" />. In this case, the data arriving at This is illustrated in <xref target="crash" format="default"/>. In this case, the data arriving at
TCP Peer A from TCP Peer B (line 2) is unacceptable because no such connection TCP Peer A from TCP Peer B (line 2) is unacceptable because no such connection
exists, so TCP Peer A sends a RST. The RST is acceptable so TCP Peer B exists, so TCP Peer A sends a RST. The RST is acceptable so TCP Peer B
processes it and aborts the connection. processes it and aborts the connection.
</t> </t>
<figure anchor="crash" title="Active Side Causes Half-Open Connection Discover <figure anchor="crash">
y"> <name>Active Side Causes Half-Open Connection Discovery</name>
<artwork> <artwork name="" type="" align="left" alt=""><![CDATA[
TCP Peer A TCP Peer B TCP Peer A TCP Peer B
1. (REBOOT) (send 300,receive 100) 1. (REBOOT) (send 300,receive 100)
2. (??) <-- &lt;SEQ=300>&lt;ACK=100>&lt;DATA=10>&lt;CTL=ACK> &lt;-- ESTABLISHED 2. (??) <-- <SEQ=300><ACK=100><DATA=10><CTL=ACK> <-- ESTABLISHED
3. --> &lt;SEQ=100>&lt;CTL=RST> --> (ABORT!!) 3. --> <SEQ=100><CTL=RST> --> (ABORT!!)
</artwork> ]]></artwork>
</figure> </figure>
<t> <t>
In <xref target="passive_reset" />, two TCP Peers A and B with passive connect In <xref target="passive_reset" format="default"/>, two TCP Peers A and B with
ions passive connections
waiting for SYN are depicted. An old duplicate arriving at TCP Peer B (line 2) stirs B waiting for SYN are depicted. An old duplicate arriving at TCP Peer B (line 2) stirs B
into action. A SYN-ACK is returned (line 3) and causes TCP Peer A to into action. A SYN-ACK is returned (line 3) and causes TCP Peer A to
generate a RST (the ACK in line 3 is not acceptable). TCP Peer B accepts generate a RST (the ACK in line 3 is not acceptable). TCP Peer B accepts
the reset and returns to its passive LISTEN state. the reset and returns to its passive LISTEN state.
</t> </t>
<figure anchor="passive_reset" title="Old Duplicate SYN Initiates a Reset on two <figure anchor="passive_reset">
Passive Sockets"> <name>Old Duplicate SYN Initiates a Reset on Two Passive Sockets</na
<artwork> me>
<artwork name="" type="" align="left" alt=""><![CDATA[
TCP Peer A TCP Peer B TCP Peer A TCP Peer B
1. LISTEN LISTEN 1. LISTEN LISTEN
2. ... <SEQ=Z>&lt;CTL=SYN> --> SYN-RECEIVED 2. ... <SEQ=Z>&lt;CTL=SYN> --> SYN-RECEIVED
3. (??) <-- &lt;SEQ=X>&lt;ACK=Z+1>&lt;CTL=SYN,ACK> &lt;-- SYN-RECEIVED 3. (??) <-- <SEQ=X><ACK=Z+1><CTL=SYN,ACK> &lt;-- SYN-RECEIVED
4. --> <SEQ=Z+1>&lt;CTL=RST> --> (return to LISTEN!) 4. --> <SEQ=Z+1>&lt;CTL=RST> --> (return to LISTEN!)
5. LISTEN LISTEN 5. LISTEN LISTEN
</artwork> ]]></artwork>
</figure> </figure>
<t> <t>
A variety of other cases are possible, all of which are accounted for A variety of other cases are possible, all of which are accounted for
by the following rules for RST generation and processing. by the following rules for RST generation and processing.
</t> </t>
</section> </section>
<section title="Reset Generation"> <section numbered="true" toc="default">
<t> <name>Reset Generation</name>
A TCP user or application can issue a reset on a connection at any time, thoug <t>
h reset events are also generated by the protocol itself when various error cond A TCP user or application can issue a reset on a connection at any time, thoug
itions occur, as described below. The side of a connection issuing a reset shou h reset events are also generated by the protocol itself when various error cond
ld enter the TIME-WAIT state, as this generally helps to reduce the load on busy itions occur, as described below. The side of a connection issuing a reset shou
servers for reasons described in <xref target="FTY99"/>. ld enter the TIME-WAIT state, as this generally helps to reduce the load on busy
servers for reasons described in <xref target="FTY99" format="default"/>.
</t> </t>
<t> <t>
As a general rule, reset (RST) is sent whenever a segment arrives As a general rule, reset (RST) is sent whenever a segment arrives
that apparently is not intended for the current connection. A reset that apparently is not intended for the current connection. A reset
must not be sent if it is not clear that this is the case. must not be sent if it is not clear that this is the case.
</t> </t>
<t> <t>
There are three groups of states: There are three groups of states:
</t> </t>
<t><list> <ol type="1" spacing="normal">
<t> <li>
1. If the connection does not exist (CLOSED) then a reset is sent <t>
If the connection does not exist (CLOSED), then a reset is sent
in response to any incoming segment except another reset. A SYN in response to any incoming segment except another reset. A SYN
segment that does not match an existing connection is rejected segment that does not match an existing connection is rejected
by this means. by this means.
</t> </t>
<t> <t>
If the incoming segment has the ACK bit set, the reset takes its If the incoming segment has the ACK bit set, the reset takes its
sequence number from the ACK field of the segment, otherwise the sequence number from the ACK field of the segment; otherwise, the
reset has sequence number zero and the ACK field is set to the sum reset has sequence number zero and the ACK field is set to the sum
of the sequence number and segment length of the incoming segment. of the sequence number and segment length of the incoming segment.
The connection remains in the CLOSED state. The connection remains in the CLOSED state.
</t> </t>
<t> </li>
2. If the connection is in any non-synchronized state (LISTEN, <li>
<t>
If the connection is in any non-synchronized state (LISTEN,
SYN-SENT, SYN-RECEIVED), and the incoming segment acknowledges SYN-SENT, SYN-RECEIVED), and the incoming segment acknowledges
something not yet sent (the segment carries an unacceptable ACK), or something not yet sent (the segment carries an unacceptable ACK), or
if an incoming segment has a security level or compartment <xref target="seccomp"/> that if an incoming segment has a security level or compartment (<xref target="seccomp" format="default"/>) that
does not exactly match the level and compartment requested for the does not exactly match the level and compartment requested for the
connection, a reset is sent. connection, a reset is sent.
</t> </t>
<t> <t>
If the incoming segment has an ACK field, the reset takes its If the incoming segment has an ACK field, the reset takes its
sequence number from the ACK field of the segment, otherwise the sequence number from the ACK field of the segment; otherwise, the
reset has sequence number zero and the ACK field is set to the sum reset has sequence number zero and the ACK field is set to the sum
of the sequence number and segment length of the incoming segment. of the sequence number and segment length of the incoming segment.
The connection remains in the same state. The connection remains in the same state.
</t> </t>
<t> </li>
3. If the connection is in a synchronized state (ESTABLISHED, <li>
<t>
If the connection is in a synchronized state (ESTABLISHED,
FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT),
any unacceptable segment (out of window sequence number or any unacceptable segment (out-of-window sequence number or
unacceptable acknowledgment number) must be responded to with an empty unacceptable acknowledgment number) must be responded to with an empty
acknowledgment segment (without any user data) containing the current send-sequence number acknowledgment segment (without any user data) containing the current send sequence number
and an acknowledgment indicating the next sequence number expected and an acknowledgment indicating the next sequence number expected
to be received, and the connection remains in the same state. to be received, and the connection remains in the same state.
</t> </t>
<t> <t>
If an incoming segment has a security level or compartment If an incoming segment has a security level or compartment
that does not exactly match the level and compartment that does not exactly match the level and compartment
requested for the connection, a reset is sent and requested for the connection, a reset is sent and
the connection goes to the CLOSED state. The reset takes its sequence the connection goes to the CLOSED state. The reset takes its sequence
number from the ACK field of the incoming segment. number from the ACK field of the incoming segment.
</t> </t>
</list></t> </li>
</section> </ol>
<section title="Reset Processing"> </section>
<t> <section numbered="true" toc="default">
<name>Reset Processing</name>
<t>
In all states except SYN-SENT, all reset (RST) segments are validated In all states except SYN-SENT, all reset (RST) segments are validated
by checking their SEQ-fields. A reset is valid if its sequence number by checking their SEQ fields. A reset is valid if its sequence number
is in the window. In the SYN-SENT state (a RST received in response is in the window. In the SYN-SENT state (a RST received in response
to an initial SYN), the RST is acceptable if the ACK field to an initial SYN), the RST is acceptable if the ACK field
acknowledges the SYN. acknowledges the SYN.
</t> </t>
<t> <t>
The receiver of a RST first validates it, then changes state. If the The receiver of a RST first validates it, then changes state. If the
receiver was in the LISTEN state, it ignores it. If the receiver was receiver was in the LISTEN state, it ignores it. If the receiver was
in SYN-RECEIVED state and had previously been in the LISTEN state, in SYN-RECEIVED state and had previously been in the LISTEN state,
then the receiver returns to the LISTEN state, otherwise the receiver then the receiver returns to the LISTEN state; otherwise, the receiver
aborts the connection and goes to the CLOSED state. If the receiver aborts the connection and goes to the CLOSED state. If the receiver
was in any other state, it aborts the connection and advises the user was in any other state, it aborts the connection and advises the user
and goes to the CLOSED state. and goes to the CLOSED state.
</t> </t>
<t> <t>
TCP implementations SHOULD allow a received RST segment to include data (SHLD- TCP implementations <bcp14>SHOULD</bcp14> allow a received RST segment to incl
2). ude data (SHLD-2).
It has been suggested that a RST segment could contain diagnostic data that It has been suggested that a RST segment could contain diagnostic data that
explains the cause of the RST. No standard has yet been established for such data. explains the cause of the RST. No standard has yet been established for such data.
</t> </t>
</section> </section>
</section> </section>
<section title="Closing a Connection"> <section numbered="true" toc="default">
<t> <name>Closing a Connection</name>
CLOSE is an operation meaning &quot;I have no more data to send.&quot; The <t>
CLOSE is an operation meaning "I have no more data to send." The
notion of closing a full-duplex connection is subject to ambiguous notion of closing a full-duplex connection is subject to ambiguous
interpretation, of course, since it may not be obvious how to treat interpretation, of course, since it may not be obvious how to treat
the receiving side of the connection. We have chosen to treat CLOSE the receiving side of the connection. We have chosen to treat CLOSE
in a simplex fashion. The user who CLOSEs may continue to RECEIVE in a simplex fashion. The user who CLOSEs may continue to RECEIVE
until the TCP receiver is told that the remote peer has CLOSED also. Thus, a program until the TCP receiver is told that the remote peer has CLOSED also. Thus, a program
could initiate several SENDs followed by a CLOSE, and then continue to could initiate several SENDs followed by a CLOSE, and then continue to
RECEIVE until signaled that a RECEIVE failed because the remote peer RECEIVE until signaled that a RECEIVE failed because the remote peer
has CLOSED. The TCP implementation will signal a user, even if no has CLOSED. The TCP implementation will signal a user, even if no
RECEIVEs are outstanding, that the remote peer has closed, so the user RECEIVEs are outstanding, that the remote peer has closed, so the user
can terminate their side gracefully. A TCP implementation will reliably deliver all can terminate their side gracefully. A TCP implementation will reliably deliver all
buffers SENT before the connection was CLOSED so a user who expects no buffers SENT before the connection was CLOSED so a user who expects no
data in return need only wait to hear the connection was CLOSED data in return need only wait to hear the connection was CLOSED
successfully to know that all their data was received at the destination successfully to know that all their data was received at the destination
TCP endpoint. Users must keep reading connections they close for sending until TCP endpoint. Users must keep reading connections they close for sending until
the TCP implementation indicates there is no more data. the TCP implementation indicates there is no more data.
</t> </t>
<t> <t>
There are essentially three cases: There are essentially three cases:
</t> </t>
<t><list> <ol type="%d)" spacing="normal">
<t> <li>
1) The user initiates by telling the TCP implementation to CLOSE the connect The user initiates by telling the TCP implementation to CLOSE the connection
ion (TCP Peer A in <xref target="normal_close"/>). (TCP Peer A in <xref target="normal_close" format="default"/>).
</t> </li>
<t> <li>
2) The remote TCP endpoint initiates by sending a FIN control signal (TCP Pe The remote TCP endpoint initiates by sending a FIN control signal (TCP Peer
er B in <xref target="normal_close"/>). B in <xref target="normal_close" format="default"/>).
</t> </li>
<t> <li>
3) Both users CLOSE simultaneously (<xref target="simul_close"/>). Both users CLOSE simultaneously (<xref target="simul_close" format="default"
/>).
</li>
</ol>
<dl newline="false" spacing="normal">
<dt>Case 1:</dt>
<dd>
<t>
Local user initiates the close
</t> </t>
</list></t> <t>
<t><list style="hanging">
<t hangText="Case 1: Local user initiates the close"><vspace />
<vspace />
In this case, a FIN segment can be constructed and placed on the In this case, a FIN segment can be constructed and placed on the
outgoing segment queue. No further SENDs from the user will be outgoing segment queue. No further SENDs from the user will be
accepted by the TCP implementation, and it enters the FIN-WAIT-1 state. RECEIVEs accepted by the TCP implementation, and it enters the FIN-WAIT-1 state. RECEIVEs
are allowed in this state. All segments preceding and including FIN are allowed in this state. All segments preceding and including FIN
will be retransmitted until acknowledged. When the other TCP peer has will be retransmitted until acknowledged. When the other TCP peer has
both acknowledged the FIN and sent a FIN of its own, the first TCP peer both acknowledged the FIN and sent a FIN of its own, the first TCP peer
can ACK this FIN. Note that a TCP endpoint receiving a FIN will ACK but not can ACK this FIN. Note that a TCP endpoint receiving a FIN will ACK but not
send its own FIN until its user has CLOSED the connection also. send its own FIN until its user has CLOSED the connection also.
</t> </t>
<t hangText="Case 2: TCP endpoint receives a FIN from the network"><vspace /> </dd>
<vspace /> <dt>Case 2:</dt>
<dd>
<t>
TCP endpoint receives a FIN from the network
</t>
<t>
If an unsolicited FIN arrives from the network, the receiving TCP endpoint If an unsolicited FIN arrives from the network, the receiving TCP endpoint
can ACK it and tell the user that the connection is closing. The can ACK it and tell the user that the connection is closing. The
user will respond with a CLOSE, upon which the TCP endpoint can send a FIN to user will respond with a CLOSE, upon which the TCP endpoint can send a FIN to
the other TCP peer after sending any remaining data. The TCP endpoint then waits the other TCP peer after sending any remaining data. The TCP endpoint then waits
until its own FIN is acknowledged whereupon it deletes the until its own FIN is acknowledged whereupon it deletes the
connection. If an ACK is not forthcoming, after the user timeout connection. If an ACK is not forthcoming, after the user timeout
the connection is aborted and the user is told. the connection is aborted and the user is told.
</t> </t>
<t hangText="Case 3: Both users close simultaneously"><vspace /> </dd>
<vspace /> <dt>Case 3:</dt>
<dd>
<t>
Both users close simultaneously
</t>
<t>
A simultaneous CLOSE by users at both ends of a connection causes A simultaneous CLOSE by users at both ends of a connection causes
FIN segments to be exchanged (<xref target="simul_close"/>). When all segments preceding the FINs FIN segments to be exchanged (<xref target="simul_close" format="default"/>). When all segments preceding the FINs
have been processed and acknowledged, each TCP peer can ACK the FIN it have been processed and acknowledged, each TCP peer can ACK the FIN it
has received. Both will, upon receiving these ACKs, delete the has received. Both will, upon receiving these ACKs, delete the
connection. connection.
</t> </t>
</list></t> </dd>
<figure anchor="normal_close" title="Normal Close Sequence"> </dl>
<artwork> <figure anchor="normal_close">
<name>Normal Close Sequence</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
TCP Peer A TCP Peer B TCP Peer A TCP Peer B
1. ESTABLISHED ESTABLISHED 1. ESTABLISHED ESTABLISHED
2. (Close) 2. (Close)
FIN-WAIT-1 --> &lt;SEQ=100&gt;&lt;ACK=300&gt;&lt;CTL=FIN,ACK&gt; --&gt; CLOSE-WAIT FIN-WAIT-1 --> <SEQ=100><ACK=300><CTL=FIN,ACK> --> CLOSE-WAIT
3. FIN-WAIT-2 <-- &lt;SEQ=300&gt;&lt;ACK=101&gt;&lt;CTL=ACK&gt; &lt;-- CLOSE-WAIT 3. FIN-WAIT-2 <-- <SEQ=300><ACK=101><CTL=ACK> <-- CLOSE-WAIT
4. (Close) 4. (Close)
TIME-WAIT <-- &lt;SEQ=300&gt;&lt;ACK=101&gt;&lt;CTL=FIN,ACK&gt; &lt;-- LAST-ACK TIME-WAIT <-- <SEQ=300><ACK=101><CTL=FIN,ACK> <-- LAST-ACK
5. TIME-WAIT --> &lt;SEQ=101&gt;&lt;ACK=301&gt;&lt;CTL=ACK&gt; --&gt; CLOSED 5. TIME-WAIT --> <SEQ=101><ACK=301><CTL=ACK> --> CLOSED
6. (2 MSL) 6. (2 MSL)
CLOSED CLOSED
</artwork> ]]></artwork>
</figure> </figure>
<figure anchor="simul_close" title="Simultaneous Close Sequence"> <figure anchor="simul_close">
<artwork> <name>Simultaneous Close Sequence</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
TCP Peer A TCP Peer B TCP Peer A TCP Peer B
1. ESTABLISHED ESTABLISHED 1. ESTABLISHED ESTABLISHED
2. (Close) (Close) 2. (Close) (Close)
FIN-WAIT-1 --&gt; &lt;SEQ=100&gt;&lt;ACK=300&gt;&lt;CTL=FIN,ACK&gt; ... FI FIN-WAIT-1 --> <SEQ=100><ACK=300><CTL=FIN,ACK> ... FIN-WAIT-1
N-WAIT-1 <-- <SEQ=300><ACK=100><CTL=FIN,ACK> <--
&lt;-- &lt;SEQ=300&gt;&lt;ACK=100&gt;&lt;CTL=FIN,ACK&gt; &lt;-- ... <SEQ=100><ACK=300><CTL=FIN,ACK> -->
... &lt;SEQ=100&gt;&lt;ACK=300&gt;&lt;CTL=FIN,ACK&gt; --&gt;
3. CLOSING --&gt; &lt;SEQ=101&gt;&lt;ACK=301&gt;&lt;CTL=ACK&gt; ... CL 3. CLOSING --> <SEQ=101><ACK=301><CTL=ACK> ... CLOSING
OSING <-- <SEQ=301><ACK=101><CTL=ACK> <--
&lt;-- &lt;SEQ=301&gt;&lt;ACK=101&gt;&lt;CTL=ACK&gt; &lt;-- ... <SEQ=101><ACK=301><CTL=ACK> -->
... &lt;SEQ=101&gt;&lt;ACK=301&gt;&lt;CTL=ACK&gt; --&gt;
4. TIME-WAIT TIME-WAIT 4. TIME-WAIT TIME-WAIT
(2 MSL) (2 MSL) (2 MSL) (2 MSL)
CLOSED CLOSED CLOSED CLOSED
</artwork> ]]></artwork>
</figure> </figure>
<t> <t>
A TCP connection may terminate in two ways: (1) the normal A TCP connection may terminate in two ways: (1) the normal
TCP close sequence using a FIN handshake (<xref target="normal_close"/>), and (2) an &quot;abort&quot; TCP close sequence using a FIN handshake (<xref target="normal_close" format="default"/>), and (2) an "abort"
in which one or more RST segments are sent and the in which one or more RST segments are sent and the
connection state is immediately discarded. If the local connection state is immediately discarded. If the local
TCP connection is closed by the remote side due to a FIN or TCP connection is closed by the remote side due to a FIN or
RST received from the remote side, then the local RST received from the remote side, then the local
application MUST be informed whether it closed normally or application <bcp14>MUST</bcp14> be informed whether it closed normally or
was aborted (MUST-12). was aborted (MUST-12).
</t> </t>
<t> <t>
</t> </t>
<section title="Half-Closed Connections"> <section numbered="true" toc="default">
<t> <name>Half-Closed Connections</name>
<t>
The normal TCP close sequence delivers buffered data The normal TCP close sequence delivers buffered data
reliably in both directions. Since the two directions of a reliably in both directions. Since the two directions of a
TCP connection are closed independently, it is possible for TCP connection are closed independently, it is possible for
a connection to be &quot;half closed,&quot; i.e., closed in only one a connection to be "half closed", i.e., closed in only one
direction, and a host is permitted to continue sending data direction, and a host is permitted to continue sending data
in the open direction on a half-closed connection. in the open direction on a half-closed connection.
</t> </t>
<t> <t>
A host MAY implement a &quot;half-duplex&quot; TCP close sequence, s A host <bcp14>MAY</bcp14> implement a "half-duplex" TCP close sequen
o ce, so
that an application that has called CLOSE cannot continue to that an application that has called CLOSE cannot continue to
read data from the connection (MAY-1). If such a host issues a read data from the connection (MAY-1). If such a host issues a
CLOSE call while received data is still pending in the TCP connection, or CLOSE call while received data is still pending in the TCP connection, or
if new data is received after CLOSE is called, its TCP implementation if new data is received after CLOSE is called, its TCP implementation
SHOULD send a RST to show that data was lost (SHLD-3). See <xref <bcp14>SHOULD</bcp14> send a RST to show that data was lost (SHLD-3).
target="RFC2525"/> section 2.17 for discussion. See <xref target="RFC2525" section="2.17" sectionFormat="comma" format="defaul
t"/> for discussion.
</t> </t>
<t> <t>
When a connection is closed actively, it MUST linger in the When a connection is closed actively, it <bcp14>MUST</bcp14> linger
in the
TIME-WAIT state for a time 2xMSL (Maximum Segment Lifetime) (MUST-13). TIME-WAIT state for a time 2xMSL (Maximum Segment Lifetime) (MUST-13).
However, it MAY accept a new SYN from the remote TCP endpoint to However, it <bcp14>MAY</bcp14> accept a new SYN from the remote TCP endpoint to
reopen the connection directly from TIME-WAIT state (MAY-2), if it: reopen the connection directly from TIME-WAIT state (MAY-2), if it:
<list> </t>
<t> <ol type="(%d)" spacing="normal">
(1) assigns its initial sequence number for the new <li>
assigns its initial sequence number for the new
connection to be larger than the largest sequence connection to be larger than the largest sequence
number it used on the previous connection incarnation, number it used on the previous connection incarnation,
and and
</t> </li>
<t> <li>
(2) returns to TIME-WAIT state if the SYN turns out to be returns to TIME-WAIT state if the SYN turns out to be
an old duplicate. an old duplicate.
</t> </li>
</list> </ol>
</t> <t>
<t>
When the TCP Timestamp options are available, an improved algorithm is When the TCP Timestamp Options are available, an improved algorithm is
described in <xref target="RFC6191"/> in order to support higher connection described in <xref target="RFC6191" format="default"/> in order to support highe
r connection
establishment rates. This algorithm for reducing TIME-WAIT is a Best Current establishment rates. This algorithm for reducing TIME-WAIT is a Best Current
Practice that SHOULD be implemented, since timestamp options are commonly used, Practice that <bcp14>SHOULD</bcp14> be implemented since Timestamp Options are commonly used,
and using them to reduce TIME-WAIT provides benefits for busy Internet servers (SHLD-4). and using them to reduce TIME-WAIT provides benefits for busy Internet servers (SHLD-4).
</t> </t>
</section> </section>
</section> </section>
<section numbered="true" toc="default">
<section title="Segmentation"> <name>Segmentation</name>
<t> The term &quot;segmentation&quot; refers to the activity TCP performs wh <t> The term "segmentation" refers to the activity TCP performs when ing
en ingesting a stream of bytes from a sending application and packetizing that s esting a stream of bytes from a sending application and packetizing that stream
tream of bytes into TCP segments. Individual TCP segments often do not correspo of bytes into TCP segments. Individual TCP segments often do not correspond one
nd one-for-one to individual send (or socket write) calls from the application. -for-one to individual send (or socket write) calls from the application. Appli
Applications may perform writes at the granularity of messages in the upper lay cations may perform writes at the granularity of messages in the upper-layer pro
er protocol, but TCP guarantees no boundary coherence between the TCP segments s tocol, but TCP guarantees no correlation between the boundaries of TCP segments
ent and received versus user application data read or write buffer boundaries. sent and received and the boundaries of the read or write buffers of user applic
In some specific protocols, such as Remote Direct Memory Access (RDMA) using Dir ation data. In some specific protocols, such as Remote Direct Memory Access (RD
ect Data Placement (DDP) and Marker PDU Aligned Framing (MPA) <xref target="RFC5 MA) using Direct Data Placement (DDP) and Marker PDU Aligned Framing (MPA) <xref
044"/>, there are performance optimizations possible when the relation between T target="RFC5044" format="default"/>, there are performance optimizations possib
CP segments and application data units can be controlled, and MPA includes a spe le when the relation between TCP segments and application data units can be cont
cific mechanism for detecting and verifying this relationship between TCP segmen rolled, and MPA includes a specific mechanism for detecting and verifying this r
ts and application message data structures, but this is specific to applications elationship between TCP segments and application message data structures, but th
like RDMA. In general, multiple goals influence the sizing of TCP segments cre is is specific to applications like RDMA. In general, multiple goals influence
ated by a TCP implementation.</t> the sizing of TCP segments created by a TCP implementation.</t>
<t>Goals driving the sending of larger segments include:
<t>Goals driving the sending of larger segments include: </t>
<list style="symbols"> <ul spacing="normal">
<t>Reducing the number of packets in flight within the network.</t> <li>Reducing the number of packets in flight within the network.</li>
<t>Increasing processing efficiency and potential performance by enabling <li>Increasing processing efficiency and potential performance by enab
a smaller number of interrupts and inter-layer interactions.</t> ling a smaller number of interrupts and inter-layer interactions.</li>
<t>Limiting the overhead of TCP headers.</t> <li>Limiting the overhead of TCP headers.</li>
</list> </ul>
</t> <t>Note that the performance benefits of sending larger segments may dec
<t>Note that the performance benefits of sending larger segments may decreas rease as the size increases, and there may be boundaries where advantages are re
e as the size increases, and there may be boundaries where advantages are revers versed. For instance, on some implementation architectures, 1025 bytes within a
ed. For instance, on some implementation architectures, 1025 bytes within a seg segment could lead to worse performance than 1024 bytes, due purely to data ali
ment could lead to worse performance than 1024 bytes, due purely to data alignme gnment on copy operations.</t>
nt on copy operations.</t> <t>Goals driving the sending of smaller segments include:
<t>Goals driving the sending of smaller segments include: </t>
<list style="symbols"> <ul spacing="normal">
<t>Avoiding sending a TCP segment that would result in an IP datagram larger <li>Avoiding sending a TCP segment that would result in an IP datagram
than the smallest MTU along an IP network path, because this results in either larger than the smallest MTU along an IP network path because this results in
packet loss or packet fragmentation. Making matters worse, some firewalls or m either packet loss or packet fragmentation. Making matters worse, some firewall
iddleboxes may drop fragmented packets or ICMP messages related to fragmentation s or middleboxes may drop fragmented packets or ICMP messages related to fragmen
.</t> tation.</li>
<t>Preventing delays to the application data stream, especially when TCP is <li>Preventing delays to the application data stream, especially when
waiting on the application to generate more data, or when the application is wai TCP is waiting on the application to generate more data, or when the application
ting on an event or input from its peer in order to generate more data.</t> is waiting on an event or input from its peer in order to generate more data.</
<t>Enabling &quot;fate sharing&quot; between TCP segments and lower-layer da li>
ta units (e.g. below IP, for links with cell or frame sizes smaller than the IP <li>Enabling "fate sharing" between TCP segments and lower-layer data
MTU).</t> units (e.g., below IP, for links with cell or frame sizes smaller than the IP MT
</list> U).</li>
</t> </ul>
<t>Towards meeting these competing sets of goals, TCP includes several m
<t>Towards meeting these competing sets of goals, TCP includes several mecha echanisms, including the Maximum Segment Size Option, Path MTU Discovery, the Na
nisms, including the Maximum Segment Size option, Path MTU Discovery, the Nagle gle algorithm, and support for IPv6 Jumbograms, as discussed in the following su
algorithm, and support for IPv6 Jumbograms, as discussed in the following subsec bsections.</t>
tions.</t> <section anchor="mss" numbered="true" toc="default">
<name>Maximum Segment Size Option</name>
<section title="Maximum Segment Size Option" anchor="mss"> <t>
<t> TCP endpoints <bcp14>MUST</bcp14> implement both sending and receiving the M
TCP endpoints MUST implement both sending and receiving the MSS option (MUST SS Option (MUST-14).
-14). </t>
</t> <t>
<t> TCP implementations <bcp14>SHOULD</bcp14> send an MSS Option in
TCP implementations SHOULD send an MSS option in
every SYN segment when its receive MSS differs from the every SYN segment when its receive MSS differs from the
default 536 for IPv4 or 1220 for IPv6 (SHLD-5), and MAY send it always (MAY- default 536 for IPv4 or 1220 for IPv6 (SHLD-5), and <bcp14>MAY</bcp14> send
3). it always (MAY-3).
</t> </t>
<t> <t>
If an MSS option is not received at connection setup, TCP implementations If an MSS Option is not received at connection setup, TCP implementations
MUST assume a default send MSS of 536 (576 - 40) for IPv4 or 1220 (1280 - 60 <bcp14>MUST</bcp14> assume a default send MSS of 536 (576 - 40) for IPv4 or
) for IPv6 (MUST-15). 1220 (1280 - 60) for IPv6 (MUST-15).
</t> </t>
<t> <t>
The maximum size of a segment that TCP endpoint really sends, the The maximum size of a segment that a TCP endpoint really sends, the
&quot;effective send MSS,&quot; MUST be the smaller (MUST-16) of the send MS "effective send MSS", <bcp14>MUST</bcp14> be the smaller (MUST-16) of the se
S nd MSS
(that reflects the available reassembly buffer size at the (that reflects the available reassembly buffer size at the
remote host, the EMTU_R <xref target="RFC1122"/>) and the largest transmissi remote host, the EMTU_R <xref target="RFC1122" format="default"/>) and the l
on size permitted by the IP layer (EMTU_S <xref target="RFC1122"/>): argest transmission size permitted by the IP layer (EMTU_S <xref target="RFC1122
<list style="hanging" hangIndent="4"> " format="default"/>):
<t>Eff.snd.MSS = </t>
<list style="hanging" hangIndent="4"> <t>
<t>min(SendMSS+20, MMS_S) - TCPhdrsize - IPoptionsize</t> Eff.snd.MSS = min(SendMSS+20, MMS_S) - TCPhdrsize - IPoptionsize
</list> </t>
</t> <t>
</list>
where: where:
<list style="symbols"> </t>
<t> <ul spacing="normal">
<li>
SendMSS is the MSS value received from the remote host, SendMSS is the MSS value received from the remote host,
or the default 536 for IPv4 or 1220 for IPv6, if no MSS option is receiv or the default 536 for IPv4 or 1220 for IPv6, if no MSS Option is receiv
ed. ed.
</t> </li>
<t> <li>
MMS_S is the maximum size for a transport-layer message MMS_S is the maximum size for a transport-layer message
that TCP may send. that TCP may send.
</t> </li>
<t> <li>
TCPhdrsize is the size of the fixed TCP header and any options. This is TCPhdrsize is the size of the fixed TCP header and any options. This is
20 in the (rare) case that no options are present, but may be larger if TCP opt 20 in the (rare) case that no options are present but may be larger if TCP Opti
ions are to be sent. Note that some options might not be included on all segmen ons are to be sent. Note that some options might not be included on all segment
ts, but that for each segment sent, the sender should adjust the data length acc s, but that for each segment sent, the sender should adjust the data length acco
ordingly, within the Eff.snd.MSS. rdingly, within the Eff.snd.MSS.
</t> </li>
<t> <li>
IPoptionsize is the size of any IPv4 options or IPv6 extension headers associated with a TCP connection. Note that some options or extension headers might not be included on all packets, but that for each segment sent, the sender should adjust the data length accordingly, within the Eff.snd.MSS. IPoptionsize is the size of any IPv4 options or IPv6 extension headers associated with a TCP connection. Note that some options or extension headers might not be included on all packets, but that for each segment sent, the sender should adjust the data length accordingly, within the Eff.snd.MSS.
</t> </li>
</list> </ul>
</t> <t>
<t> The MSS value to be sent in an MSS Option should be equal to the
The MSS value to be sent in an MSS option should be equal to the
effective MTU minus the fixed IP and TCP headers. By ignoring both effective MTU minus the fixed IP and TCP headers. By ignoring both
IP and TCP options when calculating the value for the MSS option, if IP and TCP Options when calculating the value for the MSS Option, if
there are any IP or TCP options to be sent in a packet, then the there are any IP or TCP Options to be sent in a packet, then the
sender must decrease the size of the TCP data accordingly. RFC 6691 <xref ta sender must decrease the size of the TCP data accordingly. RFC 6691 <xref ta
rget="RFC6691"/> rget="RFC6691" format="default"/>
discusses this in greater detail. discusses this in greater detail.
</t> </t>
<t> <t>
The MSS value to be sent in an MSS option must be less than The MSS value to be sent in an MSS Option must be less than
or equal to: or equal to:
<list> </t>
<t>MMS_R - 20</t> <t indent="3">
</list> MMS_R - 20
</t>
<t>
where MMS_R is the maximum size for a transport-layer where MMS_R is the maximum size for a transport-layer
message that can be received (and reassembled at the IP layer) (MUST-67). TCP obtains message that can be received (and reassembled at the IP layer) (MUST-67). TCP obtains
MMS_R and MMS_S from the IP layer; see the generic call MMS_R and MMS_S from the IP layer; see the generic call
GET_MAXSIZES in Section 3.4 of RFC 1122. These are defined in terms of thei GET_MAXSIZES in Section <xref target="RFC1122" section="3.4" sectionFormat="
r IP MTU equivalents, EMTU_R and EMTU_S <xref target="RFC1122"/>. bare" format="default"/> of RFC 1122. These are defined in terms of their IP MT
</t> U equivalents, EMTU_R and EMTU_S <xref target="RFC1122" format="default"/>.
<t> </t>
<t>
When TCP is used in a situation where either the IP or TCP headers When TCP is used in a situation where either the IP or TCP headers
are not fixed, the sender must reduce the amount of TCP data in are not fixed, the sender must reduce the amount of TCP data in
any given packet by the number of octets used by the IP and TCP any given packet by the number of octets used by the IP and TCP
options. This has been a point of confusion historically, as explained in R options. This has been a point of confusion historically, as explained in R
FC 6691, Section 3.1. FC 6691, Section <xref target="RFC6691" section="3.1" sectionFormat="bare" forma
</t> t="default"/>.
</section> </t>
<section title="Path MTU Discovery" anchor="pmtud"> </section>
<section anchor="pmtud" numbered="true" toc="default">
<t>A TCP implementation may be aware of the MTU on directly connected links, <name>Path MTU Discovery</name>
but will rarely have insight about MTUs across an entire network path. For IPv4 <t>A TCP implementation may be aware of the MTU on directly connected
, RFC 1122 recommends an IP-layer default effective MTU of less than or equal to links, but will rarely have insight about MTUs across an entire network path. F
576 for destinations not directly connected, and for IPv6 this would be 1280. or IPv4, RFC 1122 recommends an IP-layer default effective MTU of less than or e
Using these fixed values limits TCP connection performance and efficiency. Inst qual to 576 for destinations not directly connected, and for IPv6 this would be
ead, implementation of Path MTU Discovery (PMTUD) and Packetization Layer Path M 1280. Using these fixed values limits TCP connection performance and efficiency
TU Discovery (PLPMTUD) is strongly recommended in order for TCP to improve segme . Instead, implementation of Path MTU Discovery (PMTUD) and Packetization Layer
ntation decisions. Both PMTUD and PLPMTUD help TCP choose segment sizes that av Path MTU Discovery (PLPMTUD) is strongly recommended in order for TCP to improv
oid both on-path (for IPv4) and source fragmentation (IPv4 and IPv6).</t> e segmentation decisions. Both PMTUD and PLPMTUD help TCP choose segment sizes
that avoid both on-path (for IPv4) and source fragmentation (IPv4 and IPv6).</t>
<t>PMTUD for IPv4 <xref target="RFC1191"/> or IPv6 <xref target="RFC8201"/> i <t>PMTUD for IPv4 <xref target="RFC1191" format="default"/> or IPv6 <x
s implemented in conjunction between TCP, IP, and ICMP protocols. It relies bot ref target="RFC8201" format="default"/> is implemented in conjunction between TC
h on avoiding source fragmentation and setting the IPv4 DF (don't fragment) flag P, IP, and ICMP. It relies both on avoiding source fragmentation and setting th
, the latter to inhibit on-path fragmentation. It relies on ICMP errors from ro e IPv4 DF (don't fragment) flag, the latter to inhibit on-path fragmentation. I
uters along the path, whenever a segment is too large to traverse a link. Sever t relies on ICMP errors from routers along the path whenever a segment is too la
al adjustments to a TCP implementation with PMTUD are described in RFC 2923 in o rge to traverse a link. Several adjustments to a TCP implementation with PMTUD
rder to deal with problems experienced in practice <xref target="RFC2923"/>. PL are described in RFC 2923 in order to deal with problems experienced in practice
PMTUD <xref target="RFC4821"/> is a Standards Track improvement to PMTUD that re <xref target="RFC2923" format="default"/>. PLPMTUD <xref target="RFC4821" form
laxes the requirement for ICMP support across a path, and improves performance i at="default"/> is a Standards Track improvement to PMTUD that relaxes the requir
n cases where ICMP is not consistently conveyed, but still tries to avoid source ement for ICMP support across a path, and improves performance in cases where IC
fragmentation. The mechanisms in all four of these RFCs are recommended to be MP is not consistently conveyed, but still tries to avoid source fragmentation.
included in TCP implementations.</t> The mechanisms in all four of these RFCs are recommended to be included in TCP
implementations.</t>
<t> <t>
The TCP MSS option specifies an upper bound for the size of packets The TCP MSS Option specifies an upper bound for the size of packets
that can be received (see <xref target="RFC6691"/>). Hence, setting the valu that can be received (see <xref target="RFC6691" format="default"/>). Hence,
e in the MSS option too setting the value in the MSS Option too
small can impact the ability for PMTUD or PLPMTUD to find a larger small can impact the ability for PMTUD or PLPMTUD to find a larger
path MTU. RFC 1191 discusses this implication of many older TCP implementations setting the TCP MSS to 536 (corresponding to the IPv4 576 byte default MTU) for non-local destinations, rather than deriving it from the MTUs of connected interfaces as recommended. path MTU. RFC 1191 discusses this implication of many older TCP implementations setting the TCP MSS to 536 (corresponding to the IPv4 576 byte default MTU) for non-local destinations, rather than deriving it from the MTUs of connected interfaces as recommended.
</t> </t>
</section>
</section> <section numbered="true" toc="default">
<section title="Interfaces with Variable MTU Values"> <name>Interfaces with Variable MTU Values</name>
<t> <t>
The effective MTU can sometimes vary, as when used with variable The effective MTU can sometimes vary, as when used with variable
compression, e.g., RObust Header Compression (ROHC) <xref target="RFC5795"/>. It is compression, e.g., RObust Header Compression (ROHC) <xref target="RFC5795" format="default"/>. It is
tempting for a TCP implementation to advertise the largest possible MSS, to tempting for a TCP implementation to advertise the largest possible MSS, to
support the most efficient use of compressed payloads. support the most efficient use of compressed payloads.
Unfortunately, some compression schemes occasionally need to transmit Unfortunately, some compression schemes occasionally need to transmit
full headers (and thus smaller payloads) to resynchronize state at full headers (and thus smaller payloads) to resynchronize state at
their endpoint compressors/decompressors. If the largest MTU is used their endpoint compressors/decompressors. If the largest MTU is used
to calculate the value to advertise in the MSS option, TCP to calculate the value to advertise in the MSS Option, TCP
retransmission may interfere with compressor resynchronization. retransmission may interfere with compressor resynchronization.
</t> </t>
<t> <t>
As a result, when the effective MTU of an interface varies packet-to-packet, TCP implementations As a result, when the effective MTU of an interface varies packet-to-packet, TCP implementations
SHOULD use the smallest effective MTU of the interface to calculate <bcp14>SHOULD</bcp14> use the smallest effective MTU of the interface to calc
the value to advertise in the MSS option (SHLD-6). ulate
</t> the value to advertise in the MSS Option (SHLD-6).
</section> </t>
<section title="Nagle Algorithm" anchor="nagle"> </section>
<t>The &quot;Nagle algorithm&quot; was described in RFC 896 <xref target="RF <section anchor="nagle" numbered="true" toc="default">
C0896"/> and was recommended in RFC 1122 <xref target="RFC1122"/> for mitigation <name>Nagle Algorithm</name>
of an early problem of too many small packets being generated. It has been imp <t>The "Nagle algorithm" was described in RFC 896 <xref target="RFC089
lemented in most current TCP code bases, sometimes with minor variations (see <x 6" format="default"/> and was recommended in RFC 1122 <xref target="RFC1122" for
ref target="minshall"/>).</t> mat="default"/> for mitigation of an early problem of too many small packets bei
<t>If there is unacknowledged data (i.e., SND.NXT &gt; SND.UNA), then the se ng generated. It has been implemented in most current TCP code bases, sometimes
nding TCP endpoint buffers all user data (regardless of the PSH bit), until the with minor variations (see <xref target="minshall" format="default"/>).</t>
outstanding data has been acknowledged or until the TCP endpoint can send a full <t>If there is unacknowledged data (i.e., SND.NXT &gt; SND.UNA), then
-sized segment (Eff.snd.MSS bytes).</t> the sending TCP endpoint buffers all user data (regardless of the PSH bit) until
<t>A TCP implementation SHOULD implement the Nagle Algorithm to coalesce sho the outstanding data has been acknowledged or until the TCP endpoint can send a
rt segments (SHLD-7). However, there MUST be a way for an application to disabl full-sized segment (Eff.snd.MSS bytes).</t>
e the Nagle algorithm on an individual connection (MUST-17). In all cases, send <t>A TCP implementation <bcp14>SHOULD</bcp14> implement the Nagle algo
ing data is also subject to the limitation imposed by the Slow Start algorithm < rithm to coalesce short segments (SHLD-7). However, there <bcp14>MUST</bcp14> b
xref target="RFC5681"/>.</t> e a way for an application to disable the Nagle algorithm on an individual conne
<t> ction (MUST-17). In all cases, sending data is also subject to the limitation i
Since there can be problematic interactions between the Nagle Algorithm and mposed by the slow start algorithm <xref target="RFC5681" format="default"/>.</t
delayed acknowledgements, some implementations use minor variations of the Nagle >
algorithm, such as the one described in <xref target="minshall"/>. <t>
</t> Since there can be problematic interactions between the Nagle algorithm and
</section> delayed acknowledgments, some implementations use minor variations of the Nagle
<section title="IPv6 Jumbograms"> algorithm, such as the one described in <xref target="minshall" format="default"
<t> />.
</t>
</section>
<section numbered="true" toc="default">
<name>IPv6 Jumbograms</name>
<t>
In order to support TCP over IPv6 Jumbograms, implementations need to In order to support TCP over IPv6 Jumbograms, implementations need to
be able to send TCP segments larger than the 64KB limit that the MSS option can convey. RFC 2675 <xref target="RFC2675"/> be able to send TCP segments larger than the 64-KB limit that the MSS Option can convey. RFC 2675 <xref target="RFC2675" format="default"/>
defines that an MSS value of 65,535 bytes is to be treated as infinity, and Path defines that an MSS value of 65,535 bytes is to be treated as infinity, and Path
MTU Discovery <xref target="RFC8201"/> is used to determine the actual MSS. MTU Discovery <xref target="RFC8201" format="default"/> is used to determine
</t> the actual MSS.
<t> </t>
The Jumbo Payload option need not be implemented or understood by IPv6 nodes <t>
that do not support attachment to links with a MTU greater than 65,575 <xref tar The Jumbo Payload Option need not be implemented or understood by IPv6 nodes
get="RFC2675"/>, and the present IPv6 Node Requirements does not include support that do not support attachment to links with an MTU greater than 65,575 <xref ta
for Jumbograms <xref target="RFC8504"/>. rget="RFC2675" format="default"/>, and the present IPv6 Node Requirements does n
</t> ot include support for Jumbograms <xref target="RFC8504" format="default"/>.
</section> </t>
</section> </section>
<section title="Data Communication" anchor="datacomm"> </section>
<t> <section anchor="datacomm" numbered="true" toc="default">
Once the connection is established data is communicated by the <name>Data Communication</name>
<t>
Once the connection is established, data is communicated by the
exchange of segments. Because segments may be lost due to errors exchange of segments. Because segments may be lost due to errors
(checksum test failure), or network congestion, TCP uses (checksum test failure) or network congestion, TCP uses
retransmission to ensure delivery of every segment. retransmission to ensure delivery of every segment.
Duplicate segments may arrive due to network or TCP retransmission. Duplicate segments may arrive due to network or TCP retransmission.
As discussed in the section on sequence numbers, the TCP implementation performs As discussed in the section on sequence numbers (<xref target="sequence-numbers"/>), the TCP implementation performs
certain tests on the sequence and acknowledgment numbers in the certain tests on the sequence and acknowledgment numbers in the
segments to verify their acceptability. segments to verify their acceptability.
</t> </t>
<t> <t>
The sender of data keeps track of the next sequence number to use in The sender of data keeps track of the next sequence number to use in
the variable SND.NXT. The receiver of data keeps track of the next the variable SND.NXT. The receiver of data keeps track of the next
sequence number to expect in the variable RCV.NXT. The sender of data sequence number to expect in the variable RCV.NXT. The sender of data
keeps track of the oldest unacknowledged sequence number in the keeps track of the oldest unacknowledged sequence number in the
variable SND.UNA. If the data flow is momentarily idle and all data variable SND.UNA. If the data flow is momentarily idle and all data
sent has been acknowledged then the three variables will be equal. sent has been acknowledged, then the three variables will be equal.
</t> </t>
<t> <t>
When the sender creates a segment and transmits it the sender advances When the sender creates a segment and transmits it, the sender advances
SND.NXT. When the receiver accepts a segment it advances RCV.NXT and SND.NXT. When the receiver accepts a segment, it advances RCV.NXT and
sends an acknowledgment. When the data sender receives an sends an acknowledgment. When the data sender receives an
acknowledgment it advances SND.UNA. The extent to which the values of acknowledgment, it advances SND.UNA. The extent to which the values of
these variables differ is a measure of the delay in the communication. these variables differ is a measure of the delay in the communication.
The amount by which the variables are advanced is the length of the The amount by which the variables are advanced is the length of the
data and SYN or FIN flags in the segment. Note that once in the ESTABLISHED state all data and SYN or FIN flags in the segment. Note that, once in the ESTABLISHED state, all
segments must carry current acknowledgment information. segments must carry current acknowledgment information.
</t> </t>
<t> <t>
The CLOSE user call implies a push function (see <xref target="user-api"/>), a The CLOSE user call implies a push function (see <xref target="user-api" forma
s does the FIN control t="default"/>), as does the FIN control
flag in an incoming segment. flag in an incoming segment.
</t> </t>
<section title="Retransmission Timeout" anchor="RTO"> <section anchor="RTO" numbered="true" toc="default">
<t> <name>Retransmission Timeout</name>
<t>
Because of the variability of the networks that compose an Because of the variability of the networks that compose an
internetwork system and the wide range of uses of TCP connections the internetwork system and the wide range of uses of TCP connections, the
retransmission timeout (RTO) must be dynamically determined. retransmission timeout (RTO) must be dynamically determined.
</t> </t>
<t> <t>
The RTO MUST be computed according to the The RTO <bcp14>MUST</bcp14> be computed according to the
algorithm in <xref target="RFC6298"/>, including Karn's algorithm for taki algorithm in <xref target="RFC6298" format="default"/>, including Karn's a
ng RTT samples (MUST-18). lgorithm for taking RTT samples (MUST-18).
</t> </t>
<t> <t>
RFC 793 contains an early example procedure for computing the RTO, based o RFC 793 contains an early example procedure for computing the RTO, based o
n work mentioned in IEN 177 <xref target="IEN177"/>. This was then replaced by n work mentioned in IEN 177 <xref target="IEN177" format="default"/>. This was
the algorithm described in RFC 1122, and subsequently updated in RFC 2988, and t then replaced by the algorithm described in RFC 1122, which was subsequently upd
hen again in RFC 6298. ated in RFC 2988 and then again in RFC 6298.
</t> </t>
<t> <t>
RFC 1122 allows that if a retransmitted packet is identical to the original RFC 1122 allows that if a retransmitted packet is identical to the original
packet (which implies not only that the data boundaries have not changed, but packet (which implies not only that the data boundaries have not changed, but
also that none of the headers have changed), then the same IPv4 Identification also that none of the headers have changed), then the same IPv4 Identification
field MAY be used (see Section 3.2.1.5 of RFC 1122) (MAY-4). The same IP field <bcp14>MAY</bcp14> be used (see Section <xref target="RFC1122" section="3.
identification field may be reused anyways, since it is only meaningful when a 2.1.5" sectionFormat="bare" format="default"/> of RFC 1122) (MAY-4). The same I
datagram is fragmented <xref target="RFC6864"/>. TCP implementations should not P
rely on or typically Identification field may be reused anyways since it is only meaningful when a
datagram is fragmented <xref target="RFC6864" format="default"/>. TCP implement
ations should not rely on or typically
interact with this IPv4 header field in any way. It is not a reasonable way to interact with this IPv4 header field in any way. It is not a reasonable way to
either indicate duplicate sent segments, nor to identify duplicate received indicate duplicate sent segments nor to identify duplicate received
segments. segments.
</t> </t>
</section> </section>
<section title="TCP Congestion Control"> <section numbered="true" toc="default">
<t>RFC 2914 <xref target="RFC2914"/> explains the importance of congestion contr <name>TCP Congestion Control</name>
ol for the Internet.</t> <t>RFC 2914 <xref target="RFC2914" format="default"/> explains the imp
ortance of congestion control for the Internet.</t>
<t>RFC 1122 required implementation of Van Jacobson's congestion control algorit <t>RFC 1122 required implementation of Van Jacobson's congestion contr
hms slow start and congestion avoidance together with exponential back-off for s ol algorithms slow start and congestion avoidance together with exponential back
uccessive RTO values for the same segment. RFC 2581 provided IETF Standards Tra off for successive RTO values for the same segment. RFC 2581 provided IETF Stan
ck description of slow start and congestion avoidance, along with fast retransmi dards Track description of slow start and congestion avoidance, along with fast
t and fast recovery. RFC 5681 is the current description of these algorithms an retransmit and fast recovery. RFC 5681 is the current description of these algo
d is the current Standards Track specification providing guidelines for TCP cong rithms and is the current Standards Track specification providing guidelines for
estion control. RFC 6298 describes exponential back-off of RTO values, including TCP congestion control. RFC 6298 describes exponential backoff of RTO values, i
keeping the backed-off value until a subsequent segment with new data has been ncluding keeping the backed-off value until a subsequent segment with new data h
sent and acknowledged without retransmission.</t> as been sent and acknowledged without retransmission.</t>
<t>A TCP endpoint <bcp14>MUST</bcp14> implement the basic congestion c
<t>A TCP endpoint MUST implement the basic congestion control algorithms slow st ontrol algorithms slow start, congestion avoidance, and exponential backoff of R
art, congestion avoidance, and exponential back-off of RTO to avoid creating con TO to avoid creating congestion collapse conditions (MUST-19). RFC 5681 and RFC
gestion collapse conditions (MUST-19). RFC 5681 and RFC 6298 describe the basic 6298 describe the basic algorithms on the IETF Standards Track that are broadly
algorithms on the IETF Standards Track that are broadly applicable. Multiple o applicable. Multiple other suitable algorithms exist and have been widely used
ther suitable algorithms exist and have been widely used. Many TCP implementati . Many TCP implementations support a set of alternative algorithms that can be
ons support a set of alternative algorithms that can be configured for use on th configured for use on the endpoint. An endpoint <bcp14>MAY</bcp14> implement su
e endpoint. An endpoint MAY implement such alternative algorithms provided that ch alternative algorithms provided that the algorithms are conformant with the T
the algorithms are conformant with the TCP specifications from the IETF Standar CP specifications from the IETF Standards Track as described in RFC 2914, RFC 50
ds Track as described in RFC 2914, RFC 5033 <xref target="RFC5033"/>, and RFC 89 33 <xref target="RFC5033" format="default"/>, and RFC 8961 <xref target="RFC8961
61 <xref target="RFC8961"/> (MAY-18).</t> " format="default"/> (MAY-18).</t>
<t>Explicit Congestion Notification (ECN) was defined in RFC 3168 and
<t>Explicit Congestion Notification (ECN) was defined in RFC 3168 and is an IETF is an IETF Standards Track enhancement that has many benefits <xref target="RFC8
Standards Track enhancement that has many benefits <xref target="RFC8087"/>.</t 087" format="default"/>.</t>
> <t>A TCP endpoint <bcp14>SHOULD</bcp14> implement ECN as described in
RFC 3168 (SHLD-8).</t>
<t>A TCP endpoint SHOULD implement ECN as described in RFC 3168 (SHLD-8).</t> </section>
<section anchor="connfail" numbered="true" toc="default">
</section> <name>TCP Connection Failures</name>
<section title="TCP Connection Failures" anchor="connfail"> <t>
<t>
Excessive retransmission of the same segment by a TCP endpoint Excessive retransmission of the same segment by a TCP endpoint
indicates some failure of the remote host or the Internet indicates some failure of the remote host or the internetwork
path. This failure may be of short or long duration. The path. This failure may be of short or long duration. The
following procedure MUST be used to handle excessive following procedure <bcp14>MUST</bcp14> be used to handle excessive
retransmissions of data segments (MUST-20): retransmissions of data segments (MUST-20):
</t> </t>
<t> <ol type="(%c)" spacing="normal">
<list> <li>
<t> There are two thresholds R1 and R2 measuring the amount
(a) There are two thresholds R1 and R2 measuring the amount
of retransmission that has occurred for the same of retransmission that has occurred for the same
segment. R1 and R2 might be measured in time units or segment. R1 and R2 might be measured in time units or
as a count of retransmissions (with the current RTO and as a count of retransmissions (with the current RTO and
corresponding backoffs as a conversion factor, if needed). corresponding backoffs as a conversion factor, if needed).
</t> </li>
<t> <li>
(b) When the number of transmissions of the same segment When the number of transmissions of the same segment
reaches or exceeds threshold R1, pass negative advice reaches or exceeds threshold R1, pass negative advice
(see Section 3.3.1.4 of <xref target="RFC1122"/>) to the IP layer, to trigger (see <xref target="RFC1122" section="3.3.1.4" sectionFormat="of" format="default"/>) to the IP layer, to trigger
dead-gateway diagnosis. dead-gateway diagnosis.
</t> </li>
<t> <li>
(c) When the number of transmissions of the same segment When the number of transmissions of the same segment
reaches a threshold R2 greater than R1, close the reaches a threshold R2 greater than R1, close the
connection. connection.
</t> </li>
<t> <li>
(d) An application MUST (MUST-21) be able to set the value for R2 f An application <bcp14>MUST</bcp14> (MUST-21) be able to set the
or value for R2 for
a particular connection. For example, an interactive a particular connection. For example, an interactive
application might set R2 to "infinity," giving the user application might set R2 to "infinity", giving the user
control over when to disconnect. control over when to disconnect.
</t> </li>
<t> <li>
(e) TCP implementations SHOULD inform the application of the delive TCP implementations <bcp14>SHOULD</bcp14> inform the applicatio
ry n of the delivery
problem (unless such information has been disabled by problem (unless such information has been disabled by
the application; see Asynchronous Reports section), when R1 is the application; see the "Asynchronous Reports" section (<xref target="asynchronous-reports"/>)), when R1 is
reached and before R2 (SHLD-9). This will allow a remote login reached and before R2 (SHLD-9). This will allow a remote login
application program to inform the user, application program to inform the user,
for example. for example.
</t> </li>
</list></t> </ol>
<t> <t>
The value of R1 SHOULD correspond to at least 3 The value of R1 <bcp14>SHOULD</bcp14> correspond to at least 3
retransmissions, at the current RTO (SHLD-10). The value of R2 SHOU retransmissions, at the current RTO (SHLD-10). The value of R2 <bcp
LD 14>SHOULD</bcp14>
correspond to at least 100 seconds (SHLD-11). correspond to at least 100 seconds (SHLD-11).
</t> </t>
<t> <t>
An attempt to open a TCP connection could fail with An attempt to open a TCP connection could fail with
excessive retransmissions of the SYN segment or by receipt excessive retransmissions of the SYN segment or by receipt
of a RST segment or an ICMP Port Unreachable. SYN of a RST segment or an ICMP Port Unreachable. SYN
retransmissions MUST be handled in the general way just retransmissions <bcp14>MUST</bcp14> be handled in the general way just
described for data retransmissions, including notification described for data retransmissions, including notification
of the application layer. of the application layer.
</t> </t>
<t> <t>
However, the values of R1 and R2 may be different for SYN However, the values of R1 and R2 may be different for SYN
and data segments. In particular, R2 for a SYN segment MUST and data segments. In particular, R2 for a SYN segment <bcp14>MUST</bcp14>
be set large enough to provide retransmission of the segment be set large enough to provide retransmission of the segment
for at least 3 minutes (MUST-23). The application can close the for at least 3 minutes (MUST-23). The application can close the
connection (i.e., give up on the open attempt) sooner, of connection (i.e., give up on the open attempt) sooner, of
course. course.
</t> </t>
</section> </section>
<section title="TCP Keep-Alives"> <section numbered="true" toc="default">
<t> <name>TCP Keep-Alives</name>
<t>
A TCP connection is said to be &quot;idle&quot; if for some long A TCP connection is said to be "idle" if for some long
amount of time there have been no incoming segments received and amount of time there have been no incoming segments received and
there is no new or unacknowledged data to be sent. there is no new or unacknowledged data to be sent.
</t> </t>
<t> <t>
Implementors MAY include "keep-alives" in their TCP implementations Implementers <bcp14>MAY</bcp14> include "keep-alives" in their TCP im
plementations
(MAY-5), although this practice is not universally accepted. Some (MAY-5), although this practice is not universally accepted. Some
TCP implementations, however, have included a keep-alive mechanism. TCP implementations, however, have included a keep-alive mechanism.
To confirm that an idle connection is still active, these To confirm that an idle connection is still active, these
implementations send a probe segment designed to elicit a response implementations send a probe segment designed to elicit a response
from the TCP peer. Such a segment generally contains SEG.SEQ = from the TCP peer. Such a segment generally contains SEG.SEQ =
SND.NXT-1 and may or may not contain one garbage octet of data. SND.NXT-1 and may or may not contain one garbage octet of data.
If keep-alives are included, the application MUST be able to turn If keep-alives are included, the application <bcp14>MUST</bcp14> be a
them on or off for each TCP connection (MUST-24), and they MUST ble to turn
them on or off for each TCP connection (MUST-24), and they <bcp14>MUS
T</bcp14>
default to off (MUST-25). default to off (MUST-25).
</t> </t>
<t> <t>
Keep-alive packets MUST only be sent when no sent data is outstandin Keep-alive packets <bcp14>MUST</bcp14> only be sent when no sent dat
g, a is outstanding,
and no data or and no data or
acknowledgement packets have been received for the acknowledgment packets have been received for the
connection within an interval (MUST-26). This interval MUST be connection within an interval (MUST-26). This interval <bcp14>MUST<
configurable (MUST-27) and MUST default to no less than two hours (M /bcp14> be
UST-28). configurable (MUST-27) and <bcp14>MUST</bcp14> default to no less th
an two hours (MUST-28).
</t> </t>
<t> <t>
It is extremely important to remember that ACK segments that It is extremely important to remember that ACK segments that
contain no data are not reliably transmitted by TCP. contain no data are not reliably transmitted by TCP.
Consequently, if a keep-alive mechanism is implemented it Consequently, if a keep-alive mechanism is implemented it
MUST NOT interpret failure to respond to any specific probe <bcp14>MUST NOT</bcp14> interpret failure to respond to any specific probe
as a dead connection (MUST-29). as a dead connection (MUST-29).
</t> </t>
<t> <t>
An implementation SHOULD send a keep-alive segment with no An implementation <bcp14>SHOULD</bcp14> send a keep-alive segment wi
data (SHLD-12); however, it MAY be configurable to send a keep-alive th no
data (SHLD-12); however, it <bcp14>MAY</bcp14> be configurable to se
nd a keep-alive
segment containing one garbage octet (MAY-6), for compatibility with segment containing one garbage octet (MAY-6), for compatibility with
erroneous TCP implementations. erroneous TCP implementations.
</t> </t>
</section> </section>
<section title="The Communication of Urgent Information" anchor="urgent"> <section anchor="urgent" numbered="true" toc="default">
<t> <name>The Communication of Urgent Information</name>
As a result of implementation differences and middlebox interactions, new appl <t>
ications SHOULD NOT employ the TCP urgent mechanism (SHLD-13). However, TCP imp As a result of implementation differences and middlebox interactions, new appl
lementations MUST still include support for the urgent mechanism (MUST-30). Inf ications <bcp14>SHOULD NOT</bcp14> employ the TCP urgent mechanism (SHLD-13). H
ormation on how some TCP implementations interpret the urgent pointer can be fou owever, TCP implementations <bcp14>MUST</bcp14> still include support for the ur
nd in RFC 6093 <xref target="RFC6093"/>. gent mechanism (MUST-30). Information on how some TCP implementations interpret
the urgent pointer can be found in RFC 6093 <xref target="RFC6093" format="defa
ult"/>.
</t> </t>
<t> <t>
The objective of the TCP urgent mechanism is to allow the sending user The objective of the TCP urgent mechanism is to allow the sending user
to stimulate the receiving user to accept some urgent data and to to stimulate the receiving user to accept some urgent data and to
permit the receiving TCP endpoint to indicate to the receiving user when all permit the receiving TCP endpoint to indicate to the receiving user when all
the currently known urgent data has been received by the user. the currently known urgent data has been received by the user.
</t> </t>
<t> <t>
This mechanism permits a point in the data stream to be designated as This mechanism permits a point in the data stream to be designated as
the end of urgent information. Whenever this point is in advance of the end of urgent information. Whenever this point is in advance of
the receive sequence number (RCV.NXT) at the receiving TCP endpoint, that TCP the receive sequence number (RCV.NXT) at the receiving TCP endpoint, then the
must tell the user to go into &quot;urgent mode&quot;; when the receive sequen TCP implementation
ce must tell the user to go into "urgent mode"; when the receive sequence
number catches up to the urgent pointer, the TCP implementation must tell the user to go number catches up to the urgent pointer, the TCP implementation must tell the user to go
into &quot;normal mode&quot;. If the urgent pointer is updated while the user into "normal mode". If the urgent pointer is updated while the user
is in &quot;urgent mode&quot;, the update will be invisible to the user. is in "urgent mode", the update will be invisible to the user.
</t> </t>
<t> <t>
The method employs an urgent field that is carried in all segments The method employs an urgent field that is carried in all segments
transmitted. The URG control flag indicates that the urgent field is transmitted. The URG control flag indicates that the urgent field is
meaningful and must be added to the segment sequence number to yield meaningful and must be added to the segment sequence number to yield
the urgent pointer. The absence of this flag indicates that there is the urgent pointer. The absence of this flag indicates that there is
no urgent data outstanding. no urgent data outstanding.
</t> </t>
<t> <t>
To send an urgent indication the user must also send at least one data To send an urgent indication, the user must also send at least one data
octet. If the sending user also indicates a push, timely delivery of octet. If the sending user also indicates a push, timely delivery of
the urgent information to the destination process is enhanced. Note that beca the urgent information to the destination process is enhanced. Note that beca
use changes in the urgent pointer correspond to data being written by a sending use changes in the urgent pointer correspond to data being written by a sending
application, the urgent pointer can not &quot;recede&quot; in the sequence space application, the urgent pointer cannot "recede" in the sequence space, but a TCP
, but a TCP receiver should be robust to invalid urgent pointer values. receiver should be robust to invalid urgent pointer values.
</t>
<t>
A TCP implementation MUST support a sequence of urgent data of any length (MUS
T-31). <xref target="RFC1122"/>
</t> </t>
<t> <t>
The urgent pointer MUST point to the sequence number of the octet following the A TCP implementation <bcp14>MUST</bcp14> support a sequence of urgent data of
urgent data (MUST-62). any length (MUST-31) <xref target="RFC1122" format="default"/>.
</t>
<t>
The urgent pointer <bcp14>MUST</bcp14> point to the sequence number of the octet
following the urgent data (MUST-62).
</t> </t>
<t> <t>
A TCP implementation MUST (MUST-32) inform the application layer asynchronousl A TCP implementation <bcp14>MUST</bcp14> (MUST-32) inform the application laye
y whenever it receives an Urgent pointer and there was previously no pending urg r asynchronously whenever it receives an urgent pointer and there was previously
ent data, or whenever the Urgent pointer advances in the data stream. The TCP i no pending urgent data, or whenever the urgent pointer advances in the data str
mplementation MUST (MUST-33) provide a way for the application to learn how much eam. The TCP implementation <bcp14>MUST</bcp14> (MUST-33) provide a way for the
urgent data remains to be read from the connection, or at least to determine wh application to learn how much urgent data remains to be read from the connectio
ether more urgent data remains to be read <xref target="RFC1122"/>. n, or at least to determine whether more urgent data remains to be read <xref ta
rget="RFC1122" format="default"/>.
</t> </t>
</section> </section>
<section title="Managing the Window"> <section numbered="true" toc="default">
<t> <name>Managing the Window</name>
<t>
The window sent in each segment indicates the range of sequence The window sent in each segment indicates the range of sequence
numbers the sender of the window (the data receiver) is currently numbers the sender of the window (the data receiver) is currently
prepared to accept. There is an assumption that this is related to prepared to accept. There is an assumption that this is related to
the currently available data buffer space available for this the data buffer space currently available for this
connection. connection.
</t> </t>
<t> <t>
The sending TCP endpoint packages the data to be transmitted into segments The sending TCP endpoint packages the data to be transmitted into segments
that fit the current window, and may repackage segments on the that fit the current window, and may repackage segments on the
retransmission queue. Such repackaging is not required, but may be retransmission queue. Such repackaging is not required but may be
helpful. helpful.
</t> </t>
<t> <t>
In a connection with a one-way data flow, the window information will In a connection with a one-way data flow, the window information will
be carried in acknowledgment segments that all have the same sequence be carried in acknowledgment segments that all have the same sequence
number, so there will be no way to reorder them if they arrive out of number, so there will be no way to reorder them if they arrive out of
order. This is not a serious problem, but it will allow the window order. This is not a serious problem, but it will allow the window
information to be on occasion temporarily based on old reports from information to be on occasion temporarily based on old reports from
the data receiver. A refinement to avoid this problem is to act on the data receiver. A refinement to avoid this problem is to act on
the window information from segments that carry the highest the window information from segments that carry the highest
acknowledgment number (that is segments with acknowledgment number acknowledgment number (that is, segments with an acknowledgment number
equal or greater than the highest previously received). equal to or greater than the highest previously received).
</t> </t>
<t> <t>
Indicating a large window encourages transmissions. If more data Indicating a large window encourages transmissions. If more data
arrives than can be accepted, it will be discarded. This will result arrives than can be accepted, it will be discarded. This will result
in excessive retransmissions, adding unnecessarily to the load on the in excessive retransmissions, adding unnecessarily to the load on the
network and the TCP endpoints. Indicating a small window may restrict the network and the TCP endpoints. Indicating a small window may restrict the
transmission of data to the point of introducing a round trip delay transmission of data to the point of introducing a round-trip delay
between each new segment transmitted. between each new segment transmitted.
</t> </t>
<t> <t>
The mechanisms provided allow a TCP endpoint to advertise a large window and to The mechanisms provided allow a TCP endpoint to advertise a large window and to
subsequently advertise a much smaller window without having accepted subsequently advertise a much smaller window without having accepted
that much data. This, so-called &quot;shrinking the window,&quot; is strongly that much data. This so-called "shrinking the window" is strongly
discouraged. The robustness principle <xref target="RFC1122"/> dictates that discouraged. The robustness principle <xref target="RFC1122" format="default"
TCP peers will not /> dictates that TCP peers will not
shrink the window themselves, but will be prepared for such behavior shrink the window themselves, but will be prepared for such behavior
on the part of other TCP peers. on the part of other TCP peers.
</t> </t>
<t> <t>
A TCP receiver SHOULD NOT shrink the window, i.e., move the A TCP receiver <bcp14>SHOULD NOT</bcp14> shrink the window, i.e., move the
right window edge to the left (SHLD-14). However, a sending TCP peer MUST right window edge to the left (SHLD-14). However, a sending TCP peer <bcp14>M
UST</bcp14>
be robust against window shrinking, which may cause the be robust against window shrinking, which may cause the
&quot;usable window&quot; (see <xref target="SWSsender"/>) to become negative (MUST-34). "usable window" (see <xref target="SWSsender" format="default"/>) to become negative (MUST-34).
</t> </t>
<t> <t>
If this happens, the sender SHOULD NOT send new data (SHLD-15), but If this happens, the sender <bcp14>SHOULD NOT</bcp14> send new data (SHLD-15),
SHOULD retransmit normally the old unacknowledged data but
between SND.UNA and SND.UNA+SND.WND (SHLD-16). The sender MAY also <bcp14>SHOULD</bcp14> retransmit normally the old unacknowledged data
retransmit old data beyond SND.UNA+SND.WND (MAY-7), but SHOULD NOT between SND.UNA and SND.UNA+SND.WND (SHLD-16). The sender <bcp14>MAY</bcp14>
also
retransmit old data beyond SND.UNA+SND.WND (MAY-7), but <bcp14>SHOULD NOT</bcp
14>
time out the connection if data beyond the right window edge time out the connection if data beyond the right window edge
is not acknowledged (SHLD-17). If the window shrinks to zero, the TCP implementation is not acknowledged (SHLD-17). If the window shrinks to zero, the TCP implementation
MUST probe it in the standard way (described below) (MUST-35). <bcp14>MUST</bcp14> probe it in the standard way (described below) (MUST-35).
</t> </t>
<section title="Zero Window Probing" anchor="zwp"> <section anchor="zwp" numbered="true" toc="default">
<t> <name>Zero-Window Probing</name>
<t>
The sending TCP peer must regularly transmit at least one octet of new data The sending TCP peer must regularly transmit at least one octet of new data
(if available) or retransmit to the receiving TCP peer even if the send (if available), or retransmit to the receiving TCP peer even if the send
window is zero, in order to &quot;probe&quot; the window. This window is zero, in order to "probe" the window. This
retransmission is essential to guarantee that when either TCP peer has a zero retransmission is essential to guarantee that when either TCP peer has a zero
window the re-opening of the window will be reliably reported to the other. window the reopening of the window will be reliably reported to the other.
This is referred to as Zero-Window Probing (ZWP) in other documents. This is referred to as Zero-Window Probing (ZWP) in other documents.
</t> </t>
<t> <t>
Probing of zero (offered) windows MUST be supported (MUST-36). Probing of zero (offered) windows <bcp14>MUST</bcp14> be supported (MUST-36).
</t> </t>
<t> <t>
A TCP implementation MAY keep its offered receive window closed A TCP implementation <bcp14>MAY</bcp14> keep its offered receive window closed
indefinitely (MAY-8). As long as the receiving TCP peer continues to indefinitely (MAY-8). As long as the receiving TCP peer continues to
send acknowledgments in response to the probe segments, the send acknowledgments in response to the probe segments, the
sending TCP peer MUST allow the connection to stay open (MUST-37). This sending TCP peer <bcp14>MUST</bcp14> allow the connection to stay open (MUST-3
enables TCP to function in scenarios such as the &quot;printer 7). This
ran out of paper&quot; situation described in Section 4.2.2.17 enables TCP to function in scenarios such as the "printer
of <xref target="RFC1122"/>. The behavior is subject to the implementation's ran out of paper" situation described in
resource <xref target="RFC1122" section="4.2.2.17" sectionFormat="of" format="default"/
management concerns, as noted in <xref target="RFC6429"/>. >. The behavior is subject to the implementation's resource
management concerns, as noted in <xref target="RFC6429" format="default"/>.
</t> </t>
<t> <t>
When the receiving TCP peer has a zero window and a segment arrives it must When the receiving TCP peer has a zero window and a segment arrives, it must
still send an acknowledgment showing its next expected sequence number still send an acknowledgment showing its next expected sequence number
and current window (zero). and current window (zero).
</t> </t>
<t> <t>
The transmitting host SHOULD send the first zero-window probe when a zero The transmitting host <bcp14>SHOULD</bcp14> send the first zero-window probe whe
window has existed for the retransmission timeout period (SHLD-29) (<xref n a zero
target="RTO"/>), and SHOULD increase exponentially the interval between window has existed for the retransmission timeout period (SHLD-29) (<xref target
="RTO" format="default"/>), and <bcp14>SHOULD</bcp14> increase exponentially the
interval between
successive probes (SHLD-30). successive probes (SHLD-30).
</t> </t>
</section> </section>
<section numbered="true" toc="default">
<section title="Silly Window Syndrome Avoidance"> <name>Silly Window Syndrome Avoidance</name>
<t>The &quot;Silly Window Syndrome&quot; (SWS) is a stable pattern of small incr <t>The "Silly Window Syndrome" (SWS) is a stable pattern of small in
emental window movements resulting in extremely poor TCP performance. Algorithm cremental window movements resulting in extremely poor TCP performance. Algorit
s to avoid SWS are described below for both the sending side and the receiving s hms to avoid SWS are described below for both the sending side and the receiving
ide. RFC 1122 contains more detailed discussion of the SWS problem. Note that side. RFC 1122 contains more detailed discussion of the SWS problem. Note tha
the Nagle algorithm and the sender SWS avoidance algorithm play complementary ro t the Nagle algorithm and the sender SWS avoidance algorithm play complementary
les in improving performance. The Nagle algorithm discourages sending tiny segm roles in improving performance. The Nagle algorithm discourages sending tiny se
ents when the data to be sent increases in small increments, while the SWS avoid gments when the data to be sent increases in small increments, while the SWS avo
ance algorithm discourages small segments resulting from the right window edge a idance algorithm discourages small segments resulting from the right window edge
dvancing in small increments.</t> advancing in small increments.</t>
<section anchor="SWSsender" numbered="true" toc="default">
<section title="Sender's Algorithm - When to Send Data" anchor="SWSsender"> <name>Sender's Algorithm -- When to Send Data</name>
<t> <t>
A TCP implementation MUST include a SWS avoidance algorithm in the s A TCP implementation <bcp14>MUST</bcp14> include a SWS avoidance alg
ender (MUST-38). orithm in the sender (MUST-38).
</t> </t>
<t> <t>
The Nagle algorithm from <xref target="nagle"/> additionally describes how to co The Nagle algorithm from <xref target="nagle" format="default"/> additionally de
alesce short segments. scribes how to coalesce short segments.
</t> </t>
<t> <t>
The sender's SWS avoidance algorithm is more difficult The sender's SWS avoidance algorithm is more difficult
than the receiver's, because the sender does not know than the receiver's because the sender does not know
(directly) the receiver's total buffer space RCV.BUFF. (directly) the receiver's total buffer space (RCV.BUFF).
An approach that has been found to work well is for An approach that has been found to work well is for
the sender to calculate Max(SND.WND), the maximum send the sender to calculate Max(SND.WND), which is the maximum send
window it has seen so far on the connection, and to use window it has seen so far on the connection, and to use
this value as an estimate of RCV.BUFF. Unfortunately, this value as an estimate of RCV.BUFF. Unfortunately,
this can only be an estimate; the receiver may at any this can only be an estimate; the receiver may at any
time reduce the size of RCV.BUFF. To avoid a resulting time reduce the size of RCV.BUFF. To avoid a resulting
deadlock, it is necessary to have a timeout to force deadlock, it is necessary to have a timeout to force
transmission of data, overriding the SWS avoidance transmission of data, overriding the SWS avoidance
algorithm. In practice, this timeout should seldom algorithm. In practice, this timeout should seldom
occur. occur.
</t> </t>
<t> <t>
The &quot;usable window&quot; is:<list><t> The "usable window" is:</t>
U = SND.UNA + SND.WND - SND.NXT</t></list> <t indent="3">U = SND.UNA + SND.WND - SND.NXT</t>
<t>
i.e., the offered window less the amount of data sent i.e., the offered window less the amount of data sent
but not acknowledged. If D is the amount of data but not acknowledged. If D is the amount of data
queued in the sending TCP endpoint but not yet sent, then the queued in the sending TCP endpoint but not yet sent, then the
following set of rules is recommended. following set of rules is recommended.
</t> </t>
<t> <t>
Send data:<list style="hanging" hangIndent="5"> Send data:</t>
<ol type="(%d)" spacing="normal">
<t hangText="(1)"> <li>
if a maximum-sized segment can be sent, i.e., if:<list><t> <t>
if a maximum-sized segment can be sent, i.e., if:</t>
<t indent="3">
min(D,U) >= Eff.snd.MSS;</t></list> min(D,U) &gt;= Eff.snd.MSS;</t>
</t> </li>
<t hangText="(2)"> <li>
<t>
or if the data is pushed and all queued data can or if the data is pushed and all queued data can
be sent now, i.e., if:<list><t> be sent now, i.e., if:</t>
<t indent="3">
[SND.NXT = SND.UNA and] PUSHED and D &lt;= U</t></list>
[SND.NXT = SND.UNA and] PUSHed and D &lt;= U
</t>
<t>
(the bracketed condition is imposed by the Nagle (the bracketed condition is imposed by the Nagle
algorithm); algorithm);
</t> </t>
<t hangText="(3)"> </li>
<li>
<t>
or if at least a fraction Fs of the maximum window or if at least a fraction Fs of the maximum window
can be sent, i.e., if:<list><t> can be sent, i.e., if:</t>
<t indent="3">
[SND.NXT = SND.UNA and]<list><t>
min(D,U) >= Fs * Max(SND.WND);</t></list></t></list> [SND.NXT = SND.UNA and]</t>
<t indent="6">
</t> min(D,U) &gt;= Fs * Max(SND.WND);</t>
<t hangText="(4)"> </li>
<li>
or if the override timeout or if the override timeout
occurs. occurs.</li>
</t></list> </ol>
</t> <t>
<t>
Here Fs is a fraction whose recommended value is 1/2. Here Fs is a fraction whose recommended value is 1/2.
The override timeout should be in the range 0.1 - 1.0 The override timeout should be in the range 0.1 - 1.0
seconds. It may be convenient to combine this timer seconds. It may be convenient to combine this timer
with the timer used to probe zero windows with the timer used to probe zero windows
(<xref target="zwp"/>). (<xref target="zwp" format="default"/>).
</t> </t>
</section>
</section> <section numbered="true" toc="default">
<section title="Receiver's Algorithm - When to Send a Window Update"> <name>Receiver's Algorithm -- When to Send a Window Update</name>
<t> <t>
A TCP implementation MUST include a SWS avoidance algorithm in the r A TCP implementation <bcp14>MUST</bcp14> include a SWS avoidance alg
eceiver (MUST-39). orithm in the receiver (MUST-39).
</t> </t>
<t> <t>
The receiver's SWS avoidance algorithm determines when The receiver's SWS avoidance algorithm determines when
the right window edge may be advanced; this is the right window edge may be advanced; this is
customarily known as &quot;updating the window&quot;. This customarily known as "updating the window". This
algorithm combines with the delayed ACK algorithm algorithm combines with the delayed ACK algorithm
(<xref target="delACK"/>) to determine when an ACK segment (<xref target="delACK" format="default"/>) to determine when an ACK segment
containing the current window will really be sent to containing the current window will really be sent to
the receiver. the receiver.
</t> </t>
<t> <t>
The solution to receiver SWS is to avoid advancing the The solution to receiver SWS is to avoid advancing the
right window edge RCV.NXT+RCV.WND in small increments, right window edge RCV.NXT+RCV.WND in small increments,
even if data is received from the network in small even if data is received from the network in small
segments. segments.
</t> </t>
<t> <t>
Suppose the total receive buffer space is RCV.BUFF. At Suppose the total receive buffer space is RCV.BUFF. At
any given moment, RCV.USER octets of this total may be any given moment, RCV.USER octets of this total may be
tied up with data that has been received and tied up with data that has been received and
acknowledged but that the user process has not yet acknowledged but that the user process has not yet
consumed. When the connection is quiescent, RCV.WND = consumed. When the connection is quiescent, RCV.WND =
RCV.BUFF and RCV.USER = 0. RCV.BUFF and RCV.USER = 0.
</t> </t>
<t> <t>
Keeping the right window edge fixed as data arrives and Keeping the right window edge fixed as data arrives and
is acknowledged requires that the receiver offer less is acknowledged requires that the receiver offer less
than its full buffer space, i.e., the receiver must than its full buffer space, i.e., the receiver must
specify a RCV.WND that keeps RCV.NXT+RCV.WND constant specify a RCV.WND that keeps RCV.NXT+RCV.WND constant
as RCV.NXT increases. Thus, the total buffer space as RCV.NXT increases. Thus, the total buffer space
RCV.BUFF is generally divided into three parts: RCV.BUFF is generally divided into three parts:
</t> </t>
<t><figure><artwork> <artwork name="" type="" align="left" alt=""><![CDATA[
|<------- RCV.BUFF ---------------->|
|&lt;------- RCV.BUFF ----------------&gt;|
1 2 3 1 2 3
----|---------|------------------|------|---- ----|---------|------------------|------|----
RCV.NXT ^ RCV.NXT ^
(Fixed) (Fixed)
1 - RCV.USER = data received but not yet consumed; 1 - RCV.USER = data received but not yet consumed;
2 - RCV.WND = space advertised to sender; 2 - RCV.WND = space advertised to sender;
3 - Reduction = space available but not yet 3 - Reduction = space available but not yet
advertised. advertised.
]]></artwork>
</artwork></figure></t> <t>
<t>
The suggested SWS avoidance algorithm for the receiver The suggested SWS avoidance algorithm for the receiver
is to keep RCV.NXT+RCV.WND fixed until the reduction is to keep RCV.NXT+RCV.WND fixed until the reduction
satisfies: satisfies:
</t> </t>
<t><figure><artwork> <artwork name="" type="" align="left" alt=""><![CDATA[
RCV.BUFF - RCV.USER - RCV.WND >= RCV.BUFF - RCV.USER - RCV.WND >=
min( Fr * RCV.BUFF, Eff.snd.MSS ) min( Fr * RCV.BUFF, Eff.snd.MSS )
</artwork></figure></t> ]]></artwork>
<t> <t>
where Fr is a fraction whose recommended value is 1/2, where Fr is a fraction whose recommended value is 1/2,
and Eff.snd.MSS is the effective send MSS for the and Eff.snd.MSS is the effective send MSS for the
connection (see <xref target="mss"/>). When the inequality connection (see <xref target="mss" format="default"/>). When the inequality
is satisfied, RCV.WND is set to RCV.BUFF-RCV.USER. is satisfied, RCV.WND is set to RCV.BUFF-RCV.USER.
</t> </t>
<t> <t>
Note that the general effect of this algorithm is to Note that the general effect of this algorithm is to
advance RCV.WND in increments of Eff.snd.MSS (for advance RCV.WND in increments of Eff.snd.MSS (for
realistic receive buffers: Eff.snd.MSS &lt; RCV.BUFF/2). realistic receive buffers: Eff.snd.MSS &lt; RCV.BUFF/2).
Note also that the receiver must use its own Note also that the receiver must use its own
Eff.snd.MSS, making the assumption that it is the same as the sender's. Eff.snd.MSS, making the assumption that it is the same as the sender's.
</t> </t>
</section>
</section> </section>
</section> <section anchor="delACK" numbered="true" toc="default">
<section title="Delayed Acknowledgements - When to Send an ACK Segment" anchor=" <name>Delayed Acknowledgments -- When to Send an ACK Segment</name>
delACK"> <t>
<t>
A host that is receiving a stream of TCP data segments can A host that is receiving a stream of TCP data segments can
increase efficiency in both the Internet and the hosts by increase efficiency in both the network and the hosts by
sending fewer than one ACK (acknowledgment) segment per data sending fewer than one ACK (acknowledgment) segment per data
segment received; this is known as a &quot;delayed ACK&quot;. segment received; this is known as a "delayed ACK".
</t> </t>
<t> <t>
A TCP endpoint SHOULD implement a delayed ACK (SHLD-18), but an ACK A TCP endpoint <bcp14>SHOULD</bcp14> implement a delayed ACK (SHLD-18
should not be excessively delayed; in particular, the delay MUST be ), but an ACK
less than 0.5 seconds (MUST-40). An ACK SHOULD be generated for at should not be excessively delayed; in particular, the delay <bcp14>MU
ST</bcp14> be
less than 0.5 seconds (MUST-40). An ACK <bcp14>SHOULD</bcp14> be gen
erated for at
least every second full-sized segment or 2*RMSS bytes of new data least every second full-sized segment or 2*RMSS bytes of new data
(where RMSS is the MSS specified by the TCP endpoint receiving the (where RMSS is the MSS specified by the TCP endpoint receiving the
segments to be acknowledged, or the default value if not specified) segments to be acknowledged, or the default value if not specified)
(SHLD-19). Excessive delays on ACKs can disturb the round-trip (SHLD-19). Excessive delays on ACKs can disturb the round-trip
timing and packet &quot;clocking&quot; algorithms. More complete timing and packet "clocking" algorithms. More complete
discussion of delayed ACK behavior is in Section 4.2 of RFC 5681 discussion of delayed ACK behavior is in Section <xref target="RFC568
<xref target="RFC5681"/>, including recommendations to immediately 1" section="4.2" sectionFormat="bare" format="default"/> of RFC 5681
<xref target="RFC5681" format="default"/>, including recommendations
to immediately
acknowledge out-of-order segments, segments above a gap in sequence acknowledge out-of-order segments, segments above a gap in sequence
space, or segments that fill all or part of a gap, in order to space, or segments that fill all or part of a gap, in order to
accelerate loss recovery. accelerate loss recovery.
</t> </t>
<t> <t>
Note that there are several current Note that there are several current
practices that further lead to a reduced number of ACKs, including practices that further lead to a reduced number of ACKs, including
generic receive offload (GRO) <xref target="offload"/>, ACK compressi generic receive offload (GRO) <xref target="offload" format="default"
on, and ACK decimation />, ACK compression, and ACK decimation
<xref target="RFC3449"/>. <xref target="RFC3449" format="default"/>.
</t> </t>
</section> </section>
</section> </section>
</section>
</section> <section numbered="true" toc="default">
<section title="Interfaces"> <name>Interfaces</name>
<t> <t>
There are of course two interfaces of concern: the user/TCP interface There are of course two interfaces of concern: the user/TCP interface
and the TCP/lower level interface. We have a fairly elaborate model and the TCP/lower-level interface. We have a fairly elaborate model
of the user/TCP interface, but the interface to the lower level of the user/TCP interface, but the interface to the lower-level
protocol module is left unspecified here, since it will be specified protocol module is left unspecified here since it will be specified
in detail by the specification of the lower level protocol. For the in detail by the specification of the lower-level protocol. For the
case that the lower level is IP we note some of the parameter values case that the lower level is IP, we note some of the parameter values
that TCP implementations might use. that TCP implementations might use.
</t> </t>
<section title="User/TCP Interface" anchor="user-api"> <section anchor="user-api" numbered="true" toc="default">
<t> <name>User/TCP Interface</name>
<t>
The following functional description of user commands to the TCP implementation is, The following functional description of user commands to the TCP implementation is,
at best, fictional, since every operating system will have different at best, fictional, since every operating system will have different
facilities. Consequently, we must warn readers that different TCP facilities. Consequently, we must warn readers that different TCP
implementations may have different user interfaces. However, all implementations may have different user interfaces. However, all
TCP implementations must provide a certain minimum set of services to guarantee TCP implementations must provide a certain minimum set of services to guarantee
that all TCP implementations can support the same protocol that all TCP implementations can support the same protocol
hierarchy. This section specifies the functional interfaces hierarchy. This section specifies the functional interfaces
required of all TCP implementations. required of all TCP implementations.
</t> </t>
<t> <t>
Section 3.1 of <xref target="RFC8303"/> also identifies primitives provided by T <xref target="RFC8303" section="3.1" sectionFormat="of" format="default"/> also
CP, and could be used as an additional reference for implementers. identifies primitives provided by TCP and could be used as an additional referen
ce for implementers.
</t> </t>
<t> <t>
The following sections functionally characterize a USER/TCP The following sections functionally characterize a user/TCP
interface. The notation used is similar to most procedure or interface. The notation used is similar to most procedure or
function calls in high level languages, but this usage is not function calls in high-level languages, but this usage is not
meant to rule out trap type service calls. meant to rule out trap-type service calls.
</t> </t>
<t> <t>
The user commands described below specify the basic functions the The user commands described below specify the basic functions the
TCP implementation must perform to support interprocess communication. TCP implementation must perform to support interprocess communication.
Individual implementations must define their own exact format, and Individual implementations must define their own exact format and
may provide combinations or subsets of the basic functions in may provide combinations or subsets of the basic functions in
single calls. In particular, some implementations may wish to single calls. In particular, some implementations may wish to
automatically OPEN a connection on the first SEND or RECEIVE automatically OPEN a connection on the first SEND or RECEIVE
issued by the user for a given connection. issued by the user for a given connection.
</t> </t>
<t> <t>
In providing interprocess communication facilities, the TCP implementation must In providing interprocess communication facilities, the TCP implementation must
not only accept commands, but must also return information to the not only accept commands, but must also return information to the
processes it serves. The latter consists of: processes it serves. The latter consists of:
<list> </t>
<t> <ol type="(%c)" spacing="normal">
(a) general information about a connection (e.g., interrupts, <li>
general information about a connection (e.g., interrupts,
remote close, binding of unspecified remote socket). remote close, binding of unspecified remote socket).
</t> </li>
<t> <li>
(b) replies to specific user commands indicating success or replies to specific user commands indicating success or
various types of failure. various types of failure.
</t> </li>
</list> </ol>
</t> <section numbered="true" toc="default">
<name>Open</name>
<section title="Open"> <t>
<t>
<list>
<t>
Format: OPEN (local port, remote socket, active/passive Format: OPEN (local port, remote socket, active/passive
[, timeout] [, DiffServ field] [, security/compartment] [, timeout] [, Diffserv field] [, security/compartment]
[local IP address,] [, options]) [, local IP address] [, options])
-&gt; local connection name -&gt; local connection name
</t> </t>
<t> <t>
If the active/passive flag is set to passive, then this is a If the active/passive flag is set to passive, then this is a
call to LISTEN for an incoming connection. A passive open may call to LISTEN for an incoming connection. A passive OPEN may
have either a fully specified remote socket to wait for a have either a fully specified remote socket to wait for a
particular connection or an unspecified remote socket to wait particular connection or an unspecified remote socket to wait
for any call. A fully specified passive call can be made active for any call. A fully specified passive call can be made active
by the subsequent execution of a SEND. by the subsequent execution of a SEND.
</t> </t>
<t> <t>
A transmission control block (TCB) is created and partially A transmission control block (TCB) is created and partially
filled in with data from the OPEN command parameters. filled in with data from the OPEN command parameters.
</t> </t>
<t> <t>
Every passive OPEN call either creates a new connection Every passive OPEN call either creates a new connection
record in LISTEN state, or it returns an error; it MUST NOT record in LISTEN state, or it returns an error; it <bcp14>MUST NOT</bcp14>
affect any previously created connection record (MUST-41). affect any previously created connection record (MUST-41).
</t> </t>
<t> <t>
A TCP implementation that supports multiple concurrent connections MUST A TCP implementation that supports multiple concurrent connections <bcp14>MUST</bcp14>
provide provide
an OPEN call that will functionally allow an application to an OPEN call that will functionally allow an application to
LISTEN on a port while a connection block with the same LISTEN on a port while a connection block with the same
local port is in SYN-SENT or SYN-RECEIVED state (MUST-42). local port is in SYN-SENT or SYN-RECEIVED state (MUST-42).
</t> </t>
<t> <t>
On an active OPEN command, the TCP endpoint will begin the procedure to On an active OPEN command, the TCP endpoint will begin the procedure to
synchronize (i.e., establish) the connection at once. synchronize (i.e., establish) the connection at once.
</t> </t>
<t> <t>
The timeout, if present, permits the caller to set up a timeout The timeout, if present, permits the caller to set up a timeout
for all data submitted to TCP. If data is not successfully for all data submitted to TCP. If data is not successfully
delivered to the destination within the timeout period, the TCP endpoint delivered to the destination within the timeout period, the TCP endpoint
will abort the connection. The present global default is five will abort the connection. The present global default is five
minutes. minutes.
</t> </t>
<t>
<t>
The TCP implementation or some component of the operating system will verify The TCP implementation or some component of the operating system will verify
the user's authority to open a connection with the specified the user's authority to open a connection with the specified
DiffServ field value or security/compartment. The absence of a Diffserv field value or security/compartment. The absence of a
DiffServ field value Diffserv field value
or security/compartment specification in the OPEN call indicates or security/compartment specification in the OPEN call indicates
the default values must be used. the default values must be used.
</t> </t>
<t> <t>
TCP will accept incoming requests as matching only if the TCP will accept incoming requests as matching only if the
security/compartment information is exactly the same as that security/compartment information is exactly the same as that
requested in the OPEN call. requested in the OPEN call.
</t> </t>
<t> <t>
The DiffServ field value indicated by the user only impacts outgoing packets, The Diffserv field value indicated by the user only impacts outgoing packets,
may be altered en route through the network, and has no direct bearing or relation may be altered en route through the network, and has no direct bearing or relation
to received packets. to received packets.
</t> </t>
<t> <t>
A local connection name will be returned to the user by the TCP implementation. A local connection name will be returned to the user by the TCP implementation.
The local connection name can then be used as a short-hand term The local connection name can then be used as a shorthand term
for the connection defined by the &lt;local socket, remote socket> for the connection defined by the &lt;local socket, remote socket&gt;
pair. pair.
</t> </t>
<t> <t>
The optional &quot;local IP address&quot; parameter MUST be supported The optional "local IP address" parameter <bcp14>MUST</bcp14> be supported
to allow the specification of the local IP address (MUST-43). This enables to allow the specification of the local IP address (MUST-43). This enables
applications that need to select the local IP address used when applications that need to select the local IP address used when
multihoming is present. multihoming is present.
</t> </t>
<t>
<t> A passive OPEN call with a specified "local IP address"
A passive OPEN call with a specified &quot;local IP address&quot;
parameter will await an incoming connection request to parameter will await an incoming connection request to
that address. If the parameter is unspecified, a that address. If the parameter is unspecified, a
passive OPEN will await an incoming connection request passive OPEN will await an incoming connection request
to any local IP address, and then bind the local IP to any local IP address and then bind the local IP
address of the connection to the particular address address of the connection to the particular address
that is used. that is used.
</t> </t>
<t> <t>
For an active OPEN call, a specified &quot;local IP address&quot; parameter For an active OPEN call, a specified "local IP address" parameter
will be used for opening the connection. If the parameter is unspecified, the will be used for opening the connection. If the parameter is unspecified, the
host will choose an appropriate local IP address (see RFC 1122 section 3.3.4.2). host will choose an appropriate local IP address (see RFC 1122, Section <xref target="RFC1122" section="3.3.4.2" sectionFormat="bare" format="default"/>).
</t> </t>
<t>
<t>
If an application on a multihomed host does not specify the If an application on a multihomed host does not specify the
local IP address when actively opening a TCP connection, local IP address when actively opening a TCP connection,
then the TCP implementation MUST ask the IP layer to select a local IP then the TCP implementation <bcp14>MUST</bcp14> ask the IP layer to select a local IP
address before sending the (first) SYN (MUST-44). See the function address before sending the (first) SYN (MUST-44). See the function
GET_SRCADDR() in Section 3.4 of RFC 1122. GET_SRCADDR() in Section <xref target="RFC1122" section="3.4" sectionFormat="bare" format="default"/> of RFC 1122.
</t> </t>
<t> <t>
At all other times, a previous segment has either been sent At all other times, a previous segment has either been sent
or received on this connection, and TCP implementations MUST use the same or received on this connection, and TCP implementations <bcp14>MUST</bcp14> use the same
local address that was used in those previous local address that was used in those previous
segments (MUST-45). segments (MUST-45).
</t> </t>
<t> <t>
A TCP implementation MUST reject as an error a local OPEN A TCP implementation <bcp14>MUST</bcp14> reject as an error a local
OPEN
call for an invalid remote IP address (e.g., a broadcast or call for an invalid remote IP address (e.g., a broadcast or
multicast address) (MUST-46). multicast address) (MUST-46).
</t> </t>
</list>
</t>
</section>
<section title="Send"> </section>
<t> <section numbered="true" toc="default">
<list> <name>Send</name>
<t> <t>
Format: SEND (local connection name, buffer address, byte Format: SEND (local connection name, buffer address, byte
count, PUSH flag (optional), URGENT flag [,timeout]) count, URGENT flag [, PUSH flag] [, timeout])
</t> </t>
<t> <t>
This call causes the data contained in the indicated user buffer This call causes the data contained in the indicated user buffer
to be sent on the indicated connection. If the connection has to be sent on the indicated connection. If the connection has
not been opened, the SEND is considered an error. Some not been opened, the SEND is considered an error. Some
implementations may allow users to SEND first; in which case, an implementations may allow users to SEND first; in which case, an
automatic OPEN would be done. For example, this might be one way automatic OPEN would be done. For example, this might be one way
for application data to be included in SYN segments. If the calling process is not for application data to be included in SYN segments. If the calling process is not
authorized to use this connection, an error is returned. authorized to use this connection, an error is returned.
</t> </t>
<t> <t>
A TCP endpoint MAY implement PUSH flags on SEND calls (MAY-15). A TCP endpoint <bcp14>MAY</bcp14> implement PUSH flags on SEND calls (MAY-15).
If PUSH flags are not If PUSH flags are not
implemented, then the sending TCP peer: (1) MUST NOT buffer data implemented, then the sending TCP peer: (1) <bcp14>MUST NOT</bcp14> buffer data
indefinitely (MUST-60), and indefinitely (MUST-60), and
(2) MUST set the PSH bit in the last buffered segment (i.e., when (2) <bcp14>MUST</bcp14> set the PSH bit in the last buffered segment (i.e., when
there is no there is no
more queued data to be sent) (MUST-61). The remaining description below assumes the PUSH more queued data to be sent) (MUST-61). The remaining description below assumes the PUSH
flag is supported on SEND calls. flag is supported on SEND calls.
</t> </t>
<t> <t>
If the PUSH flag is set, the application intends the data to be If the PUSH flag is set, the application intends the data to be
transmitted promptly to the receiver, and the PUSH bit will be set in the last transmitted promptly to the receiver, and the PSH bit will be set in the last
TCP segment created from the buffer. TCP segment created from the buffer.
</t> </t>
<t> <t>
The PSH bit is not a record marker and is independent of segment boundaries. The PSH bit is not a record marker and is independent of segment boundaries.
The transmitter SHOULD collapse successive bits when it packetizes data, to The transmitter <bcp14>SHOULD</bcp14> collapse successive bits when it packetizes data, to
send the largest possible segment (SHLD-27). send the largest possible segment (SHLD-27).
</t> </t>
<t> <t>
If the PUSH flag is not set, the data may be combined with data from If the PUSH flag is not set, the data may be combined with data from
subsequent SENDs for transmission efficiency. subsequent SENDs for transmission efficiency.
When an application issues a series of When an application issues a series of
SEND calls without setting the PUSH flag, the TCP implementation MAY aggregate the data SEND calls without setting the PUSH flag, the TCP implementation <bcp14>MAY</bcp14> aggregate the data
internally without sending it (MAY-16). internally without sending it (MAY-16).
Note that when the Nagle Note that when the Nagle
algorithm is in use, TCP implementations may buffer the data before sending, without regard to algorithm is in use, TCP implementations may buffer the data before sending, without regard to
the PUSH flag (see <xref target="nagle"/>). the PUSH flag (see <xref target="nagle" format="default"/>).
</t> </t>
<t> <t>
An application program is logically required to set the PUSH flag in a SEND An application program is logically required to set the PUSH flag in a SEND
call whenever it needs to force delivery of the data to avoid a communication call whenever it needs to force delivery of the data to avoid a communication
deadlock. However, a TCP implementation SHOULD send a maximum-sized segment deadlock. However, a TCP implementation <bcp14>SHOULD</bcp14> send a maximum-sized
whenever segment whenever
possible (SHLD-28), to improve performance (see <xref target="SWSsender"/>). possible (SHLD-28) to improve performance (see <xref target="SWSsender" format="default"/>).
</t> </t>
<t>
<t> New applications <bcp14>SHOULD NOT</bcp14> set the URGENT flag <xref target="RFC6093" format="default"/>
New applications SHOULD NOT set the URGENT flag <xref target="RFC6093"/> due to implementation differences and middlebox
due to implementation differences and middlebox issues (SHLD-13). issues (SHLD-13).
</t> </t>
<t> <t>
If the URGENT flag is set, segments sent to the destination TCP peer If the URGENT flag is set, segments sent to the destination TCP peer
will have the urgent pointer set. The receiving TCP peer will signal will have the urgent pointer set. The receiving TCP peer will signal
the urgent condition to the receiving process if the urgent the urgent condition to the receiving process if the urgent
pointer indicates that data preceding the urgent pointer has not pointer indicates that data preceding the urgent pointer has not
been consumed by the receiving process. The purpose of urgent been consumed by the receiving process. The purpose of the URGENT flag
is to stimulate the receiver to process the urgent data and to is to stimulate the receiver to process the urgent data and to
indicate to the receiver when all the currently known urgent indicate to the receiver when all the currently known urgent
data has been received. The number of times the sending user's data has been received. The number of times the sending user's
TCP implementation signals urgent will not necessarily be equal to the number TCP implementation signals urgent will not necessarily be equal to the number
of times the receiving user will be notified of the presence of of times the receiving user will be notified of the presence of
urgent data. urgent data.
</t> </t>
<t> <t>
If no remote socket was specified in the OPEN, but the If no remote socket was specified in the OPEN, but the
connection is established (e.g., because a LISTENing connection connection is established (e.g., because a LISTENing connection
has become specific due to a remote segment arriving for the has become specific due to a remote segment arriving for the
local socket), then the designated buffer is sent to the implied local socket), then the designated buffer is sent to the implied
remote socket. Users who make use of OPEN with an unspecified remote socket. Users who make use of OPEN with an unspecified
remote socket can make use of SEND without ever explicitly remote socket can make use of SEND without ever explicitly
knowing the remote socket address. knowing the remote socket address.
</t> </t>
<t> <t>
However, if a SEND is attempted before the remote socket However, if a SEND is attempted before the remote socket
becomes specified, an error will be returned. Users can use the becomes specified, an error will be returned. Users can use the
STATUS call to determine the status of the connection. Some STATUS call to determine the status of the connection. Some
TCP implementations may notify the user when an unspecified TCP implementations may notify the user when an unspecified
socket is bound. socket is bound.
</t> </t>
<t> <t>
If a timeout is specified, the current user timeout for this If a timeout is specified, the current user timeout for this
connection is changed to the new one. connection is changed to the new one.
</t> </t>
<t> <t>
In the simplest implementation, SEND would not return control to In the simplest implementation, SEND would not return control to
the sending process until either the transmission was complete the sending process until either the transmission was complete
or the timeout had been exceeded. However, this simple method or the timeout had been exceeded. However, this simple method
is both subject to deadlocks (for example, both sides of the is both subject to deadlocks (for example, both sides of the
connection might try to do SENDs before doing any RECEIVEs) and connection might try to do SENDs before doing any RECEIVEs) and
offers poor performance, so it is not recommended. A more offers poor performance, so it is not recommended. A more
sophisticated implementation would return immediately to allow sophisticated implementation would return immediately to allow
the process to run concurrently with network I/O, and, the process to run concurrently with network I/O, and,
furthermore, to allow multiple SENDs to be in progress. furthermore, to allow multiple SENDs to be in progress.
Multiple SENDs are served in first-come, first-served order, so Multiple SENDs are served in first-come, first-served order, so
the TCP endpoint will queue those it cannot service immediately. the TCP endpoint will queue those it cannot service immediately.
</t> </t>
<t> <t>
We have implicitly assumed an asynchronous user interface in We have implicitly assumed an asynchronous user interface in
which a SEND later elicits some kind of SIGNAL or which a SEND later elicits some kind of SIGNAL or
pseudo-interrupt from the serving TCP endpoint. An alternative is to pseudo-interrupt from the serving TCP endpoint. An alternative is to
return a response immediately. For instance, SENDs might return return a response immediately. For instance, SENDs might return
immediate local acknowledgment, even if the segment sent had not immediate local acknowledgment, even if the segment sent had not
been acknowledged by the distant TCP endpoint. We could optimistically been acknowledged by the distant TCP endpoint. We could optimistically
assume eventual success. If we are wrong, the connection will assume eventual success. If we are wrong, the connection will
close anyway due to the timeout. In implementations of this close anyway due to the timeout. In implementations of this
kind (synchronous), there will still be some asynchronous kind (synchronous), there will still be some asynchronous
signals, but these will deal with the connection itself, and not signals, but these will deal with the connection itself, and not
with specific segments or buffers. with specific segments or buffers.
</t> </t>
<t> <t>
In order for the process to distinguish among error or success In order for the process to distinguish among error or success
indications for different SENDs, it might be appropriate for the indications for different SENDs, it might be appropriate for the
buffer address to be returned along with the coded response to buffer address to be returned along with the coded response to
the SEND request. TCP-to-user signals are discussed below, the SEND request. TCP-to-user signals are discussed below,
indicating the information that should be returned to the indicating the information that should be returned to the
calling process. calling process.
</t> </t>
</list>
</t>
</section>
<section title="Receive"> </section>
<t> <section numbered="true" toc="default">
<list> <name>Receive</name>
<t>
<t>
Format: RECEIVE (local connection name, buffer address, byte Format: RECEIVE (local connection name, buffer address, byte
count) -> byte count, urgent flag, push flag (optional) count) -> byte count, URGENT flag [, PUSH flag]
</t> </t>
<t> <t>
This command allocates a receiving buffer associated with the This command allocates a receiving buffer associated with the
specified connection. If no OPEN precedes this command or the specified connection. If no OPEN precedes this command or the
calling process is not authorized to use this connection, an calling process is not authorized to use this connection, an
error is returned. error is returned.
</t> </t>
<t> <t>
In the simplest implementation, control would not return to the In the simplest implementation, control would not return to the
calling program until either the buffer was filled, or some calling program until either the buffer was filled or some
error occurred, but this scheme is highly subject to deadlocks. error occurred, but this scheme is highly subject to deadlocks.
A more sophisticated implementation would permit several A more sophisticated implementation would permit several
RECEIVEs to be outstanding at once. These would be filled as RECEIVEs to be outstanding at once. These would be filled as
segments arrive. This strategy permits increased throughput at segments arrive. This strategy permits increased throughput at
the cost of a more elaborate scheme (possibly asynchronous) to the cost of a more elaborate scheme (possibly asynchronous) to
notify the calling program that a PUSH has been seen or a buffer notify the calling program that a PUSH has been seen or a buffer
filled. filled.
</t> </t>
<t>
A TCP receiver MAY pass a received PSH flag to the application layer via the <t>
A TCP receiver <bcp14>MAY</bcp14> pass a received PSH bit to the application layer
via the
PUSH flag in the interface (MAY-17), but it is not required (this was clarified in RFC PUSH flag in the interface (MAY-17), but it is not required (this was clarified in RFC
1122 section 4.2.2.2). The remainder of text describing the RECEIVE call below 1122, Section <xref target="RFC1122" section="4.2.2.2" sectionFormat="bare" format="default"/>). The remainder of text describing the RECEIVE call below
assumes that passing the PUSH indication is supported. assumes that passing the PUSH indication is supported.
</t> </t>
<t> <t>
If enough data arrive to fill the buffer before a PUSH is seen, If enough data arrive to fill the buffer before a PUSH is seen,
the PUSH flag will not be set in the response to the RECEIVE. the PUSH flag will not be set in the response to the RECEIVE.
The buffer will be filled with as much data as it can hold. If The buffer will be filled with as much data as it can hold. If
a PUSH is seen before the buffer is filled the buffer will be a PUSH is seen before the buffer is filled, the buffer will be
returned partially filled and PUSH indicated. returned partially filled and PUSH indicated.
</t> </t>
<t> <t>
If there is urgent data the user will have been informed as soon If there is urgent data, the user will have been informed as soon
as it arrived via a TCP-to-user signal. The receiving user as it arrived via a TCP-to-user signal. The receiving user
should thus be in &quot;urgent mode&quot;. If the URGENT flag is on, should thus be in "urgent mode". If the URGENT flag is on,
additional urgent data remains. If the URGENT flag is off, this additional urgent data remains. If the URGENT flag is off, this
call to RECEIVE has returned all the urgent data, and the user call to RECEIVE has returned all the urgent data, and the user
may now leave &quot;urgent mode&quot;. Note that data following the may now leave "urgent mode". Note that data following the
urgent pointer (non-urgent data) cannot be delivered to the user urgent pointer (non-urgent data) cannot be delivered to the user
in the same buffer with preceding urgent data unless the in the same buffer with preceding urgent data unless the
boundary is clearly marked for the user. boundary is clearly marked for the user.
</t> </t>
<t> <t>
To distinguish among several outstanding RECEIVEs and to take To distinguish among several outstanding RECEIVEs and to take
care of the case that a buffer is not completely filled, the care of the case that a buffer is not completely filled, the
return code is accompanied by both a buffer pointer and a byte return code is accompanied by both a buffer pointer and a byte
count indicating the actual length of the data received. count indicating the actual length of the data received.
</t> </t>
<t> <t>
Alternative implementations of RECEIVE might have the TCP endpoint Alternative implementations of RECEIVE might have the TCP endpoint
allocate buffer storage, or the TCP endpoint might share a ring buffer allocate buffer storage, or the TCP endpoint might share a ring buffer
with the user. with the user.
</t> </t>
</list>
</t>
</section>
<section title="Close"> </section>
<t> <section numbered="true" toc="default">
<list> <name>Close</name>
<t>
<t>
Format: CLOSE (local connection name) Format: CLOSE (local connection name)
</t> </t>
<t> <t>
This command causes the connection specified to be closed. If This command causes the connection specified to be closed. If
the connection is not open or the calling process is not the connection is not open or the calling process is not
authorized to use this connection, an error is returned. authorized to use this connection, an error is returned.
Closing connections is intended to be a graceful operation in Closing connections is intended to be a graceful operation in
the sense that outstanding SENDs will be transmitted (and the sense that outstanding SENDs will be transmitted (and
retransmitted), as flow control permits, until all have been retransmitted), as flow control permits, until all have been
serviced. Thus, it should be acceptable to make several SEND serviced. Thus, it should be acceptable to make several SEND
calls, followed by a CLOSE, and expect all the data to be sent calls, followed by a CLOSE, and expect all the data to be sent
to the destination. It should also be clear that users should to the destination. It should also be clear that users should
continue to RECEIVE on CLOSING connections, since the remote peer continue to RECEIVE on CLOSING connections since the remote peer
may be trying to transmit the last of its data. Thus, CLOSE may be trying to transmit the last of its data. Thus, CLOSE
means "I have no more to send" but does not mean "I will not means "I have no more to send" but does not mean "I will not
receive any more." It may happen (if the user level protocol is receive any more." It may happen (if the user-level protocol is
not well-thought-out) that the closing side is unable to get rid not well thought out) that the closing side is unable to get rid
of all its data before timing out. In this event, CLOSE turns of all its data before timing out. In this event, CLOSE turns
into ABORT, and the closing TCP peer gives up. into ABORT, and the closing TCP peer gives up.
</t> </t>
<t> <t>
The user may CLOSE the connection at any time on their own The user may CLOSE the connection at any time on their own
initiative, or in response to various prompts from the TCP implementation initiative, or in response to various prompts from the TCP implementation
(e.g., remote close executed, transmission timeout exceeded, (e.g., remote close executed, transmission timeout exceeded,
destination inaccessible). destination inaccessible).
</t> </t>
<t> <t>
Because closing a connection requires communication with the Because closing a connection requires communication with the
remote TCP peer, connections may remain in the closing state for a remote TCP peer, connections may remain in the closing state for a
short time. Attempts to reopen the connection before the TCP peer short time. Attempts to reopen the connection before the TCP peer
replies to the CLOSE command will result in error responses. replies to the CLOSE command will result in error responses.
</t> </t>
<t> <t>
Close also implies push function. Close also implies push function.
</t> </t>
</list>
</t>
</section>
<section title="Status"> </section>
<t> <section numbered="true" toc="default">
<list> <name>Status</name>
<t>
Format: STATUS (local connection name) -> status data <t>
Format: STATUS (local connection name) -&gt; status data
</t> </t>
<t> <t>
This is an implementation dependent user command and could be This is an implementation-dependent user command and could be
excluded without adverse effect. Information returned would excluded without adverse effect. Information returned would
typically come from the TCB associated with the connection. typically come from the TCB associated with the connection.
</t> </t>
<t>
<t>
This command returns a data block containing the following This command returns a data block containing the following
information: information:
<list> </t>
<t>local socket,<vspace /> <ul spacing="normal" empty="true">
remote socket,<vspace />
local connection name,<vspace /> <li>local socket,</li>
receive window,<vspace /> <li>
send window,<vspace /> remote socket,</li>
connection state,<vspace /> <li>
number of buffers awaiting acknowledgment,<vspace /> local connection name,</li>
number of buffers pending receipt,<vspace /> <li>
urgent state,<vspace /> receive window,</li>
DiffServ field value,<vspace /> <li>
security/compartment,<vspace /> send window,</li>
and transmission timeout.</t> <li>
</list> connection state,</li>
</t> <li>
<t> number of buffers awaiting acknowledgment,</li>
<li>
number of buffers pending receipt,</li>
<li>
urgent state,</li>
<li>
Diffserv field value,</li>
<li>
security/compartment, and</li>
<li>
transmission timeout.</li>
</ul>
<t>
Depending on the state of the connection, or on the Depending on the state of the connection, or on the
implementation itself, some of this information may not be implementation itself, some of this information may not be
available or meaningful. If the calling process is not available or meaningful. If the calling process is not
authorized to use this connection, an error is returned. This authorized to use this connection, an error is returned. This
prevents unauthorized processes from gaining information about a prevents unauthorized processes from gaining information about a
connection. connection.
</t> </t>
</list>
</t>
</section>
<section title="Abort"> </section>
<t> <section numbered="true" toc="default">
<list> <name>Abort</name>
<t>
<t>
Format: ABORT (local connection name) Format: ABORT (local connection name)
</t> </t>
<t> <t>
This command causes all pending SENDs and RECEIVES to be This command causes all pending SENDs and RECEIVES to be
aborted, the TCB to be removed, and a special RESET message to aborted, the TCB to be removed, and a special RST message to
be sent to the remote TCP peer of the connection. be sent to the remote TCP peer of the connection.
Depending on the implementation, users may receive abort Depending on the implementation, users may receive abort
indications for each outstanding SEND or RECEIVE, or may simply indications for each outstanding SEND or RECEIVE, or may simply
receive an ABORT-acknowledgment. receive an ABORT-acknowledgment.
</t> </t>
</list>
</t>
</section>
<section title="Flush"> </section>
<t> <section numbered="true" toc="default">
<list> <name>Flush</name>
<t>
<t>
Some TCP implementations have included a FLUSH call, which Some TCP implementations have included a FLUSH call, which
will empty the TCP send queue of any data that the user will empty the TCP send queue of any data that the user
has issued SEND calls for but is still to the right of the has issued SEND calls for but is still to the right of the
current send window. That is, it flushes as much queued current send window. That is, it flushes as much queued
send data as possible without losing sequence number send data as possible without losing sequence number
synchronization. The FLUSH call MAY be implemented (MAY-14). synchronization. The FLUSH call <bcp14>MAY</bcp14> be implemented (MAY-14).
</t> </t>
</list>
</t>
</section>
<section title="Asynchronous Reports"> </section>
<t> <section anchor="asynchronous-reports" numbered="true" toc="default">
<list> <name>Asynchronous Reports</name>
<t>
There MUST be a mechanism for reporting soft TCP error <t>
There <bcp14>MUST</bcp14> be a mechanism for reporting soft TCP erro
r
conditions to the application (MUST-47). Generically, we assume this conditions to the application (MUST-47). Generically, we assume this
takes the form of an application-supplied ERROR_REPORT takes the form of an application-supplied ERROR_REPORT
routine that may be upcalled asynchronously from routine that may be upcalled asynchronously from
the transport layer: the transport layer:
<list><t> </t>
<ul spacing="normal" empty="true">
<li>
ERROR_REPORT(local connection name, reason, subreason) ERROR_REPORT(local connection name, reason, subreason)
</t></list> </li>
</ul>
<t>
The precise encoding of the reason and subreason parameters The precise encoding of the reason and subreason parameters
is not specified here. However, the conditions that are is not specified here. However, the conditions that are
reported asynchronously to the application MUST include: reported asynchronously to the application <bcp14>MUST</bcp14> inclu
<list><t> de:
* ICMP error message arrived (see <xref target="icmp"/> for descr </t>
iption of handling each ICMP message type, since some message types need to be s <ul spacing="normal">
uppressed from generating reports to the application) <li>
</t><t> ICMP error message arrived (see <xref target="icmp" format="def
* Excessive retransmissions (see <xref target="connfail"/>) ault"/> for description of handling each ICMP message type since some message ty
</t><t> pes need to be suppressed from generating reports to the application)
* Urgent pointer advance (see <xref target="urgent"/>) </li>
</t></list> <li>
Excessive retransmissions (see <xref target="connfail" format="d
efault"/>)
</li>
<li>
Urgent pointer advance (see <xref target="urgent" format="defau
lt"/>)
</li>
</ul>
<t>
However, an application program that does not want to However, an application program that does not want to
receive such ERROR_REPORT calls SHOULD be able to receive such ERROR_REPORT calls <bcp14>SHOULD</bcp14> be able to
effectively disable these calls (SHLD-20). effectively disable these calls (SHLD-20).
</t> </t>
</list>
</t>
</section>
<section title="Set Differentiated Services Field (IPv4 TOS or IPv6 Traffic Clas </section>
s)"> <section numbered="true" toc="default">
<t> <name>Set Differentiated Services Field (IPv4 TOS or IPv6 Traffic Cl
<list> ass)</name>
<t>
The application layer MUST be able to specify the Differentiated Ser <t>
vices field The application layer <bcp14>MUST</bcp14> be able to specify the Dif
for segments that are sent on a connection (MUST-48). The Different ferentiated Services field
iated Services field includes the 6-bit Differentiated Services Code Point (DSCP for segments that are sent on a connection (MUST-48). The Different
) value. iated Services field includes the 6-bit Differentiated Services Codepoint (DSCP)
It is not required, but the application SHOULD be able to value.
change the Differentiated Services field during the connection lifet It is not required, but the application <bcp14>SHOULD</bcp14> be abl
ime (SHLD-21). TCP implementations SHOULD e to
change the Differentiated Services field during the connection lifet
ime (SHLD-21). TCP implementations <bcp14>SHOULD</bcp14>
pass the current Differentiated Services field value without change to the IP layer, pass the current Differentiated Services field value without change to the IP layer,
when it sends segments on the connection (SHLD-22). when it sends segments on the connection (SHLD-22).
</t> </t>
<t> <t>
The Differentiated Services field will be specified independently in each direction on The Differentiated Services field will be specified independently in each direction on
the connection, so that the receiver application will the connection, so that the receiver application will
specify the Differentiated Services field used for ACK segments. specify the Differentiated Services field used for ACK segments.
</t> </t>
<t> <t>
TCP implementations MAY pass the most recently received Differentiat TCP implementations <bcp14>MAY</bcp14> pass the most recently receiv
ed Services field up to the ed Differentiated Services field up to the
application (MAY-9). application (MAY-9).
</t> </t>
</list>
</t>
</section>
</section> </section>
<section title="TCP/Lower-Level Interface"> </section>
<t> <section numbered="true" toc="default">
The TCP endpoint calls on a lower level protocol module to actually send and <name>TCP/Lower-Level Interface</name>
receive information over a network. The two current standard Internet Proto <t>
col (IP) versions layered below TCP are IPv4 <xref target="RFC0791"/> and IPv6 < The TCP endpoint calls on a lower-level protocol module to actually send and
xref target="RFC8200"/>. receive information over a network. The two current standard Internet Proto
col (IP) versions layered below TCP are IPv4 <xref target="RFC0791" format="defa
ult"/> and IPv6 <xref target="RFC8200" format="default"/>.
</t> </t>
<t> <t>
If the lower level protocol is IPv4 it provides arguments for a type If the lower-level protocol is IPv4, it provides arguments for a type
of service (used within the Differentiated Services field) and for a time to live. TCP uses the following settings of service (used within the Differentiated Services field) and for a time to live. TCP uses the following settings
for these parameters: for these parameters:
<list> </t>
<t> <dl>
DiffServ field: The IP header value for the DiffServ field is given by the <dt>
user. This includes the bits of the DiffServ Code Point (DSCP). Diffserv field:</dt><dd>The IP header value for the Diffserv field is give
</t> n by the user. This includes the bits of the Diffserv Codepoint (DSCP).
<t> </dd>
Time to Live (TTL): The TTL value used to send TCP segments MUST be config <dt>
urable (MUST-49). Time to Live (TTL):</dt><dd><t>The TTL value used to send TCP segments <bc
<list> p14>MUST</bcp14> be configurable (MUST-49).
<t> </t>
Note that RFC 793 specified one minute (60 seconds) as a constant for <ul spacing="normal">
the TTL, because the assumed maximum segment lifetime was two minutes. This was <li>
intended to explicitly ask that a segment be destroyed if it cannot be Note that RFC 793 specified one minute (60 seconds) as a constant for
delivered by the internet system within one minute. RFC 1122 changed this speci the TTL because the assumed maximum segment lifetime was two minutes. This was
fication to require that the TTL be configurable. intended to explicitly ask that a segment be destroyed if it could not be
</t> delivered by the internet system within one minute. RFC 1122 updated RFC 793 to
<t> require that the TTL be configurable.
Note that the DiffServ field is permitted to change during a connection </li>
(Section 4.2.4.2 of RFC 1122). However, the application interface might <li>
Note that the Diffserv field is permitted to change during a connection
(Section <xref target="RFC1122" section="4.2.4.2" sectionFormat="bare" fo
rmat="default"/> of RFC 1122). However, the application interface might
not support this ability, and the application does not have knowledge not support this ability, and the application does not have knowledge
about individual TCP segments, so this can only be done on a coarse about individual TCP segments, so this can only be done on a coarse
granularity, at best. This limitation is further discussed in RFC 7657 granularity, at best. This limitation is further discussed in RFC 7657
(sec 5.1, 5.3, and 6) <xref target="RFC7657"/>. Generally, an (Sections <xref target="RFC7657" section="5.1" sectionFormat="bare" forma
application SHOULD NOT change the DiffServ field value during the course t="default"/>, <xref target="RFC7657" section="5.3" sectionFormat="bare" format=
"default"/>, and <xref target="RFC7657" section="6" sectionFormat="bare" format=
"default"/>) <xref target="RFC7657" format="default"/>. Generally, an
application <bcp14>SHOULD NOT</bcp14> change the Diffserv field value dur
ing the course
of a connection (SHLD-23). of a connection (SHLD-23).
</t> </li>
</list>
</t> </ul>
</list></t> </dd>
<t> </dl>
Any lower level protocol will have to provide the source address, <t>
Any lower-level protocol will have to provide the source address,
destination address, and protocol fields, and some way to determine destination address, and protocol fields, and some way to determine
the &quot;TCP length&quot;, both to provide the functional equivalent service the "TCP length", both to provide the functional equivalent service
of IP and to be used in the TCP checksum. of IP and to be used in the TCP checksum.
</t> </t>
<t> <t>
When received options are passed up to TCP from the IP When received options are passed up to TCP from the IP
layer, a TCP implementation MUST ignore options that it does not understand (MUST-50). layer, a TCP implementation <bcp14>MUST</bcp14> ignore options that it does not understand (MUST-50).
</t> </t>
<t> <t>
A TCP implementation MAY support the Time Stamp (MAY-10) and Record Route (M A TCP implementation <bcp14>MAY</bcp14> support the Timestamp (MAY-10) and R
AY-11) options. ecord Route (MAY-11) Options.
</t> </t>
<section numbered="true" toc="default">
<section title="Source Routing"> <name>Source Routing</name>
<t> <t>
If the lower level is IP (or other protocol that provides this If the lower level is IP (or other protocol that provides this
feature) and source routing is used, the interface must allow the feature) and source routing is used, the interface must allow the
route information to be communicated. This is especially important route information to be communicated. This is especially important
so that the source and destination addresses used in the TCP so that the source and destination addresses used in the TCP
checksum be the originating source and ultimate destination. It is checksum be the originating source and ultimate destination. It is
also important to preserve the return route to answer connection also important to preserve the return route to answer connection
requests. requests.
</t> </t>
<t> <t>
An application MUST be able to specify a source route when An application <bcp14>MUST</bcp14> be able to specify a source route when
it actively opens a TCP connection (MUST-51), and this MUST take it actively opens a TCP connection (MUST-51), and this <bcp14>MUST</bcp14> t
ake
precedence over a source route received in a datagram (MUST-52). precedence over a source route received in a datagram (MUST-52).
</t> </t>
<t> <t>
When a TCP connection is OPENed passively and a packet When a TCP connection is OPENed passively and a packet
arrives with a completed IP Source Route option (containing arrives with a completed IP Source Route Option (containing
a return route), TCP implementations MUST save the return route and use it a return route), TCP implementations <bcp14>MUST</bcp14> save the return rou
te and use it
for all segments sent on this connection (MUST-53). If a different for all segments sent on this connection (MUST-53). If a different
source route arrives in a later segment, the later source route arrives in a later segment, the later
definition SHOULD override the earlier one (SHLD-24). definition <bcp14>SHOULD</bcp14> override the earlier one (SHLD-24).
</t> </t>
</section> </section>
<section title="ICMP Messages" anchor="icmp"> <section anchor="icmp" numbered="true" toc="default">
<t> <name>ICMP Messages</name>
TCP implementations MUST act on an ICMP error message passed up from <t>
the IP TCP implementations <bcp14>MUST</bcp14> act on an ICMP error message
passed up from the IP
layer, directing it to the connection that created the layer, directing it to the connection that created the
error (MUST-54). The necessary demultiplexing information can be error (MUST-54). The necessary demultiplexing information can be
found in the IP header contained within the ICMP message. found in the IP header contained within the ICMP message.
</t> </t>
<t> <t>
This applies to ICMPv6 in addition to IPv4 ICMP. This applies to ICMPv6 in addition to IPv4 ICMP.
</t> </t>
<t> <t>
<xref target="RFC5461"/> contains discussion of specific ICMP and IC <xref target="RFC5461" format="default"/> contains discussion of spe
MPv6 messages classified as either &quot;soft&quot; or &quot;hard&quot; errors t cific ICMP and ICMPv6 messages classified as either "soft" or "hard" errors that
hat may bear different responses. Treatment for classes of ICMP messages is des may bear different responses. Treatment for classes of ICMP messages is descri
cribed below: bed below:
</t> </t>
<t> <dl newline="true" spacing="normal" indent="2">
<list style="hanging" hangIndent="2"> <dt>Source Quench</dt>
<t hangText="Source Quench"><vspace /> <dd>
TCP implementations MUST silently discard any received ICMP Sou TCP implementations <bcp14>MUST</bcp14> silently discard any re
rce Quench messages (MUST-55). See <xref target="RFC6633"/> for discussion. ceived ICMP Source Quench messages (MUST-55). See <xref target="RFC6633" format
</t> ="default"/> for discussion.
<t hangText="Soft Errors"><vspace /> </dd>
For IPv4 ICMP these include: Destination Unreachable -- codes 0 <dt>Soft Errors</dt>
, 1, 5; Time Exceeded -- codes 0, 1; and Parameter Problem.<vspace /> <dd>
For ICMPv6 these include: Destination Unreachable -- codes 0, 3 <t>
; Time Exceeded -- codes 0, 1; and Parameter Problem -- codes 0, 1, 2.<vspace / For IPv4 ICMP, these include: Destination Unreachable -- codes
> 0, 1, 5; Time Exceeded -- codes 0, 1; and Parameter Problem.</t>
<t>
For ICMPv6, these include: Destination Unreachable -- codes 0,
3; Time Exceeded -- codes 0, 1; and Parameter Problem -- codes 0, 1, 2.</t>
<t>
Since these Unreachable messages indicate soft error Since these Unreachable messages indicate soft error
conditions, TCP implementations MUST NOT abort the connection ( conditions, a TCP implementation <bcp14>MUST NOT</bcp14> abort
MUST-56), and it the connection (MUST-56), and it
SHOULD make the information available to the <bcp14>SHOULD</bcp14> make the information available to the
application (SHLD-25). application (SHLD-25).
</t> </t>
<t hangText="Hard Errors"><vspace /> </dd>
For ICMP these include Destination Unreachable -- codes 2-4.<vs <dt>Hard Errors</dt>
pace /> <dd>
<t>
For ICMP these include Destination Unreachable -- codes 2-4.</t
>
<t>
These are hard error conditions, so TCP implementations SHOULD These are hard error conditions, so TCP implementations <bcp14>
abort SHOULD</bcp14> abort
the connection (SHLD-26). <xref target="RFC5461"/> notes that the connection (SHLD-26). <xref target="RFC5461" format="defau
lt"/> notes that
some implementations do not abort connections when an some implementations do not abort connections when an
ICMP hard error is received for a connection that is ICMP hard error is received for a connection that is
in any of the synchronized states. in any of the synchronized states.
</t> </t>
</list> </dd>
</t> </dl>
<t> <t>
Note that <xref target="RFC5461"/> section 4 describes widespread implementation Note that <xref target="RFC5461" section="4" sectionFormat="comma" format="defau
behavior that treats soft errors as hard errors during connection establishment lt"/> describes widespread implementation behavior that treats soft errors as ha
. rd errors during connection establishment.
</t> </t>
</section> </section>
<section title="Source Address Validation"> <section numbered="true" toc="default">
<t> <name>Source Address Validation</name>
<t>
RFC 1122 requires addresses to be validated in incoming SYN packets: RFC 1122 requires addresses to be validated in incoming SYN packets:
<list>
<t>
An incoming SYN with an invalid source address MUST be
ignored either by TCP or by the IP layer (MUST-63) (Section
3.2.1.3 of <xref target="RFC1122"/>).
</t> </t>
<t> <blockquote>
A TCP implementation MUST silently discard an incoming SYN <t>
segment that is addressed to a broadcast or multicast An incoming SYN with an invalid source address <bcp14>MUST</bcp14> b
address (MUST-57). e
ignored either by TCP or by the IP layer [(MUST-63)] (see Section
<xref target="RFC1122" section="3.2.1.3" sectionFormat="bare"/>).
</t> </t>
</list> <t>
A TCP implementation <bcp14>MUST</bcp14> silently discard an incomin
g SYN
segment that is addressed to a broadcast or multicast
address [(MUST-57)].
</t> </t>
<t>This prevents connection state and replies from being erroneously generated, </blockquote>
and implementers should note that this guidance is applicable to all incoming se
gments, not just SYNs, as specifically indicated in RFC 1122.</t> <t>This prevents connection state and replies from being erroneously
</section> generated, and implementers should note that this guidance is applicable to all
</section> incoming segments, not just SYNs, as specifically indicated in RFC 1122.</t>
</section> </section>
<section title="Event Processing"> </section>
<t> </section>
<section numbered="true" toc="default">
<name>Event Processing</name>
<t>
The processing depicted in this section is an example of one possible The processing depicted in this section is an example of one possible
implementation. Other implementations may have slightly different implementation. Other implementations may have slightly different
processing sequences, but they should differ from those in this processing sequences, but they should differ from those in this
section only in detail, not in substance. section only in detail, not in substance.
</t> </t>
<t> <t>
The activity of the TCP endpoint can be characterized as responding to events. The activity of the TCP endpoint can be characterized as responding to events.
The events that occur can be cast into three categories: user calls, The events that occur can be cast into three categories: user calls,
arriving segments, and timeouts. This section describes the arriving segments, and timeouts. This section describes the
processing the TCP endpoint does in response to each of the events. In many processing the TCP endpoint does in response to each of the events. In many
cases the processing required depends on the state of the connection. cases, the processing required depends on the state of the connection.
</t> </t>
<t> <t>
Events that occur: Events that occur:
<list> </t>
<t>User Calls <ul spacing="normal" empty="true">
<list> <li>
<t>OPEN<vspace /> <t>User Calls</t>
SEND<vspace /> <ul spacing="normal" empty="true">
RECEIVE<vspace /> <li>OPEN</li>
CLOSE<vspace /> <li>SEND</li>
ABORT<vspace /> <li>RECEIVE</li>
STATUS</t> <li>CLOSE</li>
</list> <li>ABORT</li>
</t> <li>STATUS</li>
<t>Arriving Segments </ul>
<list><t>SEGMENT ARRIVES</t></list> </li>
</t> <li>
<t>Timeouts <t>Arriving Segments</t>
<list> <ul spacing="normal" empty="true">
<t>USER TIMEOUT<vspace /> <li>SEGMENT ARRIVES</li>
RETRANSMISSION TIMEOUT<vspace /> </ul>
TIME-WAIT TIMEOUT<vspace /> </li>
</t></list> <li>
</t> <t>Timeouts</t>
</list> <ul spacing="normal" empty="true">
</t> <li>USER TIMEOUT</li>
<t> <li>RETRANSMISSION TIMEOUT</li>
<li>TIME-WAIT TIMEOUT</li>
</ul>
</li>
</ul>
<t>
The model of the TCP/user interface is that user commands receive an The model of the TCP/user interface is that user commands receive an
immediate return and possibly a delayed response via an event or immediate return and possibly a delayed response via an event or
pseudo interrupt. In the following descriptions, the term "signal" pseudo-interrupt. In the following descriptions, the term "signal"
means cause a delayed response. means cause a delayed response.
</t> </t>
<t> <t>
Error responses in this document are identified by character strings. For example, user Error responses in this document are identified by character strings. For example, user
commands referencing connections that do not exist receive "error: commands referencing connections that do not exist receive "error:
connection not open". connection not open".
</t> </t>
<t> <t>
Please note in the following that all arithmetic on sequence numbers, Please note in the following that all arithmetic on sequence numbers,
acknowledgment numbers, windows, et cetera, is modulo 2**32 (the size acknowledgment numbers, windows, et cetera, is modulo 2<sup>32</sup> (the size
of the sequence number space). Also note that "=&lt;" means less than or of the sequence number space). Also note that "=&lt;" means less than or
equal to (modulo 2**32). equal to (modulo 2<sup>32</sup>).
</t> </t>
<t> <t>
A natural way to think about processing incoming segments is to A natural way to think about processing incoming segments is to
imagine that they are first tested for proper sequence number (i.e., imagine that they are first tested for proper sequence number (i.e.,
that their contents lie in the range of the expected "receive window" that their contents lie in the range of the expected "receive window"
in the sequence number space) and then that they are generally queued in the sequence number space) and then that they are generally queued
and processed in sequence number order. and processed in sequence number order.
</t> </t>
<t> <t>
When a segment overlaps other already received segments we reconstruct When a segment overlaps other already received segments, we reconstruct
the segment to contain just the new data, and adjust the header fields the segment to contain just the new data and adjust the header fields
to be consistent. to be consistent.
</t> </t>
<t> <t>
Note that if no state change is mentioned the TCP connection stays in the same Note that if no state change is mentioned, the TCP connection stays in the sam
e
state. state.
</t> </t>
<section title="OPEN Call"> <section numbered="true" toc="default">
<t> <name>OPEN Call</name>
<list> <t>CLOSED STATE (i.e., TCB does not exist)
<t>CLOSED STATE (i.e., TCB does not exist) </t>
<list> <ul spacing="normal">
<t> <li>
Create a new transmission control block (TCB) to hold connection Create a new transmission control block (TCB) to hold connection
state information. Fill in local socket identifier, remote state information. Fill in local socket identifier, remote
socket, DiffServ field, security/compartment, and user timeout socket, Diffserv field, security/compartment, and user timeout
information. Note that some parts of the remote socket may be information. Note that some parts of the remote socket may be
unspecified in a passive OPEN and are to be filled in by the unspecified in a passive OPEN and are to be filled in by the
parameters of the incoming SYN segment. Verify the security and parameters of the incoming SYN segment. Verify the security and
DiffServ value requested are allowed for this user, if not return Diffserv value requested are allowed for this user, if not, return
&quot;error: DiffServ value not allowed&quot; or &quot;error: security/c "error: Diffserv value not allowed" or "error: security/compartment
ompartment not allowed". If passive, enter the LISTEN state and return. If
not allowed.&quot; If passive enter the LISTEN state and return. If active and the remote socket is unspecified, return "error:
active and the remote socket is unspecified, return &quot;error: remote socket unspecified"; if active and the remote socket is
remote socket unspecified&quot;; if active and the remote socket is
specified, issue a SYN segment. An initial send sequence number specified, issue a SYN segment. An initial send sequence number
(ISS) is selected. A SYN segment of the form &lt;SEQ=ISS>&lt;CTL=SYN> (ISS) is selected. A SYN segment of the form &lt;SEQ=ISS>&lt;CTL=SYN&gt;
is sent. Set SND.UNA to ISS, SND.NXT to ISS+1, enter SYN-SENT is sent. Set SND.UNA to ISS, SND.NXT to ISS+1, enter SYN-SENT
state, and return. state, and return.
</t> </li>
<t> <li>
If the caller does not have access to the local socket specified, If the caller does not have access to the local socket specified,
return &quot;error: connection illegal for this process&quot;. If there return "error: connection illegal for this process". If there is
is no room to create a new connection, return "error: insufficient
no room to create a new connection, return &quot;error: insufficient resources".
resources&quot;. </li>
</t> </ul>
</list> <t>LISTEN STATE
</t> </t>
<t>LISTEN STATE <ul spacing="normal">
<list> <li>
<t>
If the OPEN call is active and the remote socket is specified, then change the If the OPEN call is active and the remote socket is specified, then change the
connection from passive to active, select an ISS. Send a SYN connection from passive to active, select an ISS. Send a SYN
segment, set SND.UNA to ISS, SND.NXT to ISS+1. Enter SYN-SENT segment, set SND.UNA to ISS, SND.NXT to ISS+1. Enter SYN-SENT
state. Data associated with SEND may be sent with SYN segment or state. Data associated with SEND may be sent with SYN segment or
queued for transmission after entering ESTABLISHED state. The queued for transmission after entering ESTABLISHED state. The
urgent bit if requested in the command must be sent with the data urgent bit if requested in the command must be sent with the data
segments sent as a result of this command. If there is no room to segments sent as a result of this command. If there is no room to
queue the request, respond with "error: insufficient resources". queue the request, respond with "error: insufficient resources".
If the remote socket was not specified, then return "error: remote If the remote socket was not specified, then return "error: remote
socket unspecified". socket unspecified".
</t> </li>
</list> </ul>
</t> <t>SYN-SENT STATE</t>
<t><vspace blankLines="999"/></t> <t>
<t>SYN-SENT STATE<vspace /> SYN-RECEIVED STATE</t>
SYN-RECEIVED STATE<vspace /> <t>
ESTABLISHED STATE<vspace /> ESTABLISHED STATE</t>
FIN-WAIT-1 STATE<vspace /> <t>
FIN-WAIT-2 STATE<vspace /> FIN-WAIT-1 STATE</t>
CLOSE-WAIT STATE<vspace /> <t>
CLOSING STATE<vspace /> FIN-WAIT-2 STATE</t>
LAST-ACK STATE<vspace /> <t>
CLOSE-WAIT STATE</t>
<t>
CLOSING STATE</t>
<t>
LAST-ACK STATE</t>
<t>
TIME-WAIT STATE TIME-WAIT STATE
<list> </t>
<t>Return "error: connection already exists".</t> <ul spacing="normal">
</list> <li>Return "error: connection already exists".</li>
</t> </ul>
</list> </section>
</t> <section numbered="true" toc="default">
</section> <name>SEND Call</name>
<section title="SEND Call"> <t>
<t>
<list>
<t>
CLOSED STATE (i.e., TCB does not exist) CLOSED STATE (i.e., TCB does not exist)
<list> </t>
<t> <ul spacing="normal">
<li>
If the user does not have access to such a connection, then return If the user does not have access to such a connection, then return
"error: connection illegal for this process". "error: connection illegal for this process".
</t> </li>
<t> <li>
Otherwise, return "error: connection does not exist". Otherwise, return "error: connection does not exist".
</t> </li>
</list> </ul>
</t> <t>
<t>
LISTEN STATE LISTEN STATE
<list> </t>
<t> <ul spacing="normal">
<li>
If the remote socket is specified, then change the connection If the remote socket is specified, then change the connection
from passive to active, select an ISS. Send a SYN segment, set from passive to active, select an ISS. Send a SYN segment, set
SND.UNA to ISS, SND.NXT to ISS+1. Enter SYN-SENT state. Data SND.UNA to ISS, SND.NXT to ISS+1. Enter SYN-SENT state. Data
associated with SEND may be sent with SYN segment or queued for associated with SEND may be sent with SYN segment or queued for
transmission after entering ESTABLISHED state. The urgent bit if transmission after entering ESTABLISHED state. The urgent bit if
requested in the command must be sent with the data segments sent requested in the command must be sent with the data segments sent
as a result of this command. If there is no room to queue the as a result of this command. If there is no room to queue the
request, respond with "error: insufficient resources". If request, respond with "error: insufficient resources". If
the remote socket was not specified, then return "error: remote the remote socket was not specified, then return "error: remote
socket unspecified". socket unspecified".
</t> </li>
</list> </ul>
</t> <t>
<t> SYN-SENT STATE</t>
SYN-SENT STATE<vspace /> <t>
SYN-RECEIVED STATE SYN-RECEIVED STATE
<list><t> </t>
<ul spacing="normal">
<li>
Queue the data for transmission after entering ESTABLISHED state. Queue the data for transmission after entering ESTABLISHED state.
If no space to queue, respond with "error: insufficient If no space to queue, respond with "error: insufficient
resources". resources".
</t></list> </li>
</t> </ul>
<t> <t>
ESTABLISHED STATE<vspace /> ESTABLISHED STATE</t>
<t>
CLOSE-WAIT STATE CLOSE-WAIT STATE
<list><t> </t>
<ul spacing="normal">
<li>
Segmentize the buffer and send it with a piggybacked Segmentize the buffer and send it with a piggybacked
acknowledgment (acknowledgment value = RCV.NXT). If there is acknowledgment (acknowledgment value = RCV.NXT). If there is
insufficient space to remember this buffer, simply return "error: insufficient space to remember this buffer, simply return "error:
insufficient resources". insufficient resources".
</t> </li>
<t> <li>
If the urgent flag is set, then SND.UP &lt;- SND.NXT and set the If the URGENT flag is set, then SND.UP &lt;- SND.NXT and set the
urgent pointer in the outgoing segments. urgent pointer in the outgoing segments.
</t></list> </li>
</t> </ul>
<t> <t>
FIN-WAIT-1 STATE<vspace /> FIN-WAIT-1 STATE</t>
FIN-WAIT-2 STATE<vspace /> <t>
CLOSING STATE<vspace /> FIN-WAIT-2 STATE</t>
LAST-ACK STATE<vspace /> <t>
CLOSING STATE</t>
<t>
LAST-ACK STATE</t>
<t>
TIME-WAIT STATE TIME-WAIT STATE
<list><t> </t>
<ul spacing="normal">
<li>
Return "error: connection closing" and do not service request. Return "error: connection closing" and do not service request.
</t></list> </li>
</t> </ul>
</list> </section>
</t> <section numbered="true" toc="default">
</section> <name>RECEIVE Call</name>
<section title="RECEIVE Call"> <t>
<t>
<list>
<t>
CLOSED STATE (i.e., TCB does not exist) CLOSED STATE (i.e., TCB does not exist)
<list> </t>
<t> <ul spacing="normal">
<li>
If the user does not have access to such a connection, return If the user does not have access to such a connection, return
"error: connection illegal for this process". "error: connection illegal for this process".
</t> </li>
<t> <li>
Otherwise return "error: connection does not exist". Otherwise, return "error: connection does not exist".
</t> </li>
</list></t> </ul>
<t> <t>
LISTEN STATE<vspace /> LISTEN STATE</t>
SYN-SENT STATE<vspace /> <t>
SYN-SENT STATE</t>
<t>
SYN-RECEIVED STATE SYN-RECEIVED STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Queue for processing after entering ESTABLISHED state. If there Queue for processing after entering ESTABLISHED state. If there
is no room to queue this request, respond with "error: is no room to queue this request, respond with "error:
insufficient resources". insufficient resources".
</t> </li>
</list></t> </ul>
<t> <t>
ESTABLISHED STATE<vspace /> ESTABLISHED STATE</t>
FIN-WAIT-1 STATE<vspace /> <t>
FIN-WAIT-1 STATE</t>
<t>
FIN-WAIT-2 STATE FIN-WAIT-2 STATE
<list> </t>
<t> <ul spacing="normal">
<li>
If insufficient incoming segments are queued to satisfy the If insufficient incoming segments are queued to satisfy the
request, queue the request. If there is no queue space to request, queue the request. If there is no queue space to
remember the RECEIVE, respond with "error: insufficient remember the RECEIVE, respond with "error: insufficient
resources". resources".
</t> </li>
<t> <li>
Reassemble queued incoming segments into receive buffer and return Reassemble queued incoming segments into receive buffer and return
to user. Mark "push seen" (PUSH) if this is the case. to user. Mark "push seen" (PUSH) if this is the case.
</t> </li>
<t> <li>
If RCV.UP is in advance of the data currently being passed to the If RCV.UP is in advance of the data currently being passed to the
user notify the user of the presence of urgent data. user, notify the user of the presence of urgent data.
</t> </li>
<t> <li>
When the TCP endpoint takes responsibility for delivering data to the user When the TCP endpoint takes responsibility for delivering data to the user,
that fact must be communicated to the sender via an that fact must be communicated to the sender via an
acknowledgment. The formation of such an acknowledgment is acknowledgment. The formation of such an acknowledgment is
described below in the discussion of processing an incoming described below in the discussion of processing an incoming
segment. segment.
</t> </li>
</list></t> </ul>
<t> <t>
CLOSE-WAIT STATE CLOSE-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Since the remote side has already sent FIN, RECEIVEs must be Since the remote side has already sent FIN, RECEIVEs must be
satisfied by data already on hand, but not yet delivered to the satisfied by data already on hand, but not yet delivered to the
user. If no text is awaiting delivery, the RECEIVE will get an user. If no text is awaiting delivery, the RECEIVE will get an
"error: connection closing" response. Otherwise, any remaining "error: connection closing" response. Otherwise, any remaining
data can be used to satisfy the RECEIVE. data can be used to satisfy the RECEIVE.
</t> </li>
</list></t> </ul>
<t> <t>
CLOSING STATE<vspace /> CLOSING STATE</t>
LAST-ACK STATE<vspace /> <t>
LAST-ACK STATE</t>
<t>
TIME-WAIT STATE TIME-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Return "error: connection closing". Return "error: connection closing".
</t> </li>
</list></t> </ul>
</list> </section>
</t> <section numbered="true" toc="default">
</section> <name>CLOSE Call</name>
<section title="CLOSE Call"> <t>
<t>
<list>
<t>
CLOSED STATE (i.e., TCB does not exist) CLOSED STATE (i.e., TCB does not exist)
<list> </t>
<t> <ul spacing="normal">
<li>
If the user does not have access to such a connection, return If the user does not have access to such a connection, return
"error: connection illegal for this process". "error: connection illegal for this process".
</t> </li>
<t> <li>
Otherwise, return "error: connection does not exist". Otherwise, return "error: connection does not exist".
</t> </li>
</list></t> </ul>
<t> <t>
LISTEN STATE LISTEN STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Any outstanding RECEIVEs are returned with "error: closing" Any outstanding RECEIVEs are returned with "error: closing"
responses. Delete TCB, enter CLOSED state, and return. responses. Delete TCB, enter CLOSED state, and return.
</t></list></t> </li>
<t> </ul>
<t>
SYN-SENT STATE SYN-SENT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Delete the TCB and return "error: closing" responses to any Delete the TCB and return "error: closing" responses to any
queued SENDs, or RECEIVEs. queued SENDs, or RECEIVEs.
</t></list></t> </li>
<t> </ul>
<t>
SYN-RECEIVED STATE SYN-RECEIVED STATE
<list> </t>
<t> <ul spacing="normal">
<li>
If no SENDs have been issued and there is no pending data to send, If no SENDs have been issued and there is no pending data to send,
then form a FIN segment and send it, and enter FIN-WAIT-1 state; then form a FIN segment and send it, and enter FIN-WAIT-1 state;
otherwise queue for processing after entering ESTABLISHED state. otherwise, queue for processing after entering ESTABLISHED state.
</t></list></t> </li>
<t> </ul>
<t>
ESTABLISHED STATE ESTABLISHED STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Queue this until all preceding SENDs have been segmentized, then Queue this until all preceding SENDs have been segmentized, then
form a FIN segment and send it. In any case, enter FIN-WAIT-1 form a FIN segment and send it. In any case, enter FIN-WAIT-1
state. state.
</t></list></t> </li>
<t> </ul>
FIN-WAIT-1 STATE<vspace /> <t>
FIN-WAIT-1 STATE</t>
<t>
FIN-WAIT-2 STATE FIN-WAIT-2 STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Strictly speaking, this is an error and should receive an "error: Strictly speaking, this is an error and should receive an "error:
connection closing" response. An "ok" response would be connection closing" response. An "ok" response would be
acceptable, too, as long as a second FIN is not emitted (the first acceptable, too, as long as a second FIN is not emitted (the first
FIN may be retransmitted though). FIN may be retransmitted, though).
</t></list></t> </li>
<t> </ul>
<t>
CLOSE-WAIT STATE CLOSE-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Queue this request until all preceding SENDs have been Queue this request until all preceding SENDs have been
segmentized; then send a FIN segment, enter LAST-ACK state. segmentized; then send a FIN segment, enter LAST-ACK state.
</t></list></t> </li>
<t> </ul>
CLOSING STATE<vspace /> <t>
LAST-ACK STATE<vspace /> CLOSING STATE</t>
<t>
LAST-ACK STATE</t>
<t>
TIME-WAIT STATE TIME-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Respond with "error: connection closing". Respond with "error: connection closing".
</t></list></t> </li>
</list> </ul>
</t> </section>
</section> <section numbered="true" toc="default">
<section title="ABORT Call"> <name>ABORT Call</name>
<t> <t>
<list>
<t>
CLOSED STATE (i.e., TCB does not exist) CLOSED STATE (i.e., TCB does not exist)
<list> </t>
<t> <ul spacing="normal">
<li>
If the user should not have access to such a connection, return If the user should not have access to such a connection, return
"error: connection illegal for this process". "error: connection illegal for this process".
</t> </li>
<t> <li>
Otherwise return "error: connection does not exist". Otherwise, return "error: connection does not exist".
</t></list></t> </li>
<t> </ul>
<t>
LISTEN STATE LISTEN STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Any outstanding RECEIVEs should be returned with "error: Any outstanding RECEIVEs should be returned with "error:
connection reset" responses. Delete TCB, enter CLOSED state, and connection reset" responses. Delete TCB, enter CLOSED state, and
return. return.
</t></list></t> </li>
<t> </ul>
<t>
SYN-SENT STATE SYN-SENT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
All queued SENDs and RECEIVEs should be given "connection reset" All queued SENDs and RECEIVEs should be given "connection reset"
notification, delete the TCB, enter CLOSED state, and return. notification. Delete the TCB, enter CLOSED state, and return.
</t></list></t> </li>
<t> </ul>
SYN-RECEIVED STATE<vspace /> <t>
ESTABLISHED STATE<vspace /> SYN-RECEIVED STATE</t>
FIN-WAIT-1 STATE<vspace /> <t>
FIN-WAIT-2 STATE<vspace /> ESTABLISHED STATE</t>
<t>
FIN-WAIT-1 STATE</t>
<t>
FIN-WAIT-2 STATE</t>
<t>
CLOSE-WAIT STATE CLOSE-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
<t>
Send a reset segment: Send a reset segment:
<list> </t>
<t> <t>
&lt;SEQ=SND.NXT>&lt;CTL=RST> &lt;SEQ=SND.NXT&gt;&lt;CTL=RST&gt;
</t> </t>
</list></t> </li>
<t> <li>
All queued SENDs and RECEIVEs should be given "connection reset" All queued SENDs and RECEIVEs should be given "connection reset"
notification; all segments queued for transmission (except for the notification; all segments queued for transmission (except for the
RST formed above) or retransmission should be flushed, delete the RST formed above) or retransmission should be flushed. Delete the
TCB, enter CLOSED state, and return. TCB, enter CLOSED state, and return.
</t></list></t> </li>
<t> </ul>
<t>
CLOSING STATE CLOSING STATE
</t>
<t>
LAST-ACK STATE LAST-ACK STATE
</t>
<t>
TIME-WAIT STATE TIME-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Respond with "ok" and delete the TCB, enter CLOSED state, and Respond with "ok" and delete the TCB, enter CLOSED state, and
return. return.
</t></list></t> </li>
</list> </ul>
</t> </section>
</section> <section numbered="true" toc="default">
<section title="STATUS Call"> <name>STATUS Call</name>
<t> <t>
<list>
<t>
CLOSED STATE (i.e., TCB does not exist) CLOSED STATE (i.e., TCB does not exist)
<list> </t>
<t> <ul spacing="normal">
<li>
If the user should not have access to such a connection, return If the user should not have access to such a connection, return
"error: connection illegal for this process". "error: connection illegal for this process".
</t> </li>
<t> <li>
Otherwise return "error: connection does not exist". Otherwise, return "error: connection does not exist".
</t></list></t> </li>
<t> </ul>
<t>
LISTEN STATE LISTEN STATE
<list> </t>
<t> <ul spacing="normal">
Return "state = LISTEN", and the TCB pointer. <li>
</t></list></t> Return "state = LISTEN" and the TCB pointer.
<t> </li>
</ul>
<t>
SYN-SENT STATE SYN-SENT STATE
<list> </t>
<t> <ul spacing="normal">
Return "state = SYN-SENT", and the TCB pointer. <li>
</t></list></t> Return "state = SYN-SENT" and the TCB pointer.
<t> </li>
</ul>
<t>
SYN-RECEIVED STATE SYN-RECEIVED STATE
<list> </t>
<t> <ul spacing="normal">
Return "state = SYN-RECEIVED", and the TCB pointer. <li>
</t></list></t> Return "state = SYN-RECEIVED" and the TCB pointer.
<t> </li>
</ul>
<t>
ESTABLISHED STATE ESTABLISHED STATE
<list> </t>
<t> <ul spacing="normal">
Return "state = ESTABLISHED", and the TCB pointer. <li>
</t></list></t> Return "state = ESTABLISHED" and the TCB pointer.
<t> </li>
</ul>
<t>
FIN-WAIT-1 STATE FIN-WAIT-1 STATE
<list> </t>
<t> <ul spacing="normal">
Return "state = FIN-WAIT-1", and the TCB pointer. <li>
</t></list></t> Return "state = FIN-WAIT-1" and the TCB pointer.
<t> </li>
</ul>
<t>
FIN-WAIT-2 STATE FIN-WAIT-2 STATE
<list> </t>
<t> <ul spacing="normal">
Return "state = FIN-WAIT-2", and the TCB pointer. <li>
</t></list></t> Return "state = FIN-WAIT-2" and the TCB pointer.
<t> </li>
</ul>
<t>
CLOSE-WAIT STATE CLOSE-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
Return "state = CLOSE-WAIT", and the TCB pointer. <li>
</t></list></t> Return "state = CLOSE-WAIT" and the TCB pointer.
<t> </li>
</ul>
<t>
CLOSING STATE CLOSING STATE
<list> </t>
<t> <ul spacing="normal">
Return "state = CLOSING", and the TCB pointer. <li>
</t></list></t> Return "state = CLOSING" and the TCB pointer.
<t> </li>
</ul>
<t>
LAST-ACK STATE LAST-ACK STATE
<list> </t>
<t> <ul spacing="normal">
Return "state = LAST-ACK", and the TCB pointer. <li>
</t></list></t> Return "state = LAST-ACK" and the TCB pointer.
<t> </li>
</ul>
<t>
TIME-WAIT STATE TIME-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
Return "state = TIME-WAIT", and the TCB pointer. <li>
</t></list></t> Return "state = TIME-WAIT" and the TCB pointer.
</list> </li>
</t> </ul>
</section> </section>
<section title="SEGMENT ARRIVES"> <section numbered="true" toc="default">
<name>SEGMENT ARRIVES</name>
<section title="CLOSED State"> <section numbered="true" toc="default">
<t> <name>CLOSED STATE</name>
If the state is CLOSED (i.e., TCB does not exist) then <t>
<list> If the state is CLOSED (i.e., TCB does not exist), then
<t> </t>
<ul empty="true" spacing="normal">
<li>
all data in the incoming segment is discarded. An incoming all data in the incoming segment is discarded. An incoming
segment containing a RST is discarded. An incoming segment not segment containing a RST is discarded. An incoming segment not
containing a RST causes a RST to be sent in response. The containing a RST causes a RST to be sent in response. The
acknowledgment and sequence field values are selected to make the acknowledgment and sequence field values are selected to make the
reset sequence acceptable to the TCP endpoint that sent the offending reset sequence acceptable to the TCP endpoint that sent the offending
segment. segment.
</t> </li>
<t> <li>
<t>
If the ACK bit is off, sequence number zero is used, If the ACK bit is off, sequence number zero is used,
<list> </t>
<t> <ul spacing="normal" empty="true">
&lt;SEQ=0>&lt;ACK=SEG.SEQ+SEG.LEN>&lt;CTL=RST,ACK> <li>
</t> &lt;SEQ=0&gt;&lt;ACK=SEG.SEQ+SEG.LEN&gt;&lt;CTL=RST,ACK&gt;
</list></t> </li>
<t> </ul>
</li>
<li>
<t>
If the ACK bit is on, If the ACK bit is on,
<list> </t>
<t> <ul spacing="normal" empty="true">
&lt;SEQ=SEG.ACK>&lt;CTL=RST> <li>
</t></list></t> &lt;SEQ=SEG.ACK&gt;&lt;CTL=RST&gt;
<t> </li>
</ul>
</li>
<li>
Return. Return.
</t> </li>
</list></t> </ul>
</section> </section>
<section title="LISTEN State"> <section numbered="true" toc="default">
<t> <name>LISTEN STATE</name>
If the state is LISTEN then <t>
<list> If the state is LISTEN, then
<t> </t>
first check for an RST <ul empty="true" spacing="normal">
<list> <li>
<t> <t>
An incoming RST segment could not be valid, since First, check for a RST:
</t>
<ul spacing="normal">
<li>
An incoming RST segment could not be valid since
it could not have been sent in response to anything sent by this it could not have been sent in response to anything sent by this
incarnation of the connection. incarnation of the connection.
An incoming RST should be ignored. Return. An incoming RST should be ignored. Return.
</t></list></t> </li>
<t> </ul>
second check for an ACK </li>
<list> <li>
<t> <t>
Second, check for an ACK:
</t>
<ul spacing="normal">
<li>
<t>
Any acknowledgment is bad if it arrives on a connection still in Any acknowledgment is bad if it arrives on a connection still in
the LISTEN state. An acceptable reset segment should be formed the LISTEN state. An acceptable reset segment should be formed
for any arriving ACK-bearing segment. The RST should be for any arriving ACK-bearing segment. The RST should be
formatted as follows: formatted as follows:
<list> </t>
<t> <ul spacing="normal" empty="true">
&lt;SEQ=SEG.ACK>&lt;CTL=RST> <li>
</t></list></t> &lt;SEQ=SEG.ACK&gt;&lt;CTL=RST&gt;
<t> </li>
</ul>
</li>
<li>
Return. Return.
</t></list></t> </li>
<t> </ul>
third check for a SYN </li>
<list> <li>
<t> <t>
Third, check for a SYN:
</t>
<ul spacing="normal">
<li>
<t>
If the SYN bit is set, check the security. If the If the SYN bit is set, check the security. If the
security/compartment on the incoming segment does not exactly security/compartment on the incoming segment does not exactly
match the security/compartment in the TCB then send a reset and match the security/compartment in the TCB, then send a reset and
return. return.
<list> </t>
<t> <ul spacing="normal" empty="true">
&lt;SEQ=0>&lt;ACK=SEG.SEQ+SEG.LEN>&lt;CTL=RST,ACK> <li>
</t></list></t> &lt;SEQ=0&gt;&lt;ACK=SEG.SEQ+SEG.LEN&gt;&lt;CTL=RST,ACK&gt;
<t> </li>
Set RCV.NXT to SEG.SEQ+1, IRS is set to SEG.SEQ and any other </ul>
</li>
<li>
<t>
Set RCV.NXT to SEG.SEQ+1, IRS is set to SEG.SEQ, and any other
control or text should be queued for processing later. ISS control or text should be queued for processing later. ISS
should be selected and a SYN segment sent of the form: should be selected and a SYN segment sent of the form:
<list> </t>
<t> <ul spacing="normal" empty="true">
&lt;SEQ=ISS>&lt;ACK=RCV.NXT>&lt;CTL=SYN,ACK> <li>
</t></list></t> &lt;SEQ=ISS&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=SYN,ACK&gt;
<t> </li>
</ul>
</li>
<li>
SND.NXT is set to ISS+1 and SND.UNA to ISS. The connection SND.NXT is set to ISS+1 and SND.UNA to ISS. The connection
state should be changed to SYN-RECEIVED. Note that any other state should be changed to SYN-RECEIVED. Note that any other
incoming control or data (combined with SYN) will be processed incoming control or data (combined with SYN) will be processed
in the SYN-RECEIVED state, but processing of SYN and ACK should in the SYN-RECEIVED state, but processing of SYN and ACK should
not be repeated. If the listen was not fully specified (i.e., not be repeated. If the listen was not fully specified (i.e.,
the remote socket was not fully specified), then the the remote socket was not fully specified), then the
unspecified fields should be filled in now. unspecified fields should be filled in now.
</t></list></t> </li>
<t> </ul>
fourth other data or control </li>
<list> <li>
<t> <t>
Fourth, other data or control:
</t>
<ul spacing="normal">
<li>
This should not be reached. Drop the segment and return. Any other control or data-bearing segment (not containing SYN) This should not be reached. Drop the segment and return. Any other control or data-bearing segment (not containing SYN)
must have an ACK and thus would have been discarded by the ACK must have an ACK and thus would have been discarded by the ACK
processing in the second step, unless it was first discarded by processing in the second step, unless it was first discarded by
RST checking in the first step. RST checking in the first step.
</t></list></t> </li>
</list></t> </ul>
</section> </li>
<section title="SYN-SENT State"> </ul>
<t> </section>
If the state is SYN-SENT then <section numbered="true" toc="default">
<list> <name>SYN-SENT STATE</name>
<t>
first check the ACK bit
<list>
<t>
If the ACK bit is set
<list>
<t>
If SEG.ACK =&lt; ISS, or SEG.ACK > SND.NXT, send a reset (unless
the RST bit is set, if so drop the segment and return)
<list>
<t> <t>
&lt;SEQ=SEG.ACK>&lt;CTL=RST> If the state is SYN-SENT, then
</t></list></t> </t>
<t> <ul empty="true" spacing="normal">
<li>
<t>
First, check the ACK bit:
</t>
<ul spacing="normal">
<li>
<t>
If the ACK bit is set,
</t>
<ul spacing="normal">
<li>
<t>
If SEG.ACK =&lt; ISS or SEG.ACK &gt; SND.NXT, send a reset (unless
the RST bit is set, if so drop the segment and return)
</t>
<ul spacing="normal" empty="true">
<li>
&lt;SEQ=SEG.ACK&gt;&lt;CTL=RST&gt;
</li>
</ul>
</li>
<li>
and discard the segment. Return. and discard the segment. Return.
</t> </li>
<t> <li>
If SND.UNA &lt; SEG.ACK =&lt; SND.NXT then the ACK is acceptable. Some deployed TCP code has used the check SEG.ACK == SND.NXT (using &quot;==&quot; rather than &quot;=&lt;&quot;, but this is not appropriate when the stack is capable of sending data on the SYN, because the TCP peer may not accept and acknowledge all of the data on the SYN. If SND.UNA &lt; SEG.ACK =&lt; SND.NXT, then the ACK is acceptable. Some deployed TCP code has used the check SEG.ACK == SND.NXT (using "==" rather than "=&lt;"), but this is not appropriate when the stack is capable of sending data on the SYN because the TCP peer may not accept and acknowledge all of the data on the SYN.
</t></list></t> </li>
</list></t> </ul>
<t> </li>
second check the RST bit </ul>
<list> </li>
<t> <li>
If the RST bit is set <t>
<list> Second, check the RST bit:
<t> </t>
A potential blind reset attack is described in RFC 5961 <xref target="RFC5961"/>. The mitigation described in that document has specific applicability explained therein, and is not a substitute for cryptographic protection (e.g. IPsec or TCP-AO). A TCP implementation that supports the RFC 5961 mitigation SHOULD first check that the sequence number exactly matches RCV.NXT prior to executing the action in the next paragraph. <ul spacing="normal">
<li>
<t>
If the RST bit is set,
</t>
<ul spacing="normal">
</t> <li>
<t> A potential blind reset attack is described in RFC 5961 <xref target="RFC5961" format="default"/>. The mitigation described in that document has specific applicability explained therein, and is not a substitute for cryptographic protection (e.g., IPsec or TCP-AO). A TCP implementation that supports the mitigation described in RFC 5961 <bcp14>SHOULD</bcp14> first check that the sequence number exactly matches RCV.NXT prior to executing the action in the next paragraph.
If the ACK was acceptable then signal the user "error:
</li>
<li>
If the ACK was acceptable, then signal to the user "error:
connection reset", drop the segment, enter CLOSED state, connection reset", drop the segment, enter CLOSED state,
delete TCB, and return. Otherwise (no ACK), drop the segment delete TCB, and return. Otherwise (no ACK), drop the segment
and return. and return.
</t></list></t> </li>
</list></t> </ul>
<t> </li>
third check the security </ul>
<list> </li>
<t> <li>
<t>
Third, check the security:
</t>
<ul spacing="normal">
<li>
<t>
If the security/compartment in the segment does not exactly If the security/compartment in the segment does not exactly
match the security/compartment in the TCB, send a reset match the security/compartment in the TCB, send a reset:
<list> </t>
<t> <ul spacing="normal">
If there is an ACK <li>
<list> <t>
<t> If there is an ACK,
&lt;SEQ=SEG.ACK>&lt;CTL=RST> </t>
</t></list></t> <ul spacing="normal" empty="true">
<t> <li>
Otherwise &lt;SEQ=SEG.ACK&gt;&lt;CTL=RST&gt;
<list> </li>
<t> </ul>
&lt;SEQ=0>&lt;ACK=SEG.SEQ+SEG.LEN>&lt;CTL=RST,ACK> </li>
</t></list></t> <li>
</list></t> <t>
<t> Otherwise,
</t>
<ul spacing="normal" empty="true">
<li>
&lt;SEQ=0&gt;&lt;ACK=SEG.SEQ+SEG.LEN&gt;&lt;CTL=RST,ACK&gt;
</li>
</ul>
</li>
</ul>
</li>
<li>
If a reset was sent, discard the segment and return. If a reset was sent, discard the segment and return.
</t></list></t> </li>
<t> </ul>
fourth check the SYN bit </li>
<list> <li>
<t> <t>
Fourth, check the SYN bit:
</t>
<ul spacing="normal">
<li>
This step should be reached only if the ACK is ok, or there is This step should be reached only if the ACK is ok, or there is
no ACK, and the segment did not contain a RST. no ACK, and the segment did not contain a RST.
</t> </li>
<t> <li>
If the SYN bit is on and the security/compartment If the SYN bit is on and the security/compartment
is acceptable then, RCV.NXT is set to SEG.SEQ+1, IRS is set to is acceptable, then RCV.NXT is set to SEG.SEQ+1, IRS is set to
SEG.SEQ. SND.UNA should be advanced to equal SEG.ACK (if there SEG.SEQ. SND.UNA should be advanced to equal SEG.ACK (if there
is an ACK), and any segments on the retransmission queue that is an ACK), and any segments on the retransmission queue that
are thereby acknowledged should be removed. are thereby acknowledged should be removed.
</t> </li>
<t> <li>
If SND.UNA > ISS (our SYN has been ACKed), change the connection <t>
If SND.UNA &gt; ISS (our SYN has been ACKed), change the connection
state to ESTABLISHED, form an ACK segment state to ESTABLISHED, form an ACK segment
<list> </t>
<t> <ul spacing="normal" empty="true">
&lt;SEQ=SND.NXT>&lt;ACK=RCV.NXT>&lt;CTL=ACK> <li>
</t></list></t> &lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt;
<t> </li>
</ul>
</li>
<li>
and send it. Data or controls that were queued for and send it. Data or controls that were queued for
transmission MAY be included. Some TCP implementations suppress transmission <bcp14>MAY</bcp14> be included. Some TCP implementations suppress
sending this segment when the received segment contains data that will sending this segment when the received segment contains data that will
anyways generate an acknowledgement in the later processing steps, anyways generate an acknowledgment in the later processing steps,
saving this extra acknowledgement of the SYN from being sent. If there saving this extra acknowledgment of the SYN from being sent. If there
are other controls or text in the segment then continue processing at are other controls or text in the segment, then continue processing at
the sixth step under <xref target="other-states"/> where the URG the <xref target="check-urg-bit" format="none">sixth step</xref> under <xref target="other-states" format="default"/> where the URG
bit is checked, otherwise return. bit is checked; otherwise, return.
</t>
<t> </li>
Otherwise enter SYN-RECEIVED, form a SYN,ACK segment <li>
<list> <t>
<t> Otherwise, enter SYN-RECEIVED, form a SYN,ACK segment
&lt;SEQ=ISS>&lt;ACK=RCV.NXT>&lt;CTL=SYN,ACK> </t>
</t></list></t> <ul spacing="normal" empty="true">
<t> <li>
&lt;SEQ=ISS&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=SYN,ACK&gt;
</li>
</ul>
</li>
<li>
<t>
and send it. Set the variables: and send it. Set the variables:
<list> </t>
<t>SND.WND &lt;- SEG.WND<vspace /> <ul spacing="normal" empty="true">
SND.WL1 &lt;- SEG.SEQ<vspace /> <li>
<t>SND.WND &lt;- SEG.WND</t>
<t>
SND.WL1 &lt;- SEG.SEQ</t>
<t>
SND.WL2 &lt;- SEG.ACK</t> SND.WL2 &lt;- SEG.ACK</t>
</list> </li>
</ul>
<t>
If there are other controls or text in the If there are other controls or text in the
segment, queue them for processing after the ESTABLISHED state segment, queue them for processing after the ESTABLISHED state
has been reached, return. has been reached, return.
</t> </t>
<t> </li>
Note that it is legal to send and receive application data on SYN segments (this is the &quot;text in the segment&quot; mentioned above. There has been significant misinformation and misunderstanding of this topic historically. Some firewalls and security devices consider this suspicious. However, the capability was used in T/TCP <xref target="RFC1644"/> and is used in TCP Fast Open (TFO) <xref target="RFC7413"/>, so is important for implementations and network devices to permit. <li>
</t></list></t> Note that it is legal to send and receive application data on SYN segments (this is the "text in the segment" mentioned above). There has been significant misinformation and misunderstanding of this topic historically. Some firewalls and security devices consider this suspicious. However, the capability was used in T/TCP <xref target="RFC1644" format="default"/> and is used in TCP Fast Open (TFO) <xref target="RFC7413" format="default"/>, so is important for implementations and network devices to permit.
<t> </li>
fifth, if neither of the SYN or RST bits is set then drop the </ul>
</li>
<li>
Fifth, if neither of the SYN or RST bits is set, then drop the
segment and return. segment and return.
</t></list></t> </li>
</section> </ul>
<section title="Other States" anchor="other-states"> </section>
<t> <section anchor="other-states" numbered="true" toc="default">
<name>Other States</name>
<t>
Otherwise, Otherwise,
<list> </t>
<t> <ul spacing="normal" empty="true">
first check sequence number <li>
<list> <t>
<t> First, check sequence number:
SYN-RECEIVED STATE<vspace /> </t>
ESTABLISHED STATE<vspace /> <ul spacing="normal">
FIN-WAIT-1 STATE<vspace /> <li>
FIN-WAIT-2 STATE<vspace /> SYN-RECEIVED STATE</li>
CLOSE-WAIT STATE<vspace /> <li>
CLOSING STATE<vspace /> ESTABLISHED STATE</li>
LAST-ACK STATE<vspace /> <li>
TIME-WAIT STATE FIN-WAIT-1 STATE</li>
<list> <li>
<t> FIN-WAIT-2 STATE</li>
<li>
CLOSE-WAIT STATE</li>
<li>
CLOSING STATE</li>
<li>
LAST-ACK STATE</li>
<li>
<t>
TIME-WAIT STATE</t>
<ul spacing="normal">
<li>
Segments are processed in sequence. Initial tests on arrival Segments are processed in sequence. Initial tests on arrival
are used to discard old duplicates, but further processing is are used to discard old duplicates, but further processing is
done in SEG.SEQ order. If a segment's contents straddle the done in SEG.SEQ order. If a segment's contents straddle the
boundary between old and new, only the new parts are boundary between old and new, only the new parts are
processed. processed.
</t> </li>
<t> <li>
In general, the processing of received segments MUST be In general, the processing of received segments <bcp14>MUST</bcp14> be
implemented to aggregate ACK segments whenever possible (MUST-58). implemented to aggregate ACK segments whenever possible (MUST-58).
For example, if the TCP endpoint is processing a series of queued For example, if the TCP endpoint is processing a series of queued
segments, it MUST process them all before sending any ACK segments, it <bcp14>MUST</bcp14> process them all before sending any ACK
segments (MUST-59). segments (MUST-59).
</t> </li>
<t> <li>
<t>
There are four cases for the acceptability test for an incoming There are four cases for the acceptability test for an incoming
segment: segment:
</t> </t>
<t><figure><artwork> <table>
Segment Receive Test <name>Segment Acceptability Tests</name>
Length Window <thead>
------- ------- ------------------------------------------- <tr>
<th>Segment Length</th>
0 0 SEG.SEQ = RCV.NXT <th>Receive Window</th>
<th>Test</th>
0 >0 RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND </tr>
</thead>
>0 0 not acceptable <tbody>
<tr>
>0 >0 RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND <td>0</td>
or RCV.NXT =&lt; SEG.SEQ+SEG.LEN-1 &lt; RCV.NXT+RCV.WND <td>0</td>
</artwork></figure></t> <td>SEG.SEQ = RCV.NXT</td>
<t> </tr>
In implementing sequence number validation as described here, please note <tr>
<xref target="seqval"/>. <td>0</td>
</t> <td>&gt;0</td>
<t> <td>RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND</td>
</tr>
<tr>
<td>&gt;0</td>
<td>0</td>
<td>not acceptable</td>
</tr>
<tr>
<td>&gt;0</td>
<td>&gt;0</td>
<td>
<t>RCV.NXT =&lt; SEG.SEQ &lt; RCV.NXT+RCV.WND</t>
<t>or</t>
<t>RCV.NXT =&lt; SEG.SEQ+SEG.LEN-1 &lt; RCV.NXT+RCV.WND</t>
</td>
</tr>
</tbody>
</table>
</li>
<li>
In implementing sequence number validation as described here, please note
<xref target="seqval" format="default"/>.
</li>
<li>
If the RCV.WND is zero, no segments will be acceptable, but If the RCV.WND is zero, no segments will be acceptable, but
special allowance should be made to accept valid ACKs, URGs and special allowance should be made to accept valid ACKs, URGs, and
RSTs. RSTs.
</t> </li>
<t> <li>
<t>
If an incoming segment is not acceptable, an acknowledgment If an incoming segment is not acceptable, an acknowledgment
should be sent in reply (unless the RST bit is set, if so drop should be sent in reply (unless the RST bit is set, if so drop
the segment and return): the segment and return):
<list> </t>
<t> <t>
&lt;SEQ=SND.NXT>&lt;ACK=RCV.NXT>&lt;CTL=ACK> &lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt;
</t></list></t> </t>
<t> </li>
<li>
After sending the acknowledgment, drop the unacceptable segment After sending the acknowledgment, drop the unacceptable segment
and return. and return.
</t> </li>
<t> <li>
Note that for the TIME-WAIT state, there is an improved algorithm Note that for the TIME-WAIT state, there is an improved algorithm
described in <xref target="RFC6191"/> for handling incoming SYN described in <xref target="RFC6191" format="default"/> for handling incoming SYN
segments, that utilizes timestamps rather than relying on segments that utilizes timestamps rather than relying on
the sequence number check described here. When the improved the sequence number check described here. When the improved
algorithm is implemented, the logic above is not applicable for algorithm is implemented, the logic above is not applicable for
incoming SYN segments with timestamp options, received on a incoming SYN segments with Timestamp Options, received on a
connection in the TIME-WAIT state. connection in the TIME-WAIT state.
</t> </li>
<t> <li>
In the following it is assumed that the segment is the idealized In the following it is assumed that the segment is the idealized
segment that begins at RCV.NXT and does not exceed the window. segment that begins at RCV.NXT and does not exceed the window.
One could tailor actual segments to fit this assumption by One could tailor actual segments to fit this assumption by
trimming off any portions that lie outside the window (including trimming off any portions that lie outside the window (including
SYN and FIN), and only processing further if the segment then SYN and FIN) and only processing further if the segment then
begins at RCV.NXT. Segments with higher beginning sequence begins at RCV.NXT. Segments with higher beginning sequence
numbers SHOULD be held for later processing (SHLD-31). numbers <bcp14>SHOULD</bcp14> be held for later processing (SHLD-31).
</t> </li>
</ul>
</list></t> </li>
<t> </ul>
second check the RST bit, </li>
<list> <li>
<t> <t>
RFC 5961 <xref target="RFC5961"/> section 3 describes a potential blind reset at Second, check the RST bit:
tack and optional mitigation approach. This does not provide a cryptographic pr </t>
otection (e.g. as in IPsec or TCP-AO), but can be applicable in situations descr <ul spacing="normal">
ibed in RFC 5961. For stacks implementing the RFC 5961 protection, the three ch <li>
ecks below apply, otherwise processing for these states is indicated further bel <t>
ow. RFC 5961 <xref target="RFC5961" format="default"/>, Section <xref target="RFC596
<list> 1" section="3" sectionFormat="bare" format="default"/> describes a potential bl
<t>1) If the RST bit is set and the sequence number is outside the current recei ind reset attack and optional mitigation approach. This does not provide a cryp
ve window, silently drop the segment.</t> tographic protection (e.g., as in IPsec or TCP-AO) but can be applicable in situ
<t>2) If the RST bit is set and the sequence number exactly matches the next exp ations described in RFC 5961. For stacks implementing the protection described
ected sequence number (RCV.NXT), then TCP endpoints MUST reset the connection in in RFC 5961, the three checks below apply; otherwise, processing for these state
the manner prescribed below according to the connection state.</t> s is indicated further below.
<t>3) If the RST bit is set and the sequence number does not exactly match the n
ext expected sequence value, yet is within the current receive window, TCP endpo
ints MUST send an acknowledgement (challenge ACK):<vspace blankLines="1"/>
&lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt;<vspace blankLines="1"/>
After sending the challenge ACK, TCP endpoints MUST drop the unacceptable segmen
t and stop processing the incoming packet further. Note that RFC 5961 and Errat
a ID 4772 contain additional considerations for ACK throttling in an implementat
ion.</t>
</list>
</t> </t>
<t> <ol type="%d)">
<li>If the RST bit is set and the sequence number is o
utside the current receive window, silently drop the segment.</li>
<li>If the RST bit is set and the sequence number exac
tly matches the next expected sequence number (RCV.NXT), then TCP endpoints <bcp
14>MUST</bcp14> reset the connection in the manner prescribed below according to
the connection state.</li>
<li>
<t>If the RST bit is set and the sequence number doe
s not exactly match the next expected sequence value, yet is within the current
receive window, TCP endpoints <bcp14>MUST</bcp14> send an acknowledgment (challe
nge ACK):</t>
<t>
&lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt;</t>
<t>
After sending the challenge ACK, TCP endpoints <bcp14>MUST</bcp14> drop the unac
ceptable segment and stop processing the incoming packet further. Note that RFC
5961 and Errata ID 4772 <xref target="Err4772" format="default"/> contain addit
ional considerations for ACK throttling in an implementation.</t>
</li>
</ol>
</li>
<li>
<t>
SYN-RECEIVED STATE SYN-RECEIVED STATE
<list> </t>
<t> <ul spacing="normal">
If the RST bit is set <li>
<t>
If the RST bit is set,
<list> </t>
<t> <ul spacing="normal">
<li>
If this connection was initiated with a passive OPEN (i.e., If this connection was initiated with a passive OPEN (i.e.,
came from the LISTEN state), then return this connection to came from the LISTEN state), then return this connection to
LISTEN state and return. The user need not be informed. If LISTEN state and return. The user need not be informed. If
this connection was initiated with an active OPEN (i.e., came this connection was initiated with an active OPEN (i.e., came
from SYN-SENT state) then the connection was refused, signal from SYN-SENT state), then the connection was refused; signal
the user "connection refused". In either case, the user "connection refused". In either case,
the retransmission queue should be flushed. And in the the retransmission queue should be flushed. And in the
active OPEN case, enter the CLOSED state and delete the TCB, active OPEN case, enter the CLOSED state and delete the TCB,
and return. and return.
</t></list></t> </li>
</list></t> </ul>
<t> </li>
ESTABLISHED<vspace /> </ul>
FIN-WAIT-1<vspace /> </li>
FIN-WAIT-2<vspace /> <li>
CLOSE-WAIT ESTABLISHED STATE</li>
<list> <li>
<t> FIN-WAIT-1 STATE</li>
If the RST bit is set then, any outstanding RECEIVEs and SEND <li>
FIN-WAIT-2 STATE</li>
<li>
<t>
CLOSE-WAIT STATE</t>
<ul spacing="normal">
<li>
If the RST bit is set, then any outstanding RECEIVEs and SEND
should receive "reset" responses. All segment queues should be should receive "reset" responses. All segment queues should be
flushed. Users should also receive an unsolicited general flushed. Users should also receive an unsolicited general
"connection reset" signal. Enter the CLOSED state, delete the "connection reset" signal. Enter the CLOSED state, delete the
TCB, and return. TCB, and return.
</t> </li>
</list></t> </ul>
<t> </li>
CLOSING STATE<vspace /> <li>
LAST-ACK STATE<vspace /> CLOSING STATE</li>
TIME-WAIT<vspace /> <li>
<list> LAST-ACK STATE</li>
<t> <li>
If the RST bit is set then, enter the CLOSED state, delete the <t>
TIME-WAIT STATE</t>
<ul spacing="normal">
<li>
If the RST bit is set, then enter the CLOSED state, delete the
TCB, and return. TCB, and return.
</t></list></t> </li>
</list></t> </ul>
<t> </li>
third check security </ul>
<list> </li>
<t> <li>
SYN-RECEIVED <t>
<list> Third, check security:
<t> </t>
<ul spacing="normal">
<li>
<t>
SYN-RECEIVED STATE
</t>
<ul spacing="normal">
<li>
If the security/compartment in the segment does not If the security/compartment in the segment does not
exactly match the security/compartment in the TCB exactly match the security/compartment in the TCB,
then send a reset, and return. then send a reset and return.
</t></list></t> </li>
<t> </ul>
ESTABLISHED<vspace /> </li>
FIN-WAIT-1<vspace /> <li>
FIN-WAIT-2<vspace /> ESTABLISHED STATE</li>
CLOSE-WAIT<vspace /> <li>
CLOSING<vspace /> FIN-WAIT-1 STATE</li>
LAST-ACK<vspace /> <li>
TIME-WAIT FIN-WAIT-2 STATE</li>
<list> <li>
<t> CLOSE-WAIT STATE</li>
<li>
CLOSING STATE</li>
<li>
LAST-ACK STATE</li>
<li>
<t>
TIME-WAIT STATE
</t>
<ul spacing="normal">
<li>
If the security/compartment in the segment does not If the security/compartment in the segment does not
exactly match the security/compartment in the TCB exactly match the security/compartment in the TCB,
then send a reset, any outstanding RECEIVEs and SEND should then send a reset; any outstanding RECEIVEs and SEND should
receive "reset" responses. All segment queues should be receive "reset" responses. All segment queues should be
flushed. Users should also receive an unsolicited general flushed. Users should also receive an unsolicited general
"connection reset" signal. Enter the CLOSED state, delete the "connection reset" signal. Enter the CLOSED state, delete the
TCB, and return. TCB, and return.
</t></list></t> </li>
<t> </ul>
</li>
<li>
Note this check is placed following the sequence check to prevent Note this check is placed following the sequence check to prevent
a segment from an old connection between these port numbers with a a segment from an old connection between these port numbers with a
different security from causing an abort of the different security from causing an abort of the
current connection. current connection.
</t> </li>
</list></t> </ul>
<t> </li>
fourth, check the SYN bit, <li>
<list> <t>
<t> Fourth, check the SYN bit:
SYN-RECEIVED<vspace /> </t>
<list><t>If the connection was initiated with a passive OPEN, then retur <ul spacing="normal">
n this connection to the LISTEN state and return. Otherwise, handle per the dir <li>
ections for synchronized states below.</t></list> <t>
ESTABLISHED STATE<vspace /> SYN-RECEIVED STATE</t>
FIN-WAIT STATE-1<vspace /> <ul spacing="normal">
FIN-WAIT STATE-2<vspace /> <li>If the connection was initiated with a passive OPE
CLOSE-WAIT STATE<vspace /> N, then return this connection to the LISTEN state and return. Otherwise, handl
CLOSING STATE<vspace /> e per the directions for synchronized states below.</li>
LAST-ACK STATE<vspace /> </ul>
</li>
<li>
ESTABLISHED STATE</li>
<li>
FIN-WAIT-1 STATE</li>
<li>
FIN-WAIT-2 STATE</li>
<li>
CLOSE-WAIT STATE</li>
<li>
CLOSING STATE</li>
<li>
LAST-ACK STATE</li>
<li>
<t>
TIME-WAIT STATE TIME-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
If the SYN bit is set in these synchronized states, it may be either a legitimat <li>
e new connection attempt (e.g. in the case of TIME-WAIT), an error where the con <t>
nection should be reset, or the result of an attack attempt, as described in RFC If the SYN bit is set in these synchronized states, it may be either a legitimat
5961 <xref target="RFC5961"/>. For the TIME-WAIT state, new connections can be e new connection attempt (e.g., in the case of TIME-WAIT), an error where the co
accepted if the timestamp option is used and meets expectations (per <xref targ nnection should be reset, or the result of an attack attempt, as described in RF
et="RFC6191"/>). For all other cases, RFC 5961 provides a mitigation with appli C 5961 <xref target="RFC5961" format="default"/>. For the TIME-WAIT state, new
cability to some situations, though there are also alternatives that offer crypt connections can be accepted if the Timestamp Option is used and meets expectatio
ographic protection (see <xref target="Security"/>). RFC 5961 recommends that i ns (per <xref target="RFC6191" format="default"/>). For all other cases, RFC 59
n these synchronized states, if the SYN bit is set, irrespective of the sequence 61 provides a mitigation with applicability to some situations, though there are
number, TCP endpoints MUST send a &quot;challenge ACK&quot; to the remote peer: also alternatives that offer cryptographic protection (see <xref target="Securi
</t> ty" format="default"/>). RFC 5961 recommends that in these synchronized states,
<t> if the SYN bit is set, irrespective of the sequence number, TCP endpoints <bcp1
4>MUST</bcp14> send a "challenge ACK" to the remote peer:</t>
<t>
&lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt; &lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt;
</t> </t>
<t> </li>
After sending the acknowledgement, TCP implementations MUST drop the unacceptabl <li>
e segment and stop processing further. Note that RFC 5961 and Errata ID 4772 co After sending the acknowledgment, TCP implementations <bcp14>MUST</bcp14> drop t
ntain additional ACK throttling notes for an implementation. he unacceptable segment and stop processing further. Note that RFC 5961 and Err
</t> ata ID 4772 <xref target="Err4772" format="default"/> contain additional ACK thr
<t> ottling notes for an implementation.
For implementations that do not follow RFC 5961, the original RFC 793 be </li>
havior follows in this paragraph. If the SYN is in the window it is an error, s <li>
end a reset, any For implementations that do not follow RFC 5961, the original behavior d
outstanding RECEIVEs and SEND should receive &quot;reset&quot; responses escribed in RFC 793 follows in this paragraph. If the SYN is in the window it i
, s an error: send a reset, any
outstanding RECEIVEs and SEND should receive "reset" responses,
all segment queues should be flushed, the user should also all segment queues should be flushed, the user should also
receive an unsolicited general "connection reset" signal, enter receive an unsolicited general "connection reset" signal, enter
the CLOSED state, delete the TCB, and return. the CLOSED state, delete the TCB, and return.
</t> </li>
<t> <li>
If the SYN is not in the window this step would not be reached If the SYN is not in the window, this step would not be reached
and an ACK would have been sent in the first step (sequence and an ACK would have been sent in the first step (sequence
number check). number check).
</t> </li>
</list></t> </ul>
</list></t> </li>
<t> </ul>
fifth check the ACK field, </li>
<list> <li>
<t> <t>
if the ACK bit is off drop the segment and return Fifth, check the ACK field:
</t> </t>
<t> <ul spacing="normal">
if the ACK bit is on <li>
<list> if the ACK bit is off, drop the segment and return
<t> </li>
RFC 5961 <xref target="RFC5961"/> section 5 describes a potential blind data inj <li>
ection attack, and mitigation that implementations MAY choose to include (MAY-12 <t>
). TCP stacks that implement RFC 5961 MUST add an input check that the ACK valu if the ACK bit is on,
e is acceptable only if it is in the range of ((SND.UNA - MAX.SND.WND) =&lt; SEG </t>
.ACK =&lt; SND.NXT). All incoming segments whose ACK value doesn't satisfy the <ul spacing="normal">
above condition MUST be discarded and an ACK sent back. The new state variable <li>
MAX.SND.WND is defined as the largest window that the local sender has ever rece RFC 5961 <xref target="RFC5961" section="5" sectionFormat="comma" format="defaul
ived from its peer (subject to window scaling) or may be hard-coded to a maximum t"/> describes a potential blind data injection attack, and mitigation that impl
permissible window value. When the ACK value is acceptable, the processing per ementations <bcp14>MAY</bcp14> choose to include (MAY-12). TCP stacks that impl
-state below applies: ement RFC 5961 <bcp14>MUST</bcp14> add an input check that the ACK value is acce
</t> ptable only if it is in the range of ((SND.UNA - MAX.SND.WND) =&lt; SEG.ACK =&lt
<t> ; SND.NXT). All incoming segments whose ACK value doesn't satisfy the above con
dition <bcp14>MUST</bcp14> be discarded and an ACK sent back. The new state var
iable MAX.SND.WND is defined as the largest window that the local sender has eve
r received from its peer (subject to window scaling) or may be hard-coded to a m
aximum permissible window value. When the ACK value is acceptable, the per-stat
e processing below applies:
</li>
<li>
<t>
SYN-RECEIVED STATE SYN-RECEIVED STATE
<list> </t>
<t> <ul spacing="normal">
If SND.UNA &lt; SEG.ACK =&lt; SND.NXT then enter ESTABLISHED state <li>
and continue processing with variables below set to: <t>
<list> If SND.UNA &lt; SEG.ACK =&lt; SND.NXT, then enter ESTABLISHED state
<t>SND.WND &lt;- SEG.WND<vspace /> and continue processing with the variables below set to:
SND.WL1 &lt;- SEG.SEQ<vspace /> </t>
SND.WL2 &lt;- SEG.ACK</t> <ul spacing="normal" empty="true">
</list> <li>
</t> SND.WND &lt;- SEG.WND</li>
<t> <li>
SND.WL1 &lt;- SEG.SEQ</li>
<li>
SND.WL2 &lt;- SEG.ACK</li>
</ul>
</li>
<li>
<t>
If the segment acknowledgment is not acceptable, form a If the segment acknowledgment is not acceptable, form a
reset segment, reset segment
<list> </t>
<t> <ul spacing="normal" empty="true">
&lt;SEQ=SEG.ACK>&lt;CTL=RST> <li>
</t></list> &lt;SEQ=SEG.ACK&gt;&lt;CTL=RST&gt;
</t> </li>
<t> </ul>
</li>
<li>
and send it. and send it.
</t> </li>
</list></t> </ul>
<t> </li>
<li>
<t>
ESTABLISHED STATE ESTABLISHED STATE
<list> </t>
<t> <ul spacing="normal">
If SND.UNA &lt; SEG.ACK =&lt; SND.NXT then, set SND.UNA &lt;- SEG.ACK. <li>
If SND.UNA &lt; SEG.ACK =&lt; SND.NXT, then set SND.UNA &lt;- SEG.ACK.
Any segments on the retransmission queue that are thereby Any segments on the retransmission queue that are thereby
entirely acknowledged are removed. Users should receive entirely acknowledged are removed. Users should receive
positive acknowledgments for buffers that have been SENT and positive acknowledgments for buffers that have been SENT and
fully acknowledged (i.e., SEND buffer should be returned with fully acknowledged (i.e., SEND buffer should be returned with
"ok" response). If the ACK is a duplicate "ok" response). If the ACK is a duplicate
(SEG.ACK =&lt; SND.UNA), it can be ignored. If the ACK acks (SEG.ACK =&lt; SND.UNA), it can be ignored. If the ACK acks
something not yet sent (SEG.ACK > SND.NXT) then send an ACK, something not yet sent (SEG.ACK > SND.NXT), then send an ACK,
drop the segment, and return. drop the segment, and return.
</t> </li>
<t> <li>
If SND.UNA =&lt; SEG.ACK =&lt; SND.NXT, the send window should be If SND.UNA =&lt; SEG.ACK =&lt; SND.NXT, the send window should be
updated. If (SND.WL1 &lt; SEG.SEQ or (SND.WL1 = SEG.SEQ and updated. If (SND.WL1 &lt; SEG.SEQ or (SND.WL1 = SEG.SEQ and
SND.WL2 =&lt; SEG.ACK)), set SND.WND &lt;- SEG.WND, set SND.WL2 =&lt; SEG.ACK)), set SND.WND &lt;- SEG.WND, set
SND.WL1 &lt;- SEG.SEQ, and set SND.WL2 &lt;- SEG.ACK. SND.WL1 &lt;- SEG.SEQ, and set SND.WL2 &lt;- SEG.ACK.
</t> </li>
<t> <li>
Note that SND.WND is an offset from SND.UNA, that SND.WL1 Note that SND.WND is an offset from SND.UNA, that SND.WL1
records the sequence number of the last segment used to update records the sequence number of the last segment used to update
SND.WND, and that SND.WL2 records the acknowledgment number of SND.WND, and that SND.WL2 records the acknowledgment number of
the last segment used to update SND.WND. The check here the last segment used to update SND.WND. The check here
prevents using old segments to update the window. prevents using old segments to update the window.
</t> </li>
</list></t> </ul>
<t> </li>
<li>
<t>
FIN-WAIT-1 STATE FIN-WAIT-1 STATE
<list> </t>
<t> <ul spacing="normal">
<li>
In addition to the processing for the ESTABLISHED state, if In addition to the processing for the ESTABLISHED state, if
the FIN segment is now acknowledged then enter FIN-WAIT-2 and continue the FIN segment is now acknowledged, then enter FIN-WAIT-2 and continue
processing in that state. processing in that state.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
FIN-WAIT-2 STATE FIN-WAIT-2 STATE
<list> </t>
<t> <ul spacing="normal">
<li>
In addition to the processing for the ESTABLISHED state, if In addition to the processing for the ESTABLISHED state, if
the retransmission queue is empty, the user's CLOSE can be the retransmission queue is empty, the user's CLOSE can be
acknowledged ("ok") but do not delete the TCB. acknowledged ("ok") but do not delete the TCB.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
CLOSE-WAIT STATE CLOSE-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Do the same processing as for the ESTABLISHED state. Do the same processing as for the ESTABLISHED state.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
CLOSING STATE CLOSING STATE
<list> </t>
<t> <ul spacing="normal">
<li>
In addition to the processing for the ESTABLISHED state, if In addition to the processing for the ESTABLISHED state, if
the ACK acknowledges our FIN then enter the TIME-WAIT state, the ACK acknowledges our FIN, then enter the TIME-WAIT state;
otherwise ignore the segment. otherwise, ignore the segment.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
LAST-ACK STATE LAST-ACK STATE
<list> </t>
<t> <ul spacing="normal">
<li>
The only thing that can arrive in this state is an The only thing that can arrive in this state is an
acknowledgment of our FIN. If our FIN is now acknowledged, acknowledgment of our FIN. If our FIN is now acknowledged,
delete the TCB, enter the CLOSED state, and return. delete the TCB, enter the CLOSED state, and return.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
TIME-WAIT STATE TIME-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
The only thing that can arrive in this state is a The only thing that can arrive in this state is a
retransmission of the remote FIN. Acknowledge it, and restart retransmission of the remote FIN. Acknowledge it, and restart
the 2 MSL timeout. the 2 MSL timeout.
</t></list></t> </li>
</list></t> </ul>
</list></t> </li>
<t> </ul>
sixth, check the URG bit, </li>
<list> </ul>
<t> </li>
ESTABLISHED STATE<vspace /> <li>
FIN-WAIT-1 STATE<vspace /> <t anchor="check-urg-bit">
Sixth, check the URG bit:
</t>
<ul spacing="normal">
<li>
ESTABLISHED STATE</li>
<li>
FIN-WAIT-1 STATE</li>
<li>
<t>
FIN-WAIT-2 STATE FIN-WAIT-2 STATE
<list> </t>
<t> <ul spacing="normal">
<li>
If the URG bit is set, RCV.UP &lt;- max(RCV.UP,SEG.UP), and signal If the URG bit is set, RCV.UP &lt;- max(RCV.UP,SEG.UP), and signal
the user that the remote side has urgent data if the urgent the user that the remote side has urgent data if the urgent
pointer (RCV.UP) is in advance of the data consumed. If the pointer (RCV.UP) is in advance of the data consumed. If the
user has already been signaled (or is still in the &quot;urgent user has already been signaled (or is still in the "urgent
mode&quot;) for this continuous sequence of urgent data, do not mode") for this continuous sequence of urgent data, do not
signal the user again. signal the user again.
</t></list></t> </li>
<t> </ul>
CLOSE-WAIT STATE<vspace /> </li>
CLOSING STATE<vspace /> <li>
LAST-ACK STATE<vspace /> CLOSE-WAIT STATE</li>
TIME-WAIT <li>
<list> CLOSING STATE</li>
<t> <li>
This should not occur, since a FIN has been received from the LAST-ACK STATE</li>
<li>
<t>
TIME-WAIT STATE
</t>
<ul spacing="normal">
<li>
This should not occur since a FIN has been received from the
remote side. Ignore the URG. remote side. Ignore the URG.
</t></list></t> </li>
</list></t> </ul>
<t> </li>
seventh, process the segment text, </ul>
<list> </li>
<t> <li>
ESTABLISHED STATE<vspace /> <t>
FIN-WAIT-1 STATE<vspace /> Seventh, process the segment text:
</t>
<ul spacing="normal">
<li>
ESTABLISHED STATE</li>
<li>
FIN-WAIT-1 STATE</li>
<li>
<t>
FIN-WAIT-2 STATE FIN-WAIT-2 STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Once in the ESTABLISHED state, it is possible to deliver segment Once in the ESTABLISHED state, it is possible to deliver segment
data to user RECEIVE buffers. Data from segments can be moved data to user RECEIVE buffers. Data from segments can be moved
into buffers until either the buffer is full or the segment is into buffers until either the buffer is full or the segment is
empty. If the segment empties and carries a PUSH flag, then empty. If the segment empties and carries a PUSH flag, then
the user is informed, when the buffer is returned, that a PUSH the user is informed, when the buffer is returned, that a PUSH
has been received. has been received.
</t> </li>
<t> <li>
When the TCP endpoint takes responsibility for delivering the data to the When the TCP endpoint takes responsibility for delivering the data to the
user it must also acknowledge the receipt of the data. user, it must also acknowledge the receipt of the data.
</t> </li>
<t> <li>
Once the TCP endpoint takes responsibility for the data it advances Once the TCP endpoint takes responsibility for the data, it advances
RCV.NXT over the data accepted, and adjusts RCV.WND as RCV.NXT over the data accepted, and adjusts RCV.WND as
appropriate to the current buffer availability. The total of appropriate to the current buffer availability. The total of
RCV.NXT and RCV.WND should not be reduced. RCV.NXT and RCV.WND should not be reduced.
</t> </li>
<t> <li>
A TCP implementation MAY send an ACK segment acknowledging RCV.NXT when a A TCP implementation <bcp14>MAY</bcp14> send an ACK segment acknowledging RCV.NXT when a
valid segment arrives that is in the window but not at the valid segment arrives that is in the window but not at the
left window edge (MAY-13). left window edge (MAY-13).
</t> </li>
<t> <li>
Please note the window management suggestions in <xref target="datacomm"/>. Please note the window management suggestions in <xref target="datacomm" format="default"/>.
</t> </li>
<t> <li>
<t>
Send an acknowledgment of the form: Send an acknowledgment of the form:
<list><t> </t>
&lt;SEQ=SND.NXT>&lt;ACK=RCV.NXT>&lt;CTL=ACK> <t>
</t></list></t> &lt;SEQ=SND.NXT&gt;&lt;ACK=RCV.NXT&gt;&lt;CTL=ACK&gt;
<t> </t>
</li>
<li>
This acknowledgment should be piggybacked on a segment being This acknowledgment should be piggybacked on a segment being
transmitted if possible without incurring undue delay. transmitted if possible without incurring undue delay.
</t> </li>
</list></t> </ul>
<t> </li>
CLOSE-WAIT STATE<vspace /> <li>
CLOSING STATE<vspace /> CLOSE-WAIT STATE</li>
LAST-ACK STATE<vspace /> <li>
CLOSING STATE</li>
<li>
LAST-ACK STATE</li>
<li>
<t>
TIME-WAIT STATE TIME-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
This should not occur, since a FIN has been received from the <li>
This should not occur since a FIN has been received from the
remote side. Ignore the segment text. remote side. Ignore the segment text.
</t> </li>
</list> </ul>
</t> </li>
</list></t> </ul>
<t> </li>
eighth, check the FIN bit, <li>
<list> <t>
<t> Eighth, check the FIN bit:
Do not process the FIN if the state is CLOSED, LISTEN or SYN-SENT </t>
<ul spacing="normal">
<li>
Do not process the FIN if the state is CLOSED, LISTEN, or SYN-SENT
since the SEG.SEQ cannot be validated; drop the segment and since the SEG.SEQ cannot be validated; drop the segment and
return. return.
</t> </li>
<t> <li>
<t>
If the FIN bit is set, signal the user "connection closing" and If the FIN bit is set, signal the user "connection closing" and
return any pending RECEIVEs with same message, advance RCV.NXT return any pending RECEIVEs with same message, advance RCV.NXT
over the FIN, and send an acknowledgment for the FIN. Note that over the FIN, and send an acknowledgment for the FIN. Note that
FIN implies PUSH for any segment text not yet delivered to the FIN implies PUSH for any segment text not yet delivered to the
user. user.
<list> </t>
<t> <ul spacing="normal">
SYN-RECEIVED STATE<vspace /> <li>
SYN-RECEIVED STATE</li>
<li>
<t>
ESTABLISHED STATE ESTABLISHED STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Enter the CLOSE-WAIT state. Enter the CLOSE-WAIT state.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
FIN-WAIT-1 STATE FIN-WAIT-1 STATE
<list> </t>
<t> <ul spacing="normal">
<li>
If our FIN has been ACKed (perhaps in this segment), then If our FIN has been ACKed (perhaps in this segment), then
enter TIME-WAIT, start the time-wait timer, turn off the other enter TIME-WAIT, start the time-wait timer, turn off the other
timers; otherwise enter the CLOSING state. timers; otherwise, enter the CLOSING state.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
FIN-WAIT-2 STATE FIN-WAIT-2 STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Enter the TIME-WAIT state. Start the time-wait timer, turn Enter the TIME-WAIT state. Start the time-wait timer, turn
off the other timers. off the other timers.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
CLOSE-WAIT STATE CLOSE-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Remain in the CLOSE-WAIT state. Remain in the CLOSE-WAIT state.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
CLOSING STATE CLOSING STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Remain in the CLOSING state. Remain in the CLOSING state.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
LAST-ACK STATE LAST-ACK STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Remain in the LAST-ACK state. Remain in the LAST-ACK state.
</t></list></t> </li>
<t> </ul>
</li>
<li>
<t>
TIME-WAIT STATE TIME-WAIT STATE
<list> </t>
<t> <ul spacing="normal">
<li>
Remain in the TIME-WAIT state. Restart the 2 MSL time-wait Remain in the TIME-WAIT state. Restart the 2 MSL time-wait
timeout. timeout.
</t></list></t> </li>
</list></t> </ul>
</list></t> </li>
<t> </ul>
</li>
</ul>
</li>
<li>
and return. and return.
</t> </li>
</list> </ul>
</t> </section>
</list> </section>
</t> <section numbered="true" toc="default">
</section> <name>Timeouts</name>
<t>
</section>
<section title="Timeouts">
<t>
<list>
<t>
USER TIMEOUT USER TIMEOUT
<list> </t>
<t> <ul spacing="normal">
<li>
For any state if the user timeout expires, flush all queues, signal For any state if the user timeout expires, flush all queues, signal
the user "error: connection aborted due to user timeout" in general the user "error: connection aborted due to user timeout" in general
and for any outstanding calls, delete the TCB, enter the CLOSED and for any outstanding calls, delete the TCB, enter the CLOSED
state and return. state, and return.
</t> </li>
</list> </ul>
</t> <t>
<t>
RETRANSMISSION TIMEOUT RETRANSMISSION TIMEOUT
<list> </t>
<t> <ul spacing="normal">
<li>
For any state if the retransmission timeout expires on a segment in For any state if the retransmission timeout expires on a segment in
the retransmission queue, send the segment at the front of the the retransmission queue, send the segment at the front of the
retransmission queue again, reinitialize the retransmission timer, retransmission queue again, reinitialize the retransmission timer,
and return. and return.
</t> </li>
</list> </ul>
</t> <t>
<t>
TIME-WAIT TIMEOUT TIME-WAIT TIMEOUT
<list> </t>
<t> <ul spacing="normal">
If the time-wait timeout expires on a connection delete the TCB, <li>
enter the CLOSED state and return. If the time-wait timeout expires on a connection, delete the TCB,
</t> enter the CLOSED state, and return.
</list> </li>
</t> </ul>
</list> </section>
</t> </section>
</section> </section>
</section> <section anchor="glossary" numbered="true" toc="default">
</section> <name>Glossary</name>
<dl newline="true" spacing="normal" indent="8">
<section title="Glossary" anchor="glossary"> <dt>ACK</dt>
<t> <dd>
<list style="hanging" hangIndent="8">
<t hangText="ACK"><vspace />
A control bit (acknowledge) occupying no sequence space, which A control bit (acknowledge) occupying no sequence space, which
indicates that the acknowledgment field of this segment indicates that the acknowledgment field of this segment
specifies the next sequence number the sender of this segment specifies the next sequence number the sender of this segment
is expecting to receive, hence acknowledging receipt of all is expecting to receive, hence acknowledging receipt of all
previous sequence numbers.</t> previous sequence numbers.</dd>
<dt>connection</dt>
<t hangText="connection"><vspace /> <dd>
A logical communication path identified by a pair of sockets.</t> A logical communication path identified by a pair of sockets.</dd>
<dt>datagram</dt>
<t hangText="datagram"><vspace /> <dd>
A message sent in a packet switched computer communications A message sent in a packet-switched computer communications
network.</t> network.</dd>
<dt>Destination Address</dt>
<t hangText="Destination Address"><vspace /> <dd>
The network layer address of the endpoint intended to receive a segment.</t> The network-layer address of the endpoint intended to receive a segment.</dd>
<dt>FIN</dt>
<t hangText="FIN"><vspace /> <dd>
A control bit (finis) occupying one sequence number, which A control bit (finis) occupying one sequence number, which
indicates that the sender will send no more data or control indicates that the sender will send no more data or control
occupying sequence space.</t> occupying sequence space.</dd>
<dt>flush</dt>
<t hangText="flush"><vspace /> <dd>
To remove all of the contents (data or segments) from a store (buffer To remove all of the contents (data or segments) from a store (buffer
or queue).</t> or queue).</dd>
<dt>fragment</dt>
<t hangText="fragment"><vspace /> <dd>
A portion of a logical unit of data, in particular an internet A portion of a logical unit of data. In particular, an internet
fragment is a portion of an internet datagram.</t> fragment is a portion of an internet datagram.</dd>
<dt>header</dt>
<t hangText="header"><vspace /> <dd>
Control information at the beginning of a message, segment, Control information at the beginning of a message, segment,
fragment, packet or block of data.</t> fragment, packet, or block of data.</dd>
<dt>host</dt>
<t hangText="host"><vspace /> <dd>
A computer. In particular a source or destination of messages A computer. In particular, a source or destination of messages
from the point of view of the communication network.</t> from the point of view of the communication network.</dd>
<dt>Identification</dt>
<t hangText="Identification"><vspace /> <dd>
An Internet Protocol field. This identifying value assigned An Internet Protocol field. This identifying value assigned
by the sender aids in assembling the fragments of a datagram.</t> by the sender aids in assembling the fragments of a datagram.</dd>
<dt>internet address</dt>
<t hangText="internet address"><vspace /> <dd>
A network layer address.</t> A network-layer address.</dd>
<dt>internet datagram</dt>
<t hangText="internet datagram"><vspace /> <dd>
A unit of data exchanged between internet hosts, together with the int ernet header A unit of data exchanged between internet hosts, together with the int ernet header
that allows the datagram to be routed from source to destination.</t> that allows the datagram to be routed from source to destination.</dd>
<dt>internet fragment</dt>
<t hangText="internet fragment"><vspace /> <dd>
A portion of the data of an internet datagram with an internet A portion of the data of an internet datagram with an internet
header.</t> header.</dd>
<dt>IP</dt>
<t hangText="IP"><vspace /> <dd>
Internet Protocol. See <xref target="RFC0791"/> and <xref target="RFC8 Internet Protocol. See <xref target="RFC0791" format="default"/> and <
200"/>.</t> xref target="RFC8200" format="default"/>.</dd>
<dt>IRS</dt>
<t hangText="IRS"><vspace /> <dd>
The Initial Receive Sequence number. The first sequence The Initial Receive Sequence number. The first sequence
number used by the sender on a connection.</t> number used by the sender on a connection.</dd>
<dt>ISN</dt>
<t hangText="ISN"><vspace /> <dd>
The Initial Sequence Number. The first sequence number used The Initial Sequence Number. The first sequence number used
on a connection, (either ISS or IRS). Selected in a way that is uniqu on a connection (either ISS or IRS). Selected in a way that is unique
e within a given period of time and is unpredictable to attackers.</t> within a given period of time and is unpredictable to attackers.</dd>
<dt>ISS</dt>
<t hangText="ISS"><vspace /> <dd>
The Initial Send Sequence number. The first sequence number The Initial Send Sequence number. The first sequence number
used by the sender on a connection.</t> used by the sender on a connection.</dd>
<dt>left sequence</dt>
<t hangText="left sequence"><vspace /> <dd>
This is the next sequence number to be acknowledged by the This is the next sequence number to be acknowledged by the
data receiving TCP endpoint (or the lowest currently unacknowledged data-receiving TCP endpoint (or the lowest currently unacknowledged
sequence number) and is sometimes referred to as the left edge sequence number) and is sometimes referred to as the left edge
of the send window.</t> of the send window.</dd>
<dt>module</dt>
<t hangText="module"><vspace /> <dd>
An implementation, usually in software, of a protocol or other An implementation, usually in software, of a protocol or other
procedure.</t> procedure.</dd>
<dt>MSL</dt>
<t hangText="MSL"><vspace /> <dd>
Maximum Segment Lifetime, the time a TCP segment can exist in Maximum Segment Lifetime, the time a TCP segment can exist in
the internetwork system. Arbitrarily defined to be 2 minutes.</t> the internetwork system. Arbitrarily defined to be 2 minutes.</dd>
<dt>octet</dt>
<t hangText="octet"><vspace /> <dd>
An eight bit byte.</t> An eight-bit byte.</dd>
<dt>Options</dt>
<t hangText="Options"><vspace /> <dd>
An Option field may contain several options, and each option An Option field may contain several options, and each option
may be several octets in length.</t> may be several octets in length.</dd>
<dt>packet</dt>
<t hangText="packet"><vspace /> <dd>
A package of data with a header that may or may not be A package of data with a header that may or may not be
logically complete. More often a physical packaging than a logically complete. More often a physical packaging than a
logical packaging of data.</t> logical packaging of data.</dd>
<dt>port</dt>
<t hangText="port"><vspace /> <dd>
The portion of a connection identifier used for demultiplexing connect ions The portion of a connection identifier used for demultiplexing connect ions
at an endpoint.</t> at an endpoint.</dd>
<dt>process</dt>
<t hangText="process"><vspace /> <dd>
A program in execution. A source or destination of data from A program in execution. A source or destination of data from
the point of view of the TCP endpoint or other host-to-host protocol.< the point of view of the TCP endpoint or other host-to-host protocol.<
/t> /dd>
<dt>PUSH</dt>
<t hangText="PUSH"><vspace /> <dd>
A control bit occupying no sequence space, indicating that A control bit occupying no sequence space, indicating that
this segment contains data that must be pushed through to the this segment contains data that must be pushed through to the
receiving user.</t> receiving user.</dd>
<dt>RCV.NXT</dt>
<t hangText="RCV.NXT"><vspace /> <dd>
receive next sequence number</t> receive next sequence number</dd>
<dt>RCV.UP</dt>
<t hangText="RCV.UP"><vspace /> <dd>
receive urgent pointer</t> receive urgent pointer</dd>
<dt>RCV.WND</dt>
<t hangText="RCV.WND"><vspace /> <dd>
receive window</t> receive window</dd>
<dt>receive next sequence number</dt>
<t hangText="receive next sequence number"><vspace /> <dd>
This is the next sequence number the local TCP endpoint is expecting t o This is the next sequence number the local TCP endpoint is expecting t o
receive.</t> receive.</dd>
<dt>receive window</dt>
<t hangText="receive window"><vspace /> <dd>
This represents the sequence numbers the local (receiving) TCP endpoin t This represents the sequence numbers the local (receiving) TCP endpoin t
is willing to receive. Thus, the local TCP endpoint considers that is willing to receive. Thus, the local TCP endpoint considers that
segments overlapping the range RCV.NXT to segments overlapping the range RCV.NXT to
RCV.NXT + RCV.WND - 1 carry acceptable data or control. RCV.NXT + RCV.WND - 1 carry acceptable data or control.
Segments containing sequence numbers entirely outside this Segments containing sequence numbers entirely outside this
range are considered duplicates or injection attacks and discarded.</t range are considered duplicates or injection attacks and discarded.</d
> d>
<dt>RST</dt>
<t hangText="RST"><vspace /> <dd>
A control bit (reset), occupying no sequence space, indicating A control bit (reset), occupying no sequence space, indicating
that the receiver should delete the connection without further that the receiver should delete the connection without further
interaction. The receiver can determine, based on the interaction. The receiver can determine, based on the
sequence number and acknowledgment fields of the incoming sequence number and acknowledgment fields of the incoming
segment, whether it should honor the reset command or ignore segment, whether it should honor the reset command or ignore
it. In no case does receipt of a segment containing RST give it. In no case does receipt of a segment containing RST give
rise to a RST in response.</t> rise to a RST in response.</dd>
<dt>SEG.ACK</dt>
<t hangText="SEG.ACK"><vspace /> <dd>
segment acknowledgment</t> segment acknowledgment</dd>
<dt>SEG.LEN</dt>
<t hangText="SEG.LEN"><vspace /> <dd>
segment length</t> segment length</dd>
<dt>SEG.SEQ</dt>
<t hangText="SEG.SEQ"><vspace /> <dd>
segment sequence</t> segment sequence</dd>
<dt>SEG.UP</dt>
<t hangText="SEG.UP"><vspace /> <dd>
segment urgent pointer field</t> segment urgent pointer field</dd>
<dt>SEG.WND</dt>
<t hangText="SEG.WND"><vspace /> <dd>
segment window field</t> segment window field</dd>
<dt>segment</dt>
<t hangText="segment"><vspace /> <dd>
A logical unit of data, in particular a TCP segment is the A logical unit of data. In particular, a TCP segment is the
unit of data transferred between a pair of TCP modules.</t> unit of data transferred between a pair of TCP modules.</dd>
<dt>segment acknowledgment</dt>
<t hangText="segment acknowledgment"><vspace /> <dd>
The sequence number in the acknowledgment field of the The sequence number in the acknowledgment field of the
arriving segment.</t> arriving segment.</dd>
<dt>segment length</dt>
<t hangText="segment length"><vspace /> <dd>
The amount of sequence number space occupied by a segment, The amount of sequence number space occupied by a segment,
including any controls that occupy sequence space.</t> including any controls that occupy sequence space.</dd>
<dt>segment sequence</dt>
<t hangText="segment sequence"><vspace /> <dd>
The number in the sequence field of the arriving segment.</t> The number in the sequence field of the arriving segment.</dd>
<dt>send sequence</dt>
<t hangText="send sequence"><vspace /> <dd>
This is the next sequence number the local (sending) TCP endpoint will This is the next sequence number the local (sending) TCP endpoint will
use on the connection. It is initially selected from an use on the connection. It is initially selected from an
initial sequence number curve (ISN) and is incremented for initial sequence number curve (ISN) and is incremented for
each octet of data or sequenced control transmitted.</t> each octet of data or sequenced control transmitted.</dd>
<dt>send window</dt>
<t hangText="send window"><vspace /> <dd>
This represents the sequence numbers that the remote This represents the sequence numbers that the remote
(receiving) TCP endpoint is willing to receive. It is the value of th e (receiving) TCP endpoint is willing to receive. It is the value of th e
window field specified in segments from the remote (data window field specified in segments from the remote (data-receiving)
receiving) TCP endpoint. The range of new sequence numbers that may TCP endpoint. The range of new sequence numbers that may
be emitted by a TCP implementation lies between SND.NXT and be emitted by a TCP implementation lies between SND.NXT and
SND.UNA + SND.WND - 1. (Retransmissions of sequence numbers SND.UNA + SND.WND - 1. (Retransmissions of sequence numbers
between SND.UNA and SND.NXT are expected, of course.)</t> between SND.UNA and SND.NXT are expected, of course.)</dd>
<dt>SND.NXT</dt>
<t hangText="SND.NXT"><vspace /> <dd>
send sequence</t> send sequence</dd>
<dt>SND.UNA</dt>
<t hangText="SND.UNA"><vspace /> <dd>
left sequence</t> left sequence</dd>
<dt>SND.UP</dt>
<t hangText="SND.UP"><vspace /> <dd>
send urgent pointer</t> send urgent pointer</dd>
<dt>SND.WL1</dt>
<t hangText="SND.WL1"><vspace /> <dd>
segment sequence number at last window update</t> segment sequence number at last window update</dd>
<dt>SND.WL2</dt>
<t hangText="SND.WL2"><vspace /> <dd>
segment acknowledgment number at last window update</t> segment acknowledgment number at last window update</dd>
<dt>SND.WND</dt>
<t hangText="SND.WND"><vspace /> <dd>
send window</t> send window</dd>
<dt>socket (or socket number, or socket address, or socket identifier)</
<t hangText="socket (or socket number, or socket address, or socket identifier)" dt>
><vspace /> <dd>
An address that specifically includes a port identifier, that An address that specifically includes a port identifier, that
is, the concatenation of an Internet Address with a TCP port.</t> is, the concatenation of an Internet Address with a TCP port.</dd>
<dt>Source Address</dt>
<t hangText="Source Address"><vspace /> <dd>
The network layer address of the sending endpoint.</t> The network-layer address of the sending endpoint.</dd>
<dt>SYN</dt>
<t hangText="SYN"><vspace /> <dd>
A control bit in the incoming segment, occupying one sequence A control bit in the incoming segment, occupying one sequence
number, used at the initiation of a connection, to indicate number, used at the initiation of a connection to indicate
where the sequence numbering will start.</t> where the sequence numbering will start.</dd>
<dt>TCB</dt>
<t hangText="TCB"><vspace /> <dd>
Transmission control block, the data structure that records Transmission control block, the data structure that records
the state of a connection.</t> the state of a connection.</dd>
<dt>TCP</dt>
<t hangText="TCP"><vspace /> <dd>
Transmission Control Protocol: A host-to-host protocol for Transmission Control Protocol: a host-to-host protocol for
reliable communication in internetwork environments.</t> reliable communication in internetwork environments.</dd>
<dt>TOS</dt>
<t hangText="TOS"><vspace /> <dd>
Type of Service, an obsoleted IPv4 field. The same header bits curren Type of Service, an obsoleted IPv4 field. The same header bits curren
tly are used for the Differentiated Services field <xref target="RFC2474"/> cont tly are used for the Differentiated Services field <xref target="RFC2474" format
aining the Differentiated Services Code Point (DSCP) value and the 2-bit ECN cod ="default"/> containing the Differentiated Services Codepoint (DSCP) value and t
epoint <xref target="RFC3168"/>.</t> he 2-bit ECN codepoint <xref target="RFC3168" format="default"/>.</dd>
<dt>Type of Service</dt>
<t hangText="Type of Service"><vspace /> <dd>
See &quot;TOS&quot;.</t> See "TOS".</dd>
<dt>URG</dt>
<t hangText="URG"><vspace /> <dd>
A control bit (urgent), occupying no sequence space, used to A control bit (urgent), occupying no sequence space, used to
indicate that the receiving user should be notified to do indicate that the receiving user should be notified to do
urgent processing as long as there is data to be consumed with urgent processing as long as there is data to be consumed with
sequence numbers less than the value indicated by the urgent sequence numbers less than the value indicated by the urgent
pointer.</t> pointer.</dd>
<dt>urgent pointer</dt>
<t hangText="urgent pointer"><vspace /> <dd>
A control field meaningful only when the URG bit is on. This A control field meaningful only when the URG bit is on. This
field communicates the value of the urgent pointer that field communicates the value of the urgent pointer that
indicates the data octet associated with the sending user's indicates the data octet associated with the sending user's
urgent call.</t> urgent call.</dd>
</list> </dl>
</t>
</section>
<section anchor="changes" title="Changes from RFC 793">
<?rfc subcompact="yes" ?>
<t>
This document obsoletes RFC 793 as well as RFCs 6093 and 6528, which
updated 793. In all cases, only the normative protocol specification and requir
ements have been incorporated into this document, and some informational text wi
th background and rationale may not have been carried in. The informational con
tent of those documents is still valuable in learning about and understanding TC
P, and they are valid Informational references, even though their normative cont
ent has been incorporated into this document.
</t>
<t>
The main body of this document was adapted from RFC 793's Section 3,
titled "FUNCTIONAL SPECIFICATION", with an attempt to keep formatting and layou
t as close as possible.
</t>
<t>
The collection of applicable RFC Errata that have been reported and
either accepted or held for an update to RFC 793 were incorporated (Errata IDs:
573, 574, 700, 701, 1283, 1561, 1562, 1564, 1571, 1572, 2297, 2298, 2748, 2749,
2934, 3213, 3300, 3301, 6222). Some errata were not applicable due to other cha
nges (Errata IDs: 572, 575, 1565, 1569, 2296, 3305, 3602).
</t>
<t>
Changes to the specification of the Urgent Pointer described in RFCs
1011, 1122, and 6093 were incorporated. See RFC 6093 for detailed discussion o
f why these changes were necessary.
</t>
<t>
The discussion of the RTO from RFC 793 was updated to refer to RFC 6298. The RF
C 1122 text on the RTO originally replaced the 793 text; however, RFC 2988 shoul
d have updated 1122, and has subsequently been obsoleted by 6298.
</t>
<t>
RFC 1011 <xref target="RFC1011"/> contains a number of comments about RFC 793, i
ncluding some needed changes to the TCP specification. These are expanded in RF
C 1122, which contains a collection of other changes and clarifications to RFC 7
93. The normative items impacting the protocol have been incorporated here, tho
ugh some historically useful implementation advice and informative discussion fr
om RFC 1122 is not included here. The present document updates RFC 1011, since
this is now the TCP specification rather than RFC 793, and the comments noted in
1011 have been incorporated.
</t>
<t>
RFC 1122 contains more than just TCP requirements, so this document can't obsole
te RFC 1122 entirely.  It is only marked as &quot;updating&quot; 1122; however,
it should be understood to effectively obsolete all of the RFC 1122 material on
TCP.
</t>
<t>
The more secure Initial Sequence Number generation algorithm from RF
C 6528 was incorporated. See RFC 6528 for discussion of the attacks that this m
itigates, as well as advice on selecting PRF algorithms and managing secret key
data.
</t>
<t>
A note based on RFC 6429 was added to explicitly clarify that system resource ma
nagement concerns allow connection resources to be reclaimed. RFC 6429 is obsol
eted in the sense that this clarification has been reflected in this update to t
he base TCP specification now.
</t>
<t>
The description of congestion control implementation was added, based on
the set of documents that are IETF BCP or Standards Track on the topic, and the
current state of common implementations.
</t>
<t>
RFC EDITOR'S NOTE: the content below is for detailed change tracking
and planning, and not to be included with the final revision of the document.
</t>
<t>
This document started as draft-eddy-rfc793bis-00, which was merely a
proposal and rough plan for updating RFC 793.
</t>
<t>
The -01 revision of draft-eddy-rfc793bis incorporates the conte
nt of RFC 793 Section 3 titled "FUNCTIONAL SPECIFICATION". Other content from R
FC 793 has not been incorporated. The -01 revision of this document makes some
minor formatting changes to the RFC 793 content in order to convert the content
into XML2RFC format and account for left-out parts of RFC 793. For instance, fi
gure numbering differs and some indentation is not exactly the same.
</t>
<t>
The -02 revision of draft-eddy-rfc793bis incorporates errata that ha
ve been verified:
<list>
<t>Errata ID 573: Reported by Bob Braden (note: This errata repo
rt basically is just a reminder that RFC 1122 updates 793. Some of the associat
ed changes are left pending to a separate revision that incorporates 1122. Bob'
s mention of PUSH in 793 section 2.8 was not applicable here because that sectio
n was not part of the "functional specification". Also, the 1122 text on the re
transmission timeout has been updated by subsequent RFCs, so the change her
e deviates from Bob's suggestion to apply the 1122 text.)</t>
<t>Errata ID 574: Reported by Yin Shuming</t>
<t>Errata ID 700: Reported by Yin Shuming</t>
<t>Errata ID 701: Reported by Yin Shuming</t>
<t>Errata ID 1283: Reported by Pei-chun Cheng</t>
<t>Errata ID 1561: Reported by Constantin Hagemeier</t>
<t>Errata ID 1562: Reported by Constantin Hagemeier</t>
<t>Errata ID 1564: Reported by Constantin Hagemeier</t>
<t>Errata ID 1565: Reported by Constantin Hagemeier</t>
<t>Errata ID 1571: Reported by Constantin Hagemeier</t>
<t>Errata ID 1572: Reported by Constantin Hagemeier</t>
<t>Errata ID 2296: Reported by Vishwas Manral</t>
<t>Errata ID 2297: Reported by Vishwas Manral</t>
<t>Errata ID 2298: Reported by Vishwas Manral</t>
<t>Errata ID 2748: Reported by Mykyta Yevstifeyev</t>
<t>Errata ID 2749: Reported by Mykyta Yevstifeyev</t>
<t>Errata ID 2934: Reported by Constantin Hagemeier</t>
<t>Errata ID 3213: Reported by EungJun Yi</t>
<t>Errata ID 3300: Reported by Botong Huang</t>
<t>Errata ID 3301: Reported by Botong Huang</t>
<t>Errata ID 3305: Reported by Botong Huang</t>
<t>Note: Some verified errata were not used in this update, as t
hey relate to sections of RFC 793 elided from this document. These include Erra
ta ID 572, 575, and 1569.</t>
<t>Note: Errata ID 3602 was not applied in this revision as it i
s duplicative of the 1122 corrections.</t>
</list>
Not related to RFC 793 content, this revision also makes small tweak
s to the introductory text, fixes indentation of the pseudo header diagram, and
notes that the Security Considerations should also include privacy, when this se
ction is written.
</t>
<t>
The -03 revision of draft-eddy-rfc793bis revises all discussion of t
he urgent pointer in order to comply with RFC 6093, 1122, and 1011. Since 1122
held requirements on the urgent pointer, the full list of requirements was broug
ht into an appendix of this document, so that it can be updated as needed.
</t>
<t>
The -04 revision of draft-eddy-rfc793bis includes the ISN generation
changes from RFC 6528.
</t>
<t>
The -05 revision of draft-eddy-rfc793bis incorporates MSS requiremen
ts and definitions from RFC 879 <xref target="RFC0879"/>, 1122, and 6691, as wel
l as option-handling
requirements from RFC 1122.
</t>
<t>
The -00 revision of draft-ietf-tcpm-rfc793bis incorporates several a
dditional clarifications and updates to the section on segmentation, many of whi
ch are based on feedback from Joe Touch improving on the initial text on this
in the previous revision.
</t>
<t>
The -01 revision incorporates the change to Reserved bits due to ECN
, as well as many other changes that come from RFC 1122.
</t>
<t>
The -02 revision has small formatting modifications in order to addr
ess xml2rfc warnings about long lines. It was a quick update to avoid document
expiration. TCPM working group discussion in 2015 also indicated that we should
not try to add sections on implementation advice or similar non-normative infor
mation.
</t>
<t>
The -03 revision incorporates more content from RFC 1122: Passive OP
EN Calls, Time-To-Live, Multihoming, IP Options, ICMP messages, Data Communicati
ons, When to Send Data, When to Send a Window Update, Managing the Window, Probi
ng Zero Windows, When to Send an ACK Segment. The section on data communication
s was re-organized into clearer subsections (previously headings were embedded i
n the 793 text), and windows management advice from 793 was removed (as reviewed
by TCPM working group) in favor of the 1122 additions on SWS, ZWP, and related
topics.
</t>
<t>
The -04 revision includes reference to RFC 6429 on the ZWP condition,
RFC 1122 material on TCP Connection Failures, TCP Keep-Alives, Acknowledging Que
ued Segments, and Remote Address Validation. RTO computation is referenced from
RFC 6298 rather than RFC 1122.
</t>
<t>
The -05 revision includes the requirement to implement TCP congestion
control with recommendation to implement ECN, the RFC 6633 update to 1122, whic
h changed the requirement on responding to source quench ICMP messages, and disc
ussion of ICMP (and ICMPv6) soft and hard errors per RFC 5461 (ICMPv6 handling f
or TCP doesn't seem to be mentioned elsewhere in standards track).
</t>
<t>
The -06 revision includes an appendix on &quot;Other Implementation N
otes&quot; to capture widely-deployed fundamental features that are not containe
d in the RFC series yet. It also added mention of RFC 6994 and the IANA TCP para
meters registry as a reference. It includes references to RFC 5961 in appropria
te places. The references to TOS were changed to DiffServ field, based on refle
cting RFC 2474 as well as the IPv6 presence of traffic class (carrying DiffServ
field) rather than TOS.
</t>
<t>
The -07 revision includes reference to RFC 6191, updated security conside
rations, discussion of additional implementation considerations, and clarificati
on of data on the SYN.
</t>
<t>
The -08 revision includes changes based on:
<list>
<t>describing treatment of reserved bits (following TCPM mailing list
thread from July 2014 on &quot;793bis item - reserved bit behavior&quot;)</t>
<t>addition of a brief TCP key concepts section to make up for not inclu
ding the outdated section 2 of RFC 793</t>
<t>changed &quot;TCP&quot; to &quot;host&quot; to resolve conflict be
tween 1122 wording on whether TCP or the network layer chooses an address when m
ultihomed</t>
<t>fixed/updated definition of options in glossary</t>
<t>moved note on aggregating ACKs from 1122 to a more appropriate loc
ation</t>
<t>resolved notes on IP precedence and security/compartment</t>
<t>added implementation note on sequence number validation</t>
<t>added note that PUSH does not apply when Nagle is active</t>
<t>added 1122 content on asynchronous reports to replace 793 section
on TCP to user messages</t>
</list>
</t>
<t>
The -09 revision fixes section numbering problems.
</t>
<t>
The -10 revision includes additions to the security considerations based
on comments from Joe Touch, and suggested edits on RST/FIN notification, RFC 252
5 reference, and other edits suggested by Yuchung Cheng, as well as modification
s to DiffServ text from Yuchung Cheng and Gorry Fairhurst.
</t>
<t>
The -11 revision includes a start at identifying all of the requirements
text and referencing each instance in the common table at the end of the docume
nt.
</t>
<t>
The -12 revision completes the requirement language indexing started in
-11 and adds necessary description of the PUSH functionality that was missing.
</t>
<t>
The -13 revision contains only changes in the inline editor notes.
</t>
<t>
The -14 revision includes updates with regard to several comments from th
e mailing list, including editorial fixes, adding IANA considerations for the he
ader flags, improving figure title placement, and breaking up the "Terminology"
section into more appropriately titled subsections.
</t>
<t>
The -15 revision has many technical and editorial corrections from Gorry
Fairhurst's review, and subsequent discussion on the TCPM list, as well as some
other collected clarifications and improvements from mailing list discussion.
</t>
<t>
The -16 revision addresses several discussions that arose from additional
reviews and follow-up on some of Gorry Fairhurst's comments from revision 14.
</t>
<t>
The -17 revision includes errata 6222 from Charles Deng, update to the ke
y words boilerplate, updated description of the header flags registry changes, a
nd clarification about connections rather than users in the discussion of OPEN c
alls.
</t>
<t>
The -18 revision includes editorial changes to the IANA considerations, b
ased on comments from Richard Scheffenegger at the IETF 108 TCPM virtual meeting
.
</t>
<t>
The -19 revision includes editorial changes from Errata 6281 and 6282 rep
orted by Merlin Buge. It also includes WGLC changes noted by Mohamed Boucadair,
Rahul Jadhav, Praveen Balasubramanian, Matt Olson, Yi Huang, Joe Touch, and Juh
amatti Kuusisaari.
</t>
<t>
The -20 revision includes text on congestion control based on mailing li
st and meeting discussion, put together in its final form by Markku Kojo. It al
so clarifies that SACK, WS, and TS options are recommended for high performance,
but not needed for basic interoperability. It also clarifies that the length f
ield is required for new TCP options.
</t>
<t>
The -21 revision includes slight changes to the header diagram for compatibility
with tooling, from Stephen McQuistin, clarification on the meaning of idle conn
ections from Yuchung Cheng, Neal Cardwell, Michael Scharf, and Richard Scheffene
gger, editorial improvements from Markku Kojo, notes that some stacks suppress e
xtra acknowledgments of the SYN when SYN-ACK carries data from Richard Scheffene
gger, and adds MAY-18 numbering based on note from Jonathan Morton.
</t>
<t>
The -22 revision includes small clarifications on terminology (might versus may)
and IPv6 extension headers versus IPv4 options, based on comments from Gorry Fa
irhurst.
</t>
<t>
The -23 revision has a fix to indentation from Michael Tuexen and idnits issues
addressed from Michael Scharf.
</t>
<t>
The -24 revision incorporates changes after Martin Duke's AD review, including f
urther feedback on those comments from Yuchung Cheng and Joe Touch. Important c
hanges for review include (1) removal of the need to check for the PUSH flag whe
n evaluating the SWS override timer expiration, (2) clarification about receding
urgent pointer, and (3) de-duplicating handling of the RST checking between ste
p 4 and step 1.
</t>
<t>
The -25 revision incorporates changes based on the GENART review from Francis Du
pont, SECDIR review from Kyle Rose, and OPSDIR review from Sarah Banks.
</t>
<t>
The -26 revision incorporates changes stemming from the IESG reviews, and INTDIR
review from Bernie Volz.
</t>
<t>
The -27 revision fixes a few small editorial incompatibilities that Stephen McQu
istin found related to automated code generation.
</t>
<t>
The -28 revision addresses some COMMENTs from Ben Kaduk's IESG review.
</t>
<t>Some other suggested changes that will not be incorporated in this 79
3 update unless TCPM consensus changes with regard to scope are:
<list style="numbers">
<t>Tony Sabatini's suggestion for describing DO field</t>
<t>Per discussion with Joe Touch (TAPS list, 6/20/2015), the descript
ion of the API could be revisited</t>
<t>Reducing the R2 value for SYNs has been suggested as a possible to
pic for future consideration.</t>
</list>
</t>
<t>
Early in the process of updating RFC 793, Scott Brim mentioned that this
should include a PERPASS/privacy review. This may be something for the chairs
or AD to request during WGLC or IETF LC.
</t>
<?rfc subcompact="no" ?>
</section> </section>
<section anchor="changes" numbered="true" toc="default">
<section anchor="IANA" title="IANA Considerations"> <name>Changes from RFC 793</name>
<t> <t>
In the &quot;Transmission Control Protocol (TCP) Header Flags&quot; registry This document obsoletes RFC 793 as well as RFCs 6093 and 6528, which
, IANA is asked to make several changes described in this section.</t> updated 793. In all cases, only the normative protocol specification and requi
<t>RFC 3168 originally created this registry, but only populated it with the rements have been incorporated into this document, and some informational text w
new bits defined in RFC 3168, neglecting the other bits that had previously bee ith background and rationale may not have been carried in. The informational co
n described in RFC 793 and other documents. Bit 7 has since also been updated b ntent of those documents is still valuable in learning about and understanding T
y RFC 8311.</t> CP, and they are valid Informational references, even though their normative con
<t>The &quot;Bit&quot; column is renamed below as the &quot;Bit Offset&quot; tent has been incorporated into this document.
column, since it references each header flag's offset within the 16-bit aligned </t>
view of the TCP header in <xref target="header_format"/>. The bits in offsets <t>
0 through 4 are the TCP segment Data Offset field, and not header flags.</t> The main body of this document was adapted from RFC 793's Section <x
<t>IANA should add a column for &quot;Assignment Notes&quot;.</t> ref target="RFC0793" section="3" sectionFormat="bare" format="default"/>, titled
<t>IANA should assign values indicated below.</t> "FUNCTIONAL SPECIFICATION", with an attempt to keep formatting and layout as cl
<figure> ose as possible.
<artwork> </t>
TCP Header Flags <t>
The collection of applicable RFC errata that have been reported and
Bit Name Reference Assignmen either accepted or held for an update to RFC 793 were incorporated (Errata IDs:
t Notes 573 <xref target="Err573" format="default"/>, 574 <xref target="Err574" format="
Offset default"/>, 700 <xref target="Err700" format="default"/>, 701 <xref target="Err7
--- ---- --------- --------- 01" format="default"/>, 1283 <xref target="Err1283" format="default"/>, 1561 <xr
------- ef target="Err1561" format="default"/>, 1562 <xref target="Err1562" format="defa
4 Reserved for future use (this document) ult"/>, 1564 <xref target="Err1564" format="default"/>, 1571 <xref target="Err15
5 Reserved for future use (this document) 71" format="default"/>, 1572 <xref target="Err1572" format="default"/>, 2297 <xr
6 Reserved for future use (this document) ef target="Err2297" format="default"/>, 2298 <xref target="Err2298" format="defa
7 Reserved for future use [RFC8311] [1] ult"/>, 2748 <xref target="Err2748" format="default"/>, 2749 <xref target="Err27
8 CWR (Congestion Window Reduced) [RFC3168] 49" format="default"/>, 2934 <xref target="Err2934" format="default"/>, 3213 <xr
9 ECE (ECN-Echo) [RFC3168] ef target="Err3213" format="default"/>, 3300 <xref target="Err3300" format="defa
10 Urgent Pointer field is significant (URG) (this document) ult"/>, 3301 <xref target="Err3301" format="default"/>, 6222 <xref target="Err62
11 Acknowledgment field is significant (ACK) (this document) 22" format="default"/>). Some errata were not applicable due to other changes (
12 Push Function (PSH) (this document) Errata IDs: 572 <xref target="Err572" format="default"/>, 575 <xref target="Err5
13 Reset the connection (RST) (this document) 75" format="default"/>, 1565 <xref target="Err1565" format="default"/>, 1569 <xr
14 Synchronize sequence numbers (SYN) (this document) ef target="Err1569" format="default"/>, 2296 <xref target="Err2296" format="defa
15 No more data from sender (FIN) (this document) ult"/>, 3305 <xref target="Err3305" format="default"/>, 3602 <xref target="Err36
02" format="default"/>).
FOOTNOTES: </t>
[1] Previously used by Historic [RFC3540] as NS (Nonce Sum). <t>
</artwork> Changes to the specification of the urgent pointer described in RFCs
</figure> 1011, 1122, and 6093 were incorporated. See RFC 6093 for detailed discussion o
f why these changes were necessary.
<t>This TCP Header Flags registry should also be moved to a sub-registry und </t>
er the global &quot;Transmission Control Protocol (TCP) Parameters registry (htt <t>
ps://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml).</t> The discussion of the RTO from RFC 793 was updated to refer to RFC 6298. The te
xt on the RTO in RFC 1122 originally replaced the text in RFC 793; however, RFC
<t>The registry's Registration Procedure should remain Standards Action, but 2988 should have updated RFC 1122 and has subsequently been obsoleted by RFC 629
the Reference can be updated to this document, and the Note removed.</t> 8.
</t>
<t>
RFC 1011 <xref target="RFC1011" format="default"/> contains a number of comments
about RFC 793, including some needed changes to the TCP specification. These a
re expanded in RFC 1122, which contains a collection of other changes and clarif
ications to RFC 793. The normative items impacting the protocol have been incor
porated here, though some historically useful implementation advice and informat
ive discussion from RFC 1122 is not included here. The present document, which
is now the TCP specification rather than RFC 793, updates RFC 1011, and the comm
ents noted in RFC 1011 have been incorporated.
</t>
<t>
RFC 1122 contains more than just TCP requirements, so this document can't obsole
te RFC 1122 entirely. It is only marked as "updating" RFC 1122; however, it sho
uld be understood to effectively obsolete all of the material on TCP found in RF
C 1122.
</t>
<t>
The more secure initial sequence number generation algorithm from RF
C 6528 was incorporated. See RFC 6528 for discussion of the attacks that this m
itigates, as well as advice on selecting PRF algorithms and managing secret key
data.
</t>
<t>
A note based on RFC 6429 was added to explicitly clarify that system resource ma
nagement concerns allow connection resources to be reclaimed. RFC 6429 is obsol
eted in the sense that the clarification it describes has been reflected within
this base TCP specification.
</t>
<t>
The description of congestion control implementation was added based on t
he set of documents that are IETF BCP or Standards Track on the topic and the cu
rrent state of common implementations.
</t>
</section> </section>
<section anchor="IANA" numbered="true" toc="default">
<name>IANA Considerations</name>
<t>
In the "Transmission Control Protocol (TCP) Header Flags" registry, IANA has
made several changes as described in this section.</t>
<t>RFC 3168 originally created this registry but only populated it with th
e new bits defined in RFC 3168, neglecting the other bits that had previously be
en described in RFC 793 and other documents. Bit 7 has since also been updated
by RFC 8311 <xref target="RFC8311" format="default"/>.</t>
<t>The "Bit" column has been renamed below as the "Bit Offset" column beca
use it references each header flag's offset within the 16-bit aligned view of th
e TCP header in <xref target="header_format" format="default"/>. The bits in of
fsets 0 through 3 are the TCP segment Data Offset field, and not header flags.</
t>
<t>IANA has added a column for "Assignment Notes".</t>
<t>IANA has assigned values as indicated below.</t>
<table>
<name>TCP Header Flags</name>
<thead>
<tr>
<th>Bit Offset</th>
<th>Name</th>
<th>Reference</th>
<th>Assignment Notes</th>
</tr>
</thead>
<tbody>
<tr>
<td>4</td>
<td>Reserved for future use</td>
<td>RFC 9293</td>
<td></td>
</tr>
<tr>
<td>5</td>
<td>Reserved for future use</td>
<td>RFC 9293</td>
<td></td>
</tr>
<tr>
<td>6</td>
<td>Reserved for future use</td>
<td>RFC 9293</td>
<td></td>
</tr>
<tr>
<td>7</td>
<td>Reserved for future use</td>
<td>RFC 8311</td>
<td>Previously used by Historic RFC 3540 as NS (Nonce Sum).</td>
</tr>
<tr>
<td>8</td>
<td>CWR (Congestion Window Reduced)</td>
<td>RFC 3168</td>
<td></td>
</tr>
<tr>
<td>9</td>
<td>ECE (ECN-Echo)</td>
<td>RFC 3168</td>
<td></td>
</tr>
<tr>
<td>10</td>
<td>Urgent pointer field is significant (URG)</td>
<td>RFC 9293</td>
<td></td>
</tr>
<tr>
<td>11</td>
<td>Acknowledgment field is significant (ACK)</td>
<td>RFC 9293</td>
<td></td>
</tr>
<tr>
<td>12</td>
<td>Push function (PSH)</td>
<td>RFC 9293</td>
<td></td>
</tr>
<tr>
<td>13</td>
<td>Reset the connection (RST)</td>
<td>RFC 9293</td>
<td></td>
</tr>
<tr>
<td>14</td>
<td>Synchronize sequence numbers (SYN)</td>
<td>RFC 9293</td>
<td></td>
</tr>
<tr>
<td>15</td>
<td>No more data from sender (FIN)</td>
<td>RFC 9293</td>
<td></td>
</tr>
</tbody>
</table>
<section anchor="Security" title="Security and Privacy Considerations"> <t>The "TCP Header Flags" registry has also been moved to a subregistry un
<t> der the global "Transmission Control Protocol (TCP) Parameters" registry <eref t
The TCP design includes only rudimentary security features that improve the robu arget="https://www.iana.org/assignments/tcp-parameters/" brackets="angle"/>.</t>
stness and reliability of connections and application data transfer, but there a <t>The registry's Registration Procedure remains Standards Action, but the
re no built-in cryptographic capabilities to support any form of confidentiality Reference has been updated to this document, and the Note has been removed.</t>
, authentication, or other typical security functions. Non-cryptographic enhanc </section>
ements (e.g. <xref target="RFC5961"/>) have been developed to improve robustness <section anchor="Security" numbered="true" toc="default">
of TCP connections to particular types of attacks, but the applicability and pr <name>Security and Privacy Considerations</name>
otections of non-cryptographic enhancements are limited (e.g. see section 1.1 of <t>
<xref target="RFC5961"/>). The TCP design includes only rudimentary security features that improve the robu
Applications typically utilize lower-layer (e.g. IPsec) and upper-layer (e.g. TL stness and reliability of connections and application data transfer, but there a
S) protocols to provide security and privacy for TCP connections and application re no built-in cryptographic capabilities to support any form of confidentiality
data carried in TCP. Methods based on TCP options have been developed as well, , authentication, or other typical security functions. Non-cryptographic enhanc
to support some security capabilities. ements (e.g., <xref target="RFC5961" format="default"/>) have been developed to
</t> improve robustness of TCP connections to particular types of attacks, but the ap
<t> plicability and protections of non-cryptographic enhancements are limited (e.g.,
In order to fully provide confidentiality, integrity protection, and authenticat see <xref target="RFC5961" section="1.1" sectionFormat="of" format="default"/>)
ion for TCP connections (including their control flags) IPsec is the only curren .
t effective method. For integrity protection and authentication, the TCP Authen Applications typically utilize lower-layer (e.g., IPsec) and upper-layer (e.g.,
tication Option (TCP-AO) <xref target="RFC5925"/> is available, with a proposed TLS) protocols to provide security and privacy for TCP connections and applicati
extension to also provide confidentiality for the segment payload. on data carried in TCP. Methods based on TCP Options have been developed as wel
l, to support some security capabilities.
</t>
<t>
In order to fully provide confidentiality, integrity protection, and authenticat
ion for TCP connections (including their control flags), IPsec is the only curre
nt effective method. For integrity protection and authentication, the TCP Authe
ntication Option (TCP-AO) <xref target="RFC5925" format="default"/> is available
, with a proposed extension to also provide confidentiality for the segment payl
oad.
Other methods discussed in this section may provide confidentiality or integrity protection for Other methods discussed in this section may provide confidentiality or integrity protection for
the payload, but for the TCP header only cover either a subset of the fields (e. the payload, but for the TCP header only cover either a subset of the fields (e.
g. tcpcrypt <xref target="RFC8548"/>) or none at g., tcpcrypt <xref target="RFC8548" format="default"/>) or none at
all (e.g. TLS). Other security features that have been added to TCP (e.g. ISN all (e.g., TLS). Other security features that have been added to TCP (e.g., ISN
generation, sequence number checks, and others) are only capable of partially generation, sequence number checks, and others) are only capable of partially
hindering attacks. hindering attacks.
</t> </t>
<t> <t>
Applications using long-lived TCP flows have been vulnerable to attacks that exp Applications using long-lived TCP flows have been vulnerable to attacks that exp
loit the processing of control flags described in earlier TCP specifications <xr loit the processing of control flags described in earlier TCP specifications <xr
ef target="RFC4953"/>. TCP-MD5 was a commonly implemented TCP option to support ef target="RFC4953" format="default"/>. TCP-MD5 was a commonly implemented TCP
authentication for some of these connections, but had flaws and is now deprecat Option to support authentication for some of these connections, but had flaws an
ed. TCP-AO provides a capability to protect long-lived TCP connections from att d is now deprecated. TCP-AO provides a capability to protect long-lived TCP con
acks, and has superior properties to TCP-MD5. It does not provide any privacy f nections from attacks and has superior properties to TCP-MD5. It does not provi
or application data, nor for the TCP headers. de any privacy for application data or for the TCP headers.
</t> </t>
<t> <t>
The &quot;tcpcrypt&quot; <xref target="RFC8548"/> Experimental extension to TCP The "tcpcrypt" <xref target="RFC8548" format="default"/> experimental extension
provides the ability to cryptographically protect connection data. Metadata asp to TCP provides the ability to cryptographically protect connection data. Metad
ects of the TCP flow are still visible, but the application stream is well-prote ata aspects of the TCP flow are still visible, but the application stream is wel
cted. Within the TCP header, only the urgent pointer and FIN flag are protected l protected. Within the TCP header, only the urgent pointer and FIN flag are pr
through tcpcrypt. otected through tcpcrypt.
</t> </t>
<t> <t>
The TCP Roadmap <xref target="RFC7414"/> includes notes about several RFCs relat The TCP Roadmap <xref target="RFC7414" format="default"/> includes notes about s
ed to TCP security. Many of the enhancements provided by these RFCs have been i everal RFCs related to TCP security. Many of the enhancements provided by these
ntegrated into the present document, including ISN generation, mitigating blind RFCs have been integrated into the present document, including ISN generation,
in-window attacks, and improving handling of soft errors and ICMP packets. Thes mitigating blind in-window attacks, and improving handling of soft errors and IC
e are all discussed in greater detail in the referenced RFCs that originally des MP packets. These are all discussed in greater detail in the referenced RFCs th
cribed the changes needed to earlier TCP specifications. Additionally, see RFC at originally described the changes needed to earlier TCP specifications. Addit
6093 <xref target="RFC6093"/> for discussion of security considerations related ionally, see RFC 6093 <xref target="RFC6093" format="default"/> for discussion o
to the urgent pointer field, that has been deprecated. f security considerations related to the urgent pointer field, which also discou
</t> rages new applications from using the urgent pointer.
<t> </t>
Since TCP is often used for bulk transfer flows, some attacks are possible that <t>
abuse the TCP congestion control logic. An example is &quot;ACK-division&quot; Since TCP is often used for bulk transfer flows, some attacks are possible that
attacks. Updates that have been made to the TCP congestion control specification abuse the TCP congestion control logic. An example is "ACK-division" attacks. U
s include mechanisms like Appropriate Byte Counting (ABC) <xref target="RFC3465" pdates that have been made to the TCP congestion control specifications include
/> that act as mitigations to these attacks. mechanisms like Appropriate Byte Counting (ABC) <xref target="RFC3465" format="d
</t> efault"/> that act as mitigations to these attacks.
<t> </t>
Other attacks are focused on exhausting the resources of a TCP server. Examples <t>
include SYN flooding <xref target="RFC4987"/> or wasting resources on non-progr Other attacks are focused on exhausting the resources of a TCP server. Examples
essing connections <xref target="RFC6429"/>. Operating systems commonly impleme include SYN flooding <xref target="RFC4987" format="default"/> or wasting resou
nt mitigations for these attacks. Some common defenses also utilize proxies, st rces on non-progressing connections <xref target="RFC6429" format="default"/>.
ateful firewalls, and other technologies outside the end-host TCP implementation Operating systems commonly implement mitigations for these attacks. Some common
. defenses also utilize proxies, stateful firewalls, and other technologies outsi
de the end-host TCP implementation.
</t> </t>
<t> <t>
The concept of a protocol's &quot;wire image&quot; is described in RFC 8546 <xre The concept of a protocol's "wire image" is described in RFC 8546 <xref target="
f target="RFC8546"/>, which describes how TCP's cleartext headers expose more me RFC8546" format="default"/>, which describes how TCP's cleartext headers expose
tadata to nodes on the path than is strictly required to route the packets to th more metadata to nodes on the path than is strictly required to route the packet
eir destination. On-path adversaries may be able to leverage this metadata. Le s to their destination. On-path adversaries may be able to leverage this metada
ssons learned in this respect from TCP have been applied in the design of newer ta. Lessons learned in this respect from TCP have been applied in the design of
transports like QUIC <xref target="RFC9000"/>. Additionally, based partly on ex newer transports like QUIC <xref target="RFC9000" format="default"/>. Addition
periences with TCP and its extensions, there are considerations that might be ap ally, based partly on experiences with TCP and its extensions, there are conside
plicable for future TCP extensions and other transports that the IETF has docume rations that might be applicable for future TCP extensions and other transports
nted in RFC 9065 <xref target="RFC9065"/>, along with IAB recommendations in RFC that the IETF has documented in RFC 9065 <xref target="RFC9065" format="default"
8558 <xref target="RFC8558"/> and <xref target="I-D.iab-use-it-or-lose-it"/>. />, along with IAB recommendations in RFC 8558 <xref target="RFC8558" format="de
</t> fault"/> and <xref target="RFC9170" format="default"/>.
<t> </t>
There are also methods of &quot;fingerprinting&quot; that can be used to infer t <t>
he host TCP implementation (operating system) version or platform information. There are also methods of "fingerprinting" that can be used to infer the host TC
These collect observations of several aspects such as the options present in seg P implementation (operating system) version or platform information. These coll
ments, the ordering of options, the specific behaviors in the case of various co ect observations of several aspects, such as the options present in segments, th
nditions, packet timing, packet sizing, and other aspects of the protocol that a e ordering of options, the specific behaviors in the case of various conditions,
re left to be determined by an implementer, and can use those observations to id packet timing, packet sizing, and other aspects of the protocol that are left t
entify information about the host and implementation. o be determined by an implementer, and can use those observations to identify in
</t> formation about the host and implementation.
</section> </t>
<section title="Acknowledgements"> <t>
<t> Since ICMP message processing also can interact with TCP connections, there is p
This document is largely a revision of RFC 793, which Jon Postel was the edi otential for ICMP-based attacks against TCP connections. These are discussed in
tor of. Due to his excellent work, it was able to last for three decades before RFC 5927 <xref target="RFC5927" format="default"/>, along with mitigations that
we felt the need to revise it. have been implemented.
</t> </t>
<t>
Andre Oppermann was a contributor and helped to edit the first revision of t
his document.
</t>
<t>
We are thankful for the assistance of the IETF TCPM working group chairs, ov
er the course of work on this document:
<list>
<t>Michael Scharf<vspace />
Yoshifumi Nishida<vspace />
Pasi Sarolahti<vspace />
Michael Tuexen</t>
</list>
</t>
<t>
During the discussions of this work on the TCPM mailing list, in working gro
up meetings, and via area reviews, helpful comments, critiques, and reviews were
received from (listed alphabetically by last name): Praveen Balasubramanian, Da
vid Borman, Mohamed Boucadair, Bob Briscoe, Neal Cardwell, Yuchung Cheng, Martin
Duke, Francis Dupont, Ted Faber, Gorry Fairhurst, Fernando Gont, Rodney Grimes,
Yi Huang, Rahul Jadhav, Markku Kojo, Mike Kosek, Juhamatti Kuusisaari, Kevin La
hey, Kevin Mason, Matt Mathis, Stephen McQuistin, Jonathan Morton, Matt Olson, T
ommy Pauly, Tom Petch, Hagen Paul Pfeifer, Kyle Rose, Anthony Sabatini, Michael
Scharf, Greg Skinner, Joe Touch, Michael Tuexen, Reji Varghese, Bernie Volz, Tim
Wicinski, Lloyd Wood, and Alex Zimmermann.
</t>
<t>
Joe Touch provided additional help in clarifying the description of segment
size parameters and PMTUD/PLPMTUD recommendations. Markku Kojo helped put toget
her the text in the section on TCP Congestion Control.
</t>
<t>
This document includes content from errata that were reported by (listed chr
onologically): Yin Shuming, Bob Braden, Morris M. Keesan, Pei-chun Cheng, Consta
ntin Hagemeier, Vishwas Manral, Mykyta Yevstifeyev, EungJun Yi, Botong Huang, Ch
arles Deng, Merlin Buge.
</t>
</section> </section>
</middle> </middle>
<!-- *****BACK MATTER ***** -->
<back> <back>
<!-- References split to informative and normative --> <references>
<references title="Normative References"> <name>References</name>
<!-- A *really* full, totally OTT reference - Note, the "target" attrib <references>
ute of the <name>Normative References</name>
"reference": if you want a URI printed in the reference, this is whe <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
re it goes. --> FC.0791.xml"/>
<!-- <reference anchor="RFC1191" target="https://www.rfc-editor.org/info/rfc1
<reference anchor='RFC2119' 191">
target='http://xml.resource.org/public/rfc/html/rfc2119.html' <front>
> <title>Path MTU discovery</title>
<front> <author fullname="J.C. Mogul" initials="J." surname="Mogul"/>
<title abbrev='RFC Key Words'>Key words for use in RFCs to Indic <author fullname="S.E. Deering" initials="S." surname="Deering"/>
ate Requirement <date month="November" year="1990"/>
Levels</title> </front>
<author initials='S.' surname='Bradner' fullname='Scott Bradner' <seriesInfo name="RFC" value="1191"/>
> <seriesInfo name="DOI" value="10.17487/RFC1191"/>
<organization>Harvard University</organization>
<address>
<postal>
<street>1350 Mass. Ave.</street>
<street>Cambridge</street>
<street>MA 02138</street>
</postal>
<phone>- +1 617 495 3864</phone>
<email>sob@harvard.edu</email>
</address>
</author>
<date year='1997' month='March' />
<area>General</area>
<keyword>keyword</keyword>
<abstract>
<t>In many standards track documents several words are used
to signify
the requirements in the specification. These words are ofte
n
capitalized. This document defines these words as they shou
ld be
interpreted in IETF documents. Authors who follow these gui
delines
should incorporate this phrase near the beginning of their d
ocument:
<list>
<t>
The key words &quot;MUST&quot;, &quot;MUST NOT&quot;
,
&quot;REQUIRED&quot;, &quot;SHALL&quot;, &quot;SHALL
NOT&quot;,
&quot;SHOULD&quot;, &quot;SHOULD NOT&quot;, &quot;RE
COMMENDED&quot;,
&quot;MAY&quot;, and &quot;OPTIONAL&quot; in this do
cument are to be
interpreted as described in RFC 2119.</t>
</list>
</t>
<t>
Note that the force of these words is modified by the requir
ement level of
the document in which they are used.</t>
</abstract>
</front>
<seriesInfo name='BCP' value='14' />
<seriesInfo name='RFC' value='2119' />
<format type='TXT' octets='4723' target='ftp://ftp.isi.edu/in-notes/
rfc2119.txt' />
<format type='HTML' octets='14486'
target='http://xml.resource.org/public/rfc/html/rfc2119.html
' />
<format type='XML' octets='5661'
target='http://xml.resource.org/public/rfc/xml/rfc2119.xml'
/>
</reference> </reference>
--> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.2119.xml"/>
<!-- Right back at the beginning we defined an entity which (we asserted <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
) would contain FC.2474.xml"/>
XML needed for a reference... this is where we use it. --> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC0791; FC.2914.xml"/>
&RFC1191; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC2119; FC.3168.xml"/>
&RFC2474; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC2914; FC.5033.xml"/>
&RFC3168; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC5033; FC.5681.xml"/>
&RFC5681; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC5961; FC.5961.xml"/>
&RFC6298; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC6633; FC.6298.xml"/>
&RFC8174; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC8200; FC.6633.xml"/>
&RFC8201; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC8961; FC.8174.xml"/>
</references> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8200.xml"/>
<references title="Informative References"> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
<!-- A reference written by by an organization not a persoN. --> FC.8201.xml"/>
<!-- <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
<reference FC.8961.xml"/>
anchor="DOMINATION" > </references>
<front> <references>
<title>Ultimate Plan for Taking Over the World</title> <name>Informative References</name>
<author> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
<organization>Mad Dominators, Inc.</organization> FC.0793.xml"/>
</author> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
<date year="1984" /> FC.0896.xml"/>
</front> <reference anchor="RFC1011" target="https://www.rfc-editor.org/info/rfc1
011">
<front>
<title>Official Internet protocols</title>
<author fullname="J.K. Reynolds" initials="J." surname="Reynolds"/>
<author fullname="J. Postel" initials="J." surname="Postel"/>
<date month="May" year="1987"/>
</front>
<seriesInfo name="RFC" value="1011"/>
<seriesInfo name="DOI" value="10.17487/RFC1011"/>
</reference> </reference>
--> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC0793; FC.1122.xml"/>
&RFC0879; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC0896; FC.1349.xml"/>
&RFC1011; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC1122; FC.1644.xml"/>
&RFC1349; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC1644; FC.2018.xml"/>
<!--&RFC1191;--> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
<!--&RFC2675;--> FC.2525.xml"/>
&RFC2018; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC2525; FC.2675.xml"/>
&RFC2675; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC2873; FC.2873.xml"/>
&RFC2883; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC2923; FC.2883.xml"/>
&RFC3449; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC3465; FC.2923.xml"/>
&RFC4727; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC4821; FC.3449.xml"/>
&RFC4987; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC4953; FC.3465.xml"/>
&RFC5044; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC5461; FC.4727.xml"/>
&RFC5570; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC5795; FC.4821.xml"/>
&RFC5925; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC6093; FC.4987.xml"/>
&RFC6191; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.4953.xml"/>
&RFC6429; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC6528; FC.5044.xml"/>
&RFC6691; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC6864; FC.5461.xml"/>
&RFC6994; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC7094; FC.5570.xml"/>
&RFC7323; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC7413; FC.5795.xml"/>
&RFC7414; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC7657; FC.5925.xml"/>
&RFC8087; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC8095; FC.6093.xml"/>
&RFC8303; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC8504; FC.6191.xml"/>
&RFC8546; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC8548; FC.6429.xml"/>
&RFC8558; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC8684; FC.6528.xml"/>
&RFC9000; <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
&RFC9065; FC.6691.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
<reference anchor="TCP-parameters-registry"> FC.6864.xml"/>
<front> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
<title>Transmission Control Protocol (TCP) Parameters, https://www.ian FC.6994.xml"/>
a.org/assignments/tcp-parameters/tcp-parameters.xhtml <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
</title> FC.7094.xml"/>
<author> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
<organization>IANA</organization> FC.7323.xml"/>
</author> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
<date year="2019"/> FC.7413.xml"/>
</front> <xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.7414.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.7657.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8087.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8095.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8303.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8311.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8504.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8546.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8548.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8558.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8684.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.9000.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.9065.xml"/>
<reference anchor="TCP-parameters-registry" target="https://www.iana.org
/assignments/tcp-parameters/">
<front>
<title>Transmission Control Protocol (TCP) Parameters
</title>
<author>
<organization>IANA</organization>
</author>
</front>
</reference> </reference>
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-gont-tc
<reference anchor="header-flags-registry"> pm-tcp-seccomp-prec.xml"/>
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-gont-tc
pm-tcp-seq-validation.xml"/>
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-ietf-tc
pm-tcp-edo.xml"/>
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-mcquist
in-augmented-ascii-diagrams.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.9170.xml"/>
<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-minshal
l-nagle.xml"/>
<reference anchor="DS78">
<front>
<title>Connection Management in Transport Protocols</title>
<author initials="Y" surname="Dalal"/>
<author initials="C" surname="Sunshine"/>
<date year="1978" month="December"/>
</front>
<refcontent>Computer Networks, Vol. 2, No. 6, pp. 454-473</refcontent>
<seriesInfo name="DOI" value="10.1016/0376-5075(78)90053-3"/>
</reference>
<reference anchor="FTY99">
<front>
<title>The TIME-WAIT state in TCP and Its Effect on Busy Servers</ti
tle>
<author initials="T" surname="Faber"/>
<author initials="J" surname="Touch"/>
<author initials="W" surname="Yui"/>
<date year="1999" month="March"/>
</front>
<refcontent>Proceedings of IEEE INFOCOM, pp. 1573-1583</refcontent>
<seriesInfo name="DOI" value="10.1109/INFCOM.1999.752180"/>
</reference>
<reference anchor="IEN177" target="https://www.rfc-editor.org/ien/ien177
.txt">
<front>
<title>Comments on Action Items from the January Meeting</title>
<author initials="J" surname="Postel"/>
<date year="1981" month="March"/>
</front>
<seriesInfo name="IEN" value="177"/>
</reference>
<reference anchor="offload" target="https://www.kernel.org/doc/html/late
st/networking/segmentation-offloads.html">
<front>
<title>Segmentation Offloads</title>
<author/>
<date/>
</front>
<refcontent>The Linux Kernel Documentation</refcontent>
</reference>
<reference anchor="Err573" quote-title="false" target="https://www.rfc-e
ditor.org/errata/eid573">
<front>
<title>Erratum ID 573</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err574" quote-title="false" target="https://www.rfc-e
ditor.org/errata/eid574">
<front>
<title>Erratum ID 574</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err700" quote-title="false" target="https://www.rfc-e
ditor.org/errata/eid700">
<front>
<title>Erratum ID 700</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err701" quote-title="false" target="https://www.rfc-e
ditor.org/errata/eid701">
<front>
<title>Erratum ID 701</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err1283" quote-title="false" target="https://www.rfc-
editor.org/errata/eid1283">
<front>
<title>Erratum ID 1283</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err1561" quote-title="false" target="https://www.rfc-
editor.org/errata/eid1561">
<front>
<title>Erratum ID 1561</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err1562" quote-title="false" target="https://www.rfc-
editor.org/errata/eid1562">
<front>
<title>Erratum ID 1562</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err1564" quote-title="false" target="https://www.rfc-
editor.org/errata/eid1564">
<front>
<title>Erratum ID 1564</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err1571" quote-title="false" target="https://www.rfc-
editor.org/errata/eid1571">
<front>
<title>Erratum ID 1571</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err1572" quote-title="false" target="https://www.rfc-
editor.org/errata/eid1572">
<front>
<title>Erratum ID 1572</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err2297" quote-title="false" target="https://www.rfc-e
ditor.org/errata/eid2297">
<front>
<title>Erratum ID 2297</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err2298" quote-title="false" target="https://www.rfc-
editor.org/errata/eid2298">
<front>
<title>Erratum ID 2298</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err2748" quote-title="false" target="https://www.rfc-
editor.org/errata/eid2748">
<front>
<title>Erratum ID 2748</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err2749" quote-title="false" target="https://www.rfc-
editor.org/errata/eid2749">
<front>
<title>Erratum ID 2749</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err2934" quote-title="false" target="https://www.rfc-
editor.org/errata/eid2934">
<front>
<title>Erratum ID 2934</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err3213" quote-title="false" target="https://www.rfc-
editor.org/errata/eid3213">
<front> <front>
<title>Transmission Control Protocol (TCP) Header Flags, https://www.i <title>Erratum ID 3213</title>
ana.org/assignments/tcp-header-flags/tcp-header-flags.xhtml <author>
</title> <organization>RFC Errata</organization>
<author> </author>
<organization>IANA</organization> </front>
</author> <refcontent>RFC 793</refcontent>
<date year="2019"/>
</front>
</reference> </reference>
<reference anchor="Err3300" quote-title="false" target="https://www.rfc-
&I-D.gont-tcpm-tcp-seccomp-prec; editor.org/errata/eid3300">
&I-D.gont-tcpm-tcp-seq-validation; <front>
&I-D.ietf-tcpm-tcp-edo; <title>Erratum ID 3300</title>
&I-D.mcquistin-augmented-ascii-diagrams; <author>
&I-D.iab-use-it-or-lose-it; <organization>RFC Errata</organization>
</author>
<reference anchor="draft-minshall-nagle"> </front>
<front> <refcontent>RFC 793</refcontent>
<title>A Proposed Modification to Nagle's Algorithm
</title>
<author initials="G" surname="Minshall" fullname="Greg Minshall">
</author>
<date month="June" year="1999"/>
</front>
<seriesInfo name="Internet-Draft" value="draft-minshall-nagle-01"/>
</reference> </reference>
<reference anchor="Err3301" quote-title="false" target="https://www.rfc-
<reference editor.org/errata/eid3301">
anchor="DS78" > <front>
<front> <title>Erratum ID 3301</title>
<title>Connection Management in Transport Protocols</title> <author>
<author initials = "Y" surname="Dalal"></author> <organization>RFC Errata</organization>
<author initials = "C" surname="Sunshine"></author> </author>
<date year="1978" month="December" /> </front>
</front> <refcontent>RFC 793</refcontent>
<seriesInfo name="Computer Networks" value="Vol. 2, No. 6, pp. 454-4
73"/>
</reference> </reference>
<reference anchor="Err6222" quote-title="false" target="https://www.rfc-
<reference editor.org/errata/eid6222">
anchor="FTY99"> <front>
<front> <title>Erratum ID 6222</title>
<title>The TIME-WAIT state in TCP and Its Effect on Busy Servers <author>
</title> <organization>RFC Errata</organization>
<author initials = "T" surname="Faber"></author> </author>
<author initials = "J" surname="Touch"></author> </front>
<author initials = "W" surname="Yui"></author> <refcontent>RFC 793</refcontent>
<date year="1999" month="March" />
</front>
<seriesInfo name="Proceedings of IEEE INFOCOM" value="pp. 1573-1583"
/>
</reference> </reference>
<reference anchor="Err572" quote-title="false" target="https://www.rfc-e
<reference ditor.org/errata/eid572">
anchor="IEN177" target="https://www.rfc-editor.org/ien/ien177.txt"> <front>
<front> <title>Erratum ID 572</title>
<title>Comments on Action Items from the January Meeting</title> <author>
<author initials = "J" surname="Postel"></author> <organization>RFC Errata</organization>
<date year="1981" month="March" /> </author>
</front> </front>
<seriesInfo name="IEN" value="177"/> <refcontent>RFC 793</refcontent>
</reference> </reference>
<reference anchor="Err575" quote-title="false" target="https://www.rfc-e
<reference anchor="offload" target="https://www.kernel.org/doc/html/late ditor.org/errata/eid575">
st/networking/segmentation-offloads.html"> <front>
<front> <title>Erratum ID 575</title>
<title>Segmentation Offloads</title> <author>
<author></author> <organization>RFC Errata</organization>
<date/> </author>
</front> </front>
<seriesInfo name="Linux Networking Documentation" value=""/> <refcontent>RFC 793</refcontent>
</reference> </reference>
<reference anchor="Err1565" quote-title="false" target="https://www.rfc-
editor.org/errata/eid1565">
<front>
<title>Erratum ID 1565</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err1569" quote-title="false" target="https://www.rfc-
editor.org/errata/eid1569">
<front>
<title>Erratum ID 1569</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err2296" quote-title="false" target="https://www.rfc-
editor.org/errata/eid2296">
<front>
<title>Erratum ID 2296</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err3305" quote-title="false" target="https://www.rfc-
editor.org/errata/eid3305">
<front>
<title>Erratum ID 3305</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err3602" quote-title="false" target="https://www.rfc-
editor.org/errata/eid3602">
<front>
<title>Erratum ID 3602</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 793</refcontent>
</reference>
<reference anchor="Err4772" quote-title="false" target="https://www.rfc-
editor.org/errata/eid4772">
<front>
<title>Erratum ID 4772</title>
<author>
<organization>RFC Errata</organization>
</author>
</front>
<refcontent>RFC 5961</refcontent>
</reference>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.5927.xml"/>
</references>
</references> </references>
<section title="Other Implementation Notes"> <section numbered="true" toc="default">
<t> <name>Other Implementation Notes</name>
<t>
This section includes additional notes and references on TCP implementation decisions that are currently not a part of the RFC series or included within the TCP standard. These items can be considered by implementers, but there was not yet a consensus to include them in the standard. This section includes additional notes and references on TCP implementation decisions that are currently not a part of the RFC series or included within the TCP standard. These items can be considered by implementers, but there was not yet a consensus to include them in the standard.
</t> </t>
<section anchor="seccomp" numbered="true" toc="default">
<section title="IP Security Compartment and Precedence" anchor="seccomp"> <name>IP Security Compartment and Precedence</name>
<t> <t>
The IPv4 specification <xref target="RFC0791"/> includes a precedence value in The IPv4 specification <xref target="RFC0791" format="default"/> includes a prec
the (now obsoleted) Type of Service field (TOS) field. It was modified in edence value in
<xref target="RFC1349"/>, and then obsoleted by the definition of the (now obsoleted) Type of Service (TOS) field. It was modified in
Differentiated Services (DiffServ) <xref target="RFC2474"/>. Setting and <xref target="RFC1349" format="default"/> and then obsoleted by the definition o
conveying TOS between the network layer, TCP implementation, and applications is f
obsolete, Differentiated Services (Diffserv) <xref target="RFC2474" format="default"/>. S
and replaced by DiffServ in the current TCP specification. etting and
conveying TOS between the network layer, TCP implementation, and applications is
obsolete
and is replaced by Diffserv in the current TCP specification.
</t> </t>
<t> <t>
RFC 793 required checking the IP security compartment and precedence on RFC 793 required checking the IP security compartment and precedence on
incoming TCP segments for consistency within a connection, and with incoming TCP segments for consistency within a connection and with
application requests. Each of these aspects of IP have become outdated, application requests. Each of these aspects of IP have become outdated,
without specific updates to RFC 793. The issues with precedence were without specific updates to RFC 793. The issues with precedence were
fixed by <xref target="RFC2873"/>, which is Standards Track, and so this fixed by <xref target="RFC2873" format="default"/>, which is Standards Track, and so this
present TCP specification includes those changes. However, the state of present TCP specification includes those changes. However, the state of
IP security options that may be used by MLS systems is not as apparent in IP security options that may be used by Multi-Level Secure (MLS) systems is not as apparent in
the IETF currently. the IETF currently.
</t> </t>
<t> <t>
Resetting connections when incoming packets do not meet expected security Resetting connections when incoming packets do not meet expected security
compartment or precedence expectations has been recognized as a possible compartment or precedence expectations has been recognized as a possible
attack vector <xref target="I-D.gont-tcpm-tcp-seccomp-prec"/>, and there has attack vector <xref target="I-D.gont-tcpm-tcp-seccomp-prec" format="default"/>, and there has
been discussion about amending the TCP specification to prevent connections been discussion about amending the TCP specification to prevent connections
from being aborted due to non-matching IP security compartment and DiffServ from being aborted due to nonmatching IP security compartment and Diffserv
codepoint values. codepoint values.
</t> </t>
<section title="Precedence"> <section numbered="true" toc="default">
<t> <name>Precedence</name>
<t>
In DiffServ the former precedence values are treated as Class Selector In Diffserv, the former precedence values are treated as Class Selector
codepoints, and methods for compatible treatment are described in the DiffServ codepoints, and methods for compatible treatment are described in the Diffserv
architecture. The RFC 793/1122 TCP specification includes logic intending to architecture. The TCP specification defined by RFCs 793 and 1122 included l
ogic intending to
have connections use the highest precedence requested by either endpoint have connections use the highest precedence requested by either endpoint
application, and to keep the precedence consistent throughout a connection. application, and to keep the precedence consistent throughout a connection.
This logic from the obsolete TOS is not applicable for DiffServ, and should This logic from the obsolete TOS is not applicable to Diffserv and should
not be included in TCP implementations, though changes to DiffServ values not be included in TCP implementations, though changes to Diffserv values
within a connection are discouraged. For discussion of this, see RFC 7657 (sec within a connection are discouraged. For discussion of this, see RFC 7657 (Sect
5.1, 5.3, and 6) <xref target="RFC7657"/>. ions <xref target="RFC7657" section="5.1" sectionFormat="bare" format="default"/
>, <xref target="RFC7657" section="5.3" sectionFormat="bare" format="default"/>,
and <xref target="RFC7657" section="6" sectionFormat="bare" format="default"/>)
<xref target="RFC7657" format="default"/>.
</t> </t>
<t> <t>
The obsoleted TOS processing rules in TCP assumed bidirectional (or symmetric) precedence values The obsoleted TOS processing rules in TCP assumed bidirectional (or symmetric) precedence values
used on a connection, but the DiffServ architecture is asymmetric. used on a connection, but the Diffserv architecture is asymmetric.
Problems with the old TCP logic in this regard were described in <xref Problems with the old TCP logic in this regard were described in <xref target="R
target="RFC2873"/> and the solution described is to ignore IP precedence in FC2873" format="default"/>, and the solution described is to ignore IP precedenc
e in
TCP. Since RFC 2873 is a Standards Track document (although not marked as TCP. Since RFC 2873 is a Standards Track document (although not marked as
updating RFC 793), current implementations are expected to be robust to these updating RFC 793), current implementations are expected to be robust in these
conditions. Note that the DiffServ field value used in each direction is a conditions. Note that the Diffserv field value used in each direction is a
part of the interface between TCP and the network layer, and values in use can be part of the interface between TCP and the network layer, and values in use can be
indicated both ways between TCP and the application. indicated both ways between TCP and the application.
</t> </t>
</section> </section>
<section title="MLS Systems"> <section numbered="true" toc="default">
<t> <name>MLS Systems</name>
<t>
The IP security option (IPSO) and compartment defined in <xref The IP Security Option (IPSO) and compartment defined in <xref target="RFC0791
target="RFC0791"/> was refined in RFC 1038 that was later obsoleted by RFC " format="default"/> was refined in RFC 1038, which was later obsoleted by RFC
1108. The Commercial IP Security Option (CIPSO) is defined in FIPS-188 (withd 1108. The Commercial IP Security Option (CIPSO) is defined in FIPS-188 (withd
rawn by NIST in 2015), and rawn by NIST in 2015) and
is supported by some vendors and operating systems. RFC 1108 is now is supported by some vendors and operating systems. RFC 1108 is now
Historic, though RFC 791 itself has not been updated to remove the IP Historic, though RFC 791 itself has not been updated to remove the IP
security option. For IPv6, a similar option (CALIPSO) has been defined <xref Security Option. For IPv6, a similar option (Common Architecture Label IPv6 S
target="RFC5570"/>. RFC 793 includes logic that includes the IP ecurity Option (CALIPSO)) has been defined <xref target="RFC5570" format="defaul
t"/>. RFC 793 includes logic that includes the IP
security/compartment information in treatment of TCP segments. References to security/compartment information in treatment of TCP segments. References to
the IP &quot;security/compartment&quot; in this document may be relevant for the IP "security/compartment" in this document may be relevant for
Multi-Level Secure (MLS) system implementers, but can be ignored for non-MLS Multi-Level Secure (MLS) system implementers but can be ignored for non-MLS
implementations, consistent with running code on the Internet. See <xref implementations, consistent with running code on the Internet. See <xref targ
target="seccomp"/> for further discussion. Note that RFC 5570 describes some et="seccomp" format="default"/> for further discussion. Note that RFC 5570 desc
ribes some
MLS networking scenarios where IPSO, CIPSO, or CALIPSO may be used. In these MLS networking scenarios where IPSO, CIPSO, or CALIPSO may be used. In these
special cases, TCP implementers should see section 7.3.1 of RFC 5570, and special cases, TCP implementers should see Section <xref target="RFC5570" section="7.3.1" sectionFormat="bare" format="default"/> of RFC 5570 and
follow the guidance in that document. follow the guidance in that document.
</t> </t>
</section> </section>
</section> </section>
<section title="Sequence Number Validation" anchor="seqval"> <section anchor="seqval" numbered="true" toc="default">
<t> <name>Sequence Number Validation</name>
<t>
There are cases where the TCP sequence number validation rules can prevent ACK fields from being processed. This can result in connection issues, as described in There are cases where the TCP sequence number validation rules can prevent ACK fields from being processed. This can result in connection issues, as described in
<xref target="I-D.gont-tcpm-tcp-seq-validation"/>, which includes descriptions of potential problems in conditions of simultaneous open, self-connects, simultaneous close, and simultaneous window probes. The document also describes potential changes to the TCP specification to mitigate the issue by expanding the acceptable sequence numbers. <xref target="I-D.gont-tcpm-tcp-seq-validation" format="default"/>, which includes descriptions of potential problems in conditions of simultaneous open, self-connects, simultaneous close, and simultaneous window probes. The document also describes potential changes to the TCP specification to mitigate the issue by expanding the acceptable sequence numbers.
</t> </t>
<t> <t>
In Internet usage of TCP, these conditions are rarely occurring. Common operati In Internet usage of TCP, these conditions rarely occur. Common operating syste
ng systems include different alternative mitigations, and the standard has not b ms include different alternative mitigations, and the standard has not been upda
een updated yet to codify one of them, but implementers should consider the prob ted yet to codify one of them, but implementers should consider the problems des
lems described in <xref target="I-D.gont-tcpm-tcp-seq-validation"/>. cribed in <xref target="I-D.gont-tcpm-tcp-seq-validation" format="default"/>.
</t> </t>
</section> </section>
<section title="Nagle Modification" anchor="minshall"> <section anchor="minshall" numbered="true" toc="default">
<t>In common operating systems, both the Nagle algorithm and delayed acknowledge <name>Nagle Modification</name>
ments are implemented and enabled by default. TCP is used by many applications <t>In common operating systems, both the Nagle algorithm and delayed ack
that have a request-response style of communication, where the combination of th nowledgments are implemented and enabled by default. TCP is used by many applic
e Nagle algorithm and delayed acknowledgements can result in poor application pe ations that have a request-response style of communication, where the combinatio
rformance. A modification to the Nagle algorithm is described in <xref target=" n of the Nagle algorithm and delayed acknowledgments can result in poor applicat
draft-minshall-nagle"/> that improves the situation for these applications. ion performance. A modification to the Nagle algorithm is described in <xref ta
rget="I-D.minshall-nagle" format="default"/> that improves the situation for the
se applications.
</t> </t>
<t>This modification is implemented in some common operating systems, and does n <t>This modification is implemented in some common operating systems and
ot impact TCP interoperability. Additionally, many applications simply disable does not impact TCP interoperability. Additionally, many applications simply d
Nagle, since this is generally supported by a socket option. The TCP standard h isable Nagle since this is generally supported by a socket option. The TCP stan
as not been updated to include this Nagle modification, but implementers may fin dard has not been updated to include this Nagle modification, but implementers m
d it beneficial to consider.</t> ay find it beneficial to consider.</t>
</section> </section>
<section title="Low Watermark Settings"> <section numbered="true" toc="default">
<t>Some operating system kernel TCP implementations include socket options <name>Low Watermark Settings</name>
that allow specifying the number of bytes in the buffer until the socket layer w <t>Some operating system kernel TCP implementations include socket optio
ill pass sent data to TCP (SO_SNDLOWAT) or to the application on receiving (SO_R ns that allow specifying the number of bytes in the buffer until the socket laye
CVLOWAT).</t> r will pass sent data to TCP (SO_SNDLOWAT) or to the application on receiving (S
<t>In addition, another socket option (TCP_NOTSENT_LOWAT) can be used to co O_RCVLOWAT).</t>
ntrol the amount of unsent bytes in the write queue. This can help a sending TCP <t>In addition, another socket option (TCP_NOTSENT_LOWAT) can be used to
application to avoid creating large amounts of buffered data (and corresponding control the amount of unsent bytes in the write queue. This can help a sending
latency). As an example, this may be useful for applications that are multiplex TCP application to avoid creating large amounts of buffered data (and correspond
ing data from multiple upper level streams onto a connection, especially when st ing latency). As an example, this may be useful for applications that are multip
reams may be a mix of interactive / real-time and bulk data transfer.</t> lexing data from multiple upper-level streams onto a connection, especially when
</section> streams may be a mix of interactive/real-time and bulk data transfer.</t>
</section>
</section> </section>
<section anchor="reqs" numbered="true" toc="default">
<name>TCP Requirement Summary</name>
<t>This section is adapted from RFC 1122.</t>
<t>Note that there is no requirement related to PLPMTUD in this list, but
that PLPMTUD is recommended.</t>
<section title="TCP Requirement Summary" anchor="reqs"> <table anchor="tcp-req-summary">
<t>This section is adapted from RFC 1122.</t> <name>TCP Requirements Summary</name>
<t>Note that there is no requirement related to PLPMTUD in this list, bu <thead>
t that PLPMTUD is recommended.</t> <tr>
<figure> <th align="center">Feature</th>
<artwork> <th align="center">ReqID</th>
<th align="center"><bcp14>MUST</bcp14></th>
| | | | |S| | <th align="center"><bcp14>SHOULD</bcp14></th>
| | | | |H| |F <th align="center"><bcp14>MAY</bcp14></th>
| | | | |O|M|o <th align="center"><bcp14>SHOULD NOT</bcp14></th>
| | |S| |U|U|o <th align="center"><bcp14>MUST NOT</bcp14></th>
| | |H| |L|S|t </tr>
| |M|O| |D|T|n </thead>
| |U|U|M| | |o <tbody>
| |S|L|A|N|N|t <tr>
| |T|D|Y|O|O|t <th colspan="7">PUSH flag</th>
FEATURE | ReqID | | | |T|T|e </tr>
| | | | | | | <tr>
Push flag | | | | | | | <td>Aggregate or queue un-pushed data</td>
Aggregate or queue un-pushed data | MAY-16 | | |x| | | <td>MAY-16</td>
Sender collapse successive PSH flags | SHLD-27| |x| | | | <td>&nbsp;</td>
SEND call can specify PUSH | MAY-15 | | |x| | | <td>&nbsp;</td>
If cannot: sender buffer indefinitely | MUST-60| | | | |x| <td align="center">X</td>
If cannot: PSH last segment | MUST-61|x| | | | | <td>&nbsp;</td>
Notify receiving ALP of PSH | MAY-17 | | |x| | |1 <td>&nbsp;</td>
Send max size segment when possible | SHLD-28| |x| | | | </tr>
| | | | | | | <tr>
Window | | | | | | | <td>Sender collapse successive PSH bits</td>
Treat as unsigned number | MUST-1 |x| | | | | <td>SHLD-27</td>
Handle as 32-bit number | REC-1 | |x| | | | <td>&nbsp;</td>
Shrink window from right | SHLD-14| | | |x| | <td align="center">X</td>
- Send new data when window shrinks | SHLD-15| | | |x| | <td>&nbsp;</td>
- Retransmit old unacked data within window | SHLD-16| |x| | | | <td>&nbsp;</td>
- Time out conn for data past right edge | SHLD-17| | | |x| | <td>&nbsp;</td>
Robust against shrinking window | MUST-34|x| | | | | </tr>
Receiver's window closed indefinitely | MAY-8 | | |x| | | <tr>
Use standard probing logic | MUST-35|x| | | | | <td>SEND call can specify PUSH</td>
Sender probe zero window | MUST-36|x| | | | | <td>MAY-15</td>
First probe after RTO | SHLD-29| |x| | | | <td>&nbsp;</td>
Exponential backoff | SHLD-30| |x| | | | <td>&nbsp;</td>
Allow window stay zero indefinitely | MUST-37|x| | | | | <td align="center">X</td>
Retransmit old data beyond SND.UNA+SND.WND | MAY-7 | | |x| | | <td>&nbsp;</td>
Process RST and URG even with zero window | MUST-66|x| | | | | <td>&nbsp;</td>
| | | | | | | </tr>
Urgent Data | | | | | | | <tr>
Include support for urgent pointer | MUST-30|x| | | | | <td><ul><li>If cannot: sender buffer indefinitely</li></ul></td>
Pointer indicates first non-urgent octet | MUST-62|x| | | | | <td>MUST-60</td>
Arbitrary length urgent data sequence | MUST-31|x| | | | | <td>&nbsp;</td>
Inform ALP asynchronously of urgent data | MUST-32|x| | | | |1 <td>&nbsp;</td>
ALP can learn if/how much urgent data Q'd | MUST-33|x| | | | |1 <td>&nbsp;</td>
ALP employ the urgent mechanism | SHLD-13| | | |x| | <td>&nbsp;</td>
| | | | | | | <td align="center">X</td>
TCP Options | | | | | | | </tr>
Support the mandatory option set | MUST-4 |x| | | | | <tr>
Receive TCP option in any segment | MUST-5 |x| | | | | <td><ul><li>If cannot: PSH last segment</li></ul></td>
Ignore unsupported options | MUST-6 |x| | | | | <td>MUST-61</td>
Include length for all options except EOL+NOP | MUST-68|x| | | | | <td align="center">X</td>
Cope with illegal option length | MUST-7 |x| | | | | <td>&nbsp;</td>
Process options regardless of word alignment | MUST-64|x| | | | | <td>&nbsp;</td>
Implement sending &amp; receiving MSS option | MUST-14|x| | | | | <td>&nbsp;</td>
IPv4 Send MSS option unless 536 | SHLD-5 | |x| | | | <td>&nbsp;</td>
IPv6 Send MSS option unless 1220 | SHLD-5 | |x| | | | </tr>
Send MSS option always | MAY-3 | | |x| | | <tr>
IPv4 Send-MSS default is 536 | MUST-15|x| | | | | <td>Notify receiving ALP<sup>1</sup> of PSH</td>
IPv6 Send-MSS default is 1220 | MUST-15|x| | | | | <td>MAY-17</td>
Calculate effective send seg size | MUST-16|x| | | | | <td>&nbsp;</td>
MSS accounts for varying MTU | SHLD-6 | |x| | | | <td>&nbsp;</td>
MSS not sent on non-SYN segments | MUST-65| | | | |x| <td align="center">X</td>
MSS value based on MMS_R | MUST-67|x| | | | | <td>&nbsp;</td>
Pad with zero | MUST-69|x| | | | | <td>&nbsp;</td>
| | | | | | | </tr>
TCP Checksums | | | | | | | <tr>
Sender compute checksum | MUST-2 |x| | | | | <td>Send max size segment when possible</td>
Receiver check checksum | MUST-3 |x| | | | | <td>SHLD-28</td>
| | | | | | | <td>&nbsp;</td>
ISN Selection | | | | | | | <td align="center">X</td>
Include a clock-driven ISN generator component | MUST-8 |x| | | | | <td>&nbsp;</td>
Secure ISN generator with a PRF component | SHLD-1 | |x| | | | <td>&nbsp;</td>
PRF computable from outside the host | MUST-9 | | | | |x| <td>&nbsp;</td>
| | | | | | | </tr>
Opening Connections | | | | | | | <tr>
Support simultaneous open attempts | MUST-10|x| | | | | <th colspan="7">Window</th>
SYN-RECEIVED remembers last state | MUST-11|x| | | | | </tr>
Passive Open call interfere with others | MUST-41| | | | |x| <tr>
Function: simultan. LISTENs for same port | MUST-42|x| | | | | <td>Treat as unsigned number</td>
Ask IP for src address for SYN if necc. | MUST-44|x| | | | | <td>MUST-1</td>
Otherwise, use local addr of conn. | MUST-45|x| | | | | <td align="center">X</td>
OPEN to broadcast/multicast IP Address | MUST-46| | | | |x| <td>&nbsp;</td>
Silently discard seg to bcast/mcast addr | MUST-57|x| | | | | <td>&nbsp;</td>
| | | | | | | <td>&nbsp;</td>
Closing Connections | | | | | | | <td>&nbsp;</td>
RST can contain data | SHLD-2 | |x| | | | </tr>
Inform application of aborted conn | MUST-12|x| | | | | <tr>
Half-duplex close connections | MAY-1 | | |x| | | <td>Handle as 32-bit number</td>
Send RST to indicate data lost | SHLD-3 | |x| | | | <td>REC-1</td>
In TIME-WAIT state for 2MSL seconds | MUST-13|x| | | | | <td>&nbsp;</td>
Accept SYN from TIME-WAIT state | MAY-2 | | |x| | | <td align="center">X</td>
Use Timestamps to reduce TIME-WAIT | SHLD-4 | |x| | | | <td>&nbsp;</td>
| | | | | | | <td>&nbsp;</td>
Retransmissions | | | | | | | <td>&nbsp;</td>
Implement exponential backoff, slow start, and | MUST-19|x| | | | | </tr>
congestion avoidance | | | | | | | <tr>
Retransmit with same IP ident | MAY-4 | | |x| | | <td>Shrink window from right</td>
Karn's algorithm | MUST-18|x| | | | | <td>SHLD-14</td>
| | | | | | | <td>&nbsp;</td>
Generating ACKs: | | | | | | | <td>&nbsp;</td>
Aggregate whenever possible | MUST-58|x| | | | | <td>&nbsp;</td>
Queue out-of-order segments | SHLD-31| |x| | | | <td align="center">X</td>
Process all Q'd before send ACK | MUST-59|x| | | | | <td>&nbsp;</td>
Send ACK for out-of-order segment | MAY-13 | | |x| | | </tr>
Delayed ACKs | SHLD-18| |x| | | | <tr>
Delay &lt; 0.5 seconds | MUST-40|x| | | | | <td><ul><li>Send new data when window shrinks</li></ul></td>
Every 2nd full-sized segment or 2*RMSS ACK'd | SHLD-19| |x| | | | <td>SHLD-15</td>
Receiver SWS-Avoidance Algorithm | MUST-39|x| | | | | <td>&nbsp;</td>
| | | | | | | <td>&nbsp;</td>
Sending data | | | | | | | <td>&nbsp;</td>
Configurable TTL | MUST-49|x| | | | | <td align="center">X</td>
Sender SWS-Avoidance Algorithm | MUST-38|x| | | | | <td>&nbsp;</td>
Nagle algorithm | SHLD-7 | |x| | | | </tr>
Application can disable Nagle algorithm | MUST-17|x| | | | | <tr>
| | | | | | | <td><ul><li>Retransmit old unacked data within window</li></ul></td>
Connection Failures: | | | | | | | <td>SHLD-16</td>
Negative advice to IP on R1 retxs | MUST-20|x| | | | | <td>&nbsp;</td>
Close connection on R2 retxs | MUST-20|x| | | | | <td align="center">X</td>
ALP can set R2 | MUST-21|x| | | | |1 <td>&nbsp;</td>
Inform ALP of R1&lt;=retxs&lt;R2 | SHLD-9 | |x| | | |1 <td>&nbsp;</td>
Recommended value for R1 | SHLD-10| |x| | | | <td>&nbsp;</td>
Recommended value for R2 | SHLD-11| |x| | | | </tr>
Same mechanism for SYNs | MUST-22|x| | | | | <tr>
R2 at least 3 minutes for SYN | MUST-23|x| | | | | <td><ul><li>Time out conn for data past right edge</li></ul></td>
| | | | | | | <td>SHLD-17</td>
Send Keep-alive Packets: | MAY-5 | | |x| | | <td>&nbsp;</td>
- Application can request | MUST-24|x| | | | | <td>&nbsp;</td>
- Default is "off" | MUST-25|x| | | | | <td>&nbsp;</td>
- Only send if idle for interval | MUST-26|x| | | | | <td align="center">X</td>
- Interval configurable | MUST-27|x| | | | | <td>&nbsp;</td>
- Default at least 2 hrs. | MUST-28|x| | | | | </tr>
- Tolerant of lost ACKs | MUST-29|x| | | | | <tr>
- Send with no data | SHLD-12| |x| | | | <td>Robust against shrinking window</td>
- Configurable to send garbage octet | MAY-6 | | |x| | | <td>MUST-34</td>
| | | | | | | <td align="center">X</td>
IP Options | | | | | | | <td>&nbsp;</td>
Ignore options TCP doesn't understand | MUST-50|x| | | | | <td>&nbsp;</td>
Time Stamp support | MAY-10 | | |x| | | <td>&nbsp;</td>
Record Route support | MAY-11 | | |x| | | <td>&nbsp;</td>
Source Route: | | | | | | | </tr>
ALP can specify | MUST-51|x| | | | |1 <tr>
Overrides src rt in datagram | MUST-52|x| | | | | <td>Receiver's window closed indefinitely</td>
Build return route from src rt | MUST-53|x| | | | | <td>MAY-8</td>
Later src route overrides | SHLD-24| |x| | | | <td>&nbsp;</td>
| | | | | | | <td>&nbsp;</td>
Receiving ICMP Messages from IP | MUST-54|x| | | | | <td align="center">X</td>
Dest. Unreach (0,1,5) =&gt; inform ALP | SHLD-25| |x| | | | <td>&nbsp;</td>
Abort on Dest. Unreach (0,1,5) =&gt;nn | MUST-56| | | | |x| <td>&nbsp;</td>
Dest. Unreach (2-4) =&gt; abort conn | SHLD-26| |x| | | | </tr>
Source Quench =&gt; silent discard | MUST-55|x| | | | | <tr>
Abort on Time Exceeded =&gt; | MUST-56| | | | |x| <td>Use standard probing logic</td>
Abort on Param Problem =&gt; | MUST-56| | | | |x| <td>MUST-35</td>
| | | | | | | <td align="center">X</td>
Address Validation | | | | | | | <td>&nbsp;</td>
Reject OPEN call to invalid IP address | MUST-46|x| | | | | <td>&nbsp;</td>
Reject SYN from invalid IP address | MUST-63|x| | | | | <td>&nbsp;</td>
Silently discard SYN to bcast/mcast addr | MUST-57|x| | | | | <td>&nbsp;</td>
| | | | | | | </tr>
TCP/ALP Interface Services | | | | | | | <tr>
Error Report mechanism | MUST-47|x| | | | | <td>Sender probe zero window</td>
ALP can disable Error Report Routine | SHLD-20| |x| | | | <td>MUST-36</td>
ALP can specify DiffServ field for sending | MUST-48|x| | | | | <td align="center">X</td>
Passed unchanged to IP | SHLD-22| |x| | | | <td>&nbsp;</td>
ALP can change DiffServ field during connection| SHLD-21| |x| | | | <td>&nbsp;</td>
ALP generally changing DiffServ during conn. | SHLD-23| | | |x| | <td>&nbsp;</td>
Pass received DiffServ field up to ALP | MAY-9 | | |x| | | <td>&nbsp;</td>
FLUSH call | MAY-14 | | |x| | | </tr>
Optional local IP addr parm. in OPEN | MUST-43|x| | | | | <tr>
| | | | | | | <td><ul><li>First probe after RTO</li></ul></td>
RFC 5961 Support: | | | | | | | <td>SHLD-29</td>
Implement data injection protection | MAY-12 | | |x| | | <td>&nbsp;</td>
| | | | | | | <td align="center">X</td>
Explicit Congestion Notification: | | | | | | | <td>&nbsp;</td>
Support ECN | SHLD-8 | |x| | | | <td>&nbsp;</td>
| | | | | | | <td>&nbsp;</td>
Alternative Congestion Control: | | | | | | | </tr>
Implement alternative conformant algorithm(s) | MAY-18 | | |x| | | <tr>
<td><ul><li>Exponential backoff</li></ul></td>
</artwork></figure> <td>SHLD-30</td>
<t> <td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Allow window stay zero indefinitely</td>
<td>MUST-37</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Retransmit old data beyond SND.UNA+SND.WND</td>
<td>MAY-7</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Process RST and URG even with zero window</td>
<td>MUST-66</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">Urgent Data</th>
</tr>
<tr>
<td>Include support for urgent pointer</td>
<td>MUST-30</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Pointer indicates first non-urgent octet</td>
<td>MUST-62</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Arbitrary length urgent data sequence</td>
<td>MUST-31</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Inform ALP<sup>1</sup> asynchronously of urgent data </td>
<td>MUST-32</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>ALP<sup>1</sup> can learn if/how much urgent data Q'd</td>
<td>MUST-33</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>ALP employ the urgent mechanism</td>
<td>SHLD-13</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">TCP Options</th>
</tr>
<tr>
<td>Support the mandatory option set</td>
<td>MUST-4</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Receive TCP Option in any segment</td>
<td>MUST-5</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Ignore unsupported options</td>
<td>MUST-6</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Include length for all options except EOL+NOP</td>
<td>MUST-68</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Cope with illegal option length</td>
<td>MUST-7</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Process options regardless of word alignment</td>
<td>MUST-64</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Implement sending &amp; receiving MSS Option</td>
<td>MUST-14</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>IPv4 Send MSS Option unless 536</td>
<td>SHLD-5</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>IPv6 Send MSS Option unless 1220</td>
<td>SHLD-5</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Send MSS Option always</td>
<td>MAY-3</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>IPv4 Send-MSS default is 536</td>
<td>MUST-15</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>IPv6 Send-MSS default is 1220</td>
<td>MUST-15</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Calculate effective send seg size</td>
<td>MUST-16</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>MSS accounts for varying MTU</td>
<td>SHLD-6</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>MSS not sent on non-SYN segments</td>
<td>MUST-65</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
</tr>
<tr>
<td>MSS value based on MMS_R</td>
<td>MUST-67</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Pad with zero</td>
<td>MUST-69</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">TCP Checksums</th>
</tr>
<tr>
<td>Sender compute checksum</td>
<td>MUST-2</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Receiver check checksum</td>
<td>MUST-3</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">ISN Selection</th>
</tr>
<tr>
<td>Include a clock-driven ISN generator component</td>
<td>MUST-8</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Secure ISN generator with a PRF component</td>
<td>SHLD-1</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>PRF computable from outside the host</td>
<td>MUST-9</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
</tr>
<tr>
<th colspan="7">Opening Connections</th>
</tr>
<tr>
<td>Support simultaneous open attempts</td>
<td>MUST-10</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>SYN-RECEIVED remembers last state</td>
<td>MUST-11</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Passive OPEN call interfere with others</td>
<td>MUST-41</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
</tr>
<tr>
<td>Function: simultaneously LISTENs for same port</td>
<td>MUST-42</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Ask IP for src address for SYN if necessary</td>
<td>MUST-44</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Otherwise, use local addr of connection</li></ul></td>
<td>MUST-45</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>OPEN to broadcast/multicast IP address</td>
<td>MUST-46</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
</tr>
<tr>
<td>Silently discard seg to bcast/mcast addr</td>
<td>MUST-57</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">Closing Connections</th>
</tr>
<tr>
<td>RST can contain data</td>
<td>SHLD-2</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Inform application of aborted conn</td>
<td>MUST-12</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Half-duplex close connections</td>
<td>MAY-1</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Send RST to indicate data lost</li></ul></td>
<td>SHLD-3</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>In TIME-WAIT state for 2MSL seconds</td>
<td>MUST-13</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Accept SYN from TIME-WAIT state</li></ul></td>
<td>MAY-2</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Use Timestamps to reduce TIME-WAIT</li></ul></td>
<td>SHLD-4</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">Retransmissions</th>
</tr>
<tr>
<td>Implement exponential backoff, slow start, and congestion avoidance</td>
<td>MUST-19</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Retransmit with same IP identity</td>
<td>MAY-4</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Karn's algorithm</td>
<td>MUST-18</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">Generating ACKs</th>
</tr>
<tr>
<td>Aggregate whenever possible</td>
<td>MUST-58</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Queue out-of-order segments</td>
<td>SHLD-31</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Process all Q'd before send ACK</td>
<td>MUST-59</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Send ACK for out-of-order segment</td>
<td>MAY-13</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Delayed ACKs</td>
<td>SHLD-18</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Delay &lt; 0.5 seconds</li></ul></td>
<td>MUST-40</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Every 2nd full-sized segment or 2*RMSS ACK'd</li></ul></td>
<td>SHLD-19</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Receiver SWS-Avoidance Algorithm</td>
<td>MUST-39</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">Sending Data</th>
</tr>
<tr>
<td>Configurable TTL</td>
<td>MUST-49</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Sender SWS-Avoidance Algorithm </td>
<td>MUST-38</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Nagle algorithm</td>
<td>SHLD-7</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Application can disable Nagle algorithm</li></ul></td>
<td>MUST-17</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">Connection Failures</th>
</tr>
<tr>
<td>Negative advice to IP on R1 retransmissions</td>
<td>MUST-20</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Close connection on R2 retransmissions</td>
<td>MUST-20</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>ALP<sup>1</sup> can set R2</td>
<td>MUST-21</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Inform ALP of R1&lt;=retxs&lt;R2 </td>
<td>SHLD-9</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Recommended value for R1</td>
<td>SHLD-10</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Recommended value for R2</td>
<td>SHLD-11</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Same mechanism for SYNs</td>
<td>MUST-22</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>R2 at least 3 minutes for SYN</li></ul></td>
<td>MUST-23</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">Send Keep-alive Packets</th>
</tr>
<tr>
<td>Send Keep-alive Packets:</td>
<td>MAY-5</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Application can request</li></ul></td>
<td>MUST-24</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Default is "off"</li></ul></td>
<td>MUST-25</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Only send if idle for interval</li></ul></td>
<td>MUST-26</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Interval configurable</li></ul></td>
<td>MUST-27</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Default at least 2 hrs.</li></ul></td>
<td>MUST-28</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Tolerant of lost ACKs</li></ul></td>
<td>MUST-29</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Send with no data</li></ul></td>
<td>SHLD-12</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Configurable to send garbage octet</li></ul></td>
<td>MAY-6</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">IP Options</th>
</tr>
<tr>
<td>Ignore options TCP doesn't understand</td>
<td>MUST-50</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Timestamp support</td>
<td>MAY-10</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Record Route support</td>
<td>MAY-11</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Source Route:</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>ALP<sup>1</sup> can specify</li></ul></td>
<td>MUST-51</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul indent="6"><li>Overrides src route in datagram</li></ul></td>
<td>MUST-52</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Build return route from src route</li></ul></td>
<td>MUST-53</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Later src route overrides</li></ul></td>
<td>SHLD-24</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">Receiving ICMP Messages from IP</th>
</tr>
<tr>
<td>Receiving ICMP messages from IP</td>
<td>MUST-54</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Dest Unreach (0,1,5) =&gt; inform ALP</li></ul></td>
<td>SHLD-25</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Abort on Dest Unreach (0,1,5)</li></ul></td>
<td>MUST-56</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
</tr>
<tr>
<td><ul><li>Dest Unreach (2-4) =&gt; abort conn</li></ul></td>
<td>SHLD-26</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Source Quench =&gt; silent discard</li></ul></td>
<td>MUST-55</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Abort on Time Exceeded</li></ul></td>
<td>MUST-56</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
</tr>
<tr>
<td><ul><li>Abort on Param Problem</li></ul></td>
<td>MUST-56</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
</tr>
<tr>
<th colspan="7">Address Validation</th>
</tr>
<tr>
<td>Reject OPEN call to invalid IP address</td>
<td>MUST-46</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Reject SYN from invalid IP address</td>
<td>MUST-63</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Silently discard SYN to bcast/mcast addr</td>
<td>MUST-57</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">TCP/ALP Interface Services</th>
</tr>
<tr>
<td>Error Report mechanism</td>
<td>MUST-47</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>ALP can disable Error Report Routine</td>
<td>SHLD-20</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>ALP can specify Diffserv field for sending</td>
<td>MUST-48</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td><ul><li>Passed unchanged to IP</li></ul></td>
<td>SHLD-22</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>ALP can change Diffserv field during connection</td>
<td>SHLD-21</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>ALP generally changing Diffserv during conn.</td>
<td>SHLD-23</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Pass received Diffserv field up to ALP</td>
<td>MAY-9</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>FLUSH call</td>
<td>MAY-14</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<td>Optional local IP addr param in OPEN</td>
<td>MUST-43</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">RFC 5961 Support</th>
</tr>
<tr>
<td>Implement data injection protection</td>
<td>MAY-12</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">Explicit Congestion Notification</th>
</tr>
<tr>
<td>Support ECN</td>
<td>SHLD-8</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr>
<th colspan="7">Alternative Congestion Control</th>
</tr>
<tr>
<td>Implement alternative conformant algorithm(s)</td>
<td>MAY-18</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td align="center">X</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
</tbody>
</table>
<t>
FOOTNOTES: FOOTNOTES:
(1) "ALP" means Application-Layer Program. (1) "ALP" means Application-Layer Program.
</t> </t>
</section> </section>
</back> <section numbered="false" toc="default">
<name>Acknowledgments</name>
<t>
This document is largely a revision of RFC 793, of which
<contact fullname="Jon Postel"/> was the editor. Due to his excellent work,
it was able to last for three decades before we felt the need to revise it.
</t>
<t>
<contact fullname="Andre Oppermann"/> was a contributor and helped to edit
the first revision of this document.
</t>
<t>
We are thankful for the assistance of the IETF TCPM working group chairs
over the course of work on this document:
</t>
<contact fullname="Michael Scharf"/>
<contact fullname="Yoshifumi Nishida"/>
<contact fullname="Pasi Sarolahti"/>
<contact fullname="Michael Tüxen"/>
<t>
During the discussions of this work on the TCPM mailing list, in
working group meetings, and via area reviews, helpful comments,
critiques, and reviews were received from (listed alphabetically
by last name):
<contact fullname="Praveen Balasubramanian"/>,
<contact fullname="David Borman"/>,
<contact fullname="Mohamed Boucadair"/>,
<contact fullname="Bob Briscoe"/>,
<contact fullname="Neal Cardwell"/>,
<contact fullname="Yuchung Cheng"/>,
<contact fullname="Martin Duke"/>,
<contact fullname="Francis Dupont"/>,
<contact fullname="Ted Faber"/>,
<contact fullname="Gorry Fairhurst"/>,
<contact fullname="Fernando Gont"/>,
<contact fullname="Rodney Grimes"/>,
<contact fullname="Yi Huang"/>,
<contact fullname="Rahul Jadhav"/>,
<contact fullname="Markku Kojo"/>,
<contact fullname="Mike Kosek"/>,
<contact fullname="Juhamatti Kuusisaari"/>,
<contact fullname="Kevin Lahey"/>,
<contact fullname="Kevin Mason"/>,
<contact fullname="Matt Mathis"/>,
<contact fullname="Stephen McQuistin"/>,
<contact fullname="Jonathan Morton"/>,
<contact fullname="Matt Olson"/>,
<contact fullname="Tommy Pauly"/>,
<contact fullname="Tom Petch"/>,
<contact fullname="Hagen Paul Pfeifer"/>,
<contact fullname="Kyle Rose"/>,
<contact fullname="Anthony Sabatini"/>,
<contact fullname="Michael Scharf"/>,
<contact fullname="Greg Skinner"/>,
<contact fullname="Joe Touch"/>,
<contact fullname="Michael Tüxen"/>,
<contact fullname="Reji Varghese"/>,
<contact fullname="Bernie Volz"/>,
<contact fullname="Tim Wicinski"/>,
<contact fullname="Lloyd Wood"/>, and
<contact fullname="Alex Zimmermann"/>.
</t>
<t>
<contact fullname="Joe Touch"/> provided additional help in clarifying the
description of segment size parameters and PMTUD/PLPMTUD recommendations. Markku
Kojo helped put together the text in the section on TCP Congestion Control.
</t>
<t>
This document includes content from errata that were reported by (listed
chronologically):
<contact fullname="Yin Shuming"/>,
<contact fullname="Bob Braden"/>,
<contact fullname="Morris M. Keesan"/>,
<contact fullname="Pei-chun Cheng"/>,
<contact fullname="Constantin Hagemeier"/>,
<contact fullname="Vishwas Manral"/>,
<contact fullname="Mykyta Yevstifeyev"/>,
<contact fullname="EungJun Yi"/>,
<contact fullname="Botong Huang"/>,
<contact fullname="Charles Deng"/>,
<contact fullname="Merlin Buge"/>.
</t>
</section>
</back>
</rfc> </rfc>
 End of changes. 822 change blocks. 
4264 lines changed or deleted 5824 lines changed or added

This html diff was produced by rfcdiff 1.48.