From dabccadd3202513ab0bcb424e2c62c90ab23062d Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Fri, 25 Feb 2011 15:00:05 -0800 Subject: Update manual to define identifiers using UAX 31 XID_Start / XID_Continue. --- doc/rust.texi | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'doc/rust.texi') diff --git a/doc/rust.texi b/doc/rust.texi index adf84a12..740d45c6 100644 --- a/doc/rust.texi +++ b/doc/rust.texi @@ -592,10 +592,12 @@ or interrupted by ignored characters. Most tokens in Rust follow rules similar to the C family. -Most tokens (including identifiers, whitespace, keywords, operators and -structural symbols) are drawn from the ASCII-compatible range of -Unicode. String and character literals, however, may include the full range of -Unicode characters. +Most tokens (including whitespace, keywords, operators and structural symbols) +are drawn from the ASCII-compatible range of Unicode. Identifiers are drawn +from Unicode characters specified by the @code{XID_Start} and +@code{XID_Continue} rules given by UAX #31@footnote{Unicode Standard Annex +#31: Unicode Identifier and Pattern Syntax}. String and character literals may +include the full range of Unicode characters. @emph{TODO: formalize this section much more}. @@ -638,18 +640,22 @@ token or a syntactic extension token. Multi-line comments may be nested. @c * Ref.Lex.Ident:: Identifier tokens. @cindex Identifier token -Identifiers follow the pattern of C identifiers: they begin with a -@emph{letter} or @emph{underscore}, and continue with any combination of -@emph{letters}, @emph{decimal digits} and underscores, and must not be equal -to any keyword or reserved token. @xref{Ref.Lex.Key}. @xref{Ref.Lex.Res}. +Identifiers follow the rules given by Unicode Standard Annex #31, in the form +closed under NFKC normalization, @emph{excluding} those tokens that are +otherwise defined as keywords or reserved +tokens. @xref{Ref.Lex.Key}. @xref{Ref.Lex.Res}. 
-A @emph{letter} is a Unicode character in the ranges U+0061-U+007A and -U+0041-U+005A (@code{'a'}-@code{'z'} and @code{'A'}-@code{'Z'}). +That is: an identifier starts with any character having derived property +@code{XID_Start} and continues with zero or more characters having derived +property @code{XID_Continue}; and such an identifier is NFKC-normalized during +lexing, such that all subsequent comparison of identifiers is performed on the +NFKC-normalized forms. -An @dfn{underscore} is the character U+005F ('_'). +@emph{TODO: define relationship between Unicode and Rust versions}. -A @dfn{decimal digit} is a character in the range U+0030-U+0039 -(@code{'0'}-@code{'9'}). +@footnote{This identifier syntax is a superset of the identifier syntaxes of C +and Java, and is modeled on Python PEP #3131, which formed the definition of +identifiers in Python 3.0 and later.} @node Ref.Lex.Key @subsection Ref.Lex.Key -- cgit v1.2.3 From e2d36e00ce2b58272b52a5c1f999b0bcb61d066b Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Thu, 24 Feb 2011 18:39:57 -0500 Subject: Remove parens from nullary tag constructors in docs --- doc/rust.texi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'doc/rust.texi') diff --git a/doc/rust.texi b/doc/rust.texi index 740d45c6..48a639dd 100644 --- a/doc/rust.texi +++ b/doc/rust.texi @@ -1990,22 +1990,22 @@ module system). An example of a @code{tag} item and its use: @example tag animal @{ - dog(); - cat(); + dog; + cat; @} -let animal a = dog(); -a = cat(); +let animal a = dog; +a = cat; @end example An example of a @emph{recursive} @code{tag} item and its use: @example tag list[T] @{ - nil(); + nil; cons(T, @@list[T]); @} -let list[int] a = cons(7, cons(13, nil())); +let list[int] a = cons(7, cons(13, nil)); @end example @@ -3401,9 +3401,9 @@ control enters the block. 
An example of a pattern @code{alt} statement: @example -type list[X] = tag(nil(), cons(X, @@list[X])); +type list[X] = tag(nil, cons(X, @@list[X])); -let list[int] x = cons(10, cons(11, nil())); +let list[int] x = cons(10, cons(11, nil)); alt (x) @{ case (cons(a, cons(b, _))) @{ -- cgit v1.2.3