diff options
| author | pravic <[email protected]> | 2016-04-29 21:16:15 +0300 |
|---|---|---|
| committer | pravic <[email protected]> | 2016-04-29 21:16:15 +0300 |
| commit | 77e9a3167b4aaadf3583a0c1d1ee0d9e63c9a000 (patch) | |
| tree | 710e445d56a1a582b8eff19b7b4b180276eae122 /librustc_unicode | |
| parent | tweak: /driver option specifies /fixed:no implicitly as well (diff) | |
| download | kmd-env-rs-77e9a3167b4aaadf3583a0c1d1ee0d9e63c9a000.tar.xz kmd-env-rs-77e9a3167b4aaadf3583a0c1d1ee0d9e63c9a000.zip | |
update libcore to 2016-04-29 nightly
Diffstat (limited to 'librustc_unicode')
| -rw-r--r-- | librustc_unicode/char.rs | 55 |
1 files changed, 38 insertions, 17 deletions
diff --git a/librustc_unicode/char.rs b/librustc_unicode/char.rs index 4d80211..863cada 100644 --- a/librustc_unicode/char.rs +++ b/librustc_unicode/char.rs @@ -29,8 +29,7 @@ #![stable(feature = "rust1", since = "1.0.0")] use core::char::CharExt as C; -use core::option::Option::{self, Some, None}; -use core::iter::Iterator; +use core::fmt; use tables::{derived_property, property, general_category, conversions}; // stable reexports @@ -739,7 +738,7 @@ impl char { } /// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s. -#[unstable(feature = "decode_utf16", reason = "recently exposed", issue = "27830")] +#[stable(feature = "decode_utf16", since = "1.9.0")] #[derive(Clone)] pub struct DecodeUtf16<I> where I: Iterator<Item = u16> @@ -748,6 +747,13 @@ pub struct DecodeUtf16<I> buf: Option<u16>, } +/// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s. +#[stable(feature = "decode_utf16", since = "1.9.0")] +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct DecodeUtf16Error { + code: u16, +} + /// Create an iterator over the UTF-16 encoded code points in `iter`, /// returning unpaired surrogates as `Err`s. /// @@ -756,8 +762,6 @@ pub struct DecodeUtf16<I> /// Basic usage: /// /// ``` -/// #![feature(decode_utf16)] -/// /// use std::char::decode_utf16; /// /// fn main() { @@ -766,7 +770,9 @@ pub struct DecodeUtf16<I> /// 0x0073, 0xDD1E, 0x0069, 0x0063, /// 0xD834]; /// -/// assert_eq!(decode_utf16(v.iter().cloned()).collect::<Vec<_>>(), +/// assert_eq!(decode_utf16(v.iter().cloned()) +/// .map(|r| r.map_err(|e| e.unpaired_surrogate())) +/// .collect::<Vec<_>>(), /// vec![Ok('𝄞'), /// Ok('m'), Ok('u'), Ok('s'), /// Err(0xDD1E), @@ -778,8 +784,6 @@ pub struct DecodeUtf16<I> /// A lossy decoder can be obtained by replacing `Err` results with the replacement character: /// /// ``` -/// #![feature(decode_utf16)] -/// /// use std::char::{decode_utf16, REPLACEMENT_CHARACTER}; /// /// fn main() { @@ -794,7 +798,7 @@ pub struct DecodeUtf16<I> /// "𝄞mus�ic�"); /// } /// ``` -#[unstable(feature = "decode_utf16", reason = "recently exposed", issue = "27830")] +#[stable(feature = "decode_utf16", since = "1.9.0")] #[inline] pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> { DecodeUtf16 { @@ -803,11 +807,11 @@ pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::Into } } -#[unstable(feature = "decode_utf16", reason = "recently exposed", issue = "27830")] +#[stable(feature = "decode_utf16", since = "1.9.0")] impl<I: Iterator<Item=u16>> Iterator for DecodeUtf16<I> { - type Item = Result<char, u16>; + type Item = Result<char, DecodeUtf16Error>; - fn next(&mut self) -> Option<Result<char, u16>> { + fn next(&mut self) -> Option<Result<char, DecodeUtf16Error>> { let u = match self.buf.take() { Some(buf) => buf, None => match self.iter.next() { @@ -821,18 +825,18 @@ impl<I: Iterator<Item=u16>> Iterator for DecodeUtf16<I> { Some(Ok(unsafe { from_u32_unchecked(u as u32) })) } else if u >= 0xDC00 { // a trailing surrogate - Some(Err(u)) + Some(Err(DecodeUtf16Error { code: u })) } else { let u2 = match self.iter.next() { Some(u2) => u2, // eof - None => return Some(Err(u)), + None => return Some(Err(DecodeUtf16Error { code: u })), }; if u2 < 0xDC00 || u2 > 0xDFFF { // not a trailing surrogate so we're not a valid // surrogate pair, so rewind to redecode u2 next time. self.buf = Some(u2); - return Some(Err(u)); + return Some(Err(DecodeUtf16Error { code: u })); } // all ok, so lets decode it. @@ -850,8 +854,25 @@ impl<I: Iterator<Item=u16>> Iterator for DecodeUtf16<I> { } } -/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a decoding error. +impl DecodeUtf16Error { + /// Returns the unpaired surrogate which caused this error. + #[stable(feature = "decode_utf16", since = "1.9.0")] + pub fn unpaired_surrogate(&self) -> u16 { + self.code + } +} + +#[stable(feature = "decode_utf16", since = "1.9.0")] +impl fmt::Display for DecodeUtf16Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "unpaired surrogate found: {:x}", self.code) + } +} + +/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a +/// decoding error. +/// /// It can occur, for example, when giving ill-formed UTF-8 bytes to /// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy). -#[unstable(feature = "decode_utf16", reason = "recently added", issue = "27830")] +#[stable(feature = "decode_utf16", since = "1.9.0")] pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}'; |