aboutsummaryrefslogtreecommitdiff
path: root/ctr-std/src/sys_common
diff options
context:
space:
mode:
authorFenrir <[email protected]>2018-01-21 14:06:28 -0700
committerFenrirWolf <[email protected]>2018-01-21 19:16:33 -0700
commit23be3f4885688e5e0011005e2295c75168854c0a (patch)
treedd0850f9c73c489e114a761d5c0757f3dbec3a65 /ctr-std/src/sys_common
parentUpdate CI for Rust nightly-2017-12-01 + other fixes (diff)
downloadctru-rs-23be3f4885688e5e0011005e2295c75168854c0a.tar.xz
ctru-rs-23be3f4885688e5e0011005e2295c75168854c0a.zip
Recreate ctr-std from latest nightly
Diffstat (limited to 'ctr-std/src/sys_common')
-rw-r--r--ctr-std/src/sys_common/backtrace.rs461
-rw-r--r--ctr-std/src/sys_common/bytestring.rs56
-rw-r--r--ctr-std/src/sys_common/gnu/libbacktrace.rs216
-rw-r--r--ctr-std/src/sys_common/gnu/mod.rs15
-rw-r--r--ctr-std/src/sys_common/io.rs129
-rw-r--r--ctr-std/src/sys_common/memchr.rs230
-rw-r--r--ctr-std/src/sys_common/mod.rs38
-rw-r--r--ctr-std/src/sys_common/net.rs630
-rw-r--r--ctr-std/src/sys_common/poison.rs88
-rw-r--r--ctr-std/src/sys_common/process.rs124
-rw-r--r--ctr-std/src/sys_common/remutex.rs13
-rw-r--r--ctr-std/src/sys_common/thread.rs26
-rw-r--r--ctr-std/src/sys_common/thread_info.rs4
-rw-r--r--ctr-std/src/sys_common/thread_local.rs82
-rw-r--r--ctr-std/src/sys_common/util.rs29
-rw-r--r--ctr-std/src/sys_common/wtf8.rs1284
16 files changed, 3003 insertions, 422 deletions
diff --git a/ctr-std/src/sys_common/backtrace.rs b/ctr-std/src/sys_common/backtrace.rs
new file mode 100644
index 0000000..36cbce2
--- /dev/null
+++ b/ctr-std/src/sys_common/backtrace.rs
@@ -0,0 +1,461 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/// Common code for printing the backtrace in the same way across the different
+/// supported platforms.
+
+use env;
+use io::prelude::*;
+use io;
+use str;
+use sync::atomic::{self, Ordering};
+use path::{self, Path};
+use sys::mutex::Mutex;
+use ptr;
+
+pub use sys::backtrace::{
+ unwind_backtrace,
+ resolve_symname,
+ foreach_symbol_fileline,
+ BacktraceContext
+};
+
+#[cfg(target_pointer_width = "64")]
+pub const HEX_WIDTH: usize = 18;
+
+#[cfg(target_pointer_width = "32")]
+pub const HEX_WIDTH: usize = 10;
+
+/// Represents an item in the backtrace list. See `unwind_backtrace` for how
+/// it is created.
+#[derive(Debug, Copy, Clone)]
+pub struct Frame {
+ /// Exact address of the call that failed.
+ pub exact_position: *const u8,
+ /// Address of the enclosing function.
+ pub symbol_addr: *const u8,
+}
+
+/// Max number of frames to print.
+const MAX_NB_FRAMES: usize = 100;
+
+/// Prints the current backtrace.
+pub fn print(w: &mut Write, format: PrintFormat) -> io::Result<()> {
+ static LOCK: Mutex = Mutex::new();
+
+ // Use a lock to prevent mixed output in multithreading context.
+ // Some platforms also require it, like `SymFromAddr` on Windows.
+ unsafe {
+ LOCK.lock();
+ let res = _print(w, format);
+ LOCK.unlock();
+ res
+ }
+}
+
+fn _print(w: &mut Write, format: PrintFormat) -> io::Result<()> {
+ let mut frames = [Frame {
+ exact_position: ptr::null(),
+ symbol_addr: ptr::null(),
+ }; MAX_NB_FRAMES];
+ let (nb_frames, context) = unwind_backtrace(&mut frames)?;
+ let (skipped_before, skipped_after) =
+ filter_frames(&frames[..nb_frames], format, &context);
+ if skipped_before + skipped_after > 0 {
+ writeln!(w, "note: Some details are omitted, \
+ run with `RUST_BACKTRACE=full` for a verbose backtrace.")?;
+ }
+ writeln!(w, "stack backtrace:")?;
+
+ let filtered_frames = &frames[..nb_frames - skipped_after];
+ for (index, frame) in filtered_frames.iter().skip(skipped_before).enumerate() {
+ resolve_symname(*frame, |symname| {
+ output(w, index, *frame, symname, format)
+ }, &context)?;
+ let has_more_filenames = foreach_symbol_fileline(*frame, |file, line| {
+ output_fileline(w, file, line, format)
+ }, &context)?;
+ if has_more_filenames {
+ w.write_all(b" <... and possibly more>")?;
+ }
+ }
+
+ Ok(())
+}
+
+/// Returns a number of frames to remove at the beginning and at the end of the
+/// backtrace, according to the backtrace format.
+fn filter_frames(frames: &[Frame],
+ format: PrintFormat,
+ context: &BacktraceContext) -> (usize, usize)
+{
+ if format == PrintFormat::Full {
+ return (0, 0);
+ }
+
+ let skipped_before = 0;
+
+ let skipped_after = frames.len() - frames.iter().position(|frame| {
+ let mut is_marker = false;
+ let _ = resolve_symname(*frame, |symname| {
+ if let Some(mangled_symbol_name) = symname {
+ // Use grep to find the concerned functions
+ if mangled_symbol_name.contains("__rust_begin_short_backtrace") {
+ is_marker = true;
+ }
+ }
+ Ok(())
+ }, context);
+ is_marker
+ }).unwrap_or(frames.len());
+
+ if skipped_before + skipped_after >= frames.len() {
+ // Avoid showing completely empty backtraces
+ return (0, 0);
+ }
+
+ (skipped_before, skipped_after)
+}
+
+
+/// Fixed frame used to clean the backtrace with `RUST_BACKTRACE=1`.
+#[inline(never)]
+pub fn __rust_begin_short_backtrace<F, T>(f: F) -> T
+ where F: FnOnce() -> T, F: Send, T: Send
+{
+ f()
+}
+
+/// Controls how the backtrace should be formatted.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum PrintFormat {
+ /// Show all the frames with absolute path for files.
+ Full = 2,
+ /// Show only relevant data from the backtrace.
+ Short = 3,
+}
+
+// For now logging is turned off by default, and this function checks to see
+// whether the magical environment variable is present to see if it's turned on.
+pub fn log_enabled() -> Option<PrintFormat> {
+ static ENABLED: atomic::AtomicIsize = atomic::AtomicIsize::new(0);
+ match ENABLED.load(Ordering::SeqCst) {
+ 0 => {},
+ 1 => return None,
+ 2 => return Some(PrintFormat::Full),
+ 3 => return Some(PrintFormat::Short),
+ _ => unreachable!(),
+ }
+
+ let val = match env::var_os("RUST_BACKTRACE") {
+ Some(x) => if &x == "0" {
+ None
+ } else if &x == "full" {
+ Some(PrintFormat::Full)
+ } else {
+ Some(PrintFormat::Short)
+ },
+ None => None,
+ };
+ ENABLED.store(match val {
+ Some(v) => v as isize,
+ None => 1,
+ }, Ordering::SeqCst);
+ val
+}
+
+/// Print the symbol of the backtrace frame.
+///
+/// These output functions should now be used everywhere to ensure consistency.
+/// You may want to also use `output_fileline`.
+fn output(w: &mut Write, idx: usize, frame: Frame,
+ s: Option<&str>, format: PrintFormat) -> io::Result<()> {
+ // Remove the `17: 0x0 - <unknown>` line.
+ if format == PrintFormat::Short && frame.exact_position == ptr::null() {
+ return Ok(());
+ }
+ match format {
+ PrintFormat::Full => write!(w,
+ " {:2}: {:2$?} - ",
+ idx,
+ frame.exact_position,
+ HEX_WIDTH)?,
+ PrintFormat::Short => write!(w, " {:2}: ", idx)?,
+ }
+ match s {
+ Some(string) => demangle(w, string, format)?,
+ None => w.write_all(b"<unknown>")?,
+ }
+ w.write_all(b"\n")
+}
+
+/// Print the filename and line number of the backtrace frame.
+///
+/// See also `output`.
+#[allow(dead_code)]
+fn output_fileline(w: &mut Write,
+ file: &[u8],
+ line: u32,
+ format: PrintFormat) -> io::Result<()> {
+ // prior line: " ##: {:2$} - func"
+ w.write_all(b"")?;
+ match format {
+ PrintFormat::Full => write!(w,
+ " {:1$}",
+ "",
+ HEX_WIDTH)?,
+ PrintFormat::Short => write!(w, " ")?,
+ }
+
+ let file = str::from_utf8(file).unwrap_or("<unknown>");
+ let file_path = Path::new(file);
+ let mut already_printed = false;
+ if format == PrintFormat::Short && file_path.is_absolute() {
+ if let Ok(cwd) = env::current_dir() {
+ if let Ok(stripped) = file_path.strip_prefix(&cwd) {
+ if let Some(s) = stripped.to_str() {
+ write!(w, " at .{}{}:{}", path::MAIN_SEPARATOR, s, line)?;
+ already_printed = true;
+ }
+ }
+ }
+ }
+ if !already_printed {
+ write!(w, " at {}:{}", file, line)?;
+ }
+
+ w.write_all(b"\n")
+}
+
+
+// All rust symbols are in theory lists of "::"-separated identifiers. Some
+// assemblers, however, can't handle these characters in symbol names. To get
+// around this, we use C++-style mangling. The mangling method is:
+//
+// 1. Prefix the symbol with "_ZN"
+// 2. For each element of the path, emit the length plus the element
+// 3. End the path with "E"
+//
+// For example, "_ZN4testE" => "test" and "_ZN3foo3barE" => "foo::bar".
+//
+// We're the ones printing our backtraces, so we can't rely on anything else to
+// demangle our symbols. It's *much* nicer to look at demangled symbols, so
+// this function is implemented to give us nice pretty output.
+//
+// Note that this demangler isn't quite as fancy as it could be. We have lots
+// of other information in our symbols like hashes, version, type information,
+// etc. Additionally, this doesn't handle glue symbols at all.
+pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> {
+ // During ThinLTO LLVM may import and rename internal symbols, so strip out
+ // those endings first as they're one of the last manglings applied to
+ // symbol names.
+ let llvm = ".llvm.";
+ if let Some(i) = s.find(llvm) {
+ let candidate = &s[i + llvm.len()..];
+ let all_hex = candidate.chars().all(|c| {
+ match c {
+ 'A' ... 'F' | '0' ... '9' => true,
+ _ => false,
+ }
+ });
+
+ if all_hex {
+ s = &s[..i];
+ }
+ }
+
+ // Validate the symbol. If it doesn't look like anything we're
+ // expecting, we just print it literally. Note that we must handle non-rust
+ // symbols because we could have any function in the backtrace.
+ let mut valid = true;
+ let mut inner = s;
+ if s.len() > 4 && s.starts_with("_ZN") && s.ends_with("E") {
+ inner = &s[3 .. s.len() - 1];
+ // On Windows, dbghelp strips leading underscores, so we accept "ZN...E" form too.
+ } else if s.len() > 3 && s.starts_with("ZN") && s.ends_with("E") {
+ inner = &s[2 .. s.len() - 1];
+ } else {
+ valid = false;
+ }
+
+ if valid {
+ let mut chars = inner.chars();
+ while valid {
+ let mut i = 0;
+ for c in chars.by_ref() {
+ if c.is_numeric() {
+ i = i * 10 + c as usize - '0' as usize;
+ } else {
+ break
+ }
+ }
+ if i == 0 {
+ valid = chars.next().is_none();
+ break
+ } else if chars.by_ref().take(i - 1).count() != i - 1 {
+ valid = false;
+ }
+ }
+ }
+
+ // Alright, let's do this.
+ if !valid {
+ writer.write_all(s.as_bytes())?;
+ } else {
+ // remove the `::hfc2edb670e5eda97` part at the end of the symbol.
+ if format == PrintFormat::Short {
+ // The symbol is still mangled.
+ let mut split = inner.rsplitn(2, "17h");
+ match (split.next(), split.next()) {
+ (Some(addr), rest) => {
+ if addr.len() == 16 &&
+ addr.chars().all(|c| c.is_digit(16))
+ {
+ inner = rest.unwrap_or("");
+ }
+ }
+ _ => (),
+ }
+ }
+
+ let mut first = true;
+ while !inner.is_empty() {
+ if !first {
+ writer.write_all(b"::")?;
+ } else {
+ first = false;
+ }
+ let mut rest = inner;
+ while rest.chars().next().unwrap().is_numeric() {
+ rest = &rest[1..];
+ }
+ let i: usize = inner[.. (inner.len() - rest.len())].parse().unwrap();
+ inner = &rest[i..];
+ rest = &rest[..i];
+ if rest.starts_with("_$") {
+ rest = &rest[1..];
+ }
+ while !rest.is_empty() {
+ if rest.starts_with(".") {
+ if let Some('.') = rest[1..].chars().next() {
+ writer.write_all(b"::")?;
+ rest = &rest[2..];
+ } else {
+ writer.write_all(b".")?;
+ rest = &rest[1..];
+ }
+ } else if rest.starts_with("$") {
+ macro_rules! demangle {
+ ($($pat:expr => $demangled:expr),*) => ({
+ $(if rest.starts_with($pat) {
+ writer.write_all($demangled)?;
+ rest = &rest[$pat.len()..];
+ } else)*
+ {
+ writer.write_all(rest.as_bytes())?;
+ break;
+ }
+
+ })
+ }
+
+ // see src/librustc/back/link.rs for these mappings
+ demangle! (
+ "$SP$" => b"@",
+ "$BP$" => b"*",
+ "$RF$" => b"&",
+ "$LT$" => b"<",
+ "$GT$" => b">",
+ "$LP$" => b"(",
+ "$RP$" => b")",
+ "$C$" => b",",
+
+ // in theory we can demangle any Unicode code point, but
+ // for simplicity we just catch the common ones.
+ "$u7e$" => b"~",
+ "$u20$" => b" ",
+ "$u27$" => b"'",
+ "$u5b$" => b"[",
+ "$u5d$" => b"]",
+ "$u7b$" => b"{",
+ "$u7d$" => b"}",
+ "$u3b$" => b";",
+ "$u2b$" => b"+",
+ "$u22$" => b"\""
+ )
+ } else {
+ let idx = match rest.char_indices().find(|&(_, c)| c == '$' || c == '.') {
+ None => rest.len(),
+ Some((i, _)) => i,
+ };
+ writer.write_all(rest[..idx].as_bytes())?;
+ rest = &rest[idx..];
+ }
+ }
+ }
+ }
+
+ Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+ use sys_common;
+ macro_rules! t { ($a:expr, $b:expr) => ({
+ let mut m = Vec::new();
+ sys_common::backtrace::demangle(&mut m,
+ $a,
+ super::PrintFormat::Full).unwrap();
+ assert_eq!(String::from_utf8(m).unwrap(), $b);
+ }) }
+
+ #[test]
+ fn demangle() {
+ t!("test", "test");
+ t!("_ZN4testE", "test");
+ t!("_ZN4test", "_ZN4test");
+ t!("_ZN4test1a2bcE", "test::a::bc");
+ }
+
+ #[test]
+ fn demangle_dollars() {
+ t!("_ZN4$RP$E", ")");
+ t!("_ZN8$RF$testE", "&test");
+ t!("_ZN8$BP$test4foobE", "*test::foob");
+ t!("_ZN9$u20$test4foobE", " test::foob");
+ t!("_ZN35Bar$LT$$u5b$u32$u3b$$u20$4$u5d$$GT$E", "Bar<[u32; 4]>");
+ }
+
+ #[test]
+ fn demangle_many_dollars() {
+ t!("_ZN13test$u20$test4foobE", "test test::foob");
+ t!("_ZN12test$BP$test4foobE", "test*test::foob");
+ }
+
+ #[test]
+ fn demangle_windows() {
+ t!("ZN4testE", "test");
+ t!("ZN13test$u20$test4foobE", "test test::foob");
+ t!("ZN12test$RF$test4foobE", "test&test::foob");
+ }
+
+ #[test]
+ fn demangle_elements_beginning_with_underscore() {
+ t!("_ZN13_$LT$test$GT$E", "<test>");
+ t!("_ZN28_$u7b$$u7b$closure$u7d$$u7d$E", "{{closure}}");
+ t!("_ZN15__STATIC_FMTSTRE", "__STATIC_FMTSTR");
+ }
+
+ #[test]
+ fn demangle_trait_impls() {
+ t!("_ZN71_$LT$Test$u20$$u2b$$u20$$u27$static$u20$as$u20$foo..Bar$LT$Test$GT$$GT$3barE",
+ "<Test + 'static as foo::Bar<Test>>::bar");
+ }
+}
diff --git a/ctr-std/src/sys_common/bytestring.rs b/ctr-std/src/sys_common/bytestring.rs
new file mode 100644
index 0000000..eb9cad0
--- /dev/null
+++ b/ctr-std/src/sys_common/bytestring.rs
@@ -0,0 +1,56 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![allow(dead_code)]
+
+use fmt::{Formatter, Result, Write};
+use std_unicode::lossy::{Utf8Lossy, Utf8LossyChunk};
+
+pub fn debug_fmt_bytestring(slice: &[u8], f: &mut Formatter) -> Result {
+ // Writes out a valid unicode string with the correct escape sequences
+ fn write_str_escaped(f: &mut Formatter, s: &str) -> Result {
+ for c in s.chars().flat_map(|c| c.escape_debug()) {
+ f.write_char(c)?
+ }
+ Ok(())
+ }
+
+ f.write_str("\"")?;
+ for Utf8LossyChunk { valid, broken } in Utf8Lossy::from_bytes(slice).chunks() {
+ write_str_escaped(f, valid)?;
+ for b in broken {
+ write!(f, "\\x{:02X}", b)?;
+ }
+ }
+ f.write_str("\"")
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use fmt::{Formatter, Result, Debug};
+
+ #[test]
+ fn smoke() {
+ struct Helper<'a>(&'a [u8]);
+
+ impl<'a> Debug for Helper<'a> {
+ fn fmt(&self, f: &mut Formatter) -> Result {
+ debug_fmt_bytestring(self.0, f)
+ }
+ }
+
+ let input = b"\xF0hello,\tworld";
+ let expected = r#""\xF0hello,\tworld""#;
+ let output = format!("{:?}", Helper(input));
+
+ assert!(output == expected);
+ }
+}
diff --git a/ctr-std/src/sys_common/gnu/libbacktrace.rs b/ctr-std/src/sys_common/gnu/libbacktrace.rs
new file mode 100644
index 0000000..6ad3af6
--- /dev/null
+++ b/ctr-std/src/sys_common/gnu/libbacktrace.rs
@@ -0,0 +1,216 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use libc;
+
+use ffi::CStr;
+use io;
+use mem;
+use ptr;
+use sys::backtrace::BacktraceContext;
+use sys_common::backtrace::Frame;
+
+pub fn foreach_symbol_fileline<F>(frame: Frame,
+ mut f: F,
+ _: &BacktraceContext) -> io::Result<bool>
+where F: FnMut(&[u8], u32) -> io::Result<()>
+{
+ // pcinfo may return an arbitrary number of file:line pairs,
+ // in the order of stack trace (i.e. inlined calls first).
+ // in order to avoid allocation, we stack-allocate a fixed number of entries.
+ const FILELINE_SIZE: usize = 32;
+ let mut fileline_buf = [(ptr::null(), !0); FILELINE_SIZE];
+ let ret;
+ let fileline_count = {
+ let state = unsafe { init_state() };
+ if state.is_null() {
+ return Err(io::Error::new(
+ io::ErrorKind::Other,
+ "failed to allocate libbacktrace state")
+ )
+ }
+ let mut fileline_win: &mut [FileLine] = &mut fileline_buf;
+ let fileline_addr = &mut fileline_win as *mut &mut [FileLine];
+ ret = unsafe {
+ backtrace_pcinfo(state,
+ frame.exact_position as libc::uintptr_t,
+ pcinfo_cb,
+ error_cb,
+ fileline_addr as *mut libc::c_void)
+ };
+ FILELINE_SIZE - fileline_win.len()
+ };
+ if ret == 0 {
+ for &(file, line) in &fileline_buf[..fileline_count] {
+ if file.is_null() { continue; } // just to be sure
+ let file = unsafe { CStr::from_ptr(file).to_bytes() };
+ f(file, line)?;
+ }
+ Ok(fileline_count == FILELINE_SIZE)
+ } else {
+ Ok(false)
+ }
+}
+
+/// Converts a pointer to a symbol to its string value.
+pub fn resolve_symname<F>(frame: Frame,
+ callback: F,
+ _: &BacktraceContext) -> io::Result<()>
+ where F: FnOnce(Option<&str>) -> io::Result<()>
+{
+ let symname = {
+ let state = unsafe { init_state() };
+ if state.is_null() {
+ return Err(io::Error::new(
+ io::ErrorKind::Other,
+ "failed to allocate libbacktrace state")
+ )
+ }
+ let mut data: *const libc::c_char = ptr::null();
+ let data_addr = &mut data as *mut *const libc::c_char;
+ let ret = unsafe {
+ backtrace_syminfo(state,
+ frame.symbol_addr as libc::uintptr_t,
+ syminfo_cb,
+ error_cb,
+ data_addr as *mut libc::c_void)
+ };
+ if ret == 0 || data.is_null() {
+ None
+ } else {
+ unsafe {
+ CStr::from_ptr(data).to_str().ok()
+ }
+ }
+ };
+ callback(symname)
+}
+
+////////////////////////////////////////////////////////////////////////
+// libbacktrace.h API
+////////////////////////////////////////////////////////////////////////
+type backtrace_syminfo_callback =
+extern "C" fn(data: *mut libc::c_void,
+ pc: libc::uintptr_t,
+ symname: *const libc::c_char,
+ symval: libc::uintptr_t,
+ symsize: libc::uintptr_t);
+type backtrace_full_callback =
+extern "C" fn(data: *mut libc::c_void,
+ pc: libc::uintptr_t,
+ filename: *const libc::c_char,
+ lineno: libc::c_int,
+ function: *const libc::c_char) -> libc::c_int;
+type backtrace_error_callback =
+extern "C" fn(data: *mut libc::c_void,
+ msg: *const libc::c_char,
+ errnum: libc::c_int);
+enum backtrace_state {}
+
+extern {
+ fn backtrace_create_state(filename: *const libc::c_char,
+ threaded: libc::c_int,
+ error: backtrace_error_callback,
+ data: *mut libc::c_void)
+ -> *mut backtrace_state;
+ fn backtrace_syminfo(state: *mut backtrace_state,
+ addr: libc::uintptr_t,
+ cb: backtrace_syminfo_callback,
+ error: backtrace_error_callback,
+ data: *mut libc::c_void) -> libc::c_int;
+ fn backtrace_pcinfo(state: *mut backtrace_state,
+ addr: libc::uintptr_t,
+ cb: backtrace_full_callback,
+ error: backtrace_error_callback,
+ data: *mut libc::c_void) -> libc::c_int;
+}
+
+////////////////////////////////////////////////////////////////////////
+// helper callbacks
+////////////////////////////////////////////////////////////////////////
+
+type FileLine = (*const libc::c_char, u32);
+
+extern fn error_cb(_data: *mut libc::c_void, _msg: *const libc::c_char,
+ _errnum: libc::c_int) {
+ // do nothing for now
+}
+extern fn syminfo_cb(data: *mut libc::c_void,
+ _pc: libc::uintptr_t,
+ symname: *const libc::c_char,
+ _symval: libc::uintptr_t,
+ _symsize: libc::uintptr_t) {
+ let slot = data as *mut *const libc::c_char;
+ unsafe { *slot = symname; }
+}
+extern fn pcinfo_cb(data: *mut libc::c_void,
+ _pc: libc::uintptr_t,
+ filename: *const libc::c_char,
+ lineno: libc::c_int,
+ _function: *const libc::c_char) -> libc::c_int {
+ if !filename.is_null() {
+ let slot = data as *mut &mut [FileLine];
+ let buffer = unsafe {ptr::read(slot)};
+
+ // if the buffer is not full, add file:line to the buffer
+ // and adjust the buffer for next possible calls to pcinfo_cb.
+ if !buffer.is_empty() {
+ buffer[0] = (filename, lineno as u32);
+ unsafe { ptr::write(slot, &mut buffer[1..]); }
+ }
+ }
+
+ 0
+}
+
+// The libbacktrace API supports creating a state, but it does not
+// support destroying a state. I personally take this to mean that a
+// state is meant to be created and then live forever.
+//
+// I would love to register an at_exit() handler which cleans up this
+// state, but libbacktrace provides no way to do so.
+//
+// With these constraints, this function has a statically cached state
+// that is calculated the first time this is requested. Remember that
+// backtracing all happens serially (one global lock).
+//
+// Things don't work so well on not-Linux since libbacktrace can't track
+// down which executable this is. We at one point used env::current_exe but
+// it turns out that there are some serious security issues with that
+// approach.
+//
+// Specifically, on certain platforms like BSDs, a malicious actor can cause
+// an arbitrary file to be placed at the path returned by current_exe.
+// libbacktrace does not behave defensively in the presence of ill-formed
+// DWARF information, and has been demonstrated to segfault in at least one
+// case. There is no evidence at the moment to suggest that a more carefully
+// constructed file can't cause arbitrary code execution. As a result of all
+// of this, we don't hint libbacktrace with the path to the current process.
+unsafe fn init_state() -> *mut backtrace_state {
+ static mut STATE: *mut backtrace_state = ptr::null_mut();
+ if !STATE.is_null() { return STATE }
+
+ let filename = match ::sys::backtrace::gnu::get_executable_filename() {
+ Ok((filename, file)) => {
+ // filename is purposely leaked here since libbacktrace requires
+ // it to stay allocated permanently, file is also leaked so that
+ // the file stays locked
+ let filename_ptr = filename.as_ptr();
+ mem::forget(filename);
+ mem::forget(file);
+ filename_ptr
+ },
+ Err(_) => ptr::null(),
+ };
+
+ STATE = backtrace_create_state(filename, 0, error_cb,
+ ptr::null_mut());
+ STATE
+}
diff --git a/ctr-std/src/sys_common/gnu/mod.rs b/ctr-std/src/sys_common/gnu/mod.rs
new file mode 100644
index 0000000..3a8cf2d
--- /dev/null
+++ b/ctr-std/src/sys_common/gnu/mod.rs
@@ -0,0 +1,15 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![allow(missing_docs)]
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+
+pub mod libbacktrace;
diff --git a/ctr-std/src/sys_common/io.rs b/ctr-std/src/sys_common/io.rs
index 23daeeb..ab23936 100644
--- a/ctr-std/src/sys_common/io.rs
+++ b/ctr-std/src/sys_common/io.rs
@@ -7,51 +7,8 @@
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-use io;
-use io::ErrorKind;
-use io::Read;
-use slice::from_raw_parts_mut;
-
pub const DEFAULT_BUF_SIZE: usize = 8 * 1024;
-// Provides read_to_end functionality over an uninitialized buffer.
-// This function is unsafe because it calls the underlying
-// read function with a slice into uninitialized memory. The default
-// implementation of read_to_end for readers will zero out new memory in
-// the buf before passing it to read, but avoiding this zero can often
-// lead to a fairly significant performance win.
-//
-// Implementations using this method have to adhere to two guarantees:
-// * The implementation of read never reads the buffer provided.
-// * The implementation of read correctly reports how many bytes were written.
-pub unsafe fn read_to_end_uninitialized(r: &mut Read, buf: &mut Vec<u8>) -> io::Result<usize> {
-
- let start_len = buf.len();
- buf.reserve(16);
-
- // Always try to read into the empty space of the vector (from the length to the capacity).
- // If the vector ever fills up then we reserve an extra byte which should trigger the normal
- // reallocation routines for the vector, which will likely double the size.
- //
- // This function is similar to the read_to_end function in std::io, but the logic about
- // reservations and slicing is different enough that this is duplicated here.
- loop {
- if buf.len() == buf.capacity() {
- buf.reserve(1);
- }
-
- let buf_slice = from_raw_parts_mut(buf.as_mut_ptr().offset(buf.len() as isize),
- buf.capacity() - buf.len());
-
- match r.read(buf_slice) {
- Ok(0) => { return Ok(buf.len() - start_len); }
- Ok(n) => { let len = buf.len() + n; buf.set_len(len); },
- Err(ref e) if e.kind() == ErrorKind::Interrupted => { }
- Err(e) => { return Err(e); }
- }
- }
-}
-
#[cfg(test)]
#[allow(dead_code)] // not used on emscripten
pub mod test {
@@ -91,89 +48,3 @@ pub mod test {
TempDir(ret)
}
}
-
-#[cfg(test)]
-mod tests {
- use io::prelude::*;
- use super::*;
- use io;
- use io::{ErrorKind, Take, Repeat, repeat};
- use slice::from_raw_parts;
-
- struct ErrorRepeat {
- lr: Take<Repeat>
- }
-
- fn error_repeat(byte: u8, limit: u64) -> ErrorRepeat {
- ErrorRepeat { lr: repeat(byte).take(limit) }
- }
-
- impl Read for ErrorRepeat {
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
- let ret = self.lr.read(buf);
- if let Ok(0) = ret {
- return Err(io::Error::new(ErrorKind::Other, ""))
- }
- ret
- }
- }
-
- fn init_vec_data() -> Vec<u8> {
- let mut vec = vec![10u8; 200];
- unsafe { vec.set_len(0); }
- vec
- }
-
- fn assert_all_eq(buf: &[u8], value: u8) {
- for n in buf {
- assert_eq!(*n, value);
- }
- }
-
- fn validate(buf: &Vec<u8>, good_read_len: usize) {
- assert_all_eq(buf, 1u8);
- let cap = buf.capacity();
- let end_slice = unsafe { from_raw_parts(buf.as_ptr().offset(good_read_len as isize),
- cap - good_read_len) };
- assert_all_eq(end_slice, 10u8);
- }
-
- #[test]
- fn read_to_end_uninit_error() {
- let mut er = error_repeat(1,100);
- let mut vec = init_vec_data();
- if let Err(_) = unsafe { read_to_end_uninitialized(&mut er, &mut vec) } {
- validate(&vec, 100);
- } else {
- assert!(false);
- }
- }
-
- #[test]
- fn read_to_end_uninit_zero_len_vec() {
- let mut er = repeat(1).take(100);
- let mut vec = Vec::new();
- let n = unsafe{ read_to_end_uninitialized(&mut er, &mut vec).unwrap() };
- assert_all_eq(&vec, 1u8);
- assert_eq!(vec.len(), n);
- }
-
- #[test]
- fn read_to_end_uninit_good() {
- let mut er = repeat(1).take(100);
- let mut vec = init_vec_data();
- let n = unsafe{ read_to_end_uninitialized(&mut er, &mut vec).unwrap() };
- validate(&vec, 100);
- assert_eq!(vec.len(), n);
- }
-
- #[bench]
- #[cfg_attr(target_os = "emscripten", ignore)]
- fn bench_uninitialized(b: &mut ::test::Bencher) {
- b.iter(|| {
- let mut lr = repeat(1).take(10000000);
- let mut vec = Vec::with_capacity(1024);
- unsafe { read_to_end_uninitialized(&mut lr, &mut vec) }
- });
- }
-}
diff --git a/ctr-std/src/sys_common/memchr.rs b/ctr-std/src/sys_common/memchr.rs
deleted file mode 100644
index 3824a5f..0000000
--- a/ctr-std/src/sys_common/memchr.rs
+++ /dev/null
@@ -1,230 +0,0 @@
-// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-//
-// Original implementation taken from rust-memchr
-// Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
-
-#[allow(dead_code)]
-pub mod fallback {
- use cmp;
- use mem;
-
- const LO_U64: u64 = 0x0101010101010101;
- const HI_U64: u64 = 0x8080808080808080;
-
- // use truncation
- const LO_USIZE: usize = LO_U64 as usize;
- const HI_USIZE: usize = HI_U64 as usize;
-
- /// Return `true` if `x` contains any zero byte.
- ///
- /// From *Matters Computational*, J. Arndt
- ///
- /// "The idea is to subtract one from each of the bytes and then look for
- /// bytes where the borrow propagated all the way to the most significant
- /// bit."
- #[inline]
- fn contains_zero_byte(x: usize) -> bool {
- x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
- }
-
- #[cfg(target_pointer_width = "32")]
- #[inline]
- fn repeat_byte(b: u8) -> usize {
- let mut rep = (b as usize) << 8 | b as usize;
- rep = rep << 16 | rep;
- rep
- }
-
- #[cfg(target_pointer_width = "64")]
- #[inline]
- fn repeat_byte(b: u8) -> usize {
- let mut rep = (b as usize) << 8 | b as usize;
- rep = rep << 16 | rep;
- rep = rep << 32 | rep;
- rep
- }
-
- /// Return the first index matching the byte `a` in `text`.
- pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
- // Scan for a single byte value by reading two `usize` words at a time.
- //
- // Split `text` in three parts
- // - unaligned initial part, before the first word aligned address in text
- // - body, scan by 2 words at a time
- // - the last remaining part, < 2 word size
- let len = text.len();
- let ptr = text.as_ptr();
- let usize_bytes = mem::size_of::<usize>();
-
- // search up to an aligned boundary
- let align = (ptr as usize) & (usize_bytes- 1);
- let mut offset;
- if align > 0 {
- offset = cmp::min(usize_bytes - align, len);
- if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
- return Some(index);
- }
- } else {
- offset = 0;
- }
-
- // search the body of the text
- let repeated_x = repeat_byte(x);
-
- if len >= 2 * usize_bytes {
- while offset <= len - 2 * usize_bytes {
- unsafe {
- let u = *(ptr.offset(offset as isize) as *const usize);
- let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);
-
- // break if there is a matching byte
- let zu = contains_zero_byte(u ^ repeated_x);
- let zv = contains_zero_byte(v ^ repeated_x);
- if zu || zv {
- break;
- }
- }
- offset += usize_bytes * 2;
- }
- }
-
- // find the byte after the point the body loop stopped
- text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i)
- }
-
- /// Return the last index matching the byte `a` in `text`.
- pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> {
- // Scan for a single byte value by reading two `usize` words at a time.
- //
- // Split `text` in three parts
- // - unaligned tail, after the last word aligned address in text
- // - body, scan by 2 words at a time
- // - the first remaining bytes, < 2 word size
- let len = text.len();
- let ptr = text.as_ptr();
- let usize_bytes = mem::size_of::<usize>();
-
- // search to an aligned boundary
- let end_align = (ptr as usize + len) & (usize_bytes - 1);
- let mut offset;
- if end_align > 0 {
- offset = if end_align >= len { 0 } else { len - end_align };
- if let Some(index) = text[offset..].iter().rposition(|elt| *elt == x) {
- return Some(offset + index);
- }
- } else {
- offset = len;
- }
-
- // search the body of the text
- let repeated_x = repeat_byte(x);
-
- while offset >= 2 * usize_bytes {
- unsafe {
- let u = *(ptr.offset(offset as isize - 2 * usize_bytes as isize) as *const usize);
- let v = *(ptr.offset(offset as isize - usize_bytes as isize) as *const usize);
-
- // break if there is a matching byte
- let zu = contains_zero_byte(u ^ repeated_x);
- let zv = contains_zero_byte(v ^ repeated_x);
- if zu || zv {
- break;
- }
- }
- offset -= 2 * usize_bytes;
- }
-
- // find the byte before the point the body loop stopped
- text[..offset].iter().rposition(|elt| *elt == x)
- }
-
- // test fallback implementations on all platforms
- #[test]
- fn matches_one() {
- assert_eq!(Some(0), memchr(b'a', b"a"));
- }
-
- #[test]
- fn matches_begin() {
- assert_eq!(Some(0), memchr(b'a', b"aaaa"));
- }
-
- #[test]
- fn matches_end() {
- assert_eq!(Some(4), memchr(b'z', b"aaaaz"));
- }
-
- #[test]
- fn matches_nul() {
- assert_eq!(Some(4), memchr(b'\x00', b"aaaa\x00"));
- }
-
- #[test]
- fn matches_past_nul() {
- assert_eq!(Some(5), memchr(b'z', b"aaaa\x00z"));
- }
-
- #[test]
- fn no_match_empty() {
- assert_eq!(None, memchr(b'a', b""));
- }
-
- #[test]
- fn no_match() {
- assert_eq!(None, memchr(b'a', b"xyz"));
- }
-
- #[test]
- fn matches_one_reversed() {
- assert_eq!(Some(0), memrchr(b'a', b"a"));
- }
-
- #[test]
- fn matches_begin_reversed() {
- assert_eq!(Some(3), memrchr(b'a', b"aaaa"));
- }
-
- #[test]
- fn matches_end_reversed() {
- assert_eq!(Some(0), memrchr(b'z', b"zaaaa"));
- }
-
- #[test]
- fn matches_nul_reversed() {
- assert_eq!(Some(4), memrchr(b'\x00', b"aaaa\x00"));
- }
-
- #[test]
- fn matches_past_nul_reversed() {
- assert_eq!(Some(0), memrchr(b'z', b"z\x00aaaa"));
- }
-
- #[test]
- fn no_match_empty_reversed() {
- assert_eq!(None, memrchr(b'a', b""));
- }
-
- #[test]
- fn no_match_reversed() {
- assert_eq!(None, memrchr(b'a', b"xyz"));
- }
-
- #[test]
- fn each_alignment_reversed() {
- let mut data = [1u8; 64];
- let needle = 2;
- let pos = 40;
- data[pos] = needle;
- for start in 0..16 {
- assert_eq!(Some(pos - start), memrchr(needle, &data[start..]));
- }
- }
-}
diff --git a/ctr-std/src/sys_common/mod.rs b/ctr-std/src/sys_common/mod.rs
index 4a7d79f..27504d3 100644
--- a/ctr-std/src/sys_common/mod.rs
+++ b/ctr-std/src/sys_common/mod.rs
@@ -10,7 +10,7 @@
//! Platform-independent platform abstraction
//!
-//! This is the platform-independent portion of the standard libraries
+//! This is the platform-independent portion of the standard library's
//! platform abstraction layer, whereas `std::sys` is the
//! platform-specific portion.
//!
@@ -23,11 +23,16 @@
//! `std::sys` from the standard library.
#![allow(missing_docs)]
+#![allow(missing_debug_implementations)]
+
+use sync::Once;
+use sys;
pub mod at_exit_imp;
+#[cfg(feature = "backtrace")]
+pub mod backtrace;
pub mod condvar;
pub mod io;
-pub mod memchr;
pub mod mutex;
pub mod poison;
pub mod remutex;
@@ -36,6 +41,25 @@ pub mod thread;
pub mod thread_info;
pub mod thread_local;
pub mod util;
+pub mod wtf8;
+pub mod bytestring;
+pub mod process;
+
+cfg_if! {
+ if #[cfg(any(target_os = "cloudabi", target_os = "l4re", target_os = "redox"))] {
+ pub use sys::net;
+ } else if #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] {
+ pub use sys::net;
+ } else {
+ pub mod net;
+ }
+}
+
+#[cfg(feature = "backtrace")]
+#[cfg(any(all(unix, not(target_os = "emscripten")),
+ all(windows, target_env = "gnu"),
+ target_os = "redox"))]
+pub mod gnu;
// common error constructors
@@ -81,6 +105,16 @@ macro_rules! rtabort {
($($t:tt)*) => (::sys_common::util::abort(format_args!($($t)*)))
}
+/// One-time runtime cleanup.
+pub fn cleanup() {
+ static CLEANUP: Once = Once::new();
+ CLEANUP.call_once(|| unsafe {
+ sys::args::cleanup();
+ sys::stack_overflow::cleanup();
+ at_exit_imp::cleanup();
+ });
+}
+
// Computes (value*numer)/denom without overflow, as long as both
// (numer*denom) and the overall result fit into i64 (which is the case
// for our time conversions).
diff --git a/ctr-std/src/sys_common/net.rs b/ctr-std/src/sys_common/net.rs
new file mode 100644
index 0000000..6223f82
--- /dev/null
+++ b/ctr-std/src/sys_common/net.rs
@@ -0,0 +1,630 @@
+// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![allow(dead_code)]
+
+use cmp;
+use ffi::CString;
+use fmt;
+use io::{self, Error, ErrorKind};
+use libc::{c_int, c_void};
+use mem;
+use net::{SocketAddr, Shutdown, Ipv4Addr, Ipv6Addr};
+use ptr;
+use sys::net::{cvt, cvt_r, cvt_gai, Socket, init, wrlen_t};
+use sys::net::netc as c;
+use sys_common::{AsInner, FromInner, IntoInner};
+use time::Duration;
+
+// IPV6 stuff does not seem to be supported on 3DS. TODO: Determine if that's true
+const IPV6_ADD_MEMBERSHIP: c_int = 0x0;
+const IPV6_DROP_MEMBERSHIP: c_int = 0x0;
+const IPV6_MULTICAST_LOOP: c_int = 0x0;
+const IPV6_V6ONLY: c_int = 0x0;
+
+// Signals do not seem to be supported either, so this flag is defined as a no-op
+const MSG_NOSIGNAL: c_int = 0x0;
+
+// These constants are also currently missing from libctru. TODO: Find them?
+const SO_SNDTIMEO: c_int = 0x0;
+const SO_RCVTIMEO: c_int = 0x0;
+const SO_BROADCAST: c_int = 0x0;
+
+////////////////////////////////////////////////////////////////////////////////
+// sockaddr and misc bindings
+////////////////////////////////////////////////////////////////////////////////
+
+pub fn setsockopt<T>(sock: &Socket, opt: c_int, val: c_int,
+ payload: T) -> io::Result<()> {
+ unsafe {
+ let payload = &payload as *const T as *const c_void;
+ cvt(c::setsockopt(*sock.as_inner(), opt, val, payload,
+ mem::size_of::<T>() as c::socklen_t))?;
+ Ok(())
+ }
+}
+
+pub fn getsockopt<T: Copy>(sock: &Socket, opt: c_int,
+ val: c_int) -> io::Result<T> {
+ unsafe {
+ let mut slot: T = mem::zeroed();
+ let mut len = mem::size_of::<T>() as c::socklen_t;
+ cvt(c::getsockopt(*sock.as_inner(), opt, val,
+ &mut slot as *mut _ as *mut _,
+ &mut len))?;
+ assert_eq!(len as usize, mem::size_of::<T>());
+ Ok(slot)
+ }
+}
+
+fn sockname<F>(f: F) -> io::Result<SocketAddr>
+ where F: FnOnce(*mut c::sockaddr, *mut c::socklen_t) -> c_int
+{
+ unsafe {
+ let mut storage: c::sockaddr_storage = mem::zeroed();
+ let mut len = mem::size_of_val(&storage) as c::socklen_t;
+ cvt(f(&mut storage as *mut _ as *mut _, &mut len))?;
+ sockaddr_to_addr(&storage, len as usize)
+ }
+}
+
+pub fn sockaddr_to_addr(storage: &c::sockaddr_storage,
+ len: usize) -> io::Result<SocketAddr> {
+ match storage.ss_family as c_int {
+ c::AF_INET => {
+ assert!(len as usize >= mem::size_of::<c::sockaddr_in>());
+ Ok(SocketAddr::V4(FromInner::from_inner(unsafe {
+ *(storage as *const _ as *const c::sockaddr_in)
+ })))
+ }
+ c::AF_INET6 => {
+ assert!(len as usize >= mem::size_of::<c::sockaddr_in6>());
+ Ok(SocketAddr::V6(FromInner::from_inner(unsafe {
+ *(storage as *const _ as *const c::sockaddr_in6)
+ })))
+ }
+ _ => {
+ Err(Error::new(ErrorKind::InvalidInput, "invalid argument"))
+ }
+ }
+}
+
+#[cfg(target_os = "android")]
+fn to_ipv6mr_interface(value: u32) -> c_int {
+ value as c_int
+}
+
+#[cfg(not(target_os = "android"))]
+fn to_ipv6mr_interface(value: u32) -> ::libc::c_uint {
+ value as ::libc::c_uint
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// get_host_addresses
+////////////////////////////////////////////////////////////////////////////////
+
+pub struct LookupHost {
+ original: *mut c::addrinfo,
+ cur: *mut c::addrinfo,
+}
+
+impl Iterator for LookupHost {
+ type Item = SocketAddr;
+ fn next(&mut self) -> Option<SocketAddr> {
+ loop {
+ unsafe {
+ let cur = self.cur.as_ref()?;
+ self.cur = cur.ai_next;
+ match sockaddr_to_addr(mem::transmute(cur.ai_addr),
+ cur.ai_addrlen as usize)
+ {
+ Ok(addr) => return Some(addr),
+ Err(_) => continue,
+ }
+ }
+ }
+ }
+}
+
+unsafe impl Sync for LookupHost {}
+unsafe impl Send for LookupHost {}
+
+impl Drop for LookupHost {
+ fn drop(&mut self) {
+ unsafe { c::freeaddrinfo(self.original) }
+ }
+}
+
+pub fn lookup_host(host: &str) -> io::Result<LookupHost> {
+ init();
+
+ let c_host = CString::new(host)?;
+ let mut hints: c::addrinfo = unsafe { mem::zeroed() };
+ hints.ai_socktype = c::SOCK_STREAM;
+ let mut res = ptr::null_mut();
+ unsafe {
+ match cvt_gai(c::getaddrinfo(c_host.as_ptr(), ptr::null(), &hints, &mut res)) {
+ Ok(_) => {
+ Ok(LookupHost { original: res, cur: res })
+ },
+ #[cfg(unix)]
+ Err(e) => {
+ // If we're running glibc prior to version 2.26, the lookup
+ // failure could be caused by caching a stale /etc/resolv.conf.
+ // We need to call libc::res_init() to clear the cache. But we
+ // shouldn't call it on any other platform, because other
+ // res_init implementations aren't thread-safe. See
+ // https://github.com/rust-lang/rust/issues/41570 and
+ // https://github.com/rust-lang/rust/issues/43592.
+ use sys::net::res_init_if_glibc_before_2_26;
+ let _ = res_init_if_glibc_before_2_26();
+ Err(e)
+ },
+ // the cfg is needed here to avoid an "unreachable pattern" warning
+ #[cfg(not(unix))]
+ Err(e) => Err(e),
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// TCP streams
+////////////////////////////////////////////////////////////////////////////////
+
+pub struct TcpStream {
+ inner: Socket,
+}
+
+impl TcpStream {
+ pub fn connect(addr: &SocketAddr) -> io::Result<TcpStream> {
+ init();
+
+ let sock = Socket::new(addr, c::SOCK_STREAM)?;
+
+ let (addrp, len) = addr.into_inner();
+ cvt_r(|| unsafe { c::connect(*sock.as_inner(), addrp, len) })?;
+ Ok(TcpStream { inner: sock })
+ }
+
+ pub fn connect_timeout(addr: &SocketAddr, timeout: Duration) -> io::Result<TcpStream> {
+ init();
+
+ let sock = Socket::new(addr, c::SOCK_STREAM)?;
+ sock.connect_timeout(addr, timeout)?;
+ Ok(TcpStream { inner: sock })
+ }
+
+ pub fn socket(&self) -> &Socket { &self.inner }
+
+ pub fn into_socket(self) -> Socket { self.inner }
+
+ pub fn set_read_timeout(&self, dur: Option<Duration>) -> io::Result<()> {
+ self.inner.set_timeout(dur, SO_RCVTIMEO)
+ }
+
+ pub fn set_write_timeout(&self, dur: Option<Duration>) -> io::Result<()> {
+ self.inner.set_timeout(dur, SO_SNDTIMEO)
+ }
+
+ pub fn read_timeout(&self) -> io::Result<Option<Duration>> {
+ self.inner.timeout(SO_RCVTIMEO)
+ }
+
+ pub fn write_timeout(&self) -> io::Result<Option<Duration>> {
+ self.inner.timeout(SO_SNDTIMEO)
+ }
+
+ pub fn peek(&self, buf: &mut [u8]) -> io::Result<usize> {
+ self.inner.peek(buf)
+ }
+
+ pub fn read(&self, buf: &mut [u8]) -> io::Result<usize> {
+ self.inner.read(buf)
+ }
+
+ pub fn write(&self, buf: &[u8]) -> io::Result<usize> {
+ let len = cmp::min(buf.len(), <wrlen_t>::max_value() as usize) as wrlen_t;
+ let ret = cvt(unsafe {
+ c::send(*self.inner.as_inner(),
+ buf.as_ptr() as *const c_void,
+ len,
+ MSG_NOSIGNAL)
+ })?;
+ Ok(ret as usize)
+ }
+
+ pub fn peer_addr(&self) -> io::Result<SocketAddr> {
+ sockname(|buf, len| unsafe {
+ c::getpeername(*self.inner.as_inner(), buf, len)
+ })
+ }
+
+ pub fn socket_addr(&self) -> io::Result<SocketAddr> {
+ sockname(|buf, len| unsafe {
+ c::getsockname(*self.inner.as_inner(), buf, len)
+ })
+ }
+
+ pub fn shutdown(&self, how: Shutdown) -> io::Result<()> {
+ self.inner.shutdown(how)
+ }
+
+ pub fn duplicate(&self) -> io::Result<TcpStream> {
+ self.inner.duplicate().map(|s| TcpStream { inner: s })
+ }
+
+ pub fn set_nodelay(&self, nodelay: bool) -> io::Result<()> {
+ self.inner.set_nodelay(nodelay)
+ }
+
+ pub fn nodelay(&self) -> io::Result<bool> {
+ self.inner.nodelay()
+ }
+
+ pub fn set_ttl(&self, ttl: u32) -> io::Result<()> {
+ setsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL, ttl as c_int)
+ }
+
+ pub fn ttl(&self) -> io::Result<u32> {
+ let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL)?;
+ Ok(raw as u32)
+ }
+
+ pub fn take_error(&self) -> io::Result<Option<io::Error>> {
+ self.inner.take_error()
+ }
+
+ pub fn set_nonblocking(&self, nonblocking: bool) -> io::Result<()> {
+ self.inner.set_nonblocking(nonblocking)
+ }
+}
+
+impl FromInner<Socket> for TcpStream {
+ fn from_inner(socket: Socket) -> TcpStream {
+ TcpStream { inner: socket }
+ }
+}
+
+impl fmt::Debug for TcpStream {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let mut res = f.debug_struct("TcpStream");
+
+ if let Ok(addr) = self.socket_addr() {
+ res.field("addr", &addr);
+ }
+
+ if let Ok(peer) = self.peer_addr() {
+ res.field("peer", &peer);
+ }
+
+ let name = if cfg!(windows) {"socket"} else {"fd"};
+ res.field(name, &self.inner.as_inner())
+ .finish()
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// TCP listeners
+////////////////////////////////////////////////////////////////////////////////
+
+pub struct TcpListener {
+ inner: Socket,
+}
+
+impl TcpListener {
+ pub fn bind(addr: &SocketAddr) -> io::Result<TcpListener> {
+ init();
+
+ let sock = Socket::new(addr, c::SOCK_STREAM)?;
+
+ // On platforms with Berkeley-derived sockets, this allows us
+ // to quickly rebind a socket, without needing to wait for
+ // the OS to clean up the previous one.
+ if !cfg!(windows) {
+ setsockopt(&sock, c::SOL_SOCKET, c::SO_REUSEADDR,
+ 1 as c_int)?;
+ }
+
+ // Bind our new socket
+ let (addrp, len) = addr.into_inner();
+ cvt(unsafe { c::bind(*sock.as_inner(), addrp, len as _) })?;
+
+ // Start listening
+ cvt(unsafe { c::listen(*sock.as_inner(), 128) })?;
+ Ok(TcpListener { inner: sock })
+ }
+
+ pub fn socket(&self) -> &Socket { &self.inner }
+
+ pub fn into_socket(self) -> Socket { self.inner }
+
+ pub fn socket_addr(&self) -> io::Result<SocketAddr> {
+ sockname(|buf, len| unsafe {
+ c::getsockname(*self.inner.as_inner(), buf, len)
+ })
+ }
+
+ pub fn accept(&self) -> io::Result<(TcpStream, SocketAddr)> {
+ let mut storage: c::sockaddr_storage = unsafe { mem::zeroed() };
+ let mut len = mem::size_of_val(&storage) as c::socklen_t;
+ let sock = self.inner.accept(&mut storage as *mut _ as *mut _,
+ &mut len)?;
+ let addr = sockaddr_to_addr(&storage, len as usize)?;
+ Ok((TcpStream { inner: sock, }, addr))
+ }
+
+ pub fn duplicate(&self) -> io::Result<TcpListener> {
+ self.inner.duplicate().map(|s| TcpListener { inner: s })
+ }
+
+ pub fn set_ttl(&self, ttl: u32) -> io::Result<()> {
+ setsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL, ttl as c_int)
+ }
+
+ pub fn ttl(&self) -> io::Result<u32> {
+ let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL)?;
+ Ok(raw as u32)
+ }
+
+ pub fn set_only_v6(&self, only_v6: bool) -> io::Result<()> {
+ setsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_V6ONLY, only_v6 as c_int)
+ }
+
+ pub fn only_v6(&self) -> io::Result<bool> {
+ let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_V6ONLY)?;
+ Ok(raw != 0)
+ }
+
+ pub fn take_error(&self) -> io::Result<Option<io::Error>> {
+ self.inner.take_error()
+ }
+
+ pub fn set_nonblocking(&self, nonblocking: bool) -> io::Result<()> {
+ self.inner.set_nonblocking(nonblocking)
+ }
+}
+
+impl FromInner<Socket> for TcpListener {
+ fn from_inner(socket: Socket) -> TcpListener {
+ TcpListener { inner: socket }
+ }
+}
+
+impl fmt::Debug for TcpListener {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let mut res = f.debug_struct("TcpListener");
+
+ if let Ok(addr) = self.socket_addr() {
+ res.field("addr", &addr);
+ }
+
+ let name = if cfg!(windows) {"socket"} else {"fd"};
+ res.field(name, &self.inner.as_inner())
+ .finish()
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// UDP
+////////////////////////////////////////////////////////////////////////////////
+
+pub struct UdpSocket {
+ inner: Socket,
+}
+
+impl UdpSocket {
+ pub fn bind(addr: &SocketAddr) -> io::Result<UdpSocket> {
+ init();
+
+ let sock = Socket::new(addr, c::SOCK_DGRAM)?;
+ let (addrp, len) = addr.into_inner();
+ cvt(unsafe { c::bind(*sock.as_inner(), addrp, len as _) })?;
+ Ok(UdpSocket { inner: sock })
+ }
+
+ pub fn socket(&self) -> &Socket { &self.inner }
+
+ pub fn into_socket(self) -> Socket { self.inner }
+
+ pub fn socket_addr(&self) -> io::Result<SocketAddr> {
+ sockname(|buf, len| unsafe {
+ c::getsockname(*self.inner.as_inner(), buf, len)
+ })
+ }
+
+ pub fn recv_from(&self, buf: &mut [u8]) -> io::Result<(usize, SocketAddr)> {
+ self.inner.recv_from(buf)
+ }
+
+ pub fn peek_from(&self, buf: &mut [u8]) -> io::Result<(usize, SocketAddr)> {
+ self.inner.peek_from(buf)
+ }
+
+ pub fn send_to(&self, buf: &[u8], dst: &SocketAddr) -> io::Result<usize> {
+ let len = cmp::min(buf.len(), <wrlen_t>::max_value() as usize) as wrlen_t;
+ let (dstp, dstlen) = dst.into_inner();
+ let ret = cvt(unsafe {
+ c::sendto(*self.inner.as_inner(),
+ buf.as_ptr() as *const c_void, len,
+ MSG_NOSIGNAL, dstp, dstlen)
+ })?;
+ Ok(ret as usize)
+ }
+
+ pub fn duplicate(&self) -> io::Result<UdpSocket> {
+ self.inner.duplicate().map(|s| UdpSocket { inner: s })
+ }
+
+ pub fn set_read_timeout(&self, dur: Option<Duration>) -> io::Result<()> {
+ self.inner.set_timeout(dur, SO_RCVTIMEO)
+ }
+
+ pub fn set_write_timeout(&self, dur: Option<Duration>) -> io::Result<()> {
+ self.inner.set_timeout(dur, SO_SNDTIMEO)
+ }
+
+ pub fn read_timeout(&self) -> io::Result<Option<Duration>> {
+ self.inner.timeout(SO_RCVTIMEO)
+ }
+
+ pub fn write_timeout(&self) -> io::Result<Option<Duration>> {
+ self.inner.timeout(SO_SNDTIMEO)
+ }
+
+ pub fn set_broadcast(&self, broadcast: bool) -> io::Result<()> {
+ setsockopt(&self.inner, c::SOL_SOCKET, SO_BROADCAST, broadcast as c_int)
+ }
+
+ pub fn broadcast(&self) -> io::Result<bool> {
+ let raw: c_int = getsockopt(&self.inner, c::SOL_SOCKET, SO_BROADCAST)?;
+ Ok(raw != 0)
+ }
+
+ pub fn set_multicast_loop_v4(&self, multicast_loop_v4: bool) -> io::Result<()> {
+ setsockopt(&self.inner, c::IPPROTO_IP, c::IP_MULTICAST_LOOP, multicast_loop_v4 as c_int)
+ }
+
+ pub fn multicast_loop_v4(&self) -> io::Result<bool> {
+ let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IP, c::IP_MULTICAST_LOOP)?;
+ Ok(raw != 0)
+ }
+
+ pub fn set_multicast_ttl_v4(&self, multicast_ttl_v4: u32) -> io::Result<()> {
+ setsockopt(&self.inner, c::IPPROTO_IP, c::IP_MULTICAST_TTL, multicast_ttl_v4 as c_int)
+ }
+
+ pub fn multicast_ttl_v4(&self) -> io::Result<u32> {
+ let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IP, c::IP_MULTICAST_TTL)?;
+ Ok(raw as u32)
+ }
+
+ pub fn set_multicast_loop_v6(&self, multicast_loop_v6: bool) -> io::Result<()> {
+ setsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_MULTICAST_LOOP, multicast_loop_v6 as c_int)
+ }
+
+ pub fn multicast_loop_v6(&self) -> io::Result<bool> {
+ let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_MULTICAST_LOOP)?;
+ Ok(raw != 0)
+ }
+
+ pub fn join_multicast_v4(&self, multiaddr: &Ipv4Addr, interface: &Ipv4Addr)
+ -> io::Result<()> {
+ let mreq = c::ip_mreq {
+ imr_multiaddr: *multiaddr.as_inner(),
+ imr_interface: *interface.as_inner(),
+ };
+ setsockopt(&self.inner, c::IPPROTO_IP, c::IP_ADD_MEMBERSHIP, mreq)
+ }
+
+ pub fn join_multicast_v6(&self, multiaddr: &Ipv6Addr, interface: u32)
+ -> io::Result<()> {
+ let mreq = c::ipv6_mreq {
+ ipv6mr_multiaddr: *multiaddr.as_inner(),
+ ipv6mr_interface: to_ipv6mr_interface(interface),
+ };
+ setsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_ADD_MEMBERSHIP, mreq)
+ }
+
+ pub fn leave_multicast_v4(&self, multiaddr: &Ipv4Addr, interface: &Ipv4Addr)
+ -> io::Result<()> {
+ let mreq = c::ip_mreq {
+ imr_multiaddr: *multiaddr.as_inner(),
+ imr_interface: *interface.as_inner(),
+ };
+ setsockopt(&self.inner, c::IPPROTO_IP, c::IP_DROP_MEMBERSHIP, mreq)
+ }
+
+ pub fn leave_multicast_v6(&self, multiaddr: &Ipv6Addr, interface: u32)
+ -> io::Result<()> {
+ let mreq = c::ipv6_mreq {
+ ipv6mr_multiaddr: *multiaddr.as_inner(),
+ ipv6mr_interface: to_ipv6mr_interface(interface),
+ };
+ setsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_DROP_MEMBERSHIP, mreq)
+ }
+
+ pub fn set_ttl(&self, ttl: u32) -> io::Result<()> {
+ setsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL, ttl as c_int)
+ }
+
+ pub fn ttl(&self) -> io::Result<u32> {
+ let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL)?;
+ Ok(raw as u32)
+ }
+
+ pub fn take_error(&self) -> io::Result<Option<io::Error>> {
+ self.inner.take_error()
+ }
+
+ pub fn set_nonblocking(&self, nonblocking: bool) -> io::Result<()> {
+ self.inner.set_nonblocking(nonblocking)
+ }
+
+ pub fn recv(&self, buf: &mut [u8]) -> io::Result<usize> {
+ self.inner.read(buf)
+ }
+
+ pub fn peek(&self, buf: &mut [u8]) -> io::Result<usize> {
+ self.inner.peek(buf)
+ }
+
+ pub fn send(&self, buf: &[u8]) -> io::Result<usize> {
+ let len = cmp::min(buf.len(), <wrlen_t>::max_value() as usize) as wrlen_t;
+ let ret = cvt(unsafe {
+ c::send(*self.inner.as_inner(),
+ buf.as_ptr() as *const c_void,
+ len,
+ MSG_NOSIGNAL)
+ })?;
+ Ok(ret as usize)
+ }
+
+ pub fn connect(&self, addr: &SocketAddr) -> io::Result<()> {
+ let (addrp, len) = addr.into_inner();
+ cvt_r(|| unsafe { c::connect(*self.inner.as_inner(), addrp, len) }).map(|_| ())
+ }
+}
+
+impl FromInner<Socket> for UdpSocket {
+ fn from_inner(socket: Socket) -> UdpSocket {
+ UdpSocket { inner: socket }
+ }
+}
+
+impl fmt::Debug for UdpSocket {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let mut res = f.debug_struct("UdpSocket");
+
+ if let Ok(addr) = self.socket_addr() {
+ res.field("addr", &addr);
+ }
+
+ let name = if cfg!(windows) {"socket"} else {"fd"};
+ res.field(name, &self.inner.as_inner())
+ .finish()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use collections::HashMap;
+
+ #[test]
+ fn no_lookup_host_duplicates() {
+ let mut addrs = HashMap::new();
+ let lh = match lookup_host("localhost") {
+ Ok(lh) => lh,
+ Err(e) => panic!("couldn't resolve `localhost': {}", e)
+ };
+ let _na = lh.map(|sa| *addrs.entry(sa).or_insert(0) += 1).count();
+ assert!(addrs.values().filter(|&&v| v > 1).count() == 0);
+ }
+}
diff --git a/ctr-std/src/sys_common/poison.rs b/ctr-std/src/sys_common/poison.rs
index bdc727f..934ac3e 100644
--- a/ctr-std/src/sys_common/poison.rs
+++ b/ctr-std/src/sys_common/poison.rs
@@ -60,17 +60,53 @@ pub struct Guard {
/// A type of error which can be returned whenever a lock is acquired.
///
-/// Both Mutexes and RwLocks are poisoned whenever a thread fails while the lock
+/// Both [`Mutex`]es and [`RwLock`]s are poisoned whenever a thread fails while the lock
/// is held. The precise semantics for when a lock is poisoned is documented on
/// each lock, but once a lock is poisoned then all future acquisitions will
/// return this error.
+///
+/// # Examples
+///
+/// ```
+/// use std::sync::{Arc, Mutex};
+/// use std::thread;
+///
+/// let mutex = Arc::new(Mutex::new(1));
+///
+/// // poison the mutex
+/// let c_mutex = mutex.clone();
+/// let _ = thread::spawn(move || {
+/// let mut data = c_mutex.lock().unwrap();
+/// *data = 2;
+/// panic!();
+/// }).join();
+///
+/// match mutex.lock() {
+/// Ok(_) => unreachable!(),
+/// Err(p_err) => {
+/// let data = p_err.get_ref();
+/// println!("recovered: {}", data);
+/// }
+/// };
+/// ```
+///
+/// [`Mutex`]: ../../std/sync/struct.Mutex.html
+/// [`RwLock`]: ../../std/sync/struct.RwLock.html
#[stable(feature = "rust1", since = "1.0.0")]
pub struct PoisonError<T> {
guard: T,
}
-/// An enumeration of possible errors which can occur while calling the
-/// `try_lock` method.
+/// An enumeration of possible errors associated with a [`TryLockResult`] which
+/// can occur while trying to acquire a lock, from the [`try_lock`] method on a
+/// [`Mutex`] or the [`try_read`] and [`try_write`] methods on an [`RwLock`].
+///
+/// [`Mutex`]: struct.Mutex.html
+/// [`RwLock`]: struct.RwLock.html
+/// [`TryLockResult`]: type.TryLockResult.html
+/// [`try_lock`]: struct.Mutex.html#method.try_lock
+/// [`try_read`]: struct.RwLock.html#method.try_read
+/// [`try_write`]: struct.RwLock.html#method.try_write
#[stable(feature = "rust1", since = "1.0.0")]
pub enum TryLockError<T> {
/// The lock could not be acquired because another thread failed while holding
@@ -85,19 +121,26 @@ pub enum TryLockError<T> {
/// A type alias for the result of a lock method which can be poisoned.
///
-/// The `Ok` variant of this result indicates that the primitive was not
-/// poisoned, and the `Guard` is contained within. The `Err` variant indicates
-/// that the primitive was poisoned. Note that the `Err` variant *also* carries
-/// the associated guard, and it can be acquired through the `into_inner`
+/// The [`Ok`] variant of this result indicates that the primitive was not
+/// poisoned, and the `Guard` is contained within. The [`Err`] variant indicates
+/// that the primitive was poisoned. Note that the [`Err`] variant *also* carries
+/// the associated guard, and it can be acquired through the [`into_inner`]
/// method.
+///
+/// [`Ok`]: ../../std/result/enum.Result.html#variant.Ok
+/// [`Err`]: ../../std/result/enum.Result.html#variant.Err
+/// [`into_inner`]: ../../std/sync/struct.PoisonError.html#method.into_inner
#[stable(feature = "rust1", since = "1.0.0")]
pub type LockResult<Guard> = Result<Guard, PoisonError<Guard>>;
/// A type alias for the result of a nonblocking locking method.
///
-/// For more information, see `LockResult`. A `TryLockResult` doesn't
-/// necessarily hold the associated guard in the `Err` type as the lock may not
+/// For more information, see [`LockResult`]. A `TryLockResult` doesn't
+/// necessarily hold the associated guard in the [`Err`] type as the lock may not
/// have been acquired for other reasons.
+///
+/// [`LockResult`]: ../../std/sync/type.LockResult.html
+/// [`Err`]: ../../std/result/enum.Result.html#variant.Err
#[stable(feature = "rust1", since = "1.0.0")]
pub type TryLockResult<Guard> = Result<Guard, TryLockError<Guard>>;
@@ -124,6 +167,11 @@ impl<T> Error for PoisonError<T> {
impl<T> PoisonError<T> {
/// Creates a `PoisonError`.
+ ///
+ /// This is generally created by methods like [`Mutex::lock`] or [`RwLock::read`].
+ ///
+ /// [`Mutex::lock`]: ../../std/sync/struct.Mutex.html#method.lock
+ /// [`RwLock::read`]: ../../std/sync/struct.RwLock.html#method.read
#[stable(feature = "sync_poison", since = "1.2.0")]
pub fn new(guard: T) -> PoisonError<T> {
PoisonError { guard: guard }
@@ -131,6 +179,28 @@ impl<T> PoisonError<T> {
/// Consumes this error indicating that a lock is poisoned, returning the
/// underlying guard to allow access regardless.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::collections::HashSet;
+ /// use std::sync::{Arc, Mutex};
+ /// use std::thread;
+ ///
+ /// let mutex = Arc::new(Mutex::new(HashSet::new()));
+ ///
+ /// // poison the mutex
+ /// let c_mutex = mutex.clone();
+ /// let _ = thread::spawn(move || {
+ /// let mut data = c_mutex.lock().unwrap();
+ /// data.insert(10);
+ /// panic!();
+ /// }).join();
+ ///
+ /// let p_err = mutex.lock().unwrap_err();
+ /// let data = p_err.into_inner();
+ /// println!("recovered {} items", data.len());
+ /// ```
#[stable(feature = "sync_poison", since = "1.2.0")]
pub fn into_inner(self) -> T { self.guard }
diff --git a/ctr-std/src/sys_common/process.rs b/ctr-std/src/sys_common/process.rs
new file mode 100644
index 0000000..fd1a5fd
--- /dev/null
+++ b/ctr-std/src/sys_common/process.rs
@@ -0,0 +1,124 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![allow(dead_code)]
+#![unstable(feature = "process_internals", issue = "0")]
+
+use ffi::{OsStr, OsString};
+use env;
+use collections::BTreeMap;
+use alloc::borrow::Borrow;
+
+pub trait EnvKey:
+ From<OsString> + Into<OsString> +
+ Borrow<OsStr> + Borrow<Self> + AsRef<OsStr> +
+ Ord + Clone {}
+
+// Implement a case-sensitive environment variable key
+#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub struct DefaultEnvKey(OsString);
+
+impl From<OsString> for DefaultEnvKey {
+ fn from(k: OsString) -> Self { DefaultEnvKey(k) }
+}
+
+impl From<DefaultEnvKey> for OsString {
+ fn from(k: DefaultEnvKey) -> Self { k.0 }
+}
+
+impl Borrow<OsStr> for DefaultEnvKey {
+ fn borrow(&self) -> &OsStr { &self.0 }
+}
+
+impl AsRef<OsStr> for DefaultEnvKey {
+ fn as_ref(&self) -> &OsStr { &self.0 }
+}
+
+impl EnvKey for DefaultEnvKey {}
+
+// Stores a set of changes to an environment
+#[derive(Clone, Debug)]
+pub struct CommandEnv<K> {
+ clear: bool,
+ vars: BTreeMap<K, Option<OsString>>
+}
+
+impl<K: EnvKey> Default for CommandEnv<K> {
+ fn default() -> Self {
+ CommandEnv {
+ clear: false,
+ vars: Default::default()
+ }
+ }
+}
+
+impl<K: EnvKey> CommandEnv<K> {
+ // Capture the current environment with these changes applied
+ pub fn capture(&self) -> BTreeMap<K, OsString> {
+ let mut result = BTreeMap::<K, OsString>::new();
+ if !self.clear {
+ for (k, v) in env::vars_os() {
+ result.insert(k.into(), v);
+ }
+ }
+ for (k, maybe_v) in &self.vars {
+ if let &Some(ref v) = maybe_v {
+ result.insert(k.clone(), v.clone());
+ } else {
+ result.remove(k);
+ }
+ }
+ result
+ }
+
+ // Apply these changes directly to the current environment
+ pub fn apply(&self) {
+ if self.clear {
+ for (k, _) in env::vars_os() {
+ env::remove_var(k);
+ }
+ }
+ for (key, maybe_val) in self.vars.iter() {
+ if let &Some(ref val) = maybe_val {
+ env::set_var(key, val);
+ } else {
+ env::remove_var(key);
+ }
+ }
+ }
+
+ pub fn is_unchanged(&self) -> bool {
+ !self.clear && self.vars.is_empty()
+ }
+
+ pub fn capture_if_changed(&self) -> Option<BTreeMap<K, OsString>> {
+ if self.is_unchanged() {
+ None
+ } else {
+ Some(self.capture())
+ }
+ }
+
+ // The following functions build up changes
+ pub fn set(&mut self, key: &OsStr, value: &OsStr) {
+ self.vars.insert(key.to_owned().into(), Some(value.to_owned()));
+ }
+ pub fn remove(&mut self, key: &OsStr) {
+ if self.clear {
+ self.vars.remove(key);
+ } else {
+ self.vars.insert(key.to_owned().into(), None);
+ }
+ }
+ pub fn clear(&mut self) {
+ self.clear = true;
+ self.vars.clear();
+ }
+}
diff --git a/ctr-std/src/sys_common/remutex.rs b/ctr-std/src/sys_common/remutex.rs
index 4d0407c..ce43ec6 100644
--- a/ctr-std/src/sys_common/remutex.rs
+++ b/ctr-std/src/sys_common/remutex.rs
@@ -116,11 +116,18 @@ impl<T> Drop for ReentrantMutex<T> {
impl<T: fmt::Debug + 'static> fmt::Debug for ReentrantMutex<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.try_lock() {
- Ok(guard) => write!(f, "ReentrantMutex {{ data: {:?} }}", &*guard),
+ Ok(guard) => f.debug_struct("ReentrantMutex").field("data", &*guard).finish(),
Err(TryLockError::Poisoned(err)) => {
- write!(f, "ReentrantMutex {{ data: Poisoned({:?}) }}", &**err.get_ref())
+ f.debug_struct("ReentrantMutex").field("data", &**err.get_ref()).finish()
},
- Err(TryLockError::WouldBlock) => write!(f, "ReentrantMutex {{ <locked> }}")
+ Err(TryLockError::WouldBlock) => {
+ struct LockedPlaceholder;
+ impl fmt::Debug for LockedPlaceholder {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str("<locked>") }
+ }
+
+ f.debug_struct("ReentrantMutex").field("data", &LockedPlaceholder).finish()
+ }
}
}
}
diff --git a/ctr-std/src/sys_common/thread.rs b/ctr-std/src/sys_common/thread.rs
index bb6baae..f1379b6 100644
--- a/ctr-std/src/sys_common/thread.rs
+++ b/ctr-std/src/sys_common/thread.rs
@@ -9,14 +9,32 @@
// except according to those terms.
use alloc::boxed::FnBox;
-use libc;
-//use sys::stack_overflow;
+use env;
+use sync::atomic::{self, Ordering};
+use sys::stack_overflow;
+use sys::thread as imp;
-pub unsafe fn start_thread(main: *mut libc::c_void) {
+#[allow(dead_code)]
+pub unsafe fn start_thread(main: *mut u8) {
// Next, set up our stack overflow handler which may get triggered if we run
// out of stack.
- // let _handler = stack_overflow::Handler::new();
+ let _handler = stack_overflow::Handler::new();
// Finally, let's run some code.
Box::from_raw(main as *mut Box<FnBox()>)()
}
+
+pub fn min_stack() -> usize {
+ static MIN: atomic::AtomicUsize = atomic::AtomicUsize::new(0);
+ match MIN.load(Ordering::SeqCst) {
+ 0 => {}
+ n => return n - 1,
+ }
+ let amt = env::var("RUST_MIN_STACK").ok().and_then(|s| s.parse().ok());
+ let amt = amt.unwrap_or(imp::DEFAULT_MIN_STACK_SIZE);
+
+ // 0 is our sentinel value, so ensure that we'll never see 0 after
+ // initialization has run
+ MIN.store(amt + 1, Ordering::SeqCst);
+ amt
+}
diff --git a/ctr-std/src/sys_common/thread_info.rs b/ctr-std/src/sys_common/thread_info.rs
index 2abb8af..7970042 100644
--- a/ctr-std/src/sys_common/thread_info.rs
+++ b/ctr-std/src/sys_common/thread_info.rs
@@ -45,7 +45,7 @@ pub fn stack_guard() -> Option<usize> {
pub fn set(stack_guard: Option<usize>, thread: Thread) {
THREAD_INFO.with(|c| assert!(c.borrow().is_none()));
THREAD_INFO.with(move |c| *c.borrow_mut() = Some(ThreadInfo{
- stack_guard: stack_guard,
- thread: thread,
+ stack_guard,
+ thread,
}));
}
diff --git a/ctr-std/src/sys_common/thread_local.rs b/ctr-std/src/sys_common/thread_local.rs
index 25a9d57..a4aa3d9 100644
--- a/ctr-std/src/sys_common/thread_local.rs
+++ b/ctr-std/src/sys_common/thread_local.rs
@@ -33,7 +33,7 @@
//! Using a dynamically allocated TLS key. Note that this key can be shared
//! among many threads via an `Arc`.
//!
-//! ```rust,ignore
+//! ```ignore (cannot-doctest-private-modules)
//! let key = Key::new(None);
//! assert!(key.get().is_null());
//! key.set(1 as *mut u8);
@@ -45,7 +45,7 @@
//! Sometimes a statically allocated key is either required or easier to work
//! with, however.
//!
-//! ```rust,ignore
+//! ```ignore (cannot-doctest-private-modules)
//! static KEY: StaticKey = INIT;
//!
//! unsafe {
@@ -58,9 +58,10 @@
#![unstable(feature = "thread_local_internals", issue = "0")]
#![allow(dead_code)] // sys isn't exported yet
+use ptr;
use sync::atomic::{self, AtomicUsize, Ordering};
-
use sys::thread_local as imp;
+use sys_common::mutex::Mutex;
/// A type for TLS keys that are statically allocated.
///
@@ -73,7 +74,7 @@ use sys::thread_local as imp;
///
/// # Examples
///
-/// ```ignore
+/// ```ignore (cannot-doctest-private-modules)
/// use tls::os::{StaticKey, INIT};
///
/// static KEY: StaticKey = INIT;
@@ -104,7 +105,7 @@ pub struct StaticKey {
///
/// # Examples
///
-/// ```rust,ignore
+/// ```ignore (cannot-doctest-private-modules)
/// use tls::os::Key;
///
/// let key = Key::new(None);
@@ -127,7 +128,7 @@ impl StaticKey {
pub const fn new(dtor: Option<unsafe extern fn(*mut u8)>) -> StaticKey {
StaticKey {
key: atomic::AtomicUsize::new(0),
- dtor: dtor
+ dtor,
}
}
@@ -145,20 +146,6 @@ impl StaticKey {
#[inline]
pub unsafe fn set(&self, val: *mut u8) { imp::set(self.key(), val) }
- /// Deallocates this OS TLS key.
- ///
- /// This function is unsafe as there is no guarantee that the key is not
- /// currently in use by other threads or will not ever be used again.
- ///
- /// Note that this does *not* run the user-provided destructor if one was
- /// specified at definition time. Doing so must be done manually.
- pub unsafe fn destroy(&self) {
- match self.key.swap(0, Ordering::SeqCst) {
- 0 => {}
- n => { imp::destroy(n as imp::Key) }
- }
- }
-
#[inline]
unsafe fn key(&self) -> imp::Key {
match self.key.load(Ordering::Relaxed) {
@@ -168,6 +155,24 @@ impl StaticKey {
}
unsafe fn lazy_init(&self) -> usize {
+ // Currently the Windows implementation of TLS is pretty hairy, and
+ // it greatly simplifies creation if we just synchronize everything.
+ //
+ // Additionally a 0-index of a tls key hasn't been seen on windows, so
+ // we just simplify the whole branch.
+ if imp::requires_synchronized_create() {
+ static INIT_LOCK: Mutex = Mutex::new();
+ INIT_LOCK.lock();
+ let mut key = self.key.load(Ordering::SeqCst);
+ if key == 0 {
+ key = imp::create(self.dtor) as usize;
+ self.key.store(key, Ordering::SeqCst);
+ }
+ INIT_LOCK.unlock();
+ assert!(key != 0);
+ return key
+ }
+
// POSIX allows the key created here to be 0, but the compare_and_swap
// below relies on using 0 as a sentinel value to check who won the
// race to set the shared TLS key. As far as I know, there is no
@@ -227,7 +232,42 @@ impl Key {
impl Drop for Key {
fn drop(&mut self) {
- unsafe { imp::destroy(self.key) }
+ // Right now Windows doesn't support TLS key destruction, but this also
+ // isn't used anywhere other than tests, so just leak the TLS key.
+ // unsafe { imp::destroy(self.key) }
+ }
+}
+
+/// Registers `dtor` to be called with argument `t` through the fallback
+/// destructor list.
+///
+/// The `(t, dtor)` pair is appended to a list reached through the `DTORS` key;
+/// `run_dtors`, installed as that key's destructor, later calls every
+/// registered pair.
+pub unsafe fn register_dtor_fallback(t: *mut u8,
+ dtor: unsafe extern fn(*mut u8)) {
+ // The fallback implementation uses a vanilla OS-based TLS key to track
+ // the list of destructors that need to be run for this thread. The key
+ // then has its own destructor which runs all the other destructors.
+ //
+ // The destructor for DTORS is a little special in that it has a `while`
+ // loop to continuously drain the list of registered destructors. It
+ // *should* be the case that this loop always terminates because we
+ // provide the guarantee that a TLS key cannot be set after it is
+ // flagged for destruction.
+
+ static DTORS: StaticKey = StaticKey::new(Some(run_dtors));
+ type List = Vec<(*mut u8, unsafe extern fn(*mut u8))>;
+ // Allocate the list the first time this is called while the key is unset.
+ if DTORS.get().is_null() {
+ let v: Box<List> = box Vec::new();
+ DTORS.set(Box::into_raw(v) as *mut u8);
+ }
+ let list: &mut List = &mut *(DTORS.get() as *mut List);
+ list.push((t, dtor));
+
+ unsafe extern fn run_dtors(mut ptr: *mut u8) {
+ while !ptr.is_null() {
+ // Retake ownership of the list so it is freed once it has been drained.
+ let list: Box<List> = Box::from_raw(ptr as *mut List);
+ for (ptr, dtor) in list.into_iter() {
+ dtor(ptr);
+ }
+ // Pick up any list that was stored in the key in the meantime and
+ // clear the key before looping again.
+ ptr = DTORS.get();
+ DTORS.set(ptr::null_mut());
+ }
}
}
diff --git a/ctr-std/src/sys_common/util.rs b/ctr-std/src/sys_common/util.rs
index aad0680..a391c7c 100644
--- a/ctr-std/src/sys_common/util.rs
+++ b/ctr-std/src/sys_common/util.rs
@@ -10,29 +10,8 @@
use fmt;
use io::prelude::*;
-use sync::atomic::{self, Ordering};
use sys::stdio::Stderr;
-
-pub fn min_stack() -> usize {
- static MIN: atomic::AtomicUsize = atomic::AtomicUsize::new(0);
- match MIN.load(Ordering::SeqCst) {
- 0 => {}
- n => return n - 1,
- }
-
- // NOTE: We don't have env variable support on the 3DS so let's just use the
- // default minimum
-
- // let amt = env::var("RUST_MIN_STACK").ok().and_then(|s| s.parse().ok());
- // let amt = amt.unwrap_or(2 * 1024 * 1024);
-
- let amt = 2 * 1024 * 1024;
-
- // 0 is our sentinel value, so ensure that we'll never see 0 after
- // initialization has run
- MIN.store(amt + 1, Ordering::SeqCst);
- amt
-}
+use thread;
pub fn dumb_print(args: fmt::Arguments) {
let _ = Stderr::new().map(|mut stderr| stderr.write_fmt(args));
@@ -47,3 +26,9 @@ pub fn abort(args: fmt::Arguments) -> ! {
dumb_print(format_args!("fatal runtime error: {}\n", args));
unsafe { ::sys::abort_internal(); }
}
+
+/// Prints a message via `dumb_print` saying that the current thread has
+/// overflowed its stack, using `<unknown>` when the thread has no name.
+#[allow(dead_code)] // stack overflow detection not enabled on all platforms
+pub unsafe fn report_overflow() {
+ dumb_print(format_args!("\nthread '{}' has overflowed its stack\n",
+ thread::current().name().unwrap_or("<unknown>")));
+}
diff --git a/ctr-std/src/sys_common/wtf8.rs b/ctr-std/src/sys_common/wtf8.rs
new file mode 100644
index 0000000..46d554d
--- /dev/null
+++ b/ctr-std/src/sys_common/wtf8.rs
@@ -0,0 +1,1284 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Implementation of [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/).
+//!
+//! This library uses Rust’s type system to maintain
+//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed),
+//! like the `String` and `&str` types do for UTF-8.
+//!
+//! Since [WTF-8 must not be used
+//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience),
+//! this library deliberately does not provide access to the underlying bytes
+//! of WTF-8 strings,
+//! nor can it decode WTF-8 from arbitrary bytes.
+//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points.
+
+// this module is imported from @SimonSapin's repo and has tons of dead code on
+// unix (it's mostly used on windows), so don't worry about dead code here.
+#![allow(dead_code)]
+
+use core::str::next_code_point;
+
+use ascii::*;
+use borrow::Cow;
+use char;
+use fmt;
+use hash::{Hash, Hasher};
+use iter::FromIterator;
+use mem;
+use ops;
+use rc::Rc;
+use slice;
+use str;
+use sync::Arc;
+use sys_common::AsInner;
+
+const UTF8_REPLACEMENT_CHARACTER: &'static str = "\u{FFFD}";
+
+/// A Unicode code point: from U+0000 to U+10FFFF.
+///
+/// Compare with the `char` type,
+/// which represents a Unicode scalar value:
+/// a code point that is not a surrogate (U+D800 to U+DFFF).
+#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Copy)]
+pub struct CodePoint {
+ value: u32
+}
+
+/// Writes the code point as `U+` followed by its value in uppercase
+/// hexadecimal, zero-padded to at least four digits.
+/// Example: `U+1F4A9`
+impl fmt::Debug for CodePoint {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        f.write_fmt(format_args!("U+{:04X}", self.value))
+    }
+}
+
+impl CodePoint {
+    /// Wraps `value` in a `CodePoint` without any range check.
+    ///
+    /// The caller must guarantee that `value` does not exceed 0x10FFFF.
+    #[inline]
+    pub unsafe fn from_u32_unchecked(value: u32) -> CodePoint {
+        CodePoint { value }
+    }
+
+    /// Converts `value` into a `CodePoint`.
+    ///
+    /// Yields `None` when `value` is greater than 0x10FFFF.
+    #[inline]
+    pub fn from_u32(value: u32) -> Option<CodePoint> {
+        if value <= 0x10FFFF {
+            Some(CodePoint { value })
+        } else {
+            None
+        }
+    }
+
+    /// Converts a `char` into a `CodePoint`.
+    ///
+    /// Every Unicode scalar value is a code point, so this cannot fail.
+    #[inline]
+    pub fn from_char(value: char) -> CodePoint {
+        let value = value as u32;
+        CodePoint { value }
+    }
+
+    /// Gives back the code point as a plain number.
+    #[inline]
+    pub fn to_u32(&self) -> u32 {
+        self.value
+    }
+
+    /// Converts the code point to a `char` when it is a Unicode scalar value.
+    ///
+    /// Yields `None` for surrogates (U+D800 through U+DFFF).
+    #[inline]
+    pub fn to_char(&self) -> Option<char> {
+        let is_surrogate = 0xD800 <= self.value && self.value <= 0xDFFF;
+        if is_surrogate {
+            None
+        } else {
+            Some(unsafe { char::from_u32_unchecked(self.value) })
+        }
+    }
+
+    /// Converts the code point to a `char`, substituting `'\u{FFFD}'`
+    /// (the replacement character “�”) for surrogates
+    /// (U+D800 through U+DFFF).
+    #[inline]
+    pub fn to_char_lossy(&self) -> char {
+        match self.to_char() {
+            Some(scalar) => scalar,
+            None => '\u{FFFD}',
+        }
+    }
+}
+
+/// An owned, growable string of well-formed WTF-8 data.
+///
+/// Similar to `String`, but can additionally contain surrogate code points
+/// if they’re not in a surrogate pair.
+#[derive(Eq, PartialEq, Ord, PartialOrd, Clone)]
+pub struct Wtf8Buf {
+ bytes: Vec<u8>
+}
+
+/// Lets a `Wtf8Buf` be used wherever a `&Wtf8` is expected.
+impl ops::Deref for Wtf8Buf {
+ type Target = Wtf8;
+
+ // Same view as `as_slice`.
+ fn deref(&self) -> &Wtf8 {
+ self.as_slice()
+ }
+}
+
+/// Lets a `Wtf8Buf` be used wherever a `&mut Wtf8` is expected.
+impl ops::DerefMut for Wtf8Buf {
+ // Same view as `as_mut_slice`.
+ fn deref_mut(&mut self) -> &mut Wtf8 {
+ self.as_mut_slice()
+ }
+}
+
+/// Format the string with double quotes,
+/// and surrogates as `\u{…}` escapes holding their lowercase hexadecimal value.
+/// Example: `"a\u{d800}"` for a string with code points [U+0061, U+D800]
+impl fmt::Debug for Wtf8Buf {
+ #[inline]
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ // Delegates to the `Debug` impl of the borrowed `Wtf8` slice.
+ fmt::Debug::fmt(&**self, formatter)
+ }
+}
+
+impl Wtf8Buf {
+ /// Creates a new, empty WTF-8 string.
+ #[inline]
+ pub fn new() -> Wtf8Buf {
+ Wtf8Buf { bytes: Vec::new() }
+ }
+
+ /// Creates a new, empty WTF-8 string with pre-allocated capacity for `n` bytes.
+ #[inline]
+ pub fn with_capacity(n: usize) -> Wtf8Buf {
+ Wtf8Buf { bytes: Vec::with_capacity(n) }
+ }
+
+ /// Creates a WTF-8 string from a UTF-8 `String`.
+ ///
+ /// This takes ownership of the `String` and does not copy.
+ ///
+ /// Since WTF-8 is a superset of UTF-8, this always succeeds.
+ #[inline]
+ pub fn from_string(string: String) -> Wtf8Buf {
+ Wtf8Buf { bytes: string.into_bytes() }
+ }
+
+ /// Creates a WTF-8 string from a UTF-8 `&str` slice.
+ ///
+ /// This copies the content of the slice.
+ ///
+ /// Since WTF-8 is a superset of UTF-8, this always succeeds.
+ #[inline]
+ pub fn from_str(str: &str) -> Wtf8Buf {
+ Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()) }
+ }
+
+ /// Removes all contents, leaving an empty string.
+ pub fn clear(&mut self) {
+ self.bytes.clear()
+ }
+
+ /// Creates a WTF-8 string from a potentially ill-formed UTF-16 slice of 16-bit code units.
+ ///
+ /// This is lossless: calling `.encode_wide()` on the resulting string
+ /// will always return the original code units.
+ pub fn from_wide(v: &[u16]) -> Wtf8Buf {
+ let mut string = Wtf8Buf::with_capacity(v.len());
+ for item in char::decode_utf16(v.iter().cloned()) {
+ match item {
+ Ok(ch) => string.push_char(ch),
+ Err(surrogate) => {
+ let surrogate = surrogate.unpaired_surrogate();
+ // Surrogates are known to be in the code point range.
+ let code_point = unsafe {
+ CodePoint::from_u32_unchecked(surrogate as u32)
+ };
+ // Skip the WTF-8 concatenation check,
+ // surrogate pairs are already decoded by decode_utf16
+ string.push_code_point_unchecked(code_point)
+ }
+ }
+ }
+ string
+ }
+
+ /// Copied from String::push
+ /// This does **not** include the WTF-8 concatenation check.
+ fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
+ // The value may be a surrogate, hence the unchecked conversion.
+ let c = unsafe {
+ char::from_u32_unchecked(code_point.value)
+ };
+ let mut bytes = [0; 4];
+ let bytes = c.encode_utf8(&mut bytes).as_bytes();
+ self.bytes.extend_from_slice(bytes)
+ }
+
+ /// Borrows the contents as a `Wtf8` slice.
+ #[inline]
+ pub fn as_slice(&self) -> &Wtf8 {
+ unsafe { Wtf8::from_bytes_unchecked(&self.bytes) }
+ }
+
+ /// Mutably borrows the contents as a `Wtf8` slice.
+ #[inline]
+ pub fn as_mut_slice(&mut self) -> &mut Wtf8 {
+ unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) }
+ }
+
+ /// Reserves capacity for at least `additional` more bytes to be inserted
+ /// in the given `Wtf8Buf`.
+ /// The collection may reserve more space to avoid frequent reallocations.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the new capacity overflows `usize`.
+ #[inline]
+ pub fn reserve(&mut self, additional: usize) {
+ self.bytes.reserve(additional)
+ }
+
+ /// Forwards to `Vec::reserve_exact` on the underlying byte buffer.
+ #[inline]
+ pub fn reserve_exact(&mut self, additional: usize) {
+ self.bytes.reserve_exact(additional)
+ }
+
+ /// Forwards to `Vec::shrink_to_fit` on the underlying byte buffer.
+ #[inline]
+ pub fn shrink_to_fit(&mut self) {
+ self.bytes.shrink_to_fit()
+ }
+
+ /// Returns the number of bytes that this string buffer can hold without reallocating.
+ #[inline]
+ pub fn capacity(&self) -> usize {
+ self.bytes.capacity()
+ }
+
+ /// Append a UTF-8 slice at the end of the string.
+ #[inline]
+ pub fn push_str(&mut self, other: &str) {
+ self.bytes.extend_from_slice(other.as_bytes())
+ }
+
+ /// Append a WTF-8 slice at the end of the string.
+ ///
+ /// This replaces newly paired surrogates at the boundary
+ /// with a supplementary code point,
+ /// like concatenating ill-formed UTF-16 strings effectively would.
+ #[inline]
+ pub fn push_wtf8(&mut self, other: &Wtf8) {
+ match ((&*self).final_lead_surrogate(), other.initial_trail_surrogate()) {
+ // Replace newly paired surrogates by a supplementary code point.
+ (Some(lead), Some(trail)) => {
+ // Both surrogate lookups match on exactly three bytes, so drop
+ // three bytes from the end of `self` and the start of `other`.
+ let len_without_lead_surrogate = self.len() - 3;
+ self.bytes.truncate(len_without_lead_surrogate);
+ let other_without_trail_surrogate = &other.bytes[3..];
+ // 4 bytes for the supplementary code point
+ self.bytes.reserve(4 + other_without_trail_surrogate.len());
+ self.push_char(decode_surrogate_pair(lead, trail));
+ self.bytes.extend_from_slice(other_without_trail_surrogate);
+ }
+ _ => self.bytes.extend_from_slice(&other.bytes)
+ }
+ }
+
+ /// Append a Unicode scalar value at the end of the string.
+ #[inline]
+ pub fn push_char(&mut self, c: char) {
+ self.push_code_point_unchecked(CodePoint::from_char(c))
+ }
+
+ /// Append a code point at the end of the string.
+ ///
+ /// This replaces newly paired surrogates at the boundary
+ /// with a supplementary code point,
+ /// like concatenating ill-formed UTF-16 strings effectively would.
+ #[inline]
+ pub fn push(&mut self, code_point: CodePoint) {
+ if let trail @ 0xDC00...0xDFFF = code_point.to_u32() {
+ if let Some(lead) = (&*self).final_lead_surrogate() {
+ // Remove the three-byte lead surrogate and push the combined
+ // supplementary code point in its place.
+ let len_without_lead_surrogate = self.len() - 3;
+ self.bytes.truncate(len_without_lead_surrogate);
+ self.push_char(decode_surrogate_pair(lead, trail as u16));
+ return
+ }
+ }
+
+ // No newly paired surrogates at the boundary.
+ self.push_code_point_unchecked(code_point)
+ }
+
+ /// Shortens a string to the specified length.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `new_len` > current length,
+ /// or if `new_len` is not a code point boundary.
+ #[inline]
+ pub fn truncate(&mut self, new_len: usize) {
+ assert!(is_code_point_boundary(self, new_len));
+ self.bytes.truncate(new_len)
+ }
+
+ /// Consumes the WTF-8 string and tries to convert it to UTF-8.
+ ///
+ /// This does not copy the data.
+ ///
+ /// If the contents are not well-formed UTF-8
+ /// (that is, if the string contains surrogates),
+ /// the original WTF-8 string is returned instead.
+ pub fn into_string(self) -> Result<String, Wtf8Buf> {
+ match self.next_surrogate(0) {
+ None => Ok(unsafe { String::from_utf8_unchecked(self.bytes) }),
+ Some(_) => Err(self),
+ }
+ }
+
+ /// Consumes the WTF-8 string and converts it lossily to UTF-8.
+ ///
+ /// This does not copy the data (but may overwrite parts of it in place).
+ ///
+ /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”)
+ pub fn into_string_lossy(mut self) -> String {
+ let mut pos = 0;
+ loop {
+ match self.next_surrogate(pos) {
+ Some((surrogate_pos, _)) => {
+ // Overwrite the three surrogate bytes with the replacement
+ // character's bytes and continue scanning after them.
+ pos = surrogate_pos + 3;
+ self.bytes[surrogate_pos..pos]
+ .copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
+ },
+ None => return unsafe { String::from_utf8_unchecked(self.bytes) }
+ }
+ }
+ }
+
+ /// Converts this `Wtf8Buf` into a boxed `Wtf8`.
+ #[inline]
+ pub fn into_box(self) -> Box<Wtf8> {
+ // NOTE(review): relies on `Box<[u8]>` and `Box<Wtf8>` sharing a
+ // representation (`Wtf8`'s only field is `bytes: [u8]`) — confirm.
+ unsafe { mem::transmute(self.bytes.into_boxed_slice()) }
+ }
+
+ /// Converts a `Box<Wtf8>` into a `Wtf8Buf`.
+ pub fn from_box(boxed: Box<Wtf8>) -> Wtf8Buf {
+ // Reverse of the transmute in `into_box`.
+ let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) };
+ Wtf8Buf { bytes: bytes.into_vec() }
+ }
+}
+
+/// Builds a WTF-8 string out of an iterator of code points.
+///
+/// A lead surrogate directly followed by a trail surrogate is merged into the
+/// supplementary code point they encode,
+/// like concatenating ill-formed UTF-16 strings effectively would.
+impl FromIterator<CodePoint> for Wtf8Buf {
+    fn from_iter<T: IntoIterator<Item=CodePoint>>(iter: T) -> Wtf8Buf {
+        let mut collected = Wtf8Buf { bytes: Vec::new() };
+        Extend::extend(&mut collected, iter);
+        collected
+    }
+}
+
+/// Pushes every code point produced by an iterator onto the string.
+///
+/// A lead surrogate directly followed by a trail surrogate is merged into the
+/// supplementary code point they encode,
+/// like concatenating ill-formed UTF-16 strings effectively would.
+impl Extend<CodePoint> for Wtf8Buf {
+    fn extend<T: IntoIterator<Item=CodePoint>>(&mut self, iter: T) {
+        let code_points = iter.into_iter();
+        // Each code point needs at least one byte (the ASCII case), so the
+        // iterator's lower bound is a safe amount to reserve up front.
+        let (min_count, _) = code_points.size_hint();
+        self.bytes.reserve(min_count);
+        code_points.for_each(|code_point| self.push(code_point));
+    }
+}
+
+/// A borrowed slice of well-formed WTF-8 data.
+///
+/// Similar to `&str`, but can additionally contain surrogate code points
+/// if they’re not in a surrogate pair.
+#[derive(Eq, Ord, PartialEq, PartialOrd)]
+pub struct Wtf8 {
+ bytes: [u8]
+}
+
+/// Exposes the underlying bytes of the slice through `AsInner`.
+impl AsInner<[u8]> for Wtf8 {
+ fn as_inner(&self) -> &[u8] { &self.bytes }
+}
+
+/// Format the slice with double quotes,
+/// and surrogates as `\u{…}` escapes holding their lowercase hexadecimal value.
+/// Example: `"a\u{d800}"` for a slice with code points [U+0061, U+D800]
+impl fmt::Debug for Wtf8 {
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ // Writes `s` with every character passed through `char::escape_debug`.
+ fn write_str_escaped(f: &mut fmt::Formatter, s: &str) -> fmt::Result {
+ use fmt::Write;
+ for c in s.chars().flat_map(|c| c.escape_debug()) {
+ f.write_char(c)?
+ }
+ Ok(())
+ }
+
+ formatter.write_str("\"")?;
+ let mut pos = 0;
+ loop {
+ match self.next_surrogate(pos) {
+ None => break,
+ Some((surrogate_pos, surrogate)) => {
+ // Escape the surrogate-free run before this surrogate,
+ // then emit the surrogate itself as a `\u{…}` escape.
+ write_str_escaped(
+ formatter,
+ unsafe { str::from_utf8_unchecked(
+ &self.bytes[pos .. surrogate_pos]
+ )},
+ )?;
+ write!(formatter, "\\u{{{:x}}}", surrogate)?;
+ // Continue after the three surrogate bytes.
+ pos = surrogate_pos + 3;
+ }
+ }
+ }
+ // Escape whatever follows the last surrogate.
+ write_str_escaped(
+ formatter,
+ unsafe { str::from_utf8_unchecked(&self.bytes[pos..]) },
+ )?;
+ formatter.write_str("\"")
+ }
+}
+
+/// Writes the string with every surrogate replaced by the replacement
+/// character `"\u{FFFD}"`.
+impl fmt::Display for Wtf8 {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        let wtf8_bytes = &self.bytes;
+        let mut pos = 0;
+        while let Some((surrogate_pos, _)) = self.next_surrogate(pos) {
+            // Emit the surrogate-free run, then stand in for the surrogate.
+            let valid_run = unsafe {
+                str::from_utf8_unchecked(&wtf8_bytes[pos .. surrogate_pos])
+            };
+            formatter.write_str(valid_run)?;
+            formatter.write_str(UTF8_REPLACEMENT_CHARACTER)?;
+            pos = surrogate_pos + 3;
+        }
+
+        let s = unsafe {
+            str::from_utf8_unchecked(&wtf8_bytes[pos..])
+        };
+        if pos == 0 {
+            // No surrogate was found: format the whole string as a `str`.
+            s.fmt(formatter)
+        } else {
+            formatter.write_str(s)
+        }
+    }
+}
+
+impl Wtf8 {
+ /// Creates a WTF-8 slice from a UTF-8 `&str` slice.
+ ///
+ /// Since WTF-8 is a superset of UTF-8, this always succeeds.
+ #[inline]
+ pub fn from_str(value: &str) -> &Wtf8 {
+ unsafe { Wtf8::from_bytes_unchecked(value.as_bytes()) }
+ }
+
+ /// Creates a WTF-8 slice from a WTF-8 byte slice.
+ ///
+ /// Since the byte slice is not checked for valid WTF-8, this function is
+ /// marked unsafe.
+ #[inline]
+ unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 {
+ mem::transmute(value)
+ }
+
+ /// Creates a mutable WTF-8 slice from a mutable WTF-8 byte slice.
+ ///
+ /// Since the byte slice is not checked for valid WTF-8, this function is
+ /// marked unsafe.
+ #[inline]
+ unsafe fn from_mut_bytes_unchecked(value: &mut [u8]) -> &mut Wtf8 {
+ mem::transmute(value)
+ }
+
+ /// Returns the length, in WTF-8 bytes.
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.bytes.len()
+ }
+
+ /// Returns `true` if the slice contains no bytes.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.bytes.is_empty()
+ }
+
+ /// Returns the code point at `position` if it is in the ASCII range,
+ /// or `b'\xFF'` otherwise.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `position` is beyond the end of the string.
+ #[inline]
+ pub fn ascii_byte_at(&self, position: usize) -> u8 {
+ match self.bytes[position] {
+ ascii_byte @ 0x00 ... 0x7F => ascii_byte,
+ _ => 0xFF
+ }
+ }
+
+ /// Returns an iterator for the string’s code points.
+ #[inline]
+ pub fn code_points(&self) -> Wtf8CodePoints {
+ Wtf8CodePoints { bytes: self.bytes.iter() }
+ }
+
+ /// Tries to convert the string to UTF-8 and return a `&str` slice.
+ ///
+ /// Returns `None` if the string contains surrogates.
+ ///
+ /// This does not copy the data.
+ #[inline]
+ pub fn as_str(&self) -> Option<&str> {
+ // Well-formed WTF-8 is also well-formed UTF-8
+ // if and only if it contains no surrogate.
+ match self.next_surrogate(0) {
+ None => Some(unsafe { str::from_utf8_unchecked(&self.bytes) }),
+ Some(_) => None,
+ }
+ }
+
+ /// Lossily converts the string to UTF-8.
+ /// Returns a UTF-8 `&str` slice if the contents are well-formed in UTF-8.
+ ///
+ /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”).
+ ///
+ /// This only copies the data if necessary (if it contains any surrogate).
+ pub fn to_string_lossy(&self) -> Cow<str> {
+ let surrogate_pos = match self.next_surrogate(0) {
+ None => return Cow::Borrowed(unsafe { str::from_utf8_unchecked(&self.bytes) }),
+ Some((pos, _)) => pos,
+ };
+ let wtf8_bytes = &self.bytes;
+ // The replacement character is three bytes long, the same as the
+ // surrogate it replaces, so the output is as long as the input.
+ let mut utf8_bytes = Vec::with_capacity(self.len());
+ utf8_bytes.extend_from_slice(&wtf8_bytes[..surrogate_pos]);
+ utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
+ let mut pos = surrogate_pos + 3;
+ loop {
+ match self.next_surrogate(pos) {
+ Some((surrogate_pos, _)) => {
+ utf8_bytes.extend_from_slice(&wtf8_bytes[pos .. surrogate_pos]);
+ utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
+ pos = surrogate_pos + 3;
+ },
+ None => {
+ utf8_bytes.extend_from_slice(&wtf8_bytes[pos..]);
+ return Cow::Owned(unsafe { String::from_utf8_unchecked(utf8_bytes) })
+ }
+ }
+ }
+ }
+
+ /// Converts the WTF-8 string to potentially ill-formed UTF-16
+ /// and return an iterator of 16-bit code units.
+ ///
+ /// This is lossless:
+ /// calling `Wtf8Buf::from_wide` on the resulting code units
+ /// would always return the original WTF-8 string.
+ #[inline]
+ pub fn encode_wide(&self) -> EncodeWide {
+ EncodeWide { code_points: self.code_points(), extra: 0 }
+ }
+
+ /// Scans forward from byte offset `pos` and returns the byte position and
+ /// value of the first surrogate found, or `None` if the rest of the string
+ /// contains none.
+ #[inline]
+ fn next_surrogate(&self, mut pos: usize) -> Option<(usize, u16)> {
+ let mut iter = self.bytes[pos..].iter();
+ loop {
+ // The lead byte determines how many bytes the sequence occupies.
+ let b = *iter.next()?;
+ if b < 0x80 {
+ pos += 1;
+ } else if b < 0xE0 {
+ iter.next();
+ pos += 2;
+ } else if b == 0xED {
+ // A 0xED lead byte followed by a byte >= 0xA0 is a surrogate.
+ match (iter.next(), iter.next()) {
+ (Some(&b2), Some(&b3)) if b2 >= 0xA0 => {
+ return Some((pos, decode_surrogate(b2, b3)))
+ }
+ _ => pos += 3
+ }
+ } else if b < 0xF0 {
+ iter.next();
+ iter.next();
+ pos += 3;
+ } else {
+ iter.next();
+ iter.next();
+ iter.next();
+ pos += 4;
+ }
+ }
+ }
+
+ /// Returns the lead surrogate encoded by the last three bytes of the
+ /// string, if they form one (`0xED`, then a byte in `0xA0...0xAF`).
+ #[inline]
+ fn final_lead_surrogate(&self) -> Option<u16> {
+ let len = self.len();
+ if len < 3 {
+ return None
+ }
+ match &self.bytes[(len - 3)..] {
+ &[0xED, b2 @ 0xA0...0xAF, b3] => Some(decode_surrogate(b2, b3)),
+ _ => None
+ }
+ }
+
+ /// Returns the trail surrogate encoded by the first three bytes of the
+ /// string, if they form one (`0xED`, then a byte in `0xB0...0xBF`).
+ #[inline]
+ fn initial_trail_surrogate(&self) -> Option<u16> {
+ let len = self.len();
+ if len < 3 {
+ return None
+ }
+ match &self.bytes[..3] {
+ &[0xED, b2 @ 0xB0...0xBF, b3] => Some(decode_surrogate(b2, b3)),
+ _ => None
+ }
+ }
+
+ /// Boxes this `Wtf8`.
+ #[inline]
+ pub fn into_box(&self) -> Box<Wtf8> {
+ let boxed: Box<[u8]> = self.bytes.into();
+ unsafe { mem::transmute(boxed) }
+ }
+
+ /// Creates a boxed, empty `Wtf8`.
+ pub fn empty_box() -> Box<Wtf8> {
+ let boxed: Box<[u8]> = Default::default();
+ unsafe { mem::transmute(boxed) }
+ }
+
+ /// Creates an `Arc<Wtf8>` from this slice's bytes.
+ #[inline]
+ pub fn into_arc(&self) -> Arc<Wtf8> {
+ let arc: Arc<[u8]> = Arc::from(&self.bytes);
+ // Reinterpret the `[u8]` allocation as `Wtf8` via its raw pointer.
+ unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Wtf8) }
+ }
+
+ /// Creates an `Rc<Wtf8>` from this slice's bytes.
+ #[inline]
+ pub fn into_rc(&self) -> Rc<Wtf8> {
+ let rc: Rc<[u8]> = Rc::from(&self.bytes);
+ // Reinterpret the `[u8]` allocation as `Wtf8` via its raw pointer.
+ unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Wtf8) }
+ }
+}
+
+
+/// Slices the string over the byte range [`begin`..`end`).
+///
+/// # Panics
+///
+/// Panics unless both `begin` and `end` sit on code point boundaries
+/// within the string and `begin` does not exceed `end`.
+impl ops::Index<ops::Range<usize>> for Wtf8 {
+    type Output = Wtf8;
+
+    #[inline]
+    fn index(&self, range: ops::Range<usize>) -> &Wtf8 {
+        let (begin, end) = (range.start, range.end);
+        // `is_code_point_boundary` also rejects indices outside [0, .len()]
+        let in_bounds_and_aligned = begin <= end
+            && is_code_point_boundary(self, begin)
+            && is_code_point_boundary(self, end);
+        if !in_bounds_and_aligned {
+            slice_error_fail(self, begin, end)
+        }
+        unsafe { slice_unchecked(self, begin, end) }
+    }
+}
+
+/// Slices the string from byte `begin` through to its end.
+///
+/// # Panics
+///
+/// Panics unless `begin` sits on a code point boundary within the string.
+impl ops::Index<ops::RangeFrom<usize>> for Wtf8 {
+    type Output = Wtf8;
+
+    #[inline]
+    fn index(&self, range: ops::RangeFrom<usize>) -> &Wtf8 {
+        let begin = range.start;
+        // `is_code_point_boundary` also rejects indices outside [0, .len()]
+        if !is_code_point_boundary(self, begin) {
+            slice_error_fail(self, begin, self.len())
+        }
+        unsafe { slice_unchecked(self, begin, self.len()) }
+    }
+}
+
+/// Slices the string from its start up to byte `end`.
+///
+/// # Panics
+///
+/// Panics unless `end` sits on a code point boundary within the string.
+impl ops::Index<ops::RangeTo<usize>> for Wtf8 {
+    type Output = Wtf8;
+
+    #[inline]
+    fn index(&self, range: ops::RangeTo<usize>) -> &Wtf8 {
+        let end = range.end;
+        // `is_code_point_boundary` also rejects indices outside [0, .len()]
+        if !is_code_point_boundary(self, end) {
+            slice_error_fail(self, 0, end)
+        }
+        unsafe { slice_unchecked(self, 0, end) }
+    }
+}
+
+/// Return the whole string unchanged for the full range `..`.
+impl ops::Index<ops::RangeFull> for Wtf8 {
+ type Output = Wtf8;
+
+ #[inline]
+ fn index(&self, _range: ops::RangeFull) -> &Wtf8 {
+ self
+ }
+}
+
+/// Rebuilds a surrogate's 16-bit value from the second and third bytes of
+/// its three-byte encoding; the first byte is assumed to be 0xED.
+#[inline]
+fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 {
+    // Each of the two bytes contributes its low six bits.
+    let upper_bits = u16::from(second_byte & 0x3F) << 6;
+    let lower_bits = u16::from(third_byte & 0x3F);
+    0xD800 | upper_bits | lower_bits
+}
+
+/// Combines a lead and a trail surrogate into the supplementary `char`
+/// they encode together.
+#[inline]
+fn decode_surrogate_pair(lead: u16, trail: u16) -> char {
+    let lead_offset = (lead - 0xD800) as u32;
+    let trail_offset = (trail - 0xDC00) as u32;
+    let code_point = 0x10000 + ((lead_offset << 10) | trail_offset);
+    unsafe { char::from_u32_unchecked(code_point) }
+}
+
+/// Reports whether `index` is a code point boundary of `slice`: either the
+/// end of the slice, or the position of a byte outside `128..192`.
+/// Copied from core::str::StrPrelude::is_char_boundary
+#[inline]
+pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool {
+    match slice.bytes.get(index) {
+        Some(&byte) => byte < 128 || byte >= 192,
+        // Past the last byte only the end of the slice itself is a boundary.
+        None => index == slice.len(),
+    }
+}
+
+/// Returns the sub-slice of `s` covering bytes `begin..end` without checking
+/// bounds or code point boundaries.
+///
+/// NOTE(review): callers in this file validate `begin <= end` and both
+/// indices with `is_code_point_boundary` before calling — confirm for any
+/// other caller.
+///
+/// Copied from core::str::raw::slice_unchecked
+#[inline]
+pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 {
+ // memory layout of an &[u8] and &Wtf8 are the same
+ Wtf8::from_bytes_unchecked(slice::from_raw_parts(
+ s.bytes.as_ptr().offset(begin as isize),
+ end - begin
+ ))
+}
+
+/// Panics to report a failed slicing attempt on `s`; never returns.
+///
+/// Asserts `begin <= end` first, then panics with a message naming both
+/// indices and the string.
+///
+/// Copied from core::str::raw::slice_error_fail
+#[inline(never)]
+pub fn slice_error_fail(s: &Wtf8, begin: usize, end: usize) -> ! {
+ assert!(begin <= end);
+ panic!("index {} and/or {} in `{:?}` do not lie on character boundary",
+ begin, end, s);
+}
+
+/// Iterator for the code points of a WTF-8 string.
+///
+/// Created with the method `.code_points()`.
+#[derive(Clone)]
+pub struct Wtf8CodePoints<'a> {
+ bytes: slice::Iter<'a, u8>
+}
+
+impl<'a> Iterator for Wtf8CodePoints<'a> {
+    type Item = CodePoint;
+
+    /// Decodes the next code point from the remaining bytes.
+    #[inline]
+    fn next(&mut self) -> Option<CodePoint> {
+        let value = next_code_point(&mut self.bytes)?;
+        Some(CodePoint { value })
+    }
+
+    /// Bounds the remaining count by the remaining byte length: at least a
+    /// quarter of it (rounded up), at most all of it.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let remaining_bytes = self.bytes.len();
+        let at_least = remaining_bytes.saturating_add(3) / 4;
+        (at_least, Some(remaining_bytes))
+    }
+}
+
+/// Generates a wide character sequence for potentially ill-formed UTF-16.
+#[stable(feature = "rust1", since = "1.0.0")]
+#[derive(Clone)]
+pub struct EncodeWide<'a> {
+ code_points: Wtf8CodePoints<'a>,
+ extra: u16
+}
+
+// Copied from libunicode/u_str.rs
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a> Iterator for EncodeWide<'a> {
+    type Item = u16;
+
+    /// Yields the next UTF-16 code unit.
+    ///
+    /// When a code point encodes to two units, the second one is parked in
+    /// `self.extra` and handed out by the following call.
+    #[inline]
+    fn next(&mut self) -> Option<u16> {
+        // A non-zero `extra` is the second unit left over from the previous call.
+        if self.extra != 0 {
+            let tmp = self.extra;
+            self.extra = 0;
+            return Some(tmp);
+        }
+
+        let mut buf = [0; 2];
+        self.code_points.next().map(|code_point| {
+            // The code point may be a surrogate, hence the unchecked conversion.
+            let c = unsafe {
+                char::from_u32_unchecked(code_point.value)
+            };
+            let n = c.encode_utf16(&mut buf).len();
+            if n == 2 {
+                self.extra = buf[1];
+            }
+            buf[0]
+        })
+    }
+
+    /// Bounds the number of code units still to be produced.
+    ///
+    /// A unit parked in `self.extra` is counted in both bounds, so the upper
+    /// bound is never smaller than the number of items actually remaining.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (low, high) = self.code_points.size_hint();
+        // One more unit is owed when a second unit is still buffered.
+        let pending = (self.extra != 0) as usize;
+        // every code point gets either one u16 or two u16,
+        // so this iterator is between 1 or 2 times as
+        // long as the underlying iterator.
+        (low + pending,
+         high.and_then(|n| n.checked_mul(2)).and_then(|n| n.checked_add(pending)))
+    }
+}
+
+/// Hashes a code point by hashing its numeric `u32` value.
+impl Hash for CodePoint {
+ #[inline]
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.value.hash(state)
+ }
+}
+
+/// Hashes the raw bytes followed by a single `0xfe` byte, exactly like the
+/// `Hash` impl for `Wtf8`.
+impl Hash for Wtf8Buf {
+ #[inline]
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ state.write(&self.bytes);
+ // NOTE(review): presumably a terminator so adjacent strings hash
+ // differently from their concatenation — confirm.
+ 0xfeu8.hash(state)
+ }
+}
+
+/// Hashes the raw bytes followed by a single `0xfe` byte, exactly like the
+/// `Hash` impl for `Wtf8Buf`.
+impl Hash for Wtf8 {
+ #[inline]
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ state.write(&self.bytes);
+ // NOTE(review): presumably a terminator so adjacent strings hash
+ // differently from their concatenation — confirm.
+ 0xfeu8.hash(state)
+ }
+}
+
+/// ASCII operations for `Wtf8`, each forwarded to the same operation on the
+/// underlying bytes.
+///
+/// NOTE(review): the case-changing methods presumably keep the data
+/// well-formed because they only alter ASCII bytes — confirm.
+impl AsciiExt for Wtf8 {
+ type Owned = Wtf8Buf;
+
+ fn is_ascii(&self) -> bool {
+ self.bytes.is_ascii()
+ }
+ fn to_ascii_uppercase(&self) -> Wtf8Buf {
+ Wtf8Buf { bytes: self.bytes.to_ascii_uppercase() }
+ }
+ fn to_ascii_lowercase(&self) -> Wtf8Buf {
+ Wtf8Buf { bytes: self.bytes.to_ascii_lowercase() }
+ }
+ fn eq_ignore_ascii_case(&self, other: &Wtf8) -> bool {
+ self.bytes.eq_ignore_ascii_case(&other.bytes)
+ }
+
+ fn make_ascii_uppercase(&mut self) { self.bytes.make_ascii_uppercase() }
+ fn make_ascii_lowercase(&mut self) { self.bytes.make_ascii_lowercase() }
+}
+
+ // Unit tests for `CodePoint`, `Wtf8Buf` and `Wtf8` (all pulled in through
+ // `use super::*`).
+ //
+ // Most tests use the fixture "aé 💩". As the `from_str` tests below pin down,
+ // it encodes to 8 bytes: `a` at offset 0, `é` at 1..3, ` ` at 3 and `💩` at
+ // 4..8. Offsets 2, 5, 6 and 7 therefore fall inside a multi-byte sequence and
+ // are used by the `#[should_panic]` tests as non-boundary positions.
+ #[cfg(test)]
+ mod tests {
+     use borrow::Cow;
+     use super::*;
+
+     // `from_u32` accepts every value up to 0x10FFFF, surrogates included,
+     // and rejects 0x110000.
+     #[test]
+     fn code_point_from_u32() {
+         assert!(CodePoint::from_u32(0).is_some());
+         assert!(CodePoint::from_u32(0xD800).is_some());
+         assert!(CodePoint::from_u32(0x10FFFF).is_some());
+         assert!(CodePoint::from_u32(0x110000).is_none());
+     }
+
+     // `to_u32` returns the value the code point was built from.
+     #[test]
+     fn code_point_to_u32() {
+         fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+         assert_eq!(c(0).to_u32(), 0);
+         assert_eq!(c(0xD800).to_u32(), 0xD800);
+         assert_eq!(c(0x10FFFF).to_u32(), 0x10FFFF);
+     }
+
+     // `from_char` keeps the scalar value of the `char`.
+     #[test]
+     fn code_point_from_char() {
+         assert_eq!(CodePoint::from_char('a').to_u32(), 0x61);
+         assert_eq!(CodePoint::from_char('💩').to_u32(), 0x1F4A9);
+     }
+
+     // The `Debug` output is `U+` followed by at least four uppercase hex digits.
+     #[test]
+     fn code_point_to_string() {
+         assert_eq!(format!("{:?}", CodePoint::from_char('a')), "U+0061");
+         assert_eq!(format!("{:?}", CodePoint::from_char('💩')), "U+1F4A9");
+     }
+
+     // `to_char` yields `None` for a surrogate and `Some` otherwise.
+     #[test]
+     fn code_point_to_char() {
+         fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+         assert_eq!(c(0x61).to_char(), Some('a'));
+         assert_eq!(c(0x1F4A9).to_char(), Some('💩'));
+         assert_eq!(c(0xD800).to_char(), None);
+     }
+
+     // `to_char_lossy` substitutes U+FFFD for a surrogate.
+     #[test]
+     fn code_point_to_char_lossy() {
+         fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+         assert_eq!(c(0x61).to_char_lossy(), 'a');
+         assert_eq!(c(0x1F4A9).to_char_lossy(), '💩');
+         assert_eq!(c(0xD800).to_char_lossy(), '\u{FFFD}');
+     }
+
+     // A new buffer holds no bytes.
+     #[test]
+     fn wtf8buf_new() {
+         assert_eq!(Wtf8Buf::new().bytes, b"");
+     }
+
+     // Building from `&str` stores the string's UTF-8 bytes.
+     #[test]
+     fn wtf8buf_from_str() {
+         assert_eq!(Wtf8Buf::from_str("").bytes, b"");
+         assert_eq!(Wtf8Buf::from_str("aé 💩").bytes,
+                    b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+     }
+
+     // Building from an owned `String` stores the same bytes as `from_str`.
+     #[test]
+     fn wtf8buf_from_string() {
+         assert_eq!(Wtf8Buf::from_string(String::from("")).bytes, b"");
+         assert_eq!(Wtf8Buf::from_string(String::from("aé 💩")).bytes,
+                    b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+     }
+
+     // Decoding 16-bit units: the first 0xD83D has no trail after it and is
+     // kept as a 3-byte surrogate; the following 0xD83D 0xDCA9 pair becomes
+     // one 4-byte sequence.
+     #[test]
+     fn wtf8buf_from_wide() {
+         assert_eq!(Wtf8Buf::from_wide(&[]).bytes, b"");
+         assert_eq!(Wtf8Buf::from_wide(
+                       &[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes,
+                    b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9");
+     }
+
+     // `push_str` appends the UTF-8 bytes of the argument.
+     #[test]
+     fn wtf8buf_push_str() {
+         let mut string = Wtf8Buf::new();
+         assert_eq!(string.bytes, b"");
+         string.push_str("aé 💩");
+         assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+     }
+
+     // `push_char` appends the UTF-8 bytes of a single `char`.
+     #[test]
+     fn wtf8buf_push_char() {
+         let mut string = Wtf8Buf::from_str("aé ");
+         assert_eq!(string.bytes, b"a\xC3\xA9 ");
+         string.push_char('💩');
+         assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+     }
+
+     // `push` of a `CodePoint`: a trail surrogate pushed directly after a lead
+     // surrogate is merged into one 4-byte sequence; every other combination
+     // is stored as pushed.
+     #[test]
+     fn wtf8buf_push() {
+         let mut string = Wtf8Buf::from_str("aé ");
+         assert_eq!(string.bytes, b"a\xC3\xA9 ");
+         string.push(CodePoint::from_char('💩'));
+         assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+
+         fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+
+         let mut string = Wtf8Buf::new();
+         string.push(c(0xD83D));  // lead
+         string.push(c(0xDCA9));  // trail
+         assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9");  // Magic!
+
+         let mut string = Wtf8Buf::new();
+         string.push(c(0xD83D));  // lead
+         string.push(c(0x20));  // not surrogate
+         string.push(c(0xDCA9));  // trail
+         assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
+
+         let mut string = Wtf8Buf::new();
+         string.push(c(0xD800));  // lead
+         string.push(c(0xDBFF));  // lead
+         assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
+
+         let mut string = Wtf8Buf::new();
+         string.push(c(0xD800));  // lead
+         string.push(c(0xE000));  // not surrogate
+         assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
+
+         let mut string = Wtf8Buf::new();
+         string.push(c(0xD7FF));  // not surrogate
+         string.push(c(0xDC00));  // trail
+         assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
+
+         let mut string = Wtf8Buf::new();
+         string.push(c(0x61));  // not surrogate, < 3 bytes
+         string.push(c(0xDC00));  // trail
+         assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
+
+         let mut string = Wtf8Buf::new();
+         string.push(c(0xDC00));  // trail
+         assert_eq!(string.bytes, b"\xED\xB0\x80");
+     }
+
+     // `push_wtf8` mirrors the `push` cases above, but appends whole `Wtf8`
+     // slices built from raw bytes.
+     #[test]
+     fn wtf8buf_push_wtf8() {
+         let mut string = Wtf8Buf::from_str("aé");
+         assert_eq!(string.bytes, b"a\xC3\xA9");
+         string.push_wtf8(Wtf8::from_str(" 💩"));
+         assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+
+         // Every literal passed to `w` below is either ASCII or a single
+         // 3-byte sequence, matching what the `push` test shows `Wtf8Buf`
+         // itself produces for the same code points.
+         fn w(v: &[u8]) -> &Wtf8 { unsafe { Wtf8::from_bytes_unchecked(v) } }
+
+         let mut string = Wtf8Buf::new();
+         string.push_wtf8(w(b"\xED\xA0\xBD"));  // lead
+         string.push_wtf8(w(b"\xED\xB2\xA9"));  // trail
+         assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9");  // Magic!
+
+         let mut string = Wtf8Buf::new();
+         string.push_wtf8(w(b"\xED\xA0\xBD"));  // lead
+         string.push_wtf8(w(b" "));  // not surrogate
+         string.push_wtf8(w(b"\xED\xB2\xA9"));  // trail
+         assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
+
+         let mut string = Wtf8Buf::new();
+         string.push_wtf8(w(b"\xED\xA0\x80"));  // lead
+         string.push_wtf8(w(b"\xED\xAF\xBF"));  // lead
+         assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
+
+         let mut string = Wtf8Buf::new();
+         string.push_wtf8(w(b"\xED\xA0\x80"));  // lead
+         string.push_wtf8(w(b"\xEE\x80\x80"));  // not surrogate
+         assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
+
+         let mut string = Wtf8Buf::new();
+         string.push_wtf8(w(b"\xED\x9F\xBF"));  // not surrogate
+         string.push_wtf8(w(b"\xED\xB0\x80"));  // trail
+         assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
+
+         let mut string = Wtf8Buf::new();
+         string.push_wtf8(w(b"a"));  // not surrogate, < 3 bytes
+         string.push_wtf8(w(b"\xED\xB0\x80"));  // trail
+         assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
+
+         let mut string = Wtf8Buf::new();
+         string.push_wtf8(w(b"\xED\xB0\x80"));  // trail
+         assert_eq!(string.bytes, b"\xED\xB0\x80");
+     }
+
+     // Truncating at offset 1 (right after `a`) keeps only `a`.
+     #[test]
+     fn wtf8buf_truncate() {
+         let mut string = Wtf8Buf::from_str("aé");
+         string.truncate(1);
+         assert_eq!(string.bytes, b"a");
+     }
+
+     // Offset 2 is in the middle of the two-byte `é`; `truncate` must panic.
+     #[test]
+     #[should_panic]
+     fn wtf8buf_truncate_fail_code_point_boundary() {
+         let mut string = Wtf8Buf::from_str("aé");
+         string.truncate(2);
+     }
+
+     // "aé" is 3 bytes long; truncating to 4 must panic.
+     #[test]
+     #[should_panic]
+     fn wtf8buf_truncate_fail_longer() {
+         let mut string = Wtf8Buf::from_str("aé");
+         string.truncate(4);
+     }
+
+     // `into_string` succeeds for well-formed content and hands the buffer
+     // back in `Err` once it contains a lone surrogate.
+     #[test]
+     fn wtf8buf_into_string() {
+         let mut string = Wtf8Buf::from_str("aé 💩");
+         assert_eq!(string.clone().into_string(), Ok(String::from("aé 💩")));
+         string.push(CodePoint::from_u32(0xD800).unwrap());
+         assert_eq!(string.clone().into_string(), Err(string));
+     }
+
+     // `into_string_lossy` replaces a lone surrogate with U+FFFD.
+     #[test]
+     fn wtf8buf_into_string_lossy() {
+         let mut string = Wtf8Buf::from_str("aé 💩");
+         assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩"));
+         string.push(CodePoint::from_u32(0xD800).unwrap());
+         assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩�"));
+     }
+
+     // Collecting `CodePoint`s gives the same bytes as pushing them one by one
+     // (same cases as `wtf8buf_push`).
+     #[test]
+     fn wtf8buf_from_iterator() {
+         fn f(values: &[u32]) -> Wtf8Buf {
+             values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::<Wtf8Buf>()
+         }
+         assert_eq!(f(&[0x61, 0xE9, 0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+
+         assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9");  // Magic!
+         assert_eq!(f(&[0xD83D, 0x20, 0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
+         assert_eq!(f(&[0xD800, 0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF");
+         assert_eq!(f(&[0xD800, 0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80");
+         assert_eq!(f(&[0xD7FF, 0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80");
+         assert_eq!(f(&[0x61, 0xDC00]).bytes, b"\x61\xED\xB0\x80");
+         assert_eq!(f(&[0xDC00]).bytes, b"\xED\xB0\x80");
+     }
+
+     // `extend` on an already-collected buffer: a trail surrogate arriving via
+     // `extend` still merges with a lead surrogate at the end of the buffer.
+     #[test]
+     fn wtf8buf_extend() {
+         fn e(initial: &[u32], extended: &[u32]) -> Wtf8Buf {
+             fn c(value: &u32) -> CodePoint { CodePoint::from_u32(*value).unwrap() }
+             let mut string = initial.iter().map(c).collect::<Wtf8Buf>();
+             string.extend(extended.iter().map(c));
+             string
+         }
+
+         assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes,
+                    b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+
+         assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9");  // Magic!
+         assert_eq!(e(&[0xD83D, 0x20], &[0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
+         assert_eq!(e(&[0xD800], &[0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF");
+         assert_eq!(e(&[0xD800], &[0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80");
+         assert_eq!(e(&[0xD7FF], &[0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80");
+         assert_eq!(e(&[0x61], &[0xDC00]).bytes, b"\x61\xED\xB0\x80");
+         assert_eq!(e(&[], &[0xDC00]).bytes, b"\xED\xB0\x80");
+     }
+
+     // `Debug` output is quoted, escapes control characters, and renders a
+     // lone surrogate as a `\u{...}` escape.
+     #[test]
+     fn wtf8buf_show() {
+         let mut string = Wtf8Buf::from_str("a\té \u{7f}💩\r");
+         string.push(CodePoint::from_u32(0xD800).unwrap());
+         assert_eq!(format!("{:?}", string), "\"a\\té \\u{7f}\u{1f4a9}\\r\\u{d800}\"");
+     }
+
+     // `as_slice` compares equal to a `Wtf8` built from the same text.
+     #[test]
+     fn wtf8buf_as_slice() {
+         assert_eq!(Wtf8Buf::from_str("aé").as_slice(), Wtf8::from_str("aé"));
+     }
+
+     // For this surrogate-free text, `Debug` output matches that of `&str`.
+     #[test]
+     fn wtf8buf_show_str() {
+         let text = "a\té 💩\r";
+         let string = Wtf8Buf::from_str(text);
+         assert_eq!(format!("{:?}", text), format!("{:?}", string));
+     }
+
+     // A borrowed `Wtf8` built from `&str` exposes the string's UTF-8 bytes.
+     #[test]
+     fn wtf8_from_str() {
+         assert_eq!(&Wtf8::from_str("").bytes, b"");
+         assert_eq!(&Wtf8::from_str("aé 💩").bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+     }
+
+     // `len` counts bytes, not code points.
+     #[test]
+     fn wtf8_len() {
+         assert_eq!(Wtf8::from_str("").len(), 0);
+         assert_eq!(Wtf8::from_str("aé 💩").len(), 8);
+     }
+
+     // Slicing with `start..end` on code point boundaries.
+     #[test]
+     fn wtf8_slice() {
+         assert_eq!(&Wtf8::from_str("aé 💩")[1.. 4].bytes, b"\xC3\xA9 ");
+     }
+
+     // Start offset 2 is inside `é`, so `start..end` indexing must panic.
+     #[test]
+     #[should_panic]
+     fn wtf8_slice_not_code_point_boundary() {
+         let _ = &Wtf8::from_str("aé 💩")[2.. 4];
+     }
+
+     // Slicing with `start..` on a code point boundary.
+     #[test]
+     fn wtf8_slice_from() {
+         assert_eq!(&Wtf8::from_str("aé 💩")[1..].bytes, b"\xC3\xA9 \xF0\x9F\x92\xA9");
+     }
+
+     // Start offset 2 is inside `é`, so `start..` indexing must panic.
+     #[test]
+     #[should_panic]
+     fn wtf8_slice_from_not_code_point_boundary() {
+         let _ = &Wtf8::from_str("aé 💩")[2..];
+     }
+
+     // Slicing with `..end` on a code point boundary.
+     #[test]
+     fn wtf8_slice_to() {
+         assert_eq!(&Wtf8::from_str("aé 💩")[..4].bytes, b"a\xC3\xA9 ");
+     }
+
+     // End offset 5 is inside the 4-byte `💩`, so `..end` indexing must panic.
+     // This uses a `..end` range on purpose: a `start..` range here would only
+     // repeat `wtf8_slice_from_not_code_point_boundary`.
+     #[test]
+     #[should_panic]
+     fn wtf8_slice_to_not_code_point_boundary() {
+         let _ = &Wtf8::from_str("aé 💩")[..5];
+     }
+
+     // `ascii_byte_at` returns the byte itself for ASCII positions and 0xFF
+     // for positions holding part of a multi-byte sequence.
+     #[test]
+     fn wtf8_ascii_byte_at() {
+         let slice = Wtf8::from_str("aé 💩");
+         assert_eq!(slice.ascii_byte_at(0), b'a');
+         assert_eq!(slice.ascii_byte_at(1), b'\xFF');
+         assert_eq!(slice.ascii_byte_at(2), b'\xFF');
+         assert_eq!(slice.ascii_byte_at(3), b' ');
+         assert_eq!(slice.ascii_byte_at(4), b'\xFF');
+     }
+
+     // Iterating code points: a lone lead surrogate maps to `None` through
+     // `to_char`; once its trail is pushed the pair reads back as one `char`.
+     #[test]
+     fn wtf8_code_points() {
+         fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+         fn cp(string: &Wtf8Buf) -> Vec<Option<char>> {
+             string.code_points().map(|c| c.to_char()).collect::<Vec<_>>()
+         }
+         let mut string = Wtf8Buf::from_str("é ");
+         assert_eq!(cp(&string), [Some('é'), Some(' ')]);
+         string.push(c(0xD83D));
+         assert_eq!(cp(&string), [Some('é'), Some(' '), None]);
+         string.push(c(0xDCA9));
+         assert_eq!(cp(&string), [Some('é'), Some(' '), Some('💩')]);
+     }
+
+     // `as_str` is `Some` for well-formed content and `None` once a lone
+     // surrogate is present.
+     #[test]
+     fn wtf8_as_str() {
+         assert_eq!(Wtf8::from_str("").as_str(), Some(""));
+         assert_eq!(Wtf8::from_str("aé 💩").as_str(), Some("aé 💩"));
+         let mut string = Wtf8Buf::new();
+         string.push(CodePoint::from_u32(0xD800).unwrap());
+         assert_eq!(string.as_str(), None);
+     }
+
+     // `to_string_lossy` compares equal to a borrowed `Cow` for well-formed
+     // input and to an owned one, with U+FFFD substituted, when a lone
+     // surrogate is present.
+     #[test]
+     fn wtf8_to_string_lossy() {
+         assert_eq!(Wtf8::from_str("").to_string_lossy(), Cow::Borrowed(""));
+         assert_eq!(Wtf8::from_str("aé 💩").to_string_lossy(), Cow::Borrowed("aé 💩"));
+         let mut string = Wtf8Buf::from_str("aé 💩");
+         string.push(CodePoint::from_u32(0xD800).unwrap());
+         let expected: Cow<str> = Cow::Owned(String::from("aé 💩�"));
+         assert_eq!(string.to_string_lossy(), expected);
+     }
+
+     // `Display` prints well-formed text unchanged and a lone surrogate as
+     // U+FFFD.
+     #[test]
+     fn wtf8_display() {
+         // The callers below pass either `str::as_bytes` output or the inner
+         // bytes of a `Wtf8Buf`.
+         fn d(b: &[u8]) -> String {
+             format!("{}", &unsafe { Wtf8::from_bytes_unchecked(b) })
+         }
+
+         assert_eq!("", d("".as_bytes()));
+         assert_eq!("aé 💩", d("aé 💩".as_bytes()));
+
+         let mut string = Wtf8Buf::from_str("aé 💩");
+         string.push(CodePoint::from_u32(0xD800).unwrap());
+         assert_eq!("aé 💩�", d(string.as_inner()));
+     }
+
+     // `encode_wide` emits 16-bit units: the lone lead surrogate comes out
+     // as-is and `💩` comes out as the pair 0xD83D 0xDCA9.
+     #[test]
+     fn wtf8_encode_wide() {
+         let mut string = Wtf8Buf::from_str("aé ");
+         string.push(CodePoint::from_u32(0xD83D).unwrap());
+         string.push_char('💩');
+         assert_eq!(string.encode_wide().collect::<Vec<_>>(),
+                    vec![0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]);
+     }
+ }