diff options
| author | Fenrir <[email protected]> | 2018-01-21 14:06:28 -0700 |
|---|---|---|
| committer | FenrirWolf <[email protected]> | 2018-01-21 19:16:33 -0700 |
| commit | 23be3f4885688e5e0011005e2295c75168854c0a (patch) | |
| tree | dd0850f9c73c489e114a761d5c0757f3dbec3a65 /ctr-std/src/sys_common | |
| parent | Update CI for Rust nightly-2017-12-01 + other fixes (diff) | |
| download | ctru-rs-23be3f4885688e5e0011005e2295c75168854c0a.tar.xz ctru-rs-23be3f4885688e5e0011005e2295c75168854c0a.zip | |
Recreate ctr-std from latest nightly
Diffstat (limited to 'ctr-std/src/sys_common')
| -rw-r--r-- | ctr-std/src/sys_common/backtrace.rs | 461 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/bytestring.rs | 56 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/gnu/libbacktrace.rs | 216 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/gnu/mod.rs | 15 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/io.rs | 129 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/memchr.rs | 230 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/mod.rs | 38 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/net.rs | 630 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/poison.rs | 88 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/process.rs | 124 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/remutex.rs | 13 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/thread.rs | 26 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/thread_info.rs | 4 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/thread_local.rs | 82 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/util.rs | 29 | ||||
| -rw-r--r-- | ctr-std/src/sys_common/wtf8.rs | 1284 |
16 files changed, 3003 insertions, 422 deletions
diff --git a/ctr-std/src/sys_common/backtrace.rs b/ctr-std/src/sys_common/backtrace.rs new file mode 100644 index 0000000..36cbce2 --- /dev/null +++ b/ctr-std/src/sys_common/backtrace.rs @@ -0,0 +1,461 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/// Common code for printing the backtrace in the same way across the different +/// supported platforms. + +use env; +use io::prelude::*; +use io; +use str; +use sync::atomic::{self, Ordering}; +use path::{self, Path}; +use sys::mutex::Mutex; +use ptr; + +pub use sys::backtrace::{ + unwind_backtrace, + resolve_symname, + foreach_symbol_fileline, + BacktraceContext +}; + +#[cfg(target_pointer_width = "64")] +pub const HEX_WIDTH: usize = 18; + +#[cfg(target_pointer_width = "32")] +pub const HEX_WIDTH: usize = 10; + +/// Represents an item in the backtrace list. See `unwind_backtrace` for how +/// it is created. +#[derive(Debug, Copy, Clone)] +pub struct Frame { + /// Exact address of the call that failed. + pub exact_position: *const u8, + /// Address of the enclosing function. + pub symbol_addr: *const u8, +} + +/// Max number of frames to print. +const MAX_NB_FRAMES: usize = 100; + +/// Prints the current backtrace. +pub fn print(w: &mut Write, format: PrintFormat) -> io::Result<()> { + static LOCK: Mutex = Mutex::new(); + + // Use a lock to prevent mixed output in multithreading context. + // Some platforms also requires it, like `SymFromAddr` on Windows. + unsafe { + LOCK.lock(); + let res = _print(w, format); + LOCK.unlock(); + res + } +} + +fn _print(w: &mut Write, format: PrintFormat) -> io::Result<()> { + let mut frames = [Frame { + exact_position: ptr::null(), + symbol_addr: ptr::null(), + }; MAX_NB_FRAMES]; + let (nb_frames, context) = unwind_backtrace(&mut frames)?; + let (skipped_before, skipped_after) = + filter_frames(&frames[..nb_frames], format, &context); + if skipped_before + skipped_after > 0 { + writeln!(w, "note: Some details are omitted, \ + run with `RUST_BACKTRACE=full` for a verbose backtrace.")?; + } + writeln!(w, "stack backtrace:")?; + + let filtered_frames = &frames[..nb_frames - skipped_after]; + for (index, frame) in filtered_frames.iter().skip(skipped_before).enumerate() { + resolve_symname(*frame, |symname| { + output(w, index, *frame, symname, format) + }, &context)?; + let has_more_filenames = foreach_symbol_fileline(*frame, |file, line| { + output_fileline(w, file, line, format) + }, &context)?; + if has_more_filenames { + w.write_all(b" <... and possibly more>")?; + } + } + + Ok(()) +} + +/// Returns a number of frames to remove at the beginning and at the end of the +/// backtrace, according to the backtrace format. +fn filter_frames(frames: &[Frame], + format: PrintFormat, + context: &BacktraceContext) -> (usize, usize) +{ + if format == PrintFormat::Full { + return (0, 0); + } + + let skipped_before = 0; + + let skipped_after = frames.len() - frames.iter().position(|frame| { + let mut is_marker = false; + let _ = resolve_symname(*frame, |symname| { + if let Some(mangled_symbol_name) = symname { + // Use grep to find the concerned functions + if mangled_symbol_name.contains("__rust_begin_short_backtrace") { + is_marker = true; + } + } + Ok(()) + }, context); + is_marker + }).unwrap_or(frames.len()); + + if skipped_before + skipped_after >= frames.len() { + // Avoid showing completely empty backtraces + return (0, 0); + } + + (skipped_before, skipped_after) +} + + +/// Fixed frame used to clean the backtrace with `RUST_BACKTRACE=1`. +#[inline(never)] +pub fn __rust_begin_short_backtrace<F, T>(f: F) -> T + where F: FnOnce() -> T, F: Send, T: Send +{ + f() +} + +/// Controls how the backtrace should be formated. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum PrintFormat { + /// Show all the frames with absolute path for files. + Full = 2, + /// Show only relevant data from the backtrace. + Short = 3, +} + +// For now logging is turned off by default, and this function checks to see +// whether the magical environment variable is present to see if it's turned on. +pub fn log_enabled() -> Option<PrintFormat> { + static ENABLED: atomic::AtomicIsize = atomic::AtomicIsize::new(0); + match ENABLED.load(Ordering::SeqCst) { + 0 => {}, + 1 => return None, + 2 => return Some(PrintFormat::Full), + 3 => return Some(PrintFormat::Short), + _ => unreachable!(), + } + + let val = match env::var_os("RUST_BACKTRACE") { + Some(x) => if &x == "0" { + None + } else if &x == "full" { + Some(PrintFormat::Full) + } else { + Some(PrintFormat::Short) + }, + None => None, + }; + ENABLED.store(match val { + Some(v) => v as isize, + None => 1, + }, Ordering::SeqCst); + val +} + +/// Print the symbol of the backtrace frame. +/// +/// These output functions should now be used everywhere to ensure consistency. +/// You may want to also use `output_fileline`. +fn output(w: &mut Write, idx: usize, frame: Frame, + s: Option<&str>, format: PrintFormat) -> io::Result<()> { + // Remove the `17: 0x0 - <unknown>` line. + if format == PrintFormat::Short && frame.exact_position == ptr::null() { + return Ok(()); + } + match format { + PrintFormat::Full => write!(w, + " {:2}: {:2$?} - ", + idx, + frame.exact_position, + HEX_WIDTH)?, + PrintFormat::Short => write!(w, " {:2}: ", idx)?, + } + match s { + Some(string) => demangle(w, string, format)?, + None => w.write_all(b"<unknown>")?, + } + w.write_all(b"\n") +} + +/// Print the filename and line number of the backtrace frame. +/// +/// See also `output`. +#[allow(dead_code)] +fn output_fileline(w: &mut Write, + file: &[u8], + line: u32, + format: PrintFormat) -> io::Result<()> { + // prior line: " ##: {:2$} - func" + w.write_all(b"")?; + match format { + PrintFormat::Full => write!(w, + " {:1$}", + "", + HEX_WIDTH)?, + PrintFormat::Short => write!(w, " ")?, + } + + let file = str::from_utf8(file).unwrap_or("<unknown>"); + let file_path = Path::new(file); + let mut already_printed = false; + if format == PrintFormat::Short && file_path.is_absolute() { + if let Ok(cwd) = env::current_dir() { + if let Ok(stripped) = file_path.strip_prefix(&cwd) { + if let Some(s) = stripped.to_str() { + write!(w, " at .{}{}:{}", path::MAIN_SEPARATOR, s, line)?; + already_printed = true; + } + } + } + } + if !already_printed { + write!(w, " at {}:{}", file, line)?; + } + + w.write_all(b"\n") +} + + +// All rust symbols are in theory lists of "::"-separated identifiers. Some +// assemblers, however, can't handle these characters in symbol names. To get +// around this, we use C++-style mangling. The mangling method is: +// +// 1. Prefix the symbol with "_ZN" +// 2. For each element of the path, emit the length plus the element +// 3. End the path with "E" +// +// For example, "_ZN4testE" => "test" and "_ZN3foo3barE" => "foo::bar". +// +// We're the ones printing our backtraces, so we can't rely on anything else to +// demangle our symbols. It's *much* nicer to look at demangled symbols, so +// this function is implemented to give us nice pretty output. +// +// Note that this demangler isn't quite as fancy as it could be. We have lots +// of other information in our symbols like hashes, version, type information, +// etc. Additionally, this doesn't handle glue symbols at all. +pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> { + // During ThinLTO LLVM may import and rename internal symbols, so strip out + // those endings first as they're one of the last manglings applied to + // symbol names. + let llvm = ".llvm."; + if let Some(i) = s.find(llvm) { + let candidate = &s[i + llvm.len()..]; + let all_hex = candidate.chars().all(|c| { + match c { + 'A' ... 'F' | '0' ... '9' => true, + _ => false, + } + }); + + if all_hex { + s = &s[..i]; + } + } + + // Validate the symbol. If it doesn't look like anything we're + // expecting, we just print it literally. Note that we must handle non-rust + // symbols because we could have any function in the backtrace. + let mut valid = true; + let mut inner = s; + if s.len() > 4 && s.starts_with("_ZN") && s.ends_with("E") { + inner = &s[3 .. s.len() - 1]; + // On Windows, dbghelp strips leading underscores, so we accept "ZN...E" form too. + } else if s.len() > 3 && s.starts_with("ZN") && s.ends_with("E") { + inner = &s[2 .. s.len() - 1]; + } else { + valid = false; + } + + if valid { + let mut chars = inner.chars(); + while valid { + let mut i = 0; + for c in chars.by_ref() { + if c.is_numeric() { + i = i * 10 + c as usize - '0' as usize; + } else { + break + } + } + if i == 0 { + valid = chars.next().is_none(); + break + } else if chars.by_ref().take(i - 1).count() != i - 1 { + valid = false; + } + } + } + + // Alright, let's do this. + if !valid { + writer.write_all(s.as_bytes())?; + } else { + // remove the `::hfc2edb670e5eda97` part at the end of the symbol. + if format == PrintFormat::Short { + // The symbol in still mangled. + let mut split = inner.rsplitn(2, "17h"); + match (split.next(), split.next()) { + (Some(addr), rest) => { + if addr.len() == 16 && + addr.chars().all(|c| c.is_digit(16)) + { + inner = rest.unwrap_or(""); + } + } + _ => (), + } + } + + let mut first = true; + while !inner.is_empty() { + if !first { + writer.write_all(b"::")?; + } else { + first = false; + } + let mut rest = inner; + while rest.chars().next().unwrap().is_numeric() { + rest = &rest[1..]; + } + let i: usize = inner[.. (inner.len() - rest.len())].parse().unwrap(); + inner = &rest[i..]; + rest = &rest[..i]; + if rest.starts_with("_$") { + rest = &rest[1..]; + } + while !rest.is_empty() { + if rest.starts_with(".") { + if let Some('.') = rest[1..].chars().next() { + writer.write_all(b"::")?; + rest = &rest[2..]; + } else { + writer.write_all(b".")?; + rest = &rest[1..]; + } + } else if rest.starts_with("$") { + macro_rules! demangle { + ($($pat:expr => $demangled:expr),*) => ({ + $(if rest.starts_with($pat) { + writer.write_all($demangled)?; + rest = &rest[$pat.len()..]; + } else)* + { + writer.write_all(rest.as_bytes())?; + break; + } + + }) + } + + // see src/librustc/back/link.rs for these mappings + demangle! ( + "$SP$" => b"@", + "$BP$" => b"*", + "$RF$" => b"&", + "$LT$" => b"<", + "$GT$" => b">", + "$LP$" => b"(", + "$RP$" => b")", + "$C$" => b",", + + // in theory we can demangle any Unicode code point, but + // for simplicity we just catch the common ones. + "$u7e$" => b"~", + "$u20$" => b" ", + "$u27$" => b"'", + "$u5b$" => b"[", + "$u5d$" => b"]", + "$u7b$" => b"{", + "$u7d$" => b"}", + "$u3b$" => b";", + "$u2b$" => b"+", + "$u22$" => b"\"" + ) + } else { + let idx = match rest.char_indices().find(|&(_, c)| c == '$' || c == '.') { + None => rest.len(), + Some((i, _)) => i, + }; + writer.write_all(rest[..idx].as_bytes())?; + rest = &rest[idx..]; + } + } + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use sys_common; + macro_rules! t { ($a:expr, $b:expr) => ({ + let mut m = Vec::new(); + sys_common::backtrace::demangle(&mut m, + $a, + super::PrintFormat::Full).unwrap(); + assert_eq!(String::from_utf8(m).unwrap(), $b); + }) } + + #[test] + fn demangle() { + t!("test", "test"); + t!("_ZN4testE", "test"); + t!("_ZN4test", "_ZN4test"); + t!("_ZN4test1a2bcE", "test::a::bc"); + } + + #[test] + fn demangle_dollars() { + t!("_ZN4$RP$E", ")"); + t!("_ZN8$RF$testE", "&test"); + t!("_ZN8$BP$test4foobE", "*test::foob"); + t!("_ZN9$u20$test4foobE", " test::foob"); + t!("_ZN35Bar$LT$$u5b$u32$u3b$$u20$4$u5d$$GT$E", "Bar<[u32; 4]>"); + } + + #[test] + fn demangle_many_dollars() { + t!("_ZN13test$u20$test4foobE", "test test::foob"); + t!("_ZN12test$BP$test4foobE", "test*test::foob"); + } + + #[test] + fn demangle_windows() { + t!("ZN4testE", "test"); + t!("ZN13test$u20$test4foobE", "test test::foob"); + t!("ZN12test$RF$test4foobE", "test&test::foob"); + } + + #[test] + fn demangle_elements_beginning_with_underscore() { + t!("_ZN13_$LT$test$GT$E", "<test>"); + t!("_ZN28_$u7b$$u7b$closure$u7d$$u7d$E", "{{closure}}"); + t!("_ZN15__STATIC_FMTSTRE", "__STATIC_FMTSTR"); + } + + #[test] + fn demangle_trait_impls() { + t!("_ZN71_$LT$Test$u20$$u2b$$u20$$u27$static$u20$as$u20$foo..Bar$LT$Test$GT$$GT$3barE", + "<Test + 'static as foo::Bar<Test>>::bar"); + } +} diff --git a/ctr-std/src/sys_common/bytestring.rs b/ctr-std/src/sys_common/bytestring.rs new file mode 100644 index 0000000..eb9cad0 --- /dev/null +++ b/ctr-std/src/sys_common/bytestring.rs @@ -0,0 +1,56 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![allow(dead_code)] + +use fmt::{Formatter, Result, Write}; +use std_unicode::lossy::{Utf8Lossy, Utf8LossyChunk}; + +pub fn debug_fmt_bytestring(slice: &[u8], f: &mut Formatter) -> Result { + // Writes out a valid unicode string with the correct escape sequences + fn write_str_escaped(f: &mut Formatter, s: &str) -> Result { + for c in s.chars().flat_map(|c| c.escape_debug()) { + f.write_char(c)? + } + Ok(()) + } + + f.write_str("\"")?; + for Utf8LossyChunk { valid, broken } in Utf8Lossy::from_bytes(slice).chunks() { + write_str_escaped(f, valid)?; + for b in broken { + write!(f, "\\x{:02X}", b)?; + } + } + f.write_str("\"") +} + +#[cfg(test)] +mod tests { + use super::*; + use fmt::{Formatter, Result, Debug}; + + #[test] + fn smoke() { + struct Helper<'a>(&'a [u8]); + + impl<'a> Debug for Helper<'a> { + fn fmt(&self, f: &mut Formatter) -> Result { + debug_fmt_bytestring(self.0, f) + } + } + + let input = b"\xF0hello,\tworld"; + let expected = r#""\xF0hello,\tworld""#; + let output = format!("{:?}", Helper(input)); + + assert!(output == expected); + } +} diff --git a/ctr-std/src/sys_common/gnu/libbacktrace.rs b/ctr-std/src/sys_common/gnu/libbacktrace.rs new file mode 100644 index 0000000..6ad3af6 --- /dev/null +++ b/ctr-std/src/sys_common/gnu/libbacktrace.rs @@ -0,0 +1,216 @@ +// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use libc; + +use ffi::CStr; +use io; +use mem; +use ptr; +use sys::backtrace::BacktraceContext; +use sys_common::backtrace::Frame; + +pub fn foreach_symbol_fileline<F>(frame: Frame, + mut f: F, + _: &BacktraceContext) -> io::Result<bool> +where F: FnMut(&[u8], u32) -> io::Result<()> +{ + // pcinfo may return an arbitrary number of file:line pairs, + // in the order of stack trace (i.e. inlined calls first). + // in order to avoid allocation, we stack-allocate a fixed size of entries. + const FILELINE_SIZE: usize = 32; + let mut fileline_buf = [(ptr::null(), !0); FILELINE_SIZE]; + let ret; + let fileline_count = { + let state = unsafe { init_state() }; + if state.is_null() { + return Err(io::Error::new( + io::ErrorKind::Other, + "failed to allocate libbacktrace state") + ) + } + let mut fileline_win: &mut [FileLine] = &mut fileline_buf; + let fileline_addr = &mut fileline_win as *mut &mut [FileLine]; + ret = unsafe { + backtrace_pcinfo(state, + frame.exact_position as libc::uintptr_t, + pcinfo_cb, + error_cb, + fileline_addr as *mut libc::c_void) + }; + FILELINE_SIZE - fileline_win.len() + }; + if ret == 0 { + for &(file, line) in &fileline_buf[..fileline_count] { + if file.is_null() { continue; } // just to be sure + let file = unsafe { CStr::from_ptr(file).to_bytes() }; + f(file, line)?; + } + Ok(fileline_count == FILELINE_SIZE) + } else { + Ok(false) + } +} + +/// Converts a pointer to symbol to its string value. +pub fn resolve_symname<F>(frame: Frame, + callback: F, + _: &BacktraceContext) -> io::Result<()> + where F: FnOnce(Option<&str>) -> io::Result<()> +{ + let symname = { + let state = unsafe { init_state() }; + if state.is_null() { + return Err(io::Error::new( + io::ErrorKind::Other, + "failed to allocate libbacktrace state") + ) + } + let mut data: *const libc::c_char = ptr::null(); + let data_addr = &mut data as *mut *const libc::c_char; + let ret = unsafe { + backtrace_syminfo(state, + frame.symbol_addr as libc::uintptr_t, + syminfo_cb, + error_cb, + data_addr as *mut libc::c_void) + }; + if ret == 0 || data.is_null() { + None + } else { + unsafe { + CStr::from_ptr(data).to_str().ok() + } + } + }; + callback(symname) +} + +//////////////////////////////////////////////////////////////////////// +// libbacktrace.h API +//////////////////////////////////////////////////////////////////////// +type backtrace_syminfo_callback = +extern "C" fn(data: *mut libc::c_void, + pc: libc::uintptr_t, + symname: *const libc::c_char, + symval: libc::uintptr_t, + symsize: libc::uintptr_t); +type backtrace_full_callback = +extern "C" fn(data: *mut libc::c_void, + pc: libc::uintptr_t, + filename: *const libc::c_char, + lineno: libc::c_int, + function: *const libc::c_char) -> libc::c_int; +type backtrace_error_callback = +extern "C" fn(data: *mut libc::c_void, + msg: *const libc::c_char, + errnum: libc::c_int); +enum backtrace_state {} + +extern { + fn backtrace_create_state(filename: *const libc::c_char, + threaded: libc::c_int, + error: backtrace_error_callback, + data: *mut libc::c_void) + -> *mut backtrace_state; + fn backtrace_syminfo(state: *mut backtrace_state, + addr: libc::uintptr_t, + cb: backtrace_syminfo_callback, + error: backtrace_error_callback, + data: *mut libc::c_void) -> libc::c_int; + fn backtrace_pcinfo(state: *mut backtrace_state, + addr: libc::uintptr_t, + cb: backtrace_full_callback, + error: backtrace_error_callback, + data: *mut libc::c_void) -> libc::c_int; +} + +//////////////////////////////////////////////////////////////////////// +// helper callbacks +//////////////////////////////////////////////////////////////////////// + +type FileLine = (*const libc::c_char, u32); + +extern fn error_cb(_data: *mut libc::c_void, _msg: *const libc::c_char, + _errnum: libc::c_int) { + // do nothing for now +} +extern fn syminfo_cb(data: *mut libc::c_void, + _pc: libc::uintptr_t, + symname: *const libc::c_char, + _symval: libc::uintptr_t, + _symsize: libc::uintptr_t) { + let slot = data as *mut *const libc::c_char; + unsafe { *slot = symname; } +} +extern fn pcinfo_cb(data: *mut libc::c_void, + _pc: libc::uintptr_t, + filename: *const libc::c_char, + lineno: libc::c_int, + _function: *const libc::c_char) -> libc::c_int { + if !filename.is_null() { + let slot = data as *mut &mut [FileLine]; + let buffer = unsafe {ptr::read(slot)}; + + // if the buffer is not full, add file:line to the buffer + // and adjust the buffer for next possible calls to pcinfo_cb. + if !buffer.is_empty() { + buffer[0] = (filename, lineno as u32); + unsafe { ptr::write(slot, &mut buffer[1..]); } + } + } + + 0 +} + +// The libbacktrace API supports creating a state, but it does not +// support destroying a state. I personally take this to mean that a +// state is meant to be created and then live forever. +// +// I would love to register an at_exit() handler which cleans up this +// state, but libbacktrace provides no way to do so. +// +// With these constraints, this function has a statically cached state +// that is calculated the first time this is requested. Remember that +// backtracing all happens serially (one global lock). +// +// Things don't work so well on not-Linux since libbacktrace can't track +// down that executable this is. We at one point used env::current_exe but +// it turns out that there are some serious security issues with that +// approach. +// +// Specifically, on certain platforms like BSDs, a malicious actor can cause +// an arbitrary file to be placed at the path returned by current_exe. +// libbacktrace does not behave defensively in the presence of ill-formed +// DWARF information, and has been demonstrated to segfault in at least one +// case. There is no evidence at the moment to suggest that a more carefully +// constructed file can't cause arbitrary code execution. As a result of all +// of this, we don't hint libbacktrace with the path to the current process. +unsafe fn init_state() -> *mut backtrace_state { + static mut STATE: *mut backtrace_state = ptr::null_mut(); + if !STATE.is_null() { return STATE } + + let filename = match ::sys::backtrace::gnu::get_executable_filename() { + Ok((filename, file)) => { + // filename is purposely leaked here since libbacktrace requires + // it to stay allocated permanently, file is also leaked so that + // the file stays locked + let filename_ptr = filename.as_ptr(); + mem::forget(filename); + mem::forget(file); + filename_ptr + }, + Err(_) => ptr::null(), + }; + + STATE = backtrace_create_state(filename, 0, error_cb, + ptr::null_mut()); + STATE +} diff --git a/ctr-std/src/sys_common/gnu/mod.rs b/ctr-std/src/sys_common/gnu/mod.rs new file mode 100644 index 0000000..3a8cf2d --- /dev/null +++ b/ctr-std/src/sys_common/gnu/mod.rs @@ -0,0 +1,15 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![allow(missing_docs)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +pub mod libbacktrace; diff --git a/ctr-std/src/sys_common/io.rs b/ctr-std/src/sys_common/io.rs index 23daeeb..ab23936 100644 --- a/ctr-std/src/sys_common/io.rs +++ b/ctr-std/src/sys_common/io.rs @@ -7,51 +7,8 @@ // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your // option. This file may not be copied, modified, or distributed // except according to those terms. -use io; -use io::ErrorKind; -use io::Read; -use slice::from_raw_parts_mut; - pub const DEFAULT_BUF_SIZE: usize = 8 * 1024; -// Provides read_to_end functionality over an uninitialized buffer. -// This function is unsafe because it calls the underlying -// read function with a slice into uninitialized memory. The default -// implementation of read_to_end for readers will zero out new memory in -// the buf before passing it to read, but avoiding this zero can often -// lead to a fairly significant performance win. -// -// Implementations using this method have to adhere to two guarantees: -// * The implementation of read never reads the buffer provided. -// * The implementation of read correctly reports how many bytes were written. -pub unsafe fn read_to_end_uninitialized(r: &mut Read, buf: &mut Vec<u8>) -> io::Result<usize> { - - let start_len = buf.len(); - buf.reserve(16); - - // Always try to read into the empty space of the vector (from the length to the capacity). - // If the vector ever fills up then we reserve an extra byte which should trigger the normal - // reallocation routines for the vector, which will likely double the size. - // - // This function is similar to the read_to_end function in std::io, but the logic about - // reservations and slicing is different enough that this is duplicated here. - loop { - if buf.len() == buf.capacity() { - buf.reserve(1); - } - - let buf_slice = from_raw_parts_mut(buf.as_mut_ptr().offset(buf.len() as isize), - buf.capacity() - buf.len()); - - match r.read(buf_slice) { - Ok(0) => { return Ok(buf.len() - start_len); } - Ok(n) => { let len = buf.len() + n; buf.set_len(len); }, - Err(ref e) if e.kind() == ErrorKind::Interrupted => { } - Err(e) => { return Err(e); } - } - } -} - #[cfg(test)] #[allow(dead_code)] // not used on emscripten pub mod test { @@ -91,89 +48,3 @@ pub mod test { TempDir(ret) } } - -#[cfg(test)] -mod tests { - use io::prelude::*; - use super::*; - use io; - use io::{ErrorKind, Take, Repeat, repeat}; - use slice::from_raw_parts; - - struct ErrorRepeat { - lr: Take<Repeat> - } - - fn error_repeat(byte: u8, limit: u64) -> ErrorRepeat { - ErrorRepeat { lr: repeat(byte).take(limit) } - } - - impl Read for ErrorRepeat { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - let ret = self.lr.read(buf); - if let Ok(0) = ret { - return Err(io::Error::new(ErrorKind::Other, "")) - } - ret - } - } - - fn init_vec_data() -> Vec<u8> { - let mut vec = vec![10u8; 200]; - unsafe { vec.set_len(0); } - vec - } - - fn assert_all_eq(buf: &[u8], value: u8) { - for n in buf { - assert_eq!(*n, value); - } - } - - fn validate(buf: &Vec<u8>, good_read_len: usize) { - assert_all_eq(buf, 1u8); - let cap = buf.capacity(); - let end_slice = unsafe { from_raw_parts(buf.as_ptr().offset(good_read_len as isize), - cap - good_read_len) }; - assert_all_eq(end_slice, 10u8); - } - - #[test] - fn read_to_end_uninit_error() { - let mut er = error_repeat(1,100); - let mut vec = init_vec_data(); - if let Err(_) = unsafe { read_to_end_uninitialized(&mut er, &mut vec) } { - validate(&vec, 100); - } else { - assert!(false); - } - } - - #[test] - fn read_to_end_uninit_zero_len_vec() { - let mut er = repeat(1).take(100); - let mut vec = Vec::new(); - let n = unsafe{ read_to_end_uninitialized(&mut er, &mut vec).unwrap() }; - assert_all_eq(&vec, 1u8); - assert_eq!(vec.len(), n); - } - - #[test] - fn read_to_end_uninit_good() { - let mut er = repeat(1).take(100); - let mut vec = init_vec_data(); - let n = unsafe{ read_to_end_uninitialized(&mut er, &mut vec).unwrap() }; - validate(&vec, 100); - assert_eq!(vec.len(), n); - } - - #[bench] - #[cfg_attr(target_os = "emscripten", ignore)] - fn bench_uninitialized(b: &mut ::test::Bencher) { - b.iter(|| { - let mut lr = repeat(1).take(10000000); - let mut vec = Vec::with_capacity(1024); - unsafe { read_to_end_uninitialized(&mut lr, &mut vec) } - }); - } -} diff --git a/ctr-std/src/sys_common/memchr.rs b/ctr-std/src/sys_common/memchr.rs deleted file mode 100644 index 3824a5f..0000000 --- a/ctr-std/src/sys_common/memchr.rs +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2015 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. -// -// Original implementation taken from rust-memchr -// Copyright 2015 Andrew Gallant, bluss and Nicolas Koch - -#[allow(dead_code)] -pub mod fallback { - use cmp; - use mem; - - const LO_U64: u64 = 0x0101010101010101; - const HI_U64: u64 = 0x8080808080808080; - - // use truncation - const LO_USIZE: usize = LO_U64 as usize; - const HI_USIZE: usize = HI_U64 as usize; - - /// Return `true` if `x` contains any zero byte. - /// - /// From *Matters Computational*, J. Arndt - /// - /// "The idea is to subtract one from each of the bytes and then look for - /// bytes where the borrow propagated all the way to the most significant - /// bit." - #[inline] - fn contains_zero_byte(x: usize) -> bool { - x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0 - } - - #[cfg(target_pointer_width = "32")] - #[inline] - fn repeat_byte(b: u8) -> usize { - let mut rep = (b as usize) << 8 | b as usize; - rep = rep << 16 | rep; - rep - } - - #[cfg(target_pointer_width = "64")] - #[inline] - fn repeat_byte(b: u8) -> usize { - let mut rep = (b as usize) << 8 | b as usize; - rep = rep << 16 | rep; - rep = rep << 32 | rep; - rep - } - - /// Return the first index matching the byte `a` in `text`. - pub fn memchr(x: u8, text: &[u8]) -> Option<usize> { - // Scan for a single byte value by reading two `usize` words at a time. - // - // Split `text` in three parts - // - unaligned initial part, before the first word aligned address in text - // - body, scan by 2 words at a time - // - the last remaining part, < 2 word size - let len = text.len(); - let ptr = text.as_ptr(); - let usize_bytes = mem::size_of::<usize>(); - - // search up to an aligned boundary - let align = (ptr as usize) & (usize_bytes- 1); - let mut offset; - if align > 0 { - offset = cmp::min(usize_bytes - align, len); - if let Some(index) = text[..offset].iter().position(|elt| *elt == x) { - return Some(index); - } - } else { - offset = 0; - } - - // search the body of the text - let repeated_x = repeat_byte(x); - - if len >= 2 * usize_bytes { - while offset <= len - 2 * usize_bytes { - unsafe { - let u = *(ptr.offset(offset as isize) as *const usize); - let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize); - - // break if there is a matching byte - let zu = contains_zero_byte(u ^ repeated_x); - let zv = contains_zero_byte(v ^ repeated_x); - if zu || zv { - break; - } - } - offset += usize_bytes * 2; - } - } - - // find the byte after the point the body loop stopped - text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i) - } - - /// Return the last index matching the byte `a` in `text`. - pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> { - // Scan for a single byte value by reading two `usize` words at a time. - // - // Split `text` in three parts - // - unaligned tail, after the last word aligned address in text - // - body, scan by 2 words at a time - // - the first remaining bytes, < 2 word size - let len = text.len(); - let ptr = text.as_ptr(); - let usize_bytes = mem::size_of::<usize>(); - - // search to an aligned boundary - let end_align = (ptr as usize + len) & (usize_bytes - 1); - let mut offset; - if end_align > 0 { - offset = if end_align >= len { 0 } else { len - end_align }; - if let Some(index) = text[offset..].iter().rposition(|elt| *elt == x) { - return Some(offset + index); - } - } else { - offset = len; - } - - // search the body of the text - let repeated_x = repeat_byte(x); - - while offset >= 2 * usize_bytes { - unsafe { - let u = *(ptr.offset(offset as isize - 2 * usize_bytes as isize) as *const usize); - let v = *(ptr.offset(offset as isize - usize_bytes as isize) as *const usize); - - // break if there is a matching byte - let zu = contains_zero_byte(u ^ repeated_x); - let zv = contains_zero_byte(v ^ repeated_x); - if zu || zv { - break; - } - } - offset -= 2 * usize_bytes; - } - - // find the byte before the point the body loop stopped - text[..offset].iter().rposition(|elt| *elt == x) - } - - // test fallback implementations on all platforms - #[test] - fn matches_one() { - assert_eq!(Some(0), memchr(b'a', b"a")); - } - - #[test] - fn matches_begin() { - assert_eq!(Some(0), memchr(b'a', b"aaaa")); - } - - #[test] - fn matches_end() { - assert_eq!(Some(4), memchr(b'z', b"aaaaz")); - } - - #[test] - fn matches_nul() { - assert_eq!(Some(4), memchr(b'\x00', b"aaaa\x00")); - } - - #[test] - fn matches_past_nul() { - assert_eq!(Some(5), memchr(b'z', b"aaaa\x00z")); - } - - #[test] - fn no_match_empty() { - assert_eq!(None, memchr(b'a', b"")); - } - - #[test] - fn no_match() { - assert_eq!(None, memchr(b'a', b"xyz")); - } - - #[test] - fn matches_one_reversed() { - assert_eq!(Some(0), memrchr(b'a', b"a")); - } - - #[test] - fn matches_begin_reversed() { - assert_eq!(Some(3), memrchr(b'a', b"aaaa")); - } - - #[test] - fn matches_end_reversed() { - assert_eq!(Some(0), memrchr(b'z', b"zaaaa")); - } - - #[test] - fn matches_nul_reversed() { - assert_eq!(Some(4), memrchr(b'\x00', b"aaaa\x00")); - } - - #[test] - fn matches_past_nul_reversed() { - assert_eq!(Some(0), memrchr(b'z', b"z\x00aaaa")); - } - - #[test] - fn no_match_empty_reversed() { - assert_eq!(None, memrchr(b'a', b"")); - } - - #[test] - fn no_match_reversed() { - assert_eq!(None, memrchr(b'a', b"xyz")); - } - - #[test] - fn each_alignment_reversed() { - let mut data = [1u8; 64]; - let needle = 2; - let pos = 40; - data[pos] = needle; - for start in 0..16 { - assert_eq!(Some(pos - start), memrchr(needle, &data[start..])); - } - } -} diff --git a/ctr-std/src/sys_common/mod.rs b/ctr-std/src/sys_common/mod.rs index 4a7d79f..27504d3 100644 --- a/ctr-std/src/sys_common/mod.rs +++ b/ctr-std/src/sys_common/mod.rs @@ -10,7 +10,7 @@ //! Platform-independent platform abstraction //! -//! This is the platform-independent portion of the standard libraries +//! This is the platform-independent portion of the standard library's //! platform abstraction layer, whereas `std::sys` is the //! platform-specific portion. //! @@ -23,11 +23,16 @@ //! `std::sys` from the standard library. #![allow(missing_docs)] +#![allow(missing_debug_implementations)] + +use sync::Once; +use sys; pub mod at_exit_imp; +#[cfg(feature = "backtrace")] +pub mod backtrace; pub mod condvar; pub mod io; -pub mod memchr; pub mod mutex; pub mod poison; pub mod remutex; @@ -36,6 +41,25 @@ pub mod thread; pub mod thread_info; pub mod thread_local; pub mod util; +pub mod wtf8; +pub mod bytestring; +pub mod process; + +cfg_if! { + if #[cfg(any(target_os = "cloudabi", target_os = "l4re", target_os = "redox"))] { + pub use sys::net; + } else if #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] { + pub use sys::net; + } else { + pub mod net; + } +} + +#[cfg(feature = "backtrace")] +#[cfg(any(all(unix, not(target_os = "emscripten")), + all(windows, target_env = "gnu"), + target_os = "redox"))] +pub mod gnu; // common error constructors @@ -81,6 +105,16 @@ macro_rules! rtabort { ($($t:tt)*) => (::sys_common::util::abort(format_args!($($t)*))) } +/// One-time runtime cleanup. +pub fn cleanup() { + static CLEANUP: Once = Once::new(); + CLEANUP.call_once(|| unsafe { + sys::args::cleanup(); + sys::stack_overflow::cleanup(); + at_exit_imp::cleanup(); + }); +} + // Computes (value*numer)/denom without overflow, as long as both // (numer*denom) and the overall result fit into i64 (which is the case // for our time conversions). diff --git a/ctr-std/src/sys_common/net.rs b/ctr-std/src/sys_common/net.rs new file mode 100644 index 0000000..6223f82 --- /dev/null +++ b/ctr-std/src/sys_common/net.rs @@ -0,0 +1,630 @@ +// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![allow(dead_code)] + +use cmp; +use ffi::CString; +use fmt; +use io::{self, Error, ErrorKind}; +use libc::{c_int, c_void}; +use mem; +use net::{SocketAddr, Shutdown, Ipv4Addr, Ipv6Addr}; +use ptr; +use sys::net::{cvt, cvt_r, cvt_gai, Socket, init, wrlen_t}; +use sys::net::netc as c; +use sys_common::{AsInner, FromInner, IntoInner}; +use time::Duration; + +// IPV6 stuff does not seem to be supported on 3DS. TODO: Determine if that's true +const IPV6_ADD_MEMBERSHIP: c_int = 0x0; +const IPV6_DROP_MEMBERSHIP: c_int = 0x0; +const IPV6_MULTICAST_LOOP: c_int = 0x0; +const IPV6_V6ONLY: c_int = 0x0; + +// Neither are signals +const MSG_NOSIGNAL: c_int = 0x0; + +// These constants are also currently missing from libctru. TODO: Find them? +const SO_SNDTIMEO: c_int = 0x0; +const SO_RCVTIMEO: c_int = 0x0; +const SO_BROADCAST: c_int = 0x0; + +//////////////////////////////////////////////////////////////////////////////// +// sockaddr and misc bindings +//////////////////////////////////////////////////////////////////////////////// + +pub fn setsockopt<T>(sock: &Socket, opt: c_int, val: c_int, + payload: T) -> io::Result<()> { + unsafe { + let payload = &payload as *const T as *const c_void; + cvt(c::setsockopt(*sock.as_inner(), opt, val, payload, + mem::size_of::<T>() as c::socklen_t))?; + Ok(()) + } +} + +pub fn getsockopt<T: Copy>(sock: &Socket, opt: c_int, + val: c_int) -> io::Result<T> { + unsafe { + let mut slot: T = mem::zeroed(); + let mut len = mem::size_of::<T>() as c::socklen_t; + cvt(c::getsockopt(*sock.as_inner(), opt, val, + &mut slot as *mut _ as *mut _, + &mut len))?; + assert_eq!(len as usize, mem::size_of::<T>()); + Ok(slot) + } +} + +fn sockname<F>(f: F) -> io::Result<SocketAddr> + where F: FnOnce(*mut c::sockaddr, *mut c::socklen_t) -> c_int +{ + unsafe { + let mut storage: c::sockaddr_storage = mem::zeroed(); + let mut len = mem::size_of_val(&storage) as c::socklen_t; + cvt(f(&mut storage as *mut _ as *mut _, &mut len))?; + sockaddr_to_addr(&storage, len as usize) + } +} + +pub fn sockaddr_to_addr(storage: &c::sockaddr_storage, + len: usize) -> io::Result<SocketAddr> { + match storage.ss_family as c_int { + c::AF_INET => { + assert!(len as usize >= mem::size_of::<c::sockaddr_in>()); + Ok(SocketAddr::V4(FromInner::from_inner(unsafe { + *(storage as *const _ as *const c::sockaddr_in) + }))) + } + c::AF_INET6 => { + assert!(len as usize >= mem::size_of::<c::sockaddr_in6>()); + Ok(SocketAddr::V6(FromInner::from_inner(unsafe { + *(storage as *const _ as *const c::sockaddr_in6) + }))) + } + _ => { + Err(Error::new(ErrorKind::InvalidInput, "invalid argument")) + } + } +} + +#[cfg(target_os = "android")] +fn to_ipv6mr_interface(value: u32) -> c_int { + value as c_int +} + +#[cfg(not(target_os = "android"))] +fn to_ipv6mr_interface(value: u32) -> ::libc::c_uint { + value as ::libc::c_uint +} + +//////////////////////////////////////////////////////////////////////////////// +// get_host_addresses +//////////////////////////////////////////////////////////////////////////////// + +pub struct LookupHost { + original: *mut c::addrinfo, + cur: *mut c::addrinfo, +} + +impl Iterator for LookupHost { + type Item = SocketAddr; + fn next(&mut self) -> Option<SocketAddr> { + loop { + unsafe { + let cur = self.cur.as_ref()?; + self.cur = cur.ai_next; + match sockaddr_to_addr(mem::transmute(cur.ai_addr), + cur.ai_addrlen as usize) + { + Ok(addr) => return Some(addr), + Err(_) => continue, + } + } + } + } +} + +unsafe impl Sync for LookupHost {} +unsafe impl Send for LookupHost {} + +impl Drop for LookupHost { + fn drop(&mut self) { + unsafe { c::freeaddrinfo(self.original) } + } +} + +pub fn lookup_host(host: &str) -> io::Result<LookupHost> { + init(); + + let c_host = CString::new(host)?; + let mut hints: c::addrinfo = unsafe { mem::zeroed() }; + hints.ai_socktype = c::SOCK_STREAM; + let mut res = ptr::null_mut(); + unsafe { + match cvt_gai(c::getaddrinfo(c_host.as_ptr(), ptr::null(), &hints, &mut res)) { + Ok(_) => { + Ok(LookupHost { original: res, cur: res }) + }, + #[cfg(unix)] + Err(e) => { + // If we're running glibc prior to version 2.26, the lookup + // failure could be caused by caching a stale /etc/resolv.conf. + // We need to call libc::res_init() to clear the cache. But we + // shouldn't call it in on any other platform, because other + // res_init implementations aren't thread-safe. See + // https://github.com/rust-lang/rust/issues/41570 and + // https://github.com/rust-lang/rust/issues/43592. + use sys::net::res_init_if_glibc_before_2_26; + let _ = res_init_if_glibc_before_2_26(); + Err(e) + }, + // the cfg is needed here to avoid an "unreachable pattern" warning + #[cfg(not(unix))] + Err(e) => Err(e), + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// TCP streams +//////////////////////////////////////////////////////////////////////////////// + +pub struct TcpStream { + inner: Socket, +} + +impl TcpStream { + pub fn connect(addr: &SocketAddr) -> io::Result<TcpStream> { + init(); + + let sock = Socket::new(addr, c::SOCK_STREAM)?; + + let (addrp, len) = addr.into_inner(); + cvt_r(|| unsafe { c::connect(*sock.as_inner(), addrp, len) })?; + Ok(TcpStream { inner: sock }) + } + + pub fn connect_timeout(addr: &SocketAddr, timeout: Duration) -> io::Result<TcpStream> { + init(); + + let sock = Socket::new(addr, c::SOCK_STREAM)?; + sock.connect_timeout(addr, timeout)?; + Ok(TcpStream { inner: sock }) + } + + pub fn socket(&self) -> &Socket { &self.inner } + + pub fn into_socket(self) -> Socket { self.inner } + + pub fn set_read_timeout(&self, dur: Option<Duration>) -> io::Result<()> { + self.inner.set_timeout(dur, SO_RCVTIMEO) + } + + pub fn set_write_timeout(&self, dur: Option<Duration>) -> io::Result<()> { + self.inner.set_timeout(dur, SO_SNDTIMEO) + } + + pub fn read_timeout(&self) -> io::Result<Option<Duration>> { + self.inner.timeout(SO_RCVTIMEO) + } + + pub fn write_timeout(&self) -> io::Result<Option<Duration>> { + self.inner.timeout(SO_SNDTIMEO) + } + + pub fn peek(&self, buf: &mut [u8]) -> io::Result<usize> { + self.inner.peek(buf) + } + + pub fn read(&self, buf: &mut [u8]) -> io::Result<usize> { + self.inner.read(buf) + } + + pub fn write(&self, buf: &[u8]) -> io::Result<usize> { + let len = cmp::min(buf.len(), <wrlen_t>::max_value() as usize) as wrlen_t; + let ret = cvt(unsafe { + c::send(*self.inner.as_inner(), + buf.as_ptr() as *const c_void, + len, + MSG_NOSIGNAL) + })?; + Ok(ret as usize) + } + + pub fn peer_addr(&self) -> io::Result<SocketAddr> { + sockname(|buf, len| unsafe { + c::getpeername(*self.inner.as_inner(), buf, len) + }) + } + + pub fn socket_addr(&self) -> io::Result<SocketAddr> { + sockname(|buf, len| unsafe { + c::getsockname(*self.inner.as_inner(), buf, len) + }) + } + + pub fn shutdown(&self, how: Shutdown) -> io::Result<()> { + self.inner.shutdown(how) + } + + pub fn duplicate(&self) -> io::Result<TcpStream> { + self.inner.duplicate().map(|s| TcpStream { inner: s }) + } + + pub fn set_nodelay(&self, nodelay: bool) -> io::Result<()> { + self.inner.set_nodelay(nodelay) + } + + pub fn nodelay(&self) -> io::Result<bool> { + self.inner.nodelay() + } + + pub fn set_ttl(&self, ttl: u32) -> io::Result<()> { + setsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL, ttl as c_int) + } + + pub fn ttl(&self) -> io::Result<u32> { + let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL)?; + Ok(raw as u32) + } + + pub fn take_error(&self) -> io::Result<Option<io::Error>> { + self.inner.take_error() + } + + pub fn set_nonblocking(&self, nonblocking: bool) -> io::Result<()> { + self.inner.set_nonblocking(nonblocking) + } +} + +impl FromInner<Socket> for TcpStream { + fn from_inner(socket: Socket) -> TcpStream { + TcpStream { inner: socket } + } +} + +impl fmt::Debug for TcpStream { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut res = f.debug_struct("TcpStream"); + + if let Ok(addr) = self.socket_addr() { + res.field("addr", &addr); + } + + if let Ok(peer) = self.peer_addr() { + res.field("peer", &peer); + } + + let name = if cfg!(windows) {"socket"} else {"fd"}; + res.field(name, &self.inner.as_inner()) + .finish() + } +} + +//////////////////////////////////////////////////////////////////////////////// +// TCP listeners +//////////////////////////////////////////////////////////////////////////////// + +pub struct TcpListener { + inner: Socket, +} + +impl TcpListener { + pub fn bind(addr: &SocketAddr) -> io::Result<TcpListener> { + init(); + + let sock = Socket::new(addr, c::SOCK_STREAM)?; + + // On platforms with Berkeley-derived sockets, this allows + // to quickly rebind a socket, without needing to wait for + // the OS to clean up the previous one. + if !cfg!(windows) { + setsockopt(&sock, c::SOL_SOCKET, c::SO_REUSEADDR, + 1 as c_int)?; + } + + // Bind our new socket + let (addrp, len) = addr.into_inner(); + cvt(unsafe { c::bind(*sock.as_inner(), addrp, len as _) })?; + + // Start listening + cvt(unsafe { c::listen(*sock.as_inner(), 128) })?; + Ok(TcpListener { inner: sock }) + } + + pub fn socket(&self) -> &Socket { &self.inner } + + pub fn into_socket(self) -> Socket { self.inner } + + pub fn socket_addr(&self) -> io::Result<SocketAddr> { + sockname(|buf, len| unsafe { + c::getsockname(*self.inner.as_inner(), buf, len) + }) + } + + pub fn accept(&self) -> io::Result<(TcpStream, SocketAddr)> { + let mut storage: c::sockaddr_storage = unsafe { mem::zeroed() }; + let mut len = mem::size_of_val(&storage) as c::socklen_t; + let sock = self.inner.accept(&mut storage as *mut _ as *mut _, + &mut len)?; + let addr = sockaddr_to_addr(&storage, len as usize)?; + Ok((TcpStream { inner: sock, }, addr)) + } + + pub fn duplicate(&self) -> io::Result<TcpListener> { + self.inner.duplicate().map(|s| TcpListener { inner: s }) + } + + pub fn set_ttl(&self, ttl: u32) -> io::Result<()> { + setsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL, ttl as c_int) + } + + pub fn ttl(&self) -> io::Result<u32> { + let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL)?; + Ok(raw as u32) + } + + pub fn set_only_v6(&self, only_v6: bool) -> io::Result<()> { + setsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_V6ONLY, only_v6 as c_int) + } + + pub fn only_v6(&self) -> io::Result<bool> { + let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_V6ONLY)?; + Ok(raw != 0) + } + + pub fn take_error(&self) -> io::Result<Option<io::Error>> { + self.inner.take_error() + } + + pub fn set_nonblocking(&self, nonblocking: bool) -> io::Result<()> { + self.inner.set_nonblocking(nonblocking) + } +} + +impl FromInner<Socket> for TcpListener { + fn from_inner(socket: Socket) -> TcpListener { + TcpListener { inner: socket } + } +} + +impl fmt::Debug for TcpListener { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut res = f.debug_struct("TcpListener"); + + if let Ok(addr) = self.socket_addr() { + res.field("addr", &addr); + } + + let name = if cfg!(windows) {"socket"} else {"fd"}; + res.field(name, &self.inner.as_inner()) + .finish() + } +} + +//////////////////////////////////////////////////////////////////////////////// +// UDP +//////////////////////////////////////////////////////////////////////////////// + +pub struct UdpSocket { + inner: Socket, +} + +impl UdpSocket { + pub fn bind(addr: &SocketAddr) -> io::Result<UdpSocket> { + init(); + + let sock = Socket::new(addr, c::SOCK_DGRAM)?; + let (addrp, len) = addr.into_inner(); + cvt(unsafe { c::bind(*sock.as_inner(), addrp, len as _) })?; + Ok(UdpSocket { inner: sock }) + } + + pub fn socket(&self) -> &Socket { &self.inner } + + pub fn into_socket(self) -> Socket { self.inner } + + pub fn socket_addr(&self) -> io::Result<SocketAddr> { + sockname(|buf, len| unsafe { + c::getsockname(*self.inner.as_inner(), buf, len) + }) + } + + pub fn recv_from(&self, buf: &mut [u8]) -> io::Result<(usize, SocketAddr)> { + self.inner.recv_from(buf) + } + + pub fn peek_from(&self, buf: &mut [u8]) -> io::Result<(usize, SocketAddr)> { + self.inner.peek_from(buf) + } + + pub fn send_to(&self, buf: &[u8], dst: &SocketAddr) -> io::Result<usize> { + let len = cmp::min(buf.len(), <wrlen_t>::max_value() as usize) as wrlen_t; + let (dstp, dstlen) = dst.into_inner(); + let ret = cvt(unsafe { + c::sendto(*self.inner.as_inner(), + buf.as_ptr() as *const c_void, len, + MSG_NOSIGNAL, dstp, dstlen) + })?; + Ok(ret as usize) + } + + pub fn duplicate(&self) -> io::Result<UdpSocket> { + self.inner.duplicate().map(|s| UdpSocket { inner: s }) + } + + pub fn set_read_timeout(&self, dur: Option<Duration>) -> io::Result<()> { + self.inner.set_timeout(dur, SO_RCVTIMEO) + } + + pub fn set_write_timeout(&self, dur: Option<Duration>) -> io::Result<()> { + self.inner.set_timeout(dur, SO_SNDTIMEO) + } + + pub fn read_timeout(&self) -> io::Result<Option<Duration>> { + self.inner.timeout(SO_RCVTIMEO) + } + + pub fn write_timeout(&self) -> io::Result<Option<Duration>> { + self.inner.timeout(SO_SNDTIMEO) + } + + pub fn set_broadcast(&self, broadcast: bool) -> io::Result<()> { + setsockopt(&self.inner, c::SOL_SOCKET, SO_BROADCAST, broadcast as c_int) + } + + pub fn broadcast(&self) -> io::Result<bool> { + let raw: c_int = getsockopt(&self.inner, c::SOL_SOCKET, SO_BROADCAST)?; + Ok(raw != 0) + } + + pub fn set_multicast_loop_v4(&self, multicast_loop_v4: bool) -> io::Result<()> { + setsockopt(&self.inner, c::IPPROTO_IP, c::IP_MULTICAST_LOOP, multicast_loop_v4 as c_int) + } + + pub fn multicast_loop_v4(&self) -> io::Result<bool> { + let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IP, c::IP_MULTICAST_LOOP)?; + Ok(raw != 0) + } + + pub fn set_multicast_ttl_v4(&self, multicast_ttl_v4: u32) -> io::Result<()> { + setsockopt(&self.inner, c::IPPROTO_IP, c::IP_MULTICAST_TTL, multicast_ttl_v4 as c_int) + } + + pub fn multicast_ttl_v4(&self) -> io::Result<u32> { + let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IP, c::IP_MULTICAST_TTL)?; + Ok(raw as u32) + } + + pub fn set_multicast_loop_v6(&self, multicast_loop_v6: bool) -> io::Result<()> { + setsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_MULTICAST_LOOP, multicast_loop_v6 as c_int) + } + + pub fn multicast_loop_v6(&self) -> io::Result<bool> { + let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_MULTICAST_LOOP)?; + Ok(raw != 0) + } + + pub fn join_multicast_v4(&self, multiaddr: &Ipv4Addr, interface: &Ipv4Addr) + -> io::Result<()> { + let mreq = c::ip_mreq { + imr_multiaddr: *multiaddr.as_inner(), + imr_interface: *interface.as_inner(), + }; + setsockopt(&self.inner, c::IPPROTO_IP, c::IP_ADD_MEMBERSHIP, mreq) + } + + pub fn join_multicast_v6(&self, multiaddr: &Ipv6Addr, interface: u32) + -> io::Result<()> { + let mreq = c::ipv6_mreq { + ipv6mr_multiaddr: *multiaddr.as_inner(), + ipv6mr_interface: to_ipv6mr_interface(interface), + }; + setsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_ADD_MEMBERSHIP, mreq) + } + + pub fn leave_multicast_v4(&self, multiaddr: &Ipv4Addr, interface: &Ipv4Addr) + -> io::Result<()> { + let mreq = c::ip_mreq { + imr_multiaddr: *multiaddr.as_inner(), + imr_interface: *interface.as_inner(), + }; + setsockopt(&self.inner, c::IPPROTO_IP, c::IP_DROP_MEMBERSHIP, mreq) + } + + pub fn leave_multicast_v6(&self, multiaddr: &Ipv6Addr, interface: u32) + -> io::Result<()> { + let mreq = c::ipv6_mreq { + ipv6mr_multiaddr: *multiaddr.as_inner(), + ipv6mr_interface: to_ipv6mr_interface(interface), + }; + setsockopt(&self.inner, c::IPPROTO_IPV6, IPV6_DROP_MEMBERSHIP, mreq) + } + + pub fn set_ttl(&self, ttl: u32) -> io::Result<()> { + setsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL, ttl as c_int) + } + + pub fn ttl(&self) -> io::Result<u32> { + let raw: c_int = getsockopt(&self.inner, c::IPPROTO_IP, c::IP_TTL)?; + Ok(raw as u32) + } + + pub fn take_error(&self) -> io::Result<Option<io::Error>> { + self.inner.take_error() + } + + pub fn set_nonblocking(&self, nonblocking: bool) -> io::Result<()> { + self.inner.set_nonblocking(nonblocking) + } + + pub fn recv(&self, buf: &mut [u8]) -> io::Result<usize> { + self.inner.read(buf) + } + + pub fn peek(&self, buf: &mut [u8]) -> io::Result<usize> { + self.inner.peek(buf) + } + + pub fn send(&self, buf: &[u8]) -> io::Result<usize> { + let len = cmp::min(buf.len(), <wrlen_t>::max_value() as usize) as wrlen_t; + let ret = cvt(unsafe { + c::send(*self.inner.as_inner(), + buf.as_ptr() as *const c_void, + len, + MSG_NOSIGNAL) + })?; + Ok(ret as usize) + } + + pub fn connect(&self, addr: &SocketAddr) -> io::Result<()> { + let (addrp, len) = addr.into_inner(); + cvt_r(|| unsafe { c::connect(*self.inner.as_inner(), addrp, len) }).map(|_| ()) + } +} + +impl FromInner<Socket> for UdpSocket { + fn from_inner(socket: Socket) -> UdpSocket { + UdpSocket { inner: socket } + } +} + +impl fmt::Debug for UdpSocket { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut res = f.debug_struct("UdpSocket"); + + if let Ok(addr) = self.socket_addr() { + res.field("addr", &addr); + } + + let name = if cfg!(windows) {"socket"} else {"fd"}; + res.field(name, &self.inner.as_inner()) + .finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use collections::HashMap; + + #[test] + fn no_lookup_host_duplicates() { + let mut addrs = HashMap::new(); + let lh = match lookup_host("localhost") { + Ok(lh) => lh, + Err(e) => panic!("couldn't resolve `localhost': {}", e) + }; + let _na = lh.map(|sa| *addrs.entry(sa).or_insert(0) += 1).count(); + assert!(addrs.values().filter(|&&v| v > 1).count() == 0); + } +} diff --git a/ctr-std/src/sys_common/poison.rs b/ctr-std/src/sys_common/poison.rs index bdc727f..934ac3e 100644 --- a/ctr-std/src/sys_common/poison.rs +++ b/ctr-std/src/sys_common/poison.rs @@ -60,17 +60,53 @@ pub struct Guard { /// A type of error which can be returned whenever a lock is acquired. /// -/// Both Mutexes and RwLocks are poisoned whenever a thread fails while the lock +/// Both [`Mutex`]es and [`RwLock`]s are poisoned whenever a thread fails while the lock /// is held. The precise semantics for when a lock is poisoned is documented on /// each lock, but once a lock is poisoned then all future acquisitions will /// return this error. +/// +/// # Examples +/// +/// ``` +/// use std::sync::{Arc, Mutex}; +/// use std::thread; +/// +/// let mutex = Arc::new(Mutex::new(1)); +/// +/// // poison the mutex +/// let c_mutex = mutex.clone(); +/// let _ = thread::spawn(move || { +/// let mut data = c_mutex.lock().unwrap(); +/// *data = 2; +/// panic!(); +/// }).join(); +/// +/// match mutex.lock() { +/// Ok(_) => unreachable!(), +/// Err(p_err) => { +/// let data = p_err.get_ref(); +/// println!("recovered: {}", data); +/// } +/// }; +/// ``` +/// +/// [`Mutex`]: ../../std/sync/struct.Mutex.html +/// [`RwLock`]: ../../std/sync/struct.RwLock.html #[stable(feature = "rust1", since = "1.0.0")] pub struct PoisonError<T> { guard: T, } -/// An enumeration of possible errors which can occur while calling the -/// `try_lock` method. +/// An enumeration of possible errors associated with a [`TryLockResult`] which +/// can occur while trying to aquire a lock, from the [`try_lock`] method on a +/// [`Mutex`] or the [`try_read`] and [`try_write`] methods on an [`RwLock`]. +/// +/// [`Mutex`]: struct.Mutex.html +/// [`RwLock`]: struct.RwLock.html +/// [`TryLockResult`]: type.TryLockResult.html +/// [`try_lock`]: struct.Mutex.html#method.try_lock +/// [`try_read`]: struct.RwLock.html#method.try_read +/// [`try_write`]: struct.RwLock.html#method.try_write #[stable(feature = "rust1", since = "1.0.0")] pub enum TryLockError<T> { /// The lock could not be acquired because another thread failed while holding @@ -85,19 +121,26 @@ pub enum TryLockError<T> { /// A type alias for the result of a lock method which can be poisoned. /// -/// The `Ok` variant of this result indicates that the primitive was not -/// poisoned, and the `Guard` is contained within. The `Err` variant indicates -/// that the primitive was poisoned. Note that the `Err` variant *also* carries -/// the associated guard, and it can be acquired through the `into_inner` +/// The [`Ok`] variant of this result indicates that the primitive was not +/// poisoned, and the `Guard` is contained within. The [`Err`] variant indicates +/// that the primitive was poisoned. Note that the [`Err`] variant *also* carries +/// the associated guard, and it can be acquired through the [`into_inner`] /// method. +/// +/// [`Ok`]: ../../std/result/enum.Result.html#variant.Ok +/// [`Err`]: ../../std/result/enum.Result.html#variant.Err +/// [`into_inner`]: ../../std/sync/struct.PoisonError.html#method.into_inner #[stable(feature = "rust1", since = "1.0.0")] pub type LockResult<Guard> = Result<Guard, PoisonError<Guard>>; /// A type alias for the result of a nonblocking locking method. /// -/// For more information, see `LockResult`. A `TryLockResult` doesn't -/// necessarily hold the associated guard in the `Err` type as the lock may not +/// For more information, see [`LockResult`]. A `TryLockResult` doesn't +/// necessarily hold the associated guard in the [`Err`] type as the lock may not /// have been acquired for other reasons. +/// +/// [`LockResult`]: ../../std/sync/type.LockResult.html +/// [`Err`]: ../../std/result/enum.Result.html#variant.Err #[stable(feature = "rust1", since = "1.0.0")] pub type TryLockResult<Guard> = Result<Guard, TryLockError<Guard>>; @@ -124,6 +167,11 @@ impl<T> Error for PoisonError<T> { impl<T> PoisonError<T> { /// Creates a `PoisonError`. + /// + /// This is generally created by methods like [`Mutex::lock`] or [`RwLock::read`]. + /// + /// [`Mutex::lock`]: ../../std/sync/struct.Mutex.html#method.lock + /// [`RwLock::read`]: ../../std/sync/struct.RwLock.html#method.read #[stable(feature = "sync_poison", since = "1.2.0")] pub fn new(guard: T) -> PoisonError<T> { PoisonError { guard: guard } @@ -131,6 +179,28 @@ impl<T> PoisonError<T> { /// Consumes this error indicating that a lock is poisoned, returning the /// underlying guard to allow access regardless. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// use std::sync::{Arc, Mutex}; + /// use std::thread; + /// + /// let mutex = Arc::new(Mutex::new(HashSet::new())); + /// + /// // poison the mutex + /// let c_mutex = mutex.clone(); + /// let _ = thread::spawn(move || { + /// let mut data = c_mutex.lock().unwrap(); + /// data.insert(10); + /// panic!(); + /// }).join(); + /// + /// let p_err = mutex.lock().unwrap_err(); + /// let data = p_err.into_inner(); + /// println!("recovered {} items", data.len()); + /// ``` #[stable(feature = "sync_poison", since = "1.2.0")] pub fn into_inner(self) -> T { self.guard } diff --git a/ctr-std/src/sys_common/process.rs b/ctr-std/src/sys_common/process.rs new file mode 100644 index 0000000..fd1a5fd --- /dev/null +++ b/ctr-std/src/sys_common/process.rs @@ -0,0 +1,124 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![allow(dead_code)] +#![unstable(feature = "process_internals", issue = "0")] + +use ffi::{OsStr, OsString}; +use env; +use collections::BTreeMap; +use alloc::borrow::Borrow; + +pub trait EnvKey: + From<OsString> + Into<OsString> + + Borrow<OsStr> + Borrow<Self> + AsRef<OsStr> + + Ord + Clone {} + +// Implement a case-sensitive environment variable key +#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub struct DefaultEnvKey(OsString); + +impl From<OsString> for DefaultEnvKey { + fn from(k: OsString) -> Self { DefaultEnvKey(k) } +} + +impl From<DefaultEnvKey> for OsString { + fn from(k: DefaultEnvKey) -> Self { k.0 } +} + +impl Borrow<OsStr> for DefaultEnvKey { + fn borrow(&self) -> &OsStr { &self.0 } +} + +impl AsRef<OsStr> for DefaultEnvKey { + fn as_ref(&self) -> &OsStr { &self.0 } +} + +impl EnvKey for DefaultEnvKey {} + +// Stores a set of changes to an environment +#[derive(Clone, Debug)] +pub struct CommandEnv<K> { + clear: bool, + vars: BTreeMap<K, Option<OsString>> +} + +impl<K: EnvKey> Default for CommandEnv<K> { + fn default() -> Self { + CommandEnv { + clear: false, + vars: Default::default() + } + } +} + +impl<K: EnvKey> CommandEnv<K> { + // Capture the current environment with these changes applied + pub fn capture(&self) -> BTreeMap<K, OsString> { + let mut result = BTreeMap::<K, OsString>::new(); + if !self.clear { + for (k, v) in env::vars_os() { + result.insert(k.into(), v); + } + } + for (k, maybe_v) in &self.vars { + if let &Some(ref v) = maybe_v { + result.insert(k.clone(), v.clone()); + } else { + result.remove(k); + } + } + result + } + + // Apply these changes directly to the current environment + pub fn apply(&self) { + if self.clear { + for (k, _) in env::vars_os() { + env::remove_var(k); + } + } + for (key, maybe_val) in self.vars.iter() { + if let &Some(ref val) = maybe_val { + env::set_var(key, val); + } else { + env::remove_var(key); + } + } + } + + pub fn is_unchanged(&self) -> bool { + !self.clear && self.vars.is_empty() + } + + pub fn capture_if_changed(&self) -> Option<BTreeMap<K, OsString>> { + if self.is_unchanged() { + None + } else { + Some(self.capture()) + } + } + + // The following functions build up changes + pub fn set(&mut self, key: &OsStr, value: &OsStr) { + self.vars.insert(key.to_owned().into(), Some(value.to_owned())); + } + pub fn remove(&mut self, key: &OsStr) { + if self.clear { + self.vars.remove(key); + } else { + self.vars.insert(key.to_owned().into(), None); + } + } + pub fn clear(&mut self) { + self.clear = true; + self.vars.clear(); + } +} diff --git a/ctr-std/src/sys_common/remutex.rs b/ctr-std/src/sys_common/remutex.rs index 4d0407c..ce43ec6 100644 --- a/ctr-std/src/sys_common/remutex.rs +++ b/ctr-std/src/sys_common/remutex.rs @@ -116,11 +116,18 @@ impl<T> Drop for ReentrantMutex<T> { impl<T: fmt::Debug + 'static> fmt::Debug for ReentrantMutex<T> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.try_lock() { - Ok(guard) => write!(f, "ReentrantMutex {{ data: {:?} }}", &*guard), + Ok(guard) => f.debug_struct("ReentrantMutex").field("data", &*guard).finish(), Err(TryLockError::Poisoned(err)) => { - write!(f, "ReentrantMutex {{ data: Poisoned({:?}) }}", &**err.get_ref()) + f.debug_struct("ReentrantMutex").field("data", &**err.get_ref()).finish() }, - Err(TryLockError::WouldBlock) => write!(f, "ReentrantMutex {{ <locked> }}") + Err(TryLockError::WouldBlock) => { + struct LockedPlaceholder; + impl fmt::Debug for LockedPlaceholder { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str("<locked>") } + } + + f.debug_struct("ReentrantMutex").field("data", &LockedPlaceholder).finish() + } } } } diff --git a/ctr-std/src/sys_common/thread.rs b/ctr-std/src/sys_common/thread.rs index bb6baae..f1379b6 100644 --- a/ctr-std/src/sys_common/thread.rs +++ b/ctr-std/src/sys_common/thread.rs @@ -9,14 +9,32 @@ // except according to those terms. use alloc::boxed::FnBox; -use libc; -//use sys::stack_overflow; +use env; +use sync::atomic::{self, Ordering}; +use sys::stack_overflow; +use sys::thread as imp; -pub unsafe fn start_thread(main: *mut libc::c_void) { +#[allow(dead_code)] +pub unsafe fn start_thread(main: *mut u8) { // Next, set up our stack overflow handler which may get triggered if we run // out of stack. - // let _handler = stack_overflow::Handler::new(); + let _handler = stack_overflow::Handler::new(); // Finally, let's run some code. Box::from_raw(main as *mut Box<FnBox()>)() } + +pub fn min_stack() -> usize { + static MIN: atomic::AtomicUsize = atomic::AtomicUsize::new(0); + match MIN.load(Ordering::SeqCst) { + 0 => {} + n => return n - 1, + } + let amt = env::var("RUST_MIN_STACK").ok().and_then(|s| s.parse().ok()); + let amt = amt.unwrap_or(imp::DEFAULT_MIN_STACK_SIZE); + + // 0 is our sentinel value, so ensure that we'll never see 0 after + // initialization has run + MIN.store(amt + 1, Ordering::SeqCst); + amt +} diff --git a/ctr-std/src/sys_common/thread_info.rs b/ctr-std/src/sys_common/thread_info.rs index 2abb8af..7970042 100644 --- a/ctr-std/src/sys_common/thread_info.rs +++ b/ctr-std/src/sys_common/thread_info.rs @@ -45,7 +45,7 @@ pub fn stack_guard() -> Option<usize> { pub fn set(stack_guard: Option<usize>, thread: Thread) { THREAD_INFO.with(|c| assert!(c.borrow().is_none())); THREAD_INFO.with(move |c| *c.borrow_mut() = Some(ThreadInfo{ - stack_guard: stack_guard, - thread: thread, + stack_guard, + thread, })); } diff --git a/ctr-std/src/sys_common/thread_local.rs b/ctr-std/src/sys_common/thread_local.rs index 25a9d57..a4aa3d9 100644 --- a/ctr-std/src/sys_common/thread_local.rs +++ b/ctr-std/src/sys_common/thread_local.rs @@ -33,7 +33,7 @@ //! Using a dynamically allocated TLS key. Note that this key can be shared //! among many threads via an `Arc`. //! -//! ```rust,ignore +//! ```ignore (cannot-doctest-private-modules) //! let key = Key::new(None); //! assert!(key.get().is_null()); //! key.set(1 as *mut u8); @@ -45,7 +45,7 @@ //! Sometimes a statically allocated key is either required or easier to work //! with, however. //! -//! ```rust,ignore +//! ```ignore (cannot-doctest-private-modules) //! static KEY: StaticKey = INIT; //! //! unsafe { @@ -58,9 +58,10 @@ #![unstable(feature = "thread_local_internals", issue = "0")] #![allow(dead_code)] // sys isn't exported yet +use ptr; use sync::atomic::{self, AtomicUsize, Ordering}; - use sys::thread_local as imp; +use sys_common::mutex::Mutex; /// A type for TLS keys that are statically allocated. /// @@ -73,7 +74,7 @@ use sys::thread_local as imp; /// /// # Examples /// -/// ```ignore +/// ```ignore (cannot-doctest-private-modules) /// use tls::os::{StaticKey, INIT}; /// /// static KEY: StaticKey = INIT; @@ -104,7 +105,7 @@ pub struct StaticKey { /// /// # Examples /// -/// ```rust,ignore +/// ```ignore (cannot-doctest-private-modules) /// use tls::os::Key; /// /// let key = Key::new(None); @@ -127,7 +128,7 @@ impl StaticKey { pub const fn new(dtor: Option<unsafe extern fn(*mut u8)>) -> StaticKey { StaticKey { key: atomic::AtomicUsize::new(0), - dtor: dtor + dtor, } } @@ -145,20 +146,6 @@ impl StaticKey { #[inline] pub unsafe fn set(&self, val: *mut u8) { imp::set(self.key(), val) } - /// Deallocates this OS TLS key. - /// - /// This function is unsafe as there is no guarantee that the key is not - /// currently in use by other threads or will not ever be used again. - /// - /// Note that this does *not* run the user-provided destructor if one was - /// specified at definition time. Doing so must be done manually. - pub unsafe fn destroy(&self) { - match self.key.swap(0, Ordering::SeqCst) { - 0 => {} - n => { imp::destroy(n as imp::Key) } - } - } - #[inline] unsafe fn key(&self) -> imp::Key { match self.key.load(Ordering::Relaxed) { @@ -168,6 +155,24 @@ impl StaticKey { } unsafe fn lazy_init(&self) -> usize { + // Currently the Windows implementation of TLS is pretty hairy, and + // it greatly simplifies creation if we just synchronize everything. + // + // Additionally a 0-index of a tls key hasn't been seen on windows, so + // we just simplify the whole branch. + if imp::requires_synchronized_create() { + static INIT_LOCK: Mutex = Mutex::new(); + INIT_LOCK.lock(); + let mut key = self.key.load(Ordering::SeqCst); + if key == 0 { + key = imp::create(self.dtor) as usize; + self.key.store(key, Ordering::SeqCst); + } + INIT_LOCK.unlock(); + assert!(key != 0); + return key + } + // POSIX allows the key created here to be 0, but the compare_and_swap // below relies on using 0 as a sentinel value to check who won the // race to set the shared TLS key. As far as I know, there is no @@ -227,7 +232,42 @@ impl Key { impl Drop for Key { fn drop(&mut self) { - unsafe { imp::destroy(self.key) } + // Right now Windows doesn't support TLS key destruction, but this also + // isn't used anywhere other than tests, so just leak the TLS key. + // unsafe { imp::destroy(self.key) } + } +} + +pub unsafe fn register_dtor_fallback(t: *mut u8, + dtor: unsafe extern fn(*mut u8)) { + // The fallback implementation uses a vanilla OS-based TLS key to track + // the list of destructors that need to be run for this thread. The key + // then has its own destructor which runs all the other destructors. + // + // The destructor for DTORS is a little special in that it has a `while` + // loop to continuously drain the list of registered destructors. It + // *should* be the case that this loop always terminates because we + // provide the guarantee that a TLS key cannot be set after it is + // flagged for destruction. + + static DTORS: StaticKey = StaticKey::new(Some(run_dtors)); + type List = Vec<(*mut u8, unsafe extern fn(*mut u8))>; + if DTORS.get().is_null() { + let v: Box<List> = box Vec::new(); + DTORS.set(Box::into_raw(v) as *mut u8); + } + let list: &mut List = &mut *(DTORS.get() as *mut List); + list.push((t, dtor)); + + unsafe extern fn run_dtors(mut ptr: *mut u8) { + while !ptr.is_null() { + let list: Box<List> = Box::from_raw(ptr as *mut List); + for (ptr, dtor) in list.into_iter() { + dtor(ptr); + } + ptr = DTORS.get(); + DTORS.set(ptr::null_mut()); + } } } diff --git a/ctr-std/src/sys_common/util.rs b/ctr-std/src/sys_common/util.rs index aad0680..a391c7c 100644 --- a/ctr-std/src/sys_common/util.rs +++ b/ctr-std/src/sys_common/util.rs @@ -10,29 +10,8 @@ use fmt; use io::prelude::*; -use sync::atomic::{self, Ordering}; use sys::stdio::Stderr; - -pub fn min_stack() -> usize { - static MIN: atomic::AtomicUsize = atomic::AtomicUsize::new(0); - match MIN.load(Ordering::SeqCst) { - 0 => {} - n => return n - 1, - } - - // NOTE: We don't have env variable support on the 3DS so let's just use the - // default minimum - - // let amt = env::var("RUST_MIN_STACK").ok().and_then(|s| s.parse().ok()); - // let amt = amt.unwrap_or(2 * 1024 * 1024); - - let amt = 2 * 1024 * 1024; - - // 0 is our sentinel value, so ensure that we'll never see 0 after - // initialization has run - MIN.store(amt + 1, Ordering::SeqCst); - amt -} +use thread; pub fn dumb_print(args: fmt::Arguments) { let _ = Stderr::new().map(|mut stderr| stderr.write_fmt(args)); @@ -47,3 +26,9 @@ pub fn abort(args: fmt::Arguments) -> ! { dumb_print(format_args!("fatal runtime error: {}\n", args)); unsafe { ::sys::abort_internal(); } } + +#[allow(dead_code)] // stack overflow detection not enabled on all platforms +pub unsafe fn report_overflow() { + dumb_print(format_args!("\nthread '{}' has overflowed its stack\n", + thread::current().name().unwrap_or("<unknown>"))); +} diff --git a/ctr-std/src/sys_common/wtf8.rs b/ctr-std/src/sys_common/wtf8.rs new file mode 100644 index 0000000..46d554d --- /dev/null +++ b/ctr-std/src/sys_common/wtf8.rs @@ -0,0 +1,1284 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Implementation of [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/). +//! +//! This library uses Rust’s type system to maintain +//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed), +//! like the `String` and `&str` types do for UTF-8. +//! +//! Since [WTF-8 must not be used +//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience), +//! this library deliberately does not provide access to the underlying bytes +//! of WTF-8 strings, +//! nor can it decode WTF-8 from arbitrary bytes. +//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points. + +// this module is imported from @SimonSapin's repo and has tons of dead code on +// unix (it's mostly used on windows), so don't worry about dead code here. +#![allow(dead_code)] + +use core::str::next_code_point; + +use ascii::*; +use borrow::Cow; +use char; +use fmt; +use hash::{Hash, Hasher}; +use iter::FromIterator; +use mem; +use ops; +use rc::Rc; +use slice; +use str; +use sync::Arc; +use sys_common::AsInner; + +const UTF8_REPLACEMENT_CHARACTER: &'static str = "\u{FFFD}"; + +/// A Unicode code point: from U+0000 to U+10FFFF. +/// +/// Compare with the `char` type, +/// which represents a Unicode scalar value: +/// a code point that is not a surrogate (U+D800 to U+DFFF). +#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Copy)] +pub struct CodePoint { + value: u32 +} + +/// Format the code point as `U+` followed by four to six hexadecimal digits. +/// Example: `U+1F4A9` +impl fmt::Debug for CodePoint { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(formatter, "U+{:04X}", self.value) + } +} + +impl CodePoint { + /// Unsafely creates a new `CodePoint` without checking the value. + /// + /// Only use when `value` is known to be less than or equal to 0x10FFFF. + #[inline] + pub unsafe fn from_u32_unchecked(value: u32) -> CodePoint { + CodePoint { value: value } + } + + /// Creates a new `CodePoint` if the value is a valid code point. + /// + /// Returns `None` if `value` is above 0x10FFFF. + #[inline] + pub fn from_u32(value: u32) -> Option<CodePoint> { + match value { + 0 ... 0x10FFFF => Some(CodePoint { value: value }), + _ => None + } + } + + /// Creates a new `CodePoint` from a `char`. + /// + /// Since all Unicode scalar values are code points, this always succeeds. + #[inline] + pub fn from_char(value: char) -> CodePoint { + CodePoint { value: value as u32 } + } + + /// Returns the numeric value of the code point. + #[inline] + pub fn to_u32(&self) -> u32 { + self.value + } + + /// Optionally returns a Unicode scalar value for the code point. + /// + /// Returns `None` if the code point is a surrogate (from U+D800 to U+DFFF). + #[inline] + pub fn to_char(&self) -> Option<char> { + match self.value { + 0xD800 ... 0xDFFF => None, + _ => Some(unsafe { char::from_u32_unchecked(self.value) }) + } + } + + /// Returns a Unicode scalar value for the code point. + /// + /// Returns `'\u{FFFD}'` (the replacement character “�”) + /// if the code point is a surrogate (from U+D800 to U+DFFF). + #[inline] + pub fn to_char_lossy(&self) -> char { + self.to_char().unwrap_or('\u{FFFD}') + } +} + +/// An owned, growable string of well-formed WTF-8 data. +/// +/// Similar to `String`, but can additionally contain surrogate code points +/// if they’re not in a surrogate pair. +#[derive(Eq, PartialEq, Ord, PartialOrd, Clone)] +pub struct Wtf8Buf { + bytes: Vec<u8> +} + +impl ops::Deref for Wtf8Buf { + type Target = Wtf8; + + fn deref(&self) -> &Wtf8 { + self.as_slice() + } +} + +impl ops::DerefMut for Wtf8Buf { + fn deref_mut(&mut self) -> &mut Wtf8 { + self.as_mut_slice() + } +} + +/// Format the string with double quotes, +/// and surrogates as `\u` followed by four hexadecimal digits. +/// Example: `"a\u{D800}"` for a string with code points [U+0061, U+D800] +impl fmt::Debug for Wtf8Buf { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { + fmt::Debug::fmt(&**self, formatter) + } +} + +impl Wtf8Buf { + /// Creates a new, empty WTF-8 string. + #[inline] + pub fn new() -> Wtf8Buf { + Wtf8Buf { bytes: Vec::new() } + } + + /// Creates a new, empty WTF-8 string with pre-allocated capacity for `n` bytes. + #[inline] + pub fn with_capacity(n: usize) -> Wtf8Buf { + Wtf8Buf { bytes: Vec::with_capacity(n) } + } + + /// Creates a WTF-8 string from a UTF-8 `String`. + /// + /// This takes ownership of the `String` and does not copy. + /// + /// Since WTF-8 is a superset of UTF-8, this always succeeds. + #[inline] + pub fn from_string(string: String) -> Wtf8Buf { + Wtf8Buf { bytes: string.into_bytes() } + } + + /// Creates a WTF-8 string from a UTF-8 `&str` slice. + /// + /// This copies the content of the slice. + /// + /// Since WTF-8 is a superset of UTF-8, this always succeeds. + #[inline] + pub fn from_str(str: &str) -> Wtf8Buf { + Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()) } + } + + pub fn clear(&mut self) { + self.bytes.clear() + } + + /// Creates a WTF-8 string from a potentially ill-formed UTF-16 slice of 16-bit code units. + /// + /// This is lossless: calling `.encode_wide()` on the resulting string + /// will always return the original code units. + pub fn from_wide(v: &[u16]) -> Wtf8Buf { + let mut string = Wtf8Buf::with_capacity(v.len()); + for item in char::decode_utf16(v.iter().cloned()) { + match item { + Ok(ch) => string.push_char(ch), + Err(surrogate) => { + let surrogate = surrogate.unpaired_surrogate(); + // Surrogates are known to be in the code point range. + let code_point = unsafe { + CodePoint::from_u32_unchecked(surrogate as u32) + }; + // Skip the WTF-8 concatenation check, + // surrogate pairs are already decoded by decode_utf16 + string.push_code_point_unchecked(code_point) + } + } + } + string + } + + /// Copied from String::push + /// This does **not** include the WTF-8 concatenation check. + fn push_code_point_unchecked(&mut self, code_point: CodePoint) { + let c = unsafe { + char::from_u32_unchecked(code_point.value) + }; + let mut bytes = [0; 4]; + let bytes = c.encode_utf8(&mut bytes).as_bytes(); + self.bytes.extend_from_slice(bytes) + } + + #[inline] + pub fn as_slice(&self) -> &Wtf8 { + unsafe { Wtf8::from_bytes_unchecked(&self.bytes) } + } + + #[inline] + pub fn as_mut_slice(&mut self) -> &mut Wtf8 { + unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) } + } + + /// Reserves capacity for at least `additional` more bytes to be inserted + /// in the given `Wtf8Buf`. + /// The collection may reserve more space to avoid frequent reallocations. + /// + /// # Panics + /// + /// Panics if the new capacity overflows `usize`. + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.bytes.reserve(additional) + } + + #[inline] + pub fn reserve_exact(&mut self, additional: usize) { + self.bytes.reserve_exact(additional) + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + self.bytes.shrink_to_fit() + } + + /// Returns the number of bytes that this string buffer can hold without reallocating. + #[inline] + pub fn capacity(&self) -> usize { + self.bytes.capacity() + } + + /// Append a UTF-8 slice at the end of the string. + #[inline] + pub fn push_str(&mut self, other: &str) { + self.bytes.extend_from_slice(other.as_bytes()) + } + + /// Append a WTF-8 slice at the end of the string. + /// + /// This replaces newly paired surrogates at the boundary + /// with a supplementary code point, + /// like concatenating ill-formed UTF-16 strings effectively would. + #[inline] + pub fn push_wtf8(&mut self, other: &Wtf8) { + match ((&*self).final_lead_surrogate(), other.initial_trail_surrogate()) { + // Replace newly paired surrogates by a supplementary code point. + (Some(lead), Some(trail)) => { + let len_without_lead_surrogate = self.len() - 3; + self.bytes.truncate(len_without_lead_surrogate); + let other_without_trail_surrogate = &other.bytes[3..]; + // 4 bytes for the supplementary code point + self.bytes.reserve(4 + other_without_trail_surrogate.len()); + self.push_char(decode_surrogate_pair(lead, trail)); + self.bytes.extend_from_slice(other_without_trail_surrogate); + } + _ => self.bytes.extend_from_slice(&other.bytes) + } + } + + /// Append a Unicode scalar value at the end of the string. + #[inline] + pub fn push_char(&mut self, c: char) { + self.push_code_point_unchecked(CodePoint::from_char(c)) + } + + /// Append a code point at the end of the string. + /// + /// This replaces newly paired surrogates at the boundary + /// with a supplementary code point, + /// like concatenating ill-formed UTF-16 strings effectively would. + #[inline] + pub fn push(&mut self, code_point: CodePoint) { + if let trail @ 0xDC00...0xDFFF = code_point.to_u32() { + if let Some(lead) = (&*self).final_lead_surrogate() { + let len_without_lead_surrogate = self.len() - 3; + self.bytes.truncate(len_without_lead_surrogate); + self.push_char(decode_surrogate_pair(lead, trail as u16)); + return + } + } + + // No newly paired surrogates at the boundary. + self.push_code_point_unchecked(code_point) + } + + /// Shortens a string to the specified length. + /// + /// # Panics + /// + /// Panics if `new_len` > current length, + /// or if `new_len` is not a code point boundary. + #[inline] + pub fn truncate(&mut self, new_len: usize) { + assert!(is_code_point_boundary(self, new_len)); + self.bytes.truncate(new_len) + } + + /// Consumes the WTF-8 string and tries to convert it to UTF-8. + /// + /// This does not copy the data. + /// + /// If the contents are not well-formed UTF-8 + /// (that is, if the string contains surrogates), + /// the original WTF-8 string is returned instead. + pub fn into_string(self) -> Result<String, Wtf8Buf> { + match self.next_surrogate(0) { + None => Ok(unsafe { String::from_utf8_unchecked(self.bytes) }), + Some(_) => Err(self), + } + } + + /// Consumes the WTF-8 string and converts it lossily to UTF-8. + /// + /// This does not copy the data (but may overwrite parts of it in place). + /// + /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”) + pub fn into_string_lossy(mut self) -> String { + let mut pos = 0; + loop { + match self.next_surrogate(pos) { + Some((surrogate_pos, _)) => { + pos = surrogate_pos + 3; + self.bytes[surrogate_pos..pos] + .copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); + }, + None => return unsafe { String::from_utf8_unchecked(self.bytes) } + } + } + } + + /// Converts this `Wtf8Buf` into a boxed `Wtf8`. + #[inline] + pub fn into_box(self) -> Box<Wtf8> { + unsafe { mem::transmute(self.bytes.into_boxed_slice()) } + } + + /// Converts a `Box<Wtf8>` into a `Wtf8Buf`. + pub fn from_box(boxed: Box<Wtf8>) -> Wtf8Buf { + let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) }; + Wtf8Buf { bytes: bytes.into_vec() } + } +} + +/// Create a new WTF-8 string from an iterator of code points. +/// +/// This replaces surrogate code point pairs with supplementary code points, +/// like concatenating ill-formed UTF-16 strings effectively would. +impl FromIterator<CodePoint> for Wtf8Buf { + fn from_iter<T: IntoIterator<Item=CodePoint>>(iter: T) -> Wtf8Buf { + let mut string = Wtf8Buf::new(); + string.extend(iter); + string + } +} + +/// Append code points from an iterator to the string. +/// +/// This replaces surrogate code point pairs with supplementary code points, +/// like concatenating ill-formed UTF-16 strings effectively would. +impl Extend<CodePoint> for Wtf8Buf { + fn extend<T: IntoIterator<Item=CodePoint>>(&mut self, iter: T) { + let iterator = iter.into_iter(); + let (low, _high) = iterator.size_hint(); + // Lower bound of one byte per code point (ASCII only) + self.bytes.reserve(low); + for code_point in iterator { + self.push(code_point); + } + } +} + +/// A borrowed slice of well-formed WTF-8 data. +/// +/// Similar to `&str`, but can additionally contain surrogate code points +/// if they’re not in a surrogate pair. +#[derive(Eq, Ord, PartialEq, PartialOrd)] +pub struct Wtf8 { + bytes: [u8] +} + +impl AsInner<[u8]> for Wtf8 { + fn as_inner(&self) -> &[u8] { &self.bytes } +} + +/// Format the slice with double quotes, +/// and surrogates as `\u` followed by four hexadecimal digits. +/// Example: `"a\u{D800}"` for a slice with code points [U+0061, U+D800] +impl fmt::Debug for Wtf8 { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + fn write_str_escaped(f: &mut fmt::Formatter, s: &str) -> fmt::Result { + use fmt::Write; + for c in s.chars().flat_map(|c| c.escape_debug()) { + f.write_char(c)? + } + Ok(()) + } + + formatter.write_str("\"")?; + let mut pos = 0; + loop { + match self.next_surrogate(pos) { + None => break, + Some((surrogate_pos, surrogate)) => { + write_str_escaped( + formatter, + unsafe { str::from_utf8_unchecked( + &self.bytes[pos .. surrogate_pos] + )}, + )?; + write!(formatter, "\\u{{{:x}}}", surrogate)?; + pos = surrogate_pos + 3; + } + } + } + write_str_escaped( + formatter, + unsafe { str::from_utf8_unchecked(&self.bytes[pos..]) }, + )?; + formatter.write_str("\"") + } +} + +impl fmt::Display for Wtf8 { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let wtf8_bytes = &self.bytes; + let mut pos = 0; + loop { + match self.next_surrogate(pos) { + Some((surrogate_pos, _)) => { + formatter.write_str(unsafe { + str::from_utf8_unchecked(&wtf8_bytes[pos .. surrogate_pos]) + })?; + formatter.write_str(UTF8_REPLACEMENT_CHARACTER)?; + pos = surrogate_pos + 3; + }, + None => { + let s = unsafe { + str::from_utf8_unchecked(&wtf8_bytes[pos..]) + }; + if pos == 0 { + return s.fmt(formatter) + } else { + return formatter.write_str(s) + } + } + } + } + } +} + +impl Wtf8 { + /// Creates a WTF-8 slice from a UTF-8 `&str` slice. + /// + /// Since WTF-8 is a superset of UTF-8, this always succeeds. + #[inline] + pub fn from_str(value: &str) -> &Wtf8 { + unsafe { Wtf8::from_bytes_unchecked(value.as_bytes()) } + } + + /// Creates a WTF-8 slice from a WTF-8 byte slice. + /// + /// Since the byte slice is not checked for valid WTF-8, this functions is + /// marked unsafe. + #[inline] + unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 { + mem::transmute(value) + } + + /// Creates a mutable WTF-8 slice from a mutable WTF-8 byte slice. + /// + /// Since the byte slice is not checked for valid WTF-8, this functions is + /// marked unsafe. + #[inline] + unsafe fn from_mut_bytes_unchecked(value: &mut [u8]) -> &mut Wtf8 { + mem::transmute(value) + } + + /// Returns the length, in WTF-8 bytes. + #[inline] + pub fn len(&self) -> usize { + self.bytes.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.bytes.is_empty() + } + + /// Returns the code point at `position` if it is in the ASCII range, + /// or `b'\xFF' otherwise. + /// + /// # Panics + /// + /// Panics if `position` is beyond the end of the string. + #[inline] + pub fn ascii_byte_at(&self, position: usize) -> u8 { + match self.bytes[position] { + ascii_byte @ 0x00 ... 0x7F => ascii_byte, + _ => 0xFF + } + } + + /// Returns an iterator for the string’s code points. + #[inline] + pub fn code_points(&self) -> Wtf8CodePoints { + Wtf8CodePoints { bytes: self.bytes.iter() } + } + + /// Tries to convert the string to UTF-8 and return a `&str` slice. + /// + /// Returns `None` if the string contains surrogates. + /// + /// This does not copy the data. + #[inline] + pub fn as_str(&self) -> Option<&str> { + // Well-formed WTF-8 is also well-formed UTF-8 + // if and only if it contains no surrogate. + match self.next_surrogate(0) { + None => Some(unsafe { str::from_utf8_unchecked(&self.bytes) }), + Some(_) => None, + } + } + + /// Lossily converts the string to UTF-8. + /// Returns a UTF-8 `&str` slice if the contents are well-formed in UTF-8. + /// + /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”). + /// + /// This only copies the data if necessary (if it contains any surrogate). + pub fn to_string_lossy(&self) -> Cow<str> { + let surrogate_pos = match self.next_surrogate(0) { + None => return Cow::Borrowed(unsafe { str::from_utf8_unchecked(&self.bytes) }), + Some((pos, _)) => pos, + }; + let wtf8_bytes = &self.bytes; + let mut utf8_bytes = Vec::with_capacity(self.len()); + utf8_bytes.extend_from_slice(&wtf8_bytes[..surrogate_pos]); + utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); + let mut pos = surrogate_pos + 3; + loop { + match self.next_surrogate(pos) { + Some((surrogate_pos, _)) => { + utf8_bytes.extend_from_slice(&wtf8_bytes[pos .. surrogate_pos]); + utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); + pos = surrogate_pos + 3; + }, + None => { + utf8_bytes.extend_from_slice(&wtf8_bytes[pos..]); + return Cow::Owned(unsafe { String::from_utf8_unchecked(utf8_bytes) }) + } + } + } + } + + /// Converts the WTF-8 string to potentially ill-formed UTF-16 + /// and return an iterator of 16-bit code units. + /// + /// This is lossless: + /// calling `Wtf8Buf::from_ill_formed_utf16` on the resulting code units + /// would always return the original WTF-8 string. + #[inline] + pub fn encode_wide(&self) -> EncodeWide { + EncodeWide { code_points: self.code_points(), extra: 0 } + } + + #[inline] + fn next_surrogate(&self, mut pos: usize) -> Option<(usize, u16)> { + let mut iter = self.bytes[pos..].iter(); + loop { + let b = *iter.next()?; + if b < 0x80 { + pos += 1; + } else if b < 0xE0 { + iter.next(); + pos += 2; + } else if b == 0xED { + match (iter.next(), iter.next()) { + (Some(&b2), Some(&b3)) if b2 >= 0xA0 => { + return Some((pos, decode_surrogate(b2, b3))) + } + _ => pos += 3 + } + } else if b < 0xF0 { + iter.next(); + iter.next(); + pos += 3; + } else { + iter.next(); + iter.next(); + iter.next(); + pos += 4; + } + } + } + + #[inline] + fn final_lead_surrogate(&self) -> Option<u16> { + let len = self.len(); + if len < 3 { + return None + } + match &self.bytes[(len - 3)..] { + &[0xED, b2 @ 0xA0...0xAF, b3] => Some(decode_surrogate(b2, b3)), + _ => None + } + } + + #[inline] + fn initial_trail_surrogate(&self) -> Option<u16> { + let len = self.len(); + if len < 3 { + return None + } + match &self.bytes[..3] { + &[0xED, b2 @ 0xB0...0xBF, b3] => Some(decode_surrogate(b2, b3)), + _ => None + } + } + + /// Boxes this `Wtf8`. + #[inline] + pub fn into_box(&self) -> Box<Wtf8> { + let boxed: Box<[u8]> = self.bytes.into(); + unsafe { mem::transmute(boxed) } + } + + /// Creates a boxed, empty `Wtf8`. + pub fn empty_box() -> Box<Wtf8> { + let boxed: Box<[u8]> = Default::default(); + unsafe { mem::transmute(boxed) } + } + + #[inline] + pub fn into_arc(&self) -> Arc<Wtf8> { + let arc: Arc<[u8]> = Arc::from(&self.bytes); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Wtf8) } + } + + #[inline] + pub fn into_rc(&self) -> Rc<Wtf8> { + let rc: Rc<[u8]> = Rc::from(&self.bytes); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Wtf8) } + } +} + + +/// Return a slice of the given string for the byte range [`begin`..`end`). +/// +/// # Panics +/// +/// Panics when `begin` and `end` do not point to code point boundaries, +/// or point beyond the end of the string. +impl ops::Index<ops::Range<usize>> for Wtf8 { + type Output = Wtf8; + + #[inline] + fn index(&self, range: ops::Range<usize>) -> &Wtf8 { + // is_code_point_boundary checks that the index is in [0, .len()] + if range.start <= range.end && + is_code_point_boundary(self, range.start) && + is_code_point_boundary(self, range.end) { + unsafe { slice_unchecked(self, range.start, range.end) } + } else { + slice_error_fail(self, range.start, range.end) + } + } +} + +/// Return a slice of the given string from byte `begin` to its end. +/// +/// # Panics +/// +/// Panics when `begin` is not at a code point boundary, +/// or is beyond the end of the string. +impl ops::Index<ops::RangeFrom<usize>> for Wtf8 { + type Output = Wtf8; + + #[inline] + fn index(&self, range: ops::RangeFrom<usize>) -> &Wtf8 { + // is_code_point_boundary checks that the index is in [0, .len()] + if is_code_point_boundary(self, range.start) { + unsafe { slice_unchecked(self, range.start, self.len()) } + } else { + slice_error_fail(self, range.start, self.len()) + } + } +} + +/// Return a slice of the given string from its beginning to byte `end`. +/// +/// # Panics +/// +/// Panics when `end` is not at a code point boundary, +/// or is beyond the end of the string. +impl ops::Index<ops::RangeTo<usize>> for Wtf8 { + type Output = Wtf8; + + #[inline] + fn index(&self, range: ops::RangeTo<usize>) -> &Wtf8 { + // is_code_point_boundary checks that the index is in [0, .len()] + if is_code_point_boundary(self, range.end) { + unsafe { slice_unchecked(self, 0, range.end) } + } else { + slice_error_fail(self, 0, range.end) + } + } +} + +impl ops::Index<ops::RangeFull> for Wtf8 { + type Output = Wtf8; + + #[inline] + fn index(&self, _range: ops::RangeFull) -> &Wtf8 { + self + } +} + +#[inline] +fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 { + // The first byte is assumed to be 0xED + 0xD800 | (second_byte as u16 & 0x3F) << 6 | third_byte as u16 & 0x3F +} + +#[inline] +fn decode_surrogate_pair(lead: u16, trail: u16) -> char { + let code_point = 0x10000 + ((((lead - 0xD800) as u32) << 10) | (trail - 0xDC00) as u32); + unsafe { char::from_u32_unchecked(code_point) } +} + +/// Copied from core::str::StrPrelude::is_char_boundary +#[inline] +pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool { + if index == slice.len() { return true; } + match slice.bytes.get(index) { + None => false, + Some(&b) => b < 128 || b >= 192, + } +} + +/// Copied from core::str::raw::slice_unchecked +#[inline] +pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 { + // memory layout of an &[u8] and &Wtf8 are the same + Wtf8::from_bytes_unchecked(slice::from_raw_parts( + s.bytes.as_ptr().offset(begin as isize), + end - begin + )) +} + +/// Copied from core::str::raw::slice_error_fail +#[inline(never)] +pub fn slice_error_fail(s: &Wtf8, begin: usize, end: usize) -> ! { + assert!(begin <= end); + panic!("index {} and/or {} in `{:?}` do not lie on character boundary", + begin, end, s); +} + +/// Iterator for the code points of a WTF-8 string. +/// +/// Created with the method `.code_points()`. +#[derive(Clone)] +pub struct Wtf8CodePoints<'a> { + bytes: slice::Iter<'a, u8> +} + +impl<'a> Iterator for Wtf8CodePoints<'a> { + type Item = CodePoint; + + #[inline] + fn next(&mut self) -> Option<CodePoint> { + next_code_point(&mut self.bytes).map(|c| CodePoint { value: c }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + let len = self.bytes.len(); + (len.saturating_add(3) / 4, Some(len)) + } +} + +/// Generates a wide character sequence for potentially ill-formed UTF-16. +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Clone)] +pub struct EncodeWide<'a> { + code_points: Wtf8CodePoints<'a>, + extra: u16 +} + +// Copied from libunicode/u_str.rs +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> Iterator for EncodeWide<'a> { + type Item = u16; + + #[inline] + fn next(&mut self) -> Option<u16> { + if self.extra != 0 { + let tmp = self.extra; + self.extra = 0; + return Some(tmp); + } + + let mut buf = [0; 2]; + self.code_points.next().map(|code_point| { + let c = unsafe { + char::from_u32_unchecked(code_point.value) + }; + let n = c.encode_utf16(&mut buf).len(); + if n == 2 { + self.extra = buf[1]; + } + buf[0] + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + let (low, high) = self.code_points.size_hint(); + // every code point gets either one u16 or two u16, + // so this iterator is between 1 or 2 times as + // long as the underlying iterator. + (low, high.and_then(|n| n.checked_mul(2))) + } +} + +impl Hash for CodePoint { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + self.value.hash(state) + } +} + +impl Hash for Wtf8Buf { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + state.write(&self.bytes); + 0xfeu8.hash(state) + } +} + +impl Hash for Wtf8 { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + state.write(&self.bytes); + 0xfeu8.hash(state) + } +} + +impl AsciiExt for Wtf8 { + type Owned = Wtf8Buf; + + fn is_ascii(&self) -> bool { + self.bytes.is_ascii() + } + fn to_ascii_uppercase(&self) -> Wtf8Buf { + Wtf8Buf { bytes: self.bytes.to_ascii_uppercase() } + } + fn to_ascii_lowercase(&self) -> Wtf8Buf { + Wtf8Buf { bytes: self.bytes.to_ascii_lowercase() } + } + fn eq_ignore_ascii_case(&self, other: &Wtf8) -> bool { + self.bytes.eq_ignore_ascii_case(&other.bytes) + } + + fn make_ascii_uppercase(&mut self) { self.bytes.make_ascii_uppercase() } + fn make_ascii_lowercase(&mut self) { self.bytes.make_ascii_lowercase() } +} + +#[cfg(test)] +mod tests { + use borrow::Cow; + use super::*; + + #[test] + fn code_point_from_u32() { + assert!(CodePoint::from_u32(0).is_some()); + assert!(CodePoint::from_u32(0xD800).is_some()); + assert!(CodePoint::from_u32(0x10FFFF).is_some()); + assert!(CodePoint::from_u32(0x110000).is_none()); + } + + #[test] + fn code_point_to_u32() { + fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() } + assert_eq!(c(0).to_u32(), 0); + assert_eq!(c(0xD800).to_u32(), 0xD800); + assert_eq!(c(0x10FFFF).to_u32(), 0x10FFFF); + } + + #[test] + fn code_point_from_char() { + assert_eq!(CodePoint::from_char('a').to_u32(), 0x61); + assert_eq!(CodePoint::from_char('💩').to_u32(), 0x1F4A9); + } + + #[test] + fn code_point_to_string() { + assert_eq!(format!("{:?}", CodePoint::from_char('a')), "U+0061"); + assert_eq!(format!("{:?}", CodePoint::from_char('💩')), "U+1F4A9"); + } + + #[test] + fn code_point_to_char() { + fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() } + assert_eq!(c(0x61).to_char(), Some('a')); + assert_eq!(c(0x1F4A9).to_char(), Some('💩')); + assert_eq!(c(0xD800).to_char(), None); + } + + #[test] + fn code_point_to_char_lossy() { + fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() } + assert_eq!(c(0x61).to_char_lossy(), 'a'); + assert_eq!(c(0x1F4A9).to_char_lossy(), '💩'); + assert_eq!(c(0xD800).to_char_lossy(), '\u{FFFD}'); + } + + #[test] + fn wtf8buf_new() { + assert_eq!(Wtf8Buf::new().bytes, b""); + } + + #[test] + fn wtf8buf_from_str() { + assert_eq!(Wtf8Buf::from_str("").bytes, b""); + assert_eq!(Wtf8Buf::from_str("aé 💩").bytes, + b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + } + + #[test] + fn wtf8buf_from_string() { + assert_eq!(Wtf8Buf::from_string(String::from("")).bytes, b""); + assert_eq!(Wtf8Buf::from_string(String::from("aé 💩")).bytes, + b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + } + + #[test] + fn wtf8buf_from_wide() { + assert_eq!(Wtf8Buf::from_wide(&[]).bytes, b""); + assert_eq!(Wtf8Buf::from_wide( + &[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes, + b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9"); + } + + #[test] + fn wtf8buf_push_str() { + let mut string = Wtf8Buf::new(); + assert_eq!(string.bytes, b""); + string.push_str("aé 💩"); + assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + } + + #[test] + fn wtf8buf_push_char() { + let mut string = Wtf8Buf::from_str("aé "); + assert_eq!(string.bytes, b"a\xC3\xA9 "); + string.push_char('💩'); + assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + } + + #[test] + fn wtf8buf_push() { + let mut string = Wtf8Buf::from_str("aé "); + assert_eq!(string.bytes, b"a\xC3\xA9 "); + string.push(CodePoint::from_char('💩')); + assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + + fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() } + + let mut string = Wtf8Buf::new(); + string.push(c(0xD83D)); // lead + string.push(c(0xDCA9)); // trail + assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic! + + let mut string = Wtf8Buf::new(); + string.push(c(0xD83D)); // lead + string.push(c(0x20)); // not surrogate + string.push(c(0xDCA9)); // trail + assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); + + let mut string = Wtf8Buf::new(); + string.push(c(0xD800)); // lead + string.push(c(0xDBFF)); // lead + assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF"); + + let mut string = Wtf8Buf::new(); + string.push(c(0xD800)); // lead + string.push(c(0xE000)); // not surrogate + assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80"); + + let mut string = Wtf8Buf::new(); + string.push(c(0xD7FF)); // not surrogate + string.push(c(0xDC00)); // trail + assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80"); + + let mut string = Wtf8Buf::new(); + string.push(c(0x61)); // not surrogate, < 3 bytes + string.push(c(0xDC00)); // trail + assert_eq!(string.bytes, b"\x61\xED\xB0\x80"); + + let mut string = Wtf8Buf::new(); + string.push(c(0xDC00)); // trail + assert_eq!(string.bytes, b"\xED\xB0\x80"); + } + + #[test] + fn wtf8buf_push_wtf8() { + let mut string = Wtf8Buf::from_str("aé"); + assert_eq!(string.bytes, b"a\xC3\xA9"); + string.push_wtf8(Wtf8::from_str(" 💩")); + assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + + fn w(v: &[u8]) -> &Wtf8 { unsafe { Wtf8::from_bytes_unchecked(v) } } + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\xA0\xBD")); // lead + string.push_wtf8(w(b"\xED\xB2\xA9")); // trail + assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic! + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\xA0\xBD")); // lead + string.push_wtf8(w(b" ")); // not surrogate + string.push_wtf8(w(b"\xED\xB2\xA9")); // trail + assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\xA0\x80")); // lead + string.push_wtf8(w(b"\xED\xAF\xBF")); // lead + assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF"); + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\xA0\x80")); // lead + string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate + assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80"); + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate + string.push_wtf8(w(b"\xED\xB0\x80")); // trail + assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80"); + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes + string.push_wtf8(w(b"\xED\xB0\x80")); // trail + assert_eq!(string.bytes, b"\x61\xED\xB0\x80"); + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\xB0\x80")); // trail + assert_eq!(string.bytes, b"\xED\xB0\x80"); + } + + #[test] + fn wtf8buf_truncate() { + let mut string = Wtf8Buf::from_str("aé"); + string.truncate(1); + assert_eq!(string.bytes, b"a"); + } + + #[test] + #[should_panic] + fn wtf8buf_truncate_fail_code_point_boundary() { + let mut string = Wtf8Buf::from_str("aé"); + string.truncate(2); + } + + #[test] + #[should_panic] + fn wtf8buf_truncate_fail_longer() { + let mut string = Wtf8Buf::from_str("aé"); + string.truncate(4); + } + + #[test] + fn wtf8buf_into_string() { + let mut string = Wtf8Buf::from_str("aé 💩"); + assert_eq!(string.clone().into_string(), Ok(String::from("aé 💩"))); + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert_eq!(string.clone().into_string(), Err(string)); + } + + #[test] + fn wtf8buf_into_string_lossy() { + let mut string = Wtf8Buf::from_str("aé 💩"); + assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩")); + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩�")); + } + + #[test] + fn wtf8buf_from_iterator() { + fn f(values: &[u32]) -> Wtf8Buf { + values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::<Wtf8Buf>() + } + assert_eq!(f(&[0x61, 0xE9, 0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + + assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic! + assert_eq!(f(&[0xD83D, 0x20, 0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); + assert_eq!(f(&[0xD800, 0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF"); + assert_eq!(f(&[0xD800, 0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80"); + assert_eq!(f(&[0xD7FF, 0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80"); + assert_eq!(f(&[0x61, 0xDC00]).bytes, b"\x61\xED\xB0\x80"); + assert_eq!(f(&[0xDC00]).bytes, b"\xED\xB0\x80"); + } + + #[test] + fn wtf8buf_extend() { + fn e(initial: &[u32], extended: &[u32]) -> Wtf8Buf { + fn c(value: &u32) -> CodePoint { CodePoint::from_u32(*value).unwrap() } + let mut string = initial.iter().map(c).collect::<Wtf8Buf>(); + string.extend(extended.iter().map(c)); + string + } + + assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes, + b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + + assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic! + assert_eq!(e(&[0xD83D, 0x20], &[0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); + assert_eq!(e(&[0xD800], &[0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF"); + assert_eq!(e(&[0xD800], &[0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80"); + assert_eq!(e(&[0xD7FF], &[0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80"); + assert_eq!(e(&[0x61], &[0xDC00]).bytes, b"\x61\xED\xB0\x80"); + assert_eq!(e(&[], &[0xDC00]).bytes, b"\xED\xB0\x80"); + } + + #[test] + fn wtf8buf_show() { + let mut string = Wtf8Buf::from_str("a\té \u{7f}💩\r"); + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert_eq!(format!("{:?}", string), "\"a\\té \\u{7f}\u{1f4a9}\\r\\u{d800}\""); + } + + #[test] + fn wtf8buf_as_slice() { + assert_eq!(Wtf8Buf::from_str("aé").as_slice(), Wtf8::from_str("aé")); + } + + #[test] + fn wtf8buf_show_str() { + let text = "a\té 💩\r"; + let string = Wtf8Buf::from_str(text); + assert_eq!(format!("{:?}", text), format!("{:?}", string)); + } + + #[test] + fn wtf8_from_str() { + assert_eq!(&Wtf8::from_str("").bytes, b""); + assert_eq!(&Wtf8::from_str("aé 💩").bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + } + + #[test] + fn wtf8_len() { + assert_eq!(Wtf8::from_str("").len(), 0); + assert_eq!(Wtf8::from_str("aé 💩").len(), 8); + } + + #[test] + fn wtf8_slice() { + assert_eq!(&Wtf8::from_str("aé 💩")[1.. 4].bytes, b"\xC3\xA9 "); + } + + #[test] + #[should_panic] + fn wtf8_slice_not_code_point_boundary() { + &Wtf8::from_str("aé 💩")[2.. 4]; + } + + #[test] + fn wtf8_slice_from() { + assert_eq!(&Wtf8::from_str("aé 💩")[1..].bytes, b"\xC3\xA9 \xF0\x9F\x92\xA9"); + } + + #[test] + #[should_panic] + fn wtf8_slice_from_not_code_point_boundary() { + &Wtf8::from_str("aé 💩")[2..]; + } + + #[test] + fn wtf8_slice_to() { + assert_eq!(&Wtf8::from_str("aé 💩")[..4].bytes, b"a\xC3\xA9 "); + } + + #[test] + #[should_panic] + fn wtf8_slice_to_not_code_point_boundary() { + &Wtf8::from_str("aé 💩")[5..]; + } + + #[test] + fn wtf8_ascii_byte_at() { + let slice = Wtf8::from_str("aé 💩"); + assert_eq!(slice.ascii_byte_at(0), b'a'); + assert_eq!(slice.ascii_byte_at(1), b'\xFF'); + assert_eq!(slice.ascii_byte_at(2), b'\xFF'); + assert_eq!(slice.ascii_byte_at(3), b' '); + assert_eq!(slice.ascii_byte_at(4), b'\xFF'); + } + + #[test] + fn wtf8_code_points() { + fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() } + fn cp(string: &Wtf8Buf) -> Vec<Option<char>> { + string.code_points().map(|c| c.to_char()).collect::<Vec<_>>() + } + let mut string = Wtf8Buf::from_str("é "); + assert_eq!(cp(&string), [Some('é'), Some(' ')]); + string.push(c(0xD83D)); + assert_eq!(cp(&string), [Some('é'), Some(' '), None]); + string.push(c(0xDCA9)); + assert_eq!(cp(&string), [Some('é'), Some(' '), Some('💩')]); + } + + #[test] + fn wtf8_as_str() { + assert_eq!(Wtf8::from_str("").as_str(), Some("")); + assert_eq!(Wtf8::from_str("aé 💩").as_str(), Some("aé 💩")); + let mut string = Wtf8Buf::new(); + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert_eq!(string.as_str(), None); + } + + #[test] + fn wtf8_to_string_lossy() { + assert_eq!(Wtf8::from_str("").to_string_lossy(), Cow::Borrowed("")); + assert_eq!(Wtf8::from_str("aé 💩").to_string_lossy(), Cow::Borrowed("aé 💩")); + let mut string = Wtf8Buf::from_str("aé 💩"); + string.push(CodePoint::from_u32(0xD800).unwrap()); + let expected: Cow<str> = Cow::Owned(String::from("aé 💩�")); + assert_eq!(string.to_string_lossy(), expected); + } + + #[test] + fn wtf8_display() { + fn d(b: &[u8]) -> String { + format!("{}", &unsafe { Wtf8::from_bytes_unchecked(b) }) + } + + assert_eq!("", d("".as_bytes())); + assert_eq!("aé 💩", d("aé 💩".as_bytes())); + + let mut string = Wtf8Buf::from_str("aé 💩"); + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert_eq!("aé 💩�", d(string.as_inner())); + } + + #[test] + fn wtf8_encode_wide() { + let mut string = Wtf8Buf::from_str("aé "); + string.push(CodePoint::from_u32(0xD83D).unwrap()); + string.push_char('💩'); + assert_eq!(string.encode_wide().collect::<Vec<_>>(), + vec![0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]); + } +} |