diff options
Diffstat (limited to 'ctr-std/src/ffi')
| -rw-r--r-- | ctr-std/src/ffi/c_str.rs | 738 | ||||
| -rw-r--r-- | ctr-std/src/ffi/mod.rs | 151 | ||||
| -rw-r--r-- | ctr-std/src/ffi/os_str.rs | 416 |
3 files changed, 1187 insertions, 118 deletions
diff --git a/ctr-std/src/ffi/c_str.rs b/ctr-std/src/ffi/c_str.rs index ad40660..a19fe82 100644 --- a/ctr-std/src/ffi/c_str.rs +++ b/ctr-std/src/ffi/c_str.rs @@ -14,27 +14,80 @@ use cmp::Ordering; use error::Error; use fmt::{self, Write}; use io; -use libc::{self, c_char}; use mem; use memchr; use ops; +use os::raw::c_char; use ptr; +use rc::Rc; use slice; use str::{self, Utf8Error}; +use sync::Arc; +use sys; -/// A type representing an owned C-compatible string +/// A type representing an owned, C-compatible, nul-terminated string with no nul bytes in the +/// middle. /// -/// This type serves the primary purpose of being able to safely generate a +/// This type serves the purpose of being able to safely generate a /// C-compatible string from a Rust byte slice or vector. An instance of this /// type is a static guarantee that the underlying bytes contain no interior 0 -/// bytes and the final byte is 0. +/// bytes ("nul characters") and that the final byte is 0 ("nul terminator"). /// -/// A `CString` is created from either a byte slice or a byte vector. After -/// being created, a `CString` predominately inherits all of its methods from -/// the `Deref` implementation to `[c_char]`. Note that the underlying array -/// is represented as an array of `c_char` as opposed to `u8`. A `u8` slice -/// can be obtained with the `as_bytes` method. Slices produced from a `CString` -/// do *not* contain the trailing nul terminator unless otherwise specified. +/// `CString` is to [`CStr`] as [`String`] is to [`&str`]: the former +/// in each pair are owned strings; the latter are borrowed +/// references. +/// +/// # Creating a `CString` +/// +/// A `CString` is created from either a byte slice or a byte vector, +/// or anything that implements [`Into`]`<`[`Vec`]`<`[`u8`]`>>` (for +/// example, you can build a `CString` straight out of a [`String`] or +/// a [`&str`], since both implement that trait). 
+/// +/// The [`new`] method will actually check that the provided `&[u8]` +/// does not have 0 bytes in the middle, and return an error if it +/// finds one. +/// +/// # Extracting a raw pointer to the whole C string +/// +/// `CString` implements a [`as_ptr`] method through the [`Deref`] +/// trait. This method will give you a `*const c_char` which you can +/// feed directly to extern functions that expect a nul-terminated +/// string, like C's `strdup()`. +/// +/// # Extracting a slice of the whole C string +/// +/// Alternatively, you can obtain a `&[`[`u8`]`]` slice from a +/// `CString` with the [`as_bytes`] method. Slices produced in this +/// way do *not* contain the trailing nul terminator. This is useful +/// when you will be calling an extern function that takes a `*const +/// u8` argument which is not necessarily nul-terminated, plus another +/// argument with the length of the string — like C's `strndup()`. +/// You can of course get the slice's length with its +/// [`len`][slice.len] method. +/// +/// If you need a `&[`[`u8`]`]` slice *with* the nul terminator, you +/// can use [`as_bytes_with_nul`] instead. +/// +/// Once you have the kind of slice you need (with or without a nul +/// terminator), you can call the slice's own +/// [`as_ptr`][slice.as_ptr] method to get a raw pointer to pass to +/// extern functions. See the documentation for that function for a +/// discussion on ensuring the lifetime of the raw pointer. 
+/// +/// [`Into`]: ../convert/trait.Into.html +/// [`Vec`]: ../vec/struct.Vec.html +/// [`String`]: ../string/struct.String.html +/// [`&str`]: ../primitive.str.html +/// [`u8`]: ../primitive.u8.html +/// [`new`]: #method.new +/// [`as_bytes`]: #method.as_bytes +/// [`as_bytes_with_nul`]: #method.as_bytes_with_nul +/// [`as_ptr`]: #method.as_ptr +/// [slice.as_ptr]: ../primitive.slice.html#method.as_ptr +/// [slice.len]: ../primitive.slice.html#method.len +/// [`Deref`]: ../ops/trait.Deref.html +/// [`CStr`]: struct.CStr.html /// /// # Examples /// @@ -47,6 +100,8 @@ use str::{self, Utf8Error}; /// fn my_printer(s: *const c_char); /// } /// +/// // We are certain that our string doesn't have 0 bytes in the middle, +/// // so we can .unwrap() /// let c_to_print = CString::new("Hello, world!").unwrap(); /// unsafe { /// my_printer(c_to_print.as_ptr()); @@ -57,7 +112,7 @@ use str::{self, Utf8Error}; /// # Safety /// /// `CString` is intended for working with traditional C-style strings -/// (a sequence of non-null bytes terminated by a single null byte); the +/// (a sequence of non-nul bytes terminated by a single nul byte); the /// primary use case for these kinds of strings is interoperating with C-like /// code. Often you will need to transfer ownership to/from that external /// code. It is strongly recommended that you thoroughly read through the @@ -76,18 +131,24 @@ pub struct CString { /// Representation of a borrowed C string. /// -/// This dynamically sized type is only safely constructed via a borrowed -/// version of an instance of `CString`. This type can be constructed from a raw -/// C string as well and represents a C string borrowed from another location. +/// This type represents a borrowed reference to a nul-terminated +/// array of bytes. It can be constructed safely from a `&[`[`u8`]`]` +/// slice, or unsafely from a raw `*const c_char`. 
It can then be +/// converted to a Rust [`&str`] by performing UTF-8 validation, or +/// into an owned [`CString`]. +/// +/// `CStr` is to [`CString`] as [`&str`] is to [`String`]: the former +/// in each pair are borrowed references; the latter are owned +/// strings. /// /// Note that this structure is **not** `repr(C)` and is not recommended to be -/// placed in the signatures of FFI functions. Instead safe wrappers of FFI -/// functions may leverage the unsafe `from_ptr` constructor to provide a safe +/// placed in the signatures of FFI functions. Instead, safe wrappers of FFI +/// functions may leverage the unsafe [`from_ptr`] constructor to provide a safe /// interface to other consumers. /// /// # Examples /// -/// Inspecting a foreign C string +/// Inspecting a foreign C string: /// /// ```no_run /// use std::ffi::CStr; @@ -97,11 +158,11 @@ pub struct CString { /// /// unsafe { /// let slice = CStr::from_ptr(my_string()); -/// println!("string length: {}", slice.to_bytes().len()); +/// println!("string buffer size without nul terminator: {}", slice.to_bytes().len()); /// } /// ``` /// -/// Passing a Rust-originating C string +/// Passing a Rust-originating C string: /// /// ```no_run /// use std::ffi::{CString, CStr}; @@ -117,7 +178,7 @@ pub struct CString { /// work(&s); /// ``` /// -/// Converting a foreign C string into a Rust `String` +/// Converting a foreign C string into a Rust [`String`]: /// /// ```no_run /// use std::ffi::CStr; @@ -133,6 +194,12 @@ pub struct CString { /// /// println!("string: {}", my_string_safe()); /// ``` +/// +/// [`u8`]: ../primitive.u8.html +/// [`&str`]: ../primitive.str.html +/// [`String`]: ../string/struct.String.html +/// [`CString`]: struct.CString.html +/// [`from_ptr`]: #method.from_ptr #[derive(Hash)] #[stable(feature = "rust1", since = "1.0.0")] pub struct CStr { @@ -143,20 +210,85 @@ pub struct CStr { inner: [c_char] } -/// An error returned from `CString::new` to indicate that a nul byte was found -/// in the 
vector provided. +/// An error indicating that an interior nul byte was found. +/// +/// While Rust strings may contain nul bytes in the middle, C strings +/// can't, as that byte would effectively truncate the string. +/// +/// This error is created by the [`new`][`CString::new`] method on +/// [`CString`]. See its documentation for more. +/// +/// [`CString`]: struct.CString.html +/// [`CString::new`]: struct.CString.html#method.new +/// +/// # Examples +/// +/// ``` +/// use std::ffi::{CString, NulError}; +/// +/// let _: NulError = CString::new(b"f\0oo".to_vec()).unwrap_err(); +/// ``` #[derive(Clone, PartialEq, Eq, Debug)] #[stable(feature = "rust1", since = "1.0.0")] pub struct NulError(usize, Vec<u8>); -/// An error returned from `CStr::from_bytes_with_nul` to indicate that a nul -/// byte was found too early in the slice provided or one wasn't found at all. +/// An error indicating that a nul byte was not in the expected position. +/// +/// The slice used to create a [`CStr`] must have one and only one nul +/// byte at the end of the slice. +/// +/// This error is created by the +/// [`from_bytes_with_nul`][`CStr::from_bytes_with_nul`] method on +/// [`CStr`]. See its documentation for more. +/// +/// [`CStr`]: struct.CStr.html +/// [`CStr::from_bytes_with_nul`]: struct.CStr.html#method.from_bytes_with_nul +/// +/// # Examples +/// +/// ``` +/// use std::ffi::{CStr, FromBytesWithNulError}; +/// +/// let _: FromBytesWithNulError = CStr::from_bytes_with_nul(b"f\0oo").unwrap_err(); +/// ``` #[derive(Clone, PartialEq, Eq, Debug)] #[stable(feature = "cstr_from_bytes", since = "1.10.0")] -pub struct FromBytesWithNulError { _a: () } +pub struct FromBytesWithNulError { + kind: FromBytesWithNulErrorKind, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +enum FromBytesWithNulErrorKind { + InteriorNul(usize), + NotNulTerminated, +} -/// An error returned from `CString::into_string` to indicate that a UTF-8 error -/// was encountered during the conversion. 
+impl FromBytesWithNulError { + fn interior_nul(pos: usize) -> FromBytesWithNulError { + FromBytesWithNulError { + kind: FromBytesWithNulErrorKind::InteriorNul(pos), + } + } + fn not_nul_terminated() -> FromBytesWithNulError { + FromBytesWithNulError { + kind: FromBytesWithNulErrorKind::NotNulTerminated, + } + } +} + +/// An error indicating invalid UTF-8 when converting a [`CString`] into a [`String`]. +/// +/// `CString` is just a wrapper over a buffer of bytes with a nul +/// terminator; [`into_string`][`CString::into_string`] performs UTF-8 +/// validation on those bytes and may return this error. +/// +/// This `struct` is created by the +/// [`into_string`][`CString::into_string`] method on [`CString`]. See +/// its documentation for more. +/// +/// [`String`]: ../string/struct.String.html +/// [`CString`]: struct.CString.html +/// [`CString::into_string`]: struct.CString.html#method.into_string #[derive(Clone, PartialEq, Eq, Debug)] #[stable(feature = "cstring_into", since = "1.7.0")] pub struct IntoStringError { @@ -167,8 +299,11 @@ pub struct IntoStringError { impl CString { /// Creates a new C-compatible string from a container of bytes. /// - /// This method will consume the provided data and use the underlying bytes - /// to construct a new string, ensuring that there is a trailing 0 byte. + /// This function will consume the provided data and use the + /// underlying bytes to construct a new string, ensuring that + /// there is a trailing 0 byte. This trailing 0 byte will be + /// appended by this function; the provided data should *not* + /// contain any 0 bytes in it. /// /// # Examples /// @@ -186,9 +321,11 @@ impl CString { /// /// # Errors /// - /// This function will return an error if the bytes yielded contain an - /// internal 0 byte. The error returned will contain the bytes as well as + /// This function will return an error if the supplied bytes contain an + /// internal 0 byte. 
The [`NulError`] returned will contain the bytes as well as /// the position of the nul byte. + /// + /// [`NulError`]: struct.NulError.html #[stable(feature = "rust1", since = "1.0.0")] pub fn new<T: Into<Vec<u8>>>(t: T) -> Result<CString, NulError> { Self::_new(t.into()) @@ -201,13 +338,15 @@ impl CString { } } - /// Creates a C-compatible string from a byte vector without checking for - /// interior 0 bytes. + /// Creates a C-compatible string by consuming a byte vector, + /// without checking for interior 0 bytes. /// - /// This method is equivalent to `new` except that no runtime assertion + /// This method is equivalent to [`new`] except that no runtime assertion /// is made that `v` contains no 0 bytes, and it requires an actual /// byte vector, not anything that can be converted to one with Into. /// + /// [`new`]: #method.new + /// /// # Examples /// /// ``` @@ -225,39 +364,110 @@ impl CString { CString { inner: v.into_boxed_slice() } } - /// Retakes ownership of a `CString` that was transferred to C. + /// Retakes ownership of a `CString` that was transferred to C via [`into_raw`]. /// /// Additionally, the length of the string will be recalculated from the pointer. /// /// # Safety /// /// This should only ever be called with a pointer that was earlier - /// obtained by calling `into_raw` on a `CString`. Other usage (e.g. trying to take + /// obtained by calling [`into_raw`] on a `CString`. Other usage (e.g. trying to take /// ownership of a string that was allocated by foreign code) is likely to lead /// to undefined behavior or allocator corruption. + /// + /// > **Note:** If you need to borrow a string that was allocated by + /// > foreign code, use [`CStr`]. If you need to take ownership of + /// > a string that was allocated by foreign code, you will need to + /// > make your own provisions for freeing it appropriately, likely + /// > with the foreign code's API to do that. 
+ /// + /// [`into_raw`]: #method.into_raw + /// [`CStr`]: struct.CStr.html + /// + /// # Examples + /// + /// Create a `CString`, pass ownership to an `extern` function (via raw pointer), then retake + /// ownership with `from_raw`: + /// + /// ```no_run + /// use std::ffi::CString; + /// use std::os::raw::c_char; + /// + /// extern { + /// fn some_extern_function(s: *mut c_char); + /// } + /// + /// let c_string = CString::new("Hello!").unwrap(); + /// let raw = c_string.into_raw(); + /// unsafe { + /// some_extern_function(raw); + /// let c_string = CString::from_raw(raw); + /// } + /// ``` #[stable(feature = "cstr_memory", since = "1.4.0")] pub unsafe fn from_raw(ptr: *mut c_char) -> CString { - let len = libc::strlen(ptr) + 1; // Including the NUL byte - let slice = slice::from_raw_parts(ptr, len as usize); - CString { inner: mem::transmute(slice) } + let len = sys::strlen(ptr) + 1; // Including the NUL byte + let slice = slice::from_raw_parts_mut(ptr, len as usize); + CString { inner: Box::from_raw(slice as *mut [c_char] as *mut [u8]) } } - /// Transfers ownership of the string to a C caller. + /// Consumes the `CString` and transfers ownership of the string to a C caller. /// - /// The pointer must be returned to Rust and reconstituted using - /// `from_raw` to be properly deallocated. Specifically, one - /// should *not* use the standard C `free` function to deallocate + /// The pointer which this function returns must be returned to Rust and reconstituted using + /// [`from_raw`] to be properly deallocated. Specifically, one + /// should *not* use the standard C `free()` function to deallocate /// this string. /// - /// Failure to call `from_raw` will lead to a memory leak. + /// Failure to call [`from_raw`] will lead to a memory leak. 
+ /// + /// [`from_raw`]: #method.from_raw + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new("foo").unwrap(); + /// + /// let ptr = c_string.into_raw(); + /// + /// unsafe { + /// assert_eq!(b'f', *ptr as u8); + /// assert_eq!(b'o', *ptr.offset(1) as u8); + /// assert_eq!(b'o', *ptr.offset(2) as u8); + /// assert_eq!(b'\0', *ptr.offset(3) as u8); + /// + /// // retake pointer to free memory + /// let _ = CString::from_raw(ptr); + /// } + /// ``` + #[inline] #[stable(feature = "cstr_memory", since = "1.4.0")] pub fn into_raw(self) -> *mut c_char { Box::into_raw(self.into_inner()) as *mut c_char } - /// Converts the `CString` into a `String` if it contains valid Unicode data. + /// Converts the `CString` into a [`String`] if it contains valid UTF-8 data. /// /// On failure, ownership of the original `CString` is returned. + /// + /// [`String`]: ../string/struct.String.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let valid_utf8 = vec![b'f', b'o', b'o']; + /// let cstring = CString::new(valid_utf8).unwrap(); + /// assert_eq!(cstring.into_string().unwrap(), "foo"); + /// + /// let invalid_utf8 = vec![b'f', 0xff, b'o', b'o']; + /// let cstring = CString::new(invalid_utf8).unwrap(); + /// let err = cstring.into_string().err().unwrap(); + /// assert_eq!(err.utf8_error().valid_up_to(), 1); + /// ``` + #[stable(feature = "cstring_into", since = "1.7.0")] pub fn into_string(self) -> Result<String, IntoStringError> { String::from_utf8(self.into_bytes()) @@ -267,10 +477,21 @@ impl CString { }) } - /// Returns the underlying byte buffer. + /// Consumes the `CString` and returns the underlying byte buffer. + /// + /// The returned buffer does **not** contain the trailing nul + /// terminator, and it is guaranteed to not have any interior nul + /// bytes. 
+ /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; /// - /// The returned buffer does **not** contain the trailing nul separator and - /// it is guaranteed to not have any interior nul bytes. + /// let c_string = CString::new("foo").unwrap(); + /// let bytes = c_string.into_bytes(); + /// assert_eq!(bytes, vec![b'f', b'o', b'o']); + /// ``` #[stable(feature = "cstring_into", since = "1.7.0")] pub fn into_bytes(self) -> Vec<u8> { let mut vec = self.into_inner().into_vec(); @@ -279,8 +500,20 @@ impl CString { vec } - /// Equivalent to the `into_bytes` function except that the returned vector - /// includes the trailing nul byte. + /// Equivalent to the [`into_bytes`] function except that the returned vector + /// includes the trailing nul terminator. + /// + /// [`into_bytes`]: #method.into_bytes + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new("foo").unwrap(); + /// let bytes = c_string.into_bytes_with_nul(); + /// assert_eq!(bytes, vec![b'f', b'o', b'o', b'\0']); + /// ``` #[stable(feature = "cstring_into", since = "1.7.0")] pub fn into_bytes_with_nul(self) -> Vec<u8> { self.into_inner().into_vec() @@ -288,21 +521,88 @@ impl CString { /// Returns the contents of this `CString` as a slice of bytes. /// - /// The returned slice does **not** contain the trailing nul separator and - /// it is guaranteed to not have any interior nul bytes. + /// The returned slice does **not** contain the trailing nul + /// terminator, and it is guaranteed to not have any interior nul + /// bytes. If you need the nul terminator, use + /// [`as_bytes_with_nul`] instead. 
+ /// + /// [`as_bytes_with_nul`]: #method.as_bytes_with_nul + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new("foo").unwrap(); + /// let bytes = c_string.as_bytes(); + /// assert_eq!(bytes, &[b'f', b'o', b'o']); + /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn as_bytes(&self) -> &[u8] { &self.inner[..self.inner.len() - 1] } - /// Equivalent to the `as_bytes` function except that the returned slice - /// includes the trailing nul byte. + /// Equivalent to the [`as_bytes`] function except that the returned slice + /// includes the trailing nul terminator. + /// + /// [`as_bytes`]: #method.as_bytes + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new("foo").unwrap(); + /// let bytes = c_string.as_bytes_with_nul(); + /// assert_eq!(bytes, &[b'f', b'o', b'o', b'\0']); + /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn as_bytes_with_nul(&self) -> &[u8] { &self.inner } - // Bypass "move out of struct which implements `Drop` trait" restriction. + /// Extracts a [`CStr`] slice containing the entire string. + /// + /// [`CStr`]: struct.CStr.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{CString, CStr}; + /// + /// let c_string = CString::new(b"foo".to_vec()).unwrap(); + /// let c_str = c_string.as_c_str(); + /// assert_eq!(c_str, CStr::from_bytes_with_nul(b"foo\0").unwrap()); + /// ``` + #[inline] + #[stable(feature = "as_c_str", since = "1.20.0")] + pub fn as_c_str(&self) -> &CStr { + &*self + } + + /// Converts this `CString` into a boxed [`CStr`]. 
+ /// + /// [`CStr`]: struct.CStr.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{CString, CStr}; + /// + /// let c_string = CString::new(b"foo".to_vec()).unwrap(); + /// let boxed = c_string.into_boxed_c_str(); + /// assert_eq!(&*boxed, CStr::from_bytes_with_nul(b"foo\0").unwrap()); + /// ``` + #[stable(feature = "into_boxed_c_str", since = "1.20.0")] + pub fn into_boxed_c_str(self) -> Box<CStr> { + unsafe { Box::from_raw(Box::into_raw(self.into_inner()) as *mut CStr) } + } + + // Bypass "move out of struct which implements [`Drop`] trait" restriction. + /// + /// [`Drop`]: ../ops/trait.Drop.html fn into_inner(self) -> Box<[u8]> { unsafe { let result = ptr::read(&self.inner); @@ -327,8 +627,9 @@ impl Drop for CString { impl ops::Deref for CString { type Target = CStr; + #[inline] fn deref(&self) -> &CStr { - unsafe { mem::transmute(self.as_bytes_with_nul()) } + unsafe { CStr::from_bytes_with_nul_unchecked(self.as_bytes_with_nul()) } } } @@ -341,6 +642,7 @@ impl fmt::Debug for CString { #[stable(feature = "cstring_into", since = "1.7.0")] impl From<CString> for Vec<u8> { + #[inline] fn from(s: CString) -> Vec<u8> { s.into_bytes() } @@ -360,7 +662,7 @@ impl fmt::Debug for CStr { #[stable(feature = "cstr_default", since = "1.10.0")] impl<'a> Default for &'a CStr { fn default() -> &'a CStr { - static SLICE: &'static [c_char] = &[0]; + const SLICE: &'static [c_char] = &[0]; unsafe { CStr::from_ptr(SLICE.as_ptr()) } } } @@ -376,12 +678,83 @@ impl Default for CString { #[stable(feature = "cstr_borrow", since = "1.3.0")] impl Borrow<CStr> for CString { + #[inline] fn borrow(&self) -> &CStr { self } } +#[stable(feature = "box_from_c_str", since = "1.17.0")] +impl<'a> From<&'a CStr> for Box<CStr> { + fn from(s: &'a CStr) -> Box<CStr> { + let boxed: Box<[u8]> = Box::from(s.to_bytes_with_nul()); + unsafe { Box::from_raw(Box::into_raw(boxed) as *mut CStr) } + } +} + +#[stable(feature = "c_string_from_box", since = "1.18.0")] +impl From<Box<CStr>> for CString { 
+ #[inline] + fn from(s: Box<CStr>) -> CString { + s.into_c_string() + } +} + +#[stable(feature = "box_from_c_string", since = "1.20.0")] +impl From<CString> for Box<CStr> { + #[inline] + fn from(s: CString) -> Box<CStr> { + s.into_boxed_c_str() + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<CString> for Arc<CStr> { + #[inline] + fn from(s: CString) -> Arc<CStr> { + let arc: Arc<[u8]> = Arc::from(s.into_inner()); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const CStr) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl<'a> From<&'a CStr> for Arc<CStr> { + #[inline] + fn from(s: &CStr) -> Arc<CStr> { + let arc: Arc<[u8]> = Arc::from(s.to_bytes_with_nul()); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const CStr) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<CString> for Rc<CStr> { + #[inline] + fn from(s: CString) -> Rc<CStr> { + let rc: Rc<[u8]> = Rc::from(s.into_inner()); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const CStr) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl<'a> From<&'a CStr> for Rc<CStr> { + #[inline] + fn from(s: &CStr) -> Rc<CStr> { + let rc: Rc<[u8]> = Rc::from(s.to_bytes_with_nul()); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const CStr) } + } +} + +#[stable(feature = "default_box_extra", since = "1.17.0")] +impl Default for Box<CStr> { + fn default() -> Box<CStr> { + let boxed: Box<[u8]> = Box::from([0]); + unsafe { Box::from_raw(Box::into_raw(boxed) as *mut CStr) } + } +} + impl NulError { - /// Returns the position of the nul byte in the slice that was provided to - /// `CString::new`. + /// Returns the position of the nul byte in the slice that caused + /// [`CString::new`] to fail. 
+ /// + /// [`CString::new`]: struct.CString.html#method.new /// /// # Examples /// @@ -432,9 +805,34 @@ impl From<NulError> for io::Error { } } +#[stable(feature = "frombyteswithnulerror_impls", since = "1.17.0")] +impl Error for FromBytesWithNulError { + fn description(&self) -> &str { + match self.kind { + FromBytesWithNulErrorKind::InteriorNul(..) => + "data provided contains an interior nul byte", + FromBytesWithNulErrorKind::NotNulTerminated => + "data provided is not nul terminated", + } + } +} + +#[stable(feature = "frombyteswithnulerror_impls", since = "1.17.0")] +impl fmt::Display for FromBytesWithNulError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(self.description())?; + if let FromBytesWithNulErrorKind::InteriorNul(pos) = self.kind { + write!(f, " at byte pos {}", pos)?; + } + Ok(()) + } +} + impl IntoStringError { - /// Consumes this error, returning original `CString` which generated the + /// Consumes this error, returning original [`CString`] which generated the /// error. + /// + /// [`CString`]: struct.CString.html #[stable(feature = "cstring_into", since = "1.7.0")] pub fn into_cstring(self) -> CString { self.inner @@ -466,15 +864,15 @@ impl fmt::Display for IntoStringError { } impl CStr { - /// Casts a raw C string to a safe C string wrapper. + /// Wraps a raw C string with a safe C string wrapper. /// - /// This function will cast the provided `ptr` to the `CStr` wrapper which + /// This function will wrap the provided `ptr` with a `CStr` wrapper, which /// allows inspection and interoperation of non-owned C strings. This method /// is unsafe for a number of reasons: /// - /// * There is no guarantee to the validity of `ptr` + /// * There is no guarantee to the validity of `ptr`. /// * The returned lifetime is not guaranteed to be the actual lifetime of - /// `ptr` + /// `ptr`. /// * There is no guarantee that the memory pointed to by `ptr` contains a /// valid nul terminator byte at the end of the string. 
/// @@ -501,15 +899,16 @@ impl CStr { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr { - let len = libc::strlen(ptr); - mem::transmute(slice::from_raw_parts(ptr, len as usize + 1)) + let len = sys::strlen(ptr); + let ptr = ptr as *const u8; + CStr::from_bytes_with_nul_unchecked(slice::from_raw_parts(ptr, len as usize + 1)) } /// Creates a C string wrapper from a byte slice. /// - /// This function will cast the provided `bytes` to a `CStr` wrapper after - /// ensuring that it is null terminated and does not contain any interior - /// nul bytes. + /// This function will cast the provided `bytes` to a `CStr` + /// wrapper after ensuring that the byte slice is nul-terminated + /// and does not contain any interior nul bytes. /// /// # Examples /// @@ -519,20 +918,42 @@ impl CStr { /// let cstr = CStr::from_bytes_with_nul(b"hello\0"); /// assert!(cstr.is_ok()); /// ``` + /// + /// Creating a `CStr` without a trailing nul terminator is an error: + /// + /// ``` + /// use std::ffi::CStr; + /// + /// let c_str = CStr::from_bytes_with_nul(b"hello"); + /// assert!(c_str.is_err()); + /// ``` + /// + /// Creating a `CStr` with an interior nul byte is an error: + /// + /// ``` + /// use std::ffi::CStr; + /// + /// let c_str = CStr::from_bytes_with_nul(b"he\0llo\0"); + /// assert!(c_str.is_err()); + /// ``` #[stable(feature = "cstr_from_bytes", since = "1.10.0")] pub fn from_bytes_with_nul(bytes: &[u8]) -> Result<&CStr, FromBytesWithNulError> { - if bytes.is_empty() || memchr::memchr(0, &bytes) != Some(bytes.len() - 1) { - Err(FromBytesWithNulError { _a: () }) + let nul_pos = memchr::memchr(0, bytes); + if let Some(nul_pos) = nul_pos { + if nul_pos + 1 != bytes.len() { + return Err(FromBytesWithNulError::interior_nul(nul_pos)); + } + Ok(unsafe { CStr::from_bytes_with_nul_unchecked(bytes) }) } else { - Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) }) + Err(FromBytesWithNulError::not_nul_terminated()) } } 
/// Unsafely creates a C string wrapper from a byte slice. /// /// This function will cast the provided `bytes` to a `CStr` wrapper without - /// performing any sanity checks. The provided slice must be null terminated + /// performing any sanity checks. The provided slice **must** be nul-terminated /// and not contain any interior nul bytes. /// /// # Examples @@ -546,14 +967,15 @@ impl CStr { /// assert_eq!(cstr, &*cstring); /// } /// ``` + #[inline] #[stable(feature = "cstr_from_bytes", since = "1.10.0")] pub unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr { - mem::transmute(bytes) + &*(bytes as *const [u8] as *const CStr) } /// Returns the inner pointer to this C string. /// - /// The returned pointer will be valid for as long as `self` is and points + /// The returned pointer will be valid for as long as `self` is, and points /// to a contiguous region of memory terminated with a 0 byte to represent /// the end of the string. /// @@ -561,7 +983,7 @@ impl CStr { /// /// It is your responsibility to make sure that the underlying memory is not /// freed too early. For example, the following code will cause undefined - /// behaviour when `ptr` is used inside the `unsafe` block: + /// behavior when `ptr` is used inside the `unsafe` block: /// /// ```no_run /// use std::ffi::{CString}; @@ -574,9 +996,9 @@ impl CStr { /// ``` /// /// This happens because the pointer returned by `as_ptr` does not carry any - /// lifetime information and the string is deallocated immediately after + /// lifetime information and the [`CString`] is deallocated immediately after /// the `CString::new("Hello").unwrap().as_ptr()` expression is evaluated. 
- /// To fix the problem, bind the string to a local variable: + /// To fix the problem, bind the `CString` to a local variable: /// /// ```no_run /// use std::ffi::{CString}; @@ -588,6 +1010,12 @@ impl CStr { /// *ptr; /// } /// ``` + /// + /// This way, the lifetime of the `CString` in `hello` encompasses + /// the lifetime of `ptr` and the `unsafe` block. + /// + /// [`CString`]: struct.CString.html + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn as_ptr(&self) -> *const c_char { self.inner.as_ptr() @@ -595,16 +1023,22 @@ impl CStr { /// Converts this C string to a byte slice. /// - /// This function will calculate the length of this string (which normally - /// requires a linear amount of work to be done) and then return the - /// resulting slice of `u8` elements. - /// - /// The returned slice will **not** contain the trailing nul that this C + /// The returned slice will **not** contain the trailing nul terminator that this C /// string has. /// /// > **Note**: This method is currently implemented as a 0-cost cast, but /// > it is planned to alter its definition in the future to perform the /// > length calculation whenever this method is called. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CStr; + /// + /// let c_str = CStr::from_bytes_with_nul(b"foo\0").unwrap(); + /// assert_eq!(c_str.to_bytes(), b"foo"); + /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn to_bytes(&self) -> &[u8] { let bytes = self.to_bytes_with_nul(); @@ -613,26 +1047,50 @@ impl CStr { /// Converts this C string to a byte slice containing the trailing 0 byte. /// - /// This function is the equivalent of `to_bytes` except that it will retain - /// the trailing nul instead of chopping it off. + /// This function is the equivalent of [`to_bytes`] except that it will retain + /// the trailing nul terminator instead of chopping it off. 
/// /// > **Note**: This method is currently implemented as a 0-cost cast, but /// > it is planned to alter its definition in the future to perform the /// > length calculation whenever this method is called. + /// + /// [`to_bytes`]: #method.to_bytes + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CStr; + /// + /// let c_str = CStr::from_bytes_with_nul(b"foo\0").unwrap(); + /// assert_eq!(c_str.to_bytes_with_nul(), b"foo\0"); + /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn to_bytes_with_nul(&self) -> &[u8] { - unsafe { mem::transmute(&self.inner) } + unsafe { &*(&self.inner as *const [c_char] as *const [u8]) } } - /// Yields a `&str` slice if the `CStr` contains valid UTF-8. + /// Yields a [`&str`] slice if the `CStr` contains valid UTF-8. /// - /// This function will calculate the length of this string and check for - /// UTF-8 validity, and then return the `&str` if it's valid. + /// If the contents of the `CStr` are valid UTF-8 data, this + /// function will return the corresponding [`&str`] slice. Otherwise, + /// it will return an error with details of where UTF-8 validation failed. /// /// > **Note**: This method is currently implemented to check for validity /// > after a 0-cost cast, but it is planned to alter its definition in the /// > future to perform the length calculation in addition to the UTF-8 /// > check whenever this method is called. + /// + /// [`&str`]: ../primitive.str.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CStr; + /// + /// let c_str = CStr::from_bytes_with_nul(b"foo\0").unwrap(); + /// assert_eq!(c_str.to_str(), Ok("foo")); + /// ``` #[stable(feature = "cstr_to_str", since = "1.4.0")] pub fn to_str(&self) -> Result<&str, str::Utf8Error> { // NB: When CStr is changed to perform the length check in .to_bytes() @@ -642,21 +1100,73 @@ impl CStr { str::from_utf8(self.to_bytes()) } - /// Converts a `CStr` into a `Cow<str>`. + /// Converts a `CStr` into a [`Cow`]`<`[`str`]`>`. 
/// /// - /// This function will calculate the length of this string (which normally - /// requires a linear amount of work to be done) and then return the - /// resulting slice as a `Cow<str>`, replacing any invalid UTF-8 sequences - /// with `U+FFFD REPLACEMENT CHARACTER`. + /// If the contents of the `CStr` are valid UTF-8 data, this + /// function will return a [`Cow`]`::`[`Borrowed`]`(`[`&str`]`)` + /// with the corresponding [`&str`] slice. Otherwise, it will + /// replace any invalid UTF-8 sequences with `U+FFFD REPLACEMENT + /// CHARACTER` and return a [`Cow`]`::`[`Owned`]`(`[`String`]`)` + /// with the result. /// /// > **Note**: This method is currently implemented to check for validity /// > after a 0-cost cast, but it is planned to alter its definition in the /// > future to perform the length calculation in addition to the UTF-8 /// > check whenever this method is called. + /// + /// [`Cow`]: ../borrow/enum.Cow.html + /// [`Borrowed`]: ../borrow/enum.Cow.html#variant.Borrowed + /// [`str`]: ../primitive.str.html + /// [`String`]: ../string/struct.String.html + /// + /// # Examples + /// + /// Calling `to_string_lossy` on a `CStr` containing valid UTF-8: + /// + /// ``` + /// use std::borrow::Cow; + /// use std::ffi::CStr; + /// + /// let c_str = CStr::from_bytes_with_nul(b"Hello World\0").unwrap(); + /// assert_eq!(c_str.to_string_lossy(), Cow::Borrowed("Hello World")); + /// ``` + /// + /// Calling `to_string_lossy` on a `CStr` containing invalid UTF-8: + /// + /// ``` + /// use std::borrow::Cow; + /// use std::ffi::CStr; + /// + /// let c_str = CStr::from_bytes_with_nul(b"Hello \xF0\x90\x80World\0").unwrap(); + /// assert_eq!( + /// c_str.to_string_lossy(), + /// Cow::Owned(String::from("Hello �World")) as Cow<str> + /// ); + /// ``` #[stable(feature = "cstr_to_str", since = "1.4.0")] pub fn to_string_lossy(&self) -> Cow<str> { String::from_utf8_lossy(self.to_bytes()) } + + /// Converts a [`Box`]`<CStr>` into a [`CString`] without copying or 
allocating. + /// + /// [`Box`]: ../boxed/struct.Box.html + /// [`CString`]: struct.CString.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new(b"foo".to_vec()).unwrap(); + /// let boxed = c_string.into_boxed_c_str(); + /// assert_eq!(boxed.into_c_string(), CString::new("foo").unwrap()); + /// ``` + #[stable(feature = "into_boxed_c_str", since = "1.20.0")] + pub fn into_c_string(self: Box<CStr>) -> CString { + let raw = Box::into_raw(self) as *mut [u8]; + CString { inner: unsafe { Box::from_raw(raw) } } + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -685,7 +1195,7 @@ impl ToOwned for CStr { type Owned = CString; fn to_owned(&self) -> CString { - unsafe { CString::from_vec_unchecked(self.to_bytes().to_vec()) } + CString { inner: self.to_bytes_with_nul().into() } } } @@ -708,6 +1218,7 @@ impl ops::Index<ops::RangeFull> for CString { #[stable(feature = "cstring_asref", since = "1.7.0")] impl AsRef<CStr> for CStr { + #[inline] fn as_ref(&self) -> &CStr { self } @@ -715,6 +1226,7 @@ impl AsRef<CStr> for CStr { #[stable(feature = "cstring_asref", since = "1.7.0")] impl AsRef<CStr> for CString { + #[inline] fn as_ref(&self) -> &CStr { self } @@ -727,6 +1239,8 @@ mod tests { use borrow::Cow::{Borrowed, Owned}; use hash::{Hash, Hasher}; use collections::hash_map::DefaultHasher; + use rc::Rc; + use sync::Arc; #[test] fn c_to_rust() { @@ -846,4 +1360,38 @@ mod tests { let cstr = CStr::from_bytes_with_nul(data); assert!(cstr.is_err()); } + + #[test] + fn into_boxed() { + let orig: &[u8] = b"Hello, world!\0"; + let cstr = CStr::from_bytes_with_nul(orig).unwrap(); + let boxed: Box<CStr> = Box::from(cstr); + let cstring = cstr.to_owned().into_boxed_c_str().into_c_string(); + assert_eq!(cstr, &*boxed); + assert_eq!(&*boxed, &*cstring); + assert_eq!(&*cstring, cstr); + } + + #[test] + fn boxed_default() { + let boxed = <Box<CStr>>::default(); + assert_eq!(boxed.to_bytes_with_nul(), &[0]); + } + + #[test] + fn 
into_rc() { + let orig: &[u8] = b"Hello, world!\0"; + let cstr = CStr::from_bytes_with_nul(orig).unwrap(); + let rc: Rc<CStr> = Rc::from(cstr); + let arc: Arc<CStr> = Arc::from(cstr); + + assert_eq!(&*rc, cstr); + assert_eq!(&*arc, cstr); + + let rc2: Rc<CStr> = Rc::from(cstr.to_owned()); + let arc2: Arc<CStr> = Arc::from(cstr.to_owned()); + + assert_eq!(&*rc2, cstr); + assert_eq!(&*arc2, cstr); + } } diff --git a/ctr-std/src/ffi/mod.rs b/ctr-std/src/ffi/mod.rs index ca1ff18..a37a5e8 100644 --- a/ctr-std/src/ffi/mod.rs +++ b/ctr-std/src/ffi/mod.rs @@ -9,6 +9,157 @@ // except according to those terms. //! Utilities related to FFI bindings. +//! +//! This module provides utilities to handle data across non-Rust +//! interfaces, like other programming languages and the underlying +//! operating system. It is mainly of use for FFI (Foreign Function +//! Interface) bindings and code that needs to exchange C-like strings +//! with other languages. +//! +//! # Overview +//! +//! Rust represents owned strings with the [`String`] type, and +//! borrowed slices of strings with the [`str`] primitive. Both are +//! always in UTF-8 encoding, and may contain nul bytes in the middle, +//! i.e. if you look at the bytes that make up the string, there may +//! be a `\0` among them. Both `String` and `str` store their length +//! explicitly; there are no nul terminators at the end of strings +//! like in C. +//! +//! C strings are different from Rust strings: +//! +//! * **Encodings** - Rust strings are UTF-8, but C strings may use +//! other encodings. If you are using a string from C, you should +//! check its encoding explicitly, rather than just assuming that it +//! is UTF-8 like you can do in Rust. +//! +//! * **Character size** - C strings may use `char` or `wchar_t`-sized +//! characters; please **note** that C's `char` is different from Rust's. +//! The C standard leaves the actual sizes of those types open to +//! 
interpretation, but defines different APIs for strings made up of +//! each character type. Rust strings are always UTF-8, so different +//! Unicode characters will be encoded in a variable number of bytes +//! each. The Rust type [`char`] represents a '[Unicode scalar +//! value]', which is similar to, but not the same as, a '[Unicode +//! code point]'. +//! +//! * **Nul terminators and implicit string lengths** - Often, C +//! strings are nul-terminated, i.e. they have a `\0` character at the +//! end. The length of a string buffer is not stored, but has to be +//! calculated; to compute the length of a string, C code must +//! manually call a function like `strlen()` for `char`-based strings, +//! or `wcslen()` for `wchar_t`-based ones. Those functions return +//! the number of characters in the string excluding the nul +//! terminator, so the buffer length is really `len+1` characters. +//! Rust strings don't have a nul terminator; their length is always +//! stored and does not need to be calculated. While in Rust +//! accessing a string's length is an O(1) operation (because the +//! length is stored), in C it is an O(length) operation because the +//! length needs to be computed by scanning the string for the nul +//! terminator. +//! +//! * **Internal nul characters** - When C strings have a nul +//! terminator character, this usually means that they cannot have nul +//! characters in the middle — a nul character would essentially +//! truncate the string. Rust strings *can* have nul characters in +//! the middle, because nul does not have to mark the end of the +//! string in Rust. +//! +//! # Representations of non-Rust strings +//! +//! [`CString`] and [`CStr`] are useful when you need to transfer +//! UTF-8 strings to and from languages with a C ABI, like Python. +//! +//! * **From Rust to C:** [`CString`] represents an owned, C-friendly +//! string: it is nul-terminated, and has no internal nul characters. +//! 
Rust code can create a `CString` out of a normal string (provided +//! that the string doesn't have nul characters in the middle), and +//! then use a variety of methods to obtain a raw `*mut u8` that can +//! then be passed as an argument to functions which use the C +//! conventions for strings. +//! +//! * **From C to Rust:** [`CStr`] represents a borrowed C string; it +//! is what you would use to wrap a raw `*const u8` that you got from +//! a C function. A `CStr` is guaranteed to be a nul-terminated array +//! of bytes. Once you have a `CStr`, you can convert it to a Rust +//! `&str` if it's valid UTF-8, or lossily convert it by adding +//! replacement characters. +//! +//! [`OsString`] and [`OsStr`] are useful when you need to transfer +//! strings to and from the operating system itself, or when capturing +//! the output of external commands. Conversions between `OsString`, +//! `OsStr` and Rust strings work similarly to those for [`CString`] +//! and [`CStr`]. +//! +//! * [`OsString`] represents an owned string in whatever +//! representation the operating system prefers. In the Rust standard +//! library, various APIs that transfer strings to/from the operating +//! system use `OsString` instead of plain strings. For example, +//! [`env::var_os()`] is used to query environment variables; it +//! returns an `Option<OsString>`. If the environment variable exists +//! you will get a `Some(os_string)`, which you can *then* try to +//! convert to a Rust string. This yields a [`Result<>`], so that +//! your code can detect errors in case the environment variable did +//! not in fact contain valid Unicode data. +//! +//! * [`OsStr`] represents a borrowed reference to a string in a +//! format that can be passed to the operating system. It can be +//! converted into a UTF-8 Rust string slice in a similar way to +//! `OsString`. +//! +//! # Conversions +//! +//! ## On Unix +//! +//! On Unix, [`OsStr`] implements the +//! 
`std::os::unix::ffi::`[`OsStrExt`][unix.OsStrExt] trait, which +//! augments it with two methods, [`from_bytes`] and [`as_bytes`]. +//! These do inexpensive conversions from and to UTF-8 byte slices. +//! +//! Additionally, on Unix [`OsString`] implements the +//! `std::os::unix::ffi::`[`OsStringExt`][unix.OsStringExt] trait, +//! which provides [`from_vec`] and [`into_vec`] methods that consume +//! their arguments, and take or produce vectors of [`u8`]. +//! +//! ## On Windows +//! +//! On Windows, [`OsStr`] implements the +//! `std::os::windows::ffi::`[`OsStrExt`][windows.OsStrExt] trait, +//! which provides an [`encode_wide`] method. This provides an +//! iterator that can be [`collect`]ed into a vector of [`u16`]. +//! +//! Additionally, on Windows [`OsString`] implements the +//! `std::os::windows::ffi::`[`OsStringExt`][windows.OsStringExt] +//! trait, which provides a [`from_wide`] method. The result of this +//! method is an `OsString` which can be round-tripped to a Windows +//! string losslessly. +//! +//! [`String`]: ../string/struct.String.html +//! [`str`]: ../primitive.str.html +//! [`char`]: ../primitive.char.html +//! [`u8`]: ../primitive.u8.html +//! [`u16`]: ../primitive.u16.html +//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value +//! [Unicode code point]: http://www.unicode.org/glossary/#code_point +//! [`CString`]: struct.CString.html +//! [`CStr`]: struct.CStr.html +//! [`OsString`]: struct.OsString.html +//! [`OsStr`]: struct.OsStr.html +//! [`env::set_var()`]: ../env/fn.set_var.html +//! [`env::var_os()`]: ../env/fn.var_os.html +//! [`Result<>`]: ../result/enum.Result.html +//! [unix.OsStringExt]: ../os/unix/ffi/trait.OsStringExt.html +//! [`from_vec`]: ../os/unix/ffi/trait.OsStringExt.html#tymethod.from_vec +//! [`into_vec`]: ../os/unix/ffi/trait.OsStringExt.html#tymethod.into_vec +//! [unix.OsStrExt]: ../os/unix/ffi/trait.OsStrExt.html +//! 
[`from_bytes`]: ../os/unix/ffi/trait.OsStrExt.html#tymethod.from_bytes +//! [`as_bytes`]: ../os/unix/ffi/trait.OsStrExt.html#tymethod.as_bytes +//! [`OsStrExt`]: ../os/unix/ffi/trait.OsStrExt.html +//! [windows.OsStrExt]: ../os/windows/ffi/trait.OsStrExt.html +//! [`encode_wide`]: ../os/windows/ffi/trait.OsStrExt.html#tymethod.encode_wide +//! [`collect`]: ../iter/trait.Iterator.html#method.collect +//! [windows.OsStringExt]: ../os/windows/ffi/trait.OsStringExt.html +//! [`from_wide`]: ../os/windows/ffi/trait.OsStringExt.html#tymethod.from_wide #![stable(feature = "rust1", since = "1.0.0")] diff --git a/ctr-std/src/ffi/os_str.rs b/ctr-std/src/ffi/os_str.rs index 84b50f0..3959e85 100644 --- a/ctr-std/src/ffi/os_str.rs +++ b/ctr-std/src/ffi/os_str.rs @@ -9,11 +9,12 @@ // except according to those terms. use borrow::{Borrow, Cow}; -use fmt::{self, Debug}; -use mem; +use fmt; use ops; use cmp; use hash::{Hash, Hasher}; +use rc::Rc; +use sync::Arc; use sys::os_str::{Buf, Slice}; use sys_common::{AsInner, IntoInner, FromInner}; @@ -29,18 +30,71 @@ use sys_common::{AsInner, IntoInner, FromInner}; /// * On Windows, strings are often arbitrary sequences of non-zero 16-bit /// values, interpreted as UTF-16 when it is valid to do so. /// -/// * In Rust, strings are always valid UTF-8, but may contain zeros. +/// * In Rust, strings are always valid UTF-8, which may contain zeros. /// -/// `OsString` and `OsStr` bridge this gap by simultaneously representing Rust +/// `OsString` and [`OsStr`] bridge this gap by simultaneously representing Rust /// and platform-native string values, and in particular allowing a Rust string -/// to be converted into an "OS" string with no cost. +/// to be converted into an "OS" string with no cost if possible. +/// +/// `OsString` is to [`&OsStr`] as [`String`] is to [`&str`]: the former +/// in each pair are owned strings; the latter are borrowed +/// references. 
+/// +/// # Creating an `OsString` +/// +/// **From a Rust string:** `OsString` implements +/// [`From`]`<`[`String`]`>`, so you can use `my_string.into()` to +/// create an `OsString` from a normal Rust string. +/// +/// **From slices:** Just like you can start with an empty Rust +/// [`String`] and then [`push_str`][String.push_str] `&str` +/// sub-string slices into it, you can create an empty `OsString` with +/// the [`new`] method and then push string slices into it with the +/// [`push`] method. +/// +/// # Extracting a borrowed reference to the whole OS string +/// +/// You can use the [`as_os_str`] method to get an `&`[`OsStr`] from +/// an `OsString`; this is effectively a borrowed reference to the +/// whole string. +/// +/// # Conversions +/// +/// See the [module's toplevel documentation about conversions][conversions] for a discussion on +/// the traits which `OsString` implements for conversions from/to native representations. +/// +/// [`OsStr`]: struct.OsStr.html +/// [`&OsStr`]: struct.OsStr.html +/// [`From`]: ../convert/trait.From.html +/// [`String`]: ../string/struct.String.html +/// [`&str`]: ../primitive.str.html +/// [`u8`]: ../primitive.u8.html +/// [`u16`]: ../primitive.u16.html +/// [String.push_str]: ../string/struct.String.html#method.push_str +/// [`new`]: #method.new +/// [`push`]: #method.push +/// [`as_os_str`]: #method.as_os_str #[derive(Clone)] #[stable(feature = "rust1", since = "1.0.0")] pub struct OsString { inner: Buf } -/// Slices into OS strings (see `OsString`). +/// Borrowed reference to an OS string (see [`OsString`]). +/// +/// This type represents a borrowed reference to a string in the operating system's preferred +/// representation. +/// +/// `&OsStr` is to [`OsString`] as [`&str`] is to [`String`]: the former in each pair are borrowed +/// references; the latter are owned strings. 
+/// +/// See the [module's toplevel documentation about conversions][conversions] for a discussion on +/// the traits which `OsStr` implements for conversions from/to native representations. +/// +/// [`OsString`]: struct.OsString.html +/// [`&str`]: ../primitive.str.html +/// [`String`]: ../string/struct.String.html +/// [conversions]: index.html#conversions #[stable(feature = "rust1", since = "1.0.0")] pub struct OsStr { inner: Slice @@ -48,26 +102,70 @@ pub struct OsStr { impl OsString { /// Constructs a new empty `OsString`. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let os_string = OsString::new(); + /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn new() -> OsString { OsString { inner: Buf::from_string(String::new()) } } - /// Converts to an `OsStr` slice. + /// Converts to an [`OsStr`] slice. + /// + /// [`OsStr`]: struct.OsStr.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{OsString, OsStr}; + /// + /// let os_string = OsString::from("foo"); + /// let os_str = OsStr::new("foo"); + /// assert_eq!(os_string.as_os_str(), os_str); + /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn as_os_str(&self) -> &OsStr { self } - /// Converts the `OsString` into a `String` if it contains valid Unicode data. + /// Converts the `OsString` into a [`String`] if it contains valid Unicode data. /// /// On failure, ownership of the original `OsString` is returned. + /// + /// [`String`]: ../../std/string/struct.String.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let os_string = OsString::from("foo"); + /// let string = os_string.into_string(); + /// assert_eq!(string, Ok(String::from("foo"))); + /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn into_string(self) -> Result<String, OsString> { self.inner.into_string().map_err(|buf| OsString { inner: buf} ) } - /// Extends the string with the given `&OsStr` slice. 
+ /// Extends the string with the given [`&OsStr`] slice. + /// + /// [`&OsStr`]: struct.OsStr.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut os_string = OsString::from("foo"); + /// os_string.push("bar"); + /// assert_eq!(&os_string, "foobar"); + /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn push<T: AsRef<OsStr>>(&mut self, s: T) { self.inner.push_slice(&s.as_ref().inner) @@ -75,11 +173,25 @@ impl OsString { /// Creates a new `OsString` with the given capacity. /// - /// The string will be able to hold exactly `capacity` lenth units of other + /// The string will be able to hold exactly `capacity` length units of other /// OS strings without reallocating. If `capacity` is 0, the string will not /// allocate. /// /// See main `OsString` documentation information about encoding. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut os_string = OsString::with_capacity(10); + /// let capacity = os_string.capacity(); + /// + /// // This push is done without reallocating + /// os_string.push("foo"); + /// + /// assert_eq!(capacity, os_string.capacity()); + /// ``` #[stable(feature = "osstring_simple_functions", since = "1.9.0")] pub fn with_capacity(capacity: usize) -> OsString { OsString { @@ -88,6 +200,18 @@ impl OsString { } /// Truncates the `OsString` to zero length. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut os_string = OsString::from("foo"); + /// assert_eq!(&os_string, "foo"); + /// + /// os_string.clear(); + /// assert_eq!(&os_string, ""); + /// ``` #[stable(feature = "osstring_simple_functions", since = "1.9.0")] pub fn clear(&mut self) { self.inner.clear() @@ -96,6 +220,15 @@ impl OsString { /// Returns the capacity this `OsString` can hold without reallocating. /// /// See `OsString` introduction for information about encoding. 
+ /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut os_string = OsString::with_capacity(10); + /// assert!(os_string.capacity() >= 10); + /// ``` #[stable(feature = "osstring_simple_functions", since = "1.9.0")] pub fn capacity(&self) -> usize { self.inner.capacity() @@ -105,6 +238,16 @@ impl OsString { /// in the given `OsString`. /// /// The collection may reserve more space to avoid frequent reallocations. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut s = OsString::new(); + /// s.reserve(10); + /// assert!(s.capacity() >= 10); + /// ``` #[stable(feature = "osstring_simple_functions", since = "1.9.0")] pub fn reserve(&mut self, additional: usize) { self.inner.reserve(additional) @@ -117,10 +260,59 @@ impl OsString { /// Note that the allocator may give the collection more space than it /// requests. Therefore capacity can not be relied upon to be precisely /// minimal. Prefer reserve if future insertions are expected. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut s = OsString::new(); + /// s.reserve_exact(10); + /// assert!(s.capacity() >= 10); + /// ``` #[stable(feature = "osstring_simple_functions", since = "1.9.0")] pub fn reserve_exact(&mut self, additional: usize) { self.inner.reserve_exact(additional) } + + /// Shrinks the capacity of the `OsString` to match its length. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut s = OsString::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to_fit(); + /// assert_eq!(3, s.capacity()); + /// ``` + #[stable(feature = "osstring_shrink_to_fit", since = "1.19.0")] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + /// Converts this `OsString` into a boxed [`OsStr`]. 
+ /// + /// [`OsStr`]: struct.OsStr.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{OsString, OsStr}; + /// + /// let s = OsString::from("hello"); + /// + /// let b: Box<OsStr> = s.into_boxed_os_str(); + /// ``` + #[stable(feature = "into_boxed_os_str", since = "1.20.0")] + pub fn into_boxed_os_str(self) -> Box<OsStr> { + let rw = Box::into_raw(self.inner.into_box()) as *mut OsStr; + unsafe { Box::from_raw(rw) } + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -167,8 +359,8 @@ impl Default for OsString { } #[stable(feature = "rust1", since = "1.0.0")] -impl Debug for OsString { - fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { +impl fmt::Debug for OsString { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { fmt::Debug::fmt(&**self, formatter) } } @@ -253,26 +445,66 @@ impl OsStr { } fn from_inner(inner: &Slice) -> &OsStr { - unsafe { mem::transmute(inner) } + unsafe { &*(inner as *const Slice as *const OsStr) } } - /// Yields a `&str` slice if the `OsStr` is valid Unicode. + /// Yields a [`&str`] slice if the `OsStr` is valid Unicode. /// /// This conversion may entail doing a check for UTF-8 validity. + /// + /// [`&str`]: ../../std/primitive.str.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("foo"); + /// assert_eq!(os_str.to_str(), Some("foo")); + /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn to_str(&self) -> Option<&str> { self.inner.to_str() } - /// Converts an `OsStr` to a `Cow<str>`. + /// Converts an `OsStr` to a [`Cow`]`<`[`str`]`>`. /// /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER. 
+ /// + /// [`Cow`]: ../../std/borrow/enum.Cow.html + /// [`str`]: ../../std/primitive.str.html + /// + /// # Examples + /// + /// Calling `to_string_lossy` on an `OsStr` with valid unicode: + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("foo"); + /// assert_eq!(os_str.to_string_lossy(), "foo"); + /// ``` + /// + /// Had `os_str` contained invalid unicode, the `to_string_lossy` call might + /// have returned `"fo�"`. #[stable(feature = "rust1", since = "1.0.0")] pub fn to_string_lossy(&self) -> Cow<str> { self.inner.to_string_lossy() } - /// Copies the slice into an owned `OsString`. + /// Copies the slice into an owned [`OsString`]. + /// + /// [`OsString`]: struct.OsString.html + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{OsStr, OsString}; + /// + /// let os_str = OsStr::new("foo"); + /// let os_string = os_str.to_os_string(); + /// assert_eq!(os_string, OsString::from("foo")); + /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn to_os_string(&self) -> OsString { OsString { inner: self.inner.to_owned() } @@ -299,12 +531,15 @@ impl OsStr { /// Returns the length of this `OsStr`. /// /// Note that this does **not** return the number of bytes in this string - /// as, for example, OS strings on Windows are encoded as a list of `u16` + /// as, for example, OS strings on Windows are encoded as a list of [`u16`] /// rather than a list of bytes. This number is simply useful for passing to - /// other methods like `OsString::with_capacity` to avoid reallocations. + /// other methods like [`OsString::with_capacity`] to avoid reallocations. /// /// See `OsStr` introduction for more information about encoding. /// + /// [`u16`]: ../primitive.u16.html + /// [`OsString::with_capacity`]: struct.OsString.html#method.with_capacity + /// /// # Examples /// /// ``` @@ -321,12 +556,88 @@ impl OsStr { self.inner.inner.len() } + /// Converts a [`Box`]`<OsStr>` into an [`OsString`] without copying or allocating. 
+ /// + /// [`Box`]: ../boxed/struct.Box.html + /// [`OsString`]: struct.OsString.html + #[stable(feature = "into_boxed_os_str", since = "1.20.0")] + pub fn into_os_string(self: Box<OsStr>) -> OsString { + let boxed = unsafe { Box::from_raw(Box::into_raw(self) as *mut Slice) }; + OsString { inner: Buf::from_box(boxed) } + } + /// Gets the underlying byte representation. /// /// Note: it is *crucial* that this API is private, to avoid /// revealing the internal, platform-specific encodings. fn bytes(&self) -> &[u8] { - unsafe { mem::transmute(&self.inner) } + unsafe { &*(&self.inner as *const _ as *const [u8]) } + } +} + +#[stable(feature = "box_from_os_str", since = "1.17.0")] +impl<'a> From<&'a OsStr> for Box<OsStr> { + fn from(s: &'a OsStr) -> Box<OsStr> { + let rw = Box::into_raw(s.inner.into_box()) as *mut OsStr; + unsafe { Box::from_raw(rw) } + } +} + +#[stable(feature = "os_string_from_box", since = "1.18.0")] +impl From<Box<OsStr>> for OsString { + fn from(boxed: Box<OsStr>) -> OsString { + boxed.into_os_string() + } +} + +#[stable(feature = "box_from_os_string", since = "1.20.0")] +impl From<OsString> for Box<OsStr> { + fn from(s: OsString) -> Box<OsStr> { + s.into_boxed_os_str() + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<OsString> for Arc<OsStr> { + #[inline] + fn from(s: OsString) -> Arc<OsStr> { + let arc = s.inner.into_arc(); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const OsStr) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl<'a> From<&'a OsStr> for Arc<OsStr> { + #[inline] + fn from(s: &OsStr) -> Arc<OsStr> { + let arc = s.inner.into_arc(); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const OsStr) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<OsString> for Rc<OsStr> { + #[inline] + fn from(s: OsString) -> Rc<OsStr> { + let rc = s.inner.into_rc(); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const OsStr) } + } +} + +#[stable(feature = 
"shared_from_slice2", since = "1.24.0")] +impl<'a> From<&'a OsStr> for Rc<OsStr> { + #[inline] + fn from(s: &OsStr) -> Rc<OsStr> { + let rc = s.inner.into_rc(); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const OsStr) } + } +} + +#[stable(feature = "box_default_extra", since = "1.17.0")] +impl Default for Box<OsStr> { + fn default() -> Box<OsStr> { + let rw = Box::into_raw(Slice::empty_box()) as *mut OsStr; + unsafe { Box::from_raw(rw) } } } @@ -443,9 +754,15 @@ impl Hash for OsStr { } #[stable(feature = "rust1", since = "1.0.0")] -impl Debug for OsStr { - fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { - self.inner.fmt(formatter) +impl fmt::Debug for OsStr { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&self.inner, formatter) + } +} + +impl OsStr { + pub(crate) fn display(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.inner, formatter) } } @@ -457,7 +774,13 @@ impl Borrow<OsStr> for OsString { #[stable(feature = "rust1", since = "1.0.0")] impl ToOwned for OsStr { type Owned = OsString; - fn to_owned(&self) -> OsString { self.to_os_string() } + fn to_owned(&self) -> OsString { + self.to_os_string() + } + fn clone_into(&self, target: &mut OsString) { + target.clear(); + target.push(self); + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -511,6 +834,9 @@ mod tests { use super::*; use sys_common::{AsInner, IntoInner}; + use rc::Rc; + use sync::Arc; + #[test] fn test_os_string_with_capacity() { let os_string = OsString::with_capacity(0); @@ -626,4 +952,48 @@ mod tests { let os_str: &OsStr = Default::default(); assert_eq!("", os_str); } + + #[test] + fn into_boxed() { + let orig = "Hello, world!"; + let os_str = OsStr::new(orig); + let boxed: Box<OsStr> = Box::from(os_str); + let os_string = os_str.to_owned().into_boxed_os_str().into_os_string(); + assert_eq!(os_str, &*boxed); + assert_eq!(&*boxed, &*os_string); + assert_eq!(&*os_string, os_str); + } + + #[test] + 
fn boxed_default() { + let boxed = <Box<OsStr>>::default(); + assert!(boxed.is_empty()); + } + + #[test] + fn test_os_str_clone_into() { + let mut os_string = OsString::with_capacity(123); + os_string.push("hello"); + let os_str = OsStr::new("bonjour"); + os_str.clone_into(&mut os_string); + assert_eq!(os_str, os_string); + assert!(os_string.capacity() >= 123); + } + + #[test] + fn into_rc() { + let orig = "Hello, world!"; + let os_str = OsStr::new(orig); + let rc: Rc<OsStr> = Rc::from(os_str); + let arc: Arc<OsStr> = Arc::from(os_str); + + assert_eq!(&*rc, os_str); + assert_eq!(&*arc, os_str); + + let rc2: Rc<OsStr> = Rc::from(os_str.to_owned()); + let arc2: Arc<OsStr> = Arc::from(os_str.to_owned()); + + assert_eq!(&*rc2, os_str); + assert_eq!(&*arc2, os_str); + } } |