//! Windows-specific extensions to primitives in the [`std::ffi`] module.
//!
//! # Overview
//!
//! For historical reasons, the Windows API uses a form of potentially
//! ill-formed UTF-16 encoding for strings. Specifically, the 16-bit
//! code units in Windows strings may contain [isolated surrogate code
//! points which are not paired together][ill-formed-utf-16]. The
//! Unicode standard requires that surrogate code points (those in the
//! range U+D800 to U+DFFF) always be *paired*, because in the UTF-16
//! encoding a *surrogate code unit pair* is used to encode a single
//! character. For compatibility with code that does not enforce
//! these pairings, Windows does not enforce them, either.
//!
//! While it is not always possible to convert such a string losslessly into
//! a valid UTF-16 string (or even UTF-8), it is often desirable to be
//! able to round-trip such a string from and to Windows APIs
//! losslessly. For example, some Rust code may be "bridging" some
//! Windows APIs together, just passing `WCHAR` strings among those
//! APIs without ever really looking into the strings.
//!
//! If Rust code *does* need to look into those strings, it can
//! convert them to valid UTF-8, possibly lossily, by substituting
//! invalid sequences with [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], as is
//! conventionally done in other Rust APIs that deal with string
//! encodings.
//!
//! # `OsStringExt` and `OsStrExt`
//!
//! [`OsString`] is the Rust wrapper for owned strings in the
//! preferred representation of the operating system. On Windows,
//! this struct gets augmented with an implementation of the
//! [`OsStringExt`] trait, which has an [`OsStringExt::from_wide`] method. This
//! lets you create an [`OsString`] from a `&[u16]` slice; presumably
//! you get such a slice out of a `WCHAR` Windows API.
//!
//! Similarly, [`OsStr`] is the Rust wrapper for borrowed strings in the
//! preferred representation of the operating system. On Windows, the
[`OsStrExt`] trait provides the [`OsStrExt::encode_wide`] method, which //! outputs an [`EncodeWide`] iterator. You can [`collect`] this //! iterator, for example, to obtain a `Vec`; you can later get a //! pointer to this vector's contents and feed it to Windows APIs. //! //! These traits, along with [`OsString`] and [`OsStr`], work in //! conjunction so that it is possible to **round-trip** strings from //! Windows and back, with no loss of data, even if the strings are //! ill-formed UTF-16. //! //! [ill-formed-utf-16]: https://simonsapin.github.io/wtf-8/#ill-formed-utf-16 //! [`collect`]: crate::iter::Iterator::collect //! [U+FFFD]: crate::char::REPLACEMENT_CHARACTER //! [`std::ffi`]: crate::ffi #![stable(feature = "rust1", since = "1.0.0")] use crate::ffi::{OsStr, OsString}; use crate::sealed::Sealed; use crate::sys::os_str::Buf; use crate::sys_common::wtf8::Wtf8Buf; use crate::sys_common::{AsInner, FromInner}; #[stable(feature = "rust1", since = "1.0.0")] pub use crate::sys_common::wtf8::EncodeWide; /// Windows-specific extensions to [`OsString`]. /// /// This trait is sealed: it cannot be implemented outside the standard library. /// This is so that future additional methods are not breaking changes. #[stable(feature = "rust1", since = "1.0.0")] pub trait OsStringExt: Sealed { /// Creates an `OsString` from a potentially ill-formed UTF-16 slice of /// 16-bit code units. /// /// This is lossless: calling [`OsStrExt::encode_wide`] on the resulting string /// will always return the original code units. /// /// # Examples /// /// ``` /// use std::ffi::OsString; /// use std::os::windows::prelude::*; /// /// // UTF-16 encoding for "Unicode". 
/// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065]; /// /// let string = OsString::from_wide(&source[..]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] fn from_wide(wide: &[u16]) -> Self; } #[stable(feature = "rust1", since = "1.0.0")] impl OsStringExt for OsString { fn from_wide(wide: &[u16]) -> OsString { FromInner::from_inner(Buf { inner: Wtf8Buf::from_wide(wide) }) } } /// Windows-specific extensions to [`OsStr`]. /// /// This trait is sealed: it cannot be implemented outside the standard library. /// This is so that future additional methods are not breaking changes. #[stable(feature = "rust1", since = "1.0.0")] pub trait OsStrExt: Sealed { /// Re-encodes an `OsStr` as a wide character sequence, i.e., potentially /// ill-formed UTF-16. /// /// This is lossless: calling [`OsStringExt::from_wide`] and then /// `encode_wide` on the result will yield the original code units. /// Note that the encoding does not add a final null terminator. /// /// # Examples /// /// ``` /// use std::ffi::OsString; /// use std::os::windows::prelude::*; /// /// // UTF-16 encoding for "Unicode". /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065]; /// /// let string = OsString::from_wide(&source[..]); /// /// let result: Vec = string.encode_wide().collect(); /// assert_eq!(&source[..], &result[..]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] fn encode_wide(&self) -> EncodeWide<'_>; } #[stable(feature = "rust1", since = "1.0.0")] impl OsStrExt for OsStr { #[inline] fn encode_wide(&self) -> EncodeWide<'_> { self.as_inner().inner.encode_wide() } }