summaryrefslogtreecommitdiffstats
path: root/third_party/rust/os_str_bytes/src/windows/mod.rs
blob: ed9e60b050dc336acd1c7baf1bb8966cd9b6e4f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// These functions are necessarily inefficient, because they must revert
// encoding conversions performed by the standard library. However, there is
// currently no better alternative.

use std::borrow::Cow;
use std::error::Error;
use std::ffi::OsStr;
use std::ffi::OsString;
use std::fmt;
use std::fmt::Display;
use std::fmt::Formatter;
use std::ops::Not;
use std::os::windows::ffi::OsStrExt;
use std::os::windows::ffi::OsStringExt;
use std::result;
use std::str;

// The `raw` submodule is declared through the crate's `if_raw_str!` macro.
// NOTE(review): presumably this makes the module conditional on raw string
// support being enabled; confirm against the macro's definition.
if_raw_str! {
    pub(super) mod raw;
}

// Provides `encode_wide` and `DecodeWide`, which `from_bytes` and `to_bytes`
// in this file use to convert between byte strings and wide characters.
mod wtf8;
use wtf8::DecodeWide;

// Unit tests for this module; only compiled for test builds.
#[cfg(test)]
mod tests;

/// The error returned when a byte sequence cannot be converted to the
/// platform encoding.
///
/// Each variant identifies where the failure was detected; the `Display`
/// implementation includes that position in its message.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(super) enum EncodingError {
    /// The error is attributed to this byte (displayed as `b'\xNN'`).
    Byte(u8),
    /// The error is attributed to this code point (displayed as `U+NNNN`).
    CodePoint(u32),
    /// The error is attributed to the end of the string.
    End(),
}

impl EncodingError {
    /// Describes where the error occurred, for use in the `Display` message.
    ///
    /// The end-of-string case returns a borrowed static string; the byte and
    /// code point cases format their payload into a newly allocated string.
    fn position(&self) -> Cow<'_, str> {
        let description = match *self {
            // Nothing to format here, so no allocation is needed.
            Self::End() => return Cow::Borrowed("end of string"),
            Self::Byte(byte) => format!("byte b'\\x{:02X}'", byte),
            Self::CodePoint(code_point) => {
                format!("code point U+{:04X}", code_point)
            }
        };
        Cow::Owned(description)
    }
}

impl Display for EncodingError {
    /// Writes a fixed explanation followed by the position reported by
    /// `position()`.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let location = self.position();
        f.write_fmt(format_args!(
            "byte sequence is not representable in the platform encoding; \
             error at {}",
            location,
        ))
    }
}

// No trait methods are overridden here; the impl only marks `EncodingError`
// as usable wherever `std::error::Error` is required.
impl Error for EncodingError {}

// Module-local shorthand for results whose error type is `EncodingError`.
type Result<T> = result::Result<T, EncodingError>;

/// Encodes `string` as wide characters and builds an `OsString` from them.
///
/// Returns `Ok(None)` when the encoder reports that the input was still
/// UTF-8, in which case no `OsString` is constructed here. Returns the first
/// error produced by the encoder, if any.
fn from_bytes(string: &[u8]) -> Result<Option<OsString>> {
    let mut encoder = wtf8::encode_wide(string);

    // The buffer is sized by hand because collecting an iterator into a
    // result ignores the size hint:
    // https://github.com/rust-lang/rust/issues/48994
    let (lower_bound, _) = encoder.size_hint();
    let mut wide_chars = Vec::with_capacity(lower_bound);
    // `by_ref` keeps the encoder available for the UTF-8 query afterwards.
    encoder
        .by_ref()
        .try_for_each(|wchar| wchar.map(|unit| wide_chars.push(unit)))?;

    debug_assert_eq!(str::from_utf8(string).is_ok(), encoder.is_still_utf8());
    if encoder.is_still_utf8() {
        return Ok(None);
    }
    Ok(Some(OsStringExt::from_wide(&wide_chars)))
}

/// Converts `os_string` to bytes by passing its wide-character encoding
/// through `DecodeWide`.
fn to_bytes(os_string: &OsStr) -> Vec<u8> {
    let wide_chars = OsStrExt::encode_wide(os_string);
    // Start from the lower bound on the number of wide characters.
    let (lower_bound, _) = wide_chars.size_hint();

    let mut bytes = Vec::with_capacity(lower_bound);
    bytes.extend(DecodeWide::new(wide_chars));
    bytes
}

/// Converts a byte string into an `OsStr`, borrowing the input when possible.
///
/// When `from_bytes` yields no converted string, the input bytes are
/// reinterpreted in place as a `str`; otherwise the converted `OsString` is
/// returned as an owned value. Errors from `from_bytes` are propagated.
pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
    Ok(match from_bytes(string)? {
        Some(os_string) => Cow::Owned(os_string),
        None => {
            // SAFETY: This slice was validated to be UTF-8.
            let utf8 = unsafe { str::from_utf8_unchecked(string) };
            Cow::Borrowed(OsStr::new(utf8))
        }
    })
}

/// Converts an `OsStr` into its byte representation.
///
/// The result is always the owned variant, since `to_bytes` builds a new
/// vector.
pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
    let bytes = to_bytes(os_string);
    Cow::Owned(bytes)
}

/// Converts an owned byte vector into an `OsString`.
///
/// When `from_bytes` yields no converted string, the vector itself is turned
/// into a `String` and then into an `OsString`. Errors from `from_bytes` are
/// propagated.
pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
    match from_bytes(&string)? {
        Some(os_string) => Ok(os_string),
        None => {
            // SAFETY: This slice was validated to be UTF-8.
            let utf8 = unsafe { String::from_utf8_unchecked(string) };
            Ok(OsString::from(utf8))
        }
    }
}

/// Converts an owned `OsString` into its byte representation.
///
/// The string is only borrowed for the conversion; `to_bytes` builds a new
/// vector.
pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
    to_bytes(os_string.as_os_str())
}