summaryrefslogtreecommitdiffstats
path: root/vendor/markup5ever/build.rs
blob: 38b4fdddc2a19ad66193b9838971f34576053298 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

extern crate phf_codegen;
extern crate string_cache_codegen;

use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;

mod entities;

static NAMESPACES: &[(&str, &str)] = &[
    ("", ""),
    ("*", "*"),
    ("html", "http://www.w3.org/1999/xhtml"),
    ("xml", "http://www.w3.org/XML/1998/namespace"),
    ("xmlns", "http://www.w3.org/2000/xmlns/"),
    ("xlink", "http://www.w3.org/1999/xlink"),
    ("svg", "http://www.w3.org/2000/svg"),
    ("mathml", "http://www.w3.org/1998/Math/MathML"),
];

fn main() {
    let generated = Path::new(&env::var("OUT_DIR").unwrap()).join("generated.rs");
    let mut generated = BufWriter::new(File::create(&generated).unwrap());

    named_entities_to_phf(&Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs"));

    // Create a string cache for local names
    let local_names = Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap()).join("local_names.txt");
    let mut local_names_atom = string_cache_codegen::AtomType::new("LocalName", "local_name!");
    for line in BufReader::new(File::open(&local_names).unwrap()).lines() {
        let local_name = line.unwrap();
        local_names_atom.atom(&local_name);
        local_names_atom.atom(&local_name.to_ascii_lowercase());
    }
    local_names_atom
        .with_macro_doc("Takes a local name as a string and returns its key in the string cache.")
        .write_to(&mut generated)
        .unwrap();

    // Create a string cache for namespace prefixes
    string_cache_codegen::AtomType::new("Prefix", "namespace_prefix!")
        .with_macro_doc("Takes a namespace prefix string and returns its key in a string cache.")
        .atoms(NAMESPACES.iter().map(|&(prefix, _url)| prefix))
        .write_to(&mut generated)
        .unwrap();

    // Create a string cache for namespace urls
    string_cache_codegen::AtomType::new("Namespace", "namespace_url!")
        .with_macro_doc("Takes a namespace url string and returns its key in a string cache.")
        .atoms(NAMESPACES.iter().map(|&(_prefix, url)| url))
        .write_to(&mut generated)
        .unwrap();

    writeln!(
        generated,
        r#"
        /// Maps the input of [`namespace_prefix!`](macro.namespace_prefix.html) to 
        /// the output of [`namespace_url!`](macro.namespace_url.html).
        ///
        #[macro_export] macro_rules! ns {{
        "#
    )
    .unwrap();
    for &(prefix, url) in NAMESPACES {
        writeln!(
            generated,
            "({}) => {{ namespace_url!({:?}) }};",
            prefix, url
        )
        .unwrap();
    }
    writeln!(generated, "}}").unwrap();
}

fn named_entities_to_phf(to: &Path) {
    let mut entities: HashMap<&str, (u32, u32)> = entities::NAMED_ENTITIES
        .iter()
        .map(|(name, cp1, cp2)| {
            assert!(name.starts_with('&'));
            (&name[1..], (*cp1, *cp2))
        })
        .collect();

    // Add every missing prefix of those keys, mapping to NULL characters.
    for key in entities.keys().cloned().collect::<Vec<_>>() {
        for n in 1..key.len() {
            entities.entry(&key[..n]).or_insert((0, 0));
        }
    }
    entities.insert("", (0, 0));

    let mut phf_map = phf_codegen::Map::new();
    for (key, value) in entities {
        phf_map.entry(key, &format!("{:?}", value));
    }

    let mut file = File::create(to).unwrap();
    writeln!(
        &mut file,
        r#"
/// A map of entity names to their codepoints. The second codepoint will
/// be 0 if the entity contains a single codepoint. Entities have their preceeding '&' removed.
///
/// # Examples
///
/// ```
/// use markup5ever::data::NAMED_ENTITIES;
///
/// assert_eq!(NAMED_ENTITIES.get("gt;").unwrap(), &(62, 0));
/// ```
"#
    )
    .unwrap();
    writeln!(
        &mut file,
        "pub static NAMED_ENTITIES: Map<&'static str, (u32, u32)> = {};",
        phf_map.build(),
    )
    .unwrap();
}