From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/unicode-script/.cargo-checksum.json | 1 + vendor/unicode-script/Cargo.toml | 42 + vendor/unicode-script/README.md | 7 + vendor/unicode-script/scripts/unicode.py | 402 ++++ vendor/unicode-script/src/lib.rs | 560 +++++ vendor/unicode-script/src/tables.rs | 3111 ++++++++++++++++++++++++++++ 6 files changed, 4123 insertions(+) create mode 100644 vendor/unicode-script/.cargo-checksum.json create mode 100644 vendor/unicode-script/Cargo.toml create mode 100644 vendor/unicode-script/README.md create mode 100644 vendor/unicode-script/scripts/unicode.py create mode 100644 vendor/unicode-script/src/lib.rs create mode 100644 vendor/unicode-script/src/tables.rs (limited to 'vendor/unicode-script') diff --git a/vendor/unicode-script/.cargo-checksum.json b/vendor/unicode-script/.cargo-checksum.json new file mode 100644 index 000000000..44c6c80d2 --- /dev/null +++ b/vendor/unicode-script/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"3b6e9ba98b2f20b8b8a13f2e961a78691a064dcd1ae0d5bcfaf26216b2bb9c68","README.md":"14f5fffdc485176a2ab2e04555231042d0f7d818dbf1a0749a1ecbd8a0d4d500","scripts/unicode.py":"53803e407327679983929fbbdaf874c44c21b6f775eb48690dd52528dd3f4a51","src/lib.rs":"1e67da407be73a423a1de030f1397864aa454925a8582e027e9a9246529bf0b6","src/tables.rs":"6303916c60cee9abfa380b345536177415f042f36df3479d35c17b3587e65479"},"package":"098ec66172ce21cd55f8bcc786ee209dd20e04eff70acfca30cb79924d173ae9"} \ No newline at end of file diff --git a/vendor/unicode-script/Cargo.toml b/vendor/unicode-script/Cargo.toml new file mode 100644 index 000000000..6dfc01148 --- /dev/null +++ b/vendor/unicode-script/Cargo.toml @@ -0,0 +1,42 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "unicode-script" +version = "0.5.3" +authors = ["Manish Goregaokar "] +exclude = ["target/*", "Cargo.lock", "scripts/tmp", "*.txt"] +description = "This crate exposes the Unicode `Script` and `Script_Extension` properties from [UAX #24](http://www.unicode.org/reports/tr24/)\n" +homepage = "https://github.com/unicode-rs/unicode-script" +documentation = "https://docs.rs/unicode-script" +readme = "README.md" +keywords = ["text", "unicode", "script", "language"] +license = "MIT/Apache-2.0" +repository = "https://github.com/unicode-rs/unicode-script" +[dependencies.compiler_builtins] +version = "0.1" +optional = true + +[dependencies.core] +version = "1.0" +optional = true +package = "rustc-std-workspace-core" + +[dependencies.std] +version = "1.0" +optional = true +package = "rustc-std-workspace-std" + +[features] +bench = [] +rustc-dep-of-std = ["std", "core", "compiler_builtins"] diff --git a/vendor/unicode-script/README.md b/vendor/unicode-script/README.md new file mode 100644 index 000000000..85fd60b11 --- /dev/null +++ b/vendor/unicode-script/README.md @@ -0,0 +1,7 @@ +# unicode-script + +[![Build Status](https://github.com/unicode-rs/unicode-script/workflows/Tests/badge.svg)](https://github.com/unicode-rs/unicode-script/actions) +[![Current Version](https://meritbadge.herokuapp.com/unicode-script)](https://crates.io/crates/unicode-script) +[![License: MIT/Apache-2.0](https://img.shields.io/crates/l/unicode-script.svg)](#license) + +This crate exposes the Unicode `Script` and `Script_Extension` properties from [UAX #24](http://www.unicode.org/reports/tr24/) \ No newline at end of file diff --git a/vendor/unicode-script/scripts/unicode.py b/vendor/unicode-script/scripts/unicode.py new file mode 100644 index 000000000..e40a92c6d --- /dev/null +++ b/vendor/unicode-script/scripts/unicode.py @@ -0,0 +1,402 @@ +#!/usr/bin/env python +# +# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +# This script uses the following Unicode tables: +# - PropertyValueAliases.txt +# - ScriptExtensions.txt +# - Scripts.txt +# +# Since this should not require frequent updates, we just store this +# out-of-line and check the unicode.rs file into git. + +import fileinput, re, os, sys + +preamble = '''// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly + +#![allow(missing_docs, non_upper_case_globals, non_snake_case)] + +pub use tables_impl::*; + +#[rustfmt::skip] +mod tables_impl { +use crate::ScriptExtension; +''' + +# Close `mod impl {` +ending=''' +} +''' + +UNICODE_VERSION = (13, 0, 0) + +UNICODE_VERSION_NUMBER = "%s.%s.%s" %UNICODE_VERSION + +def escape_char(c): + return "'\\u{%x}'" % c + +def fetch(f): + if not os.path.exists(os.path.basename(f)): + if "emoji" in f: + os.system("curl -O https://www.unicode.org/Public/emoji/%s.%s/%s" + % (UNICODE_VERSION[0], UNICODE_VERSION[1], f)) + else: + os.system("curl -O http://www.unicode.org/Public/%s/ucd/%s" + % (UNICODE_VERSION_NUMBER, f)) + + if not os.path.exists(os.path.basename(f)): + sys.stderr.write("cannot load %s" % f) + exit(1) + +def group_cats(cats): + cats_out = {} + for cat in cats: + cats_out[cat] = group_cat(cats[cat]) + return cats_out + +def aliases(): + """ + Fetch the shorthand aliases for each longhand Script name + """ + fetch("PropertyValueAliases.txt") + longforms = {} + shortforms = {} + re1 = re.compile(r"^ *sc *; *(\w+) *; *(\w+)") + for line in fileinput.input(os.path.basename("PropertyValueAliases.txt")): + m = re1.match(line) + if m: + l = m.group(2).strip() + s = m.group(1).strip() + assert(s not in longforms) + assert(l not in shortforms) + longforms[s] = l + shortforms[l] = s + else: + continue + + return (longforms, shortforms) + +def format_table_content(f, content, indent): + line = " "*indent + first = True + for chunk in content.split(","): + if len(line) + len(chunk) < 98: + if first: + line += chunk + else: + line += ", " + chunk + first = False + else: + f.write(line + ",\n") + line = " "*indent + chunk + f.write(line) + +# Implementation from unicode-segmentation +def load_properties(f, interestingprops): + fetch(f) + props = {} + # Note: these regexes are different from those in unicode-segmentation, + # becase we need to handle spaces here + re1 = re.compile(r"^ *([0-9A-F]+) *; *([^#]+) *#") + re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *([^#]+) *#") + + for line in fileinput.input(os.path.basename(f)): + prop = None + d_lo = 0 + d_hi = 0 + m = re1.match(line) + if m: + d_lo = m.group(1) + d_hi = m.group(1) + prop = m.group(2).strip() + else: + m = re2.match(line) + if m: + d_lo = m.group(1) + d_hi = m.group(2) + prop = m.group(3).strip() + else: + continue + if interestingprops and prop not in interestingprops: + continue + d_lo = int(d_lo, 16) + d_hi = int(d_hi, 16) + if prop not in props: + props[prop] = [] + props[prop].append((d_lo, d_hi)) + + return props + +# Implementation from unicode-segmentation +def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True, + pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])), is_const=True): + pub_string = "const" + if not is_const: + pub_string = "let" + if is_pub: + pub_string = "pub " + pub_string + f.write(" %s %s: %s = &[\n" % (pub_string, name, t_type)) + data = "" + first = True + for dat in t_data: + if not first: + data += "," + first = False + data += pfun(dat) + format_table_content(f, data, 8) + f.write("\n ];\n\n") + +def emit_search(f): + f.write(""" +pub fn bsearch_range_value_table(c: char, r: &'static [(char, char, T)]) -> Option { + use core::cmp::Ordering::{Equal, Less, Greater}; + match r.binary_search_by(|&(lo, hi, _)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }) { + Ok(idx) => { + let (_, _, cat) = r[idx]; + Some(cat) + } + Err(_) => None + } +} + +#[inline] +pub fn get_script(c: char) -> Option