diff options
Diffstat (limited to 'vendor/bstr/scripts/generate-unicode-data')
-rwxr-xr-x | vendor/bstr/scripts/generate-unicode-data | 149 |
1 files changed, 0 insertions, 149 deletions
diff --git a/vendor/bstr/scripts/generate-unicode-data b/vendor/bstr/scripts/generate-unicode-data deleted file mode 100755 index b8341c5a6..000000000 --- a/vendor/bstr/scripts/generate-unicode-data +++ /dev/null @@ -1,149 +0,0 @@ -#!/bin/sh - -set -e -D="$(dirname "$0")" - -# Convenience function for checking that a command exists. -requires() { - cmd="$1" - if ! command -v "$cmd" > /dev/null 2>&1; then - echo "DEPENDENCY MISSING: $cmd must be installed" >&2 - exit 1 - fi -} - -# Test if an array ($2) contains a particular element ($1). -array_exists() { - needle="$1" - shift - - for el in "$@"; do - if [ "$el" = "$needle" ]; then - return 0 - fi - done - return 1 -} - -graphemes() { - regex="$(sh "$D/regex/grapheme.sh")" - - echo "generating forward grapheme DFA" - ucd-generate dfa \ - --name GRAPHEME_BREAK_FWD \ - --sparse --minimize --anchored --state-size 2 \ - src/unicode/fsm/ \ - "$regex" - - echo "generating reverse grapheme DFA" - ucd-generate dfa \ - --name GRAPHEME_BREAK_REV \ - --reverse --longest \ - --sparse --minimize --anchored --state-size 2 \ - src/unicode/fsm/ \ - "$regex" -} - -words() { - regex="$(sh "$D/regex/word.sh")" - - echo "generating forward word DFA (this can take a while)" - ucd-generate dfa \ - --name WORD_BREAK_FWD \ - --sparse --minimize --anchored --state-size 4 \ - src/unicode/fsm/ \ - "$regex" -} - -sentences() { - regex="$(sh "$D/regex/sentence.sh")" - - echo "generating forward sentence DFA (this can take a while)" - ucd-generate dfa \ - --name SENTENCE_BREAK_FWD \ - --minimize \ - --sparse --anchored --state-size 4 \ - src/unicode/fsm/ \ - "$regex" -} - -regional_indicator() { - # For finding all occurrences of region indicators. This is used to handle - # regional indicators as a special case for the reverse grapheme iterator - # and the reverse word iterator. - echo "generating regional indicator DFA" - ucd-generate dfa \ - --name REGIONAL_INDICATOR_REV \ - --reverse \ - --classes --minimize --anchored --premultiply --state-size 1 \ - src/unicode/fsm/ \ - "\p{gcb=Regional_Indicator}" -} - -simple_word() { - echo "generating forward simple word DFA" - ucd-generate dfa \ - --name SIMPLE_WORD_FWD \ - --sparse --minimize --state-size 2 \ - src/unicode/fsm/ \ - "\w" -} - -whitespace() { - echo "generating forward whitespace DFA" - ucd-generate dfa \ - --name WHITESPACE_ANCHORED_FWD \ - --anchored --classes --premultiply --minimize --state-size 1 \ - src/unicode/fsm/ \ - "\s+" - - echo "generating reverse whitespace DFA" - ucd-generate dfa \ - --name WHITESPACE_ANCHORED_REV \ - --reverse \ - --anchored --classes --premultiply --minimize --state-size 2 \ - src/unicode/fsm/ \ - "\s+" -} - -main() { - if array_exists "-h" "$@" || array_exists "--help" "$@"; then - echo "Usage: $(basename "$0") [--list-commands] [<command>] ..." >&2 - exit - fi - - commands=" - graphemes - sentences - words - regional-indicator - simple-word - whitespace - " - if array_exists "--list-commands" "$@"; then - for cmd in $commands; do - echo "$cmd" - done - exit - fi - - # ucd-generate is used to compile regexes into DFAs. - requires ucd-generate - - mkdir -p src/unicode/fsm/ - - cmds=$* - if [ $# -eq 0 ] || array_exists "all" "$@"; then - cmds=$commands - fi - for cmd in $cmds; do - if array_exists "$cmd" $commands; then - fun="$(echo "$cmd" | sed 's/-/_/g')" - eval "$fun" - else - echo "unrecognized command: $cmd" >&2 - fi - done -} - -main "$@" |