diff options
Diffstat (limited to 'modules/charset/ambiguous/generate.go')
-rw-r--r-- | modules/charset/ambiguous/generate.go | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/modules/charset/ambiguous/generate.go b/modules/charset/ambiguous/generate.go new file mode 100644 index 00000000..e3fda5be --- /dev/null +++ b/modules/charset/ambiguous/generate.go @@ -0,0 +1,188 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "os" + "sort" + "text/template" + "unicode" + + "code.gitea.io/gitea/modules/json" + + "golang.org/x/text/unicode/rangetable" +) + +// ambiguous.json provides a one to one mapping of ambiguous characters to other characters +// See https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json + +type AmbiguousTable struct { + Confusable []rune + With []rune + Locale string + RangeTable *unicode.RangeTable +} + +type RunePair struct { + Confusable rune + With rune +} + +var verbose bool + +func main() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, `%s: Generate AmbiguousCharacter + +Usage: %[1]s [-v] [-o output.go] ambiguous.json +`, os.Args[0]) + flag.PrintDefaults() + } + + output := "" + flag.BoolVar(&verbose, "v", false, "verbose output") + flag.StringVar(&output, "o", "ambiguous_gen.go", "file to output to") + flag.Parse() + input := flag.Arg(0) + if input == "" { + input = "ambiguous.json" + } + + bs, err := os.ReadFile(input) + if err != nil { + fatalf("Unable to read: %s Err: %v", input, err) + } + + var unwrapped string + if err := json.Unmarshal(bs, &unwrapped); err != nil { + fatalf("Unable to unwrap content in: %s Err: %v", input, err) + } + + fromJSON := map[string][]uint32{} + if err := json.Unmarshal([]byte(unwrapped), &fromJSON); err != nil { + fatalf("Unable to unmarshal content in: %s Err: %v", input, err) + } + + tables := make([]*AmbiguousTable, 0, len(fromJSON)) + for locale, chars := range fromJSON { + table := &AmbiguousTable{Locale: locale} + table.Confusable = make([]rune, 0, len(chars)/2) + table.With = make([]rune, 0, len(chars)/2) + pairs := make([]RunePair, len(chars)/2) + for i := 0; i < len(chars); i += 2 { + pairs[i/2].Confusable, pairs[i/2].With = rune(chars[i]), rune(chars[i+1]) + } + sort.Slice(pairs, func(i, j int) bool { + return pairs[i].Confusable < pairs[j].Confusable + }) + for _, pair := range pairs { + table.Confusable = append(table.Confusable, pair.Confusable) + table.With = append(table.With, pair.With) + } + table.RangeTable = rangetable.New(table.Confusable...) + tables = append(tables, table) + } + sort.Slice(tables, func(i, j int) bool { + return tables[i].Locale < tables[j].Locale + }) + data := map[string]any{ + "Tables": tables, + } + + if err := runTemplate(generatorTemplate, output, &data); err != nil { + fatalf("Unable to run template: %v", err) + } +} + +func runTemplate(t *template.Template, filename string, data any) error { + buf := bytes.NewBuffer(nil) + if err := t.Execute(buf, data); err != nil { + return fmt.Errorf("unable to execute template: %w", err) + } + bs, err := format.Source(buf.Bytes()) + if err != nil { + verbosef("Bad source:\n%s", buf.String()) + return fmt.Errorf("unable to format source: %w", err) + } + + old, err := os.ReadFile(filename) + if err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to read old file %s because %w", filename, err) + } else if err == nil { + if bytes.Equal(bs, old) { + // files are the same don't rewrite it. + return nil + } + } + + file, err := os.Create(filename) + if err != nil { + return fmt.Errorf("failed to create file %s because %w", filename, err) + } + defer file.Close() + _, err = file.Write(bs) + if err != nil { + return fmt.Errorf("unable to write generated source: %w", err) + } + return nil +} + +var generatorTemplate = template.Must(template.New("ambiguousTemplate").Parse(`// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT +// Copyright 2022 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + + +package charset + +import "unicode" + +// This file is generated from https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json + +// AmbiguousTable matches a confusable rune with its partner for the Locale +type AmbiguousTable struct { + Confusable []rune + With []rune + Locale string + RangeTable *unicode.RangeTable +} + +// AmbiguousCharacters provides a map by locale name to the confusable characters in that locale +var AmbiguousCharacters = map[string]*AmbiguousTable{ + {{range .Tables}}{{printf "%q:" .Locale}} { + Confusable: []rune{ {{range .Confusable}}{{.}},{{end}} }, + With: []rune{ {{range .With}}{{.}},{{end}} }, + Locale: {{printf "%q" .Locale}}, + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {{range .RangeTable.R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, + {{end}} }, + R32: []unicode.Range32{ + {{range .RangeTable.R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, + {{end}} }, + LatinOffset: {{.RangeTable.LatinOffset}}, + }, + }, + {{end}} +} + +`)) + +func logf(format string, args ...any) { + fmt.Fprintf(os.Stderr, format+"\n", args...) +} + +func verbosef(format string, args ...any) { + if verbose { + logf(format, args...) + } +} + +func fatalf(format string, args ...any) { + logf("fatal: "+format+"\n", args...) + os.Exit(1) +} |