summary refs log tree commit diff stats
path: root/vendor/bstr/scripts/regex/grapheme.sh
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/bstr/scripts/regex/grapheme.sh')
-rw-r--r-- vendor/bstr/scripts/regex/grapheme.sh 50
1 files changed, 50 insertions, 0 deletions
diff --git a/vendor/bstr/scripts/regex/grapheme.sh b/vendor/bstr/scripts/regex/grapheme.sh
new file mode 100644
index 000000000..0b2b54daa
--- /dev/null
+++ b/vendor/bstr/scripts/regex/grapheme.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+
+# vim: indentexpr= nosmartindent autoindent
+# vim: tabstop=2 shiftwidth=2 softtabstop=2
+
+# Writes a regex for a single grapheme cluster to stdout.
+#
+# This regex was manually written, derived from the rules in UAX #29.
+# Particularly, from Table 1c, which lays out a regex for grapheme clusters.
+#
+# The pattern starts with the (?x) flag and is spread over several lines with
+# spaces between tokens, so it relies on the consuming regex engine ignoring
+# that whitespace.
+
+# One class per Grapheme_Cluster_Break (gcb) property value used by the regex.
+CR="\p{gcb=CR}"
+LF="\p{gcb=LF}"
+Control="\p{gcb=Control}"
+Prepend="\p{gcb=Prepend}"
+L="\p{gcb=L}"
+V="\p{gcb=V}"
+LV="\p{gcb=LV}"
+LVT="\p{gcb=LVT}"
+T="\p{gcb=T}"
+RI="\p{gcb=RI}"
+Extend="\p{gcb=Extend}"
+ZWJ="\p{gcb=ZWJ}"
+SpacingMark="\p{gcb=SpacingMark}"
+
+# Classes selected by something other than a gcb value.
+Any="\p{any}"
+ExtendPict="\p{Extended_Pictographic}"
+
+# printf '%s\n' is used instead of echo: POSIX leaves echo's behavior
+# implementation-defined when an operand contains a backslash, and every
+# \p{...} class above does. The argument ends with a newline of its own, so
+# the output ends with an empty line.
+#
+# The final alternative, $Any, is a catch-all for input that the earlier
+# alternatives do not cover.
+printf '%s\n' "(?x)
+$CR $LF
+|
+$Control
+|
+$Prepend*
+(
+ (
+ ($L* ($V+ | $LV $V* | $LVT) $T*)
+ |
+ $L+
+ |
+ $T+
+ )
+ |
+ $RI $RI
+ |
+ $ExtendPict ($Extend* $ZWJ $ExtendPict)*
+ |
+ [^$Control $CR $LF]
+)
+[$Extend $ZWJ $SpacingMark]*
+|
+$Any
+"