summaryrefslogtreecommitdiffstats
path: root/vendor/jfcherng/php-mb-string/src/MbString.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/jfcherng/php-mb-string/src/MbString.php')
-rw-r--r--vendor/jfcherng/php-mb-string/src/MbString.php367
1 files changed, 367 insertions, 0 deletions
diff --git a/vendor/jfcherng/php-mb-string/src/MbString.php b/vendor/jfcherng/php-mb-string/src/MbString.php
new file mode 100644
index 0000000..afe7fdb
--- /dev/null
+++ b/vendor/jfcherng/php-mb-string/src/MbString.php
@@ -0,0 +1,367 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Jfcherng\Utility;
+
+/**
+ * An internal UTF-32 multi-bytes string class.
+ *
+ * Because UTF-8 is varied-width, mb_*() is kinda O(n) when doing decoding.
+ * Using iconv() to make it UTF-32 and work with str*() can be possibly faster.
+ *
+ * UTF-32 is a fix-width encoding (1 char = 4 bytes).
+ * Note that the first 4 bytes in a UTF-32 string is the header (endian bytes).
+ *
+ * @author Jack Cherng <jfcherng@gmail.com>
+ */
+class MbString extends \ArrayObject
+{
+ /**
+ * UTF-32 string without endian bytes.
+ *
+ * @var string
+ */
+ protected $str;
+
+ /**
+ * The original encoding.
+ *
+ * @var string
+ */
+ protected $encoding;
+
+ /**
+ * The endian bytes for UTF-32.
+ *
+ * @var string
+ */
+ protected static $utf32Header;
+
+ /**
+ * The constructor.
+ *
+ * @param string $str the string
+ * @param string $encoding the encoding
+ */
+ public function __construct(string $str = '', string $encoding = 'UTF-8')
+ {
+ static::$utf32Header = static::$utf32Header ?? static::getUtf32Header();
+
+ $this->encoding = $encoding;
+ $this->set($str);
+ }
+
+ /**
+ * Returns a string representation of the object.
+ *
+ * @return string string representation of the object
+ */
+ public function __toString(): string
+ {
+ return $this->get();
+ }
+
+ /**
+ * The string setter.
+ *
+ * @param string $str the string
+ */
+ public function set(string $str): self
+ {
+ $this->str = $this->inputConv($str);
+
+ return $this;
+ }
+
+ public function setAt(int $idx, string $char): self
+ {
+ $char = $this->inputConv($char);
+ if (\strlen($char) > 4) {
+ $char = \substr($char, 0, 4);
+ }
+
+ $spacesPrepend = $idx - $this->strlen();
+ // set index (out of bound)
+ if ($spacesPrepend > 0) {
+ $this->str .= $this->inputConv(\str_repeat(' ', $spacesPrepend)) . $char;
+ }
+ // set index (in bound)
+ else {
+ $this->str = \substr_replace($this->str, $char, $idx << 2, 4);
+ }
+
+ return $this;
+ }
+
+ /**
+ * The string getter.
+ */
+ public function get(): string
+ {
+ return $this->outputConv($this->str);
+ }
+
+ /**
+ * The raw string getter.
+ *
+ * @return string the UTF-32-encoded raw string
+ */
+ public function getRaw(): string
+ {
+ return $this->str;
+ }
+
+ public function getAt(int $idx): string
+ {
+ return $this->outputConv(\substr($this->str, $idx << 2, 4));
+ }
+
+ public function getAtRaw(int $idx): string
+ {
+ return \substr($this->str, $idx << 2, 4);
+ }
+
+ public function toArray(): array
+ {
+ return self::strToChars($this->get());
+ }
+
+ public function toArraySplit(string $regex, int $limit = -1, $flags = 0): array
+ {
+ if ($this->str === '') {
+ return [];
+ }
+
+ return \preg_split($regex, $this->get(), $limit, $flags);
+ }
+
+ public function toArrayRaw(): array
+ {
+ if ($this->str === '') {
+ return [];
+ }
+
+ return \str_split($this->str, 4);
+ }
+
+ public static function strToChars(string $str): array
+ {
+ return \preg_match_all('/./suS', $str, $matches) ? $matches[0] : [];
+ }
+
+ ///////////////////////////////////
+ // string manipulation functions //
+ ///////////////////////////////////
+
+ public function stripos(string $needle, int $offset = 0)
+ {
+ $needle = $this->inputConv($needle);
+ $pos = \stripos($this->str, $needle, $offset << 2);
+
+ return \is_bool($pos) ? $pos : $pos >> 2;
+ }
+
+ public function strlen(): int
+ {
+ return \strlen($this->str) >> 2;
+ }
+
+ public function strpos(string $needle, int $offset = 0)
+ {
+ $needle = $this->inputConv($needle);
+ $pos = \strpos($this->str, $needle, $offset << 2);
+
+ return \is_bool($pos) ? $pos : $pos >> 2;
+ }
+
+ public function substr(int $start = 0, ?int $length = null): string
+ {
+ return $this->outputConv(
+ isset($length)
+ ? \substr($this->str, $start << 2, $length << 2)
+ : \substr($this->str, $start << 2)
+ );
+ }
+
+ public function substr_replace(string $replacement, int $start = 0, ?int $length = null): string
+ {
+ $replacement = $this->inputConv($replacement);
+
+ return $this->outputConv(
+ isset($length)
+ ? \substr_replace($this->str, $replacement, $start << 2, $length << 2)
+ : \substr_replace($this->str, $replacement, $start << 2)
+ );
+ }
+
+ public function strtolower(): string
+ {
+ return \strtolower($this->get());
+ }
+
+ public function strtoupper(): string
+ {
+ return \strtoupper($this->get());
+ }
+
+ ////////////////////////////////
+ // non-manipulative functions //
+ ////////////////////////////////
+
+ public function has(string $needle): bool
+ {
+ $needle = $this->inputConv($needle);
+
+ return \strpos($this->str, $needle) !== false;
+ }
+
+ public function startsWith(string $needle): bool
+ {
+ $needle = $this->inputConv($needle);
+
+ return $needle === \substr($this->str, 0, \strlen($needle));
+ }
+
+ public function endsWith(string $needle): bool
+ {
+ $needle = $this->inputConv($needle);
+ $length = \strlen($needle);
+
+ return $length === 0 ? true : $needle === \substr($this->str, -$length);
+ }
+
+ /////////////////////////////////////////////
+ // those functions will not return a value //
+ /////////////////////////////////////////////
+
+ public function str_insert_i(string $insert, int $position): self
+ {
+ $insert = $this->inputConv($insert);
+ $this->str = \substr_replace($this->str, $insert, $position << 2, 0);
+
+ return $this;
+ }
+
+ public function str_enclose_i(array $closures, int $start = 0, ?int $length = null): self
+ {
+ // ex: $closures = array('{', '}');
+ foreach ($closures as &$closure) {
+ $closure = $this->inputConv($closure);
+ }
+ unset($closure);
+
+ if (\count($closures) < 2) {
+ $closures[0] = $closures[1] = \reset($closures);
+ }
+
+ if (isset($length)) {
+ $replacement = $closures[0] . \substr($this->str, $start << 2, $length << 2) . $closures[1];
+ $this->str = \substr_replace($this->str, $replacement, $start << 2, $length << 2);
+ } else {
+ $replacement = $closures[0] . \substr($this->str, $start << 2) . $closures[1];
+ $this->str = \substr_replace($this->str, $replacement, $start << 2);
+ }
+
+ return $this;
+ }
+
+ public function str_replace_i(string $search, string $replace): self
+ {
+ $search = $this->inputConv($search);
+ $replace = $this->inputConv($replace);
+ $this->str = \str_replace($search, $replace, $this->str);
+
+ return $this;
+ }
+
+ public function substr_replace_i(string $replacement, int $start = 0, ?int $length = null): self
+ {
+ $replacement = $this->inputConv($replacement);
+ $this->str = (
+ isset($length)
+ ? \substr_replace($this->str, $replacement, $start << 2, $length << 2)
+ : \substr_replace($this->str, $replacement, $start << 2)
+ );
+
+ return $this;
+ }
+
+ /////////////////
+ // ArrayObject //
+ /////////////////
+
+ #[\ReturnTypeWillChange]
+ public function offsetSet($idx, $char): void
+ {
+ $this->setAt($idx, $char);
+ }
+
+ #[\ReturnTypeWillChange]
+ public function offsetGet($idx): string
+ {
+ return $this->getAt($idx);
+ }
+
+ #[\ReturnTypeWillChange]
+ public function offsetExists($idx): bool
+ {
+ return \is_int($idx) ? $this->strlen() > $idx : false;
+ }
+
+ #[\ReturnTypeWillChange]
+ public function append($str): void
+ {
+ $this->str .= $this->inputConv($str);
+ }
+
+ public function count(): int
+ {
+ return $this->strlen();
+ }
+
+ ////////////////////
+ // misc functions //
+ ////////////////////
+
+ /**
+ * Gets the utf 32 header.
+ *
+ * @return string the UTF-32 header or empty string
+ */
+ protected static function getUtf32Header(): string
+ {
+ // just use any string to get the endian header, here we use "A"
+ $tmp = \iconv('UTF-8', 'UTF-32', 'A');
+ // some distributions like "php alpine" docker image won't generate the header
+ return $tmp && \strlen($tmp) > 4 ? \substr($tmp, 0, 4) : '';
+ }
+
+ /**
+ * Convert the output string to its original encoding.
+ *
+ * @param string $str The string
+ */
+ protected function outputConv(string $str): string
+ {
+ if ($str === '') {
+ return '';
+ }
+
+ return \iconv('UTF-32', $this->encoding, static::$utf32Header . $str);
+ }
+
+ /**
+ * Convert the input string to UTF-32 without header.
+ *
+ * @param string $str The string
+ */
+ protected function inputConv(string $str): string
+ {
+ if ($str === '') {
+ return '';
+ }
+
+ return \substr(\iconv($this->encoding, 'UTF-32', $str), \strlen(static::$utf32Header));
+ }
+}