From 1199780155f666b6806d563a29d093a251664009 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 30 Jan 2021 09:13:47 +0100 Subject: Adding upstream version 2.1.2. Signed-off-by: Daniel Baumann --- pendulum/formatting/formatter.py | 685 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 685 insertions(+) create mode 100644 pendulum/formatting/formatter.py (limited to 'pendulum/formatting/formatter.py') diff --git a/pendulum/formatting/formatter.py b/pendulum/formatting/formatter.py new file mode 100644 index 0000000..4e493d0 --- /dev/null +++ b/pendulum/formatting/formatter.py @@ -0,0 +1,685 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import datetime +import re +import typing + +import pendulum + +from pendulum.locales.locale import Locale +from pendulum.utils._compat import decode + + +_MATCH_1 = r"\d" +_MATCH_2 = r"\d\d" +_MATCH_3 = r"\d{3}" +_MATCH_4 = r"\d{4}" +_MATCH_6 = r"[+-]?\d{6}" +_MATCH_1_TO_2 = r"\d\d?" +_MATCH_1_TO_2_LEFT_PAD = r"[0-9 ]\d?" +_MATCH_1_TO_3 = r"\d{1,3}" +_MATCH_1_TO_4 = r"\d{1,4}" +_MATCH_1_TO_6 = r"[+-]?\d{1,6}" +_MATCH_3_TO_4 = r"\d{3}\d?" +_MATCH_5_TO_6 = r"\d{5}\d?" +_MATCH_UNSIGNED = r"\d+" +_MATCH_SIGNED = r"[+-]?\d+" +_MATCH_OFFSET = r"[Zz]|[+-]\d\d:?\d\d" +_MATCH_SHORT_OFFSET = r"[Zz]|[+-]\d\d(?::?\d\d)?" +_MATCH_TIMESTAMP = r"[+-]?\d+(\.\d{1,6})?" +_MATCH_WORD = ( + "(?i)[0-9]*" + "['a-z\u00A0-\u05FF\u0700-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+" + r"|[\u0600-\u06FF/]+(\s*?[\u0600-\u06FF]+){1,2}" +) +_MATCH_TIMEZONE = "[A-Za-z0-9-+]+(/[A-Za-z0-9-+_]+)?" + + +class Formatter: + + _TOKENS = ( + r"\[([^\[]*)\]|\\(.)|" + "(" + "Mo|MM?M?M?" + "|Do|DDDo|DD?D?D?|ddd?d?|do?" + "|E{1,4}" + "|w[o|w]?|W[o|W]?|Qo?" + "|YYYY|YY|Y" + "|gg(ggg?)?|GG(GGG?)?" + "|a|A" + "|hh?|HH?|kk?" + "|mm?|ss?|S{1,9}" + "|x|X" + "|zz?|ZZ?" + "|LTS|LT|LL?L?L?" + ")" + ) + + _FORMAT_RE = re.compile(_TOKENS) + + _FROM_FORMAT_RE = re.compile(r"(? str + """ + Formats a DateTime instance with a given format and locale. + + :param dt: The instance to format + :type dt: pendulum.DateTime + + :param fmt: The format to use + :type fmt: str + + :param locale: The locale to use + :type locale: str or Locale or None + + :rtype: str + """ + if not locale: + locale = pendulum.get_locale() + + locale = Locale.load(locale) + + result = self._FORMAT_RE.sub( + lambda m: m.group(1) + if m.group(1) + else m.group(2) + if m.group(2) + else self._format_token(dt, m.group(3), locale), + fmt, + ) + + return decode(result) + + def _format_token( + self, dt, token, locale + ): # type: (pendulum.DateTime, str, Locale) -> str + """ + Formats a DateTime instance with a given token and locale. + + :param dt: The instance to format + :type dt: pendulum.DateTime + + :param token: The token to use + :type token: str + + :param locale: The locale to use + :type locale: Locale + + :rtype: str + """ + if token in self._DATE_FORMATS: + fmt = locale.get("custom.date_formats.{}".format(token)) + if fmt is None: + fmt = self._DEFAULT_DATE_FORMATS[token] + + return self.format(dt, fmt, locale) + + if token in self._LOCALIZABLE_TOKENS: + return self._format_localizable_token(dt, token, locale) + + if token in self._TOKENS_RULES: + return self._TOKENS_RULES[token](dt) + + # Timezone + if token in ["ZZ", "Z"]: + if dt.tzinfo is None: + return "" + + separator = ":" if token == "Z" else "" + offset = dt.utcoffset() or datetime.timedelta() + minutes = offset.total_seconds() / 60 + + if minutes >= 0: + sign = "+" + else: + sign = "-" + + hour, minute = divmod(abs(int(minutes)), 60) + + return "{}{:02d}{}{:02d}".format(sign, hour, separator, minute) + + def _format_localizable_token( + self, dt, token, locale + ): # type: (pendulum.DateTime, str, Locale) -> str + """ + Formats a DateTime instance + with a given localizable token and locale. + + :param dt: The instance to format + :type dt: pendulum.DateTime + + :param token: The token to use + :type token: str + + :param locale: The locale to use + :type locale: Locale + + :rtype: str + """ + if token == "MMM": + return locale.get("translations.months.abbreviated")[dt.month] + elif token == "MMMM": + return locale.get("translations.months.wide")[dt.month] + elif token == "dd": + return locale.get("translations.days.short")[dt.day_of_week] + elif token == "ddd": + return locale.get("translations.days.abbreviated")[dt.day_of_week] + elif token == "dddd": + return locale.get("translations.days.wide")[dt.day_of_week] + elif token == "Do": + return locale.ordinalize(dt.day) + elif token == "do": + return locale.ordinalize(dt.day_of_week) + elif token == "Mo": + return locale.ordinalize(dt.month) + elif token == "Qo": + return locale.ordinalize(dt.quarter) + elif token == "wo": + return locale.ordinalize(dt.week_of_year) + elif token == "DDDo": + return locale.ordinalize(dt.day_of_year) + elif token == "A": + key = "translations.day_periods" + if dt.hour >= 12: + key += ".pm" + else: + key += ".am" + + return locale.get(key) + else: + return token + + def parse( + self, + time, # type: str + fmt, # type: str + now, # type: pendulum.DateTime + locale=None, # type: typing.Optional[str] + ): # type: (...) -> typing.Dict[str, typing.Any] + """ + Parses a time string matching a given format as a tuple. + + :param time: The timestring + :param fmt: The format + :param now: The datetime to use as "now" + :param locale: The locale to use + + :return: The parsed elements + """ + escaped_fmt = re.escape(fmt) + + tokens = self._FROM_FORMAT_RE.findall(escaped_fmt) + if not tokens: + return time + + if not locale: + locale = pendulum.get_locale() + + locale = Locale.load(locale) + + parsed = { + "year": None, + "month": None, + "day": None, + "hour": None, + "minute": None, + "second": None, + "microsecond": None, + "tz": None, + "quarter": None, + "day_of_week": None, + "day_of_year": None, + "meridiem": None, + "timestamp": None, + } + + pattern = self._FROM_FORMAT_RE.sub( + lambda m: self._replace_tokens(m.group(0), locale), escaped_fmt + ) + + if not re.search("^" + pattern + "$", time): + raise ValueError("String does not match format {}".format(fmt)) + + re.sub(pattern, lambda m: self._get_parsed_values(m, parsed, locale, now), time) + + return self._check_parsed(parsed, now) + + def _check_parsed( + self, parsed, now + ): # type: (typing.Dict[str, typing.Any], pendulum.DateTime) -> typing.Dict[str, typing.Any] + """ + Checks validity of parsed elements. + + :param parsed: The elements to parse. + + :return: The validated elements. + """ + validated = { + "year": parsed["year"], + "month": parsed["month"], + "day": parsed["day"], + "hour": parsed["hour"], + "minute": parsed["minute"], + "second": parsed["second"], + "microsecond": parsed["microsecond"], + "tz": None, + } + + # If timestamp has been specified + # we use it and don't go any further + if parsed["timestamp"] is not None: + str_us = str(parsed["timestamp"]) + if "." in str_us: + microseconds = int("{}".format(str_us.split(".")[1].ljust(6, "0"))) + else: + microseconds = 0 + + from pendulum.helpers import local_time + + time = local_time(parsed["timestamp"], 0, microseconds) + validated["year"] = time[0] + validated["month"] = time[1] + validated["day"] = time[2] + validated["hour"] = time[3] + validated["minute"] = time[4] + validated["second"] = time[5] + validated["microsecond"] = time[6] + + return validated + + if parsed["quarter"] is not None: + if validated["year"] is not None: + dt = pendulum.datetime(validated["year"], 1, 1) + else: + dt = now + + dt = dt.start_of("year") + + while dt.quarter != parsed["quarter"]: + dt = dt.add(months=3) + + validated["year"] = dt.year + validated["month"] = dt.month + validated["day"] = dt.day + + if validated["year"] is None: + validated["year"] = now.year + + if parsed["day_of_year"] is not None: + dt = pendulum.parse( + "{}-{:>03d}".format(validated["year"], parsed["day_of_year"]) + ) + + validated["month"] = dt.month + validated["day"] = dt.day + + if parsed["day_of_week"] is not None: + dt = pendulum.datetime( + validated["year"], + validated["month"] or now.month, + validated["day"] or now.day, + ) + dt = dt.start_of("week").subtract(days=1) + dt = dt.next(parsed["day_of_week"]) + validated["year"] = dt.year + validated["month"] = dt.month + validated["day"] = dt.day + + # Meridiem + if parsed["meridiem"] is not None: + # If the time is greater than 13:00:00 + # This is not valid + if validated["hour"] is None: + raise ValueError("Invalid Date") + + t = ( + validated["hour"], + validated["minute"], + validated["second"], + validated["microsecond"], + ) + if t >= (13, 0, 0, 0): + raise ValueError("Invalid date") + + pm = parsed["meridiem"] == "pm" + validated["hour"] %= 12 + if pm: + validated["hour"] += 12 + + if validated["month"] is None: + if parsed["year"] is not None: + validated["month"] = parsed["month"] or 1 + else: + validated["month"] = parsed["month"] or now.month + + if validated["day"] is None: + if parsed["year"] is not None or parsed["month"] is not None: + validated["day"] = parsed["day"] or 1 + else: + validated["day"] = parsed["day"] or now.day + + for part in ["hour", "minute", "second", "microsecond"]: + if validated[part] is None: + validated[part] = 0 + + validated["tz"] = parsed["tz"] + + return validated + + def _get_parsed_values( + self, m, parsed, locale, now + ): # type: (typing.Match[str], typing.Dict[str, typing.Any], Locale, pendulum.DateTime) -> None + for token, index in m.re.groupindex.items(): + if token in self._LOCALIZABLE_TOKENS: + self._get_parsed_locale_value(token, m.group(index), parsed, locale) + else: + self._get_parsed_value(token, m.group(index), parsed, now) + + def _get_parsed_value( + self, token, value, parsed, now + ): # type: (str, str, typing.Dict[str, typing.Any], pendulum.DateTime) -> None + parsed_token = self._PARSE_TOKENS[token](value) + + if "Y" in token: + if token == "YY": + parsed_token = now.year // 100 * 100 + parsed_token + + parsed["year"] = parsed_token + elif "Q" == token: + parsed["quarter"] = parsed_token + elif token in ["MM", "M"]: + parsed["month"] = parsed_token + elif token in ["DDDD", "DDD"]: + parsed["day_of_year"] = parsed_token + elif "D" in token: + parsed["day"] = parsed_token + elif "H" in token: + parsed["hour"] = parsed_token + elif token in ["hh", "h"]: + if parsed_token > 12: + raise ValueError("Invalid date") + + parsed["hour"] = parsed_token + elif "m" in token: + parsed["minute"] = parsed_token + elif "s" in token: + parsed["second"] = parsed_token + elif "S" in token: + parsed["microsecond"] = parsed_token + elif token in ["d", "E"]: + parsed["day_of_week"] = parsed_token + elif token in ["X", "x"]: + parsed["timestamp"] = parsed_token + elif token in ["ZZ", "Z"]: + negative = True if value.startswith("-") else False + tz = value[1:] + if ":" not in tz: + if len(tz) == 2: + tz = "{}00".format(tz) + + off_hour = tz[0:2] + off_minute = tz[2:4] + else: + off_hour, off_minute = tz.split(":") + + offset = ((int(off_hour) * 60) + int(off_minute)) * 60 + + if negative: + offset = -1 * offset + + parsed["tz"] = pendulum.timezone(offset) + elif token == "z": + # Full timezone + if value not in pendulum.timezones: + raise ValueError("Invalid date") + + parsed["tz"] = pendulum.timezone(value) + + def _get_parsed_locale_value( + self, token, value, parsed, locale + ): # type: (str, str, typing.Dict[str, typing.Any], Locale) -> None + if token == "MMMM": + unit = "month" + match = "months.wide" + elif token == "MMM": + unit = "month" + match = "months.abbreviated" + elif token == "Do": + parsed["day"] = int(re.match(r"(\d+)", value).group(1)) + + return + elif token == "dddd": + unit = "day_of_week" + match = "days.wide" + elif token == "ddd": + unit = "day_of_week" + match = "days.abbreviated" + elif token == "dd": + unit = "day_of_week" + match = "days.short" + elif token in ["a", "A"]: + valid_values = [ + locale.translation("day_periods.am"), + locale.translation("day_periods.pm"), + ] + + if token == "a": + value = value.lower() + valid_values = list(map(lambda x: x.lower(), valid_values)) + + if value not in valid_values: + raise ValueError("Invalid date") + + parsed["meridiem"] = ["am", "pm"][valid_values.index(value)] + + return + else: + raise ValueError('Invalid token "{}"'.format(token)) + + parsed[unit] = locale.match_translation(match, value) + if value is None: + raise ValueError("Invalid date") + + def _replace_tokens(self, token, locale): # type: (str, Locale) -> str + if token.startswith("[") and token.endswith("]"): + return token[1:-1] + elif token.startswith("\\"): + if len(token) == 2 and token[1] in {"[", "]"}: + return "" + + return token + elif token not in self._REGEX_TOKENS and token not in self._LOCALIZABLE_TOKENS: + raise ValueError("Unsupported token: {}".format(token)) + + if token in self._LOCALIZABLE_TOKENS: + values = self._LOCALIZABLE_TOKENS[token] + if callable(values): + candidates = values(locale) + else: + candidates = tuple( + locale.translation(self._LOCALIZABLE_TOKENS[token]).values() + ) + else: + candidates = self._REGEX_TOKENS[token] + + if not candidates: + raise ValueError("Unsupported token: {}".format(token)) + + if not isinstance(candidates, tuple): + candidates = (candidates,) + + pattern = "(?P<{}>{})".format(token, "|".join([decode(p) for p in candidates])) + + return pattern -- cgit v1.2.3