from __future__ import annotations

import datetime
import re

from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Match
from typing import Sequence
from typing import cast

import pendulum

from pendulum.locales.locale import Locale


if TYPE_CHECKING:
    from pendulum import Timezone

# Regex fragments used to build the parsing pattern for each format token.
_MATCH_1 = r"\d"
_MATCH_2 = r"\d\d"
_MATCH_3 = r"\d{3}"
_MATCH_4 = r"\d{4}"
_MATCH_6 = r"[+-]?\d{6}"
_MATCH_1_TO_2 = r"\d\d?"
_MATCH_1_TO_2_LEFT_PAD = r"[0-9 ]\d?"
_MATCH_1_TO_3 = r"\d{1,3}"
_MATCH_1_TO_4 = r"\d{1,4}"
_MATCH_1_TO_6 = r"[+-]?\d{1,6}"
_MATCH_3_TO_4 = r"\d{3}\d?"
_MATCH_5_TO_6 = r"\d{5}\d?"
_MATCH_UNSIGNED = r"\d+"
_MATCH_SIGNED = r"[+-]?\d+"
_MATCH_OFFSET = r"[Zz]|[+-]\d\d:?\d\d"
_MATCH_SHORT_OFFSET = r"[Zz]|[+-]\d\d(?::?\d\d)?"
_MATCH_TIMESTAMP = r"[+-]?\d+(\.\d{1,6})?"
_MATCH_WORD = (
    "(?i)[0-9]*"
    "['a-z\u00A0-\u05FF\u0700-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+"
    r"|[\u0600-\u06FF/]+(\s*?[\u0600-\u06FF]+){1,2}"
)
_MATCH_TIMEZONE = "[A-Za-z0-9-+]+(/[A-Za-z0-9-+_]+)?"


class Formatter:
    """
    Formats DateTime instances to strings and parses strings back into
    their date/time components, both driven by a token-based format.
    """

    # Group 1: text escaped with [brackets]; group 2: a backslash-escaped
    # character; group 3: a format token.
    _TOKENS: str = (
        r"\[([^\[]*)\]|\\(.)|"
        "("
        "Mo|MM?M?M?"
        "|Do|DDDo|DD?D?D?|ddd?d?|do?"
        "|E{1,4}"
        # Was "w[o|w]?|W[o|W]?": inside a character class "|" is a literal
        # pipe, not an alternation, so "w|" was wrongly consumed as one token.
        "|w[ow]?|W[oW]?|Qo?"
        "|YYYY|YY|Y"
        "|gg(ggg?)?|GG(GGG?)?"
        "|a|A"
        "|hh?|HH?|kk?"
        "|mm?|ss?|S{1,9}"
        "|x|X"
        "|zz?|ZZ?"
        "|LTS|LT|LL?L?L?"
        ")"
    )

    _FORMAT_RE: re.Pattern[str] = re.compile(_TOKENS)

    # NOTE(review): in the reviewed copy of this file everything from the
    # middle of this regex up to the ``format`` signature was missing (it
    # looks like text between "<" and ">" was stripped). The definitions
    # below, down to and including the ``format`` signature, were restored
    # to match the names and shapes the methods of this class rely on --
    # confirm every entry against the canonical source before merging.
    _FROM_FORMAT_RE: re.Pattern[str] = re.compile(
        r"(?<!\\\[)" + _TOKENS + r"(?!\\\])"
    )

    # Token -> translation key, callable producing regex candidates for a
    # locale, or None when the token has no parsing support.
    _LOCALIZABLE_TOKENS: dict[
        str, str | Callable[[Locale], Sequence[str]] | None
    ] = {
        "Qo": None,
        "MMMM": "months.wide",
        "MMM": "months.abbreviated",
        "Mo": None,
        "DDDo": None,
        "Do": lambda locale: tuple(
            rf"\d+{o}" for o in locale.get("custom.ordinal").values()
        ),
        "dddd": "days.wide",
        "ddd": "days.abbreviated",
        "dd": "days.short",
        "do": None,
        "Wo": None,
        "wo": None,
        "A": lambda locale: (
            locale.translation("day_periods.am"),
            locale.translation("day_periods.pm"),
        ),
        "a": lambda locale: (
            locale.translation("day_periods.am").lower(),
            locale.translation("day_periods.pm").lower(),
        ),
    }

    # Token -> function rendering that token for a DateTime.
    _TOKENS_RULES: dict[str, Callable[[pendulum.DateTime], str]] = {
        # Year
        "YYYY": lambda dt: f"{dt.year:d}",
        "YY": lambda dt: f"{dt.year:d}"[2:],
        "Y": lambda dt: f"{dt.year:d}",
        # Quarter
        "Q": lambda dt: f"{dt.quarter:d}",
        # Month
        "MM": lambda dt: f"{dt.month:02d}",
        "M": lambda dt: f"{dt.month:d}",
        # Day
        "DD": lambda dt: f"{dt.day:02d}",
        "D": lambda dt: f"{dt.day:d}",
        # Day of Year
        "DDDD": lambda dt: f"{dt.day_of_year:03d}",
        "DDD": lambda dt: f"{dt.day_of_year:d}",
        # Day of Week
        "d": lambda dt: f"{dt.day_of_week:d}",
        # Day of ISO Week
        "E": lambda dt: f"{dt.isoweekday():d}",
        # Hour
        "HH": lambda dt: f"{dt.hour:02d}",
        "H": lambda dt: f"{dt.hour:d}",
        "hh": lambda dt: f"{dt.hour % 12 or 12:02d}",
        "h": lambda dt: f"{dt.hour % 12 or 12:d}",
        # Minute
        "mm": lambda dt: f"{dt.minute:02d}",
        "m": lambda dt: f"{dt.minute:d}",
        # Second
        "ss": lambda dt: f"{dt.second:02d}",
        "s": lambda dt: f"{dt.second:d}",
        # Fractional second
        "S": lambda dt: f"{dt.microsecond // 100000:01d}",
        "SS": lambda dt: f"{dt.microsecond // 10000:02d}",
        "SSS": lambda dt: f"{dt.microsecond // 1000:03d}",
        "SSSS": lambda dt: f"{dt.microsecond // 100:04d}",
        "SSSSS": lambda dt: f"{dt.microsecond // 10:05d}",
        "SSSSSS": lambda dt: f"{dt.microsecond:06d}",
        # Timestamp
        "X": lambda dt: f"{dt.int_timestamp:d}",
        "x": lambda dt: f"{dt.int_timestamp * 1000 + dt.microsecond // 1000:d}",
        # Timezone
        "zz": lambda dt: f'{dt.tzname() if dt.tzinfo is not None else ""}',
        "z": lambda dt: f'{dt.timezone_name or ""}',
    }

    # Localized format token -> locale key holding its expansion.
    _DATE_FORMATS: dict[str, str] = {
        "LTS": "formats.time.full",
        "LT": "formats.time.short",
        "L": "formats.date.short",
        "LL": "formats.date.long",
        "LLL": "formats.datetime.long",
        "LLLL": "formats.datetime.full",
    }

    # Fallback expansions used when the locale does not define one.
    _DEFAULT_DATE_FORMATS: dict[str, str] = {
        "LTS": "h:mm:ss A",
        "LT": "h:mm A",
        "L": "MM/DD/YYYY",
        "LL": "MMMM D, YYYY",
        "LLL": "MMMM D, YYYY h:mm A",
        "LLLL": "dddd, MMMM D, YYYY h:mm A",
    }

    # Token -> regex (or tuple of alternative regexes) used when parsing.
    _REGEX_TOKENS: dict[str, str | Sequence[str] | None] = {
        "Y": _MATCH_SIGNED,
        "YY": (_MATCH_1_TO_2, _MATCH_2),
        "YYYY": (_MATCH_1_TO_4, _MATCH_4),
        "Q": _MATCH_1,
        "Qo": None,
        "M": _MATCH_1_TO_2,
        "MM": (_MATCH_1_TO_2, _MATCH_2),
        "MMM": _MATCH_WORD,
        "MMMM": _MATCH_WORD,
        "D": _MATCH_1_TO_2,
        "DD": (_MATCH_1_TO_2_LEFT_PAD, _MATCH_2),
        "DDD": _MATCH_1_TO_3,
        "DDDD": _MATCH_3,
        "dddd": _MATCH_WORD,
        "ddd": _MATCH_WORD,
        "dd": _MATCH_WORD,
        "d": _MATCH_1,
        "E": _MATCH_1,
        "Do": None,
        "H": _MATCH_1_TO_2,
        "HH": (_MATCH_1_TO_2, _MATCH_2),
        "h": _MATCH_1_TO_2,
        "hh": (_MATCH_1_TO_2, _MATCH_2),
        "m": _MATCH_1_TO_2,
        "mm": (_MATCH_1_TO_2, _MATCH_2),
        "s": _MATCH_1_TO_2,
        "ss": (_MATCH_1_TO_2, _MATCH_2),
        "S": (_MATCH_1_TO_3, _MATCH_1),
        "SS": (_MATCH_1_TO_3, _MATCH_2),
        "SSS": (_MATCH_1_TO_3, _MATCH_3),
        "SSSS": _MATCH_UNSIGNED,
        "SSSSS": _MATCH_UNSIGNED,
        "SSSSSS": _MATCH_UNSIGNED,
        "x": _MATCH_SIGNED,
        "X": _MATCH_TIMESTAMP,
        "ZZ": _MATCH_SHORT_OFFSET,
        "Z": _MATCH_OFFSET,
        "z": _MATCH_TIMEZONE,
    }

    # Token -> converter applied to the matched text when parsing.
    _PARSE_TOKENS: dict[str, Callable[[str], Any]] = {
        "YYYY": lambda year: int(year),
        "YY": lambda year: int(year),
        "Q": lambda quarter: int(quarter),
        "MMMM": lambda month: month,
        "MMM": lambda month: month,
        "MM": lambda month: int(month),
        "M": lambda month: int(month),
        "DDDD": lambda day: int(day),
        "DDD": lambda day: int(day),
        "DD": lambda day: int(day),
        "D": lambda day: int(day),
        "Do": lambda day: int(day),
        "dddd": lambda weekday: weekday,
        "ddd": lambda weekday: weekday,
        "dd": lambda weekday: weekday,
        "d": lambda weekday: int(weekday) % 7,
        "E": lambda weekday: int(weekday),
        "HH": lambda hour: int(hour),
        "H": lambda hour: int(hour),
        "hh": lambda hour: int(hour),
        "h": lambda hour: int(hour),
        "mm": lambda minute: int(minute),
        "m": lambda minute: int(minute),
        "ss": lambda second: int(second),
        "s": lambda second: int(second),
        "S": lambda us: int(us) * 100000,
        "SS": lambda us: int(us) * 10000,
        "SSS": lambda us: int(us) * 1000,
        "SSSS": lambda us: int(us) * 100,
        "SSSSS": lambda us: int(us) * 10,
        "SSSSSS": lambda us: int(us),
        "a": lambda meridiem: meridiem,
        "X": lambda ts: float(ts),
        "x": lambda ts: float(ts) / 1e3,
        "ZZ": str,
        "Z": str,
        "z": str,
    }

    def format(
        self, dt: pendulum.DateTime, fmt: str, locale: str | Locale | None = None
    ) -> str:
        """
        Formats a DateTime instance with a given format and locale.

        :param dt: The instance to format
        :param fmt: The format to use
        :param locale: The locale to use
        """
        loaded_locale: Locale = Locale.load(locale or pendulum.get_locale())

        def _substitute(m: Match[str]) -> str:
            # Bracket-escaped text and backslash-escaped characters are
            # emitted verbatim; only group 3 is an actual token.
            if m.group(1):
                return cast(str, m.group(1))
            if m.group(2):
                return cast(str, m.group(2))
            return self._format_token(dt, m.group(3), loaded_locale)

        return self._FORMAT_RE.sub(_substitute, fmt)

    def _format_token(self, dt: pendulum.DateTime, token: str, locale: Locale) -> str:
        """
        Formats a DateTime instance with a given token and locale.

        :param dt: The instance to format
        :param token: The token to use
        :param locale: The locale to use
        """
        if token in self._DATE_FORMATS:
            # Localized composite formats expand to another format string,
            # which is then formatted recursively.
            fmt = locale.get(f"custom.date_formats.{token}")
            if fmt is None:
                fmt = self._DEFAULT_DATE_FORMATS[token]

            return self.format(dt, fmt, locale)

        if token in self._LOCALIZABLE_TOKENS:
            return self._format_localizable_token(dt, token, locale)

        if token in self._TOKENS_RULES:
            return self._TOKENS_RULES[token](dt)

        # Timezone
        if token in ["ZZ", "Z"]:
            if dt.tzinfo is None:
                return ""

            separator = ":" if token == "Z" else ""
            offset = dt.utcoffset() or datetime.timedelta()
            minutes = offset.total_seconds() / 60

            sign = "+" if minutes >= 0 else "-"
            hour, minute = divmod(abs(int(minutes)), 60)

            return f"{sign}{hour:02d}{separator}{minute:02d}"

        # Unknown tokens are emitted unchanged.
        return token

    def _format_localizable_token(
        self, dt: pendulum.DateTime, token: str, locale: Locale
    ) -> str:
        """
        Formats a DateTime instance
        with a given localizable token and locale.

        :param dt: The instance to format
        :param token: The token to use
        :param locale: The locale to use
        """
        if token == "MMM":
            return cast(str, locale.get("translations.months.abbreviated")[dt.month])
        elif token == "MMMM":
            return cast(str, locale.get("translations.months.wide")[dt.month])
        elif token == "dd":
            return cast(str, locale.get("translations.days.short")[dt.day_of_week])
        elif token == "ddd":
            return cast(
                str, locale.get("translations.days.abbreviated")[dt.day_of_week]
            )
        elif token == "dddd":
            return cast(str, locale.get("translations.days.wide")[dt.day_of_week])
        elif token == "Do":
            return locale.ordinalize(dt.day)
        elif token == "do":
            return locale.ordinalize(dt.day_of_week)
        elif token == "Mo":
            return locale.ordinalize(dt.month)
        elif token == "Qo":
            return locale.ordinalize(dt.quarter)
        elif token == "wo":
            return locale.ordinalize(dt.week_of_year)
        elif token == "DDDo":
            return locale.ordinalize(dt.day_of_year)
        elif token == "A":
            key = "translations.day_periods"
            if dt.hour >= 12:
                key += ".pm"
            else:
                key += ".am"

            return cast(str, locale.get(key))
        else:
            return token

    def parse(
        self,
        time: str,
        fmt: str,
        now: pendulum.DateTime,
        locale: str | None = None,
    ) -> dict[str, Any]:
        """
        Parses a time string matching a given format as a tuple.

        :param time: The timestring
        :param fmt: The format
        :param now: The datetime to use as "now"
        :param locale: The locale to use

        :return: The parsed elements
        :raises ValueError: if the format holds no token, uses an
            unsupported token, or the string does not match the format
        """
        escaped_fmt = re.escape(fmt)

        if not self._FROM_FORMAT_RE.search(escaped_fmt):
            raise ValueError("The given time string does not match the given format")

        if not locale:
            locale = pendulum.get_locale()

        loaded_locale: Locale = Locale.load(locale)

        parsed: dict[str, Any] = {
            "year": None,
            "month": None,
            "day": None,
            "hour": None,
            "minute": None,
            "second": None,
            "microsecond": None,
            "tz": None,
            "quarter": None,
            "day_of_week": None,
            "day_of_year": None,
            "meridiem": None,
            "timestamp": None,
        }

        pattern = self._FROM_FORMAT_RE.sub(
            lambda m: self._replace_tokens(m.group(0), loaded_locale), escaped_fmt
        )

        matched = re.search("^" + pattern + "$", time)
        if matched is None:
            raise ValueError(f"String does not match format {fmt}")

        # Extract values from the anchored match itself. Re-running the
        # pattern unanchored (as a second pass) could settle on a shorter
        # match, or match several times, and fill ``parsed`` with values
        # that differ from the ones that were just validated.
        self._get_parsed_values(matched, parsed, loaded_locale, now)

        return self._check_parsed(parsed, now)

    def _check_parsed(
        self, parsed: dict[str, Any], now: pendulum.DateTime
    ) -> dict[str, Any]:
        """
        Checks validity of parsed elements.

        :param parsed: The elements to parse.
        :param now: The datetime used to fill in missing elements.

        :return: The validated elements.
        :raises ValueError: if the elements are inconsistent
        """
        validated: dict[str, int | Timezone | None] = {
            "year": parsed["year"],
            "month": parsed["month"],
            "day": parsed["day"],
            "hour": parsed["hour"],
            "minute": parsed["minute"],
            "second": parsed["second"],
            "microsecond": parsed["microsecond"],
            "tz": None,
        }

        # If timestamp has been specified
        # we use it and don't go any further
        if parsed["timestamp"] is not None:
            str_us = str(parsed["timestamp"])
            if "." in str_us:
                microseconds = int(f'{str_us.split(".")[1].ljust(6, "0")}')
            else:
                microseconds = 0

            from pendulum.helpers import local_time

            time = local_time(parsed["timestamp"], 0, microseconds)
            validated["year"] = time[0]
            validated["month"] = time[1]
            validated["day"] = time[2]
            validated["hour"] = time[3]
            validated["minute"] = time[4]
            validated["second"] = time[5]
            validated["microsecond"] = time[6]

            return validated

        if parsed["quarter"] is not None:
            # The quarter token matches any single digit; without this guard
            # the search loop below never finds quarters such as 0 or 5.
            if parsed["quarter"] not in (1, 2, 3, 4):
                raise ValueError("Invalid date")

            if validated["year"] is not None:
                dt = pendulum.datetime(cast(int, validated["year"]), 1, 1)
            else:
                dt = now

            dt = dt.start_of("year")

            while dt.quarter != parsed["quarter"]:
                dt = dt.add(months=3)

            validated["year"] = dt.year
            validated["month"] = dt.month
            validated["day"] = dt.day

        if validated["year"] is None:
            validated["year"] = now.year

        if parsed["day_of_year"] is not None:
            dt = cast(
                pendulum.DateTime,
                pendulum.parse(f'{validated["year"]}-{parsed["day_of_year"]:>03d}'),
            )

            validated["month"] = dt.month
            validated["day"] = dt.day

        if parsed["day_of_week"] is not None:
            dt = pendulum.datetime(
                cast(int, validated["year"]),
                cast(int, validated["month"]) or now.month,
                cast(int, validated["day"]) or now.day,
            )
            dt = dt.start_of("week").subtract(days=1)
            dt = dt.next(parsed["day_of_week"])
            validated["year"] = dt.year
            validated["month"] = dt.month
            validated["day"] = dt.day

        # Meridiem
        if parsed["meridiem"] is not None:
            if validated["hour"] is None:
                raise ValueError("Invalid Date")

            hour = cast(int, validated["hour"])

            # A time of 13:00:00 or later cannot carry an AM/PM marker.
            # Only the hour needs checking: comparing the whole
            # (hour, minute, second, microsecond) tuple raised TypeError
            # when the hour was 13 and the minute had not been parsed.
            if hour >= 13:
                raise ValueError("Invalid date")

            hour %= 12
            if parsed["meridiem"] == "pm":
                hour += 12

            validated["hour"] = hour

        if validated["month"] is None:
            if parsed["year"] is not None:
                validated["month"] = parsed["month"] or 1
            else:
                validated["month"] = parsed["month"] or now.month

        if validated["day"] is None:
            if parsed["year"] is not None or parsed["month"] is not None:
                validated["day"] = parsed["day"] or 1
            else:
                validated["day"] = parsed["day"] or now.day

        for part in ["hour", "minute", "second", "microsecond"]:
            if validated[part] is None:
                validated[part] = 0

        validated["tz"] = parsed["tz"]

        return validated

    def _get_parsed_values(
        self,
        m: Match[str],
        parsed: dict[str, Any],
        locale: Locale,
        now: pendulum.DateTime,
    ) -> None:
        """
        Stores the value of every named group of a match in ``parsed``.

        :param m: The match of the time string against the format pattern
        :param parsed: The elements to fill in
        :param locale: The locale to use
        :param now: The datetime to use as "now"
        """
        # Named groups carry the token they were generated from.
        for token, index in m.re.groupindex.items():
            if token in self._LOCALIZABLE_TOKENS:
                self._get_parsed_locale_value(token, m.group(index), parsed, locale)
            else:
                self._get_parsed_value(token, m.group(index), parsed, now)

    def _get_parsed_value(
        self,
        token: str,
        value: str,
        parsed: dict[str, Any],
        now: pendulum.DateTime,
    ) -> None:
        """
        Converts the text matched for a non-localizable token and stores
        it in ``parsed``.

        :param token: The token the value was matched for
        :param value: The matched text
        :param parsed: The elements to fill in
        :param now: The datetime to use as "now"
        :raises ValueError: if the value is not valid for the token
        """
        parsed_token = self._PARSE_TOKENS[token](value)

        if "Y" in token:
            if token == "YY":
                # Two-digit years are placed in the century of ``now``.
                parsed_token = now.year // 100 * 100 + parsed_token

            parsed["year"] = parsed_token
        elif token == "Q":
            parsed["quarter"] = parsed_token
        elif token in ["MM", "M"]:
            parsed["month"] = parsed_token
        elif token in ["DDDD", "DDD"]:
            parsed["day_of_year"] = parsed_token
        elif "D" in token:
            parsed["day"] = parsed_token
        elif "H" in token:
            parsed["hour"] = parsed_token
        elif token in ["hh", "h"]:
            if parsed_token > 12:
                raise ValueError("Invalid date")

            parsed["hour"] = parsed_token
        elif "m" in token:
            parsed["minute"] = parsed_token
        elif "s" in token:
            parsed["second"] = parsed_token
        elif "S" in token:
            parsed["microsecond"] = parsed_token
        elif token in ["d", "E"]:
            parsed["day_of_week"] = parsed_token
        elif token in ["X", "x"]:
            parsed["timestamp"] = parsed_token
        elif token in ["ZZ", "Z"]:
            if value in ("Z", "z"):
                # The offset patterns accept a literal "Z"; it has no digits
                # to split, so it is handled here as a zero (UTC) offset
                # instead of failing on int("").
                offset = 0
            else:
                negative = value.startswith("-")
                tz = value[1:]
                if ":" not in tz:
                    if len(tz) == 2:
                        tz = f"{tz}00"

                    off_hour = tz[0:2]
                    off_minute = tz[2:4]
                else:
                    off_hour, off_minute = tz.split(":")

                offset = ((int(off_hour) * 60) + int(off_minute)) * 60

                if negative:
                    offset = -1 * offset

            parsed["tz"] = pendulum.timezone(offset)
        elif token == "z":
            # Full timezone
            if value not in pendulum.timezones():
                raise ValueError("Invalid date")

            parsed["tz"] = pendulum.timezone(value)

    def _get_parsed_locale_value(
        self, token: str, value: str, parsed: dict[str, Any], locale: Locale
    ) -> None:
        """
        Converts the text matched for a localizable token and stores it
        in ``parsed``.

        :param token: The token the value was matched for
        :param value: The matched text
        :param parsed: The elements to fill in
        :param locale: The locale to use
        :raises ValueError: if the token is not supported or the value is
            not a known translation
        """
        if token == "MMMM":
            unit = "month"
            match = "months.wide"
        elif token == "MMM":
            unit = "month"
            match = "months.abbreviated"
        elif token == "Do":
            # Only the leading digits matter, the ordinal suffix is dropped.
            parsed["day"] = int(cast(Match[str], re.match(r"(\d+)", value)).group(1))

            return
        elif token == "dddd":
            unit = "day_of_week"
            match = "days.wide"
        elif token == "ddd":
            unit = "day_of_week"
            match = "days.abbreviated"
        elif token == "dd":
            unit = "day_of_week"
            match = "days.short"
        elif token in ["a", "A"]:
            valid_values = [
                locale.translation("day_periods.am"),
                locale.translation("day_periods.pm"),
            ]

            if token == "a":
                value = value.lower()
                valid_values = [x.lower() for x in valid_values]

            if value not in valid_values:
                raise ValueError("Invalid date")

            parsed["meridiem"] = ["am", "pm"][valid_values.index(value)]

            return
        else:
            raise ValueError(f'Invalid token "{token}"')

        translated = locale.match_translation(match, value)
        # Check the lookup result, not the input text: ``value`` is the
        # matched string and can never be None. Compare with ``is None``
        # because 0 is a legitimate day of week.
        # NOTE(review): presumes match_translation returns None when nothing
        # matches -- confirm against Locale.
        if translated is None:
            raise ValueError("Invalid date")

        parsed[unit] = translated

    def _replace_tokens(self, token: str, locale: Locale) -> str:
        """
        Builds the regex fragment matching a piece of an escaped format.

        :param token: The piece of the format matched by the token regex
        :param locale: The locale to use

        :return: The text to match literally for escaped pieces, or a named
            group (named after the token) matching the token's values
        :raises ValueError: if the token cannot be parsed
        """
        if token.startswith("[") and token.endswith("]"):
            return token[1:-1]
        elif token.startswith("\\"):
            if len(token) == 2 and token[1] in {"[", "]"}:
                return ""

            return token
        elif token not in self._REGEX_TOKENS and token not in self._LOCALIZABLE_TOKENS:
            raise ValueError(f"Unsupported token: {token}")

        if token in self._LOCALIZABLE_TOKENS:
            values = self._LOCALIZABLE_TOKENS[token]
            if callable(values):
                candidates = values(locale)
            elif values is None:
                # No translation key to look up: report the token as
                # unsupported rather than failing inside the locale lookup.
                raise ValueError(f"Unsupported token: {token}")
            else:
                candidates = tuple(locale.translation(cast(str, values)).values())
        else:
            candidates = cast(Sequence[str], self._REGEX_TOKENS[token])

        if not candidates:
            raise ValueError(f"Unsupported token: {token}")

        if not isinstance(candidates, tuple):
            candidates = (cast(str, candidates),)

        pattern = f'(?P<{token}>{"|".join(candidates)})'

        return pattern