diff options
Diffstat (limited to 'pendulum/parsing')
-rw-r--r-- | pendulum/parsing/__init__.py | 467 | ||||
-rw-r--r-- | pendulum/parsing/_iso8601.c | 2732 | ||||
-rw-r--r-- | pendulum/parsing/_iso8601.pyi | 22 | ||||
-rw-r--r-- | pendulum/parsing/exceptions/__init__.py | 9 | ||||
-rw-r--r-- | pendulum/parsing/iso8601.py | 901 |
5 files changed, 2076 insertions, 2055 deletions
diff --git a/pendulum/parsing/__init__.py b/pendulum/parsing/__init__.py index 400f119..0e64065 100644 --- a/pendulum/parsing/__init__.py +++ b/pendulum/parsing/__init__.py @@ -1,234 +1,233 @@ -import copy
-import os
-import re
-import struct
-
-from datetime import date
-from datetime import datetime
-from datetime import time
-
-from dateutil import parser
-
-from .exceptions import ParserError
-
-
-with_extensions = os.getenv("PENDULUM_EXTENSIONS", "1") == "1"
-
-try:
- if not with_extensions or struct.calcsize("P") == 4:
- raise ImportError()
-
- from ._iso8601 import parse_iso8601
-except ImportError:
- from .iso8601 import parse_iso8601
-
-
-COMMON = re.compile(
- # Date (optional)
- "^"
- "(?P<date>"
- " (?P<classic>" # Classic date (YYYY-MM-DD)
- r" (?P<year>\d{4})" # Year
- " (?P<monthday>"
- r" (?P<monthsep>[/:])?(?P<month>\d{2})" # Month (optional)
- r" ((?P<daysep>[/:])?(?P<day>\d{2}))" # Day (optional)
- " )?"
- " )"
- ")?"
- # Time (optional)
- "(?P<time>"
- r" (?P<timesep>\ )?" # Separator (space)
- r" (?P<hour>\d{1,2}):(?P<minute>\d{1,2})?(?::(?P<second>\d{1,2}))?" # HH:mm:ss (optional mm and ss)
- # Subsecond part (optional)
- " (?P<subsecondsection>"
- " (?:[.|,])" # Subsecond separator (optional)
- r" (?P<subsecond>\d{1,9})" # Subsecond
- " )?"
- ")?"
- "$",
- re.VERBOSE,
-)
-
-
-DEFAULT_OPTIONS = {
- "day_first": False,
- "year_first": True,
- "strict": True,
- "exact": False,
- "now": None,
-}
-
-
-def parse(text, **options):
- """
- Parses a string with the given options.
-
- :param text: The string to parse.
- :type text: str
-
- :rtype: Parsed
- """
- _options = copy.copy(DEFAULT_OPTIONS)
- _options.update(options)
-
- return _normalize(_parse(text, **_options), **_options)
-
-
-def _normalize(parsed, **options):
- """
- Normalizes the parsed element.
-
- :param parsed: The parsed elements.
- :type parsed: Parsed
-
- :rtype: Parsed
- """
- if options.get("exact"):
- return parsed
-
- if isinstance(parsed, time):
- now = options["now"] or datetime.now()
-
- return datetime(
- now.year,
- now.month,
- now.day,
- parsed.hour,
- parsed.minute,
- parsed.second,
- parsed.microsecond,
- )
- elif isinstance(parsed, date) and not isinstance(parsed, datetime):
- return datetime(parsed.year, parsed.month, parsed.day)
-
- return parsed
-
-
-def _parse(text, **options):
- # Trying to parse ISO8601
- try:
- return parse_iso8601(text)
- except ValueError:
- pass
-
- try:
- return _parse_iso8601_interval(text)
- except ValueError:
- pass
-
- try:
- return _parse_common(text, **options)
- except ParserError:
- pass
-
- # We couldn't parse the string
- # so we fallback on the dateutil parser
- # If not strict
- if options.get("strict", True):
- raise ParserError("Unable to parse string [{}]".format(text))
-
- try:
- dt = parser.parse(
- text, dayfirst=options["day_first"], yearfirst=options["year_first"]
- )
- except ValueError:
- raise ParserError("Invalid date string: {}".format(text))
-
- return dt
-
-
-def _parse_common(text, **options):
- """
- Tries to parse the string as a common datetime format.
-
- :param text: The string to parse.
- :type text: str
-
- :rtype: dict or None
- """
- m = COMMON.match(text)
- has_date = False
- year = 0
- month = 1
- day = 1
-
- if not m:
- raise ParserError("Invalid datetime string")
-
- if m.group("date"):
- # A date has been specified
- has_date = True
-
- year = int(m.group("year"))
-
- if not m.group("monthday"):
- # No month and day
- month = 1
- day = 1
- else:
- if options["day_first"]:
- month = int(m.group("day"))
- day = int(m.group("month"))
- else:
- month = int(m.group("month"))
- day = int(m.group("day"))
-
- if not m.group("time"):
- return date(year, month, day)
-
- # Grabbing hh:mm:ss
- hour = int(m.group("hour"))
-
- minute = int(m.group("minute"))
-
- if m.group("second"):
- second = int(m.group("second"))
- else:
- second = 0
-
- # Grabbing subseconds, if any
- microsecond = 0
- if m.group("subsecondsection"):
- # Limiting to 6 chars
- subsecond = m.group("subsecond")[:6]
-
- microsecond = int("{:0<6}".format(subsecond))
-
- if has_date:
- return datetime(year, month, day, hour, minute, second, microsecond)
-
- return time(hour, minute, second, microsecond)
-
-
-class _Interval:
- """
- Special class to handle ISO 8601 intervals
- """
-
- def __init__(self, start=None, end=None, duration=None):
- self.start = start
- self.end = end
- self.duration = duration
-
-
-def _parse_iso8601_interval(text):
- if "/" not in text:
- raise ParserError("Invalid interval")
-
- first, last = text.split("/")
- start = end = duration = None
-
- if first[0] == "P":
- # duration/end
- duration = parse_iso8601(first)
- end = parse_iso8601(last)
- elif last[0] == "P":
- # start/duration
- start = parse_iso8601(first)
- duration = parse_iso8601(last)
- else:
- # start/end
- start = parse_iso8601(first)
- end = parse_iso8601(last)
-
- return _Interval(start, end, duration)
+from __future__ import annotations + +import contextlib +import copy +import os +import re +import struct + +from datetime import date +from datetime import datetime +from datetime import time +from typing import Any +from typing import Optional +from typing import cast + +from dateutil import parser + +from pendulum.parsing.exceptions import ParserError + +with_extensions = os.getenv("PENDULUM_EXTENSIONS", "1") == "1" + +try: + if not with_extensions or struct.calcsize("P") == 4: + raise ImportError() + + from pendulum.parsing._iso8601 import Duration + from pendulum.parsing._iso8601 import parse_iso8601 +except ImportError: + from pendulum.duration import Duration # type: ignore[misc] + from pendulum.parsing.iso8601 import parse_iso8601 # type: ignore[misc] + +COMMON = re.compile( + # Date (optional) # noqa: E800 + "^" + "(?P<date>" + " (?P<classic>" # Classic date (YYYY-MM-DD) + r" (?P<year>\d{4})" # Year + " (?P<monthday>" + r" (?P<monthsep>[/:])?(?P<month>\d{2})" # Month (optional) + r" ((?P<daysep>[/:])?(?P<day>\d{2}))" # Day (optional) + " )?" + " )" + ")?" + # Time (optional) # noqa: E800 + "(?P<time>" + r" (?P<timesep>\ )?" # Separator (space) + r" (?P<hour>\d{1,2}):(?P<minute>\d{1,2})?(?::(?P<second>\d{1,2}))?" # HH:mm:ss (optional mm and ss) + # Subsecond part (optional) + " (?P<subsecondsection>" + " (?:[.|,])" # Subsecond separator (optional) + r" (?P<subsecond>\d{1,9})" # Subsecond + " )?" + ")?" + "$", + re.VERBOSE, +) + +DEFAULT_OPTIONS = { + "day_first": False, + "year_first": True, + "strict": True, + "exact": False, + "now": None, +} + + +def parse(text: str, **options: Any) -> datetime | date | time | _Interval | Duration: + """ + Parses a string with the given options. + + :param text: The string to parse. 
+ """ + _options: dict[str, Any] = copy.copy(DEFAULT_OPTIONS) + _options.update(options) + + return _normalize(_parse(text, **_options), **_options) + + +def _normalize( + parsed: datetime | date | time | _Interval | Duration, **options: Any +) -> datetime | date | time | _Interval | Duration: + """ + Normalizes the parsed element. + + :param parsed: The parsed elements. + """ + if options.get("exact"): + return parsed + + if isinstance(parsed, time): + now = cast(Optional[datetime], options["now"]) or datetime.now() + + return datetime( + now.year, + now.month, + now.day, + parsed.hour, + parsed.minute, + parsed.second, + parsed.microsecond, + ) + elif isinstance(parsed, date) and not isinstance(parsed, datetime): + return datetime(parsed.year, parsed.month, parsed.day) + + return parsed + + +def _parse(text: str, **options: Any) -> datetime | date | time | _Interval | Duration: + # Trying to parse ISO8601 + with contextlib.suppress(ValueError): + return parse_iso8601(text) + + with contextlib.suppress(ValueError): + return _parse_iso8601_interval(text) + + with contextlib.suppress(ParserError): + return _parse_common(text, **options) + + # We couldn't parse the string + # so we fallback on the dateutil parser + # If not strict + if options.get("strict", True): + raise ParserError(f"Unable to parse string [{text}]") + + try: + dt = parser.parse( + text, dayfirst=options["day_first"], yearfirst=options["year_first"] + ) + except ValueError: + raise ParserError(f"Invalid date string: {text}") + + return dt + + +def _parse_common(text: str, **options: Any) -> datetime | date | time: + """ + Tries to parse the string as a common datetime format. + + :param text: The string to parse. 
+ """ + m = COMMON.match(text) + has_date = False + year = 0 + month = 1 + day = 1 + + if not m: + raise ParserError("Invalid datetime string") + + if m.group("date"): + # A date has been specified + has_date = True + + year = int(m.group("year")) + + if not m.group("monthday"): + # No month and day + month = 1 + day = 1 + else: + if options["day_first"]: + month = int(m.group("day")) + day = int(m.group("month")) + else: + month = int(m.group("month")) + day = int(m.group("day")) + + if not m.group("time"): + return date(year, month, day) + + # Grabbing hh:mm:ss + hour = int(m.group("hour")) + + minute = int(m.group("minute")) + + if m.group("second"): + second = int(m.group("second")) + else: + second = 0 + + # Grabbing subseconds, if any + microsecond = 0 + if m.group("subsecondsection"): + # Limiting to 6 chars + subsecond = m.group("subsecond")[:6] + + microsecond = int(f"{subsecond:0<6}") + + if has_date: + return datetime(year, month, day, hour, minute, second, microsecond) + + return time(hour, minute, second, microsecond) + + +class _Interval: + """ + Special class to handle ISO 8601 intervals + """ + + def __init__( + self, + start: datetime | None = None, + end: datetime | None = None, + duration: Duration | None = None, + ) -> None: + self.start = start + self.end = end + self.duration = duration + + +def _parse_iso8601_interval(text: str) -> _Interval: + if "/" not in text: + raise ParserError("Invalid interval") + + first, last = text.split("/") + start = end = duration = None + + if first[0] == "P": + # duration/end + duration = parse_iso8601(first) + end = parse_iso8601(last) + elif last[0] == "P": + # start/duration + start = parse_iso8601(first) + duration = parse_iso8601(last) + else: + # start/end + start = parse_iso8601(first) + end = parse_iso8601(last) + + return _Interval( + cast(datetime, start), cast(datetime, end), cast(Duration, duration) + ) diff --git a/pendulum/parsing/_iso8601.c b/pendulum/parsing/_iso8601.c index 2e14e4b..1322423 
100644 --- a/pendulum/parsing/_iso8601.c +++ b/pendulum/parsing/_iso8601.c @@ -1,1371 +1,1361 @@ -/* ------------------------------------------------------------------------- */
-
-#include <Python.h>
-#include <datetime.h>
-#include <structmember.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-
-#ifndef PyVarObject_HEAD_INIT
-#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
-#endif
-
-
-/* ------------------------------------------------------------------------- */
-
-#define EPOCH_YEAR 1970
-
-#define DAYS_PER_N_YEAR 365
-#define DAYS_PER_L_YEAR 366
-
-#define USECS_PER_SEC 1000000
-
-#define SECS_PER_MIN 60
-#define SECS_PER_HOUR (60 * SECS_PER_MIN)
-#define SECS_PER_DAY (SECS_PER_HOUR * 24)
-
-// 400-year chunks always have 146097 days (20871 weeks).
-#define DAYS_PER_400_YEARS 146097L
-#define SECS_PER_400_YEARS ((int64_t)DAYS_PER_400_YEARS * (int64_t)SECS_PER_DAY)
-
-// The number of seconds in an aligned 100-year chunk, for those that
-// do not begin with a leap year and those that do respectively.
-const int64_t SECS_PER_100_YEARS[2] = {
- (uint64_t)(76L * DAYS_PER_N_YEAR + 24L * DAYS_PER_L_YEAR) * SECS_PER_DAY,
- (uint64_t)(75L * DAYS_PER_N_YEAR + 25L * DAYS_PER_L_YEAR) * SECS_PER_DAY
-};
-
-// The number of seconds in an aligned 4-year chunk, for those that
-// do not begin with a leap year and those that do respectively.
-const int32_t SECS_PER_4_YEARS[2] = {
- (4 * DAYS_PER_N_YEAR + 0 * DAYS_PER_L_YEAR) * SECS_PER_DAY,
- (3 * DAYS_PER_N_YEAR + 1 * DAYS_PER_L_YEAR) * SECS_PER_DAY
-};
-
-// The number of seconds in non-leap and leap years respectively.
-const int32_t SECS_PER_YEAR[2] = {
- DAYS_PER_N_YEAR * SECS_PER_DAY,
- DAYS_PER_L_YEAR * SECS_PER_DAY
-};
-
-#define MONTHS_PER_YEAR 12
-
-// The month lengths in non-leap and leap years respectively.
-const int32_t DAYS_PER_MONTHS[2][13] = {
- {-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
- {-1, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
-};
-
-// The day offsets of the beginning of each (1-based) month in non-leap
-// and leap years respectively.
-// For example, in a leap year there are 335 days before December.
-const int32_t MONTHS_OFFSETS[2][14] = {
- {-1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
- {-1, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
-};
-
-const int DAY_OF_WEEK_TABLE[12] = {
- 0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4
-};
-
-#define TM_SUNDAY 0
-#define TM_MONDAY 1
-#define TM_TUESDAY 2
-#define TM_WEDNESDAY 3
-#define TM_THURSDAY 4
-#define TM_FRIDAY 5
-#define TM_SATURDAY 6
-
-#define TM_JANUARY 0
-#define TM_FEBRUARY 1
-#define TM_MARCH 2
-#define TM_APRIL 3
-#define TM_MAY 4
-#define TM_JUNE 5
-#define TM_JULY 6
-#define TM_AUGUST 7
-#define TM_SEPTEMBER 8
-#define TM_OCTOBER 9
-#define TM_NOVEMBER 10
-#define TM_DECEMBER 11
-
-// Parsing errors
-const int PARSER_INVALID_ISO8601 = 0;
-const int PARSER_INVALID_DATE = 1;
-const int PARSER_INVALID_TIME = 2;
-const int PARSER_INVALID_WEEK_DATE = 3;
-const int PARSER_INVALID_WEEK_NUMBER = 4;
-const int PARSER_INVALID_WEEKDAY_NUMBER = 5;
-const int PARSER_INVALID_ORDINAL_DAY_FOR_YEAR = 6;
-const int PARSER_INVALID_MONTH_OR_DAY = 7;
-const int PARSER_INVALID_MONTH = 8;
-const int PARSER_INVALID_DAY_FOR_MONTH = 9;
-const int PARSER_INVALID_HOUR = 10;
-const int PARSER_INVALID_MINUTE = 11;
-const int PARSER_INVALID_SECOND = 12;
-const int PARSER_INVALID_SUBSECOND = 13;
-const int PARSER_INVALID_TZ_OFFSET = 14;
-const int PARSER_INVALID_DURATION = 15;
-const int PARSER_INVALID_DURATION_FLOAT_YEAR_MONTH_NOT_SUPPORTED = 16;
-
-const char PARSER_ERRORS[17][80] = {
- "Invalid ISO 8601 string",
- "Invalid date",
- "Invalid time",
- "Invalid week date",
- "Invalid week number",
- "Invalid weekday number",
- "Invalid ordinal day for year",
- "Invalid month and/or day",
- "Invalid month",
- "Invalid day for month",
- "Invalid hour",
- "Invalid minute",
- "Invalid second",
- "Invalid subsecond",
- "Invalid timezone offset",
- "Invalid duration",
- "Float years and months are not supported"
-};
-
-/* ------------------------------------------------------------------------- */
-
-
-int p(int y) {
- return y + y/4 - y/100 + y/400;
-}
-
-int is_leap(int year) {
- return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
-}
-
-int week_day(int year, int month, int day) {
- int y;
- int w;
-
- y = year - (month < 3);
-
- w = (p(y) + DAY_OF_WEEK_TABLE[month - 1] + day) % 7;
-
- if (!w) {
- w = 7;
- }
-
- return w;
-}
-
-int days_in_year(int year) {
- if (is_leap(year)) {
- return DAYS_PER_L_YEAR;
- }
-
- return DAYS_PER_N_YEAR;
-}
-
-int is_long_year(int year) {
- return (p(year) % 7 == 4) || (p(year - 1) % 7 == 3);
-}
-
-
-/* ------------------------ Custom Types ------------------------------- */
-
-
-/*
- * class FixedOffset(tzinfo):
- */
-typedef struct {
- PyObject_HEAD
- int offset;
- char *tzname;
-} FixedOffset;
-
-/*
- * def __init__(self, offset):
- * self.offset = offset
-*/
-static int FixedOffset_init(FixedOffset *self, PyObject *args, PyObject *kwargs) {
- int offset;
- char *tzname = NULL;
-
- static char *kwlist[] = {"offset", "tzname", NULL};
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", kwlist, &offset, &tzname))
- return -1;
-
- self->offset = offset;
- self->tzname = tzname;
-
- return 0;
-}
-
-/*
- * def utcoffset(self, dt):
- * return timedelta(seconds=self.offset * 60)
- */
-static PyObject *FixedOffset_utcoffset(FixedOffset *self, PyObject *args) {
- return PyDelta_FromDSU(0, self->offset, 0);
-}
-
-/*
- * def dst(self, dt):
- * return timedelta(seconds=self.offset * 60)
- */
-static PyObject *FixedOffset_dst(FixedOffset *self, PyObject *args) {
- return PyDelta_FromDSU(0, self->offset, 0);
-}
-
-/*
- * def tzname(self, dt):
- * sign = '+'
- * if self.offset < 0:
- * sign = '-'
- * return "%s%d:%d" % (sign, self.offset / 60, self.offset % 60)
- */
-static PyObject *FixedOffset_tzname(FixedOffset *self, PyObject *args) {
- if (self->tzname != NULL) {
- return PyUnicode_FromString(self->tzname);
- }
-
- char tzname_[7] = {0};
- char sign = '+';
- int offset = self->offset;
-
- if (offset < 0) {
- sign = '-';
- offset *= -1;
- }
-
- sprintf(
- tzname_,
- "%c%02d:%02d",
- sign,
- offset / SECS_PER_HOUR,
- offset / SECS_PER_MIN % SECS_PER_MIN
- );
-
- return PyUnicode_FromString(tzname_);
-}
-
-/*
- * def __repr__(self):
- * return self.tzname()
- */
-static PyObject *FixedOffset_repr(FixedOffset *self) {
- return FixedOffset_tzname(self, NULL);
-}
-
-/*
- * Class member / class attributes
- */
-static PyMemberDef FixedOffset_members[] = {
- {"offset", T_INT, offsetof(FixedOffset, offset), 0, "UTC offset"},
- {NULL}
-};
-
-/*
- * Class methods
- */
-static PyMethodDef FixedOffset_methods[] = {
- {"utcoffset", (PyCFunction)FixedOffset_utcoffset, METH_VARARGS, ""},
- {"dst", (PyCFunction)FixedOffset_dst, METH_VARARGS, ""},
- {"tzname", (PyCFunction)FixedOffset_tzname, METH_VARARGS, ""},
- {NULL}
-};
-
-static PyTypeObject FixedOffset_type = {
- PyVarObject_HEAD_INIT(NULL, 0)
- "FixedOffset_type", /* tp_name */
- sizeof(FixedOffset), /* tp_basicsize */
- 0, /* tp_itemsize */
- 0, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_as_async */
- (reprfunc)FixedOffset_repr, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- (reprfunc)FixedOffset_repr, /* tp_str */
- 0, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /* tp_flags */
- "TZInfo with fixed offset", /* tp_doc */
-};
-
-/*
- * Instantiate new FixedOffset_type object
- * Skip overhead of calling PyObject_New and PyObject_Init.
- * Directly allocate object.
- */
-static PyObject *new_fixed_offset_ex(int offset, char *name, PyTypeObject *type) {
- FixedOffset *self = (FixedOffset *) (type->tp_alloc(type, 0));
-
- if (self != NULL)
- self->offset = offset;
- self->tzname = name;
-
- return (PyObject *) self;
-}
-
-#define new_fixed_offset(offset, name) new_fixed_offset_ex(offset, name, &FixedOffset_type)
-
-
-/*
- * class Duration():
- */
-typedef struct {
- PyObject_HEAD
- int years;
- int months;
- int weeks;
- int days;
- int hours;
- int minutes;
- int seconds;
- int microseconds;
-} Duration;
-
-/*
- * def __init__(self, years, months, days, hours, minutes, seconds, microseconds):
- * self.years = years
- * # ...
-*/
-static int Duration_init(Duration *self, PyObject *args, PyObject *kwargs) {
- int years;
- int months;
- int weeks;
- int days;
- int hours;
- int minutes;
- int seconds;
- int microseconds;
-
- if (!PyArg_ParseTuple(args, "iiiiiiii", &years, &months, &weeks, &days, &hours, &minutes, &seconds, &microseconds))
- return -1;
-
- self->years = years;
- self->months = months;
- self->weeks = weeks;
- self->days = days;
- self->hours = hours;
- self->minutes = minutes;
- self->seconds = seconds;
- self->microseconds = microseconds;
-
- return 0;
-}
-
-/*
- * def __repr__(self):
- * return '{} years {} months {} days {} hours {} minutes {} seconds {} microseconds'.format(
- * self.years, self.months, self.days, self.minutes, self.hours, self.seconds, self.microseconds
- * )
- */
-static PyObject *Duration_repr(Duration *self) {
- char repr[82] = {0};
-
- sprintf(
- repr,
- "%d years %d months %d weeks %d days %d hours %d minutes %d seconds %d microseconds",
- self->years,
- self->months,
- self->weeks,
- self->days,
- self->hours,
- self->minutes,
- self->seconds,
- self->microseconds
- );
-
- return PyUnicode_FromString(repr);
-}
-
-/*
- * Instantiate new Duration_type object
- * Skip overhead of calling PyObject_New and PyObject_Init.
- * Directly allocate object.
- */
-static PyObject *new_duration_ex(int years, int months, int weeks, int days, int hours, int minutes, int seconds, int microseconds, PyTypeObject *type) {
- Duration *self = (Duration *) (type->tp_alloc(type, 0));
-
- if (self != NULL) {
- self->years = years;
- self->months = months;
- self->weeks = weeks;
- self->days = days;
- self->hours = hours;
- self->minutes = minutes;
- self->seconds = seconds;
- self->microseconds = microseconds;
- }
-
- return (PyObject *) self;
-}
-
-/*
- * Class member / class attributes
- */
-static PyMemberDef Duration_members[] = {
- {"years", T_INT, offsetof(Duration, years), 0, "years in duration"},
- {"months", T_INT, offsetof(Duration, months), 0, "months in duration"},
- {"weeks", T_INT, offsetof(Duration, weeks), 0, "weeks in duration"},
- {"days", T_INT, offsetof(Duration, days), 0, "days in duration"},
- {"remaining_days", T_INT, offsetof(Duration, days), 0, "days in duration"},
- {"hours", T_INT, offsetof(Duration, hours), 0, "hours in duration"},
- {"minutes", T_INT, offsetof(Duration, minutes), 0, "minutes in duration"},
- {"seconds", T_INT, offsetof(Duration, seconds), 0, "seconds in duration"},
- {"remaining_seconds", T_INT, offsetof(Duration, seconds), 0, "seconds in duration"},
- {"microseconds", T_INT, offsetof(Duration, microseconds), 0, "microseconds in duration"},
- {NULL}
-};
-
-static PyTypeObject Duration_type = {
- PyVarObject_HEAD_INIT(NULL, 0)
- "Duration", /* tp_name */
- sizeof(Duration), /* tp_basicsize */
- 0, /* tp_itemsize */
- 0, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_as_async */
- (reprfunc)Duration_repr, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- (reprfunc)Duration_repr, /* tp_str */
- 0, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /* tp_flags */
- "Duration", /* tp_doc */
-};
-
-#define new_duration(years, months, weeks, days, hours, minutes, seconds, microseconds) new_duration_ex(years, months, weeks, days, hours, minutes, seconds, microseconds, &Duration_type)
-
-typedef struct {
- int is_date;
- int is_time;
- int is_datetime;
- int is_duration;
- int is_period;
- int ambiguous;
- int year;
- int month;
- int day;
- int hour;
- int minute;
- int second;
- int microsecond;
- int offset;
- int has_offset;
- char *tzname;
- int years;
- int months;
- int weeks;
- int days;
- int hours;
- int minutes;
- int seconds;
- int microseconds;
- int error;
-} Parsed;
-
-
-Parsed* new_parsed() {
- Parsed *parsed;
-
- if((parsed = malloc(sizeof *parsed)) != NULL) {
- parsed->is_date = 0;
- parsed->is_time = 0;
- parsed->is_datetime = 0;
- parsed->is_duration = 0;
- parsed->is_period = 0;
-
- parsed->ambiguous = 0;
- parsed->year = 0;
- parsed->month = 1;
- parsed->day = 1;
- parsed->hour = 0;
- parsed->minute = 0;
- parsed->second = 0;
- parsed->microsecond = 0;
- parsed->offset = 0;
- parsed->has_offset = 0;
- parsed->tzname = NULL;
-
- parsed->years = 0;
- parsed->months = 0;
- parsed->weeks = 0;
- parsed->days = 0;
- parsed->hours = 0;
- parsed->minutes = 0;
- parsed->seconds = 0;
- parsed->microseconds = 0;
-
- parsed->error = -1;
- }
-
- return parsed;
-}
-
-
-/* -------------------------- Functions --------------------------*/
-
-Parsed* _parse_iso8601_datetime(char *str, Parsed *parsed) {
- char* c;
- int monthday = 0;
- int week = 0;
- int weekday = 1;
- int ordinal;
- int tz_sign = 0;
- int leap = 0;
- int separators = 0;
- int time = 0;
- int has_hour = 0;
- int i;
- int j;
-
- // Assuming date only for now
- parsed->is_date = 1;
-
- c = str;
-
- for (i = 0; i < 4; i++) {
- if (*c >= '0' && *c <= '9') {
- parsed->year = 10 * parsed->year + *c++ - '0';
- } else {
- parsed->error = PARSER_INVALID_ISO8601;
-
- return NULL;
- }
- }
-
- leap = is_leap(parsed->year);
-
- // Optional separator
- if (*c == '-') {
- separators++;
- c++;
- }
-
- // Checking for week dates
- if (*c == 'W') {
- c++;
-
- i = 0;
- while (*c != '\0' && *c != ' ' && *c != 'T') {
- if (*c == '-') {
- separators++;
- c++;
- continue;
- }
-
- week = 10 * week + *c++ - '0';
-
- i++;
- }
-
- switch (i) {
- case 2:
- // Only week number
- break;
- case 3:
- // Week with weekday
- if (!(separators == 0 || separators == 2)) {
- // We should have 2 or no separator
- parsed->error = PARSER_INVALID_WEEK_DATE;
-
- return NULL;
- }
-
- weekday = week % 10;
- week /= 10;
-
- break;
- default:
- // Any other case is wrong
- parsed->error = PARSER_INVALID_WEEK_DATE;
-
- return NULL;
- }
-
- // Checks
- if (week > 53 || (week > 52 && !is_long_year(parsed->year))) {
- parsed->error = PARSER_INVALID_WEEK_NUMBER;
-
- return NULL;
- }
-
- if (weekday > 7) {
- parsed->error = PARSER_INVALID_WEEKDAY_NUMBER;
-
- return NULL;
- }
-
- // Calculating ordinal day
- ordinal = week * 7 + weekday - (week_day(parsed->year, 1, 4) + 3);
-
- if (ordinal < 1) {
- // Previous year
- ordinal += days_in_year(parsed->year - 1);
- parsed->year -= 1;
- leap = is_leap(parsed->year);
- }
-
- if (ordinal > days_in_year(parsed->year)) {
- // Next year
- ordinal -= days_in_year(parsed->year);
- parsed->year += 1;
- leap = is_leap(parsed->year);
- }
-
- for (j = 1; j < 14; j++) {
- if (ordinal <= MONTHS_OFFSETS[leap][j]) {
- parsed->day = ordinal - MONTHS_OFFSETS[leap][j - 1];
- parsed->month = j - 1;
-
- break;
- }
- }
- } else {
- // At this point we need to check the number
- // of characters until the end of the date part
- // (or the end of the string).
- //
- // If two, we have only a month if there is a separator, it may be a time otherwise.
- // If three, we have an ordinal date.
- // If four, we have a complete date
- i = 0;
- while (*c != '\0' && *c != ' ' && *c != 'T') {
- if (*c == '-') {
- separators++;
- c++;
- continue;
- }
-
- if (!(*c >= '0' && *c <='9')) {
- parsed->error = PARSER_INVALID_DATE;
-
- return NULL;
- }
-
- monthday = 10 * monthday + *c++ - '0';
-
- i++;
- }
-
- switch (i) {
- case 0:
- // No month/day specified (only a year)
- break;
- case 2:
- if (!separators) {
- // The date looks like 201207
- // which is invalid for a date
- // But it might be a time in the form hhmmss
- parsed->ambiguous = 1;
- } else if (separators > 1) {
- parsed->error = PARSER_INVALID_DATE;
-
- return NULL;
- }
-
- parsed->month = monthday;
- break;
- case 3:
- // Ordinal day
- if (separators > 1) {
- parsed->error = PARSER_INVALID_DATE;
-
- return NULL;
- }
-
- if (monthday < 1 || monthday > MONTHS_OFFSETS[leap][13]) {
- parsed->error = PARSER_INVALID_ORDINAL_DAY_FOR_YEAR;
-
- return NULL;
- }
-
- for (j = 1; j < 14; j++) {
- if (monthday <= MONTHS_OFFSETS[leap][j]) {
- parsed->day = monthday - MONTHS_OFFSETS[leap][j - 1];
- parsed->month = j - 1;
-
- break;
- }
- }
-
- break;
- case 4:
- // Month and day
- parsed->month = monthday / 100;
- parsed->day = monthday % 100;
-
- break;
- default:
- parsed->error = PARSER_INVALID_MONTH_OR_DAY;
-
- return NULL;
- }
- }
-
- // Checks
- if (separators && !monthday && !week) {
- parsed->error = PARSER_INVALID_DATE;
-
- return NULL;
- }
-
- if (parsed->month > 12) {
- parsed->error = PARSER_INVALID_MONTH;
-
- return NULL;
- }
-
- if (parsed->day > DAYS_PER_MONTHS[leap][parsed->month]) {
- parsed->error = PARSER_INVALID_DAY_FOR_MONTH;
-
- return NULL;
- }
-
- separators = 0;
- if (*c == 'T' || *c == ' ') {
- if (parsed->ambiguous) {
- parsed->error = PARSER_INVALID_DATE;
-
- return NULL;
- }
-
- // We have time so we have a datetime
- parsed->is_datetime = 1;
- parsed->is_date = 0;
-
- c++;
-
- // Grabbing time information
- i = 0;
- while (*c != '\0' && *c != '.' && *c != ',' && *c != 'Z' && *c != '+' && *c != '-') {
- if (*c == ':') {
- separators++;
- c++;
- continue;
- }
-
- if (!(*c >= '0' && *c <='9')) {
- parsed->error = PARSER_INVALID_TIME;
-
- return NULL;
- }
-
- time = 10 * time + *c++ - '0';
- i++;
- }
-
- switch (i) {
- case 2:
- // Hours only
- if (separators > 0) {
- // Extraneous separators
- parsed->error = PARSER_INVALID_TIME;
-
- return NULL;
- }
-
- parsed->hour = time;
- has_hour = 1;
- break;
- case 4:
- // Hours and minutes
- if (separators > 1) {
- // Extraneous separators
- parsed->error = PARSER_INVALID_TIME;
-
- return NULL;
- }
-
- parsed->hour = time / 100;
- parsed->minute = time % 100;
- has_hour = 1;
- break;
- case 6:
- // Hours, minutes and seconds
- if (!(separators == 0 || separators == 2)) {
- // We should have either two separators or none
- parsed->error = PARSER_INVALID_TIME;
-
- return NULL;
- }
-
- parsed->hour = time / 10000;
- parsed->minute = time / 100 % 100;
- parsed->second = time % 100;
- has_hour = 1;
- break;
- default:
- // Any other case is wrong
- parsed->error = PARSER_INVALID_TIME;
-
- return NULL;
- }
-
- // Checks
- if (parsed->hour > 23) {
- parsed->error = PARSER_INVALID_HOUR;
-
- return NULL;
- }
-
- if (parsed->minute > 59) {
- parsed->error = PARSER_INVALID_MINUTE;
-
- return NULL;
- }
-
- if (parsed->second > 59) {
- parsed->error = PARSER_INVALID_SECOND;
-
- return NULL;
- }
-
- // Subsecond
- if (*c == '.' || *c == ',') {
- c++;
-
- time = 0;
- i = 0;
- while (*c != '\0' && *c != 'Z' && *c != '+' && *c != '-') {
- if (!(*c >= '0' && *c <='9')) {
- parsed->error = PARSER_INVALID_SUBSECOND;
-
- return NULL;
- }
-
- time = 10 * time + *c++ - '0';
- i++;
- }
-
- // adjust to microseconds
- if (i > 6) {
- parsed->microsecond = time / pow(10, i - 6);
- } else if (i <= 6) {
- parsed->microsecond = time * pow(10, 6 - i);
- }
- }
-
- // Timezone
- if (*c == 'Z') {
- parsed->has_offset = 1;
- parsed->tzname = "UTC";
- c++;
- } else if (*c == '+' || *c == '-') {
- tz_sign = 1;
- if (*c == '-') {
- tz_sign = -1;
- }
-
- parsed->has_offset = 1;
- c++;
-
- i = 0;
- time = 0;
- separators = 0;
- while (*c != '\0') {
- if (*c == ':') {
- separators++;
- c++;
- continue;
- }
-
- if (!(*c >= '0' && *c <= '9')) {
- parsed->error = PARSER_INVALID_TZ_OFFSET;
-
- return NULL;
- }
-
- time = 10 * time + *c++ - '0';
- i++;
- }
-
- switch (i) {
- case 2:
- // hh Format
- if (separators) {
- // Extraneous separators
- parsed->error = PARSER_INVALID_TZ_OFFSET;
-
- return NULL;
- }
-
- parsed->offset = tz_sign * (time * 3600);
- break;
- case 4:
- // hhmm Format
- if (separators > 1) {
- // Extraneous separators
- parsed->error = PARSER_INVALID_TZ_OFFSET;
-
- return NULL;
- }
-
- parsed->offset = tz_sign * ((time / 100 * 3600) + (time % 100 * 60));
- break;
- default:
- // Wrong format
- parsed->error = PARSER_INVALID_TZ_OFFSET;
-
- return NULL;
- }
- }
- }
-
- // At this point we should be at the end of the string
- // If not, the string is invalid
- if (*c != '\0') {
- parsed->error = PARSER_INVALID_ISO8601;
-
- return NULL;
- }
-
- return parsed;
-}
-
-
-Parsed* _parse_iso8601_duration(char *str, Parsed *parsed) {
- char* c;
- int value = 0;
- int grabbed = 0;
- int in_time = 0;
- int in_fraction = 0;
- int fraction_length = 0;
- int has_fractional = 0;
- int fraction = 0;
- int has_ymd = 0;
- int has_week = 0;
- int has_year = 0;
- int has_month = 0;
- int has_day = 0;
- int has_hour = 0;
- int has_minute = 0;
- int has_second = 0;
-
- c = str;
-
- // Removing P operator
- c++;
-
- parsed->is_duration = 1;
-
- for (; *c != '\0'; c++) {
- switch (*c) {
- case 'Y':
- if (!grabbed || in_time || has_week || has_ymd) {
- // No value grabbed
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- if (fraction) {
- parsed->error = PARSER_INVALID_DURATION_FLOAT_YEAR_MONTH_NOT_SUPPORTED;
-
- return NULL;
- }
-
- parsed->years = value;
-
- grabbed = 0;
- value = 0;
- fraction = 0;
- in_fraction = 0;
- has_ymd = 1;
- has_year = 1;
-
- break;
- case 'M':
- if (!grabbed || has_week) {
- // No value grabbed
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- if (in_time) {
- if (has_second) {
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- if (has_fractional) {
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- parsed->minutes = value;
- if (fraction) {
- parsed->seconds = fraction * 6;
- has_fractional = 1;
- }
-
- has_minute = 1;
- } else {
- if (fraction) {
- parsed->error = PARSER_INVALID_DURATION_FLOAT_YEAR_MONTH_NOT_SUPPORTED;
-
- return NULL;
- }
-
- if (has_month || has_day) {
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- parsed->months = value;
- has_ymd = 1;
- has_month = 1;
- }
-
- grabbed = 0;
- value = 0;
- fraction = 0;
- in_fraction = 0;
-
- break;
- case 'D':
- if (!grabbed || in_time || has_week) {
- // No value grabbed
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- if (has_day) {
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- parsed->days = value;
- if (fraction) {
- parsed->hours = fraction * 2.4;
- has_fractional = 1;
- }
-
- grabbed = 0;
- value = 0;
- fraction = 0;
- in_fraction = 0;
- has_ymd = 1;
- has_day = 1;
-
- break;
- case 'T':
- if (grabbed) {
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- in_time = 1;
-
- break;
- case 'H':
- if (!grabbed || !in_time || has_week) {
- // No value grabbed
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- if (has_hour || has_second || has_minute) {
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- if (has_fractional) {
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- parsed->hours = value;
- if (fraction) {
- parsed->minutes = fraction * 6;
- has_fractional = 1;
- }
-
- grabbed = 0;
- value = 0;
- fraction = 0;
- in_fraction = 0;
- has_hour = 1;
-
- break;
- case 'S':
- if (!grabbed || !in_time || has_week) {
- // No value grabbed
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- if (has_second) {
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- if (has_fractional) {
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- if (fraction) {
- parsed->seconds = value;
- if (fraction_length > 6) {
- parsed->microseconds = fraction / pow(10, fraction_length - 6);
- } else {
- parsed->microseconds = fraction * pow(10, 6 - fraction_length);
- }
- has_fractional = 1;
- } else {
- parsed->seconds = value;
- }
-
- grabbed = 0;
- value = 0;
- fraction = 0;
- in_fraction = 0;
- has_second = 1;
-
- break;
- case 'W':
- if (!grabbed || in_time || has_ymd) {
- // No value grabbed
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- parsed->weeks = value;
- if (fraction) {
- float days;
- days = fraction * 0.7;
- parsed->hours = (int) ((days - (int) days) * 24);
- parsed->days = (int) days;
- }
-
- grabbed = 0;
- value = 0;
- fraction = 0;
- in_fraction = 0;
- has_week = 1;
-
- break;
- case '.':
- if (!grabbed || has_fractional) {
- // No value grabbed
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- in_fraction = 1;
-
- break;
- case ',':
- if (!grabbed || has_fractional) {
- // No value grabbed
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
-
- in_fraction = 1;
-
- break;
- default:
- if (*c >= '0' && *c <='9') {
- if (in_fraction) {
- fraction = 10 * fraction + *c - '0';
- fraction_length++;
- } else {
- value = 10 * value + *c - '0';
- grabbed = 1;
- }
- break;
- }
-
- parsed->error = PARSER_INVALID_DURATION;
-
- return NULL;
- }
- }
-
- return parsed;
-}
-
-
-PyObject* parse_iso8601(PyObject *self, PyObject *args) {
- char* str;
- PyObject *obj;
- PyObject *tzinfo;
- Parsed *parsed = new_parsed();
-
- if (!PyArg_ParseTuple(args, "s", &str)) {
- PyErr_SetString(
- PyExc_ValueError, "Invalid parameters"
- );
- return NULL;
- }
-
- if (*str == 'P') {
- // Duration (or interval)
- if (_parse_iso8601_duration(str, parsed) == NULL) {
- PyErr_SetString(
- PyExc_ValueError, PARSER_ERRORS[parsed->error]
- );
-
- return NULL;
- }
- } else if (_parse_iso8601_datetime(str, parsed) == NULL) {
- PyErr_SetString(
- PyExc_ValueError, PARSER_ERRORS[parsed->error]
- );
-
- return NULL;
- }
-
- if (parsed->is_date) {
- // Date only
- if (parsed->ambiguous) {
- // We can "safely" assume that the ambiguous
- // date was actually a time in the form hhmmss
- parsed->hour = parsed->year / 100;
- parsed->minute = parsed->year % 100;
- parsed->second = parsed->month;
-
- obj = PyDateTimeAPI->Time_FromTime(
- parsed->hour, parsed->minute, parsed->second, parsed->microsecond,
- Py_BuildValue(""),
- PyDateTimeAPI->TimeType
- );
- } else {
- obj = PyDateTimeAPI->Date_FromDate(
- parsed->year, parsed->month, parsed->day,
- PyDateTimeAPI->DateType
- );
- }
- } else if (parsed->is_datetime) {
- if (!parsed->has_offset) {
- tzinfo = Py_BuildValue("");
- } else {
- tzinfo = new_fixed_offset(parsed->offset, parsed->tzname);
- }
-
- obj = PyDateTimeAPI->DateTime_FromDateAndTime(
- parsed->year,
- parsed->month,
- parsed->day,
- parsed->hour,
- parsed->minute,
- parsed->second,
- parsed->microsecond,
- tzinfo,
- PyDateTimeAPI->DateTimeType
- );
-
- Py_DECREF(tzinfo);
- } else if (parsed->is_duration) {
- obj = new_duration(
- parsed->years, parsed->months, parsed->weeks, parsed->days,
- parsed->hours, parsed->minutes, parsed->seconds, parsed->microseconds
- );
- } else {
- return NULL;
- }
-
- free(parsed);
-
- return obj;
-}
-
-
-/* ------------------------------------------------------------------------- */
-
-static PyMethodDef helpers_methods[] = {
- {
- "parse_iso8601",
- (PyCFunction) parse_iso8601,
- METH_VARARGS,
- PyDoc_STR("Parses a ISO8601 string into a tuple.")
- },
- {NULL}
-};
-
-
-/* ------------------------------------------------------------------------- */
-
-static struct PyModuleDef moduledef = {
- PyModuleDef_HEAD_INIT,
- "_iso8601",
- NULL,
- -1,
- helpers_methods,
- NULL,
- NULL,
- NULL,
- NULL,
-};
-
-PyMODINIT_FUNC
-PyInit__iso8601(void)
-{
- PyObject *module;
-
- PyDateTime_IMPORT;
-
- module = PyModule_Create(&moduledef);
-
- if (module == NULL)
- return NULL;
-
- // FixedOffset declaration
- FixedOffset_type.tp_new = PyType_GenericNew;
- FixedOffset_type.tp_base = PyDateTimeAPI->TZInfoType;
- FixedOffset_type.tp_methods = FixedOffset_methods;
- FixedOffset_type.tp_members = FixedOffset_members;
- FixedOffset_type.tp_init = (initproc)FixedOffset_init;
-
- if (PyType_Ready(&FixedOffset_type) < 0)
- return NULL;
-
- // Duration declaration
- Duration_type.tp_new = PyType_GenericNew;
- Duration_type.tp_members = Duration_members;
- Duration_type.tp_init = (initproc)Duration_init;
-
- if (PyType_Ready(&Duration_type) < 0)
- return NULL;
-
- Py_INCREF(&FixedOffset_type);
- Py_INCREF(&Duration_type);
-
- PyModule_AddObject(module, "TZFixedOffset", (PyObject *)&FixedOffset_type);
- PyModule_AddObject(module, "Duration", (PyObject *)&Duration_type);
-
- return module;
-}
+/* ------------------------------------------------------------------------- */ + +#include <Python.h> +#include <datetime.h> +#include <structmember.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> + +#ifndef PyVarObject_HEAD_INIT +#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, +#endif + + +/* ------------------------------------------------------------------------- */ + +#define EPOCH_YEAR 1970 + +#define DAYS_PER_N_YEAR 365 +#define DAYS_PER_L_YEAR 366 + +#define USECS_PER_SEC 1000000 + +#define SECS_PER_MIN 60 +#define SECS_PER_HOUR (60 * SECS_PER_MIN) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) + +// 400-year chunks always have 146097 days (20871 weeks). +#define DAYS_PER_400_YEARS 146097L +#define SECS_PER_400_YEARS ((int64_t)DAYS_PER_400_YEARS * (int64_t)SECS_PER_DAY) + +// The number of seconds in an aligned 100-year chunk, for those that +// do not begin with a leap year and those that do respectively. +const int64_t SECS_PER_100_YEARS[2] = { + (uint64_t)(76L * DAYS_PER_N_YEAR + 24L * DAYS_PER_L_YEAR) * SECS_PER_DAY, + (uint64_t)(75L * DAYS_PER_N_YEAR + 25L * DAYS_PER_L_YEAR) * SECS_PER_DAY +}; + +// The number of seconds in an aligned 4-year chunk, for those that +// do not begin with a leap year and those that do respectively. +const int32_t SECS_PER_4_YEARS[2] = { + (4 * DAYS_PER_N_YEAR + 0 * DAYS_PER_L_YEAR) * SECS_PER_DAY, + (3 * DAYS_PER_N_YEAR + 1 * DAYS_PER_L_YEAR) * SECS_PER_DAY +}; + +// The number of seconds in non-leap and leap years respectively. +const int32_t SECS_PER_YEAR[2] = { + DAYS_PER_N_YEAR * SECS_PER_DAY, + DAYS_PER_L_YEAR * SECS_PER_DAY +}; + +#define MONTHS_PER_YEAR 12 + +// The month lengths in non-leap and leap years respectively. 
+const int32_t DAYS_PER_MONTHS[2][13] = { + {-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {-1, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31} +}; + +// The day offsets of the beginning of each (1-based) month in non-leap +// and leap years respectively. +// For example, in a leap year there are 335 days before December. +const int32_t MONTHS_OFFSETS[2][14] = { + {-1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, + {-1, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366} +}; + +const int DAY_OF_WEEK_TABLE[12] = { + 0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4 +}; + +#define TM_SUNDAY 0 +#define TM_MONDAY 1 +#define TM_TUESDAY 2 +#define TM_WEDNESDAY 3 +#define TM_THURSDAY 4 +#define TM_FRIDAY 5 +#define TM_SATURDAY 6 + +#define TM_JANUARY 0 +#define TM_FEBRUARY 1 +#define TM_MARCH 2 +#define TM_APRIL 3 +#define TM_MAY 4 +#define TM_JUNE 5 +#define TM_JULY 6 +#define TM_AUGUST 7 +#define TM_SEPTEMBER 8 +#define TM_OCTOBER 9 +#define TM_NOVEMBER 10 +#define TM_DECEMBER 11 + +// Parsing errors +const int PARSER_INVALID_ISO8601 = 0; +const int PARSER_INVALID_DATE = 1; +const int PARSER_INVALID_TIME = 2; +const int PARSER_INVALID_WEEK_DATE = 3; +const int PARSER_INVALID_WEEK_NUMBER = 4; +const int PARSER_INVALID_WEEKDAY_NUMBER = 5; +const int PARSER_INVALID_ORDINAL_DAY_FOR_YEAR = 6; +const int PARSER_INVALID_MONTH_OR_DAY = 7; +const int PARSER_INVALID_MONTH = 8; +const int PARSER_INVALID_DAY_FOR_MONTH = 9; +const int PARSER_INVALID_HOUR = 10; +const int PARSER_INVALID_MINUTE = 11; +const int PARSER_INVALID_SECOND = 12; +const int PARSER_INVALID_SUBSECOND = 13; +const int PARSER_INVALID_TZ_OFFSET = 14; +const int PARSER_INVALID_DURATION = 15; +const int PARSER_INVALID_DURATION_FLOAT_YEAR_MONTH_NOT_SUPPORTED = 16; + +const char PARSER_ERRORS[17][80] = { + "Invalid ISO 8601 string", + "Invalid date", + "Invalid time", + "Invalid week date", + "Invalid week number", + "Invalid weekday number", + "Invalid ordinal day for year", + "Invalid month and/or 
day", + "Invalid month", + "Invalid day for month", + "Invalid hour", + "Invalid minute", + "Invalid second", + "Invalid subsecond", + "Invalid timezone offset", + "Invalid duration", + "Float years and months are not supported" +}; + +/* ------------------------------------------------------------------------- */ + + +int p(int y) { + return y + y/4 - y/100 + y/400; +} + +int is_leap(int year) { + return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0); +} + +int week_day(int year, int month, int day) { + int y; + int w; + + y = year - (month < 3); + + w = (p(y) + DAY_OF_WEEK_TABLE[month - 1] + day) % 7; + + if (!w) { + w = 7; + } + + return w; +} + +int days_in_year(int year) { + if (is_leap(year)) { + return DAYS_PER_L_YEAR; + } + + return DAYS_PER_N_YEAR; +} + +int is_long_year(int year) { + return (p(year) % 7 == 4) || (p(year - 1) % 7 == 3); +} + + +/* ------------------------ Custom Types ------------------------------- */ + + +/* + * class FixedOffset(tzinfo): + */ +typedef struct { + PyObject_HEAD + int offset; + char *tzname; +} FixedOffset; + +/* + * def __init__(self, offset): + * self.offset = offset +*/ +static int FixedOffset_init(FixedOffset *self, PyObject *args, PyObject *kwargs) { + int offset; + char *tzname = NULL; + + static char *kwlist[] = {"offset", "tzname", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", kwlist, &offset, &tzname)) + return -1; + + self->offset = offset; + self->tzname = tzname; + + return 0; +} + +/* + * def utcoffset(self, dt): + * return timedelta(seconds=self.offset * 60) + */ +static PyObject *FixedOffset_utcoffset(FixedOffset *self, PyObject *args) { + return PyDelta_FromDSU(0, self->offset, 0); +} + +/* + * def dst(self, dt): + * return timedelta(seconds=self.offset * 60) + */ +static PyObject *FixedOffset_dst(FixedOffset *self, PyObject *args) { + return PyDelta_FromDSU(0, self->offset, 0); +} + +/* + * def tzname(self, dt): + * sign = '+' + * if self.offset < 0: + * sign = '-' + * return 
f"{sign}{self.offset / 60}:{self.offset % 60}" + */ +static PyObject *FixedOffset_tzname(FixedOffset *self, PyObject *args) { + if (self->tzname != NULL) { + return PyUnicode_FromString(self->tzname); + } + + char sign = '+'; + int offset = self->offset; + + if (offset < 0) { + sign = '-'; + offset *= -1; + } + + return PyUnicode_FromFormat( + "%c%02d:%02d", + sign, + offset / SECS_PER_HOUR, + offset / SECS_PER_MIN % SECS_PER_MIN + ); +} + +/* + * def __repr__(self): + * return self.tzname() + */ +static PyObject *FixedOffset_repr(FixedOffset *self) { + return FixedOffset_tzname(self, NULL); +} + +/* + * Class member / class attributes + */ +static PyMemberDef FixedOffset_members[] = { + {"offset", T_INT, offsetof(FixedOffset, offset), 0, "UTC offset"}, + {NULL} +}; + +/* + * Class methods + */ +static PyMethodDef FixedOffset_methods[] = { + {"utcoffset", (PyCFunction)FixedOffset_utcoffset, METH_VARARGS, ""}, + {"dst", (PyCFunction)FixedOffset_dst, METH_VARARGS, ""}, + {"tzname", (PyCFunction)FixedOffset_tzname, METH_VARARGS, ""}, + {NULL} +}; + +static PyTypeObject FixedOffset_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "FixedOffset_type", /* tp_name */ + sizeof(FixedOffset), /* tp_basicsize */ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)FixedOffset_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + (reprfunc)FixedOffset_repr, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /* tp_flags */ + "TZInfo with fixed offset", /* tp_doc */ +}; + +/* + * Instantiate new FixedOffset_type object + * Skip overhead of calling PyObject_New and PyObject_Init. + * Directly allocate object. 
+ */ +static PyObject *new_fixed_offset_ex(int offset, char *name, PyTypeObject *type) { + FixedOffset *self = (FixedOffset *) (type->tp_alloc(type, 0)); + + if (self != NULL) { + self->offset = offset; + self->tzname = name; + } + + return (PyObject *) self; +} + +#define new_fixed_offset(offset, name) new_fixed_offset_ex(offset, name, &FixedOffset_type) + + +/* + * class Duration(): + */ +typedef struct { + PyObject_HEAD + int years; + int months; + int weeks; + int days; + int hours; + int minutes; + int seconds; + int microseconds; +} Duration; + +/* + * def __init__(self, years, months, days, hours, minutes, seconds, microseconds): + * self.years = years + * # ... +*/ +static int Duration_init(Duration *self, PyObject *args, PyObject *kwargs) { + int years; + int months; + int weeks; + int days; + int hours; + int minutes; + int seconds; + int microseconds; + + if (!PyArg_ParseTuple(args, "iiiiiiii", &years, &months, &weeks, &days, &hours, &minutes, &seconds, µseconds)) + return -1; + + self->years = years; + self->months = months; + self->weeks = weeks; + self->days = days; + self->hours = hours; + self->minutes = minutes; + self->seconds = seconds; + self->microseconds = microseconds; + + return 0; +} + +/* + * def __repr__(self): + * return '{} years {} months {} days {} hours {} minutes {} seconds {} microseconds'.format( + * self.years, self.months, self.days, self.minutes, self.hours, self.seconds, self.microseconds + * ) + */ +static PyObject *Duration_repr(Duration *self) { + return PyUnicode_FromFormat( + "%d years %d months %d weeks %d days %d hours %d minutes %d seconds %d microseconds", + self->years, + self->months, + self->weeks, + self->days, + self->hours, + self->minutes, + self->seconds, + self->microseconds + ); +} + +/* + * Instantiate new Duration_type object + * Skip overhead of calling PyObject_New and PyObject_Init. + * Directly allocate object. 
+ */ +static PyObject *new_duration_ex(int years, int months, int weeks, int days, int hours, int minutes, int seconds, int microseconds, PyTypeObject *type) { + Duration *self = (Duration *) (type->tp_alloc(type, 0)); + + if (self != NULL) { + self->years = years; + self->months = months; + self->weeks = weeks; + self->days = days; + self->hours = hours; + self->minutes = minutes; + self->seconds = seconds; + self->microseconds = microseconds; + } + + return (PyObject *) self; +} + +/* + * Class member / class attributes + */ +static PyMemberDef Duration_members[] = { + {"years", T_INT, offsetof(Duration, years), 0, "years in duration"}, + {"months", T_INT, offsetof(Duration, months), 0, "months in duration"}, + {"weeks", T_INT, offsetof(Duration, weeks), 0, "weeks in duration"}, + {"days", T_INT, offsetof(Duration, days), 0, "days in duration"}, + {"remaining_days", T_INT, offsetof(Duration, days), 0, "days in duration"}, + {"hours", T_INT, offsetof(Duration, hours), 0, "hours in duration"}, + {"minutes", T_INT, offsetof(Duration, minutes), 0, "minutes in duration"}, + {"seconds", T_INT, offsetof(Duration, seconds), 0, "seconds in duration"}, + {"remaining_seconds", T_INT, offsetof(Duration, seconds), 0, "seconds in duration"}, + {"microseconds", T_INT, offsetof(Duration, microseconds), 0, "microseconds in duration"}, + {NULL} +}; + +static PyTypeObject Duration_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "Duration", /* tp_name */ + sizeof(Duration), /* tp_basicsize */ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)Duration_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + (reprfunc)Duration_repr, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /* tp_flags */ + "Duration", /* tp_doc */ +}; + +#define 
new_duration(years, months, weeks, days, hours, minutes, seconds, microseconds) new_duration_ex(years, months, weeks, days, hours, minutes, seconds, microseconds, &Duration_type) + +typedef struct { + int is_date; + int is_time; + int is_datetime; + int is_duration; + int is_period; + int ambiguous; + int year; + int month; + int day; + int hour; + int minute; + int second; + int microsecond; + int offset; + int has_offset; + char *tzname; + int years; + int months; + int weeks; + int days; + int hours; + int minutes; + int seconds; + int microseconds; + int error; +} Parsed; + + +Parsed* new_parsed() { + Parsed *parsed; + + if((parsed = malloc(sizeof *parsed)) != NULL) { + parsed->is_date = 0; + parsed->is_time = 0; + parsed->is_datetime = 0; + parsed->is_duration = 0; + parsed->is_period = 0; + + parsed->ambiguous = 0; + parsed->year = 0; + parsed->month = 1; + parsed->day = 1; + parsed->hour = 0; + parsed->minute = 0; + parsed->second = 0; + parsed->microsecond = 0; + parsed->offset = 0; + parsed->has_offset = 0; + parsed->tzname = NULL; + + parsed->years = 0; + parsed->months = 0; + parsed->weeks = 0; + parsed->days = 0; + parsed->hours = 0; + parsed->minutes = 0; + parsed->seconds = 0; + parsed->microseconds = 0; + + parsed->error = -1; + } + + return parsed; +} + + +/* -------------------------- Functions --------------------------*/ + +Parsed* _parse_iso8601_datetime(char *str, Parsed *parsed) { + char* c; + int monthday = 0; + int week = 0; + int weekday = 1; + int ordinal; + int tz_sign = 0; + int leap = 0; + int separators = 0; + int time = 0; + int i; + int j; + + // Assuming date only for now + parsed->is_date = 1; + + c = str; + + for (i = 0; i < 4; i++) { + if (*c >= '0' && *c <= '9') { + parsed->year = 10 * parsed->year + *c++ - '0'; + } else { + parsed->error = PARSER_INVALID_ISO8601; + + return NULL; + } + } + + leap = is_leap(parsed->year); + + // Optional separator + if (*c == '-') { + separators++; + c++; + } + + // Checking for week dates + if 
(*c == 'W') { + c++; + + i = 0; + while (*c != '\0' && *c != ' ' && *c != 'T') { + if (*c == '-') { + separators++; + c++; + continue; + } + + week = 10 * week + *c++ - '0'; + + i++; + } + + switch (i) { + case 2: + // Only week number + break; + case 3: + // Week with weekday + if (!(separators == 0 || separators == 2)) { + // We should have 2 or no separator + parsed->error = PARSER_INVALID_WEEK_DATE; + + return NULL; + } + + weekday = week % 10; + week /= 10; + + break; + default: + // Any other case is wrong + parsed->error = PARSER_INVALID_WEEK_DATE; + + return NULL; + } + + // Checks + if (week > 53 || (week > 52 && !is_long_year(parsed->year))) { + parsed->error = PARSER_INVALID_WEEK_NUMBER; + + return NULL; + } + + if (weekday > 7) { + parsed->error = PARSER_INVALID_WEEKDAY_NUMBER; + + return NULL; + } + + // Calculating ordinal day + ordinal = week * 7 + weekday - (week_day(parsed->year, 1, 4) + 3); + + if (ordinal < 1) { + // Previous year + ordinal += days_in_year(parsed->year - 1); + parsed->year -= 1; + leap = is_leap(parsed->year); + } + + if (ordinal > days_in_year(parsed->year)) { + // Next year + ordinal -= days_in_year(parsed->year); + parsed->year += 1; + leap = is_leap(parsed->year); + } + + for (j = 1; j < 14; j++) { + if (ordinal <= MONTHS_OFFSETS[leap][j]) { + parsed->day = ordinal - MONTHS_OFFSETS[leap][j - 1]; + parsed->month = j - 1; + + break; + } + } + } else { + // At this point we need to check the number + // of characters until the end of the date part + // (or the end of the string). + // + // If two, we have only a month if there is a separator, it may be a time otherwise. + // If three, we have an ordinal date. 
+ // If four, we have a complete date + i = 0; + while (*c != '\0' && *c != ' ' && *c != 'T') { + if (*c == '-') { + separators++; + c++; + continue; + } + + if (!(*c >= '0' && *c <='9')) { + parsed->error = PARSER_INVALID_DATE; + + return NULL; + } + + monthday = 10 * monthday + *c++ - '0'; + + i++; + } + + switch (i) { + case 0: + // No month/day specified (only a year) + break; + case 2: + if (!separators) { + // The date looks like 201207 + // which is invalid for a date + // But it might be a time in the form hhmmss + parsed->ambiguous = 1; + } else if (separators > 1) { + parsed->error = PARSER_INVALID_DATE; + + return NULL; + } + + parsed->month = monthday; + break; + case 3: + // Ordinal day + if (separators > 1) { + parsed->error = PARSER_INVALID_DATE; + + return NULL; + } + + if (monthday < 1 || monthday > MONTHS_OFFSETS[leap][13]) { + parsed->error = PARSER_INVALID_ORDINAL_DAY_FOR_YEAR; + + return NULL; + } + + for (j = 1; j < 14; j++) { + if (monthday <= MONTHS_OFFSETS[leap][j]) { + parsed->day = monthday - MONTHS_OFFSETS[leap][j - 1]; + parsed->month = j - 1; + + break; + } + } + + break; + case 4: + // Month and day + parsed->month = monthday / 100; + parsed->day = monthday % 100; + + break; + default: + parsed->error = PARSER_INVALID_MONTH_OR_DAY; + + return NULL; + } + } + + // Checks + if (separators && !monthday && !week) { + parsed->error = PARSER_INVALID_DATE; + + return NULL; + } + + if (parsed->month > 12) { + parsed->error = PARSER_INVALID_MONTH; + + return NULL; + } + + if (parsed->day > DAYS_PER_MONTHS[leap][parsed->month]) { + parsed->error = PARSER_INVALID_DAY_FOR_MONTH; + + return NULL; + } + + separators = 0; + if (*c == 'T' || *c == ' ') { + if (parsed->ambiguous) { + parsed->error = PARSER_INVALID_DATE; + + return NULL; + } + + // We have time so we have a datetime + parsed->is_datetime = 1; + parsed->is_date = 0; + + c++; + + // Grabbing time information + i = 0; + while (*c != '\0' && *c != '.' 
&& *c != ',' && *c != 'Z' && *c != '+' && *c != '-') { + if (*c == ':') { + separators++; + c++; + continue; + } + + if (!(*c >= '0' && *c <='9')) { + parsed->error = PARSER_INVALID_TIME; + + return NULL; + } + + time = 10 * time + *c++ - '0'; + i++; + } + + switch (i) { + case 2: + // Hours only + if (separators > 0) { + // Extraneous separators + parsed->error = PARSER_INVALID_TIME; + + return NULL; + } + + parsed->hour = time; + break; + case 4: + // Hours and minutes + if (separators > 1) { + // Extraneous separators + parsed->error = PARSER_INVALID_TIME; + + return NULL; + } + + parsed->hour = time / 100; + parsed->minute = time % 100; + break; + case 6: + // Hours, minutes and seconds + if (!(separators == 0 || separators == 2)) { + // We should have either two separators or none + parsed->error = PARSER_INVALID_TIME; + + return NULL; + } + + parsed->hour = time / 10000; + parsed->minute = time / 100 % 100; + parsed->second = time % 100; + break; + default: + // Any other case is wrong + parsed->error = PARSER_INVALID_TIME; + + return NULL; + } + + // Checks + if (parsed->hour > 23) { + parsed->error = PARSER_INVALID_HOUR; + + return NULL; + } + + if (parsed->minute > 59) { + parsed->error = PARSER_INVALID_MINUTE; + + return NULL; + } + + if (parsed->second > 59) { + parsed->error = PARSER_INVALID_SECOND; + + return NULL; + } + + // Subsecond + if (*c == '.' 
|| *c == ',') { + c++; + + time = 0; + i = 0; + while (*c != '\0' && *c != 'Z' && *c != '+' && *c != '-') { + if (!(*c >= '0' && *c <='9')) { + parsed->error = PARSER_INVALID_SUBSECOND; + + return NULL; + } + + time = 10 * time + *c++ - '0'; + i++; + } + + // adjust to microseconds + if (i > 6) { + parsed->microsecond = time / pow(10, i - 6); + } else if (i <= 6) { + parsed->microsecond = time * pow(10, 6 - i); + } + } + + // Timezone + if (*c == 'Z') { + parsed->has_offset = 1; + parsed->tzname = "UTC"; + c++; + } else if (*c == '+' || *c == '-') { + tz_sign = 1; + if (*c == '-') { + tz_sign = -1; + } + + parsed->has_offset = 1; + c++; + + i = 0; + time = 0; + separators = 0; + while (*c != '\0') { + if (*c == ':') { + separators++; + c++; + continue; + } + + if (!(*c >= '0' && *c <= '9')) { + parsed->error = PARSER_INVALID_TZ_OFFSET; + + return NULL; + } + + time = 10 * time + *c++ - '0'; + i++; + } + + switch (i) { + case 2: + // hh Format + if (separators) { + // Extraneous separators + parsed->error = PARSER_INVALID_TZ_OFFSET; + + return NULL; + } + + parsed->offset = tz_sign * (time * 3600); + break; + case 4: + // hhmm Format + if (separators > 1) { + // Extraneous separators + parsed->error = PARSER_INVALID_TZ_OFFSET; + + return NULL; + } + + parsed->offset = tz_sign * ((time / 100 * 3600) + (time % 100 * 60)); + break; + default: + // Wrong format + parsed->error = PARSER_INVALID_TZ_OFFSET; + + return NULL; + } + } + } + + // At this point we should be at the end of the string + // If not, the string is invalid + if (*c != '\0') { + parsed->error = PARSER_INVALID_ISO8601; + + return NULL; + } + + return parsed; +} + + +Parsed* _parse_iso8601_duration(char *str, Parsed *parsed) { + char* c; + int value = 0; + int grabbed = 0; + int in_time = 0; + int in_fraction = 0; + int fraction_length = 0; + int has_fractional = 0; + int fraction = 0; + int has_ymd = 0; + int has_week = 0; + int has_month = 0; + int has_day = 0; + int has_hour = 0; + int has_minute = 0; 
+ int has_second = 0; + + c = str; + + // Removing P operator + c++; + + parsed->is_duration = 1; + + for (; *c != '\0'; c++) { + switch (*c) { + case 'Y': + if (!grabbed || in_time || has_week || has_ymd) { + // No value grabbed + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + if (fraction) { + parsed->error = PARSER_INVALID_DURATION_FLOAT_YEAR_MONTH_NOT_SUPPORTED; + + return NULL; + } + + parsed->years = value; + + grabbed = 0; + value = 0; + fraction = 0; + in_fraction = 0; + has_ymd = 1; + + break; + case 'M': + if (!grabbed || has_week) { + // No value grabbed + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + if (in_time) { + if (has_second) { + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + if (has_fractional) { + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + parsed->minutes = value; + if (fraction) { + parsed->seconds = fraction * 6; + has_fractional = 1; + } + + has_minute = 1; + } else { + if (fraction) { + parsed->error = PARSER_INVALID_DURATION_FLOAT_YEAR_MONTH_NOT_SUPPORTED; + + return NULL; + } + + if (has_month || has_day) { + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + parsed->months = value; + has_ymd = 1; + has_month = 1; + } + + grabbed = 0; + value = 0; + fraction = 0; + in_fraction = 0; + + break; + case 'D': + if (!grabbed || in_time || has_week) { + // No value grabbed + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + if (has_day) { + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + parsed->days = value; + if (fraction) { + parsed->hours = fraction * 2.4; + has_fractional = 1; + } + + grabbed = 0; + value = 0; + fraction = 0; + in_fraction = 0; + has_ymd = 1; + has_day = 1; + + break; + case 'T': + if (grabbed) { + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + in_time = 1; + + break; + case 'H': + if (!grabbed || !in_time || has_week) { + // No value grabbed + parsed->error = 
PARSER_INVALID_DURATION; + + return NULL; + } + + if (has_hour || has_second || has_minute) { + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + if (has_fractional) { + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + parsed->hours = value; + if (fraction) { + parsed->minutes = fraction * 6; + has_fractional = 1; + } + + grabbed = 0; + value = 0; + fraction = 0; + in_fraction = 0; + has_hour = 1; + + break; + case 'S': + if (!grabbed || !in_time || has_week) { + // No value grabbed + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + if (has_second) { + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + if (has_fractional) { + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + if (fraction) { + parsed->seconds = value; + if (fraction_length > 6) { + parsed->microseconds = fraction / pow(10, fraction_length - 6); + } else { + parsed->microseconds = fraction * pow(10, 6 - fraction_length); + } + has_fractional = 1; + } else { + parsed->seconds = value; + } + + grabbed = 0; + value = 0; + fraction = 0; + in_fraction = 0; + has_second = 1; + + break; + case 'W': + if (!grabbed || in_time || has_ymd) { + // No value grabbed + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + parsed->weeks = value; + if (fraction) { + float days; + days = fraction * 0.7; + parsed->hours = (int) ((days - (int) days) * 24); + parsed->days = (int) days; + } + + grabbed = 0; + value = 0; + fraction = 0; + in_fraction = 0; + has_week = 1; + + break; + case '.': + if (!grabbed || has_fractional) { + // No value grabbed + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + in_fraction = 1; + + break; + case ',': + if (!grabbed || has_fractional) { + // No value grabbed + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + + in_fraction = 1; + + break; + default: + if (*c >= '0' && *c <='9') { + if (in_fraction) { + fraction = 10 * fraction + *c - '0'; + fraction_length++; + } 
else { + value = 10 * value + *c - '0'; + grabbed = 1; + } + break; + } + + parsed->error = PARSER_INVALID_DURATION; + + return NULL; + } + } + + return parsed; +} + + +PyObject* parse_iso8601(PyObject *self, PyObject *args) { + char* str; + PyObject *obj; + PyObject *tzinfo; + Parsed *parsed = new_parsed(); + + if (!PyArg_ParseTuple(args, "s", &str)) { + PyErr_SetString( + PyExc_ValueError, "Invalid parameters" + ); + free(parsed); + return NULL; + } + + if (*str == 'P') { + // Duration (or interval) + if (_parse_iso8601_duration(str, parsed) == NULL) { + PyErr_SetString( + PyExc_ValueError, PARSER_ERRORS[parsed->error] + ); + + free(parsed); + return NULL; + } + } else if (_parse_iso8601_datetime(str, parsed) == NULL) { + PyErr_SetString( + PyExc_ValueError, PARSER_ERRORS[parsed->error] + ); + + free(parsed); + return NULL; + } + + if (parsed->is_date) { + // Date only + if (parsed->ambiguous) { + // We can "safely" assume that the ambiguous + // date was actually a time in the form hhmmss + parsed->hour = parsed->year / 100; + parsed->minute = parsed->year % 100; + parsed->second = parsed->month; + + obj = PyDateTimeAPI->Time_FromTime( + parsed->hour, parsed->minute, parsed->second, parsed->microsecond, + Py_BuildValue(""), + PyDateTimeAPI->TimeType + ); + } else { + obj = PyDateTimeAPI->Date_FromDate( + parsed->year, parsed->month, parsed->day, + PyDateTimeAPI->DateType + ); + } + } else if (parsed->is_datetime) { + if (!parsed->has_offset) { + tzinfo = Py_BuildValue(""); + } else { + tzinfo = new_fixed_offset(parsed->offset, parsed->tzname); + } + + obj = PyDateTimeAPI->DateTime_FromDateAndTime( + parsed->year, + parsed->month, + parsed->day, + parsed->hour, + parsed->minute, + parsed->second, + parsed->microsecond, + tzinfo, + PyDateTimeAPI->DateTimeType + ); + + Py_DECREF(tzinfo); + } else if (parsed->is_duration) { + obj = new_duration( + parsed->years, parsed->months, parsed->weeks, parsed->days, + parsed->hours, parsed->minutes, parsed->seconds, 
parsed->microseconds + ); + } else { + free(parsed); + return NULL; + } + + free(parsed); + + return obj; +} + + +/* ------------------------------------------------------------------------- */ + +static PyMethodDef helpers_methods[] = { + { + "parse_iso8601", + (PyCFunction) parse_iso8601, + METH_VARARGS, + PyDoc_STR("Parses a ISO8601 string into a tuple.") + }, + {NULL} +}; + + +/* ------------------------------------------------------------------------- */ + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_iso8601", + NULL, + -1, + helpers_methods, + NULL, + NULL, + NULL, + NULL, +}; + +PyMODINIT_FUNC +PyInit__iso8601(void) +{ + PyObject *module; + + PyDateTime_IMPORT; + + module = PyModule_Create(&moduledef); + + if (module == NULL) + return NULL; + + // FixedOffset declaration + FixedOffset_type.tp_new = PyType_GenericNew; + FixedOffset_type.tp_base = PyDateTimeAPI->TZInfoType; + FixedOffset_type.tp_methods = FixedOffset_methods; + FixedOffset_type.tp_members = FixedOffset_members; + FixedOffset_type.tp_init = (initproc)FixedOffset_init; + + if (PyType_Ready(&FixedOffset_type) < 0) + return NULL; + + // Duration declaration + Duration_type.tp_new = PyType_GenericNew; + Duration_type.tp_members = Duration_members; + Duration_type.tp_init = (initproc)Duration_init; + + if (PyType_Ready(&Duration_type) < 0) + return NULL; + + Py_INCREF(&FixedOffset_type); + Py_INCREF(&Duration_type); + + PyModule_AddObject(module, "TZFixedOffset", (PyObject *)&FixedOffset_type); + PyModule_AddObject(module, "Duration", (PyObject *)&Duration_type); + + return module; +} diff --git a/pendulum/parsing/_iso8601.pyi b/pendulum/parsing/_iso8601.pyi new file mode 100644 index 0000000..b9ce5d4 --- /dev/null +++ b/pendulum/parsing/_iso8601.pyi @@ -0,0 +1,22 @@ +from __future__ import annotations + +from datetime import date +from datetime import datetime +from datetime import time + +class Duration: + + years: int = 0 + months: int = 0 + weeks: int = 0 + days: int = 0 
+ remaining_days: int = 0 + hours: int = 0 + minutes: int = 0 + seconds: int = 0 + remaining_seconds: int = 0 + microseconds: int = 0 + +def parse_iso8601( + text: str, +) -> datetime | date | time | Duration: ... diff --git a/pendulum/parsing/exceptions/__init__.py b/pendulum/parsing/exceptions/__init__.py index 997b0fa..05195b5 100644 --- a/pendulum/parsing/exceptions/__init__.py +++ b/pendulum/parsing/exceptions/__init__.py @@ -1,3 +1,6 @@ -class ParserError(ValueError):
-
- pass
+from __future__ import annotations + + +class ParserError(ValueError): + + pass diff --git a/pendulum/parsing/iso8601.py b/pendulum/parsing/iso8601.py index 40efa2f..907cf13 100644 --- a/pendulum/parsing/iso8601.py +++ b/pendulum/parsing/iso8601.py @@ -1,447 +1,454 @@ -from __future__ import division
-
-import datetime
-import re
-
-from ..constants import HOURS_PER_DAY
-from ..constants import MINUTES_PER_HOUR
-from ..constants import MONTHS_OFFSETS
-from ..constants import SECONDS_PER_MINUTE
-from ..duration import Duration
-from ..helpers import days_in_year
-from ..helpers import is_leap
-from ..helpers import is_long_year
-from ..helpers import week_day
-from ..tz.timezone import UTC
-from ..tz.timezone import FixedTimezone
-from .exceptions import ParserError
-
-
-ISO8601_DT = re.compile(
- # Date (optional)
- "^"
- "(?P<date>"
- " (?P<classic>" # Classic date (YYYY-MM-DD) or ordinal (YYYY-DDD)
- r" (?P<year>\d{4})" # Year
- " (?P<monthday>"
- r" (?P<monthsep>-)?(?P<month>\d{2})" # Month (optional)
- r" ((?P<daysep>-)?(?P<day>\d{1,2}))?" # Day (optional)
- " )?"
- " )"
- " |"
- " (?P<isocalendar>" # Calendar date (2016-W05 or 2016-W05-5)
- r" (?P<isoyear>\d{4})" # Year
- " (?P<weeksep>-)?" # Separator (optional)
- " W" # W separator
- r" (?P<isoweek>\d{2})" # Week number
- " (?P<weekdaysep>-)?" # Separator (optional)
- r" (?P<isoweekday>\d)?" # Weekday (optional)
- " )"
- ")?"
- # Time (optional)
- "(?P<time>"
- r" (?P<timesep>[T\ ])?" # Separator (T or space)
- r" (?P<hour>\d{1,2})(?P<minsep>:)?(?P<minute>\d{1,2})?(?P<secsep>:)?(?P<second>\d{1,2})?" # HH:mm:ss (optional mm and ss)
- # Subsecond part (optional)
- " (?P<subsecondsection>"
- " (?:[.,])" # Subsecond separator (optional)
- r" (?P<subsecond>\d{1,9})" # Subsecond
- " )?"
- # Timezone offset
- " (?P<tz>"
- r" (?:[-+])\d{2}:?(?:\d{2})?|Z" # Offset (+HH:mm or +HHmm or +HH or Z)
- " )?"
- ")?"
- "$",
- re.VERBOSE,
-)
-
-
-ISO8601_DURATION = re.compile(
- "^P" # Duration P indicator
- # Years, months and days (optional)
- "(?P<w>"
- r" (?P<weeks>\d+(?:[.,]\d+)?W)"
- ")?"
- "(?P<ymd>"
- r" (?P<years>\d+(?:[.,]\d+)?Y)?"
- r" (?P<months>\d+(?:[.,]\d+)?M)?"
- r" (?P<days>\d+(?:[.,]\d+)?D)?"
- ")?"
- "(?P<hms>"
- " (?P<timesep>T)" # Separator (T)
- r" (?P<hours>\d+(?:[.,]\d+)?H)?"
- r" (?P<minutes>\d+(?:[.,]\d+)?M)?"
- r" (?P<seconds>\d+(?:[.,]\d+)?S)?"
- ")?"
- "$",
- re.VERBOSE,
-)
-
-
-def parse_iso8601(text):
- """
- ISO 8601 compliant parser.
-
- :param text: The string to parse
- :type text: str
-
- :rtype: datetime.datetime or datetime.time or datetime.date
- """
- parsed = _parse_iso8601_duration(text)
- if parsed is not None:
- return parsed
-
- m = ISO8601_DT.match(text)
- if not m:
- raise ParserError("Invalid ISO 8601 string")
-
- ambiguous_date = False
- is_date = False
- is_time = False
- year = 0
- month = 1
- day = 1
- minute = 0
- second = 0
- microsecond = 0
- tzinfo = None
-
- if m:
- if m.group("date"):
- # A date has been specified
- is_date = True
-
- if m.group("isocalendar"):
- # We have a ISO 8601 string defined
- # by week number
- if (
- m.group("weeksep")
- and not m.group("weekdaysep")
- and m.group("isoweekday")
- ):
- raise ParserError("Invalid date string: {}".format(text))
-
- if not m.group("weeksep") and m.group("weekdaysep"):
- raise ParserError("Invalid date string: {}".format(text))
-
- try:
- date = _get_iso_8601_week(
- m.group("isoyear"), m.group("isoweek"), m.group("isoweekday")
- )
- except ParserError:
- raise
- except ValueError:
- raise ParserError("Invalid date string: {}".format(text))
-
- year = date["year"]
- month = date["month"]
- day = date["day"]
- else:
- # We have a classic date representation
- year = int(m.group("year"))
-
- if not m.group("monthday"):
- # No month and day
- month = 1
- day = 1
- else:
- if m.group("month") and m.group("day"):
- # Month and day
- if not m.group("daysep") and len(m.group("day")) == 1:
- # Ordinal day
- ordinal = int(m.group("month") + m.group("day"))
- leap = is_leap(year)
- months_offsets = MONTHS_OFFSETS[leap]
-
- if ordinal > months_offsets[13]:
- raise ParserError("Ordinal day is out of range")
-
- for i in range(1, 14):
- if ordinal <= months_offsets[i]:
- day = ordinal - months_offsets[i - 1]
- month = i - 1
-
- break
- else:
- month = int(m.group("month"))
- day = int(m.group("day"))
- else:
- # Only month
- if not m.group("monthsep"):
- # The date looks like 201207
- # which is invalid for a date
- # But it might be a time in the form hhmmss
- ambiguous_date = True
-
- month = int(m.group("month"))
- day = 1
-
- if not m.group("time"):
- # No time has been specified
- if ambiguous_date:
- # We can "safely" assume that the ambiguous date
- # was actually a time in the form hhmmss
- hhmmss = "{}{:0>2}".format(str(year), str(month))
-
- return datetime.time(int(hhmmss[:2]), int(hhmmss[2:4]), int(hhmmss[4:]))
-
- return datetime.date(year, month, day)
-
- if ambiguous_date:
- raise ParserError("Invalid date string: {}".format(text))
-
- if is_date and not m.group("timesep"):
- raise ParserError("Invalid date string: {}".format(text))
-
- if not is_date:
- is_time = True
-
- # Grabbing hh:mm:ss
- hour = int(m.group("hour"))
- minsep = m.group("minsep")
-
- if m.group("minute"):
- minute = int(m.group("minute"))
- elif minsep:
- raise ParserError("Invalid ISO 8601 time part")
-
- secsep = m.group("secsep")
- if secsep and not minsep and m.group("minute"):
- # minute/second separator but no hour/minute separator
- raise ParserError("Invalid ISO 8601 time part")
-
- if m.group("second"):
- if not secsep and minsep:
- # No minute/second separator but hour/minute separator
- raise ParserError("Invalid ISO 8601 time part")
-
- second = int(m.group("second"))
- elif secsep:
- raise ParserError("Invalid ISO 8601 time part")
-
- # Grabbing subseconds, if any
- if m.group("subsecondsection"):
- # Limiting to 6 chars
- subsecond = m.group("subsecond")[:6]
-
- microsecond = int("{:0<6}".format(subsecond))
-
- # Grabbing timezone, if any
- tz = m.group("tz")
- if tz:
- if tz == "Z":
- tzinfo = UTC
- else:
- negative = True if tz.startswith("-") else False
- tz = tz[1:]
- if ":" not in tz:
- if len(tz) == 2:
- tz = "{}00".format(tz)
-
- off_hour = tz[0:2]
- off_minute = tz[2:4]
- else:
- off_hour, off_minute = tz.split(":")
-
- offset = ((int(off_hour) * 60) + int(off_minute)) * 60
-
- if negative:
- offset = -1 * offset
-
- tzinfo = FixedTimezone(offset)
-
- if is_time:
- return datetime.time(hour, minute, second, microsecond)
-
- return datetime.datetime(
- year, month, day, hour, minute, second, microsecond, tzinfo=tzinfo
- )
-
-
-def _parse_iso8601_duration(text, **options):
- m = ISO8601_DURATION.match(text)
- if not m:
- return
-
- years = 0
- months = 0
- weeks = 0
- days = 0
- hours = 0
- minutes = 0
- seconds = 0
- microseconds = 0
- fractional = False
-
- if m.group("w"):
- # Weeks
- if m.group("ymd") or m.group("hms"):
- # Specifying anything more than weeks is not supported
- raise ParserError("Invalid duration string")
-
- _weeks = m.group("weeks")
- if not _weeks:
- raise ParserError("Invalid duration string")
-
- _weeks = _weeks.replace(",", ".").replace("W", "")
- if "." in _weeks:
- _weeks, portion = _weeks.split(".")
- weeks = int(_weeks)
- _days = int(portion) / 10 * 7
- days, hours = int(_days // 1), _days % 1 * HOURS_PER_DAY
- else:
- weeks = int(_weeks)
-
- if m.group("ymd"):
- # Years, months and/or days
- _years = m.group("years")
- _months = m.group("months")
- _days = m.group("days")
-
- # Checking order
- years_start = m.start("years") if _years else -3
- months_start = m.start("months") if _months else years_start + 1
- days_start = m.start("days") if _days else months_start + 1
-
- # Check correct order
- if not (years_start < months_start < days_start):
- raise ParserError("Invalid duration")
-
- if _years:
- _years = _years.replace(",", ".").replace("Y", "")
- if "." in _years:
- raise ParserError("Float years in duration are not supported")
- else:
- years = int(_years)
-
- if _months:
- if fractional:
- raise ParserError("Invalid duration")
-
- _months = _months.replace(",", ".").replace("M", "")
- if "." in _months:
- raise ParserError("Float months in duration are not supported")
- else:
- months = int(_months)
-
- if _days:
- if fractional:
- raise ParserError("Invalid duration")
-
- _days = _days.replace(",", ".").replace("D", "")
-
- if "." in _days:
- fractional = True
-
- _days, _hours = _days.split(".")
- days = int(_days)
- hours = int(_hours) / 10 * HOURS_PER_DAY
- else:
- days = int(_days)
-
- if m.group("hms"):
- # Hours, minutes and/or seconds
- _hours = m.group("hours") or 0
- _minutes = m.group("minutes") or 0
- _seconds = m.group("seconds") or 0
-
- # Checking order
- hours_start = m.start("hours") if _hours else -3
- minutes_start = m.start("minutes") if _minutes else hours_start + 1
- seconds_start = m.start("seconds") if _seconds else minutes_start + 1
-
- # Check correct order
- if not (hours_start < minutes_start < seconds_start):
- raise ParserError("Invalid duration")
-
- if _hours:
- if fractional:
- raise ParserError("Invalid duration")
-
- _hours = _hours.replace(",", ".").replace("H", "")
-
- if "." in _hours:
- fractional = True
-
- _hours, _mins = _hours.split(".")
- hours += int(_hours)
- minutes += int(_mins) / 10 * MINUTES_PER_HOUR
- else:
- hours += int(_hours)
-
- if _minutes:
- if fractional:
- raise ParserError("Invalid duration")
-
- _minutes = _minutes.replace(",", ".").replace("M", "")
-
- if "." in _minutes:
- fractional = True
-
- _minutes, _secs = _minutes.split(".")
- minutes += int(_minutes)
- seconds += int(_secs) / 10 * SECONDS_PER_MINUTE
- else:
- minutes += int(_minutes)
-
- if _seconds:
- if fractional:
- raise ParserError("Invalid duration")
-
- _seconds = _seconds.replace(",", ".").replace("S", "")
-
- if "." in _seconds:
- _seconds, _microseconds = _seconds.split(".")
- seconds += int(_seconds)
- microseconds += int("{:0<6}".format(_microseconds[:6]))
- else:
- seconds += int(_seconds)
-
- return Duration(
- years=years,
- months=months,
- weeks=weeks,
- days=days,
- hours=hours,
- minutes=minutes,
- seconds=seconds,
- microseconds=microseconds,
- )
-
-
-def _get_iso_8601_week(year, week, weekday):
- if not weekday:
- weekday = 1
- else:
- weekday = int(weekday)
-
- year = int(year)
- week = int(week)
-
- if week > 53 or week > 52 and not is_long_year(year):
- raise ParserError("Invalid week for week date")
-
- if weekday > 7:
- raise ParserError("Invalid weekday for week date")
-
- # We can't rely on strptime directly here since
- # it does not support ISO week date
- ordinal = week * 7 + weekday - (week_day(year, 1, 4) + 3)
-
- if ordinal < 1:
- # Previous year
- ordinal += days_in_year(year - 1)
- year -= 1
-
- if ordinal > days_in_year(year):
- # Next year
- ordinal -= days_in_year(year)
- year += 1
-
- fmt = "%Y-%j"
- string = "{}-{}".format(year, ordinal)
-
- dt = datetime.datetime.strptime(string, fmt)
-
- return {"year": dt.year, "month": dt.month, "day": dt.day}
+from __future__ import annotations + +import datetime +import re + +from typing import cast + +from pendulum.constants import HOURS_PER_DAY +from pendulum.constants import MINUTES_PER_HOUR +from pendulum.constants import MONTHS_OFFSETS +from pendulum.constants import SECONDS_PER_MINUTE +from pendulum.duration import Duration +from pendulum.helpers import days_in_year +from pendulum.helpers import is_leap +from pendulum.helpers import is_long_year +from pendulum.helpers import week_day +from pendulum.parsing.exceptions import ParserError +from pendulum.tz.timezone import UTC +from pendulum.tz.timezone import FixedTimezone + +ISO8601_DT = re.compile( + # Date (optional) # noqa: E800 + "^" + "(?P<date>" + " (?P<classic>" # Classic date (YYYY-MM-DD) or ordinal (YYYY-DDD) + r" (?P<year>\d{4})" # Year + " (?P<monthday>" + r" (?P<monthsep>-)?(?P<month>\d{2})" # Month (optional) + r" ((?P<daysep>-)?(?P<day>\d{1,2}))?" # Day (optional) + " )?" + " )" + " |" + " (?P<isocalendar>" # Calendar date (2016-W05 or 2016-W05-5) + r" (?P<isoyear>\d{4})" # Year + " (?P<weeksep>-)?" # Separator (optional) + " W" # W separator + r" (?P<isoweek>\d{2})" # Week number + " (?P<weekdaysep>-)?" # Separator (optional) + r" (?P<isoweekday>\d)?" # Weekday (optional) + " )" + ")?" + # Time (optional) # noqa: E800 + "(?P<time>" + r" (?P<timesep>[T\ ])?" # Separator (T or space) + r" (?P<hour>\d{1,2})(?P<minsep>:)?(?P<minute>\d{1,2})?(?P<secsep>:)?(?P<second>\d{1,2})?" # HH:mm:ss (optional mm and ss) + # Subsecond part (optional) + " (?P<subsecondsection>" + " (?:[.,])" # Subsecond separator (optional) + r" (?P<subsecond>\d{1,9})" # Subsecond + " )?" + # Timezone offset + " (?P<tz>" + r" (?:[-+])\d{2}:?(?:\d{2})?|Z" # Offset (+HH:mm or +HHmm or +HH or Z) + " )?" + ")?" + "$", + re.VERBOSE, +) + +ISO8601_DURATION = re.compile( + "^P" # Duration P indicator + # Years, months and days (optional) # noqa: E800 + "(?P<w>" + r" (?P<weeks>\d+(?:[.,]\d+)?W)" + ")?" 
+ "(?P<ymd>" + r" (?P<years>\d+(?:[.,]\d+)?Y)?" + r" (?P<months>\d+(?:[.,]\d+)?M)?" + r" (?P<days>\d+(?:[.,]\d+)?D)?" + ")?" + "(?P<hms>" + " (?P<timesep>T)" # Separator (T) + r" (?P<hours>\d+(?:[.,]\d+)?H)?" + r" (?P<minutes>\d+(?:[.,]\d+)?M)?" + r" (?P<seconds>\d+(?:[.,]\d+)?S)?" + ")?" + "$", + re.VERBOSE, +) + + +def parse_iso8601( + text: str, +) -> datetime.datetime | datetime.date | datetime.time | Duration: + """ + ISO 8601 compliant parser. + + :param text: The string to parse + :type text: str + + :rtype: datetime.datetime or datetime.time or datetime.date + """ + parsed = _parse_iso8601_duration(text) + if parsed is not None: + return parsed + + m = ISO8601_DT.match(text) + if not m: + raise ParserError("Invalid ISO 8601 string") + + ambiguous_date = False + is_date = False + is_time = False + year = 0 + month = 1 + day = 1 + minute = 0 + second = 0 + microsecond = 0 + tzinfo: FixedTimezone | None = None + + if m.group("date"): + # A date has been specified + is_date = True + + if m.group("isocalendar"): + # We have a ISO 8601 string defined + # by week number + if ( + m.group("weeksep") + and not m.group("weekdaysep") + and m.group("isoweekday") + ): + raise ParserError(f"Invalid date string: {text}") + + if not m.group("weeksep") and m.group("weekdaysep"): + raise ParserError(f"Invalid date string: {text}") + + try: + date = _get_iso_8601_week( + m.group("isoyear"), m.group("isoweek"), m.group("isoweekday") + ) + except ParserError: + raise + except ValueError: + raise ParserError(f"Invalid date string: {text}") + + year = date["year"] + month = date["month"] + day = date["day"] + else: + # We have a classic date representation + year = int(m.group("year")) + + if not m.group("monthday"): + # No month and day + month = 1 + day = 1 + else: + if m.group("month") and m.group("day"): + # Month and day + if not m.group("daysep") and len(m.group("day")) == 1: + # Ordinal day + ordinal = int(m.group("month") + m.group("day")) + leap = is_leap(year) + 
months_offsets = MONTHS_OFFSETS[leap] + + if ordinal > months_offsets[13]: + raise ParserError("Ordinal day is out of range") + + for i in range(1, 14): + if ordinal <= months_offsets[i]: + day = ordinal - months_offsets[i - 1] + month = i - 1 + + break + else: + month = int(m.group("month")) + day = int(m.group("day")) + else: + # Only month + if not m.group("monthsep"): + # The date looks like 201207 + # which is invalid for a date + # But it might be a time in the form hhmmss + ambiguous_date = True + + month = int(m.group("month")) + day = 1 + + if not m.group("time"): + # No time has been specified + if ambiguous_date: + # We can "safely" assume that the ambiguous date + # was actually a time in the form hhmmss + hhmmss = f"{str(year)}{str(month):0>2}" + + return datetime.time(int(hhmmss[:2]), int(hhmmss[2:4]), int(hhmmss[4:])) + + return datetime.date(year, month, day) + + if ambiguous_date: + raise ParserError(f"Invalid date string: {text}") + + if is_date and not m.group("timesep"): + raise ParserError(f"Invalid date string: {text}") + + if not is_date: + is_time = True + + # Grabbing hh:mm:ss + hour = int(m.group("hour")) + minsep = m.group("minsep") + + if m.group("minute"): + minute = int(m.group("minute")) + elif minsep: + raise ParserError("Invalid ISO 8601 time part") + + secsep = m.group("secsep") + if secsep and not minsep and m.group("minute"): + # minute/second separator but no hour/minute separator + raise ParserError("Invalid ISO 8601 time part") + + if m.group("second"): + if not secsep and minsep: + # No minute/second separator but hour/minute separator + raise ParserError("Invalid ISO 8601 time part") + + second = int(m.group("second")) + elif secsep: + raise ParserError("Invalid ISO 8601 time part") + + # Grabbing subseconds, if any + if m.group("subsecondsection"): + # Limiting to 6 chars + subsecond = m.group("subsecond")[:6] + + microsecond = int(f"{subsecond:0<6}") + + # Grabbing timezone, if any + tz = m.group("tz") + if tz: + if tz == 
"Z": + tzinfo = UTC + else: + negative = bool(tz.startswith("-")) + tz = tz[1:] + if ":" not in tz: + if len(tz) == 2: + tz = f"{tz}00" + + off_hour = tz[0:2] + off_minute = tz[2:4] + else: + off_hour, off_minute = tz.split(":") + + offset = ((int(off_hour) * 60) + int(off_minute)) * 60 + + if negative: + offset = -1 * offset + + tzinfo = FixedTimezone(offset) + + if is_time: + return datetime.time(hour, minute, second, microsecond) + + return datetime.datetime( + year, month, day, hour, minute, second, microsecond, tzinfo=tzinfo + ) + + +def _parse_iso8601_duration(text: str, **options: str) -> Duration | None: + m = ISO8601_DURATION.match(text) + if not m: + return None + + years = 0 + months = 0 + weeks = 0 + days: int | float = 0 + hours: int | float = 0 + minutes: int | float = 0 + seconds: int | float = 0 + microseconds: int | float = 0 + fractional = False + + _days: str | float + _hour: str | int | None + _minutes: str | int | None + _seconds: str | int | None + if m.group("w"): + # Weeks + if m.group("ymd") or m.group("hms"): + # Specifying anything more than weeks is not supported + raise ParserError("Invalid duration string") + + _weeks = m.group("weeks") + if not _weeks: + raise ParserError("Invalid duration string") + + _weeks = _weeks.replace(",", ".").replace("W", "") + if "." 
in _weeks: + _weeks, portion = _weeks.split(".") + weeks = int(_weeks) + _days = int(portion) / 10 * 7 + days, hours = int(_days // 1), int(_days % 1 * HOURS_PER_DAY) + else: + weeks = int(_weeks) + + if m.group("ymd"): + # Years, months and/or days + _years = m.group("years") + _months = m.group("months") + _days = m.group("days") + + # Checking order + years_start = m.start("years") if _years else -3 + months_start = m.start("months") if _months else years_start + 1 + days_start = m.start("days") if _days else months_start + 1 + + # Check correct order + if not (years_start < months_start < days_start): + raise ParserError("Invalid duration") + + if _years: + _years = _years.replace(",", ".").replace("Y", "") + if "." in _years: + raise ParserError("Float years in duration are not supported") + else: + years = int(_years) + + if _months: + if fractional: + raise ParserError("Invalid duration") + + _months = _months.replace(",", ".").replace("M", "") + if "." in _months: + raise ParserError("Float months in duration are not supported") + else: + months = int(_months) + + if _days: + if fractional: + raise ParserError("Invalid duration") + + _days = _days.replace(",", ".").replace("D", "") + + if "." 
in _days: + fractional = True + + _days, _hours = _days.split(".") + days = int(_days) + hours = int(_hours) / 10 * HOURS_PER_DAY + else: + days = int(_days) + + if m.group("hms"): + # Hours, minutes and/or seconds + _hours = m.group("hours") or 0 + _minutes = m.group("minutes") or 0 + _seconds = m.group("seconds") or 0 + + # Checking order + hours_start = m.start("hours") if _hours else -3 + minutes_start = m.start("minutes") if _minutes else hours_start + 1 + seconds_start = m.start("seconds") if _seconds else minutes_start + 1 + + # Check correct order + if not (hours_start < minutes_start < seconds_start): + raise ParserError("Invalid duration") + + if _hours: + if fractional: + raise ParserError("Invalid duration") + + _hours = cast(str, _hours).replace(",", ".").replace("H", "") + + if "." in _hours: + fractional = True + + _hours, _mins = _hours.split(".") + hours += int(_hours) + minutes += int(_mins) / 10 * MINUTES_PER_HOUR + else: + hours += int(_hours) + + if _minutes: + if fractional: + raise ParserError("Invalid duration") + + _minutes = cast(str, _minutes).replace(",", ".").replace("M", "") + + if "." in _minutes: + fractional = True + + _minutes, _secs = _minutes.split(".") + minutes += int(_minutes) + seconds += int(_secs) / 10 * SECONDS_PER_MINUTE + else: + minutes += int(_minutes) + + if _seconds: + if fractional: + raise ParserError("Invalid duration") + + _seconds = cast(str, _seconds).replace(",", ".").replace("S", "") + + if "." 
in _seconds: + _seconds, _microseconds = _seconds.split(".") + seconds += int(_seconds) + microseconds += int(f"{_microseconds[:6]:0<6}") + else: + seconds += int(_seconds) + + return Duration( + years=years, + months=months, + weeks=weeks, + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + microseconds=microseconds, + ) + + +def _get_iso_8601_week( + year: int | str, week: int | str, weekday: int | str +) -> dict[str, int]: + if not weekday: + weekday = 1 + else: + weekday = int(weekday) + + year = int(year) + week = int(week) + + if week > 53 or week > 52 and not is_long_year(year): + raise ParserError("Invalid week for week date") + + if weekday > 7: + raise ParserError("Invalid weekday for week date") + + # We can't rely on strptime directly here since + # it does not support ISO week date + ordinal = week * 7 + weekday - (week_day(year, 1, 4) + 3) + + if ordinal < 1: + # Previous year + ordinal += days_in_year(year - 1) + year -= 1 + + if ordinal > days_in_year(year): + # Next year + ordinal -= days_in_year(year) + year += 1 + + fmt = "%Y-%j" + string = f"{year}-{ordinal}" + + dt = datetime.datetime.strptime(string, fmt) + + return {"year": dt.year, "month": dt.month, "day": dt.day} |