summary | refs | log | tree | commit | diff | stats
path: root/pendulum/parsing
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org>, 2021-01-30 08:13:47 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org>, 2021-01-30 08:13:47 +0000
commit: 1199780155f666b6806d563a29d093a251664009 (patch)
tree: 68716d9c1ee3205f474a04d74d5653eddf94a9f2 /pendulum/parsing
parent: Initial commit. (diff)
download: pendulum-1199780155f666b6806d563a29d093a251664009.tar.xz
download: pendulum-1199780155f666b6806d563a29d093a251664009.zip
Adding upstream version 2.1.2. (upstream/2.1.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'pendulum/parsing')
-rw-r--r--  pendulum/parsing/__init__.py             234
-rw-r--r--  pendulum/parsing/_iso8601.c             1371
-rw-r--r--  pendulum/parsing/exceptions/__init__.py    3
-rw-r--r--  pendulum/parsing/iso8601.py              447
4 files changed, 2055 insertions, 0 deletions
diff --git a/pendulum/parsing/__init__.py b/pendulum/parsing/__init__.py
new file mode 100644
index 0000000..400f119
--- /dev/null
+++ b/pendulum/parsing/__init__.py
@@ -0,0 +1,234 @@
+import copy
+import os
+import re
+import struct
+
+from datetime import date
+from datetime import datetime
+from datetime import time
+
+from dateutil import parser
+
+from .exceptions import ParserError
+
+
+with_extensions = os.getenv("PENDULUM_EXTENSIONS", "1") == "1"
+
+try:
+ if not with_extensions or struct.calcsize("P") == 4:
+ raise ImportError()
+
+ from ._iso8601 import parse_iso8601
+except ImportError:
+ from .iso8601 import parse_iso8601
+
+
+COMMON = re.compile(
+ # Date (optional)
+ "^"
+ "(?P<date>"
+ " (?P<classic>" # Classic date (YYYY-MM-DD)
+ r" (?P<year>\d{4})" # Year
+ " (?P<monthday>"
+ r" (?P<monthsep>[/:])?(?P<month>\d{2})" # Month (optional)
+ r" ((?P<daysep>[/:])?(?P<day>\d{2}))" # Day (optional)
+ " )?"
+ " )"
+ ")?"
+ # Time (optional)
+ "(?P<time>"
+ r" (?P<timesep>\ )?" # Separator (space)
+ r" (?P<hour>\d{1,2}):(?P<minute>\d{1,2})?(?::(?P<second>\d{1,2}))?" # HH:mm:ss (optional mm and ss)
+ # Subsecond part (optional)
+ " (?P<subsecondsection>"
+ " (?:[.|,])" # Subsecond separator (optional)
+ r" (?P<subsecond>\d{1,9})" # Subsecond
+ " )?"
+ ")?"
+ "$",
+ re.VERBOSE,
+)
+
+
+DEFAULT_OPTIONS = {
+ "day_first": False,
+ "year_first": True,
+ "strict": True,
+ "exact": False,
+ "now": None,
+}
+
+
+def parse(text, **options):
+ """
+ Parses a string with the given options.
+
+ :param text: The string to parse.
+ :type text: str
+
+ :rtype: Parsed
+ """
+ _options = copy.copy(DEFAULT_OPTIONS)
+ _options.update(options)
+
+ return _normalize(_parse(text, **_options), **_options)
+
+
+def _normalize(parsed, **options):
+ """
+ Normalizes the parsed element.
+
+ :param parsed: The parsed elements.
+ :type parsed: Parsed
+
+ :rtype: Parsed
+ """
+ if options.get("exact"):
+ return parsed
+
+ if isinstance(parsed, time):
+ now = options["now"] or datetime.now()
+
+ return datetime(
+ now.year,
+ now.month,
+ now.day,
+ parsed.hour,
+ parsed.minute,
+ parsed.second,
+ parsed.microsecond,
+ )
+ elif isinstance(parsed, date) and not isinstance(parsed, datetime):
+ return datetime(parsed.year, parsed.month, parsed.day)
+
+ return parsed
+
+
+def _parse(text, **options):
+ # Trying to parse ISO8601
+ try:
+ return parse_iso8601(text)
+ except ValueError:
+ pass
+
+ try:
+ return _parse_iso8601_interval(text)
+ except ValueError:
+ pass
+
+ try:
+ return _parse_common(text, **options)
+ except ParserError:
+ pass
+
+ # We couldn't parse the string
+ # so we fallback on the dateutil parser
+ # If not strict
+ if options.get("strict", True):
+ raise ParserError("Unable to parse string [{}]".format(text))
+
+ try:
+ dt = parser.parse(
+ text, dayfirst=options["day_first"], yearfirst=options["year_first"]
+ )
+ except ValueError:
+ raise ParserError("Invalid date string: {}".format(text))
+
+ return dt
+
+
+def _parse_common(text, **options):
+ """
+ Tries to parse the string as a common datetime format.
+
+ :param text: The string to parse.
+ :type text: str
+
+ :rtype: dict or None
+ """
+ m = COMMON.match(text)
+ has_date = False
+ year = 0
+ month = 1
+ day = 1
+
+ if not m:
+ raise ParserError("Invalid datetime string")
+
+ if m.group("date"):
+ # A date has been specified
+ has_date = True
+
+ year = int(m.group("year"))
+
+ if not m.group("monthday"):
+ # No month and day
+ month = 1
+ day = 1
+ else:
+ if options["day_first"]:
+ month = int(m.group("day"))
+ day = int(m.group("month"))
+ else:
+ month = int(m.group("month"))
+ day = int(m.group("day"))
+
+ if not m.group("time"):
+ return date(year, month, day)
+
+ # Grabbing hh:mm:ss
+ hour = int(m.group("hour"))
+
+ minute = int(m.group("minute"))
+
+ if m.group("second"):
+ second = int(m.group("second"))
+ else:
+ second = 0
+
+ # Grabbing subseconds, if any
+ microsecond = 0
+ if m.group("subsecondsection"):
+ # Limiting to 6 chars
+ subsecond = m.group("subsecond")[:6]
+
+ microsecond = int("{:0<6}".format(subsecond))
+
+ if has_date:
+ return datetime(year, month, day, hour, minute, second, microsecond)
+
+ return time(hour, minute, second, microsecond)
+
+
+class _Interval:
+ """
+ Special class to handle ISO 8601 intervals
+ """
+
+ def __init__(self, start=None, end=None, duration=None):
+ self.start = start
+ self.end = end
+ self.duration = duration
+
+
+def _parse_iso8601_interval(text):
+ if "/" not in text:
+ raise ParserError("Invalid interval")
+
+ first, last = text.split("/")
+ start = end = duration = None
+
+ if first[0] == "P":
+ # duration/end
+ duration = parse_iso8601(first)
+ end = parse_iso8601(last)
+ elif last[0] == "P":
+ # start/duration
+ start = parse_iso8601(first)
+ duration = parse_iso8601(last)
+ else:
+ # start/end
+ start = parse_iso8601(first)
+ end = parse_iso8601(last)
+
+ return _Interval(start, end, duration)
diff --git a/pendulum/parsing/_iso8601.c b/pendulum/parsing/_iso8601.c
new file mode 100644
index 0000000..2e14e4b
--- /dev/null
+++ b/pendulum/parsing/_iso8601.c
@@ -0,0 +1,1371 @@
+/* ------------------------------------------------------------------------- */
+
+#include <Python.h>
+#include <datetime.h>
+#include <structmember.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#ifndef PyVarObject_HEAD_INIT
+#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
+#endif
+
+
+/* ------------------------------------------------------------------------- */
+
+#define EPOCH_YEAR 1970
+
+// Days in a normal (N) and in a leap (L) year.
+#define DAYS_PER_N_YEAR 365
+#define DAYS_PER_L_YEAR 366
+
+#define USECS_PER_SEC 1000000
+
+#define SECS_PER_MIN 60
+#define SECS_PER_HOUR (60 * SECS_PER_MIN)
+#define SECS_PER_DAY (SECS_PER_HOUR * 24)
+
+// 400-year chunks always have 146097 days (20871 weeks).
+#define DAYS_PER_400_YEARS 146097L
+#define SECS_PER_400_YEARS ((int64_t)DAYS_PER_400_YEARS * (int64_t)SECS_PER_DAY)
+
+// The number of seconds in an aligned 100-year chunk, for those that
+// do not begin with a leap year and those that do respectively.
+const int64_t SECS_PER_100_YEARS[2] = {
+ (uint64_t)(76L * DAYS_PER_N_YEAR + 24L * DAYS_PER_L_YEAR) * SECS_PER_DAY,
+ (uint64_t)(75L * DAYS_PER_N_YEAR + 25L * DAYS_PER_L_YEAR) * SECS_PER_DAY
+};
+
+// The number of seconds in an aligned 4-year chunk, for those that
+// do not begin with a leap year and those that do respectively.
+const int32_t SECS_PER_4_YEARS[2] = {
+ (4 * DAYS_PER_N_YEAR + 0 * DAYS_PER_L_YEAR) * SECS_PER_DAY,
+ (3 * DAYS_PER_N_YEAR + 1 * DAYS_PER_L_YEAR) * SECS_PER_DAY
+};
+
+// The number of seconds in non-leap and leap years respectively.
+const int32_t SECS_PER_YEAR[2] = {
+ DAYS_PER_N_YEAR * SECS_PER_DAY,
+ DAYS_PER_L_YEAR * SECS_PER_DAY
+};
+
+#define MONTHS_PER_YEAR 12
+
+// The month lengths in non-leap and leap years respectively.
+// Index 0 holds a -1 placeholder so that months can be looked up 1-based.
+const int32_t DAYS_PER_MONTHS[2][13] = {
+ {-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
+ {-1, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
+};
+
+// The day offsets of the beginning of each (1-based) month in non-leap
+// and leap years respectively.
+// For example, in a leap year there are 335 days before December.
+// The extra last entry (index 13) is the total number of days in the year.
+const int32_t MONTHS_OFFSETS[2][14] = {
+ {-1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
+ {-1, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
+};
+
+// Per-month term added by week_day(); indexed with month - 1.
+const int DAY_OF_WEEK_TABLE[12] = {
+ 0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4
+};
+
+// 0-based weekday and month numbers.
+// NOTE(review): no use of the TM_* names is visible in this part of the file.
+#define TM_SUNDAY 0
+#define TM_MONDAY 1
+#define TM_TUESDAY 2
+#define TM_WEDNESDAY 3
+#define TM_THURSDAY 4
+#define TM_FRIDAY 5
+#define TM_SATURDAY 6
+
+#define TM_JANUARY 0
+#define TM_FEBRUARY 1
+#define TM_MARCH 2
+#define TM_APRIL 3
+#define TM_MAY 4
+#define TM_JUNE 5
+#define TM_JULY 6
+#define TM_AUGUST 7
+#define TM_SEPTEMBER 8
+#define TM_OCTOBER 9
+#define TM_NOVEMBER 10
+#define TM_DECEMBER 11
+
+// Parsing errors
+// Each code is the index of its message in PARSER_ERRORS below, so both
+// lists must be kept in the same order. The parsers store one of these
+// codes in Parsed.error before returning NULL.
+const int PARSER_INVALID_ISO8601 = 0;
+const int PARSER_INVALID_DATE = 1;
+const int PARSER_INVALID_TIME = 2;
+const int PARSER_INVALID_WEEK_DATE = 3;
+const int PARSER_INVALID_WEEK_NUMBER = 4;
+const int PARSER_INVALID_WEEKDAY_NUMBER = 5;
+const int PARSER_INVALID_ORDINAL_DAY_FOR_YEAR = 6;
+const int PARSER_INVALID_MONTH_OR_DAY = 7;
+const int PARSER_INVALID_MONTH = 8;
+const int PARSER_INVALID_DAY_FOR_MONTH = 9;
+const int PARSER_INVALID_HOUR = 10;
+const int PARSER_INVALID_MINUTE = 11;
+const int PARSER_INVALID_SECOND = 12;
+const int PARSER_INVALID_SUBSECOND = 13;
+const int PARSER_INVALID_TZ_OFFSET = 14;
+const int PARSER_INVALID_DURATION = 15;
+const int PARSER_INVALID_DURATION_FLOAT_YEAR_MONTH_NOT_SUPPORTED = 16;
+
+// Messages used by parse_iso8601() for the ValueError it raises.
+const char PARSER_ERRORS[17][80] = {
+ "Invalid ISO 8601 string",
+ "Invalid date",
+ "Invalid time",
+ "Invalid week date",
+ "Invalid week number",
+ "Invalid weekday number",
+ "Invalid ordinal day for year",
+ "Invalid month and/or day",
+ "Invalid month",
+ "Invalid day for month",
+ "Invalid hour",
+ "Invalid minute",
+ "Invalid second",
+ "Invalid subsecond",
+ "Invalid timezone offset",
+ "Invalid duration",
+ "Float years and months are not supported"
+};
+
+/* ------------------------------------------------------------------------- */
+
+
+/*
+ * Helper term of the weekday computations below: y, plus one for every
+ * 4th year, minus one for every 100th, plus one for every 400th.
+ */
+int p(int y) {
+    int every_4 = y / 4;
+    int every_100 = y / 100;
+    int every_400 = y / 400;
+
+    return y + every_4 - every_100 + every_400;
+}
+
+/*
+ * Returns 1 when year is divisible by 4 and either not divisible by 100
+ * or divisible by 400, 0 otherwise.
+ */
+int is_leap(int year) {
+    if (year % 4 != 0) {
+        return 0;
+    }
+
+    if (year % 100 != 0) {
+        return 1;
+    }
+
+    return year % 400 == 0;
+}
+
+/*
+ * Weekday number of the given date, computed from p() and
+ * DAY_OF_WEEK_TABLE; a zero remainder is reported as 7.
+ * month is used as a 1-based index into DAY_OF_WEEK_TABLE.
+ */
+int week_day(int year, int month, int day) {
+    /* January and February are counted with the previous year. */
+    int shifted_year = (month < 3) ? year - 1 : year;
+    int weekday = (p(shifted_year) + DAY_OF_WEEK_TABLE[month - 1] + day) % 7;
+
+    return weekday ? weekday : 7;
+}
+
+/* Number of days in the given year: 366 for leap years, 365 otherwise. */
+int days_in_year(int year) {
+    return is_leap(year) ? DAYS_PER_L_YEAR : DAYS_PER_N_YEAR;
+}
+
+/*
+ * Returns 1 when p(year) % 7 is 4 or p(year - 1) % 7 is 3, 0 otherwise.
+ * The datetime parser only accepts week number 53 for such years.
+ */
+int is_long_year(int year) {
+    if (p(year) % 7 == 4) {
+        return 1;
+    }
+
+    return p(year - 1) % 7 == 3;
+}
+
+
+/* ------------------------ Custom Types ------------------------------- */
+
+
+/*
+ * class FixedOffset(tzinfo):
+ *
+ * Instance layout of the tzinfo subclass attached to parsed datetimes
+ * that carry a UTC offset.
+ */
+typedef struct {
+ PyObject_HEAD
+ int offset; // UTC offset in seconds (passed as the seconds of a timedelta)
+ char *tzname; // Explicit name, or NULL to have tzname() format the offset
+} FixedOffset;
+
+/*
+ * def __init__(self, offset, tzname=None):
+ *     self.offset = offset
+ *     self.tzname = tzname
+ *
+ * Returns 0 on success and -1 (with an exception set by the argument
+ * parser) when the arguments do not match "i|s".
+*/
+static int FixedOffset_init(FixedOffset *self, PyObject *args, PyObject *kwargs) {
+ int offset;
+ char *tzname = NULL;
+
+ static char *kwlist[] = {"offset", "tzname", NULL};
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", kwlist, &offset, &tzname))
+ return -1;
+
+ self->offset = offset;
+ // NOTE(review): the "s" format yields a pointer into the argument object's
+ // own buffer and no reference to that object is kept here, so this pointer
+ // looks like it can outlive the string it points into - confirm and, if so,
+ // store an owned copy (which also needs a matching tp_dealloc).
+ self->tzname = tzname;
+
+ return 0;
+}
+
+/*
+ * def utcoffset(self, dt):
+ *     return timedelta(seconds=self.offset)
+ */
+static PyObject *FixedOffset_utcoffset(FixedOffset *self, PyObject *args) {
+    const int days = 0;
+    const int microseconds = 0;
+
+    return PyDelta_FromDSU(days, self->offset, microseconds);
+}
+
+/*
+ * def dst(self, dt):
+ *     return timedelta(0)
+ *
+ * dst() is the daylight saving adjustment that is included in utcoffset(),
+ * not the offset itself. A fixed offset has no such adjustment, so the
+ * answer is always a zero timedelta.
+ */
+static PyObject *FixedOffset_dst(FixedOffset *self, PyObject *args) {
+    return PyDelta_FromDSU(0, 0, 0);
+}
+
+/*
+ * def tzname(self, dt):
+ *     if self.tzname is not None:
+ *         return self.tzname
+ *     sign = '+'
+ *     if self.offset < 0:
+ *         sign = '-'
+ *     seconds = abs(self.offset)
+ *     return "%s%02d:%02d" % (sign, seconds // 3600, seconds // 60 % 60)
+ */
+static PyObject *FixedOffset_tzname(FixedOffset *self, PyObject *args) {
+    // Large enough for the sign, the hour count of any int offset,
+    // ":MM" and the terminating NUL.
+    char tzname_[32];
+    char sign = '+';
+    unsigned int offset;
+
+    if (self->tzname != NULL) {
+        return PyUnicode_FromString(self->tzname);
+    }
+
+    if (self->offset < 0) {
+        sign = '-';
+        // Negate as unsigned so that the most negative int is handled too
+        offset = 0u - (unsigned int) self->offset;
+    } else {
+        offset = (unsigned int) self->offset;
+    }
+
+    // Bounded write: the offset can be set to any int from Python, in which
+    // case the hour count has more than two digits.
+    PyOS_snprintf(
+        tzname_,
+        sizeof(tzname_),
+        "%c%02u:%02u",
+        sign,
+        offset / SECS_PER_HOUR,
+        offset / SECS_PER_MIN % SECS_PER_MIN
+    );
+
+    return PyUnicode_FromString(tzname_);
+}
+
+/*
+ * def __repr__(self):
+ *     return self.tzname(None)
+ *
+ * Also installed as tp_str in FixedOffset_type.
+ */
+static PyObject *FixedOffset_repr(FixedOffset *self) {
+    PyObject *name = FixedOffset_tzname(self, NULL);
+
+    return name;
+}
+
+/*
+ * Class member / class attributes
+ *
+ * Exposes the C field "offset" as a Python attribute. The tzname field is
+ * not exposed as an attribute; it is only reachable through tzname().
+ */
+static PyMemberDef FixedOffset_members[] = {
+ {"offset", T_INT, offsetof(FixedOffset, offset), 0, "UTC offset"},
+ {NULL}
+};
+
+/*
+ * Class methods
+ *
+ * The three tzinfo methods, each registered with METH_VARARGS.
+ */
+static PyMethodDef FixedOffset_methods[] = {
+ {"utcoffset", (PyCFunction)FixedOffset_utcoffset, METH_VARARGS, ""},
+ {"dst", (PyCFunction)FixedOffset_dst, METH_VARARGS, ""},
+ {"tzname", (PyCFunction)FixedOffset_tzname, METH_VARARGS, ""},
+ {NULL}
+};
+
+/*
+ * Type object of FixedOffset. Only the leading slots are filled in here;
+ * tp_new, tp_base, tp_methods, tp_members and tp_init are assigned in
+ * PyInit__iso8601() before PyType_Ready() is called.
+ */
+static PyTypeObject FixedOffset_type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "FixedOffset_type", /* tp_name */
+ sizeof(FixedOffset), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ 0, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_as_async */
+ (reprfunc)FixedOffset_repr, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ (reprfunc)FixedOffset_repr, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /* tp_flags */
+ "TZInfo with fixed offset", /* tp_doc */
+};
+
+/*
+ * Instantiate new FixedOffset_type object
+ * Skip overhead of calling PyObject_New and PyObject_Init.
+ * Directly allocate object.
+ *
+ * offset is the UTC offset in seconds. name is stored as is (it is not
+ * copied), so it must stay valid for as long as the object lives; it may
+ * be NULL.
+ *
+ * Returns a new reference, or NULL when the allocation failed.
+ */
+static PyObject *new_fixed_offset_ex(int offset, char *name, PyTypeObject *type) {
+    FixedOffset *self = (FixedOffset *) (type->tp_alloc(type, 0));
+
+    // Both fields are only written once the allocation is known to have
+    // succeeded.
+    if (self != NULL) {
+        self->offset = offset;
+        self->tzname = name;
+    }
+
+    return (PyObject *) self;
+}
+
+#define new_fixed_offset(offset, name) new_fixed_offset_ex(offset, name, &FixedOffset_type)
+
+
+/*
+ * class Duration():
+ *
+ * Instance layout of the object returned for parsed ISO 8601 durations:
+ * one int per component.
+ */
+typedef struct {
+ PyObject_HEAD
+ int years;
+ int months;
+ int weeks;
+ int days;
+ int hours;
+ int minutes;
+ int seconds;
+ int microseconds;
+} Duration;
+
+/*
+ * def __init__(self, years, months, weeks, days, hours, minutes, seconds, microseconds):
+ *     self.years = years
+ *     # ...
+ *
+ * All eight values are required, positional ints. Returns 0 on success
+ * and -1 when the arguments could not be parsed.
+*/
+static int Duration_init(Duration *self, PyObject *args, PyObject *kwargs) {
+    // Parsed into temporaries first: self is only touched on success
+    int parts[8];
+
+    if (!PyArg_ParseTuple(
+            args, "iiiiiiii",
+            &parts[0], &parts[1], &parts[2], &parts[3],
+            &parts[4], &parts[5], &parts[6], &parts[7])) {
+        return -1;
+    }
+
+    self->years = parts[0];
+    self->months = parts[1];
+    self->weeks = parts[2];
+    self->days = parts[3];
+    self->hours = parts[4];
+    self->minutes = parts[5];
+    self->seconds = parts[6];
+    self->microseconds = parts[7];
+
+    return 0;
+}
+
+/*
+ * def __repr__(self):
+ *     return '{} years {} months {} weeks {} days {} hours {} minutes {} seconds {} microseconds'.format(
+ *         self.years, self.months, self.weeks, self.days, self.hours, self.minutes, self.seconds, self.microseconds
+ *     )
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *Duration_repr(Duration *self) {
+    // The string is built by Python itself, which sizes the result from the
+    // actual values: the fixed text alone is 66 characters, so a fixed
+    // 82-byte buffer cannot hold eight arbitrary ints.
+    return PyUnicode_FromFormat(
+        "%d years %d months %d weeks %d days %d hours %d minutes %d seconds %d microseconds",
+        self->years,
+        self->months,
+        self->weeks,
+        self->days,
+        self->hours,
+        self->minutes,
+        self->seconds,
+        self->microseconds
+    );
+}
+
+/*
+ * Instantiate new Duration_type object
+ * Skip overhead of calling PyObject_New and PyObject_Init.
+ * Directly allocate object.
+ *
+ * Returns a new reference holding the given components, or NULL when the
+ * allocation failed.
+ */
+static PyObject *new_duration_ex(int years, int months, int weeks, int days, int hours, int minutes, int seconds, int microseconds, PyTypeObject *type) {
+    Duration *duration = (Duration *) (type->tp_alloc(type, 0));
+
+    if (duration == NULL) {
+        return NULL;
+    }
+
+    duration->years = years;
+    duration->months = months;
+    duration->weeks = weeks;
+    duration->days = days;
+    duration->hours = hours;
+    duration->minutes = minutes;
+    duration->seconds = seconds;
+    duration->microseconds = microseconds;
+
+    return (PyObject *) duration;
+}
+
+/*
+ * Class member / class attributes
+ *
+ * One Python attribute per C field. "remaining_days" and
+ * "remaining_seconds" are second names for the days and seconds fields:
+ * they use the same offsets.
+ */
+static PyMemberDef Duration_members[] = {
+ {"years", T_INT, offsetof(Duration, years), 0, "years in duration"},
+ {"months", T_INT, offsetof(Duration, months), 0, "months in duration"},
+ {"weeks", T_INT, offsetof(Duration, weeks), 0, "weeks in duration"},
+ {"days", T_INT, offsetof(Duration, days), 0, "days in duration"},
+ {"remaining_days", T_INT, offsetof(Duration, days), 0, "days in duration"},
+ {"hours", T_INT, offsetof(Duration, hours), 0, "hours in duration"},
+ {"minutes", T_INT, offsetof(Duration, minutes), 0, "minutes in duration"},
+ {"seconds", T_INT, offsetof(Duration, seconds), 0, "seconds in duration"},
+ {"remaining_seconds", T_INT, offsetof(Duration, seconds), 0, "seconds in duration"},
+ {"microseconds", T_INT, offsetof(Duration, microseconds), 0, "microseconds in duration"},
+ {NULL}
+};
+
+/*
+ * Type object of Duration. Only the leading slots are filled in here;
+ * tp_new, tp_members and tp_init are assigned in PyInit__iso8601()
+ * before PyType_Ready() is called.
+ */
+static PyTypeObject Duration_type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "Duration", /* tp_name */
+ sizeof(Duration), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ 0, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_as_async */
+ (reprfunc)Duration_repr, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ (reprfunc)Duration_repr, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /* tp_flags */
+ "Duration", /* tp_doc */
+};
+
+// Shortcut for new_duration_ex() with the type fixed to Duration_type.
+#define new_duration(years, months, weeks, days, hours, minutes, seconds, microseconds) new_duration_ex(years, months, weeks, days, hours, minutes, seconds, microseconds, &Duration_type)
+
+// Scratch structure filled by the parsers and read by parse_iso8601()
+// to build the Python object.
+typedef struct {
+ // Kind of value that was parsed.
+ // NOTE(review): is_time and is_period are initialised to 0 and never set
+ // by the code visible in this part of the file.
+ int is_date;
+ int is_time;
+ int is_datetime;
+ int is_duration;
+ int is_period;
+ // Set for inputs like "201207", which parse_iso8601() reads as hhmmss.
+ int ambiguous;
+ // Date and time fields
+ int year;
+ int month;
+ int day;
+ int hour;
+ int minute;
+ int second;
+ int microsecond;
+ // UTC offset in seconds; only meaningful when has_offset is set
+ int offset;
+ int has_offset;
+ // "UTC" for a "Z" suffix, NULL otherwise
+ char *tzname;
+ // Duration fields
+ int years;
+ int months;
+ int weeks;
+ int days;
+ int hours;
+ int minutes;
+ int seconds;
+ int microseconds;
+ // Index into PARSER_ERRORS; -1 until a parser reports an error
+ int error;
+} Parsed;
+
+
+/*
+ * Allocates a Parsed with every field at 0, except month and day (1),
+ * tzname (NULL) and error (-1).
+ *
+ * Returns NULL when the allocation failed; the caller releases the
+ * result with free().
+ */
+Parsed* new_parsed() {
+    // calloc() hands back zeroed memory, which covers every int field
+    // whose default is 0
+    Parsed *result = calloc(1, sizeof *result);
+
+    if (result == NULL) {
+        return NULL;
+    }
+
+    result->month = 1;
+    result->day = 1;
+    result->tzname = NULL;
+    result->error = -1;
+
+    return result;
+}
+
+
+/* -------------------------- Functions --------------------------*/
+
+/*
+ * Parses an ISO 8601 date or datetime into parsed.
+ *
+ * Accepted date forms: YYYY, YYYY-MM, YYYYMMDD / YYYY-MM-DD, ordinal dates
+ * (YYYYDDD / YYYY-DDD) and week dates (YYYYWww[d] / YYYY-Www[-d]). A date
+ * may be followed by "T" or a space and a time (hh, hhmm, hhmmss, with or
+ * without ":"), an optional "." or "," fraction and an optional "Z" or
+ * +hh / +hhmm / +hh:mm offset.
+ *
+ * Returns parsed on success. On failure returns NULL after storing one of
+ * the PARSER_* codes in parsed->error.
+ */
+Parsed* _parse_iso8601_datetime(char *str, Parsed *parsed) {
+    char* c;
+    int monthday = 0;
+    int week = 0;
+    int weekday = 1;
+    int ordinal;
+    int tz_sign = 0;
+    int leap = 0;
+    int separators = 0;
+    int time = 0;
+    int i;
+    int j;
+
+    // Assuming date only for now
+    parsed->is_date = 1;
+
+    c = str;
+
+    // The year is always made of exactly four digits
+    for (i = 0; i < 4; i++) {
+        if (*c >= '0' && *c <= '9') {
+            parsed->year = 10 * parsed->year + *c++ - '0';
+        } else {
+            parsed->error = PARSER_INVALID_ISO8601;
+
+            return NULL;
+        }
+    }
+
+    leap = is_leap(parsed->year);
+
+    // Optional separator
+    if (*c == '-') {
+        separators++;
+        c++;
+    }
+
+    // Checking for week dates
+    if (*c == 'W') {
+        c++;
+
+        i = 0;
+        while (*c != '\0' && *c != ' ' && *c != 'T') {
+            if (*c == '-') {
+                separators++;
+                c++;
+                continue;
+            }
+
+            // Only digits are valid here, and never more than three
+            // (two for the week, one for the weekday). Stopping at the
+            // fourth one also keeps the accumulator from overflowing.
+            if (!(*c >= '0' && *c <= '9') || i >= 3) {
+                parsed->error = PARSER_INVALID_WEEK_DATE;
+
+                return NULL;
+            }
+
+            week = 10 * week + *c++ - '0';
+
+            i++;
+        }
+
+        switch (i) {
+            case 2:
+                // Only week number
+                break;
+            case 3:
+                // Week with weekday
+                if (!(separators == 0 || separators == 2)) {
+                    // We should have 2 or no separator
+                    parsed->error = PARSER_INVALID_WEEK_DATE;
+
+                    return NULL;
+                }
+
+                weekday = week % 10;
+                week /= 10;
+
+                break;
+            default:
+                // Any other case is wrong
+                parsed->error = PARSER_INVALID_WEEK_DATE;
+
+                return NULL;
+        }
+
+        // Checks: weeks are numbered from 1, and week 53 only exists
+        // in long years
+        if (week < 1 || week > 53 || (week > 52 && !is_long_year(parsed->year))) {
+            parsed->error = PARSER_INVALID_WEEK_NUMBER;
+
+            return NULL;
+        }
+
+        if (weekday < 1 || weekday > 7) {
+            parsed->error = PARSER_INVALID_WEEKDAY_NUMBER;
+
+            return NULL;
+        }
+
+        // Calculating ordinal day
+        ordinal = week * 7 + weekday - (week_day(parsed->year, 1, 4) + 3);
+
+        if (ordinal < 1) {
+            // Previous year
+            ordinal += days_in_year(parsed->year - 1);
+            parsed->year -= 1;
+            leap = is_leap(parsed->year);
+        }
+
+        if (ordinal > days_in_year(parsed->year)) {
+            // Next year
+            ordinal -= days_in_year(parsed->year);
+            parsed->year += 1;
+            leap = is_leap(parsed->year);
+        }
+
+        // The first month whose end offset reaches the ordinal day
+        // is the month of the date
+        for (j = 1; j < 14; j++) {
+            if (ordinal <= MONTHS_OFFSETS[leap][j]) {
+                parsed->day = ordinal - MONTHS_OFFSETS[leap][j - 1];
+                parsed->month = j - 1;
+
+                break;
+            }
+        }
+    } else {
+        // At this point we need to check the number
+        // of characters until the end of the date part
+        // (or the end of the string).
+        //
+        // If two, we have only a month if there is a separator, it may be a time otherwise.
+        // If three, we have an ordinal date.
+        // If four, we have a complete date
+        i = 0;
+        while (*c != '\0' && *c != ' ' && *c != 'T') {
+            if (*c == '-') {
+                separators++;
+                c++;
+                continue;
+            }
+
+            if (!(*c >= '0' && *c <='9')) {
+                parsed->error = PARSER_INVALID_DATE;
+
+                return NULL;
+            }
+
+            // A fifth digit can never lead to a valid date: reject it
+            // right away instead of overflowing the accumulator
+            if (i >= 4) {
+                parsed->error = PARSER_INVALID_MONTH_OR_DAY;
+
+                return NULL;
+            }
+
+            monthday = 10 * monthday + *c++ - '0';
+
+            i++;
+        }
+
+        switch (i) {
+            case 0:
+                // No month/day specified (only a year)
+                break;
+            case 2:
+                if (!separators) {
+                    // The date looks like 201207
+                    // which is invalid for a date
+                    // But it might be a time in the form hhmmss
+                    parsed->ambiguous = 1;
+                } else if (separators > 1) {
+                    parsed->error = PARSER_INVALID_DATE;
+
+                    return NULL;
+                }
+
+                parsed->month = monthday;
+                break;
+            case 3:
+                // Ordinal day
+                if (separators > 1) {
+                    parsed->error = PARSER_INVALID_DATE;
+
+                    return NULL;
+                }
+
+                if (monthday < 1 || monthday > MONTHS_OFFSETS[leap][13]) {
+                    parsed->error = PARSER_INVALID_ORDINAL_DAY_FOR_YEAR;
+
+                    return NULL;
+                }
+
+                for (j = 1; j < 14; j++) {
+                    if (monthday <= MONTHS_OFFSETS[leap][j]) {
+                        parsed->day = monthday - MONTHS_OFFSETS[leap][j - 1];
+                        parsed->month = j - 1;
+
+                        break;
+                    }
+                }
+
+                break;
+            case 4:
+                // Month and day
+                parsed->month = monthday / 100;
+                parsed->day = monthday % 100;
+
+                break;
+            default:
+                parsed->error = PARSER_INVALID_MONTH_OR_DAY;
+
+                return NULL;
+        }
+    }
+
+    // Checks
+    if (separators && !monthday && !week) {
+        parsed->error = PARSER_INVALID_DATE;
+
+        return NULL;
+    }
+
+    if (parsed->month > 12) {
+        parsed->error = PARSER_INVALID_MONTH;
+
+        return NULL;
+    }
+
+    if (parsed->day > DAYS_PER_MONTHS[leap][parsed->month]) {
+        parsed->error = PARSER_INVALID_DAY_FOR_MONTH;
+
+        return NULL;
+    }
+
+    separators = 0;
+    if (*c == 'T' || *c == ' ') {
+        if (parsed->ambiguous) {
+            parsed->error = PARSER_INVALID_DATE;
+
+            return NULL;
+        }
+
+        // We have time so we have a datetime
+        parsed->is_datetime = 1;
+        parsed->is_date = 0;
+
+        c++;
+
+        // Grabbing time information
+        i = 0;
+        while (*c != '\0' && *c != '.' && *c != ',' && *c != 'Z' && *c != '+' && *c != '-') {
+            if (*c == ':') {
+                separators++;
+                c++;
+                continue;
+            }
+
+            // hhmmss is the longest valid form: a seventh digit is
+            // rejected before it can overflow the accumulator
+            if (!(*c >= '0' && *c <='9') || i >= 6) {
+                parsed->error = PARSER_INVALID_TIME;
+
+                return NULL;
+            }
+
+            time = 10 * time + *c++ - '0';
+            i++;
+        }
+
+        switch (i) {
+            case 2:
+                // Hours only
+                if (separators > 0) {
+                    // Extraneous separators
+                    parsed->error = PARSER_INVALID_TIME;
+
+                    return NULL;
+                }
+
+                parsed->hour = time;
+                break;
+            case 4:
+                // Hours and minutes
+                if (separators > 1) {
+                    // Extraneous separators
+                    parsed->error = PARSER_INVALID_TIME;
+
+                    return NULL;
+                }
+
+                parsed->hour = time / 100;
+                parsed->minute = time % 100;
+                break;
+            case 6:
+                // Hours, minutes and seconds
+                if (!(separators == 0 || separators == 2)) {
+                    // We should have either two separators or none
+                    parsed->error = PARSER_INVALID_TIME;
+
+                    return NULL;
+                }
+
+                parsed->hour = time / 10000;
+                parsed->minute = time / 100 % 100;
+                parsed->second = time % 100;
+                break;
+            default:
+                // Any other case is wrong
+                parsed->error = PARSER_INVALID_TIME;
+
+                return NULL;
+        }
+
+        // Checks
+        if (parsed->hour > 23) {
+            parsed->error = PARSER_INVALID_HOUR;
+
+            return NULL;
+        }
+
+        if (parsed->minute > 59) {
+            parsed->error = PARSER_INVALID_MINUTE;
+
+            return NULL;
+        }
+
+        if (parsed->second > 59) {
+            parsed->error = PARSER_INVALID_SECOND;
+
+            return NULL;
+        }
+
+        // Subsecond
+        if (*c == '.' || *c == ',') {
+            c++;
+
+            parsed->microsecond = 0;
+            i = 0;
+            while (*c != '\0' && *c != 'Z' && *c != '+' && *c != '-') {
+                if (!(*c >= '0' && *c <='9')) {
+                    parsed->error = PARSER_INVALID_SUBSECOND;
+
+                    return NULL;
+                }
+
+                // Only the first six digits fit in microseconds: the
+                // following ones are validated but dropped (truncation)
+                if (i < 6) {
+                    parsed->microsecond = 10 * parsed->microsecond + *c - '0';
+                }
+
+                c++;
+                i++;
+            }
+
+            // adjust to microseconds when fewer than six digits were given
+            for (; i < 6; i++) {
+                parsed->microsecond *= 10;
+            }
+        }
+
+        // Timezone
+        if (*c == 'Z') {
+            parsed->has_offset = 1;
+            parsed->tzname = "UTC";
+            c++;
+        } else if (*c == '+' || *c == '-') {
+            tz_sign = 1;
+            if (*c == '-') {
+                tz_sign = -1;
+            }
+
+            parsed->has_offset = 1;
+            c++;
+
+            i = 0;
+            time = 0;
+            separators = 0;
+            while (*c != '\0') {
+                if (*c == ':') {
+                    separators++;
+                    c++;
+                    continue;
+                }
+
+                // hhmm is the longest valid offset
+                if (!(*c >= '0' && *c <= '9') || i >= 4) {
+                    parsed->error = PARSER_INVALID_TZ_OFFSET;
+
+                    return NULL;
+                }
+
+                time = 10 * time + *c++ - '0';
+                i++;
+            }
+
+            switch (i) {
+                case 2:
+                    // hh Format
+                    if (separators) {
+                        // Extraneous separators
+                        parsed->error = PARSER_INVALID_TZ_OFFSET;
+
+                        return NULL;
+                    }
+
+                    parsed->offset = tz_sign * (time * 3600);
+                    break;
+                case 4:
+                    // hhmm Format
+                    if (separators > 1) {
+                        // Extraneous separators
+                        parsed->error = PARSER_INVALID_TZ_OFFSET;
+
+                        return NULL;
+                    }
+
+                    parsed->offset = tz_sign * ((time / 100 * 3600) + (time % 100 * 60));
+                    break;
+                default:
+                    // Wrong format
+                    parsed->error = PARSER_INVALID_TZ_OFFSET;
+
+                    return NULL;
+            }
+        }
+    }
+
+    // At this point we should be at the end of the string
+    // If not, the string is invalid
+    if (*c != '\0') {
+        parsed->error = PARSER_INVALID_ISO8601;
+
+        return NULL;
+    }
+
+    return parsed;
+}
+
+
+// Converts the decimal fraction 0.<fraction>, written with `length` digits,
+// of a unit into whole sub-units, `per_unit` being the number of sub-units
+// in one unit. The result is truncated toward zero.
+static int _scale_fraction(int fraction, int length, int per_unit) {
+    int64_t scaled = (int64_t) fraction * per_unit;
+
+    while (length-- > 0) {
+        scaled /= 10;
+    }
+
+    return (int) scaled;
+}
+
+
+/*
+ * Parses an ISO 8601 duration (PnYnMnDTnHnMnS or PnW) into parsed.
+ *
+ * str must start with the "P" designator, which is skipped without being
+ * checked. A single component may carry a fraction ("." or ","), except
+ * years and months. The fraction is carried over to the next smaller
+ * unit: weeks to days and hours, days to hours, hours to minutes, minutes
+ * to seconds and seconds to microseconds.
+ *
+ * Returns parsed on success. On failure returns NULL after storing
+ * PARSER_INVALID_DURATION or
+ * PARSER_INVALID_DURATION_FLOAT_YEAR_MONTH_NOT_SUPPORTED in parsed->error.
+ */
+Parsed* _parse_iso8601_duration(char *str, Parsed *parsed) {
+    char* c;
+    int value = 0;
+    int grabbed = 0;
+    int in_time = 0;
+    int in_fraction = 0;
+    int fraction_length = 0;
+    int has_fractional = 0;
+    int fraction = 0;
+    int has_ymd = 0;
+    int has_week = 0;
+    int has_month = 0;
+    int has_day = 0;
+    int has_hour = 0;
+    int has_minute = 0;
+    int has_second = 0;
+
+    c = str;
+
+    // Removing P operator
+    c++;
+
+    parsed->is_duration = 1;
+
+    for (; *c != '\0'; c++) {
+        switch (*c) {
+            case 'Y':
+                if (!grabbed || in_time || has_week || has_ymd) {
+                    // No value grabbed
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                if (fraction) {
+                    parsed->error = PARSER_INVALID_DURATION_FLOAT_YEAR_MONTH_NOT_SUPPORTED;
+
+                    return NULL;
+                }
+
+                parsed->years = value;
+                has_ymd = 1;
+
+                break;
+            case 'M':
+                if (!grabbed || has_week) {
+                    // No value grabbed
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                if (in_time) {
+                    // Minutes
+                    if (has_second || has_fractional) {
+                        parsed->error = PARSER_INVALID_DURATION;
+
+                        return NULL;
+                    }
+
+                    parsed->minutes = value;
+                    if (fraction) {
+                        parsed->seconds = _scale_fraction(fraction, fraction_length, SECS_PER_MIN);
+                        has_fractional = 1;
+                    }
+
+                    has_minute = 1;
+                } else {
+                    // Months
+                    if (fraction) {
+                        parsed->error = PARSER_INVALID_DURATION_FLOAT_YEAR_MONTH_NOT_SUPPORTED;
+
+                        return NULL;
+                    }
+
+                    if (has_month || has_day) {
+                        parsed->error = PARSER_INVALID_DURATION;
+
+                        return NULL;
+                    }
+
+                    parsed->months = value;
+                    has_ymd = 1;
+                    has_month = 1;
+                }
+
+                break;
+            case 'D':
+                if (!grabbed || in_time || has_week || has_day) {
+                    // No value grabbed
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                parsed->days = value;
+                if (fraction) {
+                    parsed->hours = _scale_fraction(fraction, fraction_length, 24);
+                    has_fractional = 1;
+                }
+
+                has_ymd = 1;
+                has_day = 1;
+
+                break;
+            case 'T':
+                if (grabbed) {
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                in_time = 1;
+
+                // Not a component: nothing to reset
+                continue;
+            case 'H':
+                if (!grabbed || !in_time || has_week) {
+                    // No value grabbed
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                if (has_hour || has_second || has_minute || has_fractional) {
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                parsed->hours = value;
+                if (fraction) {
+                    parsed->minutes = _scale_fraction(fraction, fraction_length, 60);
+                    has_fractional = 1;
+                }
+
+                has_hour = 1;
+
+                break;
+            case 'S':
+                if (!grabbed || !in_time || has_week) {
+                    // No value grabbed
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                if (has_second || has_fractional) {
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                parsed->seconds = value;
+                if (fraction) {
+                    parsed->microseconds = _scale_fraction(fraction, fraction_length, USECS_PER_SEC);
+                    has_fractional = 1;
+                }
+
+                has_second = 1;
+
+                break;
+            case 'W':
+                if (!grabbed || in_time || has_ymd || has_week) {
+                    // No value grabbed
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                parsed->weeks = value;
+                if (fraction) {
+                    // The fraction of a week is split into days and hours
+                    int fraction_hours = _scale_fraction(fraction, fraction_length, 7 * 24);
+
+                    parsed->days = fraction_hours / 24;
+                    parsed->hours = fraction_hours % 24;
+                }
+
+                has_week = 1;
+
+                break;
+            case '.':
+            case ',':
+                if (!grabbed || has_fractional) {
+                    // No value grabbed
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                in_fraction = 1;
+
+                continue;
+            default:
+                if (!(*c >= '0' && *c <= '9')) {
+                    parsed->error = PARSER_INVALID_DURATION;
+
+                    return NULL;
+                }
+
+                if (in_fraction) {
+                    // Nine digits always fit in an int and are more than
+                    // any component keeps: finer digits are dropped
+                    if (fraction_length < 9) {
+                        fraction = 10 * fraction + *c - '0';
+                        fraction_length++;
+                    }
+                } else {
+                    // Refuse values that do not fit instead of overflowing
+                    if (value > (INT32_MAX - (*c - '0')) / 10) {
+                        parsed->error = PARSER_INVALID_DURATION;
+
+                        return NULL;
+                    }
+
+                    value = 10 * value + *c - '0';
+                    grabbed = 1;
+                }
+
+                continue;
+        }
+
+        // A designator consumed the pending number: start afresh for the
+        // next component. The fraction length is part of that state,
+        // otherwise an earlier ".0" would skew a later fraction.
+        grabbed = 0;
+        value = 0;
+        fraction = 0;
+        fraction_length = 0;
+        in_fraction = 0;
+    }
+
+    // A number that is not followed by a designator belongs to nothing
+    if (grabbed) {
+        parsed->error = PARSER_INVALID_DURATION;
+
+        return NULL;
+    }
+
+    return parsed;
+}
+
+
+/*
+ * def parse_iso8601(text):
+ *
+ * Parses an ISO 8601 string. A string starting with "P" is parsed as a
+ * duration, anything else as a date or datetime.
+ *
+ * Returns a new reference to a date, a time (for inputs like "201207",
+ * read as hhmmss), a datetime (with a FixedOffset tzinfo when the string
+ * has an offset) or a Duration.
+ * Returns NULL with ValueError set when the argument is not a string or
+ * the string is invalid, and with MemoryError set when out of memory.
+ */
+PyObject* parse_iso8601(PyObject *self, PyObject *args) {
+    char* str;
+    PyObject *obj = NULL;
+    PyObject *tzinfo;
+    Parsed *parsed;
+    Parsed *result;
+
+    // Arguments are checked before anything is allocated, so that this
+    // early exit has nothing to release
+    if (!PyArg_ParseTuple(args, "s", &str)) {
+        PyErr_SetString(
+            PyExc_ValueError, "Invalid parameters"
+        );
+        return NULL;
+    }
+
+    parsed = new_parsed();
+    if (parsed == NULL) {
+        return PyErr_NoMemory();
+    }
+
+    if (*str == 'P') {
+        // Duration (or interval)
+        result = _parse_iso8601_duration(str, parsed);
+    } else {
+        result = _parse_iso8601_datetime(str, parsed);
+    }
+
+    if (result == NULL) {
+        // The parsers store the index of the message before failing;
+        // fall back on the generic message should one ever forget to
+        if (parsed->error < 0) {
+            parsed->error = PARSER_INVALID_ISO8601;
+        }
+
+        PyErr_SetString(
+            PyExc_ValueError, PARSER_ERRORS[parsed->error]
+        );
+
+        free(parsed);
+
+        return NULL;
+    }
+
+    if (parsed->is_date) {
+        // Date only
+        if (parsed->ambiguous) {
+            // We can "safely" assume that the ambiguous
+            // date was actually a time in the form hhmmss
+            parsed->hour = parsed->year / 100;
+            parsed->minute = parsed->year % 100;
+            parsed->second = parsed->month;
+
+            // Py_None is passed as a borrowed reference
+            obj = PyDateTimeAPI->Time_FromTime(
+                parsed->hour, parsed->minute, parsed->second, parsed->microsecond,
+                Py_None,
+                PyDateTimeAPI->TimeType
+            );
+        } else {
+            obj = PyDateTimeAPI->Date_FromDate(
+                parsed->year, parsed->month, parsed->day,
+                PyDateTimeAPI->DateType
+            );
+        }
+    } else if (parsed->is_datetime) {
+        if (!parsed->has_offset) {
+            // Own a reference so that both branches are released alike
+            tzinfo = Py_None;
+            Py_INCREF(tzinfo);
+        } else {
+            tzinfo = new_fixed_offset(parsed->offset, parsed->tzname);
+        }
+
+        // tzinfo is NULL when the FixedOffset could not be allocated;
+        // obj then stays NULL and the error is reported below
+        if (tzinfo != NULL) {
+            obj = PyDateTimeAPI->DateTime_FromDateAndTime(
+                parsed->year,
+                parsed->month,
+                parsed->day,
+                parsed->hour,
+                parsed->minute,
+                parsed->second,
+                parsed->microsecond,
+                tzinfo,
+                PyDateTimeAPI->DateTimeType
+            );
+
+            Py_DECREF(tzinfo);
+        }
+    } else if (parsed->is_duration) {
+        obj = new_duration(
+            parsed->years, parsed->months, parsed->weeks, parsed->days,
+            parsed->hours, parsed->minutes, parsed->seconds, parsed->microseconds
+        );
+    } else {
+        // Returning NULL without an exception set is an error in itself
+        PyErr_SetString(
+            PyExc_ValueError, PARSER_ERRORS[PARSER_INVALID_ISO8601]
+        );
+    }
+
+    // Single exit: parsed is released on success and on failure alike
+    free(parsed);
+
+    if (obj == NULL && !PyErr_Occurred()) {
+        // A failed allocation that did not set its own exception
+        PyErr_NoMemory();
+    }
+
+    return obj;
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+// Module-level functions: only parse_iso8601(), taking positional arguments.
+// NOTE(review): the docstring below says "into a tuple", but parse_iso8601()
+// returns a date, time, datetime or Duration object - the string is left
+// untouched here because it is visible at runtime.
+static PyMethodDef helpers_methods[] = {
+ {
+ "parse_iso8601",
+ (PyCFunction) parse_iso8601,
+ METH_VARARGS,
+ PyDoc_STR("Parses a ISO8601 string into a tuple.")
+ },
+ {NULL}
+};
+
+
+/* ------------------------------------------------------------------------- */
+
+// Definition of the "_iso8601" module: no docstring (NULL), a per-module
+// state size of -1 and the method table above; the remaining slots are
+// left empty.
+static struct PyModuleDef moduledef = {
+ PyModuleDef_HEAD_INIT,
+ "_iso8601",
+ NULL,
+ -1,
+ helpers_methods,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+};
+
+/*
+ * Module initialisation: imports the datetime C API, finishes the two
+ * type objects and publishes them as "TZFixedOffset" and "Duration".
+ *
+ * Returns the new module, or NULL with an exception set.
+ */
+PyMODINIT_FUNC
+PyInit__iso8601(void)
+{
+    PyObject *module;
+
+    PyDateTime_IMPORT;
+
+    // The macro leaves PyDateTimeAPI at NULL when the import failed, and
+    // the API is dereferenced right below
+    if (PyDateTimeAPI == NULL)
+        return NULL;
+
+    // FixedOffset declaration
+    FixedOffset_type.tp_new = PyType_GenericNew;
+    FixedOffset_type.tp_base = PyDateTimeAPI->TZInfoType;
+    FixedOffset_type.tp_methods = FixedOffset_methods;
+    FixedOffset_type.tp_members = FixedOffset_members;
+    FixedOffset_type.tp_init = (initproc)FixedOffset_init;
+
+    if (PyType_Ready(&FixedOffset_type) < 0)
+        return NULL;
+
+    // Duration declaration
+    Duration_type.tp_new = PyType_GenericNew;
+    Duration_type.tp_members = Duration_members;
+    Duration_type.tp_init = (initproc)Duration_init;
+
+    if (PyType_Ready(&Duration_type) < 0)
+        return NULL;
+
+    // The module is created once the types are ready, so that the early
+    // exits above have nothing to release
+    module = PyModule_Create(&moduledef);
+
+    if (module == NULL)
+        return NULL;
+
+    // PyModule_AddObject() takes over the reference on success only: on
+    // failure both the reference and the module are released here
+    Py_INCREF(&FixedOffset_type);
+    if (PyModule_AddObject(module, "TZFixedOffset", (PyObject *)&FixedOffset_type) < 0) {
+        Py_DECREF(&FixedOffset_type);
+        Py_DECREF(module);
+
+        return NULL;
+    }
+
+    Py_INCREF(&Duration_type);
+    if (PyModule_AddObject(module, "Duration", (PyObject *)&Duration_type) < 0) {
+        Py_DECREF(&Duration_type);
+        Py_DECREF(module);
+
+        return NULL;
+    }
+
+    return module;
+}
diff --git a/pendulum/parsing/exceptions/__init__.py b/pendulum/parsing/exceptions/__init__.py
new file mode 100644
index 0000000..997b0fa
--- /dev/null
+++ b/pendulum/parsing/exceptions/__init__.py
@@ -0,0 +1,3 @@
class ParserError(ValueError):
    """Raised when a string cannot be parsed as a date, time or duration."""
diff --git a/pendulum/parsing/iso8601.py b/pendulum/parsing/iso8601.py
new file mode 100644
index 0000000..40efa2f
--- /dev/null
+++ b/pendulum/parsing/iso8601.py
@@ -0,0 +1,447 @@
+from __future__ import division
+
+import datetime
+import re
+
+from ..constants import HOURS_PER_DAY
+from ..constants import MINUTES_PER_HOUR
+from ..constants import MONTHS_OFFSETS
+from ..constants import SECONDS_PER_MINUTE
+from ..duration import Duration
+from ..helpers import days_in_year
+from ..helpers import is_leap
+from ..helpers import is_long_year
+from ..helpers import week_day
+from ..tz.timezone import UTC
+from ..tz.timezone import FixedTimezone
+from .exceptions import ParserError
+
+
# Matches an ISO 8601 date, time or combined date/time representation.
# Compiled with re.VERBOSE, so the whitespace inside the fragments is
# layout only (the escaped space in the time separator class is literal).
ISO8601_DT = re.compile(
    # Date (optional)
    "^"
    "(?P<date>"
    "    (?P<classic>"  # Classic date (YYYY-MM-DD) or ordinal (YYYY-DDD)
    r"        (?P<year>\d{4})"  # Year
    "        (?P<monthday>"
    r"            (?P<monthsep>-)?(?P<month>\d{2})"  # Month (optional)
    r"            ((?P<daysep>-)?(?P<day>\d{1,2}))?"  # Day (optional)
    "        )?"
    "    )"
    "    |"
    "    (?P<isocalendar>"  # Calendar date (2016-W05 or 2016-W05-5)
    r"        (?P<isoyear>\d{4})"  # Year
    "        (?P<weeksep>-)?"  # Separator (optional)
    "        W"  # W separator
    r"        (?P<isoweek>\d{2})"  # Week number
    "        (?P<weekdaysep>-)?"  # Separator (optional)
    r"        (?P<isoweekday>\d)?"  # Weekday (optional)
    "    )"
    ")?"
    # Time (optional)
    "(?P<time>"
    r"    (?P<timesep>[T\ ])?"  # Separator (T or space)
    r"    (?P<hour>\d{1,2})(?P<minsep>:)?(?P<minute>\d{1,2})?(?P<secsep>:)?(?P<second>\d{1,2})?"  # HH:mm:ss (optional mm and ss)
    # Subsecond part (optional)
    "    (?P<subsecondsection>"
    "        (?:[.,])"  # Subsecond separator
    r"        (?P<subsecond>\d{1,9})"  # Subsecond
    "    )?"
    # Timezone offset
    "    (?P<tz>"
    # The colon is only allowed together with the minutes, so a dangling
    # "+HH:" no longer matches.
    r"        (?:[-+])\d{2}(?::?\d{2})?|Z"  # Offset (+HH:mm or +HHmm or +HH or Z)
    "    )?"
    ")?"
    "$",
    re.VERBOSE,
)
+
+
# Matches an ISO 8601 duration (PnW, or PnYnMnDTnHnMnS with every
# component optional).  Each captured component keeps its designator
# letter; decimals may use "." or ",".  Compiled in verbose mode, so the
# whitespace inside the fragments is ignored.
ISO8601_DURATION = re.compile(
    "".join(
        (
            "^P",  # Duration P indicator
            # Weeks (optional)
            "(?P<w>",
            r" (?P<weeks>\d+(?:[.,]\d+)?W)",
            ")?",
            # Years, months and days (optional)
            "(?P<ymd>",
            r" (?P<years>\d+(?:[.,]\d+)?Y)?",
            r" (?P<months>\d+(?:[.,]\d+)?M)?",
            r" (?P<days>\d+(?:[.,]\d+)?D)?",
            ")?",
            # Hours, minutes and seconds (optional, introduced by T)
            "(?P<hms>",
            " (?P<timesep>T)",  # Separator (T)
            r" (?P<hours>\d+(?:[.,]\d+)?H)?",
            r" (?P<minutes>\d+(?:[.,]\d+)?M)?",
            r" (?P<seconds>\d+(?:[.,]\d+)?S)?",
            ")?",
            "$",
        )
    ),
    flags=re.VERBOSE,
)
+
+
def parse_iso8601(text):
    """
    ISO 8601 compliant parser.

    The text is first tried as a duration; otherwise it is matched against
    ``ISO8601_DT`` and turned into a date, a time or a datetime depending on
    which parts are present.

    :param text: The string to parse
    :type text: str

    :rtype: datetime.datetime or datetime.time or datetime.date or Duration

    :raises ParserError: if the text is not a valid ISO 8601 representation.
        A plain ValueError can still propagate from the datetime
        constructors when a matched field is out of range.
    """
    parsed = _parse_iso8601_duration(text)
    if parsed is not None:
        return parsed

    m = ISO8601_DT.match(text)
    if not m:
        raise ParserError("Invalid ISO 8601 string")

    ambiguous_date = False
    is_date = False
    year = 0
    month = 1
    day = 1
    minute = 0
    second = 0
    microsecond = 0
    tzinfo = None

    if m.group("date"):
        # A date has been specified
        is_date = True

        if m.group("isocalendar"):
            # We have a ISO 8601 string defined by week number.
            # Separators must be used consistently: either everywhere
            # (2016-W05-5) or nowhere (2016W055).
            if (
                m.group("weeksep")
                and not m.group("weekdaysep")
                and m.group("isoweekday")
            ):
                raise ParserError("Invalid date string: {}".format(text))

            if not m.group("weeksep") and m.group("weekdaysep"):
                raise ParserError("Invalid date string: {}".format(text))

            try:
                week_date = _get_iso_8601_week(
                    m.group("isoyear"), m.group("isoweek"), m.group("isoweekday")
                )
            except ParserError:
                raise
            except ValueError:
                raise ParserError("Invalid date string: {}".format(text))

            year = week_date["year"]
            month = week_date["month"]
            day = week_date["day"]
        else:
            # We have a classic date representation
            year = int(m.group("year"))

            if m.group("monthday"):
                if m.group("month") and m.group("day"):
                    # Month and day
                    if not m.group("daysep") and len(m.group("day")) == 1:
                        # Three digits without a day separator: ordinal day
                        ordinal = int(m.group("month") + m.group("day"))
                        leap = is_leap(year)
                        months_offsets = MONTHS_OFFSETS[leap]

                        # Day-of-year 0 is not a valid ordinal; reject it
                        # explicitly instead of letting it reach the lookup.
                        if ordinal < 1 or ordinal > months_offsets[13]:
                            raise ParserError("Ordinal day is out of range")

                        for i in range(1, 14):
                            if ordinal <= months_offsets[i]:
                                day = ordinal - months_offsets[i - 1]
                                month = i - 1

                                break
                    else:
                        month = int(m.group("month"))
                        day = int(m.group("day"))
                else:
                    # Only month
                    if not m.group("monthsep"):
                        # The date looks like 201207
                        # which is invalid for a date
                        # But it might be a time in the form hhmmss
                        ambiguous_date = True

                    month = int(m.group("month"))
                    day = 1

    if not m.group("time"):
        # No time has been specified
        if not is_date:
            # Neither a date nor a time matched (empty input)
            raise ParserError("Invalid ISO 8601 string")

        if ambiguous_date:
            # We can "safely" assume that the ambiguous date
            # was actually a time in the form hhmmss.
            # Use the matched digits rather than the parsed integers so
            # leading zeros survive ("001207" is 00:12:07).
            hhmmss = m.group("year") + m.group("month")

            return datetime.time(int(hhmmss[:2]), int(hhmmss[2:4]), int(hhmmss[4:]))

        return datetime.date(year, month, day)

    if ambiguous_date:
        raise ParserError("Invalid date string: {}".format(text))

    if is_date and not m.group("timesep"):
        # A date followed by a time requires the T (or space) separator
        raise ParserError("Invalid date string: {}".format(text))

    # Grabbing hh:mm:ss
    hour = int(m.group("hour"))
    minsep = m.group("minsep")

    if m.group("minute"):
        minute = int(m.group("minute"))
    elif minsep:
        # Hour/minute separator without minutes
        raise ParserError("Invalid ISO 8601 time part")

    secsep = m.group("secsep")
    if secsep and not minsep and m.group("minute"):
        # minute/second separator but no hour/minute separator
        raise ParserError("Invalid ISO 8601 time part")

    if m.group("second"):
        if not secsep and minsep:
            # No minute/second separator but hour/minute separator
            raise ParserError("Invalid ISO 8601 time part")

        second = int(m.group("second"))
    elif secsep:
        # Minute/second separator without seconds
        raise ParserError("Invalid ISO 8601 time part")

    # Grabbing subseconds, if any
    if m.group("subsecondsection"):
        # Limiting to 6 chars, right-padded with zeros to microseconds
        subsecond = m.group("subsecond")[:6]

        microsecond = int("{:0<6}".format(subsecond))

    # Grabbing timezone, if any
    tz = m.group("tz")
    if tz:
        if tz == "Z":
            tzinfo = UTC
        else:
            negative = tz.startswith("-")
            digits = tz[1:]
            if ":" not in digits:
                if len(digits) == 2:
                    # Hours only: +HH
                    digits = "{}00".format(digits)

                off_hour = digits[0:2]
                off_minute = digits[2:4]
            else:
                off_hour, off_minute = digits.split(":")
                if not off_minute:
                    # Dangling colon ("+05:")
                    raise ParserError("Invalid ISO 8601 string")

            offset = ((int(off_hour) * 60) + int(off_minute)) * 60

            if negative:
                offset = -1 * offset

            tzinfo = FixedTimezone(offset)

    if not is_date:
        # Time only; the offset is not attached to the returned time
        return datetime.time(hour, minute, second, microsecond)

    return datetime.datetime(
        year, month, day, hour, minute, second, microsecond, tzinfo=tzinfo
    )
+
+
def _parse_iso8601_duration(text, **options):
    """
    Parse an ISO 8601 duration string.

    A fractional component is carried over to the next smaller unit
    (weeks -> days/hours, days -> hours, hours -> minutes,
    minutes -> seconds, seconds -> microseconds).  Once a component is
    fractional, no smaller component may follow it.

    :param text: The string to parse
    :type text: str

    :param options: Accepted for compatibility; currently ignored.

    :rtype: Duration or None (when the text is not a duration)

    :raises ParserError: if the duration mixes weeks with other components,
        uses fractional years or months, or has a component after a
        fractional one.
    """
    m = ISO8601_DURATION.match(text)
    if not m:
        return None

    def split_amount(token):
        # "12,5H" -> (12, 0.5); "12H" -> (12, None).
        # The fraction is read as a real decimal, so "1.25" contributes 0.25
        # (dividing the digits by 10 is only right for a single digit).
        whole, dot, decimals = token[:-1].replace(",", ".").partition(".")

        return int(whole), (float("0." + decimals) if dot else None)

    years = 0
    months = 0
    weeks = 0
    days = 0
    hours = 0
    minutes = 0
    seconds = 0
    microseconds = 0
    fractional = False

    if m.group("w"):
        # Weeks
        if m.group("ymd") or m.group("hms"):
            # Specifying anything more than weeks is not supported
            raise ParserError("Invalid duration string")

        weeks, fraction = split_amount(m.group("weeks"))
        if fraction is not None:
            extra_days = fraction * 7
            days, hours = int(extra_days // 1), extra_days % 1 * HOURS_PER_DAY

    # The pattern fixes the order of the components (Y, M, D then H, M, S),
    # so no explicit ordering check is needed below.
    if m.group("ymd"):
        # Years, months and/or days
        _years = m.group("years")
        _months = m.group("months")
        _days = m.group("days")

        if _years:
            years, fraction = split_amount(_years)
            if fraction is not None:
                raise ParserError("Float years in duration are not supported")

        if _months:
            months, fraction = split_amount(_months)
            if fraction is not None:
                raise ParserError("Float months in duration are not supported")

        if _days:
            days, fraction = split_amount(_days)
            if fraction is not None:
                fractional = True
                hours = fraction * HOURS_PER_DAY

    if m.group("hms"):
        # Hours, minutes and/or seconds
        _hours = m.group("hours")
        _minutes = m.group("minutes")
        _seconds = m.group("seconds")

        if _hours:
            if fractional:
                raise ParserError("Invalid duration")

            whole, fraction = split_amount(_hours)
            hours += whole
            if fraction is not None:
                fractional = True
                minutes += fraction * MINUTES_PER_HOUR

        if _minutes:
            if fractional:
                raise ParserError("Invalid duration")

            whole, fraction = split_amount(_minutes)
            minutes += whole
            if fraction is not None:
                fractional = True
                seconds += fraction * SECONDS_PER_MINUTE

        if _seconds:
            if fractional:
                raise ParserError("Invalid duration")

            # Seconds keep their decimals as integer microseconds
            # (first six digits, right-padded with zeros).
            whole, _, decimals = _seconds[:-1].replace(",", ".").partition(".")
            seconds += int(whole)
            if decimals:
                microseconds += int("{:0<6}".format(decimals[:6]))

    return Duration(
        years=years,
        months=months,
        weeks=weeks,
        days=days,
        hours=hours,
        minutes=minutes,
        seconds=seconds,
        microseconds=microseconds,
    )
+
+
def _get_iso_8601_week(year, week, weekday):
    """
    Convert an ISO 8601 week date to a calendar date.

    :param year: The ISO year (digits)
    :type year: str or int

    :param week: The ISO week number, 1 to 53
    :type week: str or int

    :param weekday: The ISO weekday, 1 to 7; defaults to 1 when empty or None
    :type weekday: str or int or None

    :rtype: dict with the keys "year", "month" and "day"

    :raises ParserError: if the week or the weekday is out of range
    :raises ValueError: if the resulting date is outside the range supported
        by datetime.date
    """
    weekday = int(weekday) if weekday else 1
    year = int(year)
    week = int(week)

    # Week numbers run from 1 to 52, or 53 in long years
    if week < 1 or week > 53 or (week == 53 and not is_long_year(year)):
        raise ParserError("Invalid week for week date")

    if not 1 <= weekday <= 7:
        raise ParserError("Invalid weekday for week date")

    # Day of the year, counted from the week that contains January 4th
    ordinal = week * 7 + weekday - (week_day(year, 1, 4) + 3)

    if ordinal < 1:
        # Previous year
        ordinal += days_in_year(year - 1)
        year -= 1

    if ordinal > days_in_year(year):
        # Next year
        ordinal -= days_in_year(year)
        year += 1

    # Plain date arithmetic instead of strptime("%Y-%j"): the latter needs a
    # four-digit year string and would reject years below 1000.
    result = datetime.date(year, 1, 1) + datetime.timedelta(days=ordinal - 1)

    return {"year": result.year, "month": result.month, "day": result.day}