diff options
Diffstat (limited to 'misc/apr_date.c')
-rw-r--r-- | misc/apr_date.c | 637 |
1 files changed, 637 insertions, 0 deletions
diff --git a/misc/apr_date.c b/misc/apr_date.c new file mode 100644 index 0000000..28086e3 --- /dev/null +++ b/misc/apr_date.c @@ -0,0 +1,637 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * apr_date.c: date parsing utility routines + * These routines are (hopefully) platform independent. + * + * 27 Oct 1996 Roy Fielding + * Extracted (with many modifications) from mod_proxy.c and + * tested with over 50,000 randomly chosen valid date strings + * and several hundred variations of invalid date strings. + * + */ + +#include "apr.h" +#include "apr_lib.h" + +#define APR_WANT_STRFUNC +#include "apr_want.h" + +#if APR_HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#if APR_HAVE_CTYPE_H +#include <ctype.h> +#endif + +#include "apr_date.h" + +/* + * Compare a string to a mask + * Mask characters (arbitrary maximum is 256 characters, just in case): + * @ - uppercase letter + * $ - lowercase letter + * & - hex digit + * # - digit + * ~ - digit or space + * * - swallow remaining characters + * <x> - exact match for any other character + */ +APU_DECLARE(int) apr_date_checkmask(const char *data, const char *mask) +{ + int i; + char d; + + for (i = 0; i < 256; i++) { + d = data[i]; + switch (mask[i]) { + case '\0': + return (d == '\0'); + + case '*': + return 1; + + case '@': + if (!apr_isupper(d)) + return 0; + break; + case '$': + if (!apr_islower(d)) + return 0; + break; + case '#': + if (!apr_isdigit(d)) + return 0; + break; + case '&': + if (!apr_isxdigit(d)) + return 0; + break; + case '~': + if ((d != ' ') && !apr_isdigit(d)) + return 0; + break; + default: + if (mask[i] != d) + return 0; + break; + } + } + return 0; /* We only get here if mask is corrupted (exceeds 256) */ +} + +/* + * Parses an HTTP date in one of three standard forms: + * + * Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 + * Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 + * Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format + * + * and returns the apr_time_t number of microseconds since 1 Jan 1970 GMT, + * or APR_DATE_BAD if this would be out of range or if the date is invalid. + * + * The restricted HTTP syntax is + * + * HTTP-date = rfc1123-date | rfc850-date | asctime-date + * + * rfc1123-date = wkday "," SP date1 SP time SP "GMT" + * rfc850-date = weekday "," SP date2 SP time SP "GMT" + * asctime-date = wkday SP date3 SP time SP 4DIGIT + * + * date1 = 2DIGIT SP month SP 4DIGIT + * ; day month year (e.g., 02 Jun 1982) + * date2 = 2DIGIT "-" month "-" 2DIGIT + * ; day-month-year (e.g., 02-Jun-82) + * date3 = month SP ( 2DIGIT | ( SP 1DIGIT )) + * ; month day (e.g., Jun 2) + * + * time = 2DIGIT ":" 2DIGIT ":" 2DIGIT + * ; 00:00:00 - 23:59:59 + * + * wkday = "Mon" | "Tue" | "Wed" + * | "Thu" | "Fri" | "Sat" | "Sun" + * + * weekday = "Monday" | "Tuesday" | "Wednesday" + * | "Thursday" | "Friday" | "Saturday" | "Sunday" + * + * month = "Jan" | "Feb" | "Mar" | "Apr" + * | "May" | "Jun" | "Jul" | "Aug" + * | "Sep" | "Oct" | "Nov" | "Dec" + * + * However, for the sake of robustness (and Netscapeness), we ignore the + * weekday and anything after the time field (including the timezone). + * + * This routine is intended to be very fast; 10x faster than using sscanf. + * + * Originally from Andrew Daviel <andrew@vancouver-webpages.com>, 29 Jul 96 + * but many changes since then. + * + */ +APU_DECLARE(apr_time_t) apr_date_parse_http(const char *date) +{ + apr_time_exp_t ds; + apr_time_t result; + int mint, mon; + const char *monstr, *timstr; + static const int months[12] = + { + ('J' << 16) | ('a' << 8) | 'n', ('F' << 16) | ('e' << 8) | 'b', + ('M' << 16) | ('a' << 8) | 'r', ('A' << 16) | ('p' << 8) | 'r', + ('M' << 16) | ('a' << 8) | 'y', ('J' << 16) | ('u' << 8) | 'n', + ('J' << 16) | ('u' << 8) | 'l', ('A' << 16) | ('u' << 8) | 'g', + ('S' << 16) | ('e' << 8) | 'p', ('O' << 16) | ('c' << 8) | 't', + ('N' << 16) | ('o' << 8) | 'v', ('D' << 16) | ('e' << 8) | 'c'}; + + if (!date) + return APR_DATE_BAD; + + while (*date && apr_isspace(*date)) /* Find first non-whitespace char */ + ++date; + + if (*date == '\0') + return APR_DATE_BAD; + + if ((date = strchr(date, ' ')) == NULL) /* Find space after weekday */ + return APR_DATE_BAD; + + ++date; /* Now pointing to first char after space, which should be */ + + /* start of the actual date information for all 4 formats. */ + + if (apr_date_checkmask(date, "## @$$ #### ##:##:## *")) { + /* RFC 1123 format with two days */ + ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100; + if (ds.tm_year < 0) + return APR_DATE_BAD; + + ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0'); + + ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0'); + + monstr = date + 3; + timstr = date + 12; + } + else if (apr_date_checkmask(date, "##-@$$-## ##:##:## *")) { + /* RFC 850 format */ + ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0'); + if (ds.tm_year < 70) + ds.tm_year += 100; + + ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0'); + + monstr = date + 3; + timstr = date + 10; + } + else if (apr_date_checkmask(date, "@$$ ~# ##:##:## ####*")) { + /* asctime format */ + ds.tm_year = ((date[16] - '0') * 10 + (date[17] - '0') - 19) * 100; + if (ds.tm_year < 0) + return APR_DATE_BAD; + + ds.tm_year += ((date[18] - '0') * 10) + (date[19] - '0'); + + if (date[4] == ' ') + ds.tm_mday = 0; + else + ds.tm_mday = (date[4] - '0') * 10; + + ds.tm_mday += (date[5] - '0'); + + monstr = date; + timstr = date + 7; + } + else if (apr_date_checkmask(date, "# @$$ #### ##:##:## *")) { + /* RFC 1123 format with one day */ + ds.tm_year = ((date[6] - '0') * 10 + (date[7] - '0') - 19) * 100; + if (ds.tm_year < 0) + return APR_DATE_BAD; + + ds.tm_year += ((date[8] - '0') * 10) + (date[9] - '0'); + + ds.tm_mday = (date[0] - '0'); + + monstr = date + 2; + timstr = date + 11; + } + else + return APR_DATE_BAD; + + if (ds.tm_mday <= 0 || ds.tm_mday > 31) + return APR_DATE_BAD; + + ds.tm_hour = ((timstr[0] - '0') * 10) + (timstr[1] - '0'); + ds.tm_min = ((timstr[3] - '0') * 10) + (timstr[4] - '0'); + ds.tm_sec = ((timstr[6] - '0') * 10) + (timstr[7] - '0'); + + if ((ds.tm_hour > 23) || (ds.tm_min > 59) || (ds.tm_sec > 61)) + return APR_DATE_BAD; + + mint = (monstr[0] << 16) | (monstr[1] << 8) | monstr[2]; + for (mon = 0; mon < 12; mon++) + if (mint == months[mon]) + break; + + if (mon == 12) + return APR_DATE_BAD; + + if ((ds.tm_mday == 31) && (mon == 3 || mon == 5 || mon == 8 || mon == 10)) + return APR_DATE_BAD; + + /* February gets special check for leapyear */ + if ((mon == 1) && + ((ds.tm_mday > 29) || + ((ds.tm_mday == 29) + && ((ds.tm_year & 3) + || (((ds.tm_year % 100) == 0) + && (((ds.tm_year % 400) != 100))))))) + return APR_DATE_BAD; + + ds.tm_mon = mon; + + /* ap_mplode_time uses tm_usec and tm_gmtoff fields, but they haven't + * been set yet. + * It should be safe to just zero out these values. + * tm_usec is the number of microseconds into the second. HTTP only + * cares about second granularity. + * tm_gmtoff is the number of seconds off of GMT the time is. By + * definition all times going through this function are in GMT, so this + * is zero. + */ + ds.tm_usec = 0; + ds.tm_gmtoff = 0; + if (apr_time_exp_get(&result, &ds) != APR_SUCCESS) + return APR_DATE_BAD; + + return result; +} + +/* + * Parses a string resembling an RFC 822 date. This is meant to be + * leinent in its parsing of dates. Hence, this will parse a wider + * range of dates than apr_date_parse_http. + * + * The prominent mailer (or poster, if mailer is unknown) that has + * been seen in the wild is included for the unknown formats. + * + * Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 + * Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 + * Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format + * Sun, 6 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 + * Sun, 06 Nov 94 08:49:37 GMT ; RFC 822 + * Sun, 6 Nov 94 08:49:37 GMT ; RFC 822 + * Sun, 06 Nov 94 08:49 GMT ; Unknown [drtr@ast.cam.ac.uk] + * Sun, 6 Nov 94 08:49 GMT ; Unknown [drtr@ast.cam.ac.uk] + * Sun, 06 Nov 94 8:49:37 GMT ; Unknown [Elm 70.85] + * Sun, 6 Nov 94 8:49:37 GMT ; Unknown [Elm 70.85] + * Mon, 7 Jan 2002 07:21:22 GMT ; Unknown [Postfix] + * Sun, 06-Nov-1994 08:49:37 GMT ; RFC 850 with four digit years + * + */ + +#define TIMEPARSE(ds,hr10,hr1,min10,min1,sec10,sec1) \ + { \ + ds.tm_hour = ((hr10 - '0') * 10) + (hr1 - '0'); \ + ds.tm_min = ((min10 - '0') * 10) + (min1 - '0'); \ + ds.tm_sec = ((sec10 - '0') * 10) + (sec1 - '0'); \ + } +#define TIMEPARSE_STD(ds,timstr) \ + { \ + TIMEPARSE(ds, timstr[0],timstr[1], \ + timstr[3],timstr[4], \ + timstr[6],timstr[7]); \ + } + +APU_DECLARE(apr_time_t) apr_date_parse_rfc(const char *date) +{ + apr_time_exp_t ds; + apr_time_t result; + int mint, mon; + const char *monstr, *timstr, *gmtstr; + static const int months[12] = + { + ('J' << 16) | ('a' << 8) | 'n', ('F' << 16) | ('e' << 8) | 'b', + ('M' << 16) | ('a' << 8) | 'r', ('A' << 16) | ('p' << 8) | 'r', + ('M' << 16) | ('a' << 8) | 'y', ('J' << 16) | ('u' << 8) | 'n', + ('J' << 16) | ('u' << 8) | 'l', ('A' << 16) | ('u' << 8) | 'g', + ('S' << 16) | ('e' << 8) | 'p', ('O' << 16) | ('c' << 8) | 't', + ('N' << 16) | ('o' << 8) | 'v', ('D' << 16) | ('e' << 8) | 'c' }; + + if (!date) + return APR_DATE_BAD; + + /* Not all dates have text days at the beginning. */ + if (!apr_isdigit(date[0])) + { + while (*date && apr_isspace(*date)) /* Find first non-whitespace char */ + ++date; + + if (*date == '\0') + return APR_DATE_BAD; + + if ((date = strchr(date, ' ')) == NULL) /* Find space after weekday */ + return APR_DATE_BAD; + + ++date; /* Now pointing to first char after space, which should be */ } + + /* start of the actual date information for all 11 formats. */ + if (apr_date_checkmask(date, "## @$$ #### ##:##:## *")) { /* RFC 1123 format */ + ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100; + + if (ds.tm_year < 0) + return APR_DATE_BAD; + + ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0'); + + ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0'); + + monstr = date + 3; + timstr = date + 12; + gmtstr = date + 21; + + TIMEPARSE_STD(ds, timstr); + } + else if (apr_date_checkmask(date, "##-@$$-## ##:##:## *")) {/* RFC 850 format */ + ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0'); + + if (ds.tm_year < 70) + ds.tm_year += 100; + + ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0'); + + monstr = date + 3; + timstr = date + 10; + gmtstr = date + 19; + + TIMEPARSE_STD(ds, timstr); + } + else if (apr_date_checkmask(date, "@$$ ~# ##:##:## ####*")) { + /* asctime format */ + ds.tm_year = ((date[16] - '0') * 10 + (date[17] - '0') - 19) * 100; + if (ds.tm_year < 0) + return APR_DATE_BAD; + + ds.tm_year += ((date[18] - '0') * 10) + (date[19] - '0'); + + if (date[4] == ' ') + ds.tm_mday = 0; + else + ds.tm_mday = (date[4] - '0') * 10; + + ds.tm_mday += (date[5] - '0'); + + monstr = date; + timstr = date + 7; + gmtstr = NULL; + + TIMEPARSE_STD(ds, timstr); + } + else if (apr_date_checkmask(date, "# @$$ #### ##:##:## *")) { + /* RFC 1123 format*/ + ds.tm_year = ((date[6] - '0') * 10 + (date[7] - '0') - 19) * 100; + + if (ds.tm_year < 0) + return APR_DATE_BAD; + + ds.tm_year += ((date[8] - '0') * 10) + (date[9] - '0'); + ds.tm_mday = (date[0] - '0'); + + monstr = date + 2; + timstr = date + 11; + gmtstr = date + 20; + + TIMEPARSE_STD(ds, timstr); + } + else if (apr_date_checkmask(date, "## @$$ ## ##:##:## *")) { + /* This is the old RFC 1123 date format - many many years ago, people + * used two-digit years. Oh, how foolish. + * + * Two-digit day, two-digit year version. */ + ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0'); + + if (ds.tm_year < 70) + ds.tm_year += 100; + + ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0'); + + monstr = date + 3; + timstr = date + 10; + gmtstr = date + 19; + + TIMEPARSE_STD(ds, timstr); + } + else if (apr_date_checkmask(date, " # @$$ ## ##:##:## *")) { + /* This is the old RFC 1123 date format - many many years ago, people + * used two-digit years. Oh, how foolish. + * + * Space + one-digit day, two-digit year version.*/ + ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0'); + + if (ds.tm_year < 70) + ds.tm_year += 100; + + ds.tm_mday = (date[1] - '0'); + + monstr = date + 3; + timstr = date + 10; + gmtstr = date + 19; + + TIMEPARSE_STD(ds, timstr); + } + else if (apr_date_checkmask(date, "# @$$ ## ##:##:## *")) { + /* This is the old RFC 1123 date format - many many years ago, people + * used two-digit years. Oh, how foolish. + * + * One-digit day, two-digit year version. */ + ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0'); + + if (ds.tm_year < 70) + ds.tm_year += 100; + + ds.tm_mday = (date[0] - '0'); + + monstr = date + 2; + timstr = date + 9; + gmtstr = date + 18; + + TIMEPARSE_STD(ds, timstr); + } + else if (apr_date_checkmask(date, "## @$$ ## ##:## *")) { + /* Loser format. This is quite bogus. */ + ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0'); + + if (ds.tm_year < 70) + ds.tm_year += 100; + + ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0'); + + monstr = date + 3; + timstr = date + 10; + gmtstr = NULL; + + TIMEPARSE(ds, timstr[0],timstr[1], timstr[3],timstr[4], '0','0'); + } + else if (apr_date_checkmask(date, "# @$$ ## ##:## *")) { + /* Loser format. This is quite bogus. */ + ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0'); + + if (ds.tm_year < 70) + ds.tm_year += 100; + + ds.tm_mday = (date[0] - '0'); + + monstr = date + 2; + timstr = date + 9; + gmtstr = NULL; + + TIMEPARSE(ds, timstr[0],timstr[1], timstr[3],timstr[4], '0','0'); + } + else if (apr_date_checkmask(date, "## @$$ ## #:##:## *")) { + /* Loser format. This is quite bogus. */ + ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0'); + + if (ds.tm_year < 70) + ds.tm_year += 100; + + ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0'); + + monstr = date + 3; + timstr = date + 9; + gmtstr = date + 18; + + TIMEPARSE(ds, '0',timstr[1], timstr[3],timstr[4], timstr[6],timstr[7]); + } + else if (apr_date_checkmask(date, "# @$$ ## #:##:## *")) { + /* Loser format. This is quite bogus. */ + ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0'); + + if (ds.tm_year < 70) + ds.tm_year += 100; + + ds.tm_mday = (date[0] - '0'); + + monstr = date + 2; + timstr = date + 8; + gmtstr = date + 17; + + TIMEPARSE(ds, '0',timstr[1], timstr[3],timstr[4], timstr[6],timstr[7]); + } + else if (apr_date_checkmask(date, " # @$$ #### ##:##:## *")) { + /* RFC 1123 format with a space instead of a leading zero. */ + ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100; + + if (ds.tm_year < 0) + return APR_DATE_BAD; + + ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0'); + + ds.tm_mday = (date[1] - '0'); + + monstr = date + 3; + timstr = date + 12; + gmtstr = date + 21; + + TIMEPARSE_STD(ds, timstr); + } + else if (apr_date_checkmask(date, "##-@$$-#### ##:##:## *")) { + /* RFC 1123 with dashes instead of spaces between date/month/year + * This also looks like RFC 850 with four digit years. + */ + ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100; + if (ds.tm_year < 0) + return APR_DATE_BAD; + + ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0'); + + ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0'); + + monstr = date + 3; + timstr = date + 12; + gmtstr = date + 21; + + TIMEPARSE_STD(ds, timstr); + } + else + return APR_DATE_BAD; + + if (ds.tm_mday <= 0 || ds.tm_mday > 31) + return APR_DATE_BAD; + + if ((ds.tm_hour > 23) || (ds.tm_min > 59) || (ds.tm_sec > 61)) + return APR_DATE_BAD; + + mint = (monstr[0] << 16) | (monstr[1] << 8) | monstr[2]; + for (mon = 0; mon < 12; mon++) + if (mint == months[mon]) + break; + + if (mon == 12) + return APR_DATE_BAD; + + if ((ds.tm_mday == 31) && (mon == 3 || mon == 5 || mon == 8 || mon == 10)) + return APR_DATE_BAD; + + /* February gets special check for leapyear */ + + if ((mon == 1) && + ((ds.tm_mday > 29) + || ((ds.tm_mday == 29) + && ((ds.tm_year & 3) + || (((ds.tm_year % 100) == 0) + && (((ds.tm_year % 400) != 100))))))) + return APR_DATE_BAD; + + ds.tm_mon = mon; + + /* tm_gmtoff is the number of seconds off of GMT the time is. + * + * We only currently support: [+-]ZZZZ where Z is the offset in + * hours from GMT. + * + * If there is any confusion, tm_gmtoff will remain 0. + */ + ds.tm_gmtoff = 0; + + /* Do we have a timezone ? */ + if (gmtstr) { + int offset; + switch (*gmtstr) { + case '-': + offset = atoi(gmtstr+1); + ds.tm_gmtoff -= (offset / 100) * 60 * 60; + ds.tm_gmtoff -= (offset % 100) * 60; + break; + case '+': + offset = atoi(gmtstr+1); + ds.tm_gmtoff += (offset / 100) * 60 * 60; + ds.tm_gmtoff += (offset % 100) * 60; + break; + } + } + + /* apr_time_exp_get uses tm_usec field, but it hasn't been set yet. + * It should be safe to just zero out this value. + * tm_usec is the number of microseconds into the second. HTTP only + * cares about second granularity. + */ + ds.tm_usec = 0; + + if (apr_time_exp_gmt_get(&result, &ds) != APR_SUCCESS) + return APR_DATE_BAD; + + return result; +} |