From 46651ce6fe013220ed397add242004d764fc0153 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:15:05 +0200 Subject: Adding upstream version 14.5. Signed-off-by: Daniel Baumann --- src/backend/utils/adt/formatting.c | 6675 ++++++++++++++++++++++++++++++++++++ 1 file changed, 6675 insertions(+) create mode 100644 src/backend/utils/adt/formatting.c (limited to 'src/backend/utils/adt/formatting.c') diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c new file mode 100644 index 0000000..a1145e2 --- /dev/null +++ b/src/backend/utils/adt/formatting.c @@ -0,0 +1,6675 @@ +/* ----------------------------------------------------------------------- + * formatting.c + * + * src/backend/utils/adt/formatting.c + * + * + * Portions Copyright (c) 1999-2021, PostgreSQL Global Development Group + * + * + * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER(); + * + * The PostgreSQL routines for a timestamp/int/float/numeric formatting, + * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines. + * + * + * Cache & Memory: + * Routines use (itself) internal cache for format pictures. + * + * The cache uses a static buffer and is persistent across transactions. If + * the format-picture is bigger than the cache buffer, the parser is called + * always. + * + * NOTE for Number version: + * All in this version is implemented as keywords ( => not used + * suffixes), because a format picture is for *one* item (number) + * only. It not is as a timestamp version, where each keyword (can) + * has suffix. + * + * NOTE for Timestamp routines: + * In this module the POSIX 'struct tm' type is *not* used, but rather + * PgSQL type, which has tm_mon based on one (*non* zero) and + * year *not* based on 1900, but is used full year number. + * Module supports AD / BC / AM / PM. + * + * Supported types for to_char(): + * + * Timestamp, Numeric, int4, int8, float4, float8 + * + * Supported types for reverse conversion: + * + * Timestamp - to_timestamp() + * Date - to_date() + * Numeric - to_number() + * + * + * Karel Zak + * + * TODO + * - better number building (formatting) / parsing, now it isn't + * ideal code + * - use Assert() + * - add support for roman number to standard number conversion + * - add support for number spelling + * - add support for string to string formatting (we must be better + * than Oracle :-), + * to_char('Hello', 'X X X X X') -> 'H e l l o' + * + * ----------------------------------------------------------------------- + */ + +#ifdef DEBUG_TO_FROM_CHAR +#define DEBUG_elog_output DEBUG3 +#endif + +#include "postgres.h" + +#include +#include +#include +#include +#include + +/* + * towlower() and friends should be in , but some pre-C99 systems + * declare them in , so include that too. + */ +#include +#ifdef HAVE_WCTYPE_H +#include +#endif + +#ifdef USE_ICU +#include +#endif + +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "mb/pg_wchar.h" +#include "parser/scansup.h" +#include "utils/builtins.h" +#include "utils/date.h" +#include "utils/datetime.h" +#include "utils/float.h" +#include "utils/formatting.h" +#include "utils/int8.h" +#include "utils/memutils.h" +#include "utils/numeric.h" +#include "utils/pg_locale.h" + +/* ---------- + * Convenience macros for error handling + * ---------- + * + * Two macros below help to handle errors in functions that take + * 'bool *have_error' argument. When this argument is not NULL, it's expected + * that function will suppress ereports when possible. Instead it should + * return some default value and set *have_error flag. + * + * RETURN_ERROR() macro intended to wrap ereport() calls. When have_error + * function argument is not NULL, then instead of ereport'ing we set + * *have_error flag and go to on_error label. It's supposed that jump + * resources will be freed and some 'default' value returned. + * + * CHECK_ERROR() jumps on_error label when *have_error flag is defined and set. + * It's supposed to be used for immediate exit from the function on error + * after call of another function with 'bool *have_error' argument. + */ +#define RETURN_ERROR(throw_error) \ +do { \ + if (have_error) \ + { \ + *have_error = true; \ + goto on_error; \ + } \ + else \ + { \ + throw_error; \ + } \ +} while (0) + +#define CHECK_ERROR \ +do { \ + if (have_error && *have_error) \ + goto on_error; \ +} while (0) + +/* ---------- + * Routines flags + * ---------- + */ +#define DCH_FLAG 0x1 /* DATE-TIME flag */ +#define NUM_FLAG 0x2 /* NUMBER flag */ +#define STD_FLAG 0x4 /* STANDARD flag */ + +/* ---------- + * KeyWord Index (ascii from position 32 (' ') to 126 (~)) + * ---------- + */ +#define KeyWord_INDEX_SIZE ('~' - ' ') +#define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 0 : 1) + +/* ---------- + * Maximal length of one node + * ---------- + */ +#define DCH_MAX_ITEM_SIZ 12 /* max localized day name */ +#define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */ + + +/* ---------- + * Format parser structs + * ---------- + */ +typedef struct +{ + const char *name; /* suffix string */ + int len, /* suffix length */ + id, /* used in node->suffix */ + type; /* prefix / postfix */ +} KeySuffix; + +/* ---------- + * FromCharDateMode + * ---------- + * + * This value is used to nominate one of several distinct (and mutually + * exclusive) date conventions that a keyword can belong to. + */ +typedef enum +{ + FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */ + FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */ + FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */ +} FromCharDateMode; + +typedef struct +{ + const char *name; + int len; + int id; + bool is_digit; + FromCharDateMode date_mode; +} KeyWord; + +typedef struct +{ + uint8 type; /* NODE_TYPE_XXX, see below */ + char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */ + uint8 suffix; /* keyword prefix/suffix code, if any */ + const KeyWord *key; /* if type is ACTION */ +} FormatNode; + +#define NODE_TYPE_END 1 +#define NODE_TYPE_ACTION 2 +#define NODE_TYPE_CHAR 3 +#define NODE_TYPE_SEPARATOR 4 +#define NODE_TYPE_SPACE 5 + +#define SUFFTYPE_PREFIX 1 +#define SUFFTYPE_POSTFIX 2 + +#define CLOCK_24_HOUR 0 +#define CLOCK_12_HOUR 1 + + +/* ---------- + * Full months + * ---------- + */ +static const char *const months_full[] = { + "January", "February", "March", "April", "May", "June", "July", + "August", "September", "October", "November", "December", NULL +}; + +static const char *const days_short[] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL +}; + +/* ---------- + * AD / BC + * ---------- + * There is no 0 AD. Years go from 1 BC to 1 AD, so we make it + * positive and map year == -1 to year zero, and shift all negative + * years up one. For interval years, we just return the year. + */ +#define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year))) + +#define A_D_STR "A.D." +#define a_d_STR "a.d." +#define AD_STR "AD" +#define ad_STR "ad" + +#define B_C_STR "B.C." +#define b_c_STR "b.c." +#define BC_STR "BC" +#define bc_STR "bc" + +/* + * AD / BC strings for seq_search. + * + * These are given in two variants, a long form with periods and a standard + * form without. + * + * The array is laid out such that matches for AD have an even index, and + * matches for BC have an odd index. So the boolean value for BC is given by + * taking the array index of the match, modulo 2. + */ +static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL}; +static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL}; + +/* ---------- + * AM / PM + * ---------- + */ +#define A_M_STR "A.M." +#define a_m_STR "a.m." +#define AM_STR "AM" +#define am_STR "am" + +#define P_M_STR "P.M." +#define p_m_STR "p.m." +#define PM_STR "PM" +#define pm_STR "pm" + +/* + * AM / PM strings for seq_search. + * + * These are given in two variants, a long form with periods and a standard + * form without. + * + * The array is laid out such that matches for AM have an even index, and + * matches for PM have an odd index. So the boolean value for PM is given by + * taking the array index of the match, modulo 2. + */ +static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL}; +static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL}; + +/* ---------- + * Months in roman-numeral + * (Must be in reverse order for seq_search (in FROM_CHAR), because + * 'VIII' must have higher precedence than 'V') + * ---------- + */ +static const char *const rm_months_upper[] = +{"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL}; + +static const char *const rm_months_lower[] = +{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL}; + +/* ---------- + * Roman numbers + * ---------- + */ +static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL}; +static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL}; +static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL}; + +/* ---------- + * Ordinal postfixes + * ---------- + */ +static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL}; +static const char *const numth[] = {"st", "nd", "rd", "th", NULL}; + +/* ---------- + * Flags & Options: + * ---------- + */ +#define TH_UPPER 1 +#define TH_LOWER 2 + +/* ---------- + * Number description struct + * ---------- + */ +typedef struct +{ + int pre, /* (count) numbers before decimal */ + post, /* (count) numbers after decimal */ + lsign, /* want locales sign */ + flag, /* number parameters */ + pre_lsign_num, /* tmp value for lsign */ + multi, /* multiplier for 'V' */ + zero_start, /* position of first zero */ + zero_end, /* position of last zero */ + need_locale; /* needs it locale */ +} NUMDesc; + +/* ---------- + * Flags for NUMBER version + * ---------- + */ +#define NUM_F_DECIMAL (1 << 1) +#define NUM_F_LDECIMAL (1 << 2) +#define NUM_F_ZERO (1 << 3) +#define NUM_F_BLANK (1 << 4) +#define NUM_F_FILLMODE (1 << 5) +#define NUM_F_LSIGN (1 << 6) +#define NUM_F_BRACKET (1 << 7) +#define NUM_F_MINUS (1 << 8) +#define NUM_F_PLUS (1 << 9) +#define NUM_F_ROMAN (1 << 10) +#define NUM_F_MULTI (1 << 11) +#define NUM_F_PLUS_POST (1 << 12) +#define NUM_F_MINUS_POST (1 << 13) +#define NUM_F_EEEE (1 << 14) + +#define NUM_LSIGN_PRE (-1) +#define NUM_LSIGN_POST 1 +#define NUM_LSIGN_NONE 0 + +/* ---------- + * Tests + * ---------- + */ +#define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL) +#define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL) +#define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO) +#define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK) +#define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE) +#define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET) +#define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS) +#define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN) +#define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS) +#define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN) +#define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI) +#define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE) + +/* ---------- + * Format picture cache + * + * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long; + * likewise number format pictures up to NUM_CACHE_SIZE bytes long. + * + * For simplicity, the cache entries are fixed-size, so they allow for the + * worst case of a FormatNode for each byte in the picture string. + * + * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and + * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that + * we don't waste too much space by palloc'ing them individually. Be sure + * to adjust those macros if you add fields to those structs. + * + * The max number of entries in each cache is DCH_CACHE_ENTRIES + * resp. NUM_CACHE_ENTRIES. + * ---------- + */ +#define DCH_CACHE_OVERHEAD \ + MAXALIGN(sizeof(bool) + sizeof(int)) +#define NUM_CACHE_OVERHEAD \ + MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc)) + +#define DCH_CACHE_SIZE \ + ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1) +#define NUM_CACHE_SIZE \ + ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1) + +#define DCH_CACHE_ENTRIES 20 +#define NUM_CACHE_ENTRIES 20 + +typedef struct +{ + FormatNode format[DCH_CACHE_SIZE + 1]; + char str[DCH_CACHE_SIZE + 1]; + bool std; + bool valid; + int age; +} DCHCacheEntry; + +typedef struct +{ + FormatNode format[NUM_CACHE_SIZE + 1]; + char str[NUM_CACHE_SIZE + 1]; + bool valid; + int age; + NUMDesc Num; +} NUMCacheEntry; + +/* global cache for date/time format pictures */ +static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES]; +static int n_DCHCache = 0; /* current number of entries */ +static int DCHCounter = 0; /* aging-event counter */ + +/* global cache for number format pictures */ +static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES]; +static int n_NUMCache = 0; /* current number of entries */ +static int NUMCounter = 0; /* aging-event counter */ + +/* ---------- + * For char->date/time conversion + * ---------- + */ +typedef struct +{ + FromCharDateMode mode; + int hh, + pm, + mi, + ss, + ssss, + d, /* stored as 1-7, Sunday = 1, 0 means missing */ + dd, + ddd, + mm, + ms, + year, + bc, + ww, + w, + cc, + j, + us, + yysz, /* is it YY or YYYY ? */ + clock, /* 12 or 24 hour clock? */ + tzsign, /* +1, -1 or 0 if timezone info is absent */ + tzh, + tzm, + ff; /* fractional precision */ +} TmFromChar; + +#define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar)) + +/* ---------- + * Debug + * ---------- + */ +#ifdef DEBUG_TO_FROM_CHAR +#define DEBUG_TMFC(_X) \ + elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \ + (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \ + (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \ + (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \ + (_X)->yysz, (_X)->clock) +#define DEBUG_TM(_X) \ + elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nnisdst %d\nmon %d\n",\ + (_X)->tm_sec, (_X)->tm_year,\ + (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\ + (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon) +#else +#define DEBUG_TMFC(_X) +#define DEBUG_TM(_X) +#endif + +/* ---------- + * Datetime to char conversion + * ---------- + */ +typedef struct TmToChar +{ + struct pg_tm tm; /* classic 'tm' struct */ + fsec_t fsec; /* fractional seconds */ + const char *tzn; /* timezone */ +} TmToChar; + +#define tmtcTm(_X) (&(_X)->tm) +#define tmtcTzn(_X) ((_X)->tzn) +#define tmtcFsec(_X) ((_X)->fsec) + +#define ZERO_tm(_X) \ +do { \ + (_X)->tm_sec = (_X)->tm_year = (_X)->tm_min = (_X)->tm_wday = \ + (_X)->tm_hour = (_X)->tm_yday = (_X)->tm_isdst = 0; \ + (_X)->tm_mday = (_X)->tm_mon = 1; \ + (_X)->tm_zone = NULL; \ +} while(0) + +#define ZERO_tmtc(_X) \ +do { \ + ZERO_tm( tmtcTm(_X) ); \ + tmtcFsec(_X) = 0; \ + tmtcTzn(_X) = NULL; \ +} while(0) + +/* + * to_char(time) appears to to_char() as an interval, so this check + * is really for interval and time data types. + */ +#define INVALID_FOR_INTERVAL \ +do { \ + if (is_interval) \ + ereport(ERROR, \ + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \ + errmsg("invalid format specification for an interval value"), \ + errhint("Intervals are not tied to specific calendar dates."))); \ +} while(0) + +/***************************************************************************** + * KeyWord definitions + *****************************************************************************/ + +/* ---------- + * Suffixes (FormatNode.suffix is an OR of these codes) + * ---------- + */ +#define DCH_S_FM 0x01 +#define DCH_S_TH 0x02 +#define DCH_S_th 0x04 +#define DCH_S_SP 0x08 +#define DCH_S_TM 0x10 + +/* ---------- + * Suffix tests + * ---------- + */ +#define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0) +#define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0) +#define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0) +#define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER) + +/* Oracle toggles FM behavior, we don't; see docs. */ +#define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0) +#define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0) +#define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0) + +/* ---------- + * Suffixes definition for DATE-TIME TO/FROM CHAR + * ---------- + */ +#define TM_SUFFIX_LEN 2 + +static const KeySuffix DCH_suff[] = { + {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX}, + {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX}, + {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX}, + {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX}, + {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX}, + {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX}, + {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX}, + /* last */ + {NULL, 0, 0, 0} +}; + + +/* ---------- + * Format-pictures (KeyWord). + * + * The KeyWord field; alphabetic sorted, *BUT* strings alike is sorted + * complicated -to-> easy: + * + * (example: "DDD","DD","Day","D" ) + * + * (this specific sort needs the algorithm for sequential search for strings, + * which not has exact end; -> How keyword is in "HH12blabla" ? - "HH" + * or "HH12"? You must first try "HH12", because "HH" is in string, but + * it is not good. + * + * (!) + * - Position for the keyword is similar as position in the enum DCH/NUM_poz. + * (!) + * + * For fast search is used the 'int index[]', index is ascii table from position + * 32 (' ') to 126 (~), in this index is DCH_ / NUM_ enums for each ASCII + * position or -1 if char is not used in the KeyWord. Search example for + * string "MM": + * 1) see in index to index['M' - 32], + * 2) take keywords position (enum DCH_MI) from index + * 3) run sequential search in keywords[] from this position + * + * ---------- + */ + +typedef enum +{ + DCH_A_D, + DCH_A_M, + DCH_AD, + DCH_AM, + DCH_B_C, + DCH_BC, + DCH_CC, + DCH_DAY, + DCH_DDD, + DCH_DD, + DCH_DY, + DCH_Day, + DCH_Dy, + DCH_D, + DCH_FF1, + DCH_FF2, + DCH_FF3, + DCH_FF4, + DCH_FF5, + DCH_FF6, + DCH_FX, /* global suffix */ + DCH_HH24, + DCH_HH12, + DCH_HH, + DCH_IDDD, + DCH_ID, + DCH_IW, + DCH_IYYY, + DCH_IYY, + DCH_IY, + DCH_I, + DCH_J, + DCH_MI, + DCH_MM, + DCH_MONTH, + DCH_MON, + DCH_MS, + DCH_Month, + DCH_Mon, + DCH_OF, + DCH_P_M, + DCH_PM, + DCH_Q, + DCH_RM, + DCH_SSSSS, + DCH_SSSS, + DCH_SS, + DCH_TZH, + DCH_TZM, + DCH_TZ, + DCH_US, + DCH_WW, + DCH_W, + DCH_Y_YYY, + DCH_YYYY, + DCH_YYY, + DCH_YY, + DCH_Y, + DCH_a_d, + DCH_a_m, + DCH_ad, + DCH_am, + DCH_b_c, + DCH_bc, + DCH_cc, + DCH_day, + DCH_ddd, + DCH_dd, + DCH_dy, + DCH_d, + DCH_ff1, + DCH_ff2, + DCH_ff3, + DCH_ff4, + DCH_ff5, + DCH_ff6, + DCH_fx, + DCH_hh24, + DCH_hh12, + DCH_hh, + DCH_iddd, + DCH_id, + DCH_iw, + DCH_iyyy, + DCH_iyy, + DCH_iy, + DCH_i, + DCH_j, + DCH_mi, + DCH_mm, + DCH_month, + DCH_mon, + DCH_ms, + DCH_p_m, + DCH_pm, + DCH_q, + DCH_rm, + DCH_sssss, + DCH_ssss, + DCH_ss, + DCH_tz, + DCH_us, + DCH_ww, + DCH_w, + DCH_y_yyy, + DCH_yyyy, + DCH_yyy, + DCH_yy, + DCH_y, + + /* last */ + _DCH_last_ +} DCH_poz; + +typedef enum +{ + NUM_COMMA, + NUM_DEC, + NUM_0, + NUM_9, + NUM_B, + NUM_C, + NUM_D, + NUM_E, + NUM_FM, + NUM_G, + NUM_L, + NUM_MI, + NUM_PL, + NUM_PR, + NUM_RN, + NUM_SG, + NUM_SP, + NUM_S, + NUM_TH, + NUM_V, + NUM_b, + NUM_c, + NUM_d, + NUM_e, + NUM_fm, + NUM_g, + NUM_l, + NUM_mi, + NUM_pl, + NUM_pr, + NUM_rn, + NUM_sg, + NUM_sp, + NUM_s, + NUM_th, + NUM_v, + + /* last */ + _NUM_last_ +} NUM_poz; + +/* ---------- + * KeyWords for DATE-TIME version + * ---------- + */ +static const KeyWord DCH_keywords[] = { +/* name, len, id, is_digit, date_mode */ + {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */ + {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE}, + {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE}, + {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE}, + {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */ + {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE}, + {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */ + {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */ + {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN}, + {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN}, + {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE}, + {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE}, + {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE}, + {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN}, + {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */ + {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE}, + {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE}, + {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE}, + {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE}, + {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE}, + {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, + {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */ + {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE}, + {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE}, + {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */ + {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK}, + {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK}, + {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK}, + {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK}, + {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK}, + {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK}, + {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */ + {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */ + {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN}, + {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN}, + {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN}, + {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE}, + {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN}, + {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN}, + {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */ + {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */ + {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE}, + {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */ + {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */ + {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */ + {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, + {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE}, + {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */ + {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE}, + {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE}, + {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */ + {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */ + {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN}, + {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */ + {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN}, + {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN}, + {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN}, + {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN}, + {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */ + {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE}, + {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE}, + {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE}, + {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */ + {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE}, + {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */ + {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */ + {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN}, + {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN}, + {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE}, + {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN}, + {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */ + {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE}, + {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE}, + {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE}, + {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE}, + {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE}, + {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, + {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */ + {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE}, + {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE}, + {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */ + {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK}, + {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK}, + {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK}, + {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK}, + {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK}, + {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK}, + {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */ + {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */ + {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN}, + {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN}, + {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN}, + {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE}, + {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */ + {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE}, + {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */ + {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */ + {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */ + {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, + {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE}, + {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */ + {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */ + {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */ + {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN}, + {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */ + {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN}, + {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN}, + {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN}, + {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN}, + + /* last */ + {NULL, 0, 0, 0, 0} +}; + +/* ---------- + * KeyWords for NUMBER version + * + * The is_digit and date_mode fields are not relevant here. + * ---------- + */ +static const KeyWord NUM_keywords[] = { +/* name, len, id is in Index */ + {",", 1, NUM_COMMA}, /* , */ + {".", 1, NUM_DEC}, /* . */ + {"0", 1, NUM_0}, /* 0 */ + {"9", 1, NUM_9}, /* 9 */ + {"B", 1, NUM_B}, /* B */ + {"C", 1, NUM_C}, /* C */ + {"D", 1, NUM_D}, /* D */ + {"EEEE", 4, NUM_E}, /* E */ + {"FM", 2, NUM_FM}, /* F */ + {"G", 1, NUM_G}, /* G */ + {"L", 1, NUM_L}, /* L */ + {"MI", 2, NUM_MI}, /* M */ + {"PL", 2, NUM_PL}, /* P */ + {"PR", 2, NUM_PR}, + {"RN", 2, NUM_RN}, /* R */ + {"SG", 2, NUM_SG}, /* S */ + {"SP", 2, NUM_SP}, + {"S", 1, NUM_S}, + {"TH", 2, NUM_TH}, /* T */ + {"V", 1, NUM_V}, /* V */ + {"b", 1, NUM_B}, /* b */ + {"c", 1, NUM_C}, /* c */ + {"d", 1, NUM_D}, /* d */ + {"eeee", 4, NUM_E}, /* e */ + {"fm", 2, NUM_FM}, /* f */ + {"g", 1, NUM_G}, /* g */ + {"l", 1, NUM_L}, /* l */ + {"mi", 2, NUM_MI}, /* m */ + {"pl", 2, NUM_PL}, /* p */ + {"pr", 2, NUM_PR}, + {"rn", 2, NUM_rn}, /* r */ + {"sg", 2, NUM_SG}, /* s */ + {"sp", 2, NUM_SP}, + {"s", 1, NUM_S}, + {"th", 2, NUM_th}, /* t */ + {"v", 1, NUM_V}, /* v */ + + /* last */ + {NULL, 0, 0} +}; + + +/* ---------- + * KeyWords index for DATE-TIME version + * ---------- + */ +static const int DCH_index[KeyWord_INDEX_SIZE] = { +/* +0 1 2 3 4 5 6 7 8 9 +*/ + /*---- first 0..31 chars are skipped ----*/ + + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1, + DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF, + DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY, + -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc, + DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi, + -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww, + -1, DCH_y_yyy, -1, -1, -1, -1 + + /*---- chars over 126 are skipped ----*/ +}; + +/* ---------- + * KeyWords index for NUMBER version + * ---------- + */ +static const int NUM_index[KeyWord_INDEX_SIZE] = { +/* +0 1 2 3 4 5 6 7 8 9 +*/ + /*---- first 0..31 chars are skipped ----*/ + + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1, + -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1, + -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E, + NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1, + NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c, + NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi, + -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1, + -1, -1, -1, -1, -1, -1 + + /*---- chars over 126 are skipped ----*/ +}; + +/* ---------- + * Number processor struct + * ---------- + */ +typedef struct NUMProc +{ + bool is_to_char; + NUMDesc *Num; /* number description */ + + int sign, /* '-' or '+' */ + sign_wrote, /* was sign write */ + num_count, /* number of write digits */ + num_in, /* is inside number */ + num_curr, /* current position in number */ + out_pre_spaces, /* spaces before first digit */ + + read_dec, /* to_number - was read dec. point */ + read_post, /* to_number - number of dec. digit */ + read_pre; /* to_number - number non-dec. digit */ + + char *number, /* string with number */ + *number_p, /* pointer to current number position */ + *inout, /* in / out buffer */ + *inout_p, /* pointer to current inout position */ + *last_relevant, /* last relevant number after decimal point */ + + *L_negative_sign, /* Locale */ + *L_positive_sign, + *decimal, + *L_thousands_sep, + *L_currency_symbol; +} NUMProc; + +/* Return flags for DCH_from_char() */ +#define DCH_DATED 0x01 +#define DCH_TIMED 0x02 +#define DCH_ZONED 0x04 + +/* ---------- + * Functions + * ---------- + */ +static const KeyWord *index_seq_search(const char *str, const KeyWord *kw, + const int *index); +static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type); +static bool is_separator_char(const char *str); +static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); +static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, + const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num); + +static void DCH_to_char(FormatNode *node, bool is_interval, + TmToChar *in, char *out, Oid collid); +static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, + Oid collid, bool std, bool *have_error); + +#ifdef DEBUG_TO_FROM_CHAR +static void dump_index(const KeyWord *k, const int *index); +static void dump_node(FormatNode *node, int max); +#endif + +static const char *get_th(char *num, int type); +static char *str_numth(char *dest, char *num, int type); +static int adjust_partial_year_to_2020(int year); +static int strspace_len(const char *str); +static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, + bool *have_error); +static void from_char_set_int(int *dest, const int value, const FormatNode *node, + bool *have_error); +static int from_char_parse_int_len(int *dest, const char **src, const int len, + FormatNode *node, bool *have_error); +static int from_char_parse_int(int *dest, const char **src, FormatNode *node, + bool *have_error); +static int seq_search_ascii(const char *name, const char *const *array, int *len); +static int seq_search_localized(const char *name, char **array, int *len, + Oid collid); +static int from_char_seq_search(int *dest, const char **src, + const char *const *array, + char **localized_array, Oid collid, + FormatNode *node, bool *have_error); +static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std, + struct pg_tm *tm, fsec_t *fsec, int *fprec, + uint32 *flags, bool *have_error); +static char *fill_str(char *str, int c, int max); +static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree); +static char *int_to_roman(int number); +static void NUM_prepare_locale(NUMProc *Np); +static char *get_last_relevant_decnum(char *num); +static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len); +static void NUM_numpart_to_char(NUMProc *Np, int id); +static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, + char *number, int input_len, int to_char_out_pre_spaces, + int sign, bool is_to_char, Oid collid); +static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std); +static DCHCacheEntry *DCH_cache_search(const char *str, bool std); +static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std); +static NUMCacheEntry *NUM_cache_getnew(const char *str); +static NUMCacheEntry *NUM_cache_search(const char *str); +static NUMCacheEntry *NUM_cache_fetch(const char *str); + + +/* ---------- + * Fast sequential search, use index for data selection which + * go to seq. cycle (it is very fast for unwanted strings) + * (can't be used binary search in format parsing) + * ---------- + */ +static const KeyWord * +index_seq_search(const char *str, const KeyWord *kw, const int *index) +{ + int poz; + + if (!KeyWord_INDEX_FILTER(*str)) + return NULL; + + if ((poz = *(index + (*str - ' '))) > -1) + { + const KeyWord *k = kw + poz; + + do + { + if (strncmp(str, k->name, k->len) == 0) + return k; + k++; + if (!k->name) + return NULL; + } while (*str == *k->name); + } + return NULL; +} + +static const KeySuffix * +suff_search(const char *str, const KeySuffix *suf, int type) +{ + const KeySuffix *s; + + for (s = suf; s->name != NULL; s++) + { + if (s->type != type) + continue; + + if (strncmp(str, s->name, s->len) == 0) + return s; + } + return NULL; +} + +static bool +is_separator_char(const char *str) +{ + /* ASCII printable character, but not letter or digit */ + return (*str > 0x20 && *str < 0x7F && + !(*str >= 'A' && *str <= 'Z') && + !(*str >= 'a' && *str <= 'z') && + !(*str >= '0' && *str <= '9')); +} + +/* ---------- + * Prepare NUMDesc (number description struct) via FormatNode struct + * ---------- + */ +static void +NUMDesc_prepare(NUMDesc *num, FormatNode *n) +{ + if (n->type != NODE_TYPE_ACTION) + return; + + if (IS_EEEE(num) && n->key->id != NUM_E) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("\"EEEE\" must be the last pattern used"))); + + switch (n->key->id) + { + case NUM_9: + if (IS_BRACKET(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("\"9\" must be ahead of \"PR\""))); + if (IS_MULTI(num)) + { + ++num->multi; + break; + } + if (IS_DECIMAL(num)) + ++num->post; + else + ++num->pre; + break; + + case NUM_0: + if (IS_BRACKET(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("\"0\" must be ahead of \"PR\""))); + if (!IS_ZERO(num) && !IS_DECIMAL(num)) + { + num->flag |= NUM_F_ZERO; + num->zero_start = num->pre + 1; + } + if (!IS_DECIMAL(num)) + ++num->pre; + else + ++num->post; + + num->zero_end = num->pre + num->post; + break; + + case NUM_B: + if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num))) + num->flag |= NUM_F_BLANK; + break; + + case NUM_D: + num->flag |= NUM_F_LDECIMAL; + num->need_locale = true; + /* FALLTHROUGH */ + case NUM_DEC: + if (IS_DECIMAL(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("multiple decimal points"))); + if (IS_MULTI(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"V\" and decimal point together"))); + num->flag |= NUM_F_DECIMAL; + break; + + case NUM_FM: + num->flag |= NUM_F_FILLMODE; + break; + + case NUM_S: + if (IS_LSIGN(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"S\" twice"))); + if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together"))); + if (!IS_DECIMAL(num)) + { + num->lsign = NUM_LSIGN_PRE; + num->pre_lsign_num = num->pre; + num->need_locale = true; + num->flag |= NUM_F_LSIGN; + } + else if (num->lsign == NUM_LSIGN_NONE) + { + num->lsign = NUM_LSIGN_POST; + num->need_locale = true; + num->flag |= NUM_F_LSIGN; + } + break; + + case NUM_MI: + if (IS_LSIGN(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"S\" and \"MI\" together"))); + num->flag |= NUM_F_MINUS; + if (IS_DECIMAL(num)) + num->flag |= NUM_F_MINUS_POST; + break; + + case NUM_PL: + if (IS_LSIGN(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"S\" and \"PL\" together"))); + num->flag |= NUM_F_PLUS; + if (IS_DECIMAL(num)) + num->flag |= NUM_F_PLUS_POST; + break; + + case NUM_SG: + if (IS_LSIGN(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"S\" and \"SG\" together"))); + num->flag |= NUM_F_MINUS; + num->flag |= NUM_F_PLUS; + break; + + case NUM_PR: + if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together"))); + num->flag |= NUM_F_BRACKET; + break; + + case NUM_rn: + case NUM_RN: + num->flag |= NUM_F_ROMAN; + break; + + case NUM_L: + case NUM_G: + num->need_locale = true; + break; + + case NUM_V: + if (IS_DECIMAL(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"V\" and decimal point together"))); + num->flag |= NUM_F_MULTI; + break; + + case NUM_E: + if (IS_EEEE(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use \"EEEE\" twice"))); + if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) || + IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) || + IS_ROMAN(num) || IS_MULTI(num)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("\"EEEE\" is incompatible with other formats"), + errdetail("\"EEEE\" may only be used together with digit and decimal point patterns."))); + num->flag |= NUM_F_EEEE; + break; + } +} + +/* ---------- + * Format parser, search small keywords and keyword's suffixes, and make + * format-node tree. + * + * for DATE-TIME & NUMBER version + * ---------- + */ +static void +parse_format(FormatNode *node, const char *str, const KeyWord *kw, + const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num) +{ + FormatNode *n; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "to_char/number(): run parser"); +#endif + + n = node; + + while (*str) + { + int suffix = 0; + const KeySuffix *s; + + /* + * Prefix + */ + if ((flags & DCH_FLAG) && + (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL) + { + suffix |= s->id; + if (s->len) + str += s->len; + } + + /* + * Keyword + */ + if (*str && (n->key = index_seq_search(str, kw, index)) != NULL) + { + n->type = NODE_TYPE_ACTION; + n->suffix = suffix; + if (n->key->len) + str += n->key->len; + + /* + * NUM version: Prepare global NUMDesc struct + */ + if (flags & NUM_FLAG) + NUMDesc_prepare(Num, n); + + /* + * Postfix + */ + if ((flags & DCH_FLAG) && *str && + (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL) + { + n->suffix |= s->id; + if (s->len) + str += s->len; + } + + n++; + } + else if (*str) + { + int chlen; + + if ((flags & STD_FLAG) && *str != '"') + { + /* + * Standard mode, allow only following separators: "-./,':; ". + * However, we support double quotes even in standard mode + * (see below). This is our extension of standard mode. + */ + if (strchr("-./,':; ", *str) == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid datetime format separator: \"%s\"", + pnstrdup(str, pg_mblen(str))))); + + if (*str == ' ') + n->type = NODE_TYPE_SPACE; + else + n->type = NODE_TYPE_SEPARATOR; + + n->character[0] = *str; + n->character[1] = '\0'; + n->key = NULL; + n->suffix = 0; + n++; + str++; + } + else if (*str == '"') + { + /* + * Process double-quoted literal string, if any + */ + str++; + while (*str) + { + if (*str == '"') + { + str++; + break; + } + /* backslash quotes the next character, if any */ + if (*str == '\\' && *(str + 1)) + str++; + chlen = pg_mblen(str); + n->type = NODE_TYPE_CHAR; + memcpy(n->character, str, chlen); + n->character[chlen] = '\0'; + n->key = NULL; + n->suffix = 0; + n++; + str += chlen; + } + } + else + { + /* + * Outside double-quoted strings, backslash is only special if + * it immediately precedes a double quote. + */ + if (*str == '\\' && *(str + 1) == '"') + str++; + chlen = pg_mblen(str); + + if ((flags & DCH_FLAG) && is_separator_char(str)) + n->type = NODE_TYPE_SEPARATOR; + else if (isspace((unsigned char) *str)) + n->type = NODE_TYPE_SPACE; + else + n->type = NODE_TYPE_CHAR; + + memcpy(n->character, str, chlen); + n->character[chlen] = '\0'; + n->key = NULL; + n->suffix = 0; + n++; + str += chlen; + } + } + } + + n->type = NODE_TYPE_END; + n->suffix = 0; +} + +/* ---------- + * DEBUG: Dump the FormatNode Tree (debug) + * ---------- + */ +#ifdef DEBUG_TO_FROM_CHAR + +#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " ")) +#define DUMP_FM(_suf) (S_FM(_suf) ? "FM" : " ") + +static void +dump_node(FormatNode *node, int max) +{ + FormatNode *n; + int a; + + elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT"); + + for (a = 0, n = node; a <= max; n++, a++) + { + if (n->type == NODE_TYPE_ACTION) + elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)", + a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix)); + else if (n->type == NODE_TYPE_CHAR) + elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'", + a, n->character); + else if (n->type == NODE_TYPE_END) + { + elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a); + return; + } + else + elog(DEBUG_elog_output, "%d:\t unknown NODE!", a); + } +} +#endif /* DEBUG */ + +/***************************************************************************** + * Private utils + *****************************************************************************/ + +/* ---------- + * Return ST/ND/RD/TH for simple (1..9) numbers + * type --> 0 upper, 1 lower + * ---------- + */ +static const char * +get_th(char *num, int type) +{ + int len = strlen(num), + last; + + last = *(num + (len - 1)); + if (!isdigit((unsigned char) last)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("\"%s\" is not a number", num))); + + /* + * All "teens" (1[0-9]) get 'TH/th', while [02-9][123] still get + * 'ST/st', 'ND/nd', 'RD/rd', respectively + */ + if ((len > 1) && (num[len - 2] == '1')) + last = 0; + + switch (last) + { + case '1': + if (type == TH_UPPER) + return numTH[0]; + return numth[0]; + case '2': + if (type == TH_UPPER) + return numTH[1]; + return numth[1]; + case '3': + if (type == TH_UPPER) + return numTH[2]; + return numth[2]; + default: + if (type == TH_UPPER) + return numTH[3]; + return numth[3]; + } +} + +/* ---------- + * Convert string-number to ordinal string-number + * type --> 0 upper, 1 lower + * ---------- + */ +static char * +str_numth(char *dest, char *num, int type) +{ + if (dest != num) + strcpy(dest, num); + strcat(dest, get_th(num, type)); + return dest; +} + +/***************************************************************************** + * upper/lower/initcap functions + *****************************************************************************/ + +#ifdef USE_ICU + +typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + +static int32_t +icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, + UChar **buff_dest, UChar *buff_source, int32_t len_source) +{ + UErrorCode status; + int32_t len_dest; + + len_dest = len_source; /* try first with same length */ + *buff_dest = palloc(len_dest * sizeof(**buff_dest)); + status = U_ZERO_ERROR; + len_dest = func(*buff_dest, len_dest, buff_source, len_source, + mylocale->info.icu.locale, &status); + if (status == U_BUFFER_OVERFLOW_ERROR) + { + /* try again with adjusted length */ + pfree(*buff_dest); + *buff_dest = palloc(len_dest * sizeof(**buff_dest)); + status = U_ZERO_ERROR; + len_dest = func(*buff_dest, len_dest, buff_source, len_source, + mylocale->info.icu.locale, &status); + } + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("case conversion failed: %s", u_errorName(status)))); + return len_dest; +} + +static int32_t +u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode) +{ + return u_strToTitle(dest, destCapacity, src, srcLength, + NULL, locale, pErrorCode); +} + +#endif /* USE_ICU */ + +/* + * If the system provides the needed functions for wide-character manipulation + * (which are all standardized by C99), then we implement upper/lower/initcap + * using wide-character functions, if necessary. Otherwise we use the + * traditional functions, which of course will not work as desired + * in multibyte character sets. Note that in either case we are effectively + * assuming that the database character encoding matches the encoding implied + * by LC_CTYPE. + * + * If the system provides locale_t and associated functions (which are + * standardized by Open Group's XBD), we can support collations that are + * neither default nor C. The code is written to handle both combinations + * of have-wide-characters and have-locale_t, though it's rather unlikely + * a platform would have the latter without the former. + */ + +/* + * collation-aware, wide-character-aware lower function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. The result is a palloc'd, null-terminated string. + */ +char * +str_tolower(const char *buff, size_t nbytes, Oid collid) +{ + char *result; + + if (!buff) + return NULL; + + /* C/POSIX collations use this path regardless of database encoding */ + if (lc_ctype_is_c(collid)) + { + result = asc_tolower(buff, nbytes); + } + else + { + pg_locale_t mylocale = 0; + + if (collid != DEFAULT_COLLATION_OID) + { + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for %s function", + "lower()"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + mylocale = pg_newlocale_from_collation(collid); + } + +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar; + int32_t len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToLower, mylocale, + &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + pfree(buff_uchar); + pfree(buff_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; + + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + else +#endif + workspace[curr_char] = towlower(workspace[curr_char]); + } + + /* + * Make result large enough; case change might change number + * of bytes + */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); + + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); + } + else + { + char *p; + + result = pnstrdup(buff, nbytes); + + /* + * Note: we assume that tolower_l() will not be so broken as + * to need an isupper_l() guard test. When using the default + * collation, we apply the traditional Postgres behavior that + * forces ASCII-style treatment of I/i, but in non-default + * collations you get exactly what the collation says. + */ + for (p = result; *p; p++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + *p = tolower_l((unsigned char) *p, mylocale->info.lt); + else +#endif + *p = pg_tolower((unsigned char) *p); + } + } + } + } + + return result; +} + +/* + * collation-aware, wide-character-aware upper function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. The result is a palloc'd, null-terminated string. + */ +char * +str_toupper(const char *buff, size_t nbytes, Oid collid) +{ + char *result; + + if (!buff) + return NULL; + + /* C/POSIX collations use this path regardless of database encoding */ + if (lc_ctype_is_c(collid)) + { + result = asc_toupper(buff, nbytes); + } + else + { + pg_locale_t mylocale = 0; + + if (collid != DEFAULT_COLLATION_OID) + { + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for %s function", + "upper()"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + mylocale = pg_newlocale_from_collation(collid); + } + +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar, + len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToUpper, mylocale, + &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + pfree(buff_uchar); + pfree(buff_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; + + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + else +#endif + workspace[curr_char] = towupper(workspace[curr_char]); + } + + /* + * Make result large enough; case change might change number + * of bytes + */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); + + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); + } + else + { + char *p; + + result = pnstrdup(buff, nbytes); + + /* + * Note: we assume that toupper_l() will not be so broken as + * to need an islower_l() guard test. When using the default + * collation, we apply the traditional Postgres behavior that + * forces ASCII-style treatment of I/i, but in non-default + * collations you get exactly what the collation says. + */ + for (p = result; *p; p++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + *p = toupper_l((unsigned char) *p, mylocale->info.lt); + else +#endif + *p = pg_toupper((unsigned char) *p); + } + } + } + } + + return result; +} + +/* + * collation-aware, wide-character-aware initcap function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. The result is a palloc'd, null-terminated string. + */ +char * +str_initcap(const char *buff, size_t nbytes, Oid collid) +{ + char *result; + int wasalnum = false; + + if (!buff) + return NULL; + + /* C/POSIX collations use this path regardless of database encoding */ + if (lc_ctype_is_c(collid)) + { + result = asc_initcap(buff, nbytes); + } + else + { + pg_locale_t mylocale = 0; + + if (collid != DEFAULT_COLLATION_OID) + { + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for %s function", + "initcap()"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + mylocale = pg_newlocale_from_collation(collid); + } + +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar, + len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, + &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + pfree(buff_uchar); + pfree(buff_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; + + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + { + if (wasalnum) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + else + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); + } + else +#endif + { + if (wasalnum) + workspace[curr_char] = towlower(workspace[curr_char]); + else + workspace[curr_char] = towupper(workspace[curr_char]); + wasalnum = iswalnum(workspace[curr_char]); + } + } + + /* + * Make result large enough; case change might change number + * of bytes + */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); + + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); + } + else + { + char *p; + + result = pnstrdup(buff, nbytes); + + /* + * Note: we assume that toupper_l()/tolower_l() will not be so + * broken as to need guard tests. When using the default + * collation, we apply the traditional Postgres behavior that + * forces ASCII-style treatment of I/i, but in non-default + * collations you get exactly what the collation says. + */ + for (p = result; *p; p++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + { + if (wasalnum) + *p = tolower_l((unsigned char) *p, mylocale->info.lt); + else + *p = toupper_l((unsigned char) *p, mylocale->info.lt); + wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); + } + else +#endif + { + if (wasalnum) + *p = pg_tolower((unsigned char) *p); + else + *p = pg_toupper((unsigned char) *p); + wasalnum = isalnum((unsigned char) *p); + } + } + } + } + } + + return result; +} + +/* + * ASCII-only lower function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. The result is a palloc'd, null-terminated string. + */ +char * +asc_tolower(const char *buff, size_t nbytes) +{ + char *result; + char *p; + + if (!buff) + return NULL; + + result = pnstrdup(buff, nbytes); + + for (p = result; *p; p++) + *p = pg_ascii_tolower((unsigned char) *p); + + return result; +} + +/* + * ASCII-only upper function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. The result is a palloc'd, null-terminated string. + */ +char * +asc_toupper(const char *buff, size_t nbytes) +{ + char *result; + char *p; + + if (!buff) + return NULL; + + result = pnstrdup(buff, nbytes); + + for (p = result; *p; p++) + *p = pg_ascii_toupper((unsigned char) *p); + + return result; +} + +/* + * ASCII-only initcap function + * + * We pass the number of bytes so we can pass varlena and char* + * to this function. The result is a palloc'd, null-terminated string. + */ +char * +asc_initcap(const char *buff, size_t nbytes) +{ + char *result; + char *p; + int wasalnum = false; + + if (!buff) + return NULL; + + result = pnstrdup(buff, nbytes); + + for (p = result; *p; p++) + { + char c; + + if (wasalnum) + *p = c = pg_ascii_tolower((unsigned char) *p); + else + *p = c = pg_ascii_toupper((unsigned char) *p); + /* we don't trust isalnum() here */ + wasalnum = ((c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9')); + } + + return result; +} + +/* convenience routines for when the input is null-terminated */ + +static char * +str_tolower_z(const char *buff, Oid collid) +{ + return str_tolower(buff, strlen(buff), collid); +} + +static char * +str_toupper_z(const char *buff, Oid collid) +{ + return str_toupper(buff, strlen(buff), collid); +} + +static char * +str_initcap_z(const char *buff, Oid collid) +{ + return str_initcap(buff, strlen(buff), collid); +} + +static char * +asc_tolower_z(const char *buff) +{ + return asc_tolower(buff, strlen(buff)); +} + +static char * +asc_toupper_z(const char *buff) +{ + return asc_toupper(buff, strlen(buff)); +} + +/* asc_initcap_z is not currently needed */ + + +/* ---------- + * Skip TM / th in FROM_CHAR + * + * If S_THth is on, skip two chars, assuming there are two available + * ---------- + */ +#define SKIP_THth(ptr, _suf) \ + do { \ + if (S_THth(_suf)) \ + { \ + if (*(ptr)) (ptr) += pg_mblen(ptr); \ + if (*(ptr)) (ptr) += pg_mblen(ptr); \ + } \ + } while (0) + + +#ifdef DEBUG_TO_FROM_CHAR +/* ----------- + * DEBUG: Call for debug and for index checking; (Show ASCII char + * and defined keyword for each used position + * ---------- + */ +static void +dump_index(const KeyWord *k, const int *index) +{ + int i, + count = 0, + free_i = 0; + + elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:"); + + for (i = 0; i < KeyWord_INDEX_SIZE; i++) + { + if (index[i] != -1) + { + elog(DEBUG_elog_output, "\t%c: %s, ", i + 32, k[index[i]].name); + count++; + } + else + { + free_i++; + elog(DEBUG_elog_output, "\t(%d) %c %d", i, i + 32, index[i]); + } + } + elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d", + count, free_i); +} +#endif /* DEBUG */ + +/* ---------- + * Return true if next format picture is not digit value + * ---------- + */ +static bool +is_next_separator(FormatNode *n) +{ + if (n->type == NODE_TYPE_END) + return false; + + if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix)) + return true; + + /* + * Next node + */ + n++; + + /* end of format string is treated like a non-digit separator */ + if (n->type == NODE_TYPE_END) + return true; + + if (n->type == NODE_TYPE_ACTION) + { + if (n->key->is_digit) + return false; + + return true; + } + else if (n->character[1] == '\0' && + isdigit((unsigned char) n->character[0])) + return false; + + return true; /* some non-digit input (separator) */ +} + + +static int +adjust_partial_year_to_2020(int year) +{ + /* + * Adjust all dates toward 2020; this is effectively what happens when we + * assume '70' is 1970 and '69' is 2069. + */ + /* Force 0-69 into the 2000's */ + if (year < 70) + return year + 2000; + /* Force 70-99 into the 1900's */ + else if (year < 100) + return year + 1900; + /* Force 100-519 into the 2000's */ + else if (year < 520) + return year + 2000; + /* Force 520-999 into the 1000's */ + else if (year < 1000) + return year + 1000; + else + return year; +} + + +static int +strspace_len(const char *str) +{ + int len = 0; + + while (*str && isspace((unsigned char) *str)) + { + str++; + len++; + } + return len; +} + +/* + * Set the date mode of a from-char conversion. + * + * Puke if the date mode has already been set, and the caller attempts to set + * it to a conflicting mode. + * + * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set. + */ +static void +from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error) +{ + if (mode != FROM_CHAR_DATE_NONE) + { + if (tmfc->mode == FROM_CHAR_DATE_NONE) + tmfc->mode = mode; + else if (tmfc->mode != mode) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid combination of date conventions"), + errhint("Do not mix Gregorian and ISO week date " + "conventions in a formatting template.")))); + } + +on_error: + return; +} + +/* + * Set the integer pointed to by 'dest' to the given value. + * + * Puke if the destination integer has previously been set to some other + * non-zero value. + * + * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set. + */ +static void +from_char_set_int(int *dest, const int value, const FormatNode *node, + bool *have_error) +{ + if (*dest != 0 && *dest != value) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("conflicting values for \"%s\" field in " + "formatting string", + node->key->name), + errdetail("This value contradicts a previous setting " + "for the same field type.")))); + *dest = value; + +on_error: + return; +} + +/* + * Read a single integer from the source string, into the int pointed to by + * 'dest'. If 'dest' is NULL, the result is discarded. + * + * In fixed-width mode (the node does not have the FM suffix), consume at most + * 'len' characters. However, any leading whitespace isn't counted in 'len'. + * + * We use strtol() to recover the integer value from the source string, in + * accordance with the given FormatNode. + * + * If the conversion completes successfully, src will have been advanced to + * point at the character immediately following the last character used in the + * conversion. + * + * Return the number of characters consumed. + * + * Note that from_char_parse_int() provides a more convenient wrapper where + * the length of the field is the same as the length of the format keyword (as + * with DD and MI). + * + * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set + * and -1 is returned. + */ +static int +from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node, + bool *have_error) +{ + long result; + char copy[DCH_MAX_ITEM_SIZ + 1]; + const char *init = *src; + int used; + + /* + * Skip any whitespace before parsing the integer. + */ + *src += strspace_len(*src); + + Assert(len <= DCH_MAX_ITEM_SIZ); + used = (int) strlcpy(copy, *src, len + 1); + + if (S_FM(node->suffix) || is_next_separator(node)) + { + /* + * This node is in Fill Mode, or the next node is known to be a + * non-digit value, so we just slurp as many characters as we can get. + */ + char *endptr; + + errno = 0; + result = strtol(init, &endptr, 10); + *src = endptr; + } + else + { + /* + * We need to pull exactly the number of characters given in 'len' out + * of the string, and convert those. + */ + char *last; + + if (used < len) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("source string too short for \"%s\" " + "formatting field", + node->key->name), + errdetail("Field requires %d characters, " + "but only %d remain.", + len, used), + errhint("If your source string is not fixed-width, " + "try using the \"FM\" modifier.")))); + + errno = 0; + result = strtol(copy, &last, 10); + used = last - copy; + + if (used > 0 && used < len) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid value \"%s\" for \"%s\"", + copy, node->key->name), + errdetail("Field requires %d characters, " + "but only %d could be parsed.", + len, used), + errhint("If your source string is not fixed-width, " + "try using the \"FM\" modifier.")))); + + *src += used; + } + + if (*src == init) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid value \"%s\" for \"%s\"", + copy, node->key->name), + errdetail("Value must be an integer.")))); + + if (errno == ERANGE || result < INT_MIN || result > INT_MAX) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("value for \"%s\" in source string is out of range", + node->key->name), + errdetail("Value must be in the range %d to %d.", + INT_MIN, INT_MAX)))); + + if (dest != NULL) + { + from_char_set_int(dest, (int) result, node, have_error); + CHECK_ERROR; + } + + return *src - init; + +on_error: + return -1; +} + +/* + * Call from_char_parse_int_len(), using the length of the format keyword as + * the expected length of the field. + * + * Don't call this function if the field differs in length from the format + * keyword (as with HH24; the keyword length is 4, but the field length is 2). + * In such cases, call from_char_parse_int_len() instead to specify the + * required length explicitly. + */ +static int +from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error) +{ + return from_char_parse_int_len(dest, src, node->key->len, node, have_error); +} + +/* + * Sequentially search null-terminated "array" for a case-insensitive match + * to the initial character(s) of "name". + * + * Returns array index of match, or -1 for no match. + * + * *len is set to the length of the match, or 0 for no match. + * + * Case-insensitivity is defined per pg_ascii_tolower, so this is only + * suitable for comparisons to ASCII strings. + */ +static int +seq_search_ascii(const char *name, const char *const *array, int *len) +{ + unsigned char firstc; + const char *const *a; + + *len = 0; + + /* empty string can't match anything */ + if (!*name) + return -1; + + /* we handle first char specially to gain some speed */ + firstc = pg_ascii_tolower((unsigned char) *name); + + for (a = array; *a != NULL; a++) + { + const char *p; + const char *n; + + /* compare first chars */ + if (pg_ascii_tolower((unsigned char) **a) != firstc) + continue; + + /* compare rest of string */ + for (p = *a + 1, n = name + 1;; p++, n++) + { + /* return success if we matched whole array entry */ + if (*p == '\0') + { + *len = n - name; + return a - array; + } + /* else, must have another character in "name" ... */ + if (*n == '\0') + break; + /* ... and it must match */ + if (pg_ascii_tolower((unsigned char) *p) != + pg_ascii_tolower((unsigned char) *n)) + break; + } + } + + return -1; +} + +/* + * Sequentially search an array of possibly non-English words for + * a case-insensitive match to the initial character(s) of "name". + * + * This has the same API as seq_search_ascii(), but we use a more general + * case-folding transformation to achieve case-insensitivity. Case folding + * is done per the rules of the collation identified by "collid". + * + * The array is treated as const, but we don't declare it that way because + * the arrays exported by pg_locale.c aren't const. + */ +static int +seq_search_localized(const char *name, char **array, int *len, Oid collid) +{ + char **a; + char *upper_name; + char *lower_name; + + *len = 0; + + /* empty string can't match anything */ + if (!*name) + return -1; + + /* + * The case-folding processing done below is fairly expensive, so before + * doing that, make a quick pass to see if there is an exact match. + */ + for (a = array; *a != NULL; a++) + { + int element_len = strlen(*a); + + if (strncmp(name, *a, element_len) == 0) + { + *len = element_len; + return a - array; + } + } + + /* + * Fold to upper case, then to lower case, so that we can match reliably + * even in languages in which case conversions are not injective. + */ + upper_name = str_toupper(unconstify(char *, name), strlen(name), collid); + lower_name = str_tolower(upper_name, strlen(upper_name), collid); + pfree(upper_name); + + for (a = array; *a != NULL; a++) + { + char *upper_element; + char *lower_element; + int element_len; + + /* Likewise upper/lower-case array element */ + upper_element = str_toupper(*a, strlen(*a), collid); + lower_element = str_tolower(upper_element, strlen(upper_element), + collid); + pfree(upper_element); + element_len = strlen(lower_element); + + /* Match? */ + if (strncmp(lower_name, lower_element, element_len) == 0) + { + *len = element_len; + pfree(lower_element); + pfree(lower_name); + return a - array; + } + pfree(lower_element); + } + + pfree(lower_name); + return -1; +} + +/* + * Perform a sequential search in 'array' (or 'localized_array', if that's + * not NULL) for an entry matching the first character(s) of the 'src' + * string case-insensitively. + * + * The 'array' is presumed to be English words (all-ASCII), but + * if 'localized_array' is supplied, that might be non-English + * so we need a more expensive case-folding transformation + * (which will follow the rules of the collation 'collid'). + * + * If a match is found, copy the array index of the match into the integer + * pointed to by 'dest', advance 'src' to the end of the part of the string + * which matched, and return the number of characters consumed. + * + * If the string doesn't match, throw an error if 'have_error' is NULL, + * otherwise set '*have_error' and return -1. + * + * 'node' is used only for error reports: node->key->name identifies the + * field type we were searching for. + */ +static int +from_char_seq_search(int *dest, const char **src, const char *const *array, + char **localized_array, Oid collid, + FormatNode *node, bool *have_error) +{ + int len; + + if (localized_array == NULL) + *dest = seq_search_ascii(*src, array, &len); + else + *dest = seq_search_localized(*src, localized_array, &len, collid); + + if (len <= 0) + { + /* + * In the error report, truncate the string at the next whitespace (if + * any) to avoid including irrelevant data. + */ + char *copy = pstrdup(*src); + char *c; + + for (c = copy; *c; c++) + { + if (scanner_isspace(*c)) + { + *c = '\0'; + break; + } + } + + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid value \"%s\" for \"%s\"", + copy, node->key->name), + errdetail("The given value did not match any of " + "the allowed values for this field.")))); + } + *src += len; + return len; + +on_error: + return -1; +} + +/* ---------- + * Process a TmToChar struct as denoted by a list of FormatNodes. + * The formatted data is written to the string pointed to by 'out'. + * ---------- + */ +static void +DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid) +{ + FormatNode *n; + char *s; + struct pg_tm *tm = &in->tm; + int i; + + /* cache localized days and months */ + cache_locale_time(); + + s = out; + for (n = node; n->type != NODE_TYPE_END; n++) + { + if (n->type != NODE_TYPE_ACTION) + { + strcpy(s, n->character); + s += strlen(s); + continue; + } + + switch (n->key->id) + { + case DCH_A_M: + case DCH_P_M: + strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) + ? P_M_STR : A_M_STR); + s += strlen(s); + break; + case DCH_AM: + case DCH_PM: + strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) + ? PM_STR : AM_STR); + s += strlen(s); + break; + case DCH_a_m: + case DCH_p_m: + strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) + ? p_m_STR : a_m_STR); + s += strlen(s); + break; + case DCH_am: + case DCH_pm: + strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) + ? pm_STR : am_STR); + s += strlen(s); + break; + case DCH_HH: + case DCH_HH12: + + /* + * display time as shown on a 12-hour clock, even for + * intervals + */ + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3, + tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 : + tm->tm_hour % (HOURS_PER_DAY / 2)); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_HH24: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3, + tm->tm_hour); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_MI: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3, + tm->tm_min); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_SS: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3, + tm->tm_sec); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + +#define DCH_to_char_fsec(frac_fmt, frac_val) \ + sprintf(s, frac_fmt, (int) (frac_val)); \ + if (S_THth(n->suffix)) \ + str_numth(s, s, S_TH_TYPE(n->suffix)); \ + s += strlen(s) + + case DCH_FF1: /* tenth of second */ + DCH_to_char_fsec("%01d", in->fsec / 100000); + break; + case DCH_FF2: /* hundredth of second */ + DCH_to_char_fsec("%02d", in->fsec / 10000); + break; + case DCH_FF3: + case DCH_MS: /* millisecond */ + DCH_to_char_fsec("%03d", in->fsec / 1000); + break; + case DCH_FF4: /* tenth of a millisecond */ + DCH_to_char_fsec("%04d", in->fsec / 100); + break; + case DCH_FF5: /* hundredth of a millisecond */ + DCH_to_char_fsec("%05d", in->fsec / 10); + break; + case DCH_FF6: + case DCH_US: /* microsecond */ + DCH_to_char_fsec("%06d", in->fsec); + break; +#undef DCH_to_char_fsec + case DCH_SSSS: + sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR + + tm->tm_min * SECS_PER_MINUTE + + tm->tm_sec); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_tz: + INVALID_FOR_INTERVAL; + if (tmtcTzn(in)) + { + /* We assume here that timezone names aren't localized */ + char *p = asc_tolower_z(tmtcTzn(in)); + + strcpy(s, p); + pfree(p); + s += strlen(s); + } + break; + case DCH_TZ: + INVALID_FOR_INTERVAL; + if (tmtcTzn(in)) + { + strcpy(s, tmtcTzn(in)); + s += strlen(s); + } + break; + case DCH_TZH: + INVALID_FOR_INTERVAL; + sprintf(s, "%c%02d", + (tm->tm_gmtoff >= 0) ? '+' : '-', + abs((int) tm->tm_gmtoff) / SECS_PER_HOUR); + s += strlen(s); + break; + case DCH_TZM: + INVALID_FOR_INTERVAL; + sprintf(s, "%02d", + (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE); + s += strlen(s); + break; + case DCH_OF: + INVALID_FOR_INTERVAL; + sprintf(s, "%c%0*d", + (tm->tm_gmtoff >= 0) ? '+' : '-', + S_FM(n->suffix) ? 0 : 2, + abs((int) tm->tm_gmtoff) / SECS_PER_HOUR); + s += strlen(s); + if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0) + { + sprintf(s, ":%02d", + (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE); + s += strlen(s); + } + break; + case DCH_A_D: + case DCH_B_C: + INVALID_FOR_INTERVAL; + strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR)); + s += strlen(s); + break; + case DCH_AD: + case DCH_BC: + INVALID_FOR_INTERVAL; + strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR)); + s += strlen(s); + break; + case DCH_a_d: + case DCH_b_c: + INVALID_FOR_INTERVAL; + strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR)); + s += strlen(s); + break; + case DCH_ad: + case DCH_bc: + INVALID_FOR_INTERVAL; + strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR)); + s += strlen(s); + break; + case DCH_MONTH: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + asc_toupper_z(months_full[tm->tm_mon - 1])); + s += strlen(s); + break; + case DCH_Month: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + months_full[tm->tm_mon - 1]); + s += strlen(s); + break; + case DCH_month: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + asc_tolower_z(months_full[tm->tm_mon - 1])); + s += strlen(s); + break; + case DCH_MON: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, asc_toupper_z(months[tm->tm_mon - 1])); + s += strlen(s); + break; + case DCH_Mon: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, months[tm->tm_mon - 1]); + s += strlen(s); + break; + case DCH_mon: + INVALID_FOR_INTERVAL; + if (!tm->tm_mon) + break; + if (S_TM(n->suffix)) + { + char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, asc_tolower_z(months[tm->tm_mon - 1])); + s += strlen(s); + break; + case DCH_MM: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3, + tm->tm_mon); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_DAY: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + asc_toupper_z(days[tm->tm_wday])); + s += strlen(s); + break; + case DCH_Day: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + days[tm->tm_wday]); + s += strlen(s); + break; + case DCH_day: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + asc_tolower_z(days[tm->tm_wday])); + s += strlen(s); + break; + case DCH_DY: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, asc_toupper_z(days_short[tm->tm_wday])); + s += strlen(s); + break; + case DCH_Dy: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, days_short[tm->tm_wday]); + s += strlen(s); + break; + case DCH_dy: + INVALID_FOR_INTERVAL; + if (S_TM(n->suffix)) + { + char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid); + + if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) + strcpy(s, str); + else + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("localized string format value too long"))); + } + else + strcpy(s, asc_tolower_z(days_short[tm->tm_wday])); + s += strlen(s); + break; + case DCH_DDD: + case DCH_IDDD: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3, + (n->key->id == DCH_DDD) ? + tm->tm_yday : + date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday)); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_DD: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_D: + INVALID_FOR_INTERVAL; + sprintf(s, "%d", tm->tm_wday + 1); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_ID: + INVALID_FOR_INTERVAL; + sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_WW: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, + (tm->tm_yday - 1) / 7 + 1); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_IW: + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, + date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday)); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_Q: + if (!tm->tm_mon) + break; + sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_CC: + if (is_interval) /* straight calculation */ + i = tm->tm_year / 100; + else + { + if (tm->tm_year > 0) + /* Century 20 == 1901 - 2000 */ + i = (tm->tm_year - 1) / 100 + 1; + else + /* Century 6BC == 600BC - 501BC */ + i = tm->tm_year / 100 - 1; + } + if (i <= 99 && i >= -99) + sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i); + else + sprintf(s, "%d", i); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_Y_YYY: + i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000; + sprintf(s, "%d,%03d", i, + ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000)); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_YYYY: + case DCH_IYYY: + sprintf(s, "%0*d", + S_FM(n->suffix) ? 0 : + (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5, + (n->key->id == DCH_YYYY ? + ADJUST_YEAR(tm->tm_year, is_interval) : + ADJUST_YEAR(date2isoyear(tm->tm_year, + tm->tm_mon, + tm->tm_mday), + is_interval))); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_YYY: + case DCH_IYY: + sprintf(s, "%0*d", + S_FM(n->suffix) ? 0 : + (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4, + (n->key->id == DCH_YYY ? + ADJUST_YEAR(tm->tm_year, is_interval) : + ADJUST_YEAR(date2isoyear(tm->tm_year, + tm->tm_mon, + tm->tm_mday), + is_interval)) % 1000); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_YY: + case DCH_IY: + sprintf(s, "%0*d", + S_FM(n->suffix) ? 0 : + (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3, + (n->key->id == DCH_YY ? + ADJUST_YEAR(tm->tm_year, is_interval) : + ADJUST_YEAR(date2isoyear(tm->tm_year, + tm->tm_mon, + tm->tm_mday), + is_interval)) % 100); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_Y: + case DCH_I: + sprintf(s, "%1d", + (n->key->id == DCH_Y ? + ADJUST_YEAR(tm->tm_year, is_interval) : + ADJUST_YEAR(date2isoyear(tm->tm_year, + tm->tm_mon, + tm->tm_mday), + is_interval)) % 10); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_RM: + /* FALLTHROUGH */ + case DCH_rm: + + /* + * For intervals, values like '12 month' will be reduced to 0 + * month and some years. These should be processed. + */ + if (!tm->tm_mon && !tm->tm_year) + break; + else + { + int mon = 0; + const char *const *months; + + if (n->key->id == DCH_RM) + months = rm_months_upper; + else + months = rm_months_lower; + + /* + * Compute the position in the roman-numeral array. Note + * that the contents of the array are reversed, December + * being first and January last. + */ + if (tm->tm_mon == 0) + { + /* + * This case is special, and tracks the case of full + * interval years. + */ + mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1; + } + else if (tm->tm_mon < 0) + { + /* + * Negative case. In this case, the calculation is + * reversed, where -1 means December, -2 November, + * etc. + */ + mon = -1 * (tm->tm_mon + 1); + } + else + { + /* + * Common case, with a strictly positive value. The + * position in the array matches with the value of + * tm_mon. + */ + mon = MONTHS_PER_YEAR - tm->tm_mon; + } + + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4, + months[mon]); + s += strlen(s); + } + break; + case DCH_W: + sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + case DCH_J: + sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)); + if (S_THth(n->suffix)) + str_numth(s, s, S_TH_TYPE(n->suffix)); + s += strlen(s); + break; + } + } + + *s = '\0'; +} + +/* + * Process the string 'in' as denoted by the array of FormatNodes 'node[]'. + * The TmFromChar struct pointed to by 'out' is populated with the results. + * + * 'collid' identifies the collation to use, if needed. + * 'std' specifies standard parsing mode. + * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set. + * + * Note: we currently don't have any to_interval() function, so there + * is no need here for INVALID_FOR_INTERVAL checks. + */ +static void +DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, + Oid collid, bool std, bool *have_error) +{ + FormatNode *n; + const char *s; + int len, + value; + bool fx_mode = std; + + /* number of extra skipped characters (more than given in format string) */ + int extra_skip = 0; + + /* cache localized days and months */ + cache_locale_time(); + + for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++) + { + /* + * Ignore spaces at the beginning of the string and before fields when + * not in FX (fixed width) mode. + */ + if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) && + (n->type == NODE_TYPE_ACTION || n == node)) + { + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } + + if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) + { + if (std) + { + /* + * Standard mode requires strict matching between format + * string separators/spaces and input string. + */ + Assert(n->character[0] && !n->character[1]); + + if (*s == n->character[0]) + s++; + else + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unmatched format separator \"%c\"", + n->character[0])))); + } + else if (!fx_mode) + { + /* + * In non FX (fixed format) mode one format string space or + * separator match to one space or separator in input string. + * Or match nothing if there is no space or separator in the + * current position of input string. + */ + extra_skip--; + if (isspace((unsigned char) *s) || is_separator_char(s)) + { + s++; + extra_skip++; + } + } + else + { + /* + * In FX mode, on format string space or separator we consume + * exactly one character from input string. Notice we don't + * insist that the consumed character match the format's + * character. + */ + s += pg_mblen(s); + } + continue; + } + else if (n->type != NODE_TYPE_ACTION) + { + /* + * Text character, so consume one character from input string. + * Notice we don't insist that the consumed character match the + * format's character. + */ + if (!fx_mode) + { + /* + * In non FX mode we might have skipped some extra characters + * (more than specified in format string) before. In this + * case we don't skip input string character, because it might + * be part of field. + */ + if (extra_skip > 0) + extra_skip--; + else + s += pg_mblen(s); + } + else + { + int chlen = pg_mblen(s); + + /* + * Standard mode requires strict match of format characters. + */ + if (std && n->type == NODE_TYPE_CHAR && + strncmp(s, n->character, chlen) != 0) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unmatched format character \"%s\"", + n->character)))); + + s += chlen; + } + continue; + } + + from_char_set_mode(out, n->key->date_mode, have_error); + CHECK_ERROR; + + switch (n->key->id) + { + case DCH_FX: + fx_mode = true; + break; + case DCH_A_M: + case DCH_P_M: + case DCH_a_m: + case DCH_p_m: + from_char_seq_search(&value, &s, ampm_strings_long, + NULL, InvalidOid, + n, have_error); + CHECK_ERROR; + from_char_set_int(&out->pm, value % 2, n, have_error); + CHECK_ERROR; + out->clock = CLOCK_12_HOUR; + break; + case DCH_AM: + case DCH_PM: + case DCH_am: + case DCH_pm: + from_char_seq_search(&value, &s, ampm_strings, + NULL, InvalidOid, + n, have_error); + CHECK_ERROR; + from_char_set_int(&out->pm, value % 2, n, have_error); + CHECK_ERROR; + out->clock = CLOCK_12_HOUR; + break; + case DCH_HH: + case DCH_HH12: + from_char_parse_int_len(&out->hh, &s, 2, n, have_error); + CHECK_ERROR; + out->clock = CLOCK_12_HOUR; + SKIP_THth(s, n->suffix); + break; + case DCH_HH24: + from_char_parse_int_len(&out->hh, &s, 2, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_MI: + from_char_parse_int(&out->mi, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_SS: + from_char_parse_int(&out->ss, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_MS: /* millisecond */ + len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error); + CHECK_ERROR; + + /* + * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25 + */ + out->ms *= len == 1 ? 100 : + len == 2 ? 10 : 1; + + SKIP_THth(s, n->suffix); + break; + case DCH_FF1: + case DCH_FF2: + case DCH_FF3: + case DCH_FF4: + case DCH_FF5: + case DCH_FF6: + out->ff = n->key->id - DCH_FF1 + 1; + /* fall through */ + case DCH_US: /* microsecond */ + len = from_char_parse_int_len(&out->us, &s, + n->key->id == DCH_US ? 6 : + out->ff, n, have_error); + CHECK_ERROR; + + out->us *= len == 1 ? 100000 : + len == 2 ? 10000 : + len == 3 ? 1000 : + len == 4 ? 100 : + len == 5 ? 10 : 1; + + SKIP_THth(s, n->suffix); + break; + case DCH_SSSS: + from_char_parse_int(&out->ssss, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_tz: + case DCH_TZ: + case DCH_OF: + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("formatting field \"%s\" is only supported in to_char", + n->key->name)))); + CHECK_ERROR; + break; + case DCH_TZH: + + /* + * Value of TZH might be negative. And the issue is that we + * might swallow minus sign as the separator. So, if we have + * skipped more characters than specified in the format + * string, then we consider prepending last skipped minus to + * TZH. + */ + if (*s == '+' || *s == '-' || *s == ' ') + { + out->tzsign = *s == '-' ? -1 : +1; + s++; + } + else + { + if (extra_skip > 0 && *(s - 1) == '-') + out->tzsign = -1; + else + out->tzsign = +1; + } + + from_char_parse_int_len(&out->tzh, &s, 2, n, have_error); + CHECK_ERROR; + break; + case DCH_TZM: + /* assign positive timezone sign if TZH was not seen before */ + if (!out->tzsign) + out->tzsign = +1; + from_char_parse_int_len(&out->tzm, &s, 2, n, have_error); + CHECK_ERROR; + break; + case DCH_A_D: + case DCH_B_C: + case DCH_a_d: + case DCH_b_c: + from_char_seq_search(&value, &s, adbc_strings_long, + NULL, InvalidOid, + n, have_error); + CHECK_ERROR; + from_char_set_int(&out->bc, value % 2, n, have_error); + CHECK_ERROR; + break; + case DCH_AD: + case DCH_BC: + case DCH_ad: + case DCH_bc: + from_char_seq_search(&value, &s, adbc_strings, + NULL, InvalidOid, + n, have_error); + CHECK_ERROR; + from_char_set_int(&out->bc, value % 2, n, have_error); + CHECK_ERROR; + break; + case DCH_MONTH: + case DCH_Month: + case DCH_month: + from_char_seq_search(&value, &s, months_full, + S_TM(n->suffix) ? localized_full_months : NULL, + collid, + n, have_error); + CHECK_ERROR; + from_char_set_int(&out->mm, value + 1, n, have_error); + CHECK_ERROR; + break; + case DCH_MON: + case DCH_Mon: + case DCH_mon: + from_char_seq_search(&value, &s, months, + S_TM(n->suffix) ? localized_abbrev_months : NULL, + collid, + n, have_error); + CHECK_ERROR; + from_char_set_int(&out->mm, value + 1, n, have_error); + CHECK_ERROR; + break; + case DCH_MM: + from_char_parse_int(&out->mm, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_DAY: + case DCH_Day: + case DCH_day: + from_char_seq_search(&value, &s, days, + S_TM(n->suffix) ? localized_full_days : NULL, + collid, + n, have_error); + CHECK_ERROR; + from_char_set_int(&out->d, value, n, have_error); + CHECK_ERROR; + out->d++; + break; + case DCH_DY: + case DCH_Dy: + case DCH_dy: + from_char_seq_search(&value, &s, days_short, + S_TM(n->suffix) ? localized_abbrev_days : NULL, + collid, + n, have_error); + CHECK_ERROR; + from_char_set_int(&out->d, value, n, have_error); + CHECK_ERROR; + out->d++; + break; + case DCH_DDD: + from_char_parse_int(&out->ddd, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_IDDD: + from_char_parse_int_len(&out->ddd, &s, 3, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_DD: + from_char_parse_int(&out->dd, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_D: + from_char_parse_int(&out->d, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_ID: + from_char_parse_int_len(&out->d, &s, 1, n, have_error); + CHECK_ERROR; + /* Shift numbering to match Gregorian where Sunday = 1 */ + if (++out->d > 7) + out->d = 1; + SKIP_THth(s, n->suffix); + break; + case DCH_WW: + case DCH_IW: + from_char_parse_int(&out->ww, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_Q: + + /* + * We ignore 'Q' when converting to date because it is unclear + * which date in the quarter to use, and some people specify + * both quarter and month, so if it was honored it might + * conflict with the supplied month. That is also why we don't + * throw an error. + * + * We still parse the source string for an integer, but it + * isn't stored anywhere in 'out'. + */ + from_char_parse_int((int *) NULL, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_CC: + from_char_parse_int(&out->cc, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_Y_YYY: + { + int matched, + years, + millennia, + nch; + + matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch); + if (matched < 2) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("invalid input string for \"Y,YYY\"")))); + years += (millennia * 1000); + from_char_set_int(&out->year, years, n, have_error); + CHECK_ERROR; + out->yysz = 4; + s += nch; + SKIP_THth(s, n->suffix); + } + break; + case DCH_YYYY: + case DCH_IYYY: + from_char_parse_int(&out->year, &s, n, have_error); + CHECK_ERROR; + out->yysz = 4; + SKIP_THth(s, n->suffix); + break; + case DCH_YYY: + case DCH_IYY: + len = from_char_parse_int(&out->year, &s, n, have_error); + CHECK_ERROR; + if (len < 4) + out->year = adjust_partial_year_to_2020(out->year); + out->yysz = 3; + SKIP_THth(s, n->suffix); + break; + case DCH_YY: + case DCH_IY: + len = from_char_parse_int(&out->year, &s, n, have_error); + CHECK_ERROR; + if (len < 4) + out->year = adjust_partial_year_to_2020(out->year); + out->yysz = 2; + SKIP_THth(s, n->suffix); + break; + case DCH_Y: + case DCH_I: + len = from_char_parse_int(&out->year, &s, n, have_error); + CHECK_ERROR; + if (len < 4) + out->year = adjust_partial_year_to_2020(out->year); + out->yysz = 1; + SKIP_THth(s, n->suffix); + break; + case DCH_RM: + case DCH_rm: + from_char_seq_search(&value, &s, rm_months_lower, + NULL, InvalidOid, + n, have_error); + CHECK_ERROR; + from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, + n, have_error); + CHECK_ERROR; + break; + case DCH_W: + from_char_parse_int(&out->w, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + case DCH_J: + from_char_parse_int(&out->j, &s, n, have_error); + CHECK_ERROR; + SKIP_THth(s, n->suffix); + break; + } + + /* Ignore all spaces after fields */ + if (!fx_mode) + { + extra_skip = 0; + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } + } + + /* + * Standard parsing mode doesn't allow unmatched format patterns or + * trailing characters in the input string. + */ + if (std) + { + if (n->type != NODE_TYPE_END) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("input string is too short for datetime format")))); + + while (*s != '\0' && isspace((unsigned char) *s)) + s++; + + if (*s != '\0') + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("trailing characters remain in input string " + "after datetime format")))); + } + +on_error: + return; +} + +/* + * The invariant for DCH cache entry management is that DCHCounter is equal + * to the maximum age value among the existing entries, and we increment it + * whenever an access occurs. If we approach overflow, deal with that by + * halving all the age values, so that we retain a fairly accurate idea of + * which entries are oldest. + */ +static inline void +DCH_prevent_counter_overflow(void) +{ + if (DCHCounter >= (INT_MAX - 1)) + { + for (int i = 0; i < n_DCHCache; i++) + DCHCache[i]->age >>= 1; + DCHCounter >>= 1; + } +} + +/* + * Get mask of date/time/zone components present in format nodes. + * + * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set. + */ +static int +DCH_datetime_type(FormatNode *node, bool *have_error) +{ + FormatNode *n; + int flags = 0; + + for (n = node; n->type != NODE_TYPE_END; n++) + { + if (n->type != NODE_TYPE_ACTION) + continue; + + switch (n->key->id) + { + case DCH_FX: + break; + case DCH_A_M: + case DCH_P_M: + case DCH_a_m: + case DCH_p_m: + case DCH_AM: + case DCH_PM: + case DCH_am: + case DCH_pm: + case DCH_HH: + case DCH_HH12: + case DCH_HH24: + case DCH_MI: + case DCH_SS: + case DCH_MS: /* millisecond */ + case DCH_US: /* microsecond */ + case DCH_FF1: + case DCH_FF2: + case DCH_FF3: + case DCH_FF4: + case DCH_FF5: + case DCH_FF6: + case DCH_SSSS: + flags |= DCH_TIMED; + break; + case DCH_tz: + case DCH_TZ: + case DCH_OF: + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("formatting field \"%s\" is only supported in to_char", + n->key->name)))); + flags |= DCH_ZONED; + break; + case DCH_TZH: + case DCH_TZM: + flags |= DCH_ZONED; + break; + case DCH_A_D: + case DCH_B_C: + case DCH_a_d: + case DCH_b_c: + case DCH_AD: + case DCH_BC: + case DCH_ad: + case DCH_bc: + case DCH_MONTH: + case DCH_Month: + case DCH_month: + case DCH_MON: + case DCH_Mon: + case DCH_mon: + case DCH_MM: + case DCH_DAY: + case DCH_Day: + case DCH_day: + case DCH_DY: + case DCH_Dy: + case DCH_dy: + case DCH_DDD: + case DCH_IDDD: + case DCH_DD: + case DCH_D: + case DCH_ID: + case DCH_WW: + case DCH_Q: + case DCH_CC: + case DCH_Y_YYY: + case DCH_YYYY: + case DCH_IYYY: + case DCH_YYY: + case DCH_IYY: + case DCH_YY: + case DCH_IY: + case DCH_Y: + case DCH_I: + case DCH_RM: + case DCH_rm: + case DCH_W: + case DCH_J: + flags |= DCH_DATED; + break; + } + } + +on_error: + return flags; +} + +/* select a DCHCacheEntry to hold the given format picture */ +static DCHCacheEntry * +DCH_cache_getnew(const char *str, bool std) +{ + DCHCacheEntry *ent; + + /* Ensure we can advance DCHCounter below */ + DCH_prevent_counter_overflow(); + + /* + * If cache is full, remove oldest entry (or recycle first not-valid one) + */ + if (n_DCHCache >= DCH_CACHE_ENTRIES) + { + DCHCacheEntry *old = DCHCache[0]; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache); +#endif + if (old->valid) + { + for (int i = 1; i < DCH_CACHE_ENTRIES; i++) + { + ent = DCHCache[i]; + if (!ent->valid) + { + old = ent; + break; + } + if (ent->age < old->age) + old = ent; + } + } +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age); +#endif + old->valid = false; + strlcpy(old->str, str, DCH_CACHE_SIZE + 1); + old->age = (++DCHCounter); + /* caller is expected to fill format, then set valid */ + return old; + } + else + { +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache); +#endif + Assert(DCHCache[n_DCHCache] == NULL); + DCHCache[n_DCHCache] = ent = (DCHCacheEntry *) + MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry)); + ent->valid = false; + strlcpy(ent->str, str, DCH_CACHE_SIZE + 1); + ent->std = std; + ent->age = (++DCHCounter); + /* caller is expected to fill format, then set valid */ + ++n_DCHCache; + return ent; + } +} + +/* look for an existing DCHCacheEntry matching the given format picture */ +static DCHCacheEntry * +DCH_cache_search(const char *str, bool std) +{ + /* Ensure we can advance DCHCounter below */ + DCH_prevent_counter_overflow(); + + for (int i = 0; i < n_DCHCache; i++) + { + DCHCacheEntry *ent = DCHCache[i]; + + if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std) + { + ent->age = (++DCHCounter); + return ent; + } + } + + return NULL; +} + +/* Find or create a DCHCacheEntry for the given format picture */ +static DCHCacheEntry * +DCH_cache_fetch(const char *str, bool std) +{ + DCHCacheEntry *ent; + + if ((ent = DCH_cache_search(str, std)) == NULL) + { + /* + * Not in the cache, must run parser and save a new format-picture to + * the cache. Do not mark the cache entry valid until parsing + * succeeds. + */ + ent = DCH_cache_getnew(str, std); + + parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index, + DCH_FLAG | (std ? STD_FLAG : 0), NULL); + + ent->valid = true; + } + return ent; +} + +/* + * Format a date/time or interval into a string according to fmt. + * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char + * for formatting. + */ +static text * +datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid) +{ + FormatNode *format; + char *fmt_str, + *result; + bool incache; + int fmt_len; + text *res; + + /* + * Convert fmt to C string + */ + fmt_str = text_to_cstring(fmt); + fmt_len = strlen(fmt_str); + + /* + * Allocate workspace for result as C string + */ + result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1); + *result = '\0'; + + if (fmt_len > DCH_CACHE_SIZE) + { + /* + * Allocate new memory if format picture is bigger than static cache + * and do not use cache (call parser always) + */ + incache = false; + + format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); + + parse_format(format, fmt_str, DCH_keywords, + DCH_suff, DCH_index, DCH_FLAG, NULL); + } + else + { + /* + * Use cache buffers + */ + DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false); + + incache = true; + format = ent->format; + } + + /* The real work is here */ + DCH_to_char(format, is_interval, tmtc, result, collid); + + if (!incache) + pfree(format); + + pfree(fmt_str); + + /* convert C-string result to TEXT format */ + res = cstring_to_text(result); + + pfree(result); + return res; +} + +/**************************************************************************** + * Public routines + ***************************************************************************/ + +/* ------------------- + * TIMESTAMP to_char() + * ------------------- + */ +Datum +timestamp_to_char(PG_FUNCTION_ARGS) +{ + Timestamp dt = PG_GETARG_TIMESTAMP(0); + text *fmt = PG_GETARG_TEXT_PP(1), + *res; + TmToChar tmtc; + struct pg_tm *tm; + int thisdate; + + if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt)) + PG_RETURN_NULL(); + + ZERO_tmtc(&tmtc); + tm = tmtcTm(&tmtc); + + if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday); + tm->tm_wday = (thisdate + 1) % 7; + tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1; + + if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION()))) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(res); +} + +Datum +timestamptz_to_char(PG_FUNCTION_ARGS) +{ + TimestampTz dt = PG_GETARG_TIMESTAMP(0); + text *fmt = PG_GETARG_TEXT_PP(1), + *res; + TmToChar tmtc; + int tz; + struct pg_tm *tm; + int thisdate; + + if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt)) + PG_RETURN_NULL(); + + ZERO_tmtc(&tmtc); + tm = tmtcTm(&tmtc); + + if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday); + tm->tm_wday = (thisdate + 1) % 7; + tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1; + + if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION()))) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(res); +} + + +/* ------------------- + * INTERVAL to_char() + * ------------------- + */ +Datum +interval_to_char(PG_FUNCTION_ARGS) +{ + Interval *it = PG_GETARG_INTERVAL_P(0); + text *fmt = PG_GETARG_TEXT_PP(1), + *res; + TmToChar tmtc; + struct pg_tm *tm; + + if (VARSIZE_ANY_EXHDR(fmt) <= 0) + PG_RETURN_NULL(); + + ZERO_tmtc(&tmtc); + tm = tmtcTm(&tmtc); + + if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0) + PG_RETURN_NULL(); + + /* wday is meaningless, yday approximates the total span in days */ + tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday; + + if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION()))) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(res); +} + +/* --------------------- + * TO_TIMESTAMP() + * + * Make Timestamp from date_str which is formatted at argument 'fmt' + * ( to_timestamp is reverse to_char() ) + * --------------------- + */ +Datum +to_timestamp(PG_FUNCTION_ARGS) +{ + text *date_txt = PG_GETARG_TEXT_PP(0); + text *fmt = PG_GETARG_TEXT_PP(1); + Oid collid = PG_GET_COLLATION(); + Timestamp result; + int tz; + struct pg_tm tm; + fsec_t fsec; + int fprec; + + do_to_timestamp(date_txt, fmt, collid, false, + &tm, &fsec, &fprec, NULL, NULL); + + /* Use the specified time zone, if any. */ + if (tm.tm_zone) + { + int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz); + + if (dterr) + DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz"); + } + else + tz = DetermineTimeZoneOffset(&tm, session_timezone); + + if (tm2timestamp(&tm, fsec, &tz, &result) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range"))); + + /* Use the specified fractional precision, if any. */ + if (fprec) + AdjustTimestampForTypmod(&result, fprec); + + PG_RETURN_TIMESTAMP(result); +} + +/* ---------- + * TO_DATE + * Make Date from date_str which is formatted at argument 'fmt' + * ---------- + */ +Datum +to_date(PG_FUNCTION_ARGS) +{ + text *date_txt = PG_GETARG_TEXT_PP(0); + text *fmt = PG_GETARG_TEXT_PP(1); + Oid collid = PG_GET_COLLATION(); + DateADT result; + struct pg_tm tm; + fsec_t fsec; + + do_to_timestamp(date_txt, fmt, collid, false, + &tm, &fsec, NULL, NULL, NULL); + + /* Prevent overflow in Julian-day routines */ + if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: \"%s\"", + text_to_cstring(date_txt)))); + + result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE; + + /* Now check for just-out-of-range dates */ + if (!IS_VALID_DATE(result)) + ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: \"%s\"", + text_to_cstring(date_txt)))); + + PG_RETURN_DATEADT(result); +} + +/* + * Convert the 'date_txt' input to a datetime type using argument 'fmt' + * as a format string. The collation 'collid' may be used for case-folding + * rules in some cases. 'strict' specifies standard parsing mode. + * + * The actual data type (returned in 'typid', 'typmod') is determined by + * the presence of date/time/zone components in the format string. + * + * When timezone component is present, the corresponding offset is + * returned in '*tz'. + * + * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set + * and zero value is returned. + */ +Datum +parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict, + Oid *typid, int32 *typmod, int *tz, + bool *have_error) +{ + struct pg_tm tm; + fsec_t fsec; + int fprec; + uint32 flags; + + do_to_timestamp(date_txt, fmt, collid, strict, + &tm, &fsec, &fprec, &flags, have_error); + CHECK_ERROR; + + *typmod = fprec ? fprec : -1; /* fractional part precision */ + + if (flags & DCH_DATED) + { + if (flags & DCH_TIMED) + { + if (flags & DCH_ZONED) + { + TimestampTz result; + + if (tm.tm_zone) + { + int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz); + + if (dterr) + DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz"); + } + else + { + /* + * Time zone is present in format string, but not in input + * string. Assuming do_to_timestamp() triggers no error + * this should be possible only in non-strict case. + */ + Assert(!strict); + + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("missing time zone in input string for type timestamptz")))); + } + + if (tm2timestamp(&tm, fsec, tz, &result) != 0) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamptz out of range")))); + + AdjustTimestampForTypmod(&result, *typmod); + + *typid = TIMESTAMPTZOID; + return TimestampTzGetDatum(result); + } + else + { + Timestamp result; + + if (tm2timestamp(&tm, fsec, NULL, &result) != 0) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timestamp out of range")))); + + AdjustTimestampForTypmod(&result, *typmod); + + *typid = TIMESTAMPOID; + return TimestampGetDatum(result); + } + } + else + { + if (flags & DCH_ZONED) + { + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("datetime format is zoned but not timed")))); + } + else + { + DateADT result; + + /* Prevent overflow in Julian-day routines */ + if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: \"%s\"", + text_to_cstring(date_txt))))); + + result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - + POSTGRES_EPOCH_JDATE; + + /* Now check for just-out-of-range dates */ + if (!IS_VALID_DATE(result)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("date out of range: \"%s\"", + text_to_cstring(date_txt))))); + + *typid = DATEOID; + return DateADTGetDatum(result); + } + } + } + else if (flags & DCH_TIMED) + { + if (flags & DCH_ZONED) + { + TimeTzADT *result = palloc(sizeof(TimeTzADT)); + + if (tm.tm_zone) + { + int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz); + + if (dterr) + RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz")); + } + else + { + /* + * Time zone is present in format string, but not in input + * string. Assuming do_to_timestamp() triggers no error this + * should be possible only in non-strict case. + */ + Assert(!strict); + + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("missing time zone in input string for type timetz")))); + } + + if (tm2timetz(&tm, fsec, *tz, result) != 0) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("timetz out of range")))); + + AdjustTimeForTypmod(&result->time, *typmod); + + *typid = TIMETZOID; + return TimeTzADTPGetDatum(result); + } + else + { + TimeADT result; + + if (tm2time(&tm, fsec, &result) != 0) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), + errmsg("time out of range")))); + + AdjustTimeForTypmod(&result, *typmod); + + *typid = TIMEOID; + return TimeADTGetDatum(result); + } + } + else + { + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("datetime format is not dated and not timed")))); + } + +on_error: + return (Datum) 0; +} + +/* + * do_to_timestamp: shared code for to_timestamp and to_date + * + * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm, + * fractional seconds, and fractional precision. + * + * 'collid' identifies the collation to use, if needed. + * 'std' specifies standard parsing mode. + * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags', + * if that is not NULL. + * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set. + * + * We parse 'fmt' into a list of FormatNodes, which is then passed to + * DCH_from_char to populate a TmFromChar with the parsed contents of + * 'date_txt'. + * + * The TmFromChar is then analysed and converted into the final results in + * struct 'tm', 'fsec', and 'fprec'. + */ +static void +do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std, + struct pg_tm *tm, fsec_t *fsec, int *fprec, + uint32 *flags, bool *have_error) +{ + FormatNode *format = NULL; + TmFromChar tmfc; + int fmt_len; + char *date_str; + int fmask; + bool incache = false; + + Assert(tm != NULL); + Assert(fsec != NULL); + + date_str = text_to_cstring(date_txt); + + ZERO_tmfc(&tmfc); + ZERO_tm(tm); + *fsec = 0; + if (fprec) + *fprec = 0; + if (flags) + *flags = 0; + fmask = 0; /* bit mask for ValidateDate() */ + + fmt_len = VARSIZE_ANY_EXHDR(fmt); + + if (fmt_len) + { + char *fmt_str; + + fmt_str = text_to_cstring(fmt); + + if (fmt_len > DCH_CACHE_SIZE) + { + /* + * Allocate new memory if format picture is bigger than static + * cache and do not use cache (call parser always) + */ + format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); + + parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index, + DCH_FLAG | (std ? STD_FLAG : 0), NULL); + } + else + { + /* + * Use cache buffers + */ + DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std); + + incache = true; + format = ent->format; + } + +#ifdef DEBUG_TO_FROM_CHAR + /* dump_node(format, fmt_len); */ + /* dump_index(DCH_keywords, DCH_index); */ +#endif + + DCH_from_char(format, date_str, &tmfc, collid, std, have_error); + CHECK_ERROR; + + pfree(fmt_str); + + if (flags) + *flags = DCH_datetime_type(format, have_error); + + if (!incache) + { + pfree(format); + format = NULL; + } + + CHECK_ERROR; + } + + DEBUG_TMFC(&tmfc); + + /* + * Convert to_date/to_timestamp input fields to standard 'tm' + */ + if (tmfc.ssss) + { + int x = tmfc.ssss; + + tm->tm_hour = x / SECS_PER_HOUR; + x %= SECS_PER_HOUR; + tm->tm_min = x / SECS_PER_MINUTE; + x %= SECS_PER_MINUTE; + tm->tm_sec = x; + } + + if (tmfc.ss) + tm->tm_sec = tmfc.ss; + if (tmfc.mi) + tm->tm_min = tmfc.mi; + if (tmfc.hh) + tm->tm_hour = tmfc.hh; + + if (tmfc.clock == CLOCK_12_HOUR) + { + if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2) + { + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("hour \"%d\" is invalid for the 12-hour clock", + tm->tm_hour), + errhint("Use the 24-hour clock, or give an hour between 1 and 12.")))); + } + + if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2) + tm->tm_hour += HOURS_PER_DAY / 2; + else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2) + tm->tm_hour = 0; + } + + if (tmfc.year) + { + /* + * If CC and YY (or Y) are provided, use YY as 2 low-order digits for + * the year in the given century. Keep in mind that the 21st century + * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from + * 600BC to 501BC. + */ + if (tmfc.cc && tmfc.yysz <= 2) + { + if (tmfc.bc) + tmfc.cc = -tmfc.cc; + tm->tm_year = tmfc.year % 100; + if (tm->tm_year) + { + if (tmfc.cc >= 0) + tm->tm_year += (tmfc.cc - 1) * 100; + else + tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1; + } + else + { + /* find century year for dates ending in "00" */ + tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1); + } + } + else + { + /* If a 4-digit year is provided, we use that and ignore CC. */ + tm->tm_year = tmfc.year; + if (tmfc.bc) + tm->tm_year = -tm->tm_year; + /* correct for our representation of BC years */ + if (tm->tm_year < 0) + tm->tm_year++; + } + fmask |= DTK_M(YEAR); + } + else if (tmfc.cc) + { + /* use first year of century */ + if (tmfc.bc) + tmfc.cc = -tmfc.cc; + if (tmfc.cc >= 0) + /* +1 because 21st century started in 2001 */ + tm->tm_year = (tmfc.cc - 1) * 100 + 1; + else + /* +1 because year == 599 is 600 BC */ + tm->tm_year = tmfc.cc * 100 + 1; + fmask |= DTK_M(YEAR); + } + + if (tmfc.j) + { + j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + fmask |= DTK_DATE_M; + } + + if (tmfc.ww) + { + if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK) + { + /* + * If tmfc.d is not set, then the date is left at the beginning of + * the ISO week (Monday). + */ + if (tmfc.d) + isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + else + isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + fmask |= DTK_DATE_M; + } + else + tmfc.ddd = (tmfc.ww - 1) * 7 + 1; + } + + if (tmfc.w) + tmfc.dd = (tmfc.w - 1) * 7 + 1; + if (tmfc.dd) + { + tm->tm_mday = tmfc.dd; + fmask |= DTK_M(DAY); + } + if (tmfc.mm) + { + tm->tm_mon = tmfc.mm; + fmask |= DTK_M(MONTH); + } + + if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1)) + { + /* + * The month and day field have not been set, so we use the + * day-of-year field to populate them. Depending on the date mode, + * this field may be interpreted as a Gregorian day-of-year, or an ISO + * week date day-of-year. + */ + + if (!tm->tm_year && !tmfc.bc) + { + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("cannot calculate day of year without year information")))); + } + + if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK) + { + int j0; /* zeroth day of the ISO year, in Julian */ + + j0 = isoweek2j(tm->tm_year, 1) - 1; + + j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + fmask |= DTK_DATE_M; + } + else + { + const int *y; + int i; + + static const int ysum[2][13] = { + {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, + {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}}; + + y = ysum[isleap(tm->tm_year)]; + + for (i = 1; i <= MONTHS_PER_YEAR; i++) + { + if (tmfc.ddd <= y[i]) + break; + } + if (tm->tm_mon <= 1) + tm->tm_mon = i; + + if (tm->tm_mday <= 1) + tm->tm_mday = tmfc.ddd - y[i - 1]; + + fmask |= DTK_M(MONTH) | DTK_M(DAY); + } + } + + if (tmfc.ms) + *fsec += tmfc.ms * 1000; + if (tmfc.us) + *fsec += tmfc.us; + if (fprec) + *fprec = tmfc.ff; /* fractional precision, if specified */ + + /* Range-check date fields according to bit mask computed above */ + if (fmask != 0) + { + /* We already dealt with AD/BC, so pass isjulian = true */ + int dterr = ValidateDate(fmask, true, false, false, tm); + + if (dterr != 0) + { + /* + * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate + * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an + * irrelevant hint about datestyle. + */ + RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp")); + } + } + + /* Range-check time fields too */ + if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY || + tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR || + tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE || + *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC) + { + RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp")); + } + + /* Save parsed time-zone into tm->tm_zone if it was specified */ + if (tmfc.tzsign) + { + char *tz; + + if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR || + tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR) + { + RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp")); + } + + tz = psprintf("%c%02d:%02d", + tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm); + + tm->tm_zone = tz; + } + + DEBUG_TM(tm); + +on_error: + + if (format && !incache) + pfree(format); + + pfree(date_str); +} + + +/********************************************************************** + * the NUMBER version part + *********************************************************************/ + + +static char * +fill_str(char *str, int c, int max) +{ + memset(str, c, max); + *(str + max) = '\0'; + return str; +} + +#define zeroize_NUM(_n) \ +do { \ + (_n)->flag = 0; \ + (_n)->lsign = 0; \ + (_n)->pre = 0; \ + (_n)->post = 0; \ + (_n)->pre_lsign_num = 0; \ + (_n)->need_locale = 0; \ + (_n)->multi = 0; \ + (_n)->zero_start = 0; \ + (_n)->zero_end = 0; \ +} while(0) + +/* This works the same as DCH_prevent_counter_overflow */ +static inline void +NUM_prevent_counter_overflow(void) +{ + if (NUMCounter >= (INT_MAX - 1)) + { + for (int i = 0; i < n_NUMCache; i++) + NUMCache[i]->age >>= 1; + NUMCounter >>= 1; + } +} + +/* select a NUMCacheEntry to hold the given format picture */ +static NUMCacheEntry * +NUM_cache_getnew(const char *str) +{ + NUMCacheEntry *ent; + + /* Ensure we can advance NUMCounter below */ + NUM_prevent_counter_overflow(); + + /* + * If cache is full, remove oldest entry (or recycle first not-valid one) + */ + if (n_NUMCache >= NUM_CACHE_ENTRIES) + { + NUMCacheEntry *old = NUMCache[0]; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache); +#endif + if (old->valid) + { + for (int i = 1; i < NUM_CACHE_ENTRIES; i++) + { + ent = NUMCache[i]; + if (!ent->valid) + { + old = ent; + break; + } + if (ent->age < old->age) + old = ent; + } + } +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age); +#endif + old->valid = false; + strlcpy(old->str, str, NUM_CACHE_SIZE + 1); + old->age = (++NUMCounter); + /* caller is expected to fill format and Num, then set valid */ + return old; + } + else + { +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache); +#endif + Assert(NUMCache[n_NUMCache] == NULL); + NUMCache[n_NUMCache] = ent = (NUMCacheEntry *) + MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry)); + ent->valid = false; + strlcpy(ent->str, str, NUM_CACHE_SIZE + 1); + ent->age = (++NUMCounter); + /* caller is expected to fill format and Num, then set valid */ + ++n_NUMCache; + return ent; + } +} + +/* look for an existing NUMCacheEntry matching the given format picture */ +static NUMCacheEntry * +NUM_cache_search(const char *str) +{ + /* Ensure we can advance NUMCounter below */ + NUM_prevent_counter_overflow(); + + for (int i = 0; i < n_NUMCache; i++) + { + NUMCacheEntry *ent = NUMCache[i]; + + if (ent->valid && strcmp(ent->str, str) == 0) + { + ent->age = (++NUMCounter); + return ent; + } + } + + return NULL; +} + +/* Find or create a NUMCacheEntry for the given format picture */ +static NUMCacheEntry * +NUM_cache_fetch(const char *str) +{ + NUMCacheEntry *ent; + + if ((ent = NUM_cache_search(str)) == NULL) + { + /* + * Not in the cache, must run parser and save a new format-picture to + * the cache. Do not mark the cache entry valid until parsing + * succeeds. + */ + ent = NUM_cache_getnew(str); + + zeroize_NUM(&ent->Num); + + parse_format(ent->format, str, NUM_keywords, + NULL, NUM_index, NUM_FLAG, &ent->Num); + + ent->valid = true; + } + return ent; +} + +/* ---------- + * Cache routine for NUM to_char version + * ---------- + */ +static FormatNode * +NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree) +{ + FormatNode *format = NULL; + char *str; + + str = text_to_cstring(pars_str); + + if (len > NUM_CACHE_SIZE) + { + /* + * Allocate new memory if format picture is bigger than static cache + * and do not use cache (call parser always) + */ + format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode)); + + *shouldFree = true; + + zeroize_NUM(Num); + + parse_format(format, str, NUM_keywords, + NULL, NUM_index, NUM_FLAG, Num); + } + else + { + /* + * Use cache buffers + */ + NUMCacheEntry *ent = NUM_cache_fetch(str); + + *shouldFree = false; + + format = ent->format; + + /* + * Copy cache to used struct + */ + Num->flag = ent->Num.flag; + Num->lsign = ent->Num.lsign; + Num->pre = ent->Num.pre; + Num->post = ent->Num.post; + Num->pre_lsign_num = ent->Num.pre_lsign_num; + Num->need_locale = ent->Num.need_locale; + Num->multi = ent->Num.multi; + Num->zero_start = ent->Num.zero_start; + Num->zero_end = ent->Num.zero_end; + } + +#ifdef DEBUG_TO_FROM_CHAR + /* dump_node(format, len); */ + dump_index(NUM_keywords, NUM_index); +#endif + + pfree(str); + return format; +} + + +static char * +int_to_roman(int number) +{ + int len, + num; + char *p, + *result, + numstr[12]; + + result = (char *) palloc(16); + *result = '\0'; + + if (number > 3999 || number < 1) + { + fill_str(result, '#', 15); + return result; + } + len = snprintf(numstr, sizeof(numstr), "%d", number); + + for (p = numstr; *p != '\0'; p++, --len) + { + num = *p - ('0' + 1); + if (num < 0) + continue; + + if (len > 3) + { + while (num-- != -1) + strcat(result, "M"); + } + else + { + if (len == 3) + strcat(result, rm100[num]); + else if (len == 2) + strcat(result, rm10[num]); + else if (len == 1) + strcat(result, rm1[num]); + } + } + return result; +} + + + +/* ---------- + * Locale + * ---------- + */ +static void +NUM_prepare_locale(NUMProc *Np) +{ + if (Np->Num->need_locale) + { + struct lconv *lconv; + + /* + * Get locales + */ + lconv = PGLC_localeconv(); + + /* + * Positive / Negative number sign + */ + if (lconv->negative_sign && *lconv->negative_sign) + Np->L_negative_sign = lconv->negative_sign; + else + Np->L_negative_sign = "-"; + + if (lconv->positive_sign && *lconv->positive_sign) + Np->L_positive_sign = lconv->positive_sign; + else + Np->L_positive_sign = "+"; + + /* + * Number decimal point + */ + if (lconv->decimal_point && *lconv->decimal_point) + Np->decimal = lconv->decimal_point; + + else + Np->decimal = "."; + + if (!IS_LDECIMAL(Np->Num)) + Np->decimal = "."; + + /* + * Number thousands separator + * + * Some locales (e.g. broken glibc pt_BR), have a comma for decimal, + * but "" for thousands_sep, so we set the thousands_sep too. + * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php + */ + if (lconv->thousands_sep && *lconv->thousands_sep) + Np->L_thousands_sep = lconv->thousands_sep; + /* Make sure thousands separator doesn't match decimal point symbol. */ + else if (strcmp(Np->decimal, ",") != 0) + Np->L_thousands_sep = ","; + else + Np->L_thousands_sep = "."; + + /* + * Currency symbol + */ + if (lconv->currency_symbol && *lconv->currency_symbol) + Np->L_currency_symbol = lconv->currency_symbol; + else + Np->L_currency_symbol = " "; + } + else + { + /* + * Default values + */ + Np->L_negative_sign = "-"; + Np->L_positive_sign = "+"; + Np->decimal = "."; + + Np->L_thousands_sep = ","; + Np->L_currency_symbol = " "; + } +} + +/* ---------- + * Return pointer of last relevant number after decimal point + * 12.0500 --> last relevant is '5' + * 12.0000 --> last relevant is '.' + * If there is no decimal point, return NULL (which will result in same + * behavior as if FM hadn't been specified). + * ---------- + */ +static char * +get_last_relevant_decnum(char *num) +{ + char *result, + *p = strchr(num, '.'); + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "get_last_relevant_decnum()"); +#endif + + if (!p) + return NULL; + + result = p; + + while (*(++p)) + { + if (*p != '0') + result = p; + } + + return result; +} + +/* + * These macros are used in NUM_processor() and its subsidiary routines. + * OVERLOAD_TEST: true if we've reached end of input string + * AMOUNT_TEST(s): true if at least s bytes remain in string + */ +#define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len) +#define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s))) + +/* ---------- + * Number extraction for TO_NUMBER() + * ---------- + */ +static void +NUM_numpart_from_char(NUMProc *Np, int id, int input_len) +{ + bool isread = false; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, " --- scan start --- id=%s", + (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???"); +#endif + + if (OVERLOAD_TEST) + return; + + if (*Np->inout_p == ' ') + Np->inout_p++; + + if (OVERLOAD_TEST) + return; + + /* + * read sign before number + */ + if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) && + (Np->read_pre + Np->read_post) == 0) + { +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s", + *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign); +#endif + + /* + * locale sign + */ + if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE) + { + int x = 0; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p); +#endif + if ((x = strlen(Np->L_negative_sign)) && + AMOUNT_TEST(x) && + strncmp(Np->inout_p, Np->L_negative_sign, x) == 0) + { + Np->inout_p += x; + *Np->number = '-'; + } + else if ((x = strlen(Np->L_positive_sign)) && + AMOUNT_TEST(x) && + strncmp(Np->inout_p, Np->L_positive_sign, x) == 0) + { + Np->inout_p += x; + *Np->number = '+'; + } + } + else + { +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p); +#endif + + /* + * simple + - < > + */ + if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) && + *Np->inout_p == '<')) + { + *Np->number = '-'; /* set - */ + Np->inout_p++; + } + else if (*Np->inout_p == '+') + { + *Np->number = '+'; /* set + */ + Np->inout_p++; + } + } + } + + if (OVERLOAD_TEST) + return; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number); +#endif + + /* + * read digit or decimal point + */ + if (isdigit((unsigned char) *Np->inout_p)) + { + if (Np->read_dec && Np->read_post == Np->Num->post) + return; + + *Np->number_p = *Np->inout_p; + Np->number_p++; + + if (Np->read_dec) + Np->read_post++; + else + Np->read_pre++; + + isread = true; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p); +#endif + } + else if (IS_DECIMAL(Np->Num) && Np->read_dec == false) + { + /* + * We need not test IS_LDECIMAL(Np->Num) explicitly here, because + * Np->decimal is always just "." if we don't have a D format token. + * So we just unconditionally match to Np->decimal. + */ + int x = strlen(Np->decimal); + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read decimal point (%c)", + *Np->inout_p); +#endif + if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0) + { + Np->inout_p += x - 1; + *Np->number_p = '.'; + Np->number_p++; + Np->read_dec = true; + isread = true; + } + } + + if (OVERLOAD_TEST) + return; + + /* + * Read sign behind "last" number + * + * We need sign detection because determine exact position of post-sign is + * difficult: + * + * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI -> + * 5.01- + */ + if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0) + { + /* + * locale sign (NUM_S) is always anchored behind a last number, if: - + * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and + * next char is not digit + */ + if (IS_LSIGN(Np->Num) && isread && + (Np->inout_p + 1) < Np->inout + input_len && + !isdigit((unsigned char) *(Np->inout_p + 1))) + { + int x; + char *tmp = Np->inout_p++; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p); +#endif + if ((x = strlen(Np->L_negative_sign)) && + AMOUNT_TEST(x) && + strncmp(Np->inout_p, Np->L_negative_sign, x) == 0) + { + Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */ + *Np->number = '-'; + } + else if ((x = strlen(Np->L_positive_sign)) && + AMOUNT_TEST(x) && + strncmp(Np->inout_p, Np->L_positive_sign, x) == 0) + { + Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */ + *Np->number = '+'; + } + if (*Np->number == ' ') + /* no sign read */ + Np->inout_p = tmp; + } + + /* + * try read non-locale sign, it's happen only if format is not exact + * and we cannot determine sign position of MI/PL/SG, an example: + * + * FM9.999999MI -> 5.01- + * + * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats + * like to_number('1 -', '9S') where sign is not anchored to last + * number. + */ + else if (isread == false && IS_LSIGN(Np->Num) == false && + (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))) + { +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p); +#endif + + /* + * simple + - + */ + if (*Np->inout_p == '-' || *Np->inout_p == '+') + /* NUM_processor() do inout_p++ */ + *Np->number = *Np->inout_p; + } + } +} + +#define IS_PREDEC_SPACE(_n) \ + (IS_ZERO((_n)->Num)==false && \ + (_n)->number == (_n)->number_p && \ + *(_n)->number == '0' && \ + (_n)->Num->post != 0) + +/* ---------- + * Add digit or sign to number-string + * ---------- + */ +static void +NUM_numpart_to_char(NUMProc *Np, int id) +{ + int end; + + if (IS_ROMAN(Np->Num)) + return; + + /* Note: in this elog() output not set '\0' in 'inout' */ + +#ifdef DEBUG_TO_FROM_CHAR + + /* + * Np->num_curr is number of current item in format-picture, it is not + * current position in inout! + */ + elog(DEBUG_elog_output, + "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"", + Np->sign_wrote, + Np->num_curr, + Np->number_p, + Np->inout); +#endif + Np->num_in = false; + + /* + * Write sign if real number will write to output Note: IS_PREDEC_SPACE() + * handle "9.9" --> " .1" + */ + if (Np->sign_wrote == false && + (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) && + (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.'))) + { + if (IS_LSIGN(Np->Num)) + { + if (Np->Num->lsign == NUM_LSIGN_PRE) + { + if (Np->sign == '-') + strcpy(Np->inout_p, Np->L_negative_sign); + else + strcpy(Np->inout_p, Np->L_positive_sign); + Np->inout_p += strlen(Np->inout_p); + Np->sign_wrote = true; + } + } + else if (IS_BRACKET(Np->Num)) + { + *Np->inout_p = Np->sign == '+' ? ' ' : '<'; + ++Np->inout_p; + Np->sign_wrote = true; + } + else if (Np->sign == '+') + { + if (!IS_FILLMODE(Np->Num)) + { + *Np->inout_p = ' '; /* Write + */ + ++Np->inout_p; + } + Np->sign_wrote = true; + } + else if (Np->sign == '-') + { /* Write - */ + *Np->inout_p = '-'; + ++Np->inout_p; + Np->sign_wrote = true; + } + } + + + /* + * digits / FM / Zero / Dec. point + */ + if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC) + { + if (Np->num_curr < Np->out_pre_spaces && + (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num))) + { + /* + * Write blank space + */ + if (!IS_FILLMODE(Np->Num)) + { + *Np->inout_p = ' '; /* Write ' ' */ + ++Np->inout_p; + } + } + else if (IS_ZERO(Np->Num) && + Np->num_curr < Np->out_pre_spaces && + Np->Num->zero_start <= Np->num_curr) + { + /* + * Write ZERO + */ + *Np->inout_p = '0'; /* Write '0' */ + ++Np->inout_p; + Np->num_in = true; + } + else + { + /* + * Write Decimal point + */ + if (*Np->number_p == '.') + { + if (!Np->last_relevant || *Np->last_relevant != '.') + { + strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */ + Np->inout_p += strlen(Np->inout_p); + } + + /* + * Ora 'n' -- FM9.9 --> 'n.' + */ + else if (IS_FILLMODE(Np->Num) && + Np->last_relevant && *Np->last_relevant == '.') + { + strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */ + Np->inout_p += strlen(Np->inout_p); + } + } + else + { + /* + * Write Digits + */ + if (Np->last_relevant && Np->number_p > Np->last_relevant && + id != NUM_0) + ; + + /* + * '0.1' -- 9.9 --> ' .1' + */ + else if (IS_PREDEC_SPACE(Np)) + { + if (!IS_FILLMODE(Np->Num)) + { + *Np->inout_p = ' '; + ++Np->inout_p; + } + + /* + * '0' -- FM9.9 --> '0.' + */ + else if (Np->last_relevant && *Np->last_relevant == '.') + { + *Np->inout_p = '0'; + ++Np->inout_p; + } + } + else + { + *Np->inout_p = *Np->number_p; /* Write DIGIT */ + ++Np->inout_p; + Np->num_in = true; + } + } + /* do no exceed string length */ + if (*Np->number_p) + ++Np->number_p; + } + + end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0); + + if (Np->last_relevant && Np->last_relevant == Np->number_p) + end = Np->num_curr; + + if (Np->num_curr + 1 == end) + { + if (Np->sign_wrote == true && IS_BRACKET(Np->Num)) + { + *Np->inout_p = Np->sign == '+' ? ' ' : '>'; + ++Np->inout_p; + } + else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST) + { + if (Np->sign == '-') + strcpy(Np->inout_p, Np->L_negative_sign); + else + strcpy(Np->inout_p, Np->L_positive_sign); + Np->inout_p += strlen(Np->inout_p); + } + } + } + + ++Np->num_curr; +} + +/* + * Skip over "n" input characters, but only if they aren't numeric data + */ +static void +NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len) +{ + while (n-- > 0) + { + if (OVERLOAD_TEST) + break; /* end of input */ + if (strchr("0123456789.,+-", *Np->inout_p) != NULL) + break; /* it's a data character */ + Np->inout_p += pg_mblen(Np->inout_p); + } +} + +static char * +NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, + char *number, int input_len, int to_char_out_pre_spaces, + int sign, bool is_to_char, Oid collid) +{ + FormatNode *n; + NUMProc _Np, + *Np = &_Np; + const char *pattern; + int pattern_len; + + MemSet(Np, 0, sizeof(NUMProc)); + + Np->Num = Num; + Np->is_to_char = is_to_char; + Np->number = number; + Np->inout = inout; + Np->last_relevant = NULL; + Np->read_post = 0; + Np->read_pre = 0; + Np->read_dec = false; + + if (Np->Num->zero_start) + --Np->Num->zero_start; + + if (IS_EEEE(Np->Num)) + { + if (!Np->is_to_char) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"EEEE\" not supported for input"))); + return strcpy(inout, number); + } + + /* + * Roman correction + */ + if (IS_ROMAN(Np->Num)) + { + if (!Np->is_to_char) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"RN\" not supported for input"))); + + Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post = + Np->Num->pre = Np->out_pre_spaces = Np->sign = 0; + + if (IS_FILLMODE(Np->Num)) + { + Np->Num->flag = 0; + Np->Num->flag |= NUM_F_FILLMODE; + } + else + Np->Num->flag = 0; + Np->Num->flag |= NUM_F_ROMAN; + } + + /* + * Sign + */ + if (is_to_char) + { + Np->sign = sign; + + /* MI/PL/SG - write sign itself and not in number */ + if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)) + { + if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false) + Np->sign_wrote = false; /* need sign */ + else + Np->sign_wrote = true; /* needn't sign */ + } + else + { + if (Np->sign != '-') + { + if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num)) + Np->Num->flag &= ~NUM_F_BRACKET; + if (IS_MINUS(Np->Num)) + Np->Num->flag &= ~NUM_F_MINUS; + } + else if (Np->sign != '+' && IS_PLUS(Np->Num)) + Np->Num->flag &= ~NUM_F_PLUS; + + if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false) + Np->sign_wrote = true; /* needn't sign */ + else + Np->sign_wrote = false; /* need sign */ + + if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num) + Np->Num->lsign = NUM_LSIGN_POST; + } + } + else + Np->sign = false; + + /* + * Count + */ + Np->num_count = Np->Num->post + Np->Num->pre - 1; + + if (is_to_char) + { + Np->out_pre_spaces = to_char_out_pre_spaces; + + if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num)) + { + Np->last_relevant = get_last_relevant_decnum(Np->number); + + /* + * If any '0' specifiers are present, make sure we don't strip + * those digits. + */ + if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces) + { + char *last_zero; + + last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces); + if (Np->last_relevant < last_zero) + Np->last_relevant = last_zero; + } + } + + if (Np->sign_wrote == false && Np->out_pre_spaces == 0) + ++Np->num_count; + } + else + { + Np->out_pre_spaces = 0; + *Np->number = ' '; /* sign space */ + *(Np->number + 1) = '\0'; + } + + Np->num_in = 0; + Np->num_curr = 0; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, + "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s", + Np->sign, + Np->number, + Np->Num->pre, + Np->Num->post, + Np->num_count, + Np->out_pre_spaces, + Np->sign_wrote ? "Yes" : "No", + IS_ZERO(Np->Num) ? "Yes" : "No", + Np->Num->zero_start, + Np->Num->zero_end, + Np->last_relevant ? Np->last_relevant : "", + IS_BRACKET(Np->Num) ? "Yes" : "No", + IS_PLUS(Np->Num) ? "Yes" : "No", + IS_MINUS(Np->Num) ? "Yes" : "No", + IS_FILLMODE(Np->Num) ? "Yes" : "No", + IS_ROMAN(Np->Num) ? "Yes" : "No", + IS_EEEE(Np->Num) ? "Yes" : "No" + ); +#endif + + /* + * Locale + */ + NUM_prepare_locale(Np); + + /* + * Processor direct cycle + */ + if (Np->is_to_char) + Np->number_p = Np->number; + else + Np->number_p = Np->number + 1; /* first char is space for sign */ + + for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++) + { + if (!Np->is_to_char) + { + /* + * Check at least one byte remains to be scanned. (In actions + * below, must use AMOUNT_TEST if we want to read more bytes than + * that.) + */ + if (OVERLOAD_TEST) + break; + } + + /* + * Format pictures actions + */ + if (n->type == NODE_TYPE_ACTION) + { + /* + * Create/read digit/zero/blank/sign/special-case + * + * 'NUM_S' note: The locale sign is anchored to number and we + * read/write it when we work with first or last number + * (NUM_0/NUM_9). This is why NUM_S is missing in switch(). + * + * Notice the "Np->inout_p++" at the bottom of the loop. This is + * why most of the actions advance inout_p one less than you might + * expect. In cases where we don't want that increment to happen, + * a switch case ends with "continue" not "break". + */ + switch (n->key->id) + { + case NUM_9: + case NUM_0: + case NUM_DEC: + case NUM_D: + if (Np->is_to_char) + { + NUM_numpart_to_char(Np, n->key->id); + continue; /* for() */ + } + else + { + NUM_numpart_from_char(Np, n->key->id, input_len); + break; /* switch() case: */ + } + + case NUM_COMMA: + if (Np->is_to_char) + { + if (!Np->num_in) + { + if (IS_FILLMODE(Np->Num)) + continue; + else + *Np->inout_p = ' '; + } + else + *Np->inout_p = ','; + } + else + { + if (!Np->num_in) + { + if (IS_FILLMODE(Np->Num)) + continue; + } + if (*Np->inout_p != ',') + continue; + } + break; + + case NUM_G: + pattern = Np->L_thousands_sep; + pattern_len = strlen(pattern); + if (Np->is_to_char) + { + if (!Np->num_in) + { + if (IS_FILLMODE(Np->Num)) + continue; + else + { + /* just in case there are MB chars */ + pattern_len = pg_mbstrlen(pattern); + memset(Np->inout_p, ' ', pattern_len); + Np->inout_p += pattern_len - 1; + } + } + else + { + strcpy(Np->inout_p, pattern); + Np->inout_p += pattern_len - 1; + } + } + else + { + if (!Np->num_in) + { + if (IS_FILLMODE(Np->Num)) + continue; + } + + /* + * Because L_thousands_sep typically contains data + * characters (either '.' or ','), we can't use + * NUM_eat_non_data_chars here. Instead skip only if + * the input matches L_thousands_sep. + */ + if (AMOUNT_TEST(pattern_len) && + strncmp(Np->inout_p, pattern, pattern_len) == 0) + Np->inout_p += pattern_len - 1; + else + continue; + } + break; + + case NUM_L: + pattern = Np->L_currency_symbol; + if (Np->is_to_char) + { + strcpy(Np->inout_p, pattern); + Np->inout_p += strlen(pattern) - 1; + } + else + { + NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len); + continue; + } + break; + + case NUM_RN: + if (IS_FILLMODE(Np->Num)) + { + strcpy(Np->inout_p, Np->number_p); + Np->inout_p += strlen(Np->inout_p) - 1; + } + else + { + sprintf(Np->inout_p, "%15s", Np->number_p); + Np->inout_p += strlen(Np->inout_p) - 1; + } + break; + + case NUM_rn: + if (IS_FILLMODE(Np->Num)) + { + strcpy(Np->inout_p, asc_tolower_z(Np->number_p)); + Np->inout_p += strlen(Np->inout_p) - 1; + } + else + { + sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p)); + Np->inout_p += strlen(Np->inout_p) - 1; + } + break; + + case NUM_th: + if (IS_ROMAN(Np->Num) || *Np->number == '#' || + Np->sign == '-' || IS_DECIMAL(Np->Num)) + continue; + + if (Np->is_to_char) + { + strcpy(Np->inout_p, get_th(Np->number, TH_LOWER)); + Np->inout_p += 1; + } + else + { + /* All variants of 'th' occupy 2 characters */ + NUM_eat_non_data_chars(Np, 2, input_len); + continue; + } + break; + + case NUM_TH: + if (IS_ROMAN(Np->Num) || *Np->number == '#' || + Np->sign == '-' || IS_DECIMAL(Np->Num)) + continue; + + if (Np->is_to_char) + { + strcpy(Np->inout_p, get_th(Np->number, TH_UPPER)); + Np->inout_p += 1; + } + else + { + /* All variants of 'TH' occupy 2 characters */ + NUM_eat_non_data_chars(Np, 2, input_len); + continue; + } + break; + + case NUM_MI: + if (Np->is_to_char) + { + if (Np->sign == '-') + *Np->inout_p = '-'; + else if (IS_FILLMODE(Np->Num)) + continue; + else + *Np->inout_p = ' '; + } + else + { + if (*Np->inout_p == '-') + *Np->number = '-'; + else + { + NUM_eat_non_data_chars(Np, 1, input_len); + continue; + } + } + break; + + case NUM_PL: + if (Np->is_to_char) + { + if (Np->sign == '+') + *Np->inout_p = '+'; + else if (IS_FILLMODE(Np->Num)) + continue; + else + *Np->inout_p = ' '; + } + else + { + if (*Np->inout_p == '+') + *Np->number = '+'; + else + { + NUM_eat_non_data_chars(Np, 1, input_len); + continue; + } + } + break; + + case NUM_SG: + if (Np->is_to_char) + *Np->inout_p = Np->sign; + else + { + if (*Np->inout_p == '-') + *Np->number = '-'; + else if (*Np->inout_p == '+') + *Np->number = '+'; + else + { + NUM_eat_non_data_chars(Np, 1, input_len); + continue; + } + } + break; + + default: + continue; + break; + } + } + else + { + /* + * In TO_CHAR, non-pattern characters in the format are copied to + * the output. In TO_NUMBER, we skip one input character for each + * non-pattern format character, whether or not it matches the + * format character. + */ + if (Np->is_to_char) + { + strcpy(Np->inout_p, n->character); + Np->inout_p += strlen(Np->inout_p); + } + else + { + Np->inout_p += pg_mblen(Np->inout_p); + } + continue; + } + Np->inout_p++; + } + + if (Np->is_to_char) + { + *Np->inout_p = '\0'; + return Np->inout; + } + else + { + if (*(Np->number_p - 1) == '.') + *(Np->number_p - 1) = '\0'; + else + *Np->number_p = '\0'; + + /* + * Correction - precision of dec. number + */ + Np->Num->post = Np->read_post; + +#ifdef DEBUG_TO_FROM_CHAR + elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number); +#endif + return Np->number; + } +} + +/* ---------- + * MACRO: Start part of NUM - for all NUM's to_char variants + * (sorry, but I hate copy same code - macro is better..) + * ---------- + */ +#define NUM_TOCHAR_prepare \ +do { \ + int len = VARSIZE_ANY_EXHDR(fmt); \ + if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ) \ + PG_RETURN_TEXT_P(cstring_to_text("")); \ + result = (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \ + format = NUM_cache(len, &Num, fmt, &shouldFree); \ +} while (0) + +/* ---------- + * MACRO: Finish part of NUM + * ---------- + */ +#define NUM_TOCHAR_finish \ +do { \ + int len; \ + \ + NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \ + \ + if (shouldFree) \ + pfree(format); \ + \ + /* \ + * Convert null-terminated representation of result to standard text. \ + * The result is usually much bigger than it needs to be, but there \ + * seems little point in realloc'ing it smaller. \ + */ \ + len = strlen(VARDATA(result)); \ + SET_VARSIZE(result, len + VARHDRSZ); \ +} while (0) + +/* ------------------- + * NUMERIC to_number() (convert string to numeric) + * ------------------- + */ +Datum +numeric_to_number(PG_FUNCTION_ARGS) +{ + text *value = PG_GETARG_TEXT_PP(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + Datum result; + FormatNode *format; + char *numstr; + bool shouldFree; + int len = 0; + int scale, + precision; + + len = VARSIZE_ANY_EXHDR(fmt); + + if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ) + PG_RETURN_NULL(); + + format = NUM_cache(len, &Num, fmt, &shouldFree); + + numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1); + + NUM_processor(format, &Num, VARDATA_ANY(value), numstr, + VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION()); + + scale = Num.post; + precision = Num.pre + Num.multi + scale; + + if (shouldFree) + pfree(format); + + result = DirectFunctionCall3(numeric_in, + CStringGetDatum(numstr), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(((precision << 16) | scale) + VARHDRSZ)); + + if (IS_MULTI(&Num)) + { + Numeric x; + Numeric a = int64_to_numeric(10); + Numeric b = int64_to_numeric(-Num.multi); + + x = DatumGetNumeric(DirectFunctionCall2(numeric_power, + NumericGetDatum(a), + NumericGetDatum(b))); + result = DirectFunctionCall2(numeric_mul, + result, + NumericGetDatum(x)); + } + + pfree(numstr); + return result; +} + +/* ------------------ + * NUMERIC to_char() + * ------------------ + */ +Datum +numeric_to_char(PG_FUNCTION_ARGS) +{ + Numeric value = PG_GETARG_NUMERIC(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + FormatNode *format; + text *result; + bool shouldFree; + int out_pre_spaces = 0, + sign = 0; + char *numstr, + *orgnum, + *p; + Numeric x; + + NUM_TOCHAR_prepare; + + /* + * On DateType depend part (numeric) + */ + if (IS_ROMAN(&Num)) + { + x = DatumGetNumeric(DirectFunctionCall2(numeric_round, + NumericGetDatum(value), + Int32GetDatum(0))); + numstr = + int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4, + NumericGetDatum(x)))); + } + else if (IS_EEEE(&Num)) + { + orgnum = numeric_out_sci(value, Num.post); + + /* + * numeric_out_sci() does not emit a sign for positive numbers. We + * need to add a space in this case so that positive and negative + * numbers are aligned. Also must check for NaN/infinity cases, which + * we handle the same way as in float8_to_char. + */ + if (strcmp(orgnum, "NaN") == 0 || + strcmp(orgnum, "Infinity") == 0 || + strcmp(orgnum, "-Infinity") == 0) + { + /* + * Allow 6 characters for the leading sign, the decimal point, + * "e", the exponent's sign and two exponent digits. + */ + numstr = (char *) palloc(Num.pre + Num.post + 7); + fill_str(numstr, '#', Num.pre + Num.post + 6); + *numstr = ' '; + *(numstr + Num.pre + 1) = '.'; + } + else if (*orgnum != '-') + { + numstr = (char *) palloc(strlen(orgnum) + 2); + *numstr = ' '; + strcpy(numstr + 1, orgnum); + } + else + { + numstr = orgnum; + } + } + else + { + int numstr_pre_len; + Numeric val = value; + + if (IS_MULTI(&Num)) + { + Numeric a = int64_to_numeric(10); + Numeric b = int64_to_numeric(Num.multi); + + x = DatumGetNumeric(DirectFunctionCall2(numeric_power, + NumericGetDatum(a), + NumericGetDatum(b))); + val = DatumGetNumeric(DirectFunctionCall2(numeric_mul, + NumericGetDatum(value), + NumericGetDatum(x))); + Num.pre += Num.multi; + } + + x = DatumGetNumeric(DirectFunctionCall2(numeric_round, + NumericGetDatum(val), + Int32GetDatum(Num.post))); + orgnum = DatumGetCString(DirectFunctionCall1(numeric_out, + NumericGetDatum(x))); + + if (*orgnum == '-') + { + sign = '-'; + numstr = orgnum + 1; + } + else + { + sign = '+'; + numstr = orgnum; + } + + if ((p = strchr(numstr, '.'))) + numstr_pre_len = p - numstr; + else + numstr_pre_len = strlen(numstr); + + /* needs padding? */ + if (numstr_pre_len < Num.pre) + out_pre_spaces = Num.pre - numstr_pre_len; + /* overflowed prefix digit format? */ + else if (numstr_pre_len > Num.pre) + { + numstr = (char *) palloc(Num.pre + Num.post + 2); + fill_str(numstr, '#', Num.pre + Num.post + 1); + *(numstr + Num.pre) = '.'; + } + } + + NUM_TOCHAR_finish; + PG_RETURN_TEXT_P(result); +} + +/* --------------- + * INT4 to_char() + * --------------- + */ +Datum +int4_to_char(PG_FUNCTION_ARGS) +{ + int32 value = PG_GETARG_INT32(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + FormatNode *format; + text *result; + bool shouldFree; + int out_pre_spaces = 0, + sign = 0; + char *numstr, + *orgnum; + + NUM_TOCHAR_prepare; + + /* + * On DateType depend part (int32) + */ + if (IS_ROMAN(&Num)) + numstr = int_to_roman(value); + else if (IS_EEEE(&Num)) + { + /* we can do it easily because float8 won't lose any precision */ + float8 val = (float8) value; + + orgnum = (char *) psprintf("%+.*e", Num.post, val); + + /* + * Swap a leading positive sign for a space. + */ + if (*orgnum == '+') + *orgnum = ' '; + + numstr = orgnum; + } + else + { + int numstr_pre_len; + + if (IS_MULTI(&Num)) + { + orgnum = DatumGetCString(DirectFunctionCall1(int4out, + Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi))))); + Num.pre += Num.multi; + } + else + { + orgnum = DatumGetCString(DirectFunctionCall1(int4out, + Int32GetDatum(value))); + } + + if (*orgnum == '-') + { + sign = '-'; + orgnum++; + } + else + sign = '+'; + + numstr_pre_len = strlen(orgnum); + + /* post-decimal digits? Pad out with zeros. */ + if (Num.post) + { + numstr = (char *) palloc(numstr_pre_len + Num.post + 2); + strcpy(numstr, orgnum); + *(numstr + numstr_pre_len) = '.'; + memset(numstr + numstr_pre_len + 1, '0', Num.post); + *(numstr + numstr_pre_len + Num.post + 1) = '\0'; + } + else + numstr = orgnum; + + /* needs padding? */ + if (numstr_pre_len < Num.pre) + out_pre_spaces = Num.pre - numstr_pre_len; + /* overflowed prefix digit format? */ + else if (numstr_pre_len > Num.pre) + { + numstr = (char *) palloc(Num.pre + Num.post + 2); + fill_str(numstr, '#', Num.pre + Num.post + 1); + *(numstr + Num.pre) = '.'; + } + } + + NUM_TOCHAR_finish; + PG_RETURN_TEXT_P(result); +} + +/* --------------- + * INT8 to_char() + * --------------- + */ +Datum +int8_to_char(PG_FUNCTION_ARGS) +{ + int64 value = PG_GETARG_INT64(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + FormatNode *format; + text *result; + bool shouldFree; + int out_pre_spaces = 0, + sign = 0; + char *numstr, + *orgnum; + + NUM_TOCHAR_prepare; + + /* + * On DateType depend part (int32) + */ + if (IS_ROMAN(&Num)) + { + /* Currently don't support int8 conversion to roman... */ + numstr = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value)))); + } + else if (IS_EEEE(&Num)) + { + /* to avoid loss of precision, must go via numeric not float8 */ + orgnum = numeric_out_sci(int64_to_numeric(value), + Num.post); + + /* + * numeric_out_sci() does not emit a sign for positive numbers. We + * need to add a space in this case so that positive and negative + * numbers are aligned. We don't have to worry about NaN/inf here. + */ + if (*orgnum != '-') + { + numstr = (char *) palloc(strlen(orgnum) + 2); + *numstr = ' '; + strcpy(numstr + 1, orgnum); + } + else + { + numstr = orgnum; + } + } + else + { + int numstr_pre_len; + + if (IS_MULTI(&Num)) + { + double multi = pow((double) 10, (double) Num.multi); + + value = DatumGetInt64(DirectFunctionCall2(int8mul, + Int64GetDatum(value), + DirectFunctionCall1(dtoi8, + Float8GetDatum(multi)))); + Num.pre += Num.multi; + } + + orgnum = DatumGetCString(DirectFunctionCall1(int8out, + Int64GetDatum(value))); + + if (*orgnum == '-') + { + sign = '-'; + orgnum++; + } + else + sign = '+'; + + numstr_pre_len = strlen(orgnum); + + /* post-decimal digits? Pad out with zeros. */ + if (Num.post) + { + numstr = (char *) palloc(numstr_pre_len + Num.post + 2); + strcpy(numstr, orgnum); + *(numstr + numstr_pre_len) = '.'; + memset(numstr + numstr_pre_len + 1, '0', Num.post); + *(numstr + numstr_pre_len + Num.post + 1) = '\0'; + } + else + numstr = orgnum; + + /* needs padding? */ + if (numstr_pre_len < Num.pre) + out_pre_spaces = Num.pre - numstr_pre_len; + /* overflowed prefix digit format? */ + else if (numstr_pre_len > Num.pre) + { + numstr = (char *) palloc(Num.pre + Num.post + 2); + fill_str(numstr, '#', Num.pre + Num.post + 1); + *(numstr + Num.pre) = '.'; + } + } + + NUM_TOCHAR_finish; + PG_RETURN_TEXT_P(result); +} + +/* ----------------- + * FLOAT4 to_char() + * ----------------- + */ +Datum +float4_to_char(PG_FUNCTION_ARGS) +{ + float4 value = PG_GETARG_FLOAT4(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + FormatNode *format; + text *result; + bool shouldFree; + int out_pre_spaces = 0, + sign = 0; + char *numstr, + *p; + + NUM_TOCHAR_prepare; + + if (IS_ROMAN(&Num)) + numstr = int_to_roman((int) rint(value)); + else if (IS_EEEE(&Num)) + { + if (isnan(value) || isinf(value)) + { + /* + * Allow 6 characters for the leading sign, the decimal point, + * "e", the exponent's sign and two exponent digits. + */ + numstr = (char *) palloc(Num.pre + Num.post + 7); + fill_str(numstr, '#', Num.pre + Num.post + 6); + *numstr = ' '; + *(numstr + Num.pre + 1) = '.'; + } + else + { + numstr = psprintf("%+.*e", Num.post, value); + + /* + * Swap a leading positive sign for a space. + */ + if (*numstr == '+') + *numstr = ' '; + } + } + else + { + float4 val = value; + char *orgnum; + int numstr_pre_len; + + if (IS_MULTI(&Num)) + { + float multi = pow((double) 10, (double) Num.multi); + + val = value * multi; + Num.pre += Num.multi; + } + + orgnum = psprintf("%.0f", fabs(val)); + numstr_pre_len = strlen(orgnum); + + /* adjust post digits to fit max float digits */ + if (numstr_pre_len >= FLT_DIG) + Num.post = 0; + else if (numstr_pre_len + Num.post > FLT_DIG) + Num.post = FLT_DIG - numstr_pre_len; + orgnum = psprintf("%.*f", Num.post, val); + + if (*orgnum == '-') + { /* < 0 */ + sign = '-'; + numstr = orgnum + 1; + } + else + { + sign = '+'; + numstr = orgnum; + } + + if ((p = strchr(numstr, '.'))) + numstr_pre_len = p - numstr; + else + numstr_pre_len = strlen(numstr); + + /* needs padding? */ + if (numstr_pre_len < Num.pre) + out_pre_spaces = Num.pre - numstr_pre_len; + /* overflowed prefix digit format? */ + else if (numstr_pre_len > Num.pre) + { + numstr = (char *) palloc(Num.pre + Num.post + 2); + fill_str(numstr, '#', Num.pre + Num.post + 1); + *(numstr + Num.pre) = '.'; + } + } + + NUM_TOCHAR_finish; + PG_RETURN_TEXT_P(result); +} + +/* ----------------- + * FLOAT8 to_char() + * ----------------- + */ +Datum +float8_to_char(PG_FUNCTION_ARGS) +{ + float8 value = PG_GETARG_FLOAT8(0); + text *fmt = PG_GETARG_TEXT_PP(1); + NUMDesc Num; + FormatNode *format; + text *result; + bool shouldFree; + int out_pre_spaces = 0, + sign = 0; + char *numstr, + *p; + + NUM_TOCHAR_prepare; + + if (IS_ROMAN(&Num)) + numstr = int_to_roman((int) rint(value)); + else if (IS_EEEE(&Num)) + { + if (isnan(value) || isinf(value)) + { + /* + * Allow 6 characters for the leading sign, the decimal point, + * "e", the exponent's sign and two exponent digits. + */ + numstr = (char *) palloc(Num.pre + Num.post + 7); + fill_str(numstr, '#', Num.pre + Num.post + 6); + *numstr = ' '; + *(numstr + Num.pre + 1) = '.'; + } + else + { + numstr = psprintf("%+.*e", Num.post, value); + + /* + * Swap a leading positive sign for a space. + */ + if (*numstr == '+') + *numstr = ' '; + } + } + else + { + float8 val = value; + char *orgnum; + int numstr_pre_len; + + if (IS_MULTI(&Num)) + { + double multi = pow((double) 10, (double) Num.multi); + + val = value * multi; + Num.pre += Num.multi; + } + + orgnum = psprintf("%.0f", fabs(val)); + numstr_pre_len = strlen(orgnum); + + /* adjust post digits to fit max double digits */ + if (numstr_pre_len >= DBL_DIG) + Num.post = 0; + else if (numstr_pre_len + Num.post > DBL_DIG) + Num.post = DBL_DIG - numstr_pre_len; + orgnum = psprintf("%.*f", Num.post, val); + + if (*orgnum == '-') + { /* < 0 */ + sign = '-'; + numstr = orgnum + 1; + } + else + { + sign = '+'; + numstr = orgnum; + } + + if ((p = strchr(numstr, '.'))) + numstr_pre_len = p - numstr; + else + numstr_pre_len = strlen(numstr); + + /* needs padding? */ + if (numstr_pre_len < Num.pre) + out_pre_spaces = Num.pre - numstr_pre_len; + /* overflowed prefix digit format? */ + else if (numstr_pre_len > Num.pre) + { + numstr = (char *) palloc(Num.pre + Num.post + 2); + fill_str(numstr, '#', Num.pre + Num.post + 1); + *(numstr + Num.pre) = '.'; + } + } + + NUM_TOCHAR_finish; + PG_RETURN_TEXT_P(result); +} -- cgit v1.2.3