diff options
Diffstat (limited to 'strings/conf_to_src.c')
-rw-r--r-- | strings/conf_to_src.c | 544 |
1 files changed, 544 insertions, 0 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c new file mode 100644 index 00000000..fce763b4 --- /dev/null +++ b/strings/conf_to_src.c @@ -0,0 +1,544 @@ +/* Copyright (c) 2000-2003, 2005-2007 MySQL AB, 2009 Sun Microsystems, Inc. + Copyright (c) 2009-2011, Monty Program Ab + Use is subject to license terms. + Copyright (c) 2009-2011, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include "strings_def.h" +#include <m_ctype.h> +#include <fcntl.h> +#include <my_xml.h> + +#define ROW_LEN 16 +#define ROW16_LEN 8 +#define MAX_BUF (64*1024) + + +#define MY_ALL_CHARSETS_SIZE 2048 + +static struct charset_info_st all_charsets[MY_ALL_CHARSETS_SIZE]; +static uint refids[MY_ALL_CHARSETS_SIZE]; + +static CHARSET_INFO *inheritance_source(uint id) +{ + return &all_charsets[refids[id]]; +} + + +void +print_array(FILE *f, const char *set, const char *name, const uchar *a, int n) +{ + int i; + + fprintf(f,"static const uchar %s_%s[] = {\n", name, set); + + for (i=0 ;i<n ; i++) + { + fprintf(f,"0x%02X",a[i]); + fprintf(f, (i+1<n) ? "," :"" ); + fprintf(f, ((i+1) % ROW_LEN == n % ROW_LEN) ? "\n" : "" ); + } + fprintf(f,"};\n\n"); +} + + +void +print_array16(FILE *f, const char *set, const char *name, const uint16 *a, int n) +{ + int i; + + fprintf(f,"static const uint16 %s_%s[] = {\n", name, set); + + for (i=0 ;i<n ; i++) + { + fprintf(f,"0x%04X",a[i]); + fprintf(f, (i+1<n) ? "," :"" ); + fprintf(f, ((i+1) % ROW16_LEN == n % ROW16_LEN) ? "\n" : "" ); + } + fprintf(f,"};\n\n"); +} + + +static uint get_collation_number(const char *name) +{ + CHARSET_INFO *cs; + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) + { + if (cs->coll_name.str && !strcmp(cs->coll_name.str, name)) + return cs->number; + } + return 0; +} + + +static uint +get_charset_number_internal(const char *charset_name, uint cs_flags) +{ + CHARSET_INFO *cs; + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) + { + if (cs->cs_name.str && (cs->state & cs_flags) && + !strcmp(cs->cs_name.str, charset_name)) + return cs->number; + } + return 0; +} + +char *mdup(const char *src, uint len) +{ + char *dst=(char*)malloc(len); + if (!dst) + exit(1); + memcpy(dst,src,len); + return dst; +} + +static void simple_cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from) +{ + to->number= from->number ? from->number : to->number; + to->state|= from->state; + + if (from->cs_name.str) + { + to->cs_name.str= strndup(from->cs_name.str, from->cs_name.length); + to->cs_name.length= from->cs_name.length; + } + + if (from->coll_name.str) + { + to->coll_name.str= strndup(from->coll_name.str, from->coll_name.length); + to->coll_name.length= from->coll_name.length; + } + + if (from->tailoring) + to->tailoring= strdup(from->tailoring); + + if (from->m_ctype) + to->m_ctype= (uchar*) mdup((char*) from->m_ctype, MY_CS_CTYPE_TABLE_SIZE); + if (from->to_lower) + to->to_lower= (uchar*) mdup((char*) from->to_lower, MY_CS_TO_LOWER_TABLE_SIZE); + if (from->to_upper) + to->to_upper= (uchar*) mdup((char*) from->to_upper, MY_CS_TO_UPPER_TABLE_SIZE); + if (from->sort_order) + { + to->sort_order= (uchar*) mdup((char*) from->sort_order, MY_CS_SORT_ORDER_TABLE_SIZE); + /* + set_max_sort_char(to); + */ + } + if (from->tab_to_uni) + { + uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16); + to->tab_to_uni= (uint16*) mdup((char*)from->tab_to_uni, sz); + /* + create_fromuni(to); + */ + } +} + + +/* + cs->xxx arrays can be NULL in case when a collation has an entry only + in Index.xml and has no entry in csname.xml (e.g. in case of a binary + collation or a collation using <import> command). + + refcs->xxx arrays can be NULL if <import> refers to a collation + which is not defined in csname.xml, e.g. an always compiled collation + such as latin1_swedish_ci. +*/ +static void inherit_charset_data(struct charset_info_st *cs, + CHARSET_INFO *refcs) +{ + cs->state|= (refcs->state & (MY_CS_PUREASCII|MY_CS_NONASCII)); + if (refcs->m_ctype && cs->m_ctype && + !memcmp(cs->m_ctype, refcs->m_ctype, MY_CS_CTYPE_TABLE_SIZE)) + cs->m_ctype= NULL; + if (refcs->to_lower && cs->to_lower && + !memcmp(cs->to_lower, refcs->to_lower, MY_CS_TO_LOWER_TABLE_SIZE)) + cs->to_lower= NULL; + if (refcs->to_upper && cs->to_upper && + !memcmp(cs->to_upper, refcs->to_upper, MY_CS_TO_LOWER_TABLE_SIZE)) + cs->to_upper= NULL; + if (refcs->tab_to_uni && cs->tab_to_uni && + !memcmp(cs->tab_to_uni, refcs->tab_to_uni, + MY_CS_TO_UNI_TABLE_SIZE * sizeof(uint16))) + cs->tab_to_uni= NULL; +} + + +static CHARSET_INFO *find_charset_data_inheritance_source(CHARSET_INFO *cs) +{ + CHARSET_INFO *refcs; + uint refid= get_charset_number_internal(cs->cs_name.str, MY_CS_PRIMARY); + return refid && refid != cs->number && + (refcs= &all_charsets[refid]) && + (refcs->state & MY_CS_LOADED) ? refcs : NULL; +} + + +/** + Detect if "cs" needs further loading from csname.xml + @param cs - the character set pointer + @retval FALSE - if the current data (e.g. loaded from from Index.xml) + is not enough to dump the character set and requires + further reading from the csname.xml file. + @retval TRUE - if the current data is enough to dump, + no reading of csname.xml is needed. +*/ +static my_bool simple_cs_is_full(CHARSET_INFO *cs) +{ + return ((cs->cs_name.str && cs->tab_to_uni && cs->m_ctype && cs->to_upper && + cs->to_lower) && + (cs->number && cs->coll_name.str && + (cs->sort_order || cs->tailoring || (cs->state & MY_CS_BINSORT)))); +} + +static int add_collation(struct charset_info_st *cs) +{ + if (cs->coll_name.str && + (cs->number || (cs->number= get_collation_number(cs->coll_name.str)))) + { + if (!(all_charsets[cs->number].state & MY_CS_COMPILED)) + { + simple_cs_copy_data(&all_charsets[cs->number],cs); + + } + + cs->number= 0; + cs->coll_name.str= 0; + cs->coll_name.length= 0; + cs->tailoring= NULL; + cs->state= 0; + cs->sort_order= NULL; + cs->state= 0; + } + return MY_XML_OK; +} + + +static void +default_reporter(enum loglevel level __attribute__ ((unused)), + const char *format __attribute__ ((unused)), + ...) +{ +} + + +static void +my_charset_loader_init(MY_CHARSET_LOADER *loader) +{ + loader->error[0]= '\0'; + loader->once_alloc= malloc; + loader->malloc= malloc; + loader->realloc= realloc; + loader->free= free; + loader->reporter= default_reporter; + loader->add_collation= add_collation; +} + + +static int my_read_charset_file(const char *filename) +{ + char buf[MAX_BUF]; + int fd; + uint len; + MY_CHARSET_LOADER loader; + + my_charset_loader_init(&loader); + if ((fd=open(filename,O_RDONLY)) < 0) + { + fprintf(stderr,"Can't open '%s'\n",filename); + return 1; + } + + len=read(fd,buf,MAX_BUF); + DBUG_ASSERT(len < MAX_BUF); + close(fd); + + if (my_parse_charset_xml(&loader, buf, len)) + { + fprintf(stderr, "Error while parsing '%s': %s\n", filename, loader.error); + exit(1); + } + + return FALSE; +} + + +void print_arrays(FILE *f, CHARSET_INFO *cs) +{ + if (cs->m_ctype) + print_array(f, cs->coll_name.str, "ctype", cs->m_ctype, MY_CS_CTYPE_TABLE_SIZE); + if (cs->to_lower) + print_array(f, cs->coll_name.str, "to_lower", cs->to_lower, MY_CS_TO_LOWER_TABLE_SIZE); + if (cs->to_upper) + print_array(f, cs->coll_name.str, "to_upper", cs->to_upper, MY_CS_TO_UPPER_TABLE_SIZE); + if (cs->sort_order) + print_array(f, cs->coll_name.str, "sort_order", cs->sort_order, MY_CS_SORT_ORDER_TABLE_SIZE); + if (cs->tab_to_uni) + print_array16(f, cs->coll_name.str, "to_uni", cs->tab_to_uni, MY_CS_TO_UNI_TABLE_SIZE); +} + + +/** + Print an array member of a CHARSET_INFO. + @param f - the file to print into + @param cs0 - reference to the CHARSET_INFO to print + @param array0 - pointer to the array data (can be NULL) + @param cs1 - reference to the CHARSET_INFO that the data + can be inherited from (e.g. primary collation) + @param array1 - pointer to the array data in cs1 (can be NULL) + @param name - name of the member + + If array0 is not null, then the CHARSET_INFO being dumped has its + own array (e.g. the default collation for the character set). + We print the name of this array using cs0->name and return. + + If array1 is not null, then the CHARSET_INFO being dumpled reuses + the array from another collation. We print the name of the array of + the referenced collation using cs1->name and return. + + Otherwise (if both array0 and array1 are NULL), we have a collation + of a character set whose primary collation is not available now, + and which does not have its own entry in csname.xml file. + + For example, Index.xml has this entry: + <collation name="latin1_swedish_ci_copy"> + <rules> + <import source="latin1_swedish_ci"/> + </rules> + </collation> + and latin1.xml does not have entries for latin1_swedish_ci_copy. + + In such cases we print NULL as a pointer to the array. + It will be set to a not-null data during the first initialization + by the inherit_charset_data() call (see mysys/charset.c for details). +*/ +static void +print_array_ref(FILE *f, + CHARSET_INFO *cs0, const void *array0, + CHARSET_INFO *cs1, const void *array1, + const char *name) +{ + CHARSET_INFO *cs= array0 ? cs0 : array1 ? cs1 : NULL; + if (cs) + fprintf(f," %s_%s, /* %s */\n", + name, cs->coll_name.str, name); + else + fprintf(f," NULL, /* %s */\n", name); +} + + +static const char *nopad_infix(CHARSET_INFO *cs) +{ + return (cs->state & MY_CS_NOPAD) ? "_nopad" : ""; +} + + +void fprintf_lex_str_member(FILE *f, const LEX_CSTRING str, const char *comment) +{ + fprintf(f," { STRING_WITH_LEN(\"%s\") }, %s\n", str.str, comment); +} + + +void dispcset(FILE *f,CHARSET_INFO *cs) +{ + fprintf(f,"{\n"); + fprintf(f," %d,%d,%d,\n",cs->number,0,0); + fprintf(f," MY_CS_COMPILED%s%s%s%s%s%s,\n", + cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "", + cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "", + cs->state & MY_CS_CSSORT ? "|MY_CS_CSSORT" : "", + cs->state & MY_CS_PUREASCII ? "|MY_CS_PUREASCII" : "", + cs->state & MY_CS_NONASCII ? "|MY_CS_NONASCII" : "", + cs->state & MY_CS_NOPAD ? "|MY_CS_NOPAD" : ""); + + if (cs->coll_name.str) + { + CHARSET_INFO *srccs= inheritance_source(cs->number); + fprintf_lex_str_member(f, cs->cs_name, "/* cset name */"); + fprintf_lex_str_member(f, cs->coll_name, "/* coll name */"); + fprintf(f," \"\", /* comment */\n"); + if (cs->tailoring) + fprintf(f, " \"%s\", /* tailoring */\n", cs->tailoring); + else + fprintf(f," NULL, /* tailoring */\n"); + + print_array_ref(f, cs, cs->m_ctype, srccs, srccs->m_ctype, "ctype"); + print_array_ref(f, cs, cs->to_lower, srccs, srccs->to_lower, "to_lower"); + print_array_ref(f, cs, cs->to_upper, srccs, srccs->to_upper, "to_upper"); + + if (cs->sort_order) + fprintf(f," sort_order_%s, /* sort_order */\n", cs->coll_name.str); + else + fprintf(f," NULL, /* sort_order */\n"); + + fprintf(f," NULL, /* uca */\n"); + + print_array_ref(f, cs, cs->tab_to_uni, srccs, srccs->tab_to_uni, "to_uni"); + } + else + { + fprintf(f," {NULL,0}, /* cset name */\n"); + fprintf(f," {NULL,0}, /* coll name */\n"); + fprintf(f," NULL, /* comment */\n"); + fprintf(f," NULL, /* tailoging */\n"); + fprintf(f," NULL, /* ctype */\n"); + fprintf(f," NULL, /* lower */\n"); + fprintf(f," NULL, /* upper */\n"); + fprintf(f," NULL, /* sort order */\n"); + fprintf(f," NULL, /* uca */\n"); + fprintf(f," NULL, /* to_uni */\n"); + } + + fprintf(f," NULL, /* from_uni */\n"); + fprintf(f," NULL, /* casefold */\n"); + fprintf(f," NULL, /* state map */\n"); + fprintf(f," NULL, /* ident map */\n"); + fprintf(f," 1, /* strxfrm_multiply*/\n"); + fprintf(f," 1, /* mbminlen */\n"); + fprintf(f," 1, /* mbmaxlen */\n"); + fprintf(f," 0, /* min_sort_char */\n"); + fprintf(f," 255, /* max_sort_char */\n"); + fprintf(f," ' ', /* pad_char */\n"); + fprintf(f," 0, /* escape_with_backslash_is_dangerous */\n"); + fprintf(f," MY_CS_COLL_LEVELS_S1,\n"); + fprintf(f," &my_charset_8bit_handler,\n"); + + if (cs->state & MY_CS_BINSORT) + fprintf(f," &my_collation_8bit%s_bin_handler,\n", nopad_infix(cs)); + else + fprintf(f," &my_collation_8bit_simple%s_ci_handler,\n", nopad_infix(cs)); + fprintf(f,"}\n"); +} + + +static void +fprint_copyright(FILE *file) +{ + fprintf(file, +"/* Copyright 2000-2008 MySQL AB, 2008 Sun Microsystems, Inc.\n" +" Copyright (c) 2000, 2011, Oracle and/or its affiliates.\n" +" Copyright 2008-2023 MariaDB Corporation\n" +"\n" +" This program is free software; you can redistribute it and/or modify\n" +" it under the terms of the GNU General Public License as published by\n" +" the Free Software Foundation; version 2 of the License.\n" +"\n" +" This program is distributed in the hope that it will be useful,\n" +" but WITHOUT ANY WARRANTY; without even the implied warranty of\n" +" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" +" GNU General Public License for more details.\n" +"\n" +" You should have received a copy of the GNU General Public License\n" +" along with this program; if not, write to the Free Software\n" +" Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */\n" +"\n"); +} + + +int +main(int argc, char **argv __attribute__((unused))) +{ + struct charset_info_st ncs, *cs; + char filename[256]; + FILE *f= stdout; + + if (argc < 2) + { + fprintf(stderr, "usage: %s source-dir\n", argv[0]); + exit(EXIT_FAILURE); + } + + bzero((void*)&ncs,sizeof(ncs)); + bzero((void*)&all_charsets,sizeof(all_charsets)); + bzero((void*) refids, sizeof(refids)); + + snprintf(filename,sizeof(filename),"%s/%s",argv[1],"Index.xml"); + my_read_charset_file(filename); + + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) + { + if (cs->number && !(cs->state & MY_CS_COMPILED)) + { + if ( (!simple_cs_is_full(cs)) && (cs->cs_name.str)) + { + snprintf(filename, sizeof filename, "%s/%.*s.xml", + argv[1], cs->csname.length, cs->csname.str); + my_read_charset_file(filename); + } + cs->state|= MY_CS_LOADED; + } + } + + fprintf(f, "/*\n"); + fprintf(f, " This file was generated by the conf_to_src utility. " + "Do not edit it directly,\n"); + fprintf(f, " edit the XML definitions in sql/share/charsets/ instead.\n\n"); + fprintf(f, " To re-generate, run the following in the strings/ " + "directory:\n"); + fprintf(f, " ./conf_to_src ../sql/share/charsets/ > FILE\n"); + fprintf(f, "*/\n\n"); + fprint_copyright(f); + fprintf(f,"#include \"strings_def.h\"\n"); + fprintf(f,"#include <m_ctype.h>\n\n"); + + + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) + { + if (cs->state & MY_CS_LOADED) + { + CHARSET_INFO *refcs= find_charset_data_inheritance_source(cs); + cs->state|= my_8bit_charset_flags_from_data(cs) | + my_8bit_collation_flags_from_data(cs); + if (refcs) + { + refids[cs->number]= refcs->number; + inherit_charset_data(cs, refcs); + } + fprintf(f,"#ifdef HAVE_CHARSET_%s\n", cs->cs_name.str); + print_arrays(f, cs); + fprintf(f,"#endif\n"); + fprintf(f,"\n"); + } + } + + fprintf(f,"struct charset_info_st compiled_charsets[] = {\n"); + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) + { + if (cs->state & MY_CS_LOADED) + { + fprintf(f,"#ifdef HAVE_CHARSET_%s\n", cs->cs_name.str); + dispcset(f,cs); + fprintf(f,",\n"); + fprintf(f,"#endif\n"); + } + } + + dispcset(f,&ncs); + fprintf(f,"};\n"); + + return 0; +} |