summaryrefslogtreecommitdiffstats
path: root/extra/replace.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:00:34 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:00:34 +0000
commit3f619478f796eddbba6e39502fe941b285dd97b1 (patch)
treee2c7b5777f728320e5b5542b6213fd3591ba51e2 /extra/replace.c
parentInitial commit. (diff)
downloadmariadb-3f619478f796eddbba6e39502fe941b285dd97b1.tar.xz
mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.zip
Adding upstream version 1:10.11.6.upstream/1%10.11.6upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'extra/replace.c')
-rw-r--r--extra/replace.c1104
1 files changed, 1104 insertions, 0 deletions
diff --git a/extra/replace.c b/extra/replace.c
new file mode 100644
index 00000000..8b20f812
--- /dev/null
+++ b/extra/replace.c
@@ -0,0 +1,1104 @@
+/*
+ Copyright (c) 2000, 2014, Oracle and/or its affiliates
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; version 2 of
+ the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ 02110-1335 USA */
+
+/*
+ Replace strings in textfile
+
+ This program replaces strings in files or from stdin to stdout.
+ It accepts a list of from-string/to-string pairs and replaces
+ each occurrence of a from-string with the corresponding to-string.
+ The first occurrence of a found string is matched. If there is more
+ than one possibility for the string to replace, longer matches
+ are preferred before shorter matches.
+
+ Special characters in from string:
+ \^ Match start of line.
+ \$ Match end of line.
+ \b Match space-character, start of line or end of line.
+ For end \b the next replace starts looking at the end space-character.
+ An \b alone or in a string matches only a space-character.
+ \r, \t, \v as in C.
+ The program builds a DFA state machine from the strings, so the speed does
+ not depend on the number of replace-strings (only on the number of replaces).
+ A line is assumed to end with \n or \0.
+ There is no limit except memory on the length of strings.
+
+ Written by Monty.
+ fill_buffer_retaining() is taken from gnu-grep and modified.
+*/
+
+#include <my_global.h>
+#include <m_ctype.h>
+#include <my_sys.h>
+#include <m_string.h>
+#include <errno.h>
+
+#define PC_MALLOC 256 /* Bytes for pointers */
+#define PS_MALLOC 512 /* Bytes for data */
+/*
+  Growable list of NUL-terminated strings, filled by insert_pointer_name().
+  The string bytes are stored back to back in 'str'; typelib.type_names[]
+  holds pointers into 'str' and is ended by a NULL entry. The per-string
+  flag bytes share the type_names allocation and start right after its
+  max_count pointer slots.
+*/
+typedef struct st_pointer_array { /* when using array-strings */
+ TYPELIB typelib; /* Pointers to the strings and their count */
+ uchar *str; /* Storage for the string bytes */
+ uint8 *flag; /* One flag byte per string */
+ uint array_allocs,max_count,length,max_length; /* PC_MALLOC chunks in use,
+   pointer slots available, bytes used in str, bytes allocated for str */
+} POINTER_ARRAY;
+
+#define SPACE_CHAR 256
+#define START_OF_LINE 257
+#define END_OF_LINE 258
+#define LAST_CHAR_CODE 259
+/*
+  One state of the replace state machine built by init_replace().
+  'found' is zero for ordinary states; replace_strings() keeps following
+  next[input byte] while it stays zero. An entry of next[] may instead point
+  at a REPLACE_STRING (cast to REPLACE*), whose leading 'found' member is
+  non-zero and stops the scan.
+*/
+typedef struct st_replace {
+ uint8 found;
+ struct st_replace *next[256];
+} REPLACE;
+/*
+  Terminal entry of the state machine: describes one replacement.
+  'found' must stay the first member because these entries are reached
+  through REPLACE pointers. init_replace() stores 2 in it for a from-string
+  that is exactly "\^" and 1 otherwise. Entry 0 has replace_string == NULL
+  and marks end of input without a replacement.
+*/
+typedef struct st_replace_found {
+ my_bool found;
+ char *replace_string; /* Text to write, or NULL (see above) */
+ uint to_offset; /* Bytes to step back in the output before writing */
+ int from_offset; /* Subtracted from the input position afterwards */
+} REPLACE_STRING;
+
+#ifndef WORD_BIT
+#define WORD_BIT (8*sizeof(uint))
+#endif
+
+ /* Forward declarations of functions defined in this file */
+
+static int static_get_options(int *argc,char * * *argv);
+static int get_replace_strings(int *argc,char * * *argv,
+ POINTER_ARRAY *from_array,
+ POINTER_ARRAY *to_array);
+static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
+static void free_pointer_array(POINTER_ARRAY *pa);
+static int convert_pipe(REPLACE *,FILE *,FILE *);
+static int convert_file(REPLACE *, char *);
+static REPLACE *init_replace(char * *from, char * *to,uint count,
+ char * word_end_chars);
+static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
+ char * from);
+static int initialize_buffer(void);
+static void reset_buffer(void);
+static void free_buffer(void);
+
+static int silent=0,verbose=0,updated=0; /* -s and -v options; 'updated' is
+   set by replace_strings() when at least one replacement was made */
+
+ /* The main program */
+
+/*
+  Parse the options and the from/to pairs, build the replace state machine
+  and then either filter stdin to stdout (no file arguments left) or
+  convert each named file in turn.
+
+  Exit status: 1 when option parsing, pair parsing, building the state
+  machine or allocating the I/O buffers fails; 2 when any conversion fails;
+  0 otherwise.
+*/
+int main(int argc, char *argv[])
+{
+  int i,error;
+  char word_end_chars[256],*pos;
+  POINTER_ARRAY from,to;
+  REPLACE *replace;
+  MY_INIT(argv[0]);
+
+  if (static_get_options(&argc,&argv))
+    exit(1);
+  if (get_replace_strings(&argc,&argv,&from,&to))
+    exit(1);
+
+  /* Every latin1 space character counts as a word end for \b */
+  for (i=1,pos=word_end_chars ; i < 256 ; i++)
+    if (my_isspace(&my_charset_latin1,i))
+      *pos++= (char) i;
+  *pos=0;
+  if (!(replace=init_replace((char**) from.typelib.type_names,
+                             (char**) to.typelib.type_names,
+                             (uint) from.typelib.count,word_end_chars)))
+    exit(1);
+  /* init_replace() copied the to-strings, so the arrays can go now */
+  free_pointer_array(&from);
+  free_pointer_array(&to);
+  if (initialize_buffer())
+    return 1;
+
+  error=0;
+  if (argc == 0)
+    error=convert_pipe(replace,stdin,stdout);
+  else
+  {
+    while (argc--)
+    {
+      /* Remember any failure: a later success must not hide it */
+      if (convert_file(replace,*(argv++)))
+        error=1;
+    }
+  }
+  free_buffer();
+  my_free(replace);
+  my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
+  exit(error ? 2 : 0);
+  return 0; /* No compiler warning */
+} /* main */
+
+
+ /*
+   Reads the leading options.
+
+   Consumes every argument that starts with a single '-' (an argument that
+   starts with "--" ends option parsing and is left in place). Each letter
+   after the '-' is one option: s = silent, v = verbose, # = hand the rest
+   of the argument to DBUG_PUSH, V = print the version line, I or ? = print
+   the version line and the help text. Unknown letters are reported on
+   stderr and otherwise ignored.
+
+   On return *argv points at the first argument that was not consumed and
+   *argc is the number of arguments from there on. If nothing is left the
+   process exits with status 0 (after a message unless help/version was
+   printed). The function itself always returns 0.
+ */
+ /* Initiates DEBUG - but no debugging here ! */
+
+static int static_get_options(int *argc, char***argv)
+{
+ int help,version;
+ char *pos;
+
+ silent=verbose=help=0;
+
+ while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
+ while (*++pos)
+ {
+ version=0;
+ switch((*pos)) {
+ case 's':
+ silent=1;
+ break;
+ case 'v':
+ verbose=1;
+ break;
+ case '#':
+ DBUG_PUSH (++pos);
+ pos= (char*) " "; /* Skip rest of arguments */
+ break;
+ case 'V':
+ version=1;
+ /* fall through */
+ case 'I':
+ case '?':
+ help=1; /* Help text written */
+ printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
+ MACHINE_TYPE);
+ if (version)
+ break;
+ puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
+ puts("This program replaces strings in files or from stdin to stdout.\n"
+ "It accepts a list of from-string/to-string pairs and replaces\n"
+ "each occurrence of a from-string with the corresponding to-string.\n"
+ "The first occurrence of a found string is matched. If there is\n"
+ "more than one possibility for the string to replace, longer\n"
+ "matches are preferred before shorter matches.\n\n"
+ "A from-string can contain these special characters:\n"
+ " \\^ Match start of line.\n"
+ " \\$ Match end of line.\n"
+ " \\b Match space-character, start of line or end of line.\n"
+ " For a end \\b the next replace starts locking at the end\n"
+ " space-character. A \\b alone in a string matches only a\n"
+ " space-character.\n");
+ printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
+ puts("or");
+ printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
+ puts("");
+ puts("Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
+ break;
+ default:
+ fprintf(stderr,"illegal option: -%c\n",*pos);
+ break;
+ }
+ }
+ }
+ if (*argc == 0)
+ {
+ if (!help)
+ my_message(0,"No replace options given",MYF(ME_BELL));
+ exit(0); /* Don't use as pipe */
+ }
+ return(0);
+} /* static_get_options */
+
+
+/*
+  Collect the from/to string pairs from the argument list.
+
+  Arguments are taken two at a time until they run out or a lone "--" is
+  seen; the "--" itself is skipped so that *argv ends up at the first file
+  name and *argc is the number of file names.
+
+  Both arrays are zeroed first and then filled with copies of the strings.
+
+  Returns 0 on success. Returns 1 when the last from-string has no
+  to-string, or when a string could not be stored (allocation failure in
+  insert_pointer_name()).
+*/
+static int get_replace_strings(int *argc, char ***argv,
+                               POINTER_ARRAY *from_array,
+                               POINTER_ARRAY *to_array)
+{
+  char *pos;
+
+  bzero((char*) from_array,sizeof(from_array[0]));
+  bzero((char*) to_array,sizeof(to_array[0]));
+  while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
+  {
+    if (insert_pointer_name(from_array,pos))
+      return 1;
+    (*argc)--;
+    (*argv)++;
+    if (!*argc || !strcmp(**argv,"--"))
+    {
+      my_message(0,"No to-string for last from-string",MYF(ME_BELL));
+      return 1;
+    }
+    if (insert_pointer_name(to_array,**argv))
+      return 1;
+    (*argc)--;
+    (*argv)++;
+  }
+  if (*argc)
+  { /* Skip "--" argument */
+    (*argc)--;
+    (*argv)++;
+  }
+  return 0;
+}
+
+/*
+  Append a copy of 'name' to the pointer array.
+
+  The first call allocates the pointer/flag block and the string storage;
+  later calls grow either of them when needed. When the string storage is
+  moved by realloc, the stored string pointers are rebased onto the new
+  block. When the pointer/flag block grows, the flag bytes are moved to
+  their new place behind the enlarged pointer area.
+
+  Returns 0 on success and non-zero (-1 or 1) when an allocation fails.
+*/
+static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
+{
+  uint i,length,old_count;
+  uchar *new_pos;
+  const char **new_array;
+  DBUG_ENTER("insert_pointer_name");
+
+  if (! pa->typelib.count)
+  {
+    /* Size is rounded down to a whole number of (pointer + flag) slots */
+    if (!(pa->typelib.type_names=(const char **)
+          my_malloc(PSI_NOT_INSTRUMENTED, ((PC_MALLOC-MALLOC_OVERHEAD)/
+                    (sizeof(char *)+sizeof(*pa->flag))*
+                    (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
+      DBUG_RETURN(-1);
+    if (!(pa->str= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED,
+                                      PS_MALLOC-MALLOC_OVERHEAD, MYF(MY_WME))))
+    {
+      my_free((void*) pa->typelib.type_names);
+      DBUG_RETURN (-1);
+    }
+    pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
+                                               sizeof(*pa->flag));
+    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
+    pa->length=0;
+    pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
+    pa->array_allocs=1;
+  }
+  length=(uint) strlen(name)+1;
+  if (pa->length+length >= pa->max_length)
+  {
+    /* Round the new size up to a multiple of PS_MALLOC */
+    pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
+    pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD;
+    if (!(new_pos= (uchar*) my_realloc(PSI_NOT_INSTRUMENTED, (uchar*) pa->str,
+                                       (uint) pa->max_length, MYF(MY_WME))))
+      DBUG_RETURN(1);
+    if (new_pos != pa->str)
+    {
+      /* The block moved: shift every stored pointer by the same distance */
+      my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
+      for (i=0 ; i < pa->typelib.count ; i++)
+        pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
+                                              char*);
+      pa->str=new_pos;
+    }
+  }
+  if (pa->typelib.count >= pa->max_count-1)
+  {
+    int len;
+    pa->array_allocs++;
+    len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
+    if (!(new_array=(const char **) my_realloc(PSI_NOT_INSTRUMENTED, (void*)(pa->typelib.type_names),
+                                               (uint) len/
+                                               (sizeof(uchar*)+sizeof(*pa->flag))*
+                                               (sizeof(uchar*)+sizeof(*pa->flag)),
+                                               MYF(MY_WME))))
+      DBUG_RETURN(1);
+    pa->typelib.type_names=new_array;
+    old_count=pa->max_count;
+    pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
+    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
+    /*
+      The old and the new flag area live in the same allocation and overlap
+      once old_count exceeds the distance between them, so memmove() is
+      required here; memcpy() on overlapping areas is undefined.
+    */
+    memmove((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
+            old_count*sizeof(*pa->flag));
+  }
+  pa->flag[pa->typelib.count]=0; /* Reset flag */
+  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
+  pa->typelib.type_names[pa->typelib.count]= NullS; /* Put end-mark */
+  (void) strmov((char*) pa->str + pa->length, name);
+  pa->length+=length;
+  DBUG_RETURN(0);
+} /* insert_pointer_name */
+
+
+/*
+  Release the storage owned by a pointer array.
+  An array that holds no strings is left untouched.
+*/
+static void free_pointer_array(reg1 POINTER_ARRAY *pa)
+{
+  if (!pa->typelib.count)
+    return;
+
+  my_free((void*) pa->typelib.type_names);
+  my_free(pa->str);
+  pa->typelib.type_names=0;
+  pa->typelib.count=0;
+} /* free_pointer_array */
+
+
+ /* Code for replace routines */
+
+#define SET_MALLOC_HUNC 64 /* Sets are allocated this many at a time */
+/*
+  One state under construction in init_replace(): a bit set over the
+  entries of the follow[] table plus the transition for every character
+  code. next[] values >= 0 are set numbers; negative values are produced by
+  find_found() (or -1) and are translated into REPLACE_STRING pointers at
+  the end of init_replace().
+*/
+typedef struct st_rep_set {
+ uint *bits; /* Pointer to used sets */
+ short next[LAST_CHAR_CODE]; /* Pointer to next sets */
+ uint found_len; /* Best match to date */
+ int found_offset;
+ uint table_offset; /* from-string index; (uint) ~0 in a fresh set */
+ uint size_of_bits; /* For convenience */
+} REP_SET;
+/*
+  Pool of REP_SET objects. set_buffer and bit_buffer are the allocations;
+  'set' points 'invisible' entries into set_buffer so that hidden helper
+  sets are not counted or searched.
+*/
+typedef struct st_rep_sets {
+ uint count; /* Number of sets */
+ uint extra; /* Extra sets in buffer */
+ uint invisible; /* Sets not shown */
+ uint size_of_bits;
+ REP_SET *set,*set_buffer;
+ uint *bit_buffer;
+} REP_SETS;
+/* A distinct (from-string index, offset) pair recorded by find_found() */
+typedef struct st_found_set {
+ uint table_offset;
+ int found_offset;
+} FOUND_SET;
+/*
+  One position inside a from-string, as laid out by init_replace().
+  chr is a byte value, one of SPACE_CHAR / START_OF_LINE / END_OF_LINE, or
+  0 for the entry that terminates a from-string.
+*/
+typedef struct st_follow {
+ int chr;
+ uint table_offset; /* Index of the from-string this entry belongs to */
+ uint len; /* 1-based position within that from-string */
+} FOLLOWS;
+
+
+static int init_sets(REP_SETS *sets,uint states);
+static REP_SET *make_new_set(REP_SETS *sets);
+static void make_sets_invisible(REP_SETS *sets);
+static void free_last_set(REP_SETS *sets);
+static void free_sets(REP_SETS *sets);
+static void internal_set_bit(REP_SET *set, uint bit);
+static void internal_clear_bit(REP_SET *set, uint bit);
+static void or_bits(REP_SET *to,REP_SET *from);
+static void copy_bits(REP_SET *to,REP_SET *from);
+static int cmp_bits(REP_SET *set1,REP_SET *set2);
+static int get_next_bit(REP_SET *set,uint lastpos);
+static short find_set(REP_SETS *sets,REP_SET *find);
+static short find_found(FOUND_SET *found_set,uint table_offset,
+ int found_offset);
+static uint start_at_word(char * pos);
+static uint end_of_word(char * pos);
+static uint replace_len(char * pos);
+
+static uint found_sets=0; /* Entries in use in the found_set[] array that
+   init_replace() hands to find_found() */
+
+
+ /*
+   Init a replace structure for further calls
+
+   Builds the state machine used by replace_strings() from 'count' pairs
+   from[i] -> to[i]. word_end_chars is a NUL-terminated list of the
+   characters that \b matches.
+
+   Returns one my_malloc'ed block that holds, in this order, the REPLACE
+   states, the REPLACE_STRING entries, an array of pointers to the
+   to-strings and copies of the to-strings; main() releases it with a single
+   my_free(). Returns 0 if an allocation fails or if a from-string is empty
+   (errno is then set to EINVAL).
+
+   NOTE(review): the message printed for an empty from-string is "No
+   to-string for last from-string", which looks copied from
+   get_replace_strings() - confirm the intended wording.
+   NOTE(review): the results of make_new_set() are used without a NULL
+   check - confirm that allocation failure is acceptable to ignore here.
+ */
+
+static REPLACE *init_replace(char * *from, char * *to,uint count,
+ char * word_end_chars)
+{
+ uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
+ int used_sets,chr;
+ short default_state;
+ char used_chars[LAST_CHAR_CODE],is_word_end[256];
+ char * pos, *to_pos, **to_array;
+ REP_SETS sets;
+ REP_SET *set,*start_states,*word_states,*new_set;
+ FOLLOWS *follow,*follow_ptr;
+ REPLACE *replace;
+ FOUND_SET *found_set;
+ REPLACE_STRING *rep_str;
+ DBUG_ENTER("init_replace");
+
+ /* Count number of states */
+ for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
+ {
+ len=replace_len(from[i]);
+ if (!len)
+ {
+ errno=EINVAL;
+ my_message(0,"No to-string for last from-string",MYF(ME_BELL));
+ DBUG_RETURN(0);
+ }
+ states+=len+1;
+ result_len+=(uint) strlen(to[i])+1;
+ if (len > max_length)
+ max_length=len;
+ }
+ bzero((char*) is_word_end,sizeof(is_word_end));
+ for (i=0 ; word_end_chars[i] ; i++)
+ is_word_end[(uchar) word_end_chars[i]]=1;
+
+ if (init_sets(&sets,states))
+ DBUG_RETURN(0);
+ found_sets=0;
+ if (!(found_set= (FOUND_SET*) my_malloc(PSI_NOT_INSTRUMENTED,
+ sizeof(FOUND_SET)*max_length*count,
+ MYF(MY_WME))))
+ {
+ free_sets(&sets);
+ DBUG_RETURN(0);
+ }
+ (void) make_new_set(&sets); /* Set starting set */
+ make_sets_invisible(&sets); /* Hide previous sets */
+ used_sets=-1; /* sets.set[-1] is the hidden set, used as scratch below */
+ word_states=make_new_set(&sets); /* Start of new word */
+ start_states=make_new_set(&sets); /* This is first state */
+ if (!(follow=(FOLLOWS*) my_malloc(PSI_NOT_INSTRUMENTED,
+ (states+2)*sizeof(FOLLOWS), MYF(MY_WME))))
+ {
+ free_sets(&sets);
+ my_free(found_set);
+ DBUG_RETURN(0);
+ }
+
+ /* Init follow_ptr[] */
+ for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
+ {
+ if (from[i][0] == '\\' && from[i][1] == '^')
+ {
+ internal_set_bit(start_states,states+1);
+ if (!from[i][2])
+ {
+ start_states->table_offset=i;
+ start_states->found_offset=1;
+ }
+ }
+ else if (from[i][0] == '\\' && from[i][1] == '$')
+ {
+ internal_set_bit(start_states,states);
+ internal_set_bit(word_states,states);
+ if (!from[i][2] && start_states->table_offset == (uint) ~0)
+ {
+ start_states->table_offset=i;
+ start_states->found_offset=0;
+ }
+ }
+ else
+ {
+ internal_set_bit(word_states,states);
+ if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
+ internal_set_bit(start_states,states+1);
+ else
+ internal_set_bit(start_states,states);
+ }
+ for (pos=from[i], len=0; *pos ; pos++)
+ {
+ if (*pos == '\\' && *(pos+1))
+ {
+ pos++;
+ switch (*pos) {
+ case 'b':
+ follow_ptr->chr = SPACE_CHAR;
+ break;
+ case '^':
+ follow_ptr->chr = START_OF_LINE;
+ break;
+ case '$':
+ follow_ptr->chr = END_OF_LINE;
+ break;
+ case 'r':
+ follow_ptr->chr = '\r';
+ break;
+ case 't':
+ follow_ptr->chr = '\t';
+ break;
+ case 'v':
+ follow_ptr->chr = '\v';
+ break;
+ default:
+ follow_ptr->chr = (uchar) *pos;
+ break;
+ }
+ }
+ else
+ follow_ptr->chr= (uchar) *pos;
+ follow_ptr->table_offset=i;
+ follow_ptr->len= ++len;
+ follow_ptr++;
+ }
+ follow_ptr->chr=0; /* End marker for this from-string */
+ follow_ptr->table_offset=i;
+ follow_ptr->len=len;
+ follow_ptr++;
+ states+=(uint) len+1;
+ }
+
+
+ for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
+ {
+ set=sets.set+set_nr;
+ default_state= 0; /* Start from beginning */
+
+ /* If end of found-string not found or start-set with current set */
+
+ for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
+ {
+ if (!follow[i].chr)
+ {
+ if (! default_state)
+ default_state= find_found(found_set,set->table_offset,
+ set->found_offset+1);
+ }
+ }
+ copy_bits(sets.set+used_sets,set); /* Save set for changes */
+ if (!default_state)
+ or_bits(sets.set+used_sets,sets.set); /* Can restart from start */
+
+ /* Find all chars that follows current sets */
+ bzero((char*) used_chars,sizeof(used_chars));
+ for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
+ {
+ used_chars[follow[i].chr]=1;
+ if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
+ follow[i].len > 1) || follow[i].chr == END_OF_LINE)
+ used_chars[0]=1;
+ }
+
+ /* Mark word_chars used if \b is in state */
+ if (used_chars[SPACE_CHAR])
+ for (pos= word_end_chars ; *pos ; pos++)
+ used_chars[(int) (uchar) *pos] = 1;
+
+ /* Handle other used characters */
+ for (chr= 0 ; chr < 256 ; chr++)
+ {
+ if (! used_chars[chr])
+ set->next[chr]= (short) (chr ? default_state : -1);
+ else
+ {
+ new_set=make_new_set(&sets);
+ set=sets.set+set_nr; /* if realloc */
+ new_set->table_offset=set->table_offset;
+ new_set->found_len=set->found_len;
+ new_set->found_offset=set->found_offset+1;
+ found_end=0;
+
+ for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
+ {
+ if (!follow[i].chr || follow[i].chr == chr ||
+ (follow[i].chr == SPACE_CHAR &&
+ (is_word_end[chr] ||
+ (!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
+ (follow[i].chr == END_OF_LINE && ! chr))
+ {
+ if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
+ follow[i].len > found_end)
+ found_end=follow[i].len;
+ if (chr && follow[i].chr)
+ internal_set_bit(new_set,i+1); /* To next set */
+ else
+ internal_set_bit(new_set,i);
+ }
+ }
+ if (found_end)
+ {
+ new_set->found_len=0; /* Set for testing if first */
+ bits_set=0;
+ for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
+ {
+ if ((follow[i].chr == SPACE_CHAR ||
+ follow[i].chr == END_OF_LINE) && ! chr)
+ bit_nr=i+1;
+ else
+ bit_nr=i;
+ if (follow[bit_nr-1].len < found_end ||
+ (new_set->found_len &&
+ (chr == 0 || !follow[bit_nr].chr)))
+ internal_clear_bit(new_set,i);
+ else
+ {
+ if (chr == 0 || !follow[bit_nr].chr)
+ { /* best match */
+ new_set->table_offset=follow[bit_nr].table_offset;
+ if (chr || (follow[i].chr == SPACE_CHAR ||
+ follow[i].chr == END_OF_LINE))
+ new_set->found_offset=found_end; /* New match */
+ new_set->found_len=found_end;
+ }
+ bits_set++;
+ }
+ }
+ if (bits_set == 1)
+ {
+ set->next[chr] = find_found(found_set,
+ new_set->table_offset,
+ new_set->found_offset);
+ free_last_set(&sets);
+ }
+ else
+ set->next[chr] = find_set(&sets,new_set);
+ }
+ else
+ set->next[chr] = find_set(&sets,new_set);
+ }
+ }
+ }
+
+ /* Alloc replace structure for the replace-state-machine */
+
+ if ((replace=(REPLACE*) my_malloc(PSI_NOT_INSTRUMENTED,
+ sizeof(REPLACE)*(sets.count)+
+ sizeof(REPLACE_STRING)*(found_sets+1)+
+ sizeof(char *)*count+result_len,
+ MYF(MY_WME | MY_ZEROFILL))))
+ {
+ rep_str=(REPLACE_STRING*) (replace+sets.count);
+ to_array=(char **) (rep_str+found_sets+1);
+ to_pos=(char *) (to_array+count);
+ for (i=0 ; i < count ; i++)
+ {
+ to_array[i]=to_pos;
+ to_pos=strmov(to_pos,to[i])+1;
+ }
+ rep_str[0].found=1; /* Entry 0: end of input, nothing to replace */
+ rep_str[0].replace_string=0;
+ for (i=1 ; i <= found_sets ; i++)
+ {
+ pos=from[found_set[i-1].table_offset];
+ /*
+ Test if we are matching start of string (\^)
+ We can't use bcmp() here as pos may be only 1 character and
+ that would confuse MSAN.
+ */
+ rep_str[i].found= (uint8) ((pos[0] == '\\' && pos[1] == '^' &&
+ pos[2] == 0) ? 2 : 1);
+ rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
+ rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
+ rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
+ end_of_word(pos);
+ }
+ /* Turn set numbers (>= 0) and found codes (< 0) into pointers */
+ for (i=0 ; i < sets.count ; i++)
+ {
+ for (j=0 ; j < 256 ; j++)
+ if (sets.set[i].next[j] >= 0)
+ replace[i].next[j]=replace+sets.set[i].next[j];
+ else
+ replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
+ }
+ }
+ my_free(follow);
+ free_sets(&sets);
+ my_free(found_set);
+ DBUG_PRINT("exit",("Replace table has %d states",sets.count));
+ DBUG_RETURN(replace);
+}
+
+
+/*
+  Prepare an empty set pool for 'states' follow positions.
+
+  Zeroes *sets, computes size_of_bits and allocates the two buffers for the
+  first SET_MALLOC_HUNC sets. 'extra' stays 0, so the first make_new_set()
+  call is the one that wires the sets to their bit arrays.
+
+  Returns 0 on success and 1 if an allocation fails; nothing is left
+  allocated in the failure case.
+*/
+static int init_sets(REP_SETS *sets,uint states)
+{
+  bzero((char*) sets,sizeof(*sets));
+  sets->size_of_bits=((states+7)/8);
+  if (!(sets->set_buffer=(REP_SET*) my_malloc(PSI_NOT_INSTRUMENTED,
+                                              sizeof(REP_SET)*SET_MALLOC_HUNC,
+                                              MYF(MY_WME))))
+    return 1;
+  if (!(sets->bit_buffer=(uint*) my_malloc(PSI_NOT_INSTRUMENTED,
+                                           sizeof(uint)*sets->size_of_bits*
+                                           SET_MALLOC_HUNC,MYF(MY_WME))))
+  {
+    /* Free the buffer that was allocated above (sets->set is still NULL) */
+    my_free(sets->set_buffer);
+    sets->set_buffer=0;
+    return 1;
+  }
+  return 0;
+}
+
+/*
+  Hide every set created so far: they stay in the buffer in front of
+  sets->set but are no longer counted, which keeps the later code simpler.
+*/
+static void make_sets_invisible(REP_SETS *sets)
+{
+  const uint hidden= sets->count;
+
+  sets->count= 0;
+  sets->set+= hidden;
+  sets->invisible= hidden;
+}
+
+/*
+  Hand out the next free set, cleared and ready for use.
+
+  When no spare set is left, both buffers are enlarged by SET_MALLOC_HUNC
+  sets first and every set is pointed at its slice of the bit buffer again.
+  Because of that, pointers to sets obtained earlier may be stale after
+  this call. Returns 0 if the buffers cannot be enlarged.
+*/
+static REP_SET *make_new_set(REP_SETS *sets)
+{
+  REP_SET *fresh;
+
+  if (!sets->extra)
+  {
+    uint idx, *words;
+    uint total= sets->count+sets->invisible+SET_MALLOC_HUNC;
+    REP_SET *grown;
+
+    grown= (REP_SET*) my_realloc(PSI_NOT_INSTRUMENTED, sets->set_buffer,
+                                 sizeof(REP_SET)*total, MYF(MY_WME));
+    if (!grown)
+      return 0;
+    sets->set_buffer= grown;
+    sets->set= grown+sets->invisible;
+
+    words= (uint*) my_realloc(PSI_NOT_INSTRUMENTED, sets->bit_buffer,
+                              (sizeof(uint)*sets->size_of_bits)*total,
+                              MYF(MY_WME));
+    if (!words)
+      return 0;
+    sets->bit_buffer= words;
+    for (idx= 0 ; idx < total ; idx++, words+= sets->size_of_bits)
+      sets->set_buffer[idx].bits= words;
+    sets->extra= SET_MALLOC_HUNC;
+  }
+
+  sets->extra--;
+  fresh= sets->set+sets->count;
+  sets->count++;
+  bzero((char*) fresh->bits,sizeof(uint)*sets->size_of_bits);
+  bzero((char*) &fresh->next[0],sizeof(fresh->next[0])*LAST_CHAR_CODE);
+  fresh->found_offset= 0;
+  fresh->found_len= 0;
+  fresh->table_offset= (uint) ~0;
+  fresh->size_of_bits= sets->size_of_bits;
+  return fresh;
+}
+
+/* Give the most recently handed out set back to the pool of spare sets */
+static void free_last_set(REP_SETS *sets)
+{
+  --sets->count;
+  ++sets->extra;
+}
+
+/* Release the set buffer and the bit buffer of a set pool */
+static void free_sets(REP_SETS *sets)
+{
+  my_free(sets->bit_buffer);
+  my_free(sets->set_buffer);
+}
+
+/*
+  Set bit number 'bit' in the set.
+  The mask is built from an unsigned 1 so that the top bit of a word can be
+  addressed without shifting into the sign bit of an int.
+*/
+static void internal_set_bit(REP_SET *set, uint bit)
+{
+  set->bits[bit / WORD_BIT] |= 1U << (bit % WORD_BIT);
+  return;
+}
+
+/*
+  Clear bit number 'bit' in the set.
+  The mask is built from an unsigned 1 so that the top bit of a word can be
+  addressed without shifting into the sign bit of an int.
+*/
+static void internal_clear_bit(REP_SET *set, uint bit)
+{
+  set->bits[bit / WORD_BIT] &= ~ (1U << (bit % WORD_BIT));
+  return;
+}
+
+
+/* Add every bit of 'from' to 'to'; the length is taken from 'to' */
+static void or_bits(REP_SET *to,REP_SET *from)
+{
+  uint *dst= to->bits;
+  uint *src= from->bits;
+  uint *dst_end= dst + to->size_of_bits;
+
+  while (dst < dst_end)
+    *dst++ |= *src++;
+}
+
+/* Overwrite the bits of 'to' with those of 'from'; length comes from 'to' */
+static void copy_bits(REP_SET *to,REP_SET *from)
+{
+  size_t bytes= (size_t) (sizeof(uint) * to->size_of_bits);
+
+  memcpy((uchar*) to->bits,(uchar*) from->bits,bytes);
+}
+
+/* Return 0 when both sets hold the same bits (memcmp() semantics) */
+static int cmp_bits(REP_SET *set1,REP_SET *set2)
+{
+  size_t bytes= sizeof(uint) * set1->size_of_bits;
+
+  return memcmp(set1->bits, set2->bits, bytes);
+}
+
+
+/*
+  Get next set bit from set.
+
+  Returns the number of the first set bit above 'lastpos', or 0 when there
+  is none. Pass (uint) ~0 as lastpos to start the search at bit 0; note
+  that a result of 0 always means "no more bits", so bit 0 itself can never
+  be reported.
+
+  The mask is built from an unsigned 1 so that a search starting at the top
+  bit of a word does not shift into the sign bit of an int.
+*/
+static int get_next_bit(REP_SET *set,uint lastpos)
+{
+  uint pos,*start,*end,bits;
+
+  start=set->bits+ ((lastpos+1) / WORD_BIT);
+  end=set->bits + set->size_of_bits;
+  /* Drop the bits at and below lastpos in the first word */
+  bits=start[0] & ~((1U << ((lastpos+1) % WORD_BIT)) -1);
+
+  while (! bits && ++start < end)
+    bits=start[0];
+  if (!bits)
+    return 0;
+  pos=(uint) (start-set->bits)*WORD_BIT;
+  while (! (bits & 1))
+  {
+    bits>>=1;
+    pos++;
+  }
+  return pos;
+}
+
+/*
+  Look for an earlier set with the same bits as 'find', which is expected
+  to be the most recently created set. If one exists, the new set is given
+  back to the pool and the position of the earlier set is returned;
+  otherwise the position of the new set is returned.
+*/
+static short find_set(REP_SETS *sets,REP_SET *find)
+{
+  const uint last= sets->count-1;
+  uint idx= 0;
+
+  while (idx < last)
+  {
+    if (cmp_bits(sets->set+idx,find) == 0)
+    {
+      free_last_set(sets);
+      return (short) idx;
+    }
+    idx++;
+  }
+  return (short) idx; /* return new position */
+}
+
+
+/*
+  Find the found_set entry with the given table_offset and found_offset.
+  If there is none, a new entry is appended and found_sets is increased.
+
+  The value returned is -index-2, because it is stored in set->next[],
+  where values >= 0 point to the next set and -1 is reserved for end
+  without replaces.
+*/
+static short find_found(FOUND_SET *found_set,uint table_offset,
+                        int found_offset)
+{
+  uint idx;
+
+  for (idx= 0 ; idx < found_sets ; idx++)
+  {
+    FOUND_SET *entry= found_set+idx;
+    if (entry->table_offset == table_offset &&
+        entry->found_offset == found_offset)
+      return (short) (-(int) idx-2);
+  }
+  /* Not seen before: idx == found_sets, store it in the next free slot */
+  found_set[idx].table_offset= table_offset;
+  found_set[idx].found_offset= found_offset;
+  found_sets++;
+  return (short) (-(int) idx-2); /* return new position */
+}
+
+/*
+  Return 1 if the from-string starts with \^ or with a \b that is followed
+  by more characters, otherwise 0.
+*/
+static uint start_at_word(char * pos)
+{
+  if (pos[0] != '\\')
+    return 0;
+  if (pos[1] == '^')
+    return 1;
+  return (pos[1] == 'b' && pos[2]) ? 1 : 0;
+}
+
+/*
+  Return 1 if the from-string ends with \$ or with a \b that is preceded by
+  at least one other character, otherwise 0.
+*/
+static uint end_of_word(char * pos)
+{
+  char *tail= strend(pos);
+  size_t length= (size_t) (tail-pos);
+
+  if (length < 2 || tail[-2] != '\\')
+    return 0;
+  if (tail[-1] == '$')
+    return 1;
+  return (tail[-1] == 'b' && length > 2) ? 1 : 0;
+}
+
+
+/*
+  Length of a from-string in matched positions: a backslash together with
+  the character after it counts as one; a trailing lone backslash counts
+  as one as well.
+*/
+static uint replace_len(char * str)
+{
+  uint count;
+
+  for (count= 0 ; *str ; count++)
+    str+= (str[0] == '\\' && str[1]) ? 2 : 1;
+  return count;
+}
+
+
+ /*
+   The actual loop
+
+   Runs the NUL-terminated line 'from' through the state machine 'rep' and
+   writes the converted line to the buffer *start, whose size is
+   *max_length. The buffer is enlarged with my_realloc() when it fills up;
+   *start and *max_length are updated in that case.
+
+   The scan begins in state rep+1 and restarts in state rep after each
+   replacement. The global 'updated' is set to 1 when something is replaced.
+
+   Returns the number of bytes written, or (uint) -1 if the buffer could
+   not be enlarged.
+ */
+
+static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
+ char *from)
+{
+ reg1 REPLACE *rep_pos;
+ reg2 REPLACE_STRING *rep_str;
+ char *to, *end, *pos, *new;
+
+ end=(to= *start) + *max_length-1;
+ rep_pos=rep+1;
+ for(;;)
+ {
+ /* Copy input bytes until the machine reaches a REPLACE_STRING entry */
+ while (!rep_pos->found)
+ {
+ rep_pos= rep_pos->next[(uchar) *from];
+ if (to == end)
+ {
+ (*max_length)+=8192;
+ if (!(new=my_realloc(PSI_NOT_INSTRUMENTED, *start,*max_length,MYF(MY_WME))))
+ return (uint) -1;
+ to=new+(to - *start);
+ end=(*start=new)+ *max_length-1;
+ }
+ *to++= *from++;
+ }
+ /* No replace string: end of line; -1 leaves out the byte copied last */
+ if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
+ return (uint) (to - *start)-1;
+ updated=1; /* Some char * is replaced */
+ to-=rep_str->to_offset; /* Drop the already copied part of the match */
+ for (pos=rep_str->replace_string; *pos ; pos++)
+ {
+ if (to == end)
+ {
+ (*max_length)*=2;
+ if (!(new=my_realloc(PSI_NOT_INSTRUMENTED, *start,*max_length,MYF(MY_WME))))
+ return (uint) -1;
+ to=new+(to - *start);
+ end=(*start=new)+ *max_length-1;
+ }
+ *to++= *pos;
+ }
+ if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
+ return (uint) (to - *start);
+ rep_pos=rep;
+ }
+}
+
+static char *buffer; /* The input buffer itself, grown as needed. */
+static int bufbytes; /* Number of bytes in the buffer. */
+static int bufread,my_eof; /* bufread: number of bytes to get with each
+   read(); my_eof: set once a newline was added at end of file. */
+static uint bufalloc; /* Buffer size; bufalloc+1 bytes are allocated */
+static char *out_buff; /* Output buffer for one converted line */
+static uint out_length; /* Allocated size of out_buff */
+
+/*
+  Allocate the input and output buffers and reset the buffer state.
+  Returns 0 on success and 1 if either allocation fails.
+*/
+static int initialize_buffer()
+{
+  bufread= 8192;
+  bufalloc= bufread + bufread / 2;
+  buffer= my_malloc(PSI_NOT_INSTRUMENTED, bufalloc+1, MYF(MY_WME));
+  if (!buffer)
+    return 1;
+
+  bufbytes= 0;
+  my_eof= 0;
+  out_length= bufread;
+  out_buff= my_malloc(PSI_NOT_INSTRUMENTED, out_length, MYF(MY_WME));
+  return out_buff ? 0 : 1;
+}
+
+/* Forget buffered input and the end-of-file mark before a new input */
+static void reset_buffer()
+{
+  my_eof= 0;
+  bufbytes= 0;
+}
+
+/* Release the input and output buffers */
+static void free_buffer()
+{
+  my_free(out_buff);
+  my_free(buffer);
+}
+
+
+/*
+  Fill the buffer retaining the last n bytes at the beginning of the
+  newly filled buffer (for backward context).
+
+  The buffer is enlarged first when the retained bytes leave no room for a
+  full read. If that fails, the old buffer and its sizes are kept, so the
+  buffer can still be used for the next input and freed later.
+
+  Returns the number of new bytes in the buffer (this includes the newline
+  that is added when a non-empty input does not end with one), 0 at end of
+  input and -1 on allocation or read error.
+*/
+
+static int fill_buffer_retaining(File fd, int n)
+{
+  int i;
+
+  /* See if we need to grow the buffer. */
+  if ((int) bufalloc - n <= bufread)
+  {
+    uint new_alloc= bufalloc;
+    int new_read= bufread;
+    char *new_buffer;
+
+    while ((int) new_alloc - n <= new_read)
+    {
+      new_alloc *= 2;
+      new_read *= 2;
+    }
+    /* Commit the new sizes only after the reallocation has succeeded */
+    if (!(new_buffer= my_realloc(PSI_NOT_INSTRUMENTED, buffer, new_alloc+1,
+                                 MYF(MY_WME))))
+      return(-1);
+    buffer= new_buffer;
+    bufalloc= new_alloc;
+    bufread= new_read;
+  }
+
+  /* Shift stuff down. */
+  bmove(buffer,buffer+bufbytes-n,(uint) n);
+  bufbytes = n;
+
+  if (my_eof)
+    return 0;
+
+  /* Read in new stuff. */
+  if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
+                       (size_t) bufread, MYF(MY_WME))) < 0)
+    return -1;
+
+  /* Kludge to pretend every nonempty file ends with a newline. */
+  if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
+  {
+    my_eof = i = 1;
+    buffer[bufbytes] = '\n';
+  }
+
+  bufbytes += i;
+  return i;
+}
+
+/*
+  Convert everything readable from 'in' and write the result to 'out'.
+
+  The input is processed line by line; a line ends with \n or \0. An
+  incomplete line at the end of the buffer is retained and completed by the
+  next read. The newline that fill_buffer_retaining() adds at end of file
+  is not written.
+
+  Returns 0 if the conversion is ok, 1 if a line could not be converted or
+  written, and -1 if reading failed. The global variable 'updated' is set
+  if something was changed.
+*/
+
+static int convert_pipe(REPLACE *rep, FILE *in, FILE *out)
+{
+  int retain,error;
+  uint length;
+  char save_char,*end_of_line,*start_of_line;
+  DBUG_ENTER("convert_pipe");
+
+  updated=retain=0;
+  reset_buffer();
+
+  while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
+  {
+    end_of_line=buffer ;
+    buffer[bufbytes]=0; /* Sentinel */
+    for (;;)
+    {
+      start_of_line=end_of_line;
+      while (end_of_line[0] != '\n' && end_of_line[0])
+        end_of_line++;
+      if (end_of_line == buffer+bufbytes)
+      {
+        retain= (int) (end_of_line - start_of_line);
+        break; /* No end of line, read more */
+      }
+      save_char=end_of_line[0];
+      end_of_line[0]=0;
+      end_of_line++;
+      /* Leave through DBUG_RETURN so the DBUG call stack stays balanced */
+      if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
+          (uint) -1)
+        DBUG_RETURN(1);
+      if (!my_eof)
+        out_buff[length++]=save_char; /* Don't write added newline */
+      if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
+        DBUG_RETURN(1);
+    }
+  }
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Convert one file in place.
+
+  The converted text is written to a temporary file in the same directory.
+  If something was replaced and no error occurred, the temporary file takes
+  the place of the original (a symlink is followed first where readlink is
+  available); otherwise the temporary file is removed and the original is
+  left as it was.
+
+  Returns 0 on success and non-zero if the file could not be opened,
+  converted, completely written or moved into place.
+*/
+static int convert_file(REPLACE *rep, char * name)
+{
+  int error;
+  FILE *in,*out;
+  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
+#ifdef HAVE_READLINK
+  char link_name[FN_REFLEN];
+#endif
+  File temp_file;
+  size_t dir_buff_length;
+  DBUG_ENTER("convert_file");
+
+  /* check if name is a symlink */
+#ifdef HAVE_READLINK
+  org_name= (!my_disable_symlinks &&
+             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
+#endif
+  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
+    DBUG_RETURN(1);
+  dirname_part(dir_buff, org_name, &dir_buff_length);
+  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", 0,
+                                   MYF(MY_WME))) < 0)
+  {
+    my_fclose(in,MYF(0));
+    DBUG_RETURN(1);
+  }
+  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
+  {
+    /*
+      No stream took over the descriptor, so close it and remove the
+      temporary file here.
+      NOTE(review): assumes my_fdopen() leaves the descriptor open when it
+      fails - confirm against mysys.
+    */
+    my_close(temp_file,MYF(0));
+    my_delete(tempname,MYF(0));
+    my_fclose(in,MYF(0));
+    DBUG_RETURN(1);
+  }
+
+  error=convert_pipe(rep,in,out);
+  my_fclose(in,MYF(0));
+  /* A failed close means buffered output was lost: keep the original */
+  if (my_fclose(out,MYF(MY_WME)) && !error)
+    error=1;
+
+  if (updated && ! error)
+  {
+    if (my_redel(org_name, tempname, 0, MYF(MY_WME | MY_LINK_WARNING)))
+      error=1;
+  }
+  else
+    my_delete(tempname,MYF(MY_WME));
+  if (!silent && ! error)
+  {
+    if (updated)
+      printf("%s converted\n",name);
+    else if (verbose)
+      printf("%s left unchanged\n",name);
+  }
+  DBUG_RETURN(error);
+}