diff options
Diffstat (limited to '')
-rw-r--r-- | mysys/mf_soundex.c | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/mysys/mf_soundex.c b/mysys/mf_soundex.c new file mode 100644 index 00000000..b686cac8 --- /dev/null +++ b/mysys/mf_soundex.c @@ -0,0 +1,106 @@ +/* Copyright (c) 2000, 2002, 2004, 2007 MySQL AB + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/**************************************************************** +* SOUNDEX ALGORITHM in C * +* * +* The basic Algorithm source is taken from EDN Nov. * +* 14, 1985 pg. 36. * +* * +* As a test Those in Illinois will find that the * +* first group of numbers in their drivers license * +* number is the soundex number for their last name. * +* * +* RHW PC-IBBS ID. #1230 * +* * +* As an extension if remove_garbage is set then all non- * +* alpha characters are skipped * +* * +* Note, that this implementation corresponds to the * +* original version of the algorithm, not to the more * +* popular "enhanced" version, described by Knuth. * +****************************************************************/ + +#include "mysys_priv.h" +#include <m_ctype.h> +#include "my_static.h" + +static char get_scode(CHARSET_INFO * cs, char **ptr,pbool remove_garbage); + + /* outputed string is 4 byte long */ + /* out_pntr can be == in_pntr */ + +void soundex(CHARSET_INFO * cs,register char * out_pntr, char * in_pntr, + pbool remove_garbage) +{ + char ch,last_ch; + reg3 char * end; + register const uchar *map=cs->to_upper; + + if (remove_garbage) + { + while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skip pre-space */ + in_pntr++; + } + *out_pntr++ = map[(uchar)*in_pntr]; /* Copy first letter */ + last_ch = get_scode(cs,&in_pntr,0); /* code of the first letter */ + /* for the first 'double-letter */ + /* check. */ + end=out_pntr+3; /* Loop on input letters until */ + /* end of input (null) or output */ + /* letter code count = 3 */ + + in_pntr++; + while (out_pntr < end && (ch = get_scode(cs,&in_pntr,remove_garbage)) != 0) + { + in_pntr++; + if ((ch != '0') && (ch != last_ch)) /* if not skipped or double */ + { + *out_pntr++ = ch; /* letter, copy to output */ + } /* for next double-letter check */ + last_ch = ch; /* save code of last input letter */ + } + while (out_pntr < end) + *out_pntr++ = '0'; + *out_pntr=0; /* end string */ + return; +} /* soundex */ + + + /* + If alpha, map input letter to soundex code. + If not alpha and remove_garbage is set then skip to next char + else return 0 + */ + +static char get_scode(CHARSET_INFO * cs,char **ptr, pbool remove_garbage) +{ + uchar ch; + + if (remove_garbage) + { + while (**ptr && !my_isalpha(cs,**ptr)) + (*ptr)++; + } + ch=my_toupper(cs,**ptr); + if (ch < 'A' || ch > 'Z') + { + if (my_isalpha(cs,ch)) /* If extended alfa (country spec) */ + return '0'; /* threat as vokal */ + return 0; /* Can't map */ + } + return(soundex_map[ch-'A']); +} /* get_scode */ |