summaryrefslogtreecommitdiffstats
path: root/mysys/mf_soundex.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--mysys/mf_soundex.c106
1 files changed, 106 insertions, 0 deletions
diff --git a/mysys/mf_soundex.c b/mysys/mf_soundex.c
new file mode 100644
index 00000000..b686cac8
--- /dev/null
+++ b/mysys/mf_soundex.c
@@ -0,0 +1,106 @@
+/* Copyright (c) 2000, 2002, 2004, 2007 MySQL AB
+ Use is subject to license terms
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/****************************************************************
+* SOUNDEX ALGORITHM in C *
+* *
+* The basic Algorithm source is taken from EDN Nov. *
+* 14, 1985 pg. 36. *
+* *
+* As a test Those in Illinois will find that the *
+* first group of numbers in their drivers license *
+* number is the soundex number for their last name. *
+* *
+* RHW PC-IBBS ID. #1230 *
+* *
+* As an extension if remove_garbage is set then all non- *
+* alpha characters are skipped *
+* *
+* Note, that this implementation corresponds to the *
+* original version of the algorithm, not to the more *
+* popular "enhanced" version, described by Knuth. *
+****************************************************************/
+
+#include "mysys_priv.h"
+#include <m_ctype.h>
+#include "my_static.h"
+
+static char get_scode(CHARSET_INFO * cs, char **ptr,pbool remove_garbage);
+
+ /* outputed string is 4 byte long */
+ /* out_pntr can be == in_pntr */
+
+void soundex(CHARSET_INFO * cs,register char * out_pntr, char * in_pntr,
+ pbool remove_garbage)
+{
+ char ch,last_ch;
+ reg3 char * end;
+ register const uchar *map=cs->to_upper;
+
+ if (remove_garbage)
+ {
+ while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skip pre-space */
+ in_pntr++;
+ }
+ *out_pntr++ = map[(uchar)*in_pntr]; /* Copy first letter */
+ last_ch = get_scode(cs,&in_pntr,0); /* code of the first letter */
+ /* for the first 'double-letter */
+ /* check. */
+ end=out_pntr+3; /* Loop on input letters until */
+ /* end of input (null) or output */
+ /* letter code count = 3 */
+
+ in_pntr++;
+ while (out_pntr < end && (ch = get_scode(cs,&in_pntr,remove_garbage)) != 0)
+ {
+ in_pntr++;
+ if ((ch != '0') && (ch != last_ch)) /* if not skipped or double */
+ {
+ *out_pntr++ = ch; /* letter, copy to output */
+ } /* for next double-letter check */
+ last_ch = ch; /* save code of last input letter */
+ }
+ while (out_pntr < end)
+ *out_pntr++ = '0';
+ *out_pntr=0; /* end string */
+ return;
+} /* soundex */
+
+
+ /*
+ If alpha, map input letter to soundex code.
+ If not alpha and remove_garbage is set then skip to next char
+ else return 0
+ */
+
+static char get_scode(CHARSET_INFO * cs,char **ptr, pbool remove_garbage)
+{
+ uchar ch;
+
+ if (remove_garbage)
+ {
+ while (**ptr && !my_isalpha(cs,**ptr))
+ (*ptr)++;
+ }
+ ch=my_toupper(cs,**ptr);
+ if (ch < 'A' || ch > 'Z')
+ {
+ if (my_isalpha(cs,ch)) /* If extended alfa (country spec) */
+ return '0'; /* threat as vokal */
+ return 0; /* Can't map */
+ }
+ return(soundex_map[ch-'A']);
+} /* get_scode */