summaryrefslogtreecommitdiffstats
path: root/mysys/mf_soundex.c
blob: b686cac89b4b8c13b019fe00b834fb749b629aa9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/* Copyright (c) 2000, 2002, 2004, 2007 MySQL AB
   Use is subject to license terms

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */

/****************************************************************
*	SOUNDEX ALGORITHM in C					*
*								*
*	The basic Algorithm source is taken from EDN Nov.	*
*	14, 1985 pg. 36.					*
*								*
*	As a test Those in Illinois will find that the		*
*	first group of numbers in their drivers license		*
*	number is the soundex number for their last name.	*
*								*
*	RHW  PC-IBBS ID. #1230					*
*								*
*	As an extension if remove_garbage is set then all non-	*
*	alpha characters are skipped				*
*                                                               *
*       Note, that this implementation corresponds to the       *
*       original version of the algorithm, not to the more      *
*       popular "enhanced" version, described by Knuth.         *
****************************************************************/

#include "mysys_priv.h"
#include <m_ctype.h>
#include "my_static.h"

static char get_scode(CHARSET_INFO * cs, char **ptr,pbool remove_garbage);

		/* outputed string is 4 byte long */
		/* out_pntr can be == in_pntr */

void soundex(CHARSET_INFO * cs,register char * out_pntr, char * in_pntr,
	     pbool remove_garbage)
{
  char ch,last_ch;
  reg3 char * end;
  register const uchar *map=cs->to_upper;

  if (remove_garbage)
  {
    while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skip pre-space */
      in_pntr++;
  }
  *out_pntr++ = map[(uchar)*in_pntr];	/* Copy first letter		 */
  last_ch = get_scode(cs,&in_pntr,0);	/* code of the first letter	 */
					/* for the first 'double-letter  */
					/* check.			 */
  end=out_pntr+3;			/* Loop on input letters until	 */
					/* end of input (null) or output */
					/* letter code count = 3	 */

  in_pntr++;
  while (out_pntr < end && (ch = get_scode(cs,&in_pntr,remove_garbage)) != 0)
  {
    in_pntr++;
    if ((ch != '0') && (ch != last_ch)) /* if not skipped or double */
    {
      *out_pntr++ = ch;			/* letter, copy to output */
    }					/* for next double-letter check */
    last_ch = ch;			/* save code of last input letter */
  }
  while (out_pntr < end)
    *out_pntr++ = '0';
  *out_pntr=0;				/* end string */
  return;
} /* soundex */


  /*
    If alpha, map input letter to soundex code.
    If not alpha and remove_garbage is set then skip to next char
    else return 0
    */

static char get_scode(CHARSET_INFO * cs,char **ptr, pbool remove_garbage)
{
  uchar ch;

  if (remove_garbage)
  {
    while (**ptr && !my_isalpha(cs,**ptr))
      (*ptr)++;
  }
  ch=my_toupper(cs,**ptr);
  if (ch < 'A' || ch > 'Z')
  {
    if (my_isalpha(cs,ch))		/* If extended alfa (country spec) */
      return '0';			/* threat as vokal */
    return 0;				/* Can't map */
  }
  return(soundex_map[ch-'A']);
} /* get_scode */