summaryrefslogtreecommitdiffstats
path: root/servers/slapd/phonetic.c
diff options
context:
space:
mode:
Diffstat (limited to 'servers/slapd/phonetic.c')
-rw-r--r--servers/slapd/phonetic.c459
1 files changed, 459 insertions, 0 deletions
diff --git a/servers/slapd/phonetic.c b/servers/slapd/phonetic.c
new file mode 100644
index 0000000..ed3641c
--- /dev/null
+++ b/servers/slapd/phonetic.c
@@ -0,0 +1,459 @@
+/* phonetic.c - routines to do phonetic matching */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2022 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+/* Portions Copyright (c) 1995 Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and that due credit is given
+ * to the University of Michigan at Ann Arbor. The name of the University
+ * may not be used to endorse or promote products derived from this
+ * software without specific prior written permission. This software
+ * is provided ``as is'' without express or implied warranty.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+
+#include <ac/ctype.h>
+#include <ac/string.h>
+#include <ac/socket.h>
+#include <ac/time.h>
+
+#include "slap.h"
+
+#if !defined(SLAPD_METAPHONE) && !defined(SLAPD_PHONETIC)
+#define SLAPD_METAPHONE
+#endif
+
+#define iswordbreak(x) (!isascii(x) || isspace((unsigned char) (x)) || \
+ ispunct((unsigned char) (x)) || \
+ isdigit((unsigned char) (x)) || (x) == '\0')
+
+#if 0
+static char *
+first_word( char *s )
+{
+ if ( s == NULL ) {
+ return( NULL );
+ }
+
+ while ( iswordbreak( *s ) ) {
+ if ( *s == '\0' ) {
+ return( NULL );
+ } else {
+ s++;
+ }
+ }
+
+ return( s );
+}
+
+static char *
+next_word( char *s )
+{
+ if ( s == NULL ) {
+ return( NULL );
+ }
+
+ while ( ! iswordbreak( *s ) ) {
+ s++;
+ }
+
+ while ( iswordbreak( *s ) ) {
+ if ( *s == '\0' ) {
+ return( NULL );
+ } else {
+ s++;
+ }
+ }
+
+ return( s );
+}
+
+static char *
+word_dup( char *w )
+{
+ char *s, *ret;
+ char save;
+
+ for ( s = w; !iswordbreak( *s ); s++ )
+ ; /* NULL */
+ save = *s;
+ *s = '\0';
+ ret = ch_strdup( w );
+ *s = save;
+
+ return( ret );
+}
+#endif /* 0 */
+
+#ifndef MAXPHONEMELEN
+#define MAXPHONEMELEN 4
+#endif
+
+#if defined(SLAPD_PHONETIC)
+
+/* lifted from isode-8.0 */
+char *
+phonetic( char *s )
+{
+ char code, adjacent, ch;
+ char *p;
+ int i;
+ char phoneme[MAXPHONEMELEN + 1];
+
+ p = s;
+ if ( p == NULL || *p == '\0' ) {
+ return( NULL );
+ }
+
+ adjacent = '0';
+ phoneme[0] = TOUPPER((unsigned char)*p);
+
+ phoneme[1] = '\0';
+ for ( i = 0; i < 99 && (! iswordbreak(*p)); p++ ) {
+ ch = TOUPPER ((unsigned char)*p);
+
+ code = '0';
+
+ switch (ch) {
+ case 'B':
+ case 'F':
+ case 'P':
+ case 'V':
+ code = (adjacent != '1') ? '1' : '0';
+ break;
+ case 'S':
+ case 'C':
+ case 'G':
+ case 'J':
+ case 'K':
+ case 'Q':
+ case 'X':
+ case 'Z':
+ code = (adjacent != '2') ? '2' : '0';
+ break;
+ case 'D':
+ case 'T':
+ code = (adjacent != '3') ? '3' : '0';
+ break;
+ case 'L':
+ code = (adjacent != '4') ? '4' : '0';
+ break;
+ case 'M':
+ case 'N':
+ code = (adjacent != '5') ? '5' : '0';
+ break;
+ case 'R':
+ code = (adjacent != '6') ? '6' : '0';
+ break;
+ default:
+ adjacent = '0';
+ }
+
+ if ( i == 0 ) {
+ adjacent = code;
+ i++;
+ } else if ( code != '0' ) {
+ if ( i == MAXPHONEMELEN )
+ break;
+ adjacent = phoneme[i] = code;
+ i++;
+ }
+ }
+
+ if ( i > 0 )
+ phoneme[i] = '\0';
+
+ return( ch_strdup( phoneme ) );
+}
+
+#elif defined(SLAPD_METAPHONE)
+
+/*
+ * Metaphone was originally developed by Lawrence Philips and
+ * published in the "Computer Language" magazine in 1990.
+ */
+/*
+ * Metaphone copied from C Gazette, June/July 1991, pp 56-57,
+ * author Gary A. Parker, with changes by Bernard Tiffany of the
+ * University of Michigan, and more changes by Tim Howes of the
+ * University of Michigan.
+ */
+
+/* Character coding array */
+static const char vsvfn[26] = {
+ 1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2,
+ /* A B C D E F G H I J K L M */
+ 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0};
+ /* N O P Q R S T U V W X Y Z */
+
+/* Macros to access character coding array */
+#define vowel(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 1) /* AEIOU */
+#define same(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 2) /* FJLMNR */
+#define varson(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 4) /* CGPST */
+#define frontv(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 8) /* EIY */
+#define noghf(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 16) /* BDH */
+
+char *
+phonetic( char *Word )
+{
+ char *n, *n_start, *n_end; /* pointers to string */
+ char *metaph_end; /* pointers to metaph */
+ char ntrans[40]; /* word with uppercase letters */
+ int KSflag; /* state flag for X -> KS */
+ char buf[MAXPHONEMELEN + 2];
+ char *Metaph;
+
+ /*
+ * Copy Word to internal buffer, dropping non-alphabetic characters
+ * and converting to upper case
+ */
+
+ for (n = ntrans + 4, n_end = ntrans + 35; !iswordbreak( *Word ) &&
+ n < n_end; Word++) {
+ if (isalpha((unsigned char)*Word))
+ *n++ = TOUPPER((unsigned char)*Word);
+ }
+ Metaph = buf;
+ *Metaph = '\0';
+ if (n == ntrans + 4) {
+ return( ch_strdup( buf ) ); /* Return if null */
+ }
+ n_end = n; /* Set n_end to end of string */
+
+ /* ntrans[0] will always be == 0 */
+ ntrans[0] = '\0';
+ ntrans[1] = '\0';
+ ntrans[2] = '\0';
+ ntrans[3] = '\0';
+ *n++ = 0;
+ *n++ = 0;
+ *n++ = 0;
+ *n = 0; /* Pad with nulls */
+ n = ntrans + 4; /* Assign pointer to start */
+
+ /* Check for PN, KN, GN, AE, WR, WH, and X at start */
+ switch (*n) {
+ case 'P':
+ case 'K':
+ case 'G':
+ /* 'PN', 'KN', 'GN' becomes 'N' */
+ if (*(n + 1) == 'N')
+ *n++ = 0;
+ break;
+ case 'A':
+ /* 'AE' becomes 'E' */
+ if (*(n + 1) == 'E')
+ *n++ = 0;
+ break;
+ case 'W':
+ /* 'WR' becomes 'R', and 'WH' to 'H' */
+ if (*(n + 1) == 'R')
+ *n++ = 0;
+ else if (*(n + 1) == 'H') {
+ *(n + 1) = *n;
+ *n++ = 0;
+ }
+ break;
+ case 'X':
+ /* 'X' becomes 'S' */
+ *n = 'S';
+ break;
+ }
+
+ /*
+ * Now, loop step through string, stopping at end of string or when
+ * the computed 'metaph' is MAXPHONEMELEN characters long
+ */
+
+ KSflag = 0; /* state flag for KS translation */
+ for (metaph_end = Metaph + MAXPHONEMELEN, n_start = n;
+ n <= n_end && Metaph < metaph_end; n++) {
+ if (KSflag) {
+ KSflag = 0;
+ *Metaph++ = 'S';
+ } else {
+ /* Drop duplicates except for CC */
+ if (*(n - 1) == *n && *n != 'C')
+ continue;
+ /* Check for F J L M N R or first letter vowel */
+ if (same(*n) || (n == n_start && vowel(*n)))
+ *Metaph++ = *n;
+ else
+ switch (*n) {
+ case 'B':
+
+ /*
+ * B unless in -MB
+ */
+ if (n == (n_end - 1) && *(n - 1) != 'M')
+ *Metaph++ = *n;
+ break;
+ case 'C':
+
+ /*
+ * X if in -CIA-, -CH- else S if in
+ * -CI-, -CE-, -CY- else dropped if
+ * in -SCI-, -SCE-, -SCY- else K
+ */
+ if (*(n - 1) != 'S' || !frontv(*(n + 1))) {
+ if (*(n + 1) == 'I' && *(n + 2) == 'A')
+ *Metaph++ = 'X';
+ else if (frontv(*(n + 1)))
+ *Metaph++ = 'S';
+ else if (*(n + 1) == 'H')
+ *Metaph++ = ((n == n_start && !vowel(*(n + 2)))
+ || *(n - 1) == 'S')
+ ? (char) 'K' : (char) 'X';
+ else
+ *Metaph++ = 'K';
+ }
+ break;
+ case 'D':
+
+ /*
+ * J if in DGE or DGI or DGY else T
+ */
+ *Metaph++ = (*(n + 1) == 'G' && frontv(*(n + 2)))
+ ? (char) 'J' : (char) 'T';
+ break;
+ case 'G':
+
+ /*
+ * F if in -GH and not B--GH, D--GH,
+ * -H--GH, -H---GH else dropped if
+ * -GNED, -GN, -DGE-, -DGI-, -DGY-
+ * else J if in -GE-, -GI-, -GY- and
+ * not GG else K
+ */
+ if ((*(n + 1) != 'J' || vowel(*(n + 2))) &&
+ (*(n + 1) != 'N' || ((n + 1) < n_end &&
+ (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
+ (*(n - 1) != 'D' || !frontv(*(n + 1))))
+ *Metaph++ = (frontv(*(n + 1)) &&
+ *(n + 2) != 'G') ? (char) 'G' : (char) 'K';
+ else if (*(n + 1) == 'H' && !noghf(*(n - 3)) &&
+ *(n - 4) != 'H')
+ *Metaph++ = 'F';
+ break;
+ case 'H':
+
+ /*
+ * H if before a vowel and not after
+ * C, G, P, S, T else dropped
+ */
+ if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
+ vowel(*(n + 1))))
+ *Metaph++ = 'H';
+ break;
+ case 'K':
+
+ /*
+ * dropped if after C else K
+ */
+ if (*(n - 1) != 'C')
+ *Metaph++ = 'K';
+ break;
+ case 'P':
+
+ /*
+ * F if before H, else P
+ */
+ *Metaph++ = *(n + 1) == 'H' ?
+ (char) 'F' : (char) 'P';
+ break;
+ case 'Q':
+
+ /*
+ * K
+ */
+ *Metaph++ = 'K';
+ break;
+ case 'S':
+
+ /*
+ * X in -SH-, -SIO- or -SIA- else S
+ */
+ *Metaph++ = (*(n + 1) == 'H' ||
+ (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
+ *(n + 2) == 'A')))
+ ? (char) 'X' : (char) 'S';
+ break;
+ case 'T':
+
+ /*
+ * X in -TIA- or -TIO- else 0 (zero)
+ * before H else dropped if in -TCH-
+ * else T
+ */
+ if (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
+ *(n + 2) == 'A'))
+ *Metaph++ = 'X';
+ else if (*(n + 1) == 'H')
+ *Metaph++ = '0';
+ else if (*(n + 1) != 'C' || *(n + 2) != 'H')
+ *Metaph++ = 'T';
+ break;
+ case 'V':
+
+ /*
+ * F
+ */
+ *Metaph++ = 'F';
+ break;
+ case 'W':
+
+ /*
+ * W after a vowel, else dropped
+ */
+ case 'Y':
+
+ /*
+ * Y unless followed by a vowel
+ */
+ if (vowel(*(n + 1)))
+ *Metaph++ = *n;
+ break;
+ case 'X':
+
+ /*
+ * KS
+ */
+ if (n == n_start)
+ *Metaph++ = 'S';
+ else {
+ *Metaph++ = 'K'; /* Insert K, then S */
+ KSflag = 1;
+ }
+ break;
+ case 'Z':
+
+ /*
+ * S
+ */
+ *Metaph++ = 'S';
+ break;
+ }
+ }
+ }
+
+ *Metaph = 0; /* Null terminate */
+ return( ch_strdup( buf ) );
+}
+
+#endif /* SLAPD_METAPHONE */