/* This library is released to the public domain, August 2005. Originally written by Arthur O'Dwyer. */ #include #include #include #include #include #include "sdictlib.h" #define is_consonant(k) (strchr("bcdfghjklmnpqrstvwxyz",k) != NULL) #define is_vowel(k) (strchr("aeiouy",k) != NULL) void sdict_init(struct sdict *d) { d->words = NULL; d->len = d->cap = 0; } /* The |sdict| data is stored to disk as a single gigantic text file, containing all the words in the dictionary in capital letters, followed by their syllable breakdown, and separated by newlines. */ int sdict_load(struct sdict *d, const char *fname) { FILE *in = fopen(fname, "r"); char buffer[1000]; if (in == NULL) return -1; while (fgets(buffer, sizeof buffer, in) != NULL) { int syllcount; int i, j; /* Skip comments and things like "DOUBLE-QUOTE */ if (!isalpha(buffer[0])) continue; /* Note that SAUSAGE and SAUSAGE(2) hash to the same key */ for (i=j=0; buffer[i] && !isspace(buffer[i]); ++i) { if (isalpha(buffer[i])) buffer[j++] = tolower(buffer[i]); } buffer[j++] = '\0'; ++i; for (syllcount=0; buffer[i]; ++i) { if (isdigit(buffer[i])) ++syllcount; } sdict_addword(d, buffer, syllcount); } fclose(in); return 0; } static int sdict_sortcmp(const void *p, const void *q) { const struct word_entry *wp = p; const struct word_entry *wq = q; return strcmp(wp->word, wq->word); } void sdict_sort(struct sdict *d) { size_t i, n = d->len; struct word_entry *w = d->words; qsort(w, n, sizeof *w, sdict_sortcmp); /* Remove duplicates. */ for (i=n; i > 1; --i) { if (strcmp(w[i-1].word, w[i-2].word) == 0) { if (i < --n) { free(w[i-1].word); memcpy(&w[i-1], &w[n], sizeof *w); } else free(w[n].word); } } if (n < d->len) { d->len = n; qsort(w, n, sizeof *w, sdict_sortcmp); } } void sdict_free(struct sdict *d) { size_t i; for (i=0; i < d->len; ++i) free(d->words[i].word); free(d->words); } int sdict_addword(struct sdict *d, const char *word, int count) { char *tmp; if (d->len >= d->cap) { size_t newcap = d->cap * 2 + 15; void *t = realloc(d->words, newcap * sizeof *d->words); if (t == NULL) return -3; d->words = t; d->cap = newcap; } tmp = malloc(strlen(word)+1); if (tmp == NULL) return -4; strcpy(tmp, word); d->words[d->len].word = tmp; d->words[d->len].count = count; d->len += 1; return 0; } int sdict_match_simple(const char *w, const char *p) { int i; for (i=0; p[i]; ++i) { if (w[i] == '\0') return 0; else if (p[i] == '1') { if (!is_consonant(w[i])) return 0; } else if (p[i] == '0') { if (!is_vowel(w[i])) return 0; } else if (p[i] != '?') { if (p[i] != w[i]) return 0; } } return (w[i] == '\0'); } /* Equivalent to the common |strcmpi| routine. */ int sdict_strcmp(const char *s, const char *t) { int i; for (i=0; s[i] && t[i]; ++i) { int d = tolower(s[i]) - tolower(t[i]); if (d != 0) return d; } return tolower(s[i]) - tolower(t[i]); } int sdict_find(struct sdict *d, const char *pattern, int (*f)(const struct word_entry *, void *), void *info) { struct word_entry *w = d->words; size_t i; int count=0; for (i=0; i < d->len; ++i) { if (sdict_match_simple(w[i].word, pattern)) { ++count; if (f && f(&w[i], info)) return count; } } return count; } int sdict_bfind(struct sdict *d, const char *pattern, int (*f)(const struct word_entry *, void *), void *info) { struct word_entry *w = d->words; size_t lower = 0, upper = d->len; size_t middle; while (1) { middle = (upper+lower) / 2; int cmp = sdict_strcmp(w[middle].word, pattern); if (cmp < 0) lower = middle+1; else if (cmp > 0) upper = middle; else break; if (upper <= lower) return 0; } /* We have a match */ return (f && f(&w[middle], info)); }