/*
   This library is released to the public domain, August 2005.
   Originally written by Arthur O'Dwyer.
*/

#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sdictlib.h"

/*
   Character-class tests used by the pattern matcher.  Only the lowercase
   letters listed below match; 'y' appears in both lists, so it satisfies
   both tests.

   |memchr| with an explicit length is used instead of |strchr| so that
   the string terminator is never treated as a member of the class
   (|strchr(s, '\0')| returns a non-null pointer to the terminator).
   The argument is parenthesized and evaluated exactly once.
*/
#define is_consonant(k) \
    (memchr("bcdfghjklmnpqrstvwxyz", (k), sizeof "bcdfghjklmnpqrstvwxyz" - 1) != NULL)
#define is_vowel(k) \
    (memchr("aeiouy", (k), sizeof "aeiouy" - 1) != NULL)


/*
   Put |d| into the empty state: no entry array, zero entries in use,
   and zero capacity.
*/
void sdict_init(struct sdict *d)
{
    d->cap = 0;
    d->len = 0;
    d->words = NULL;
}


/*
   The |sdict| data is stored on disk as a single large text file with
   one entry per line: the word in capital letters, followed by
   whitespace and its syllable breakdown.

   |sdict_load| reads |fname| and adds one entry to |d| for every line
   that begins with a letter.  The key is the first whitespace-delimited
   field with non-letters dropped and letters lowercased; the syllable
   count is the number of digit characters in the rest of the line.
   A physical line longer than the read buffer is processed in pieces.

   Returns 0 on success, -1 if the file cannot be opened, or the nonzero
   code returned by |sdict_addword| if an entry cannot be stored (entries
   added before the failure remain in |d|).
*/
int sdict_load(struct sdict *d, const char *fname)
{
    FILE *in = fopen(fname, "r");
    char buffer[1000];
    int rc = 0;

    if (in == NULL)  return -1;

    while (fgets(buffer, sizeof buffer, in) != NULL)
    {
        int syllcount;
        int i, j;

        /* Skip comments and things like "DOUBLE-QUOTE */
        /* The <ctype.h> functions require an |unsigned char| value;
           a plain |char| may be negative. */
        if (!isalpha((unsigned char)buffer[0])) continue;
        /* Note that SAUSAGE and SAUSAGE(2) map to the same key */
        for (i=j=0; buffer[i] && !isspace((unsigned char)buffer[i]); ++i) {
            if (isalpha((unsigned char)buffer[i]))
              buffer[j++] = (char)tolower((unsigned char)buffer[i]);
        }
        /* Step over the separator only if there is one.  This must be
           tested before the key is terminated, because |j| may equal |i|.
           When the line has no whitespace, |i| stays on the terminator
           so the digit scan below never reads past the string. */
        if (buffer[i] != '\0') ++i;
        buffer[j] = '\0';
        for (syllcount=0; buffer[i]; ++i) {
            if (isdigit((unsigned char)buffer[i])) ++syllcount;
        }
        rc = sdict_addword(d, buffer, syllcount);
        if (rc != 0) break;  /* out of memory: stop and report it */
    }

    fclose(in);
    return rc;
}
    

/*
   |qsort| comparison callback: orders two |struct word_entry| objects
   by their |word| strings using |strcmp|.
*/
static int sdict_sortcmp(const void *p, const void *q)
{
    const char *lhs = ((const struct word_entry *)p)->word;
    const char *rhs = ((const struct word_entry *)q)->word;
    return strcmp(lhs, rhs);
}

/*
   Sort the entries of |d| by word (|strcmp| order) and remove duplicate
   words.  For each run of equal words the first entry in sorted order is
   kept; the |word| strings of the others are freed.  |d->len| is updated
   to the number of entries that remain.
*/
void sdict_sort(struct sdict *d)
{
    struct word_entry *w = d->words;
    size_t n = d->len;
    size_t i, kept;

    /* Nothing to order or deduplicate.  This also avoids passing a null
       array to |qsort| for a dictionary that has never been filled. */
    if (n < 2) return;

    qsort(w, n, sizeof *w, sdict_sortcmp);

    /* Compact the sorted array in place.  |w[0..kept-1]| holds the unique
       entries seen so far; because entries are only ever moved toward the
       front, the array stays sorted and no second sort is needed. */
    kept = 1;
    for (i = 1; i < n; ++i) {
        if (strcmp(w[i].word, w[kept-1].word) == 0)
            free(w[i].word);
        else
            w[kept++] = w[i];
    }
    d->len = kept;
}


/*
   Release every word string owned by |d| and the entry array itself.
   Afterwards |d| is in the same empty state that |sdict_init| produces,
   so it may be refilled or freed again without touching freed memory.
   The |struct sdict| object itself is not freed.
*/
void sdict_free(struct sdict *d)
{
    size_t i;
    for (i=0; i < d->len; ++i)
      free(d->words[i].word);
    free(d->words);
    /* Do not leave dangling pointers or stale counts behind. */
    d->words = NULL;
    d->len = d->cap = 0;
}


int sdict_addword(struct sdict *d, const char *word, int count)
{
    char *tmp;
    if (d->len >= d->cap) {
        size_t newcap = d->cap * 2 + 15;
        void *t = realloc(d->words, newcap * sizeof *d->words);
        if (t == NULL) return -3;
        d->words = t;
        d->cap = newcap;
    }
    tmp = malloc(strlen(word)+1);
    if (tmp == NULL) return -4;
    strcpy(tmp, word);
    d->words[d->len].word = tmp;
    d->words[d->len].count = count;
    d->len += 1;
    return 0;
}


/*
   Test whether the word |w| matches the pattern |p| position by position.
   In the pattern, '1' accepts any character for which |is_consonant| is
   true, '0' any character for which |is_vowel| is true, '?' any
   character at all, and every other character must equal the word's
   character exactly.  The word must be exactly as long as the pattern.

   Returns 1 on a match and 0 otherwise.
*/
int sdict_match_simple(const char *w, const char *p)
{
    int pos = 0;

    while (p[pos] != '\0') {
        const char wc = w[pos];

        /* Word ran out before the pattern did. */
        if (wc == '\0')
            return 0;

        switch (p[pos]) {
        case '1':
            if (!is_consonant(wc)) return 0;
            break;
        case '0':
            if (!is_vowel(wc)) return 0;
            break;
        case '?':
            break;
        default:
            if (p[pos] != wc) return 0;
            break;
        }
        ++pos;
    }

    /* Pattern exhausted: the word must end here too. */
    return (w[pos] == '\0');
}

/*
   Case-insensitive string comparison, equivalent to the common
   |strcmpi| routine.  Returns a negative value, zero, or a positive
   value when |s| compares less than, equal to, or greater than |t|
   after both have been lowercased with |tolower|.

   Each character is converted to |unsigned char| before it is passed to
   |tolower|: handing a negative plain |char| to a <ctype.h> function is
   undefined, and comparing unsigned values keeps the ordering of
   high-bit characters consistent with |strcmp|.
*/
int sdict_strcmp(const char *s, const char *t)
{
    size_t i = 0;
    while (s[i] != '\0' && t[i] != '\0') {
        int diff = tolower((unsigned char)s[i]) - tolower((unsigned char)t[i]);
        if (diff != 0) return diff;
        ++i;
    }
    /* At least one string has ended; the shorter one sorts first. */
    return tolower((unsigned char)s[i]) - tolower((unsigned char)t[i]);
}


/*
   Linear scan of the dictionary: every entry whose word satisfies
   |sdict_match_simple| against |pattern| is counted and, when |f| is
   non-null, passed to |f| together with |info|.  A nonzero return from
   |f| stops the scan immediately.

   Returns the number of matching entries visited, including the one on
   which the scan was stopped.
*/
int sdict_find(struct sdict *d, const char *pattern,
               int (*f)(const struct word_entry *, void *), void *info)
{
    struct word_entry *entries = d->words;
    int hits = 0;
    size_t idx = 0;

    while (idx < d->len) {
        struct word_entry *cur = &entries[idx++];

        if (!sdict_match_simple(cur->word, pattern))
            continue;

        hits += 1;
        if (f != NULL && f(cur, info) != 0)
            break;
    }
    return hits;
}

/*
   Binary search for an entry whose word equals |pattern| under
   |sdict_strcmp| (case-insensitive).  |pattern| is compared as a plain
   string; the wildcard characters of |sdict_match_simple| are not
   interpreted here.

   The entries must already be in sorted order, e.g. after |sdict_sort|.
   NOTE(review): |sdict_sort| orders with case-sensitive |strcmp| while
   this search compares case-insensitively; the two agree when all stored
   words are lowercase, as |sdict_load| produces -- confirm for words
   added by other callers.

   Returns 0 if no entry matches (including an empty dictionary) or if
   |f| is null; otherwise calls |f| on the matching entry with |info| and
   returns 1 if |f| returned nonzero, 0 if it returned zero.
*/
int sdict_bfind(struct sdict *d, const char *pattern,
                int (*f)(const struct word_entry *, void *), void *info)
{
    struct word_entry *w = d->words;
    size_t lower = 0, upper = d->len;

    /* Search the half-open range [lower, upper).  Testing the range
       before indexing means an empty dictionary is never dereferenced. */
    while (lower < upper) {
        /* Midpoint written so that the sum cannot overflow. */
        size_t middle = lower + (upper - lower) / 2;
        int cmp = sdict_strcmp(w[middle].word, pattern);
        if (cmp < 0) lower = middle+1;
        else if (cmp > 0) upper = middle;
        else return (f && f(&w[middle], info));  /* We have a match */
    }
    return 0;
}