/* |haiku-finder|, a program for dealing with syllables and finding random haikus in running text. This program is free for all non-commercial use. Copyright Arthur O'Dwyer, August 2005. */ #include #include #include #include #include #include "sdictlib.h" #define steq(x,y) (!strcmp(x,y)) #define stneq(x,y) (!steq(x,y)) #define is_word(k) ((k) != '\0' && !isspace(k) && !is_terminal(k)) #define is_terminal(k) (ispunct(k) && !strchr(",;'-", (k))) #define is_vowel(k) ((k) != '\0' && strchr("aeiouy", (k))) static char *Argv0; static char *CmuDictFilename = "cmudict.0.6d"; int count_syllables(struct sdict *d, const char *word); int helper_iota(const struct word_entry *p, void *info); int helper_takemean(const struct word_entry *p, void *info); int look_for_haiku(struct sdict *d, FILE *in, FILE *out); int get_token(FILE *in, struct word_entry **e); int scan_to_terminal(FILE *in); int cant_be_haiku(struct word_entry **list, int listlen); void test_and_print_haiku(FILE *out, struct word_entry **list, int listlen); void do_error(const char *fmat, ...); void do_help(int); int main(int argc, char **argv) { int i; struct sdict sdict; Argv0 = argv[0]; if (steq(argv[1], "-H") || steq(argv[1], "-h") || steq(argv[1], "--help") || steq(argv[1], "-?")) do_help(0); else if (steq(argv[1], "--man")) do_help(1); sdict_init(&sdict); if (sdict_load(&sdict, CmuDictFilename)) fprintf(stderr, "I couldn't load my phonetic dictionary!\n"); sdict_sort(&sdict); if (argc == 2 && strchr(argv[1], '.')) { FILE *in = fopen(argv[1], "r"); if (in != NULL) { look_for_haiku(&sdict, in, stdout); fclose(in); } else { sdict_free(&sdict); do_error("I can't open file '%s' for reading!", argv[1]); } } else { for (i=1; i < argc; ++i) { int s = count_syllables(&sdict, argv[i]); if (s <= 0) printf("The word \"%s\" was not in my dictionary.\n", argv[i]); else printf("The word \"%s\" has %d syllables.\n", argv[i], s); } } sdict_free(&sdict); return 0; } int count_syllables(struct sdict *d, const char *word) { char *buf = malloc(strlen(word)+1); const struct word_entry *w = NULL; int i, j; if (buf == NULL) do_error("Out of memory!"); for (i=j=0; word[i]; ++i) { if (isalpha(word[i])) buf[j++] = tolower(word[i]); } buf[j++] = '\0'; sdict_bfind(d, buf, helper_iota, &w); if (w != NULL) { free(buf); return w->count; } else { /* Return the average number of syllables of words with this vowel-consonant pattern. */ int total; int count; int leave_es = 1; int leave_les = 1; int leave_ys = 1; try_average: for (i=0; buf[i] != '\0'; ++i) { if (leave_les && buf[i]=='l' && (buf[i+1]=='e' || buf[i+1]=='y')) { /* leave the 'le' or 'ly' alone */ i += 1; } else if (leave_es && buf[i]=='e') { /* leave the 'e' alone */ } else if (leave_ys && buf[i]=='y') { /* leave the 'y' alone */ } else if (isalpha(buf[i])) { buf[i] = (is_vowel(buf[i])? '0': '1'); } } total = 0; count = sdict_find(d, buf, helper_takemean, &total); if (count != 0) { free(buf); return (total+(count/2))/count; } /* Whoops --- no matches! Recover. */ if (leave_les) { leave_les = 0; goto try_average; } if (leave_ys) { leave_ys = 0; leave_les = 1; goto try_average; } if (leave_es) { leave_es = 0; leave_ys = 1; leave_les = 1; goto try_average; } total = strlen(buf); free(buf); /* Treat the word as an acronym. */ return (total > 1)? total: 1; } } int helper_iota(const struct word_entry *p, void *info) { *(const struct word_entry **)info = p; return 1; } int helper_takemean(const struct word_entry *p, void *info) { *(int *)info += p->count; return 0; } /* Look for haiku, where a haiku is defined as any sentence fitting the 5-7-5 syllable pattern. Spaces delimit words; punctuation of any kind except apostrophes delimits sentences. */ int look_for_haiku(struct sdict *d, FILE *in, FILE *out) { struct word_entry *list[19] = {0}; int listlen = 0; struct word_entry *e; while (get_token(in, &e) != EOF) { if (e == NULL) { /* Hit a sentence boundary. */ test_and_print_haiku(out, list, listlen); while (listlen > 0) { free(list[--listlen]->word); free(list[listlen]); } } else { /* e contains a word. */ e->count = count_syllables(d, e->word); list[listlen++] = e; if (listlen > 2 && cant_be_haiku(list, listlen)) { while (listlen > 0) { free(list[--listlen]->word); free(list[listlen]); } /* Scan to the end of this sentence. */ if (scan_to_terminal(in) == EOF) break; } } } done_processing: while (listlen > 0) { free(list[--listlen]->word); free(list[listlen]); } return 0; } /* If the current list is a haiku, print it! */ void test_and_print_haiku(FILE *out, struct word_entry **list, int listlen) { int total = 0; int first = 0, second = 0; int i; for (i=0; i < listlen; ++i) { total += list[i]->count; if (total == 5) first = 1; else if (total > 5 && !first) return; else if (total == 12) second = 1; else if (total > 12 && !second) return; } if (total == 17 && first && second) { /* The haiku is valid. So print it. */ total = 0; for (i=0; i < listlen; ++i) { total += list[i]->count; fprintf(out, "%s", list[i]->word); if (total != 5 && total != 12 && total != 17) fprintf(out, " "); else fprintf(out, "\n"); } fprintf(out, "\n"); } } /* Return 1 iff the current list can't possibly be a prefix of a haiku. */ int cant_be_haiku(struct word_entry **list, int listlen) { int total = 0; int first = 0, second = 0; int i; for (i=0; i < listlen; ++i) { total += list[i]->count; if (total == 5) first = 1; else if (total > 5 && !first) return 1; else if (total == 12) second = 1; else if (total > 12 && !second) return 1; else if (total > 17) return 1; } /* Sanity check. */ if (listlen > 17) return 1; return 0; } /* Lookahead character used by |get_token| and |scan_to_terminal| */ static int global_k = '\0'; int get_token(FILE *in, struct word_entry **e) { #define k global_k if (k == EOF) { return EOF; } else if (is_terminal(k)) { *e = NULL; k = '\0'; return 0; } else { /* Look for a letter or some punctuation in the file. */ while (!is_word(k) && !is_terminal(k) && k != EOF) k = getc(in); if (k == EOF) { return EOF; } else if (is_terminal(k)) { k = '\0'; *e = NULL; return 0; } else { /* We found a word. Record it. */ int cap = 16, len = 0; *e = malloc(sizeof **e); (*e)->word = malloc(cap); (*e)->word[len++] = k; while ((k = getc(in)) != EOF && is_word(k)) { (*e)->word[len++] = k; if (len >= cap) { cap += 8; (*e)->word = realloc((*e)->word, cap); if ((*e)->word == NULL) do_error("Out of memory!"); } } (*e)->word[len++] = '\0'; return 0; } } #undef k } int scan_to_terminal(FILE *in) { #define k global_k while (!is_terminal(k)) { k = getc(in); if (k == EOF) return EOF; } return 0; #undef k } void do_error(const char *fmat, ...) { va_list ap; printf("%s: ", Argv0); va_start(ap, fmat); vprintf(fmat, ap); printf("\n"); va_end(ap); exit(EXIT_FAILURE); } void do_help(int man) { if (man) goto man; puts("haiku-finder [-?h] word [word word ...]"); puts("haiku-finder [-?h] input.filename"); puts("Counts syllables, or finds random haikus in running text."); puts(" input.filename: find haikus in given file"); puts(" word [word ...]: show syllable count for given word(s)"); puts(" --help: show this message"); puts(" --man: show complete help text"); exit(0); man: puts("haiku-finder: Syllable counter and random haiku finder.\n"); puts(" This program uses the 'cmudict.0.6d' phonetic dictionary"); puts(" (available at ftp://ftp.cs.cmu.edu/afs/cs.cmu.edu/data/"); puts(" anonftp/project/fgdata/dict/) to find random haikus in"); puts(" running text. Haikus are expected to be delimited by"); puts(" non-hyphen punctuation, and conform to the 5-7-5 pattern."); puts(" If a word is not in the provided phonetic dictionary,"); puts(" this program will try several heuristics before giving"); puts(" up on the word entirely. For example, it knows that the"); puts(" word \"lavage\" has two syllables, by comparing its"); puts(" vowel-consonant structure to known words such as \"savage\""); puts(" and \"butane\"."); puts(" This program is free for all non-commercial use."); puts(" (C) Arthur O'Dwyer, August 2005."); exit(0); }