/* 'encode', a program for UTF8-encoding APL programs. Based on work by Jim Weigang. Arthur O'Dwyer, 2004. This source code is in the public domain. */ #include #include #include #include #include #include "uniformats.h" #define steq(x,y) (!strcmp((x),(y))) static const char *Argv0; static const char *OutputFilename = NULL; static int UseAPLTag = 0; int process(FILE *in, FILE *out); static int transcribe(const char *buffer, FILE *out); static void brace(const char *buf, int len, FILE *out); static int keyword_eq(const char *text, const char *key, int textlen); void do_error(const char *fmat, ...); void do_help(int man); static struct { char *word; unsigned long int chr; } braced_keywords[] = { {"leftbrace", '{'}, {"rightbrace", '}'}, {"scan", '\\'}, {"expand", '\\'}, {"reduce", '/'}, {"compress", '/'}, {"replicate", '/'}, {"ravel", ','}, {"catenate", ','}, {"laminate", ','}, {"roll", '?'}, {"deal", '?'}, {"diaeresis", 0x00A8}, {"each", 0x00A8}, {"\"", 0x00A8}, {"times", 0x00D7}, {"signum", 0x00D7}, {"x", 0x00D7}, {"reciprocal", 0x00F7}, {"divide", 0x00F7}, {"%", 0x00F7}, {"left-arrow", 0x2190}, {"gets", 0x2190}, {"assign", 0x2190}, {"up-arrow", 0x2191}, {"first", 0x2191}, {"take", 0x2191}, {"right-arrow", 0x2192}, {"branch", 0x2192}, {"goto", 0x2192}, {"down-arrow", 0x2193}, {"drop", 0x2193}, {"delta", 0x2206}, {"/\\", 0x2206}, {"del", 0x2207}, {"nabla", 0x2207}, {"epsilon", 0x2208}, {"member-of", 0x2208}, {"member", 0x2208}, {"enlist", 0x2208}, {"type", 0x2208}, {"negative", 0x2212}, {"minus", 0x2212}, {"-", 0x2212}, {"jot", 0x2218}, {"ring", 0x2218}, {"o", 0x2218}, {"nazg", 0x2218}, {"stile", 0x2223}, {"abs", 0x2223}, {"residue", 0x2223}, {"up-caret", 0x2227}, {"and", 0x2227}, {"caret", 0x2227}, {"down-caret", 0x2228}, {"or", 0x2228}, {"inverted-caret", 0x2228}, {"intersection", 0x2229}, {"intersect", 0x2229}, {"up-shoe", 0x2229}, {"union", 0x222A}, {"down-shoe", 0x222A}, {"diaresis-dot", 0x2235}, {"paw", 0x2235}, {"not-equal", 0x2260}, {"/=", 0x2260}, {"match", 0x2261}, {"depth", 0x2261}, {"=_", 0x2261}, {"not-match", 0x2262}, {"/match", 0x2262}, {"/=_", 0x2262}, {"<=", 0x2264}, {"less-than-equal", 0x2264}, {">=", 0x2265}, {"greater-than-equal", 0x2265}, {"left-shoe", 0x2282}, {"enclose", 0x2282}, {"partition", 0x2282}, {"right-shoe", 0x2283}, {"disclose", 0x2283}, {"pick", 0x2283}, {"circle-dash", 0x2296}, {"rotate", 0x2296}, {"O-", 0x2296}, {"right-tack", 0x22A2}, {"lev", 0x22A2}, {"|-", 0x22A2}, {"left-tack", 0x22A3}, {"dex", 0x22A3}, {"-|", 0x22A3}, {"down-tack", 0x22A4}, {"represent", 0x22A4}, {"T", 0x22A4}, {"encode", 0x22A4}, {"up-tack", 0x22A5}, {"base", 0x22A5}, {"decode", 0x22A5}, {"and-overbar", 0x22BC}, {"nand", 0x22BC}, {"~^", 0x22BC}, {"or-overbar", 0x22BD}, {"nor", 0x22BD}, {"~v", 0x22BD}, {"star", 0x22C6}, {"pow", 0x22C6}, {"exp", 0x22C6}, {"ceiling", 0x2308}, {"greater-of", 0x2308}, {"max", 0x2308}, {"floor", 0x230A}, {"lesser-of", 0x230A}, {"min", 0x230A}, {"I-beam", 0x2336}, {"I", 0x2336}, {"squish-quad", 0x2337}, {"squad", 0x2337}, {"index", 0x2337}, {"quad-equal", 0x2338}, {"#=", 0x2338}, {"quad-divide", 0x2339}, {"#%", 0x2339}, {"mat-inverse", 0x2339}, {"mat-divide", 0x2339}, {"domino", 0x2339}, {"quad-diamond", 0x233A}, {"#&", 0x233A}, {"quad-jot", 0x233B}, {"#o", 0x233B}, {"quad-circle", 0x233C}, {"#O", 0x233C}, {"circle-stile", 0x233D}, {"reverse", 0x233D}, {"rotate", 0x233D}, {"circle-jot", 0x233E}, {"slash-bar", 0x233F}, {"compress1", 0x233F}, {"reduce1", 0x233F}, {"replicate1", 0x233F}, {"slope-bar", 0x2340}, {"expand1", 0x2340}, {"scan1", 0x2340}, {"backslash-bar", 0x2340}, {"quad-slash", 0x2341}, {"#/", 0x2341}, {"quad-backslash", 0x2342}, {"#\\", 0x2342}, {"quad-less-than", 0x2343}, {"#<", 0x2343}, {"quad<", 0x2343}, {"quad-greater-than", 0x2344}, {"#>", 0x2344}, {"quad>", 0x2344}, {"leftwards-vane", 0x2345}, {"left-vane", 0x2345}, {"rightwards-vane", 0x2346}, {"right-vane", 0x2346}, {"quad<-", 0x2347}, {"quad-left-arrow", 0x2347}, {"quad->", 0x2348}, {"quad-right-arrow", 0x2348}, {"circle-backslash", 0x2349}, {"transpose", 0x2349}, {"O\\", 0x2349"}, {"uptack-underbar", 0x234A}, {"delta-stile", 0x234B}, {"/\|", 0x234B}, {"grade-up", 0x234B}, {"#v", 0x234C}, {"quad-downcaret", 0x234C}, {"#/\\", 0x234D}, {"quad-delta", 0x234D}, {"uptack-jot", 0x234E}, {"execute", 0x234E}, {"upwards-vane", 0x234F}, {"up-vane", 0x234F}, {"quad-up-arrow", 0x2350}, {"downtack-overbar", 0x2351}, {"del-stile", 0x2352}, {"grade-down", 0x2352}, {"#^", 0x2353}, {"quad-up-caret", 0x2353}, {"#\\/", 0x2354}, {"quad-del", 0x2354}, {"quad-nabla", 0x2354}, {"format", 0x2355}, {"downtack-jot", 0x2355}, {"downwards-vane", 0x2356}, {"down-vane", 0x2356}, {"quad-down-arrow", 0x2357}, {"quote_", 0x2358}, {"quote-underbar", 0x2358}, {"'_", 0x2358}, {"delta_", 0x2359}, {"delta-underbar", 0x2359}, {"/\\_", 0x2359}, {"diamond_", 0x235A}, {"diamond-underbar", 0x235A}, {"&_", 0x235A}, {"jot_", 0x235B}, {"jot-underbar", 0x235B}, {"o_", 0x235B}, {"circle_", 0x235C}, {"circle-underbar", 0x235C}, {"O_", 0x235C}, {"upshoe-jot", 0x235D}, {"lamp", 0x235D}, {"comment", 0x235D}, {"quote-quad", 0x235E}, {"circle-star", 0x235F}, {"log", 0x235F}, {"ln", 0x235F}, {"O*", 0x235F}, {"quad-colon", 0x2360}, {"#:", 0x2360}, {"snout", 0x2361}, {"downtack-diaeresis", 0x2361}, {"frog", 0x2362}, {"del-diaeresis", 0x2362}, {"sourpuss", 0x2363}, {"star-diaeresis", 0x2363}, {"hoot", 0x2364}, {"jot-diaeresis", 0x2364}, {"rank", 0x2364}, {"o\"", 0x2364}, {"holler", 0x2365}, {"circle-diaeresis", 0x2365}, {"O\"", 0x2365}, {"downshoe-stile", 0x2366}, {"leftshoe-stile", 0x2367}, {"smirk", 0x2368}, {"tilde-diaeresis", 0x2368}, {"~\"", 0x2368}, {">\"", 0x2369}, {"greater-than-diaeresis", 0x2369}, {"comma-bar", 0x236A}, {"cat-bar", 0x236A}, {"catenate1", 0x236A}, {"laminate1", 0x236A}, {",-", 0x236A}, {"del-tilde", 0x236B}, {"\\/~", 0x236B}, {"zilde", 0x236C}, {"0~", 0x236C}, {"stile-tilde", 0x236D}, {"|~", 0x236D}, {"semicolon-underbar", 0x236E}, {";_", 0x236E}, {"#/=", 0x236F}, {"quad-not-equal", 0x236F}, {"#?", 0x2370}, {"quad-question", 0x2370}, {"downcaret-tilde", 0x2371}, {"upcaret-tilde", 0x2372}, {"iota", 0x2373}, {"index", 0x2373}, {"index-of", 0x2373}, {"rho", 0x2374}, {"shape", 0x2374}, {"reshape", 0x2374}, {"omega", 0x2375}, {"w", 0x2375}, {"alpha-underbar", 0x2376}, {"alpha_", 0x2376}, {"epsilon-underbar", 0x2377}, {"epsilon_", 0x2377}, {"find", 0x2377}, {"iota_", 0x2378}, {"iota-underbar", 0x2378}, {"omega_", 0x2379}, {"omega-underbar", 0x2379}, {"alpha", 0x237A}, {"quad", 0x25AF}, {"circle", 0x25CB}, {"pi-times", 0x25CB}, {"O", 0x25CB}, }; int main(int argc, char *argv[]) { int i, j; int LiteralInputNames = 0; Argv0 = argv[0]; for (i=1; i < argc; i++) { if (argv[i][0] != '-') break; if (argv[i][1] == '\0') break; if (steq(argv[i]+1, "-")) { LiteralInputNames = 1; ++i; break; } else if (steq(argv[i]+1, "-help") || steq(argv[i]+1, "h") || steq(argv[i]+1, "?")) do_help(0); else if (steq(argv[i]+1, "-man")) do_help(1); else if (steq(argv[i], "-o") || steq(argv[i], "-O")) { if (i >= argc-1) { do_error("Need output filename with -o"); } OutputFilename = argv[++i]; } else { for (j=1; argv[i][j]; ++j) { if (toupper(argv[i][j]) == 'A') UseAPLTag = isupper(argv[i][j]); else do_error("Unrecognized option(s) %s; -h for help", argv[i]); } } } if (i == argc) do_error("No files to process; -h for help"); for (; i < argc; ++i) { FILE *infp; FILE *outfp; if (!LiteralInputNames && steq(argv[i], "--")) { LiteralInputNames = 1; continue; } else { infp = (!LiteralInputNames && steq(argv[i], "-"))? stdin: fopen(argv[i], "r"); } if (infp == NULL) do_error("Error opening file '%s' for input; -h for help", argv[i]); outfp = OutputFilename? fopen(OutputFilename, "w"): stdout; if (outfp == NULL) do_error("Error opening file '%s' for output", OutputFilename); process(infp, outfp); if (infp != stdin) fclose(infp); if (outfp != stdout) fclose(outfp); } return EXIT_SUCCESS; } int process(FILE *in, FILE *out) { char buffer[1000]; int Processing = UseAPLTag? 0: 1; while (fgets(buffer, sizeof buffer, in) != NULL) { if (Processing) { transcribe(buffer, out); } else { fputs(buffer, out); if (strncmp(buffer, "\\begin{apl}", 11)==0) ++Processing; else if (strncmp(buffer, "\\end{apl}", 9)==0) --Processing; if (Processing < 0) do_error("Unmatched '\\end{apl}'"); if (Processing > 10000) do_error("Way too many nested '\\begin{apl}'s!"); } } } static int transcribe(const char *buffer, FILE *out) { int idx; for (idx=0; buffer[idx] != '\0'; ++idx) { switch (buffer[idx]) { case '@': brace("lamp", 4, out); break; case '#': brace("quad", 4, out); break; case '&': brace("diamond", 7, out); break; case '^': brace("and", 3, out); break; case '|': brace("stile", 5, out); break; case '{': { int esclen, j; ++idx; for (esclen=0; !strchr("}", buffer[idx+esclen]); ++esclen) continue; brace(buffer+idx, esclen, out); break; } default: putUTF8(buffer[idx], out); break; } } return 0; } static void brace(const char *buf, int len, FILE *out) { int i; for (i=0; i < NELEM(braced_keywords); ++i) { if (keyword_eq(buf, braced_keywords[i].word, len)) { putUTF8(braced_keywords[i].chr, out); return; } } do_error("Unrecognized keyword {%.*s}", len, buf); } static int keyword_eq(const char *text, const char *key, int textlen) { /* needs work */ return !strncmp(text, key, textlen); } void do_error(const char *fmat, ...) { va_list ap; printf("%s: ", Argv0); va_start(ap, fmat); vprintf(fmat, ap); putchar('\n'); va_end(ap); exit(EXIT_FAILURE); } void do_help(int man) { if (man) goto man; man: puts("encode [-?h] [-o filename] filenames"); puts("UTF-8-encodes ASCIIfied APL source code."); puts(" -o filename: send output to specified file"); puts(" --help: show this message"); puts(" --man: show complete help text"); exit(0); puts("encode: Transcribes ASCIIfied APL sources to UTF-8.\n"); puts(" This program will take as input an APL program encoded"); puts(" according to Jim Weigang's APL-ASCII transliteration"); puts(" and produce as output a UTF-8-encoded version of the"); puts(" program. The procedure is effectively lossless, except"); puts(" that some braced keywords are synonyms for the same APL"); puts(" symbol; e.g. {catenate} and {laminate} both refer to the"); puts(" \"comma\" function."); puts(""); puts(" Please direct any bug reports or feature requests to the" " author."); exit(0); }