#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <ctype.h>
#include <limits.h>
#include "uniformats.h"

/* Nonzero when the C strings x and y have identical contents. */
#define steq(x,y) (strcmp((x),(y)) == 0)
/* Element count of a true array (never a pointer), converted to int. */
#define NELEM(arr) ((int)(sizeof (arr) / sizeof (arr)[0]))

/* Helpers defined after main(). */
void do_error(const char *fmat, ...);
void do_help(int man);

/* Program name as given in argv[0]; prefixes do_error() messages. */
static const char *Argv0;

/*
 * Option flags.  In main()'s option loop the upper-case switch sets the
 * flag and the matching lower-case switch clears it.
 */
static int ConvertCRLFPairs = 1;    /* -B / -b */
static int GobbleHeader = 1;        /* -G / -g */
static int SpewHeader = 0;          /* -S / -s */


/*
 * Description of one supported encoding.  main() selects one entry for
 * input and one for output and drives the conversion through these
 * function pointers.
 */
struct Format {
    /* Characters that select this format in a "--x2y" option. */
    const char *code;
    /* Reads one value into *ucp; main() keeps looping while this is > 0. */
    int (*get)(unsigned long *ucp, FILE *fp);
    /* Writes one value to the output stream. */
    int (*put)(unsigned long ucp, FILE *fp);
    /* Optional: called once on the input stream when header gobbling is on. */
    int (*gobble)(FILE *fp);
    /* Optional: called once on the output stream when header spewing is on. */
    int (*spew)(FILE *fp);
};

/* Table of known formats; entry 0 is the default for input and output. */
struct Format fmt_table[] = {
    { "Aa",  getANSI,  putANSI,  NULL,       NULL     },
    { "Oo",  getOmega, putOmega, NULL,       NULL     },
    { "Uu2", getUCS2,  putUCS2,  gobbleUCS2, spewUCS2 },
    { "4",   getUCS4,  putUCS4,  gobbleUCS4, spewUCS4 },
    { "Ff",  getUTF8,  putUTF8,  gobbleUTF8, spewUTF8 },
};


/*
 * Return the fmt_table entry whose code string contains the format
 * character `code`, or NULL when no entry matches.
 */
static struct Format *find_format_by_code(int code)
{
    int k;

    /* strchr() also matches the terminating NUL, so reject it up front. */
    if (code == '\0')
        return NULL;

    for (k = 0; k < NELEM(fmt_table); ++k) {
        if (strchr(fmt_table[k].code, code) != NULL)
            return &fmt_table[k];
    }
    return NULL;
}


/*
 * Program entry point.
 *
 * Parses the command line (help switches, -o <file>, --x2y format
 * selection, and the B/b G/g S/s flag letters), opens the input and
 * output streams (stdin/stdout when no filename is given), optionally
 * gobbles an input header and spews an output header, then copies values
 * from the input format's get() to the output format's put().
 *
 * Returns EXIT_SUCCESS when the input was consumed up to EOF and the
 * output was written without error; EXIT_FAILURE otherwise.  Command
 * line errors terminate through do_error().
 */
int main(int argc, char *argv[])
{
    int i, j;
    int rc;
    int write_failed;
    int status = EXIT_SUCCESS;
    char *InputFilename = NULL;
    char *OutputFilename = NULL;
    FILE *infp, *outfp;
    unsigned long ucp;
    struct Format *infmt = &fmt_table[0];
    struct Format *outfmt = &fmt_table[0];

    /* argv[0] is allowed to be NULL; keep Argv0 printable with %s. */
    Argv0 = (argc > 0 && argv[0] != NULL) ? argv[0] : "unitrans";

    for (i=1; i < argc; i++)
    {
        /* First non-option argument (or a lone "-") ends option parsing. */
        if (argv[i][0] != '-') break;
        if (argv[i][1] == '\0') break;

        if (steq(argv[i]+1, "-")) { ++i; break; }
        else if (steq(argv[i]+1, "?")) do_help(0);
        else if (steq(argv[i]+1, "-help")) do_help(0);
        else if (steq(argv[i]+1, "-man")) do_help(1);
        else if (steq(argv[i]+1, "o") || steq(argv[i]+1, "O")) {
            if (i >= argc-1)
              do_error("I need a filename with '%s'!\n", argv[i]);
            OutputFilename = argv[++i];
        }
        else if (strlen(argv[i]) == 5
              && argv[i][1] == '-'
              && argv[i][3] == '2')
        {
            /*
             * "--x2y": exactly five characters.  Checking the length
             * first keeps every index below inside the string.
             */
            infmt = find_format_by_code(argv[i][2]);
            if (infmt == NULL)
              do_error("Unrecognized input format '%c'\n", argv[i][2]);

            outfmt = find_format_by_code(argv[i][4]);
            if (outfmt == NULL)
              do_error("Unrecognized output format '%c'\n", argv[i][4]);
        }
        else {
            /* A cluster of single-letter flags, e.g. "-bS". */
            for (j=1; argv[i][j]; ++j) {
                switch (argv[i][j]) {
                    case 'B': ConvertCRLFPairs = 1; break;
                    case 'b': ConvertCRLFPairs = 0; break;
                    case 'G': GobbleHeader = 1; break;
                    case 'g': GobbleHeader = 0; break;
                    case 'S': SpewHeader = 1; break;
                    case 's': SpewHeader = 0; break;
                    default: do_error("Unrecognized option(s) %s\n",
                                 argv[i]);
                }
            }
        }
    }

    /* Remaining positional arguments: input file, then output file. */
    if (InputFilename == NULL && i < argc) {
        InputFilename = argv[i++];
    }
    if (OutputFilename == NULL && i < argc) {
        OutputFilename = argv[i++];
    }
    if (i != argc)
      do_error("Extra arguments at end of command line.\n");

    if (InputFilename) {
        infp = fopen(InputFilename, "rb");
        if (infp == NULL)
          do_error("Can't open input file '%s'!\n", InputFilename);
    }
    else {
        infp = stdin;
    }

    if (OutputFilename) {
        outfp = fopen(OutputFilename, "wb");
        if (outfp == NULL)
          do_error("Can't open output file '%s'!\n", OutputFilename);
    }
    else {
        outfp = stdout;
    }

    if (GobbleHeader && infmt->gobble)
      infmt->gobble(infp);
    if (SpewHeader && outfmt->spew)
      outfmt->spew(outfp);

    while ((rc = infmt->get(&ucp, infp)) > 0) {
        /*
         * Handle some brokenness in popular converters: a CR/LF pair
         * packed into a single value.  The test is on the decoded value
         * in ucp; rc is only used as get()'s status (loop condition and
         * the EOF check below).
         */
        if (ConvertCRLFPairs && (ucp == 0x0D0A || ucp == 0x0A0D)) {
            outfmt->put(0x0D, outfp);
            outfmt->put(0x0A, outfp);
        }
        else {
            outfmt->put(ucp, outfp);
        }
    }

    /* Anything other than EOF means get() stopped on bad input. */
    if (rc != EOF) {
        if (InputFilename != NULL)
            fprintf(stderr, "Invalid code found in input file '%s'; output"
                    " file unfinished\n", InputFilename);
        else
            fprintf(stderr, "Invalid code found in input file; output"
                    " file unfinished\n");
        status = EXIT_FAILURE;
    }

    fclose(infp);

    /* Earlier write errors stick in ferror(); fclose() reports flush errors. */
    write_failed = ferror(outfp);
    if (fclose(outfp) != 0)
        write_failed = 1;
    if (write_failed) {
        fprintf(stderr, "%s: Error while writing output\n", Argv0);
        status = EXIT_FAILURE;
    }

    return status;
}


/*
 * Report a fatal error and terminate the program.
 *
 * Writes "<program name>: " followed by the printf-style message built
 * from `fmat` and the variadic arguments, then exits with EXIT_FAILURE.
 * The message goes to stderr so that it is never mixed into converted
 * data when stdout is the output stream.  This function does not return.
 */
void do_error(const char *fmat, ...)
{
    va_list ap;

    /* Argv0 is NULL until main() has stored argv[0]. */
    fprintf(stderr, "%s: ", (Argv0 != NULL) ? Argv0 : "unitrans");
    va_start(ap, fmat);
    vfprintf(stderr, fmat, ap);
    va_end(ap);
    exit(EXIT_FAILURE);
}


/*
 * Print usage information to stdout and exit with status 0.
 *
 * man == 0 prints the short option summary; any other value prints the
 * longer manual text.  This function does not return.
 */
void do_help(int man)
{
    static const char *const brief_text[] = {
        "unitrans [-?] [-BbGgSs] [--x2y] [input] [output]",
        "Translates between incompatible Unicode encodings.",
        "  --x2y (e.g., --f2o, --42a): specify input and output formats",
        "  -B[b]: turn on [off] CR/LF fixing",
        "  -G[g]: turn on [off] input header gobbling",
        "  -S[s]: turn on [off] output header spewing",
    };
    static const char *const manual_text[] = {
        "unitrans [-?] [-GgSs] [--x2y] [input] [output]",
        " Translates a file between Unicode codings,",
        "   according to the value of the --x2y parameter.",
        " x,y can be any combination of the following:",
        "   A,a:   Plain ANSI encoding, replacing multibyte",
        "          Unicode values with '?' markers",
        "   F,f:   UTF-8 variable-length encoding",
        "   O,o:   Omega-style encoding: ASCII characters",
        "          with ^^^^6a7e to indicate 32-bit values.",
        "          Excessive carets in the input stream will",
        "          confuse this one; use with caution!",
        "   U,u,2: UCS-2 16-bit encoding",
        "   4:     UCS-4 32-bit encoding",
        "",
        " The -G option tells the program to gobble up any header",
        "   in the file; for example, the sequence of bytes FE FF",
        "   at the start of a UCS-2 encoded file.  If no header is",
        "   detected, nothing is gobbled, and the transcription",
        "   proceeds.  This function will probably be confused by",
        "   header-less UCS-4 files; use with caution!",
        " This option is turned on by default; use -g to turn it off.",
        "",
        " The -S option tells the program to spew out an appropriate",
        "   header before beginning to write the output file; for",
        "   example, the sequence of bytes FE FF at the start of a",
        "   UCS-2 encoded file.",
        " The -s option turns off header-spewing (the default).",
        "",
        " The -B option tells the program to replace instances of the",
        "   Unicode values U+0D0A and U+0A0D with the two-code sequence",
        "   U+0D U+0A.  This is useful because some programs that",
        "   generate Unicode output are broken like that.",
        " The Unicode glyphs U+0D0A and U+0A0D are a Malayam glyph and",
        "   an invalid entry in the Gurmukhi code page, respectively.",
        "   You should not have any reason to turn off this behavior",
        "   unless you are using one of those alphabets in your text.",
        " This option is turned on by default; use -b to turn it off.",
    };
    const char *const *lines;
    size_t count;
    size_t k;

    /* Pick which text to show, then emit it one line per puts() call. */
    if (man) {
        lines = manual_text;
        count = sizeof manual_text / sizeof manual_text[0];
    }
    else {
        lines = brief_text;
        count = sizeof brief_text / sizeof brief_text[0];
    }

    for (k = 0; k < count; ++k)
        puts(lines[k]);

    exit(0);
}