/*
   'usenetify2', a program for making C and C++ programs
   suitable for printing, emailing, and posting.
   Arthur O'Dwyer, 2004.
   This source code is in the public domain.
*/

#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define steq(x,y) (!strcmp((x),(y)))

/* Program name used as the prefix of every diagnostic. */
static const char *Argv0;

/* Requested line length (-L), and the margin subtracted from it. */
static int LineLength = 75;
static int FudgeMargin = 2;
/* LineLength - FudgeMargin; the column at which breaking is considered. */
static int EffectiveLineLength;
/* Tab stop spacing (-T). */
static int TabStop = 8;
/* Continuation indent (-I): negative means that many tabs, else spaces. */
static int IndentWidth = -1;
/* Nonzero (-N) to make sure each file's output ends with a newline. */
static int FinishNewline = 0;
/* Output file name (-o), or NULL for standard output. */
static const char *OutputFilename = NULL;

int process(FILE *in, FILE *out);
unsigned affect(unsigned State, int k);
unsigned kbreak(unsigned State, int k);
void perform_break(FILE *out, unsigned kind, int *linelen);
void dumpc(int k, FILE *out, int *linelen);
void do_error(const char *fmat, ...);
void do_help(int man);

static int parse_count(const char *arg, int *pos);
static int in_set(const char *set, int k);

/*
   Reads the run of decimal digits that starts at arg[*pos + 1] and
   returns its value.  On return *pos indexes the last digit consumed,
   so the caller's own increment steps past the number.  Reports an
   error (and exits) instead of overflowing 'int'.
*/
static int parse_count(const char *arg, int *pos)
{
    int value = 0;

    /* <ctype.h> functions need an unsigned char value, not a plain char */
    while (isdigit((unsigned char)arg[*pos + 1])) {
        int digit = arg[*pos + 1] - '0';
        if (value > (INT_MAX - digit) / 10)
            do_error("Number too large in option '%s'", arg);
        value = 10 * value + digit;
        ++*pos;
    }
    return value;
}

/*
   Returns nonzero if k is one of the characters in 'set'.  A bare
   strchr() would also match k == '\0' against the string terminator,
   so NUL is excluded explicitly.
*/
static int in_set(const char *set, int k)
{
    return k != '\0' && strchr(set, k) != NULL;
}

/*
   Parses the command line, then runs process() on each named input
   in order, sending all output to a single destination.
*/
int main(int argc, char *argv[])
{
    int i, j;
    int LiteralInputNames = 0;
    FILE *outfp = NULL;

    Argv0 = (argc > 0 && argv[0] != NULL) ? argv[0] : "usenetify2";

    for (i = 1; i < argc; i++) {
        /* The first non-option, or a lone "-" (stdin), ends the options */
        if (argv[i][0] != '-') break;
        if (argv[i][1] == '\0') break;

        if (steq(argv[i]+1, "-")) {
            /* "--": everything that follows is a literal file name */
            LiteralInputNames = 1;
            ++i;
            break;
        }
        else if (steq(argv[i]+1, "-help") ||
                 steq(argv[i]+1, "h") ||
                 steq(argv[i]+1, "?")) {
            do_help(0);
        }
        else if (steq(argv[i]+1, "-man")) {
            do_help(1);
        }
        else if (steq(argv[i], "-o") || steq(argv[i], "-O")) {
            if (i >= argc-1) {
                do_error("Need output filename with -o");
            }
            OutputFilename = argv[++i];
        }
        else {
            /* A cluster of single-letter options, e.g. "-l72t4n" */
            for (j = 1; argv[i][j]; ++j) {
                int has_number = isdigit((unsigned char)argv[i][j+1]);

                switch (argv[i][j]) {
                case 'l': case 'L':
                    if (!has_number)
                        do_error("Need a line length attached to '-L'");
                    LineLength = parse_count(argv[i], &j);
                    if (LineLength <= FudgeMargin) {
                        do_error("Line length must be greater"
                                 " than %d!", FudgeMargin);
                    }
                    break;
                case 'n': case 'N':
                    FinishNewline = 1;
                    break;
                case 't': case 'T':
                    if (!has_number)
                        do_error("Need a tabstop attached to '-T'");
                    TabStop = parse_count(argv[i], &j);
                    /* dumpc() computes a remainder modulo TabStop */
                    if (TabStop <= 0)
                        do_error("Tabstop must be greater than 0!");
                    break;
                case 'i': case 'I':
                    if (!has_number)
                        do_error("Need a width attached to '-i'");
                    IndentWidth = parse_count(argv[i], &j);
                    break;
                default:
                    do_error("Unrecognized option(s) %s; -h for help",
                             argv[i]);
                    break;
                }
            }
        }
    }

    EffectiveLineLength = LineLength - FudgeMargin;
    if (TabStop >= EffectiveLineLength) {
        do_error("Tabstop %d too big for line width %d!",
                 TabStop, LineLength);
    }
    if (IndentWidth >= EffectiveLineLength) {
        do_error("Indentation width %d too big for line width %d!",
                 IndentWidth, LineLength);
    }

    if (i == argc)
        do_error("No files to process; -h for help");

    for (; i < argc; ++i) {
        FILE *infp;

        if (!LiteralInputNames && steq(argv[i], "--")) {
            LiteralInputNames = 1;
            continue;
        }

        if (!LiteralInputNames && steq(argv[i], "-"))
            infp = stdin;
        else
            infp = fopen(argv[i], "r");
        if (infp == NULL)
            do_error("Error opening file '%s' for input; -h for help",
                     argv[i]);

        /* Open the output once only: reopening it with "w" for every
           input would discard what the earlier inputs produced. */
        if (outfp == NULL) {
            outfp = OutputFilename ? fopen(OutputFilename, "w") : stdout;
            if (outfp == NULL)
                do_error("Error opening file '%s' for output",
                         OutputFilename);
        }

        process(infp, outfp);

        if (infp != stdin) fclose(infp);
    }

    if (outfp != NULL) {
        int failed = ferror(outfp);

        if (outfp != stdout) {
            if (fclose(outfp) != 0) failed = 1;
        }
        else if (fflush(stdout) != 0) {
            failed = 1;
        }
        if (failed)
            do_error("Error writing to '%s'",
                     OutputFilename ? OutputFilename : "standard output");
    }

    return EXIT_SUCCESS;
}

/* State variables */
#define CCOMMENT   0x0001
#define CPPCOMMENT 0x0002
#define QUOTES     0x0004
#define CHOTES     0x0008
#define DIRECTIVE  0x0010
#define SLASH      0x0020
#define STAR       0x0040
#define BACKSLASH  0x0080
#define NEWLINE    0x0100

/* Kinds of break supported */
#define B_BACKSLASH  0x0001
#define B_NOINDENT   0x0002
#define B_INDENT     0x0010
#define B_STRING     0x0004
#define B_CPPCOMMENT 0x0008

/*
   dumpc "control characters".  These must lie outside the range that
   getc() can return for a real byte (0..UCHAR_MAX); CHAR_MAX is not
   enough where plain 'char' is signed.
*/
#define K_INDENT     (UCHAR_MAX+1)
#define K_OPTNEWLINE (UCHAR_MAX+2)

/*
   Copies 'in' to 'out' one character at a time, inserting a line break
   once the current line has reached EffectiveLineLength and a break is
   permitted next to the current character.  Always returns 0.
*/
int process(FILE *in, FILE *out)
{
    int k;
    int linelen = 0;
    unsigned State = NEWLINE;
    unsigned breakbefore = 0;   /* breaks allowed after the previous char */
    unsigned breakafter = 0;    /* breaks allowed after the current char */

    while ((k = getc(in)) != EOF) {
        State = affect(State, k);
        /* Is it legitimate to break the line after k? */
        breakafter = kbreak(State, k);

        if (linelen < EffectiveLineLength || k == '\n' ||
            (breakafter | breakbefore) == 0) {
            /* Line still short, ending anyway, or no break possible */
            dumpc(k, out, &linelen);
        }
        else if ((breakbefore & B_STRING) && breakafter &&
                 !(breakafter & B_STRING)) {
            /* Prefer a non-string break after k to splitting a string */
            dumpc(k, out, &linelen);
            perform_break(out, breakafter, &linelen);
        }
        else if ((breakafter & breakbefore) == breakafter) {
            /* Breaking before k offers every kind that breaking after
               would (this also covers breakafter == 0) */
            perform_break(out, breakbefore, &linelen);
            dumpc(k, out, &linelen);
        }
        else {
            dumpc(k, out, &linelen);
            perform_break(out, breakafter, &linelen);
        }

        breakbefore = breakafter;
    }

    /* Terminating newline, if wanted and necessary */
    dumpc(K_OPTNEWLINE, out, &linelen);
    return 0;
}

/*
   Emits one line break of the most specific kind set in 'kind',
   tested in this order: string splice, C++ comment continuation,
   indented newline, plain newline, backslash-newline.
*/
void perform_break(FILE *out, unsigned kind, int *linelen)
{
    if (kind & B_STRING) {
        /* Close the literal, then reopen it on the next line */
        dumpc('"', out, linelen);
        dumpc('\n', out, linelen);
        dumpc(K_INDENT, out, linelen);
        dumpc('"', out, linelen);
    }
    else if (kind & B_CPPCOMMENT) {
        dumpc('\n', out, linelen);
        dumpc(K_INDENT, out, linelen);
        dumpc('/', out, linelen);
        dumpc('/', out, linelen);
    }
    else if (kind & B_INDENT) {
        dumpc('\n', out, linelen);
        dumpc(K_INDENT, out, linelen);
    }
    else if (kind & B_NOINDENT) {
        dumpc('\n', out, linelen);
    }
    else if (kind & B_BACKSLASH) {
        dumpc('\\', out, linelen);
        dumpc('\n', out, linelen);
    }
    else do_error("Unreachable in 'perform_break'!");
}

/*
   Returns the set of B_* break kinds that are permitted immediately
   after character k, given the scanner state that results from k.
   Returns 0 when no break is permitted there.
*/
unsigned kbreak(unsigned State, int k)
{
    if (State & DIRECTIVE) {
        if (State & BACKSLASH) return 0;
        else return B_BACKSLASH;
    }
    else if (State & QUOTES) {
        if (State & BACKSLASH) {
            return 0;
        }
        else if (k == '"') {
            return B_BACKSLASH;
        }
        else {
            return (B_BACKSLASH|B_STRING);
        }
    }
    else if (State & CHOTES) {
        if (State & BACKSLASH) {
            return 0;
        }
        else {
            return B_BACKSLASH;
        }
    }
    else if (State & CCOMMENT) {
        if (State & BACKSLASH) {
            if (State & STAR) return B_NOINDENT;
            else return (B_NOINDENT|B_INDENT);
        }
        else if (State & STAR) {
            return B_BACKSLASH;
        }
        else {
            return (B_BACKSLASH|B_NOINDENT|B_INDENT);
        }
    }
    else if (State & CPPCOMMENT) {
        if (State & BACKSLASH) return 0;
        else return (B_BACKSLASH|B_CPPCOMMENT);
    }
    else {
        /* The thing is a regular piece of code */
        if (isspace(k)) {
            /* It's always safe to break regular code
               at whitespace boundaries. */
            return (B_BACKSLASH|B_NOINDENT|B_INDENT);
        }
        else if (in_set(";,{}[]()\"", k)) {
            /* These characters are not part of any multicharacter
               tokens, di- or trigraphs in C or C++; thus it's safe
               to break after them no matter what. */
            return (B_BACKSLASH|B_NOINDENT|B_INDENT);
        }
        else return B_BACKSLASH;
    }
}

/*
   Returns the scanner state that follows 'State' once character k has
   been read.  The state is a bit set of the "State variables" flags.
*/
unsigned affect(unsigned State, int k)
{
    /* Outside comments, any character other than whitespace and
       "#/*\" clears NEWLINE right away; those four are dealt with
       by the branches below. */
    if (!(State & (CCOMMENT|CPPCOMMENT)) &&
        !isspace(k) && !in_set("#/*\\", k)) {
        State &= ~NEWLINE;
    }

    if (k == '#') {
        if (State & NEWLINE) {
            State |= DIRECTIVE;
        }
        State &= ~(SLASH|STAR|BACKSLASH|NEWLINE);
    }
    else if (k == '\\') {
        /* Inside a literal, a second backslash cancels the first */
        if (State & (QUOTES|CHOTES))
            State ^= BACKSLASH;
        else
            State |= BACKSLASH;
    }
    else if (k == '\n') {
        /* A newline preceded by a backslash keeps the current context */
        if (!(State & BACKSLASH)) {
            State &= ~(CPPCOMMENT|QUOTES|CHOTES|DIRECTIVE|SLASH|STAR);
            State |= NEWLINE;
        }
        State &= ~BACKSLASH;
    }
    else if (k == '/') {
        if (State & STAR) {
            State &= ~(CCOMMENT|STAR);
        }
        else if (State & SLASH) {
            State &= ~SLASH;
            State |= CPPCOMMENT;
        }
        else if (!(State & (CCOMMENT|CPPCOMMENT|QUOTES|CHOTES))) {
            State |= SLASH;
        }
        State &= ~BACKSLASH;
    }
    else if (k == '*') {
        if (State & CCOMMENT) {
            State |= STAR;
        }
        else if (State & SLASH) {
            State &= ~SLASH;
            State |= CCOMMENT;
        }
        State &= ~(SLASH|BACKSLASH);
    }
    else if (k == '"') {
        if (State & QUOTES) {
            if (!(State & BACKSLASH))
                State &= ~QUOTES;
        }
        else if (!(State & (CHOTES|CCOMMENT|CPPCOMMENT))) {
            State |= QUOTES;
        }
        State &= ~BACKSLASH;
    }
    else if (k == '\'') {
        if (State & CHOTES) {
            if (!(State & BACKSLASH))
                State &= ~CHOTES;
        }
        else if (!(State & (QUOTES|CCOMMENT|CPPCOMMENT))) {
            State |= CHOTES;
        }
        State &= ~BACKSLASH;
    }
    else {
        if (State & (SLASH|BACKSLASH)) {
            State &= ~NEWLINE;
        }
        State &= ~(SLASH|STAR|BACKSLASH);
    }
    return State;
}

/*
   Writes one character to 'out' and keeps *linelen equal to the
   current output column.  Tabs and other whitespace are not written
   directly: they are counted and emitted as spaces just before the
   next non-blank character, so blanks at the end of a line are never
   output.  K_INDENT emits the continuation indent; K_OPTNEWLINE ends a
   file, adding a newline if FinishNewline is set and one is missing.
*/
void dumpc(int k, FILE *out, int *linelen)
{
    static int spaces_to_output = 0;
    static int last_was_newline = 0;

    if (k == K_OPTNEWLINE) {
        if (FinishNewline && !last_was_newline) {
            dumpc('\n', out, linelen);
            k = '\n';
        }
        /* Blanks still pending at end of file are dropped; clear the
           count so they cannot spill into the next input file. */
        spaces_to_output = 0;
    }
    else if (k == K_INDENT) {
        int i;
        if (IndentWidth < 0) {
            for (i = 0; i < -IndentWidth; ++i)
                dumpc('\t', out, linelen);
        }
        else {
            for (i = 0; i < IndentWidth; ++i)
                dumpc(' ', out, linelen);
        }
    }
    else if (k == '\n') {
        putc('\n', out);
        *linelen = 0;
        spaces_to_output = 0;
    }
    else if (k == '\t') {
        /* Advance to the next multiple of TabStop */
        do {
            ++spaces_to_output;
            ++*linelen;
        } while ((*linelen % TabStop) != 0);
    }
    else if (isspace(k)) {
        ++spaces_to_output;
        ++*linelen;
    }
    else {
        while (spaces_to_output) {
            putc(' ', out);
            --spaces_to_output;
        }
        putc(k, out);
        ++*linelen;
    }

    last_was_newline = (k == '\n');
    return;
}

/*
   Prints "<program>: <formatted message>" and a newline to stderr,
   then exits with EXIT_FAILURE.  Does not return.  The message goes
   to stderr so that it cannot be mixed into output written to stdout.
*/
void do_error(const char *fmat, ...)
{
    va_list ap;

    fprintf(stderr, "%s: ", Argv0 ? Argv0 : "usenetify2");
    va_start(ap, fmat);
    vfprintf(stderr, fmat, ap);
    va_end(ap);
    putc('\n', stderr);
    exit(EXIT_FAILURE);
}

/*
   Prints the short usage summary (man == 0) or the complete help text
   (man != 0) to stdout, then exits with status 0.  Does not return.
*/
void do_help(int man)
{
    if (!man) {
        puts("usenetify2 [-?h] [-Ii#Ll#Tt#N] [-o filename] filenames");
        puts("Wraps lines in C and C++ programs for posting or printing.");
        puts(" -In (e.g., -I0): indent broken lines n columns");
        puts(" -Ln (e.g., -L75): lines no wider than n columns");
        puts(" -Tn (e.g., -T4): tabstop every n columns");
        puts(" -N: make sure each file ends with a newline");
        puts(" -o filename: send output to specified file");
        puts(" --help: show this message");
        puts(" --man: show complete help text");
        exit(0);
    }

    puts("usenetify2: Prepares programs for posting and printing.\n");
    puts(" This program will take an unformatted C or C++ program as");
    puts(" input, and produce an equally unformatted but correctly");
    puts(" line-wrapped version of the program as output.");
    puts(" The goal of this program is not to duplicate the function");
    puts(" of GNU 'indent', but rather to take a reasonably well-");
    puts(" designed program and make it acceptable for transmission");
    puts(" over a limited medium such as email, Usenet, or 80-column");
    puts(" line printers. Very little attempt is made to beautify");
    puts(" the generated output; however, it is of utmost importance");
    puts(" that the generated code have exactly the same semantics as");
    puts(" the input code, and conform to all the same standards.");
    puts(" This means, e.g., that C and C++-style comments must be");
    puts(" preserved accurately, and lines containing preprocessor");
    puts(" directives must not be broken apart.");
    puts(" The -L parameter controls the length of output code lines;");
    puts(" this is a hard limit, so if the output ever contained");
    puts(" lines longer than the limit, then it would be a bug.");
    puts(" The default line length is 75 characters.");
    puts(" The -T parameter controls the tab setting. 'usenetify2'");
    puts(" automatically detabs all its input, treating a hard tab");
    puts(" as equal to as many spaces as it takes to get to the");
    puts(" next tabstop. -T8 is the default; this sets tabstops");
    puts(" every 8 columns.");
    puts(" The -I parameter controls the length of the extra indent");
    puts(" inserted, wherever possible, before the continuation of");
    puts(" a broken line. The default is to indent one tabstop;");
    puts(" setting -I0 (letter I, number zero) will perform no");
    puts(" indentation. Setting -In, where n represents any");
    puts(" positive integer, will indent broken lines by exactly n");
    puts(" spaces.");
    puts("");
    puts(" Please direct any bug reports or feature requests to the"
         " author.");
    exit(0);
}