/*
   'usenetify', a program for making C and C++ programs
   suitable for printing, emailing, and posting.
   Arthur O'Dwyer, 2004.
   This source code is in the public domain.
*/

#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define steq(x,y) (!strcmp(x,y))
#define NBREAKS 6        /* how many candidate break points dumpline() remembers */
#define MAXLINELEN 200   /* exclusive upper bound on the output line length */

static char *Argv0;                    /* program name, used in error messages */
static int LineLength = 75;            /* hard limit on output line length */
static int IndentLength = 4;           /* extra indent of continuation lines */
static int DebugOutput = 0;            /* -D: trace the line breaker on stderr */
static int ReplaceFiles = 0;           /* -R: overwrite the input files */
static const char *CopyName = "cp";    /* -c: command used to copy files for -R */

int process(FILE *in, FILE *out);
int dumpline(char *buffer, int idx, int indent, FILE *out);
int IsAlnumToken(int k);
void do_error(const char *fmat, ...);
void do_help(int man);
static void replace_file(FILE *infp, const char *path);

/*
   Parse the command line, then filter either stdin or each named file.
   Options are read until the first argument that does not start with
   '-', a lone "-" (which names stdin), or "--".
*/
int main(int argc, char **argv)
{
    int i, j;

    Argv0 = argv[0];

    for (i = 1; i < argc; ++i) {
        const char *arg = argv[i];
        char *end;
        unsigned long val;

        if (arg[0] != '-' || arg[1] == '\0')
            break;
        if (steq(arg+1, "-")) {
            ++i;
            break;
        }
        if (steq(arg+1, "-help") || steq(arg+1, "?") || steq(arg+1, "h"))
            do_help(0);
        if (steq(arg+1, "-man"))
            do_help(1);
        if (steq(arg, "-c") || steq(arg, "-C")) {
            if (i >= argc-1)
                do_error("Need command with option -c");
            CopyName = argv[++i];
            continue;
        }

        /* An all-numeric option such as -65 sets the line length. */
        val = strtoul(arg+1, &end, 10);
        if (*end == '\0') {
            if (val > 1 && val < MAXLINELEN)
                LineLength = (int)val;
            else
                do_error("Line length must be an integer between 2 and %d!",
                         MAXLINELEN-1);
            continue;
        }

        /* Otherwise the argument is a cluster of single-letter flags. */
        for (j = 1; arg[j]; ++j) {
            int c = arg[j];
            if (c == 'd' || c == 'D')
                DebugOutput = 1;
            else if (c == 'r' || c == 'R')
                ReplaceFiles = 1;
            else if (c == 'i' || c == 'I') {
                if (!isdigit((unsigned char)arg[j+1]))
                    do_error("Need indent length with -i");
                IndentLength = 0;
                for ( ; isdigit((unsigned char)arg[j+1]); ++j) {
                    IndentLength = 10*IndentLength + (arg[j+1] - '0');
                    /* Reject before the accumulator can overflow. */
                    if (IndentLength >= MAXLINELEN)
                        do_error("Indent length must be an integer less "
                                 "than %d!", MAXLINELEN);
                }
            }
            else
                do_error("Unrecognized option(s) %s; -h for help", arg);
        }
    }

    if (i == argc) {
        process(stdin, stdout);
    }
    else {
        for (; i < argc; i++) {
            FILE *infp = steq(argv[i], "-")? stdin: fopen(argv[i], "r");
            if (infp == NULL)
                do_error("File does not exist: %s", argv[i]);
            if (ReplaceFiles && (infp != stdin)) {
                replace_file(infp, argv[i]);
            }
            else {
                process(infp, stdout);
                if (infp != stdin)
                    fclose(infp);
            }
        }
    }
    return EXIT_SUCCESS;
}

/*
   Implements -R for one file: filter 'infp' into a temporary file, then
   run "<CopyName> <tempfile> <path>" to copy the result over the input.
   Closes 'infp'.  Exits through do_error() on any failure; if the copy
   command fails the temporary file is left in place.

   NOTE(review): the file names are handed to the shell unquoted, so
   names containing spaces or shell metacharacters are unsafe with -R.
   tmpnam() is also open to a race between naming and opening the file;
   it is kept because the program restricts itself to standard C.
*/
static void replace_file(FILE *infp, const char *path)
{
    char nam[L_tmpnam];
    FILE *ftmp;
    char *cmd;
    int status;

    if (tmpnam(nam) == NULL)
        do_error("Could not generate a temporary file name");
    ftmp = fopen(nam, "w");
    if (ftmp == NULL)
        do_error("Could not open temporary file %s for writing", nam);

    process(infp, ftmp);
    fclose(infp);
    if (fclose(ftmp) != 0) {
        remove(nam);
        do_error("Could not write temporary file %s", nam);
    }

    /* Two separating spaces plus the terminating NUL. */
    cmd = malloc(strlen(CopyName) + strlen(nam) + strlen(path) + 3);
    if (cmd == NULL) {
        remove(nam);
        do_error("Out of memory");
    }
    sprintf(cmd, "%s %s %s", CopyName, nam, path);
    status = system(cmd);
    free(cmd);
    if (status != 0)
        do_error("Could not replace %s; output left in %s", path, nam);
    remove(nam);
}

/*
   Copy 'in' to 'out', wrapping every line longer than LineLength.
   Characters are collected in a line buffer; as soon as the buffer
   holds more than LineLength characters it is shortened, either with a
   backslash-newline splice (lines whose first non-blank character is
   '#') or by dumpline().  Always returns 0.
*/
int process(FILE *in, FILE *out)
{
    char buffer[MAXLINELEN+10];
    int i = 0;              /* number of characters currently buffered */
    int k;
    int indentcount = 0;    /* leading whitespace of the current input line */
    int newline = 1;        /* still inside the leading whitespace? */
    int cpp_directive = 0;

    while ((k = getc(in)) != EOF) {
        if (k == '\n') {
            fprintf(out, "%.*s\n", i, buffer);
            i = 0;
            indentcount = 0;
            newline = 1;
            cpp_directive = 0;
        }
        else {
            buffer[i++] = (char)k;
            if (newline && isspace(k))
                ++indentcount;
            else {
                if (newline && k == '#')
                    cpp_directive = 1;
                newline = 0;
            }
        }

        while (i > LineLength) {
            if (cpp_directive) {
                /* Directives are only ever continued with a splice. */
                int width = LineLength-1;
                fprintf(out, "%.*s\\\n", width, buffer);
                i -= width;
                memmove(buffer, buffer+width, i);
            }
            else {
                i = dumpline(buffer, i, indentcount, out);
                newline = (i == 0);
            }
        }
    }

    /* Flush a final line that had no terminating newline. */
    if (i > 0) {
        while (i > LineLength)
            i = dumpline(buffer, i, indentcount, out);
        fprintf(out, "%.*s\n", i, buffer);
    }
    return 0;
}

/*
   Shorten an over-long line held in buffer[0..idx-1].

   If idx <= LineLength the whole line is written and 0 is returned.
   Otherwise either leading whitespace is removed (nothing is written),
   or one output line is written and the remainder is moved to the
   front of 'buffer', re-indented to 'indent' (plus IndentLength for
   regular and string breaks).  The return value is the number of
   characters left in 'buffer'; whenever idx > LineLength it is
   strictly smaller than idx, so the caller's loop terminates and the
   buffer never grows.
*/
int dumpline(char *buffer, int idx, int indent, FILE *out)
{
    enum { CPPCOMMENTBEGIN, STRING, REGULAR };
    int lastbreak[NBREAKS];         /* token start positions, newest first */
    int typebreak[NBREAKS] = {0};   /* token kinds, parallel to lastbreak */
    int strclose;   /* index of the closing quote of the newest quoted token,
                       or >= idx if it is unterminated */
    int i, b;

    if (idx <= LineLength) {
        fprintf(out, "%.*s\n", idx, buffer);
        return 0;
    }

    for (b = 0; b < NBREAKS; ++b)
        lastbreak[b] = -1;
    strclose = idx;

    /* It's more aesthetic to slide the line over than to break it */
    for (i = 0; i < idx && isspace((unsigned char)buffer[i]); ++i)
        continue;
    if (i > 3*LineLength/4) {
        /* The line is 3/4 whitespace; don't waste that space! */
        int new_margin = i - IndentLength - 1;
        if (new_margin > LineLength/3)
            new_margin = LineLength/3;
        if (new_margin < 0)
            new_margin = 0;
        idx -= (i - new_margin);
        memmove(buffer, buffer+(i-new_margin), idx);
        return idx;
    }

    /*
       We have to find a logical place to break this line.  So we
       really need to parse the line.  We'll remember the last NBREAKS
       places to break it, keeping special semantics for strings (can
       break apart) and C++ comments (can split lines as long as we
       remember to include the extra // marker).  We do not keep
       special semantics for character literals (in general too short
       to bother) or C comments (since we can just treat slash-star and
       star-slash as regular punctuation, and the comment text as
       regular identifiers).  If we find no places to break the line at
       all, then we will simply use the backslash line-splice
       mechanism, but that is only a last resort.
    */
    if (DebugOutput)
        fprintf(stderr, "parsing line to break (%d)\n", idx);

    i = 0;
    while (1) {
        while (i < idx && isspace((unsigned char)buffer[i]))
            ++i;
        if (i >= idx)
            break;

        /* A new token.  Shift it in the front. */
        memmove(lastbreak+1, lastbreak, (NBREAKS-1) * sizeof *lastbreak);
        memmove(typebreak+1, typebreak, (NBREAKS-1) * sizeof *typebreak);
        lastbreak[0] = i;
        if (buffer[i] == '/' && i+1 < idx && buffer[i+1] == '/')
            typebreak[0] = CPPCOMMENTBEGIN;
        else if (buffer[i] == '"')
            typebreak[0] = STRING;
        else
            typebreak[0] = REGULAR;

        if (DebugOutput)
            fprintf(stderr, "%d:%c\n", i, "CSR"[typebreak[0]]);

        if (typebreak[0] == CPPCOMMENTBEGIN) {
            /* No use parsing the rest of this line! */
            break;
        }

        if (IsAlnumToken(buffer[i])) {
            while (i < idx && IsAlnumToken(buffer[i]))
                ++i;
        }
        else if (buffer[i] == '\'' || buffer[i] == '"') {
            int lookfor = buffer[i++];
            for (; i < idx && buffer[i] != lookfor; ++i) {
                if (buffer[i] == '\\')
                    ++i;
            }
            strclose = i;
            /* skip closing quote if found */
            if (i < idx)
                ++i;
        }
        else {
            /* a sequence consisting only of punctuation */
            do {
                ++i;
            } while (i < idx && !isspace((unsigned char)buffer[i])
                     && !IsAlnumToken(buffer[i])
                     && !strchr("'\"/", buffer[i]));
        }
    }

    /* Locate a reasonable place to break the line */
    for (b = 0; ; ++b) {
        if (b >= NBREAKS || lastbreak[b] == -1)
            goto no_breaks_found;
        /*
           A highly pathological case brought on by excessive
           indentation.  If we break here, we will actually *lengthen*
           the line, not shorten it!
        */
        if (typebreak[b] == REGULAR
            && indent+IndentLength-lastbreak[b] >= 0)
            continue;
        if (lastbreak[b] > LineLength-5) {
            /*
               This break is really far right, but if the *next* break
               will be really far left, then prefer this one.  When no
               further entry is recorded, keep looking, which ends in
               no_breaks_found.
            */
            if (b+1 >= NBREAKS
                || lastbreak[b+1] > LineLength/3
                || typebreak[b+1] != REGULAR)
                continue;
            else
                break;
        }
        else {
            /* This break has no pathologies whatsoever! */
            break;
        }
    }

    if (DebugOutput)
        fprintf(stderr, "lastbreak[%d] selected: %d:%c\n", b,
                lastbreak[b], "CSR"[typebreak[b]]);

    if (lastbreak[b] < LineLength*2/3 && typebreak[b] == CPPCOMMENTBEGIN) {
        /*
           Break the comment and re-comment the remainder.  See if we
           can break it at a whitespace boundary, first.  Regardless,
           make sure not to break right after a backslash.  If not,
           then just break it randomly.
        */
        int where = LineLength-1;
        while (where > lastbreak[b]
               && !isspace((unsigned char)buffer[where]))
            --where;
        if (where-lastbreak[b] <= 3)
            where = LineLength;
        while (where > lastbreak[b] && buffer[where-1] == '\\')
            --where;
        if (where-lastbreak[b] <= 3)
            where = LineLength;
        /* Re-commenting must not make the remainder longer. */
        if (where <= indent+3)
            goto no_breaks_found;
        fprintf(out, "%.*s\n", where, buffer);
        idx -= where;
        memmove(buffer+indent+3, buffer+where, idx);
        /* properly indent the next line with spaces */
        memset(buffer, ' ', indent);
        buffer[indent] = '/';
        buffer[indent+1] = '/';
        buffer[indent+2] = ' ';
        return idx+indent+3;
    }

    if (b == 0 && typebreak[b] == STRING) {
        /*
           Break in the middle of a string literal.  Pathologies to
           avoid: Don't break between a backslash and the next
           character.  Don't break right next to the opening quote:
           pull the whole string onto the next line in that case.
        */
        int where = LineLength-1;
        while (where > lastbreak[b]
               && !isspace((unsigned char)buffer[where]))
            --where;
        if (where-lastbreak[b] < 2)
            where = LineLength-1;
        while (where > lastbreak[b] && buffer[where-1] == '\\')
            --where;
        /*
           Pathological case: a really long string full of
           backslashes.  Count them and break the line right before
           the last "odd-numbered" backslash, i.e. one that starts an
           escape sequence.
        */
        if (where-lastbreak[b] < 2) {
            int count = 0;
            int k;
            where = lastbreak[b];
            for (k = lastbreak[b]+1; k < LineLength; ++k) {
                if (buffer[k] == '\\') {
                    ++count;
                    if (count % 2)
                        where = k;
                }
            }
        }
        if (where == lastbreak[b])
            goto no_breaks_found;
        /*
           The literal already ended before the break point, so there
           is no open string to split; break in front of it instead.
        */
        if (where > strclose)
            goto regular_break;
        /* The re-quoted remainder must come out shorter than the input. */
        if (where <= indent+IndentLength+1)
            goto no_breaks_found;
        fprintf(out, "%.*s\"\n", where, buffer);
        idx -= where;
        memmove(buffer+indent+(IndentLength+1), buffer+where, idx);
        memset(buffer, ' ', indent+IndentLength);
        buffer[indent+IndentLength] = '"';
        return indent+(IndentLength+1)+idx;
    }

  regular_break:
    if (DebugOutput)
        fprintf(stderr, "regular_break\n");
    /* Pathological case: shifting down this line won't help */
    if (lastbreak[b] <= indent+IndentLength)
        goto no_breaks_found;
    /* just go on and break the line here */
    fprintf(out, "%.*s\n", lastbreak[b], buffer);
    idx -= lastbreak[b];
    memmove(buffer+indent+IndentLength, buffer+lastbreak[b], idx);
    /* and properly indent the next line with spaces */
    memset(buffer, ' ', indent+IndentLength);
    return indent+IndentLength+idx;

  no_breaks_found:
    if (DebugOutput)
        fprintf(stderr, "no_breaks_found\n");
    /* we have absolutely no place to break the line */
    fprintf(out, "%.*s\\\n", LineLength-1, buffer);
    idx -= LineLength-1;
    memmove(buffer, buffer+LineLength-1, idx);
    return idx;
}

/*
   Could this character be part of an identifier or numeric literal?
   We don't want to break names at underscores, or floating-point
   constants at the decimal point or around the sign in the "E+42"
   part.  Returns 1 if so, 0 otherwise; a NUL byte is never part of a
   token.
*/
int IsAlnumToken(int k)
{
    unsigned char c = (unsigned char)k;

    if (c == '\0')
        return 0;   /* strchr() would otherwise match the terminator */
    return isalnum(c) || strchr("_.+-", c) != NULL;
}

/*
   Print "<program name>: <formatted message>" and a newline to stderr,
   then exit with EXIT_FAILURE.  Takes printf-style arguments and never
   returns.
*/
void do_error(const char *fmat, ...)
{
    va_list ap;

    fprintf(stderr, "%s: ", Argv0);
    va_start(ap, fmat);
    vfprintf(stderr, fmat, ap);
    va_end(ap);
    fprintf(stderr, "\n");
    exit(EXIT_FAILURE);
}

/*
   Print the short usage summary (man == 0) or the long help text
   (man != 0) to stdout and exit with status 0.  Never returns.
*/
void do_help(int man)
{
    if (!man) {
        puts("usenetify [-?h] [linelength] [-Ii#DdRr] [-c cmd] filenames");
        puts("Wraps lines in C and C++ programs for posting or printing.");
        puts(" -D: produce copious debugging output to stderr");
        puts(" -R: replace named files with usenetified versions");
        printf(" linelen (e.g., -65, -80): make n-character lines "
               "(currently %d)\n", LineLength);
        puts(" -In (e.g., -I4): use n-space extra indents on broken lines");
        puts(" -c : specify equivalent of Unix 'cp' (for use with -R)");
        puts(" --help: show this message");
        puts(" --man: show complete help text");
    }
    else {
        puts("usenetify: Prepares programs for posting and printing.\n");
        puts(" This program will take an unformatted C or C++ program as");
        puts(" input, and produce an equally unformatted but correctly");
        puts(" line-wrapped version of the program as output.");
        puts(" The goal of this program is not to duplicate the functionality");
        puts(" of GNU 'indent', but rather to take a reasonably well-");
        puts(" designed program and make it acceptable for transmission");
        puts(" over a limited medium such as email, Usenet, or 80-column");
        puts(" line printers. Very little attempt is made to beautify");
        puts(" the generated output; however, it is of utmost importance");
        puts(" that the generated code have exactly the same semantics as");
        puts(" the input code, and conform to all the same standards.");
        puts(" This means that C and C++-style comments must be preserved");
        puts(" accurately, and lines containing preprocessor directives");
        puts(" must not be broken, e.g.");
        puts(" The 'linelength' parameter controls the length of output");
        puts(" code lines; this is a hard limit, not a \"recommendation.\"");
        puts(" The default line length is 75 characters.");
        puts(" The -I parameter controls the length of the extra indent");
        puts(" inserted before the continuation of a broken line. The");
        puts(" default is -I4, 4 spaces; setting -I0 (letter I, number");
        puts(" zero) will make the parts of a broken line align under");
        puts(" each other.");
        puts(" The -D option sends some details from the line-breaking engine");
        puts(" to the standard error output. This is purely for debugging");
        puts(" purposes, and need not concern the user.");
        puts(" The -R option tells the program to overwrite its input files");
        puts(" with its output; this option can be **DANGEROUS**, since this");
        puts(" program is not guaranteed to produce correct results in all");
        puts(" cases.");
        puts(" The -c parameter can be used on non-*nix systems to specify");
        puts(" a replacement for the 'cp' file-copying utility.");
        puts("");
        puts(" Please direct any bug reports or feature requests to the author.");
    }
    exit(0);
}