/*
    Copyright (C) 1991-2002, The Numerical Algorithms Group Ltd.
    All rights reserved.
    Copyright (C) 2007-2009, Gabriel Dos Reis.
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
    met:

        - Redistributions of source code must retain the above copyright
          notice, this list of conditions and the following disclaimer.

        - Redistributions in binary form must reproduce the above copyright
          notice, this list of conditions and the following disclaimer in
          the documentation and/or other materials provided with the
          distribution.

        - Neither the name of The Numerical Algorithms Group Ltd. nor the
          names of its contributors may be used to endorse or promote products
          derived from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * hthits pattern htdb-file
 *
 * Scan HyperDoc files for a given pattern.
 *
 * For every page with at least one match, one line is written to stdout:
 *
 *     \newsearchresultentry{n}{title}{page-name}
 *
 * The title and body of each page are scanned but the name is not.  It is
 * possible that the title matches but not any lines.  The number of matches
 * in the page (n) is given first.
 *
 * SMW Feb 91
 */

#include "openaxiom-c-macros.h"

#include "debug.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <regex.h>
#include <locale.h>

#include "cfuns.h"

/*
 * For fixed-size arrays.
 */
#define MAX_HTDB_LINE   1024
#define MAX_ENTRY_TYPE  30      /* I.e. \page \newcommand \patch ... */
#define MAX_ENTRY_NAME  1024    /* E.g. DifferentialCalculusPage */
#define MAX_COMP_REGEX  1024

/* Location of one page inside a HyperDoc file. */
typedef struct PgInfo {
    char name[MAX_ENTRY_NAME];  /* page name as listed in the database */
    long start, size;           /* byte offset and byte length in the file */
} PgInfo;

/*
 * Global variables set according to the command line.
 */
char *progName;
char *pattern;
char *htdbFName;
int gverifydates = 0;           /* non-zero: reject files whose mtime differs */
regex_t reg_pattern;            /* compiled form of `pattern' */

/*
 * Report a malformed or unreadable database file and terminate.
 */
static void
badDB(void)
{
    fprintf(stderr, "%s: bad database file %s\n", progName, htdbFName);
    exit(1);
}

/*
 * Strip TeX markup from the NUL-terminated string s, in place:
 *   - a backslash and the letters following it become one blank,
 *   - a '%' and the rest of its line (up to, not including, the newline)
 *     become one blank,
 *   - each of '{', '}' and '#' becomes a blank.
 * The result is never longer than the input.
 */
static void
untexbuf(char* s)
{
    char *d = s;

    while (*s) {
        switch (*s) {
        case '\\':
            *d++ = ' ';
            s++;
            if (*s != '%') {
                /* <ctype.h> functions require an unsigned char value. */
                while (isalpha((unsigned char) *s))
                    s++;
            }
            break;
        case '%':
            *d++ = ' ';
            s++;
            while (*s && *s != '\n')
                s++;
            break;
        case '{':
        case '}':
        case '#':
            *d++ = ' ';
            s++;
            break;
        default:
            *d++ = *s++;
        }
    }
    *d = 0;
}

/*
 * Split the page text in buf into its title and body.
 *
 * buf must start with either \page{Name}{Title} or
 * \begin{page}{Name}{Title}; anything else is a fatal error.  The closing
 * brace of the title group is overwritten with NUL, *ptitle is set to the
 * first character of the title and *pbody to the text after the title.
 *
 * If the title group cannot be found, *ptitle and/or *pbody are left NULL
 * so that the caller can detect the malformed page.
 */
static void
splitpage(char* buf, char** ptitle, char** pbody)
{
    int n, depth, tno;
    char *s;

    *ptitle = NULL;
    *pbody = NULL;

    /* Only look at buf[1] when buf[0] is not the terminator. */
    switch (buf[0] ? buf[1] : 0) {
    case 'p':
        tno = 2;                /* \page{Name}{Title} */
        break;
    case 'b':
        tno = 3;                /* \begin{page}{Name}{Title} */
        break;
    default:
        fprintf(stderr, "%s: Invalid page format: %s\n", progName, buf);
        exit(1);
    }

    /* n counts top-level brace groups; the title is group number tno. */
    n = 0;
    depth = 0;
    for (s = buf; *s; s++) {
        if (*s == '{') {
            if (++depth == 1 && ++n == tno)
                *ptitle = s + 1;
        }
        if (*s == '}') {
            if (depth-- == 1 && n == tno) {
                *s = 0;
                *pbody = s + 1;
                break;
            }
        }
    }
}

/*
 * Given string s and length n, output the first n characters of s wrapped
 * in braces, with ` and newline converted to blanks.  This function
 * destructively modifies s.
 */
static void
squirt(char* s, int n)
{
    char *t, *e;

    for (t = s, e = s + n; t < e; t++) {
        if (*t == '`' || *t == '\n')
            *t = ' ';
    }
    /* The precision already limits the output to n characters. */
    printf("{%.*s}", n, s);
}

/*
 * Count the matches of the compiled pattern in a page and, if there are
 * any, print the result entry for that page.  The title contributes at
 * most one hit; the body contributes one hit per successive match.
 */
static void
searchPage(char* pgname, char* pgtitle, char* pgbody)
{
    char *bodyrest;
    regmatch_t match_pos;
    int nhits = 0;

    if (regexec(&reg_pattern, pgtitle, 1, &match_pos, 0) == 0)
        nhits++;

    bodyrest = pgbody;
    while (regexec(&reg_pattern, bodyrest, 1, &match_pos, 0) == 0) {
        nhits++;
        if (match_pos.rm_eo > 0) {
            bodyrest += match_pos.rm_eo;
        }
        else if (*bodyrest != '\0') {
            /* Empty match at the current position: step over one
               character, otherwise the search would never advance. */
            bodyrest++;
        }
        else {
            break;
        }
    }

    if (nhits) {
        printf("\\newsearchresultentry{%d}{%s}",nhits, pgtitle);
        squirt(pgname, strlen(pgname));
        printf("\n");
    }
}

/*
 * Read the page described by pg from infile, split it into title and
 * body, strip TeX markup from the body and search it.
 *
 * The page buffer is kept between calls and only grows.
 */
static void
handlePage(FILE* infile, PgInfo* pg)
{
    static char *pgBuf = 0;
    static long pgBufSize = 0;
    char *title, *body;
    size_t nread;

    /* A negative size can only come from a corrupt database. */
    if (pg->size < 0)
        badDB();

    if (pg->size > pgBufSize - 1) {
        free(pgBuf);
        pgBufSize = pg->size + 20000;
        pgBuf = malloc((size_t) pgBufSize);
        if (!pgBuf) {
            fprintf(stderr,"%s: Out of memory\n", progName);
            exit(1);
        }
    }

    if (fseek(infile, pg->start, SEEK_SET) != 0) {
        fprintf(stderr, "%s: Cannot read page %s\n", progName, pg->name);
        exit(1);
    }
    nread = fread(pgBuf, 1, (size_t) pg->size, infile);
    if (ferror(infile)) {
        fprintf(stderr, "%s: Cannot read page %s\n", progName, pg->name);
        exit(1);
    }
    /* Terminate at what was actually read so that no stale bytes from a
       previous page are searched after a short read. */
    pgBuf[nread] = 0;

    splitpage(pgBuf, &title, &body);
    if (title == NULL || body == NULL) {
        fprintf(stderr, "%s: Invalid page format: %s\n", progName, pg->name);
        exit(1);
    }
    /*untexbuf(title);*/
    untexbuf(body);

#ifdef DEBUG
    printf("-------------- %s -------------\n%s", pg->name, pgBuf);
    printf("============== %s =============\n", title);
    printf("%s", body);
#endif

    searchPage(pg->name, title, body);
}

/*
 * Open the HyperDoc file fname and process the pgc pages listed in pgv.
 */
static void
handleFilePages(const char* fname, int pgc, PgInfo* pgv)
{
    FILE *infile;
    int i;

    infile = fopen(fname, "r");
    if (infile == NULL) {
        fprintf(stderr, "%s: Cannot read file %s\n", progName, fname);
        exit(1);
    }

    for (i = 0; i < pgc; i++)
        handlePage(infile, pgv + i);

    fclose(infile);
}

/*
 * Process one file description from the database.
 *
 * On entry htdbFile is positioned at a description line (file name and
 * modification time); it is followed by one line per entry of the form
 * "type name offset lineno".  On return htdbFile is positioned at the next
 * file description, or at end of file.
 */
static void
handleFile(FILE* htdbFile)
{
    static PgInfo *pgInfoV = 0;
    static int pgInfoC = 0;

    char htdbLine[MAX_HTDB_LINE];
    char htfname[MAX_HTDB_LINE];
    time_t httime;
    long htstamp = 0;
    long htsize;
    struct stat htstat;

    long fstart, fend;
    int rc, i, npages;

    char entname[MAX_ENTRY_NAME], enttype[MAX_ENTRY_TYPE];
    long entoffset, entlineno;

    if (fgets(htdbLine, MAX_HTDB_LINE, htdbFile) == NULL)
        badDB();
    /* The field width must stay one less than MAX_HTDB_LINE.  The time
       stamp is scanned into a long because time_t need not be a long. */
    if (sscanf(htdbLine, " %1023s %ld", htfname, &htstamp) < 1)
        badDB();
    httime = (time_t) htstamp;

    /*
     * 1. Verify file: get size and check modification time.
     */
    rc = stat(htfname, &htstat);
    if (rc == -1) {
        fprintf(stderr, "%s: Cannot access %s\n", progName, htfname);
        exit(1);
    }
    if (gverifydates && (htstat.st_mtime != httime)) {
        fprintf(stderr, "%s: Out of date file %s\n", progName, htfname);
        exit(1);
    }
    htsize = htstat.st_size;

    /*
     * 2. Count the pages in the file.
     */
    npages = 0;
    fstart = ftell(htdbFile);
    fend = ftell(htdbFile);

    while (fgets(htdbLine, MAX_HTDB_LINE, htdbFile) != NULL) {
        if (htdbLine[0] == '\t')
            break;
        if (!strncmp(htdbLine, "\\page", 5))
            npages++;
        fend = ftell(htdbFile);
    }

    /*
     * 3. Find offset and size of each \page (skipping \newcommands etc.)
     */
    if (npages > pgInfoC) {
        free(pgInfoV);
        pgInfoC = npages;
        pgInfoV = malloc(npages * sizeof *pgInfoV);
        if (!pgInfoV) {
            fprintf(stderr, "%s: out of memory\n", progName);
            exit(1);
        }
    }

    fseek(htdbFile, fstart, SEEK_SET);

    for (i = 0; fgets(htdbLine, MAX_HTDB_LINE, htdbFile) != NULL;) {
        if (htdbLine[0] == '\t')
            break;

        /* The field widths must stay one less than MAX_ENTRY_TYPE and
           MAX_ENTRY_NAME.  The type, name and offset are all needed. */
        if (sscanf(htdbLine, "%29s %1023s %ld %ld",
                   enttype, entname, &entoffset, &entlineno) < 3)
            badDB();

        /* Any entry ends the page that precedes it. */
        if (i > 0 && pgInfoV[i - 1].size == -1)
            pgInfoV[i - 1].size = entoffset - pgInfoV[i - 1].start;

        if (!strcmp(enttype, "\\page")) {
            /* Step 2 counts by line prefix, this step by token; refuse
               to write past the array if the two disagree. */
            if (i >= npages)
                badDB();
            snprintf(pgInfoV[i].name, sizeof pgInfoV[i].name, "%s", entname);
            pgInfoV[i].start = entoffset;
            pgInfoV[i].size = -1;
            i++;
        }
    }

    /* The last page runs to the end of the file. */
    if (i > 0 && pgInfoV[i - 1].size == -1)
        pgInfoV[i - 1].size = htsize - pgInfoV[i - 1].start;

    if (i != npages)
        badDB();

    /*
     * 4. Position database input to read next file-description
     */
    fseek(htdbFile, fend, SEEK_SET);

    /*
     * 5. Process the pages of the file.
     */
    handleFilePages(htfname, npages, pgInfoV);
}

/*
 * Open the database named by htdbFName and process every file description
 * in it.  Each description must start with a tab character.
 */
static void
handleHtdb(void)
{
    FILE *htdbFile;
    int c;

    htdbFile = fopen(htdbFName, "r");
    if (htdbFile == NULL)
        badDB();

    while ((c = getc(htdbFile)) != EOF) {
        if (c != '\t')
            badDB();
        ungetc(c, htdbFile);

        handleFile(htdbFile);
    }
    fclose(htdbFile);
}

/*
 * Set progName, pattern and htdbFName from the command line; print a
 * usage message and exit unless exactly two arguments were given.
 */
static void
cmdline(int argc, char** argv)
{
    progName = argv[0];

    if (argc != 3) {
        fprintf(stderr, "Usage: %s pattern htdb-file\n", progName);
        exit(1);
    }

    pattern = argv[1];
    htdbFName = argv[2];
}

int
main(int argc, char** argv)
{
    int rc;

    putenv("LC_ALL=C");
    setlocale(LC_ALL, "");

    cmdline(argc, argv);

    rc = regcomp(&reg_pattern, pattern, REG_NEWLINE);
    if (rc != 0) {
        char errbuf[256];

        /* Searching with a regex_t that failed to compile is undefined. */
        regerror(rc, &reg_pattern, errbuf, sizeof errbuf);
        fprintf(stderr, "%s: invalid pattern %s: %s\n",
                progName, pattern, errbuf);
        exit(1);
    }

    handleHtdb();

    regfree(&reg_pattern);
    return(0);
}