% From ab8cc85adde879fb963c94d15675783f2cf4b183 Mon Sep 17 00:00:00 2001
% From: dos-reis
% Date: Tue, 14 Aug 2007 05:14:52 +0000
% Subject: Initial population.
% File: src/hyper/hthits.pamphlet
\documentclass{article}
\usepackage{axiom}

\title{\$SPAD/src/hthits}
\author{The Axiom Team}

\begin{document}
\maketitle

\begin{abstract}
\end{abstract}
\eject

\tableofcontents
\eject

\section{hthits.c}

This source file implements HyperDoc's ability to scan files for a
given pattern.  For that purpose it needs a ``regex'' library for string
pattern matching.

This source file used to rely on [[<regexp.h>]],
which was originally part of the X/Open System Interface and Headers
Issue 2.  However, it has since been withdrawn and is no longer
always available on newer platforms.  Consequently,
we need to use a different, portable regex library.  The POSIX
definition provides one, namely through [[<regex.h>]].  That is what we
use now.  Its availability is tested at configure time.

<<hthits.c>>=
/*
 * hthits pattern htdb-file
 *
 * Scan HyperDoc files for a given pattern.
 *
 * For every page with at least one hit, one line of the form
 *
 *     \newsearchresultentry{n}{title}{page-name}
 *
 * is written to standard output.
 *
 * The title and body of each page are scanned but the name is not.  It is
 * possible that the title matches but not any lines.  The number of matches
 * in the page (n) is given first.
 *
 * SMW Feb 91
 */
#define _HTHITS_C
#include "axiom-c-macros.h"

#include "debug.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <regex.h>

/*
 * For fixed-size arrays.
 */
#define MAX_HTDB_LINE    1024
#define MAX_ENTRY_TYPE   30     /* I.e. \page \newcommand \patch ... */
#define MAX_ENTRY_NAME   1024   /* E.g. DifferentialCalculusPage */
#define MAX_COMP_REGEX   1024

/*
 * Location of one \page inside a HyperDoc file.
 *   name   page name as recorded in the database
 *   start  byte offset of the page in its file
 *   size   length of the page in bytes; -1 while not yet known
 */
typedef struct pgInfo {
    char name[MAX_ENTRY_NAME];
    long start, size;
} PgInfo ;

/* NOTE(review): presumably declares the functions defined below -- confirm. */
#include "hthits.H1"

/*
 * Global variables set according to the command line.
 */

char *progName;
char *pattern;
char *htdbFName;
int gverifydates=0;
regex_t reg_pattern;

/*
 * Entry point: read the command line, compile the pattern once, then walk
 * the database.  Exits with status 1 when the pattern does not compile.
 */
int
main(int argc,char ** argv)
{
    int rc;

    cmdline(argc, argv);

    /*
     * A pattern that fails to compile must stop the run here: calling
     * regexec() with a regex_t that regcomp() did not build is undefined.
     */
    rc = regcomp(&reg_pattern, pattern, REG_NEWLINE);
    if (rc != 0) {
        regerr(rc);
        exit(1);
    }

    handleHtdb();

    regfree(&reg_pattern);
    return(0);
}

/*
 * Store the program name, the pattern and the database file name in the
 * globals above.  Prints a usage message and exits with status 1 unless
 * exactly two arguments were given.
 */
void
cmdline(int argc,char ** argv)
{
    progName = argv[0];

    if (argc != 3) {
        fprintf(stderr, "Usage: %s pattern htdb-file\n", progName);
        exit(1);
    }

    pattern = argv[1];
    htdbFName = argv[2];
}

/*
 * Open the database and process its file descriptions one after another.
 * Every description must start with a tab character; anything else is
 * reported through badDB().
 */
void
handleHtdb(void)
{
    FILE *htdbFile;
    int c;

    htdbFile = fopen(htdbFName, "r");
    if (htdbFile == NULL)
        badDB();

    while ((c = getc(htdbFile)) != EOF) {
        if (c != '\t')
            badDB();
        /* handleFile() reads the whole header line, tab included. */
        ungetc(c, htdbFile);

        handleFile(htdbFile);
    }
    fclose(htdbFile);
}


/*
 * Process one file description of the database.
 *
 * On entry htdbFile is positioned at a header line holding a file name and
 * a modification time.  The following lines, up to the next line starting
 * with a tab or the end of the database, each hold
 *
 *     entry-type entry-name byte-offset line-number
 *
 * The \page entries are collected into a table of offsets and sizes and
 * handed to handleFilePages().  On return htdbFile is positioned at the
 * next header line (or at end of file).
 *
 * The page table is kept between calls and only ever grows.
 */
void
handleFile(FILE *htdbFile)
{
    static PgInfo *pgInfoV = NULL;
    static int pgInfoC = 0;

    char htdbLine[MAX_HTDB_LINE];
    char htfname[MAX_HTDB_LINE];
    long httime = 0;
    long htsize;
    struct stat htstat;

    long fstart, fend;
    int rc, i, npages;

    char entname[MAX_ENTRY_NAME], enttype[MAX_ENTRY_TYPE];
    long entoffset, entlineno;

    if (fgets(htdbLine, MAX_HTDB_LINE, htdbFile) == NULL)
        badDB();

    /*
     * htfname is as large as htdbLine, so %s cannot overrun it.  The time
     * stamp is read into a long because that is what %ld stores; time_t
     * need not be a long.  Only the file name is mandatory.
     */
    if (sscanf(htdbLine, " %s %ld", htfname, &httime) < 1)
        badDB();

    /*
     * 1. Verify file: get size and check modification time.
     */
    rc = stat(htfname, &htstat);
    if (rc == -1) {
        fprintf(stderr, "%s: Cannot access %s\n", progName, htfname);
        exit(1);
    }
    if (gverifydates && (htstat.st_mtime != (time_t) httime)) {

        fprintf(stderr, "%s: Out of date file %s\n", progName, htfname);
        exit(1);
    }
    htsize = htstat.st_size;

    /*
     * 2. Count the pages in the file.
     */
    npages = 0;
    fstart = ftell(htdbFile);
    fend = ftell(htdbFile);

    while (fgets(htdbLine, MAX_HTDB_LINE, htdbFile) != NULL) {
        if (htdbLine[0] == '\t')
            break;
        if (!strncmp(htdbLine, "\\page", 5))
            npages++;
        /* fend trails one line behind so it stops before the next header. */
        fend = ftell(htdbFile);
    }

    /*
     * 3. Find offset and size of each \page (skipping \newcommands etc.)
     */
    if (npages > pgInfoC) {
        free(pgInfoV);

        pgInfoC = npages;
        pgInfoV = (PgInfo *)
            malloc((size_t) npages * sizeof(PgInfo));

        if (!pgInfoV) {
            fprintf(stderr, "%s: out of memory\n", progName);
            exit(1);
        }
    }

    fseek(htdbFile, fstart, SEEK_SET);

    for (i = 0; fgets(htdbLine, MAX_HTDB_LINE, htdbFile) != NULL;) {
        if (htdbLine[0] == '\t')
            break;

        /*
         * The field widths are MAX_ENTRY_TYPE - 1 and MAX_ENTRY_NAME - 1,
         * so an over-long token cannot overrun the buffers.  Type, name
         * and offset are the fields used below and must all be present.
         */
        if (sscanf(htdbLine, "%29s %1023s %ld %ld",
                   enttype, entname, &entoffset, &entlineno) < 3)
            badDB();

        /* The previous page ends where the current entry begins. */
        if (i > 0 && pgInfoV[i - 1].size == -1)
            pgInfoV[i - 1].size = entoffset - pgInfoV[i - 1].start;

        if (!strcmp(enttype, "\\page")) {
            /*
             * Step 2 counted lines that begin with "\page"; this test
             * compares the first token.  Should the two ever disagree,
             * refuse to write past the table sized from that count.
             */
            if (i >= npages)
                badDB();

            strncpy(pgInfoV[i].name, entname, MAX_ENTRY_NAME - 1);
            pgInfoV[i].name[MAX_ENTRY_NAME - 1] = '\0';
            pgInfoV[i].start = entoffset;
            pgInfoV[i].size = -1;

            i++;
        }
    }
    /* The last page runs to the end of the file. */
    if (i > 0 && pgInfoV[i - 1].size == -1)
        pgInfoV[i - 1].size = htsize - pgInfoV[i - 1].start;

    if (i != npages)
        badDB();

    /*
     * 4. Position database input to read next file-description
     */
    fseek(htdbFile, fend, SEEK_SET);

    /*
     * 5. Process the pages of the file.
     */
    handleFilePages(htfname, npages, pgInfoV);
}

/*
 * Open fname and run handlePage() on each of the pgc pages described by
 * pgv.  Exits with status 1 when the file cannot be opened.
 */
void
handleFilePages(char *fname, int pgc, PgInfo *pgv)
{
    FILE *infile;
    int i;

    infile = fopen(fname, "r");
    if (infile == NULL) {
        fprintf(stderr, "%s: Cannot read file %s\n", progName, fname);
        exit(1);
    }


    for (i = 0; i < pgc; i++)
        handlePage(infile, pgv + i);

    fclose(infile);

}

/*
 * Read the page described by pg from infile, split it into title and body,
 * strip TeX markup from the body and search both for the pattern.
 *
 * The page buffer is kept between calls and grown on demand; it always has
 * room for the page plus a terminating NUL.
 */
void
handlePage(FILE *infile,PgInfo * pg)
{
    static char *pgBuf = NULL;
    static long pgBufSize = 0;

    char *title, *body;
    size_t nread;

    /*
     * A negative size means the offsets recorded in the database do not
     * fit the file on disk; it must never reach malloc() or fread().
     */
    if (pg->size < 0)
        badDB();

    if (pg->size > pgBufSize - 1) {
        free(pgBuf);
        pgBufSize = pg->size + 20000;
        pgBuf = (char *)malloc((size_t) pgBufSize);

        if (!pgBuf) {
            fprintf(stderr,"%s: Out of memory\n", progName);
            exit(1);
        }
    }

    if (fseek(infile, pg->start, SEEK_SET) != 0)
        badDB();

    /* Terminate after what was actually read so stale bytes are not searched. */
    nread = fread(pgBuf, 1, (size_t) pg->size, infile);
    pgBuf[nread] = 0;

    splitpage(pgBuf, &title, &body);
    /*untexbuf(title);*/
    untexbuf(body);

#ifdef DEBUG
    printf("-------------- %s -------------\n%s", pg->name, pgBuf);
    printf("============== %s =============\n", title);
    printf("%s", body);
#endif

    searchPage(pg->name, title, body);

}

/*
 * Count the matches of the compiled pattern in a page and, when there is at
 * least one, print a \newsearchresultentry line for it.
 *
 * The title contributes at most one hit.  The body is searched repeatedly,
 * each search starting where the previous match ended.
 */
void
searchPage(char *pgname,char * pgtitle,char * pgbody)
{
    char *bodyrest;
    regmatch_t match_pos;
    int nhits = 0;
    int eflags = 0;

    if (!regexec(&reg_pattern, pgtitle, 1, &match_pos, 0))
        nhits++;

    bodyrest = pgbody;
    while (!regexec(&reg_pattern, bodyrest, 1, &match_pos, eflags)) {
        nhits++;

        if (match_pos.rm_eo > 0)
            bodyrest += match_pos.rm_eo;
        else if (*bodyrest != '\0')
            bodyrest++;         /* empty match: step over one character */
        else
            break;              /* empty match at the end of the text */

        /*
         * The rest of the body starts a line only right after a newline;
         * otherwise `^' must not match at its first character.
         */
        eflags = (bodyrest[-1] == '\n') ? 0 : REG_NOTBOL;
    }
    if (nhits) {
        printf("\\newsearchresultentry{%d}{%s}",nhits, pgtitle);
        squirt(pgname, (int) strlen(pgname));
        printf("\n");
    }
}

/*
 * Given string s and length n, output the first n characters of s enclosed
 * in braces, with ` and newline converted to blanks.  This function
 * destructively modifies s: the conversion is done in place.
 */

void
squirt(char *s, int n)
{
    char *t, *e;

    for (t = s, e = s + n; t < e; t++)
        if (*t == '`' || *t == '\n')
            *t = ' ';

    /* The precision limits the output to n characters. */
    printf("{%.*s}", n, s);
}

/*
 * Split a page held in buf into its title and its body.
 *
 * The title is the contents of the second top-level brace group for
 * \page{Name}{Title} and of the third for \begin{page}{Name}{Title}.  The
 * closing brace of the title is overwritten with NUL, so *ptitle is a
 * string of its own and *pbody points just past it.
 *
 * Exits with status 1 when buf does not start like a page or holds no
 * complete title group.
 */
void
splitpage(char *buf, char **ptitle, char **pbody)
{
    int n, depth, tno;
    char *s;

    *ptitle = NULL;
    *pbody = NULL;

    /* Look at buf[1] only when buf[0] is not already the terminator. */
    switch (buf[0] != '\0' ? buf[1] : '\0') {
    case 'p':
        tno = 2;
        break;                  /* \page{Name}{Title} */
    case 'b':
        tno = 3;
        break;                  /* \begin{page}{Name}{Title} */
    default:
        fprintf(stderr, "%s: Invalid page format: %s\n", progName, buf);
        exit(1);
    }

    n = 0;                      /* top-level groups opened so far */
    depth = 0;                  /* current brace nesting */

    for (s = buf; *s; s++) {
        if (*s == '{') {
            if (++depth == 1 && ++n == tno)
                *ptitle = s + 1;
        }
        else if (*s == '}') {
            if (depth-- == 1 && n == tno) {
                *s = 0;
                *pbody = s + 1;
                break;
            }
        }
    }

    /* Without a complete title group the callers would use wild pointers. */
    if (*ptitle == NULL || *pbody == NULL) {
        fprintf(stderr, "%s: Invalid page format: %s\n", progName, buf);
        exit(1);
    }
}


/*
 * Strip TeX markup from s in place.
 *
 *   - a backslash and the letters following it become one blank,
 *   - a % and the rest of its line (newline excluded) become one blank,
 *   - each of { } # becomes a blank,
 *   - everything else is copied unchanged.
 *
 * The result is never longer than the input.
 */
void
untexbuf(register char *s)
{
    register char *d = s;

    while (*s)
        switch (*s) {
        case '\\':
            *d++ = ' ';
            s++;
            /*
             * NOTE(review): the % after a backslash is left in place and is
             * then handled as a comment start by the next iteration --
             * confirm that this is what is wanted for an escaped percent.
             */
            if (*s != '%')
                /* <ctype.h> functions need an unsigned char value. */
                while (isalpha((unsigned char) *s))
                    s++;
            break;
        case '%':
            *d++ = ' ';
            s++;
            while (*s && *s != '\n')
                s++;
            break;
        case '{':
        case '}':
        case '#':
            *d++ = ' ';
            s++;
            break;
        default:
            *d++ = *s++;
        }
    *d = 0;
}

/*
 * Report a malformed database file and exit with status 1.
 */
void
badDB(void)
{
    fprintf(stderr, "%s: bad database file %s\n", progName, htdbFName);
    exit(1);
}

/*
 * Report a regular expression error code for the current pattern.  Does
 * not exit; that is left to the caller.
 */
void
regerr(int code)
{
    fprintf(stderr, "%s: regular expression error %d for \"%s\"\n",
            progName, code, pattern);
}
@
\section{License}
<<license>>=
/*
Copyright (c) 1991-2002, The Numerical ALgorithms Group Ltd.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.

    - Neither the name of The Numerical ALgorithms Group Ltd. nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

@
<<*>>=
<<license>>
<<hthits.c>>
@
\eject
\begin{thebibliography}{99}
\bibitem{1} nothing
\end{thebibliography}
\end{document}