1 files changed, 939 insertions, 0 deletions
diff --git a/src/translators/btparse/lex_auxiliary.c b/src/translators/btparse/lex_auxiliary.c
new file mode 100644
index 0000000..8fac463
--- /dev/null
+++ b/src/translators/btparse/lex_auxiliary.c
@@ -0,0 +1,939 @@
+/* ------------------------------------------------------------------------
+@NAME       : lex_auxiliary.c
+@INPUT      : 
+@OUTPUT     : 
+@RETURNS    : 
+@DESCRIPTION: The code and global variables here have three main purposes:
+                - maintain the lexical buffer (zztoktext, which
+                  traditionally with PCCTS is a static array; I have
+                  changed things so that it's dynamically allocated and
+                  resized on overflow)
+                - keep track of lexical state that's not handled by PCCTS
+                  code (like "where are we in terms of BibTeX entries?" or
+                  "what are the delimiters for the current entry/string?")
+                - everything called from lexical actions is here, to keep
+                  the grammar file itself neat and clean
+@GLOBALS    : 
+@CALLS      : 
+@CALLERS    : 
+@CREATED    : Greg Ward, 1996/07/25-28
+@MODIFIED   : Jan 1997
+              Jun 1997
+@VERSION    : $Id: lex_auxiliary.c,v 1.31 1999/11/29 01:13:10 greg Rel $
+@COPYRIGHT  : Copyright (c) 1996-99 by Gregory P. Ward.  All rights reserved.
+
+              This file is part of the btparse library.  This library is
+              free software; you can redistribute it and/or modify it under
+              the terms of the GNU General Public License as
+              published by the Free Software Foundation; either version 2
+              of the License, or (at your option) any later version.
+-------------------------------------------------------------------------- */
+
+/*#include "bt_config.h"*/
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <assert.h>
+#include "lex_auxiliary.h"
+#include "stdpccts.h"
+#include "error.h"
+#include "prototypes.h"
+/*#include "my_dmalloc.h"*/
+
+#define DUPE_TEXT 0
+
+extern char * InputFilename;            /* from input.c */
+
+GEN_PRIVATE_ERRFUNC (lexical_warning, (const char * fmt, ...),
+                     BTERR_LEXWARN, InputFilename, zzline, NULL, -1, fmt)
+GEN_PRIVATE_ERRFUNC (lexical_error, (const char * fmt, ...),
+                     BTERR_LEXERR, InputFilename, zzline, NULL, -1, fmt)
+
+
+
+/* ----------------------------------------------------------------------
+ * Global variables
+ */
+
+/* First, the lexical buffer.  This is used elsewhere, so can't be static */
+char *         zztoktext = NULL;
+
+/* 
+ * Now, the lexical state -- first, stuff that arises from scanning 
+ * at top-level and the beginnings of entries;
+ *   EntryState:
+ *     toplevel when we start scanning a file, or when we are in in_entry
+ *       mode and see '}' or ')'
+ *     after_at when we are in toplevel mode and see an '@'
+ *     after_type when we are in after_at mode and see a name (!= 'comment')
+ *     in_comment when we are in after_at mode and see a name (== 'comment')
+ *     in_entry when we are in after_type mode and see '{' or '('
+ *   EntryOpener:
+ *     the character ('(' or '{') which opened the entry currently being
+ *     scanned (we use this to make sure that the entry opener and closer
+ *     match; if not, we issue a warning)
+ *   EntryMetatype: (NB. typedef for bt_metatype is in btparse.h)
+ *     classifies entries according to the syntax we will use to parse them;
+ *     also winds up (after being changed to a bt_nodetype value) in the 
+ *     node that roots the entry AST:
+ *       comment    - anything between () or {}
+ *       preamble   - a single compound value
+ *       string     - a list of "name = compound_value" assignments; no key
+ *       alias      - a single "name = compound_value" assignment (where
+ *                    the compound value in this case is presumably a 
+ *                    name, rather than a string -- this is not syntactically
+ *                    checked though)
+ *       modify,
+ *       entry      - a key followed by a list of "name = compound_value" 
+ *                    assignments
+ *   JunkCount:
+ *     the number of non-whitespace, non-'@' characters seen at toplevel
+ *     between two entries (used to print out a warning when we hit
+ *     the beginning of entry, to help people catch "old style" implicit
+ *     comments
+ */
+static enum { toplevel, after_at, after_type, in_comment, in_entry } 
+               EntryState;
+static char    EntryOpener;             /* '(' or '{' */
+static bt_metatype
+               EntryMetatype;
+static int     JunkCount;               /* non-whitespace chars at toplevel */
+
+/*
+ * String state -- these are maintained and used by the functions called
+ * from actions in the string lexer.
+ *   BraceDepth:
+ *     brace depth within a string; we can only end the current string
+ *     when this is zero
+ *   ParenDepth:
+ *     parenthesis depth within a string; needed for @comment entries
+ *     that are paren-delimited (because the comment in that case is
+ *     a paren-delimited string)
+ *   StringOpener:
+ *     similar to EntryOpener, but stronger than merely warning of token
+ *     mismatch -- this determines which character ('"' or '}') can 
+ *     actually end the string
+ *   StringStart:
+ *     line on which current string started; if we detect an apparent
+ *     runaway, this is used to report where the runaway started
+ *   ApparentRunaway:
+ *     flags if we have already detected (and warned) that the current
+ *     string appears to be a runaway, so that we don't warn again
+ *     (and again and again and again)
+ *   QuoteWarned:
+ *     flags if we have already warned about seeing a '"' in a string,
+ *     because they tend to come in pairs and one warning per string 
+ *     is enough
+ *
+ * (See bibtex.g for an explanation of my runaway string detection heuristic.)
+ */
+static char    StringOpener = '\0';     /* '{' or '"' */
+static int     BraceDepth;              /* depth of brace-nesting */
+static int     ParenDepth;              /* depth of parenthesis-nesting */
+static int     StringStart = -1;        /* start line of current string */
+static int     ApparentRunaway;         /* current string looks like runaway */
+static int     QuoteWarned;             /* already warned about " in string? */
+
+
+
+/* ----------------------------------------------------------------------
+ * Miscellaneous functions:
+ *   lex_info()      (handy for debugging)
+ *   zzcr_attr()     (called from PCCTS-generated code)
+ */
+
+void lex_info (void)
+{
+   printf ("LA(1) = \"%s\" token %d, %s\n", LATEXT(1), LA(1), zztokens[LA(1)]);
+#ifdef LL_K
+   printf ("LA(2) = \"%s\" token %d, %s\n", LATEXT(2), LA(2), zztokens[LA(2)]);
+#endif
+}
+
+
+void zzcr_attr (Attrib *a, int tok, char *txt)
+{
+   if (tok == STRING)
+   {
+      int   len = strlen (txt);
+
+      assert ((txt[0] == '{' && txt[len-1] == '}')
+              || (txt[0] == '"' && txt[len-1] == '"'));
+      txt[len-1] = (char) 0;            /* remove closing quote from string */
+      txt++;                            /* so we'll skip the opening quote */
+   }
+
+#if DUPE_TEXT
+   a->text = strdup (txt);
+#else
+   a->text = txt;
+#endif
+   a->token = tok;
+   a->line = zzline;
+   a->offset = zzbegcol;
+#if DEBUG > 1
+   dprintf ("zzcr_attr: input txt = %p (%s)\n", txt, txt);
+   dprintf ("           dupe txt  = %p (%s)\n", a->text, a->text);
+#endif
+}
+
+
+#if DUPE_TEXT
+void zzd_attr (Attrib *attr)
+{
+   free (attr->text);
+}
+#endif
+
+
+/* ----------------------------------------------------------------------
+ * Lexical buffer functions:
+ *   alloc_lex_buffer()
+ *   realloc_lex_buffer()
+ *   free_lex_buffer()
+ *   lexer_overflow()
+ *   zzcopy()              (only if ZZCOPY_FUNCTION is defined and true)
+ */
+
+
+/*
+ * alloc_lex_buffer()
+ * 
+ * allocates the lexical buffer with `size' characters.  Clears the buffer,
+ * points zzlextext at it, and sets zzbufsize to `size'.
+ *
+ * Does nothing if the buffer is already allocated.
+ *
+ * globals: zztoktext, zzlextext, zzbufsize
+ * callers: bt_parse_entry() (in input.c)
+ */
+void alloc_lex_buffer (int size)
+{
+   if (zztoktext == NULL)
+   {
+      zztoktext = (char *) malloc (size * sizeof (char));
+      memset (zztoktext, 0, size);
+      zzlextext = zztoktext;
+      zzbufsize = size;
+   }
+} /* alloc_lex_buffer() */
+
+
+/*
+ * realloc_lex_buffer()
+ * 
+ * Reallocates the lexical buffer -- size is increased by `size_increment'
+ * characters (which could be negative).  Updates all globals that point
+ * to or into the buffer (zzlextext, zzbegexpr, zzendexpr), as well as
+ * zztoktext (the buffer itself) zzbufsize (the buffer size).
+ *
+ * This is only meant to be called (ultimately) from zzgettok(), part of
+ * the DLG code.  (In fact, zzgettok() invokes the ZZCOPY() macro, which
+ * calls lexer_overflow() on buffer overflow, which calls
+ * realloc_lex_buffer().  Whatever.)  The `lastpos' and `nextpos' arguments
+ * correspond, respectively, to a local variable in zzgettok() and a static
+ * global in dlgauto.h (hence really in scan.c).  They both point into
+ * the lexical buffer, so have to be passed by reference here so that
+ * we can update them to point into the newly-reallocated buffer.
+ * 
+ * globals: zztottext, zzbufsize, zzlextext, zzbegexpr, zzendexpr
+ * callers: lexer_overflow()
+ */
+static void
+realloc_lex_buffer (int     size_increment, 
+                    unsigned char ** lastpos, 
+                    unsigned char ** nextpos)
+{
+   int   beg, end, next;
+
+   if (zztoktext == NULL)
+      internal_error ("attempt to reallocate unallocated lexical buffer");
+
+   zztoktext = (char *) realloc (zztoktext, zzbufsize+size_increment);
+   memset (zztoktext+zzbufsize, 0, size_increment);
+   zzbufsize += size_increment;
+
+   beg = zzbegexpr - zzlextext;
+   end = zzendexpr - zzlextext;
+   next = *nextpos - zzlextext;
+   zzlextext = zztoktext;
+
+   if (lastpos != NULL)
+      *lastpos = zzlextext+zzbufsize-1;
+   zzbegexpr = zzlextext + beg;
+   zzendexpr = zzlextext + end;
+   *nextpos = zzlextext + next;
+   
+} /* realloc_lex_buffer() */
+
+
+/*
+ * free_lex_buffer()
+ *
+ * Frees the lexical buffer allocated by alloc_lex_buffer().
+ */
+void free_lex_buffer (void)
+{
+   if (zztoktext == NULL)
+      internal_error ("attempt to free unallocated (or already freed) " 
+                      "lexical buffer");
+
+   free (zztoktext);
+   zztoktext = NULL;
+} /* free_lex_buffer() */
+
+
+/*
+ * lexer_overflow()
+ *
+ * Prints a warning and calls realloc_lex_buffer() to increase the size
+ * of the lexical buffer by ZZLEXBUFSIZE (a constant -- hence the buffer
+ * size increases linearly, not exponentially).
+ *
+ * Also prints a couple of lines of useful debugging stuff if DEBUG is true.
+ */ 
+void lexer_overflow (unsigned char **lastpos, unsigned char **nextpos)
+{
+#if DEBUG
+   char   head[16], tail[16];
+
+   printf ("zzcopy: overflow detected\n");
+   printf ("        zzbegcol=%d, zzendcol=%d, zzline=%d\n",
+           zzbegcol, zzendcol, zzline);
+   strncpy (head, zzlextext, 15); head[15] = 0;
+   strncpy (tail, zzlextext+ZZLEXBUFSIZE-15, 15); tail[15] = 0;
+   printf ("        zzlextext=>%s...%s< (last char=%d (%c))\n",
+           head, tail, 
+           zzlextext[ZZLEXBUFSIZE-1], zzlextext[ZZLEXBUFSIZE-1]);
+   printf ("        zzchar = %d (%c), zzbegexpr=zzlextext+%d\n",
+           zzchar, zzchar, zzbegexpr-zzlextext);
+#endif
+
+   notify ("lexical buffer overflowed (reallocating to %d bytes)",
+                    zzbufsize+ZZLEXBUFSIZE);
+   realloc_lex_buffer (ZZLEXBUFSIZE, lastpos, nextpos);
+
+} /* lexer_overflow () */
+
+
+#if ZZCOPY_FUNCTION
+/*
+ * zzcopy()
+ * 
+ * Does the same as the ZZCOPY macro (in lex_auxiliary.h), but as a
+ * function for easier debugging.
+ */
+void zzcopy (char **nextpos, char **lastpos, int *ovf_flag)
+{
+   if (*nextpos >= *lastpos)
+   {
+      lexer_overflow (lastpos, nextpos);
+   }
+
+   **nextpos = zzchar;
+   (*nextpos)++;
+}
+#endif
+
+
+
+/* ----------------------------------------------------------------------
+ * Report/maintain lexical state 
+ *   report_state()        (only meaningful if DEBUG)
+ *   initialize_lexer_state()
+ *
+ * Note that the lexical action functions, below, also fiddle with
+ * the lexical state variables an awful lot.
+ */
+
+#if DEBUG
+char *state_names[] =
+   { "toplevel", "after_at", "after_type", "in_comment", "in_entry" };
+char *metatype_names[] = 
+   { "unknown", "comment", "preamble", "string", "alias", "modify", "entry" };
+
+static void
+report_state (char *where)
+{
+   printf ("%s: lextext=%s (line %d, offset %d), token=%d, "
+           "EntryState=%s\n",
+           where, zzlextext, zzline, zzbegcol, NLA,
+           state_names[EntryState]);
+}
+#else
+# define report_state(where)
+/*
+static void
+report_state (char *where) { }
+*/
+#endif
+  
+void initialize_lexer_state (void)
+{
+   zzmode (START);
+   EntryState = toplevel;
+   EntryOpener = (char) 0;
+   EntryMetatype = BTE_UNKNOWN;
+   JunkCount = 0;
+}
+
+
+bt_metatype entry_metatype (void)
+{
+   return EntryMetatype;
+}
+
+
+
+/* ----------------------------------------------------------------------
+ * Lexical actions (START and LEX_ENTRY modes)
+ */
+
+/* 
+ * newline ()
+ * 
+ * Does everything needed to handle newline outside of a quoted string:
+ * increments line counter and skips the newline.
+ */
+void newline (void)
+{
+   zzline++;
+   zzskip();
+}
+
+
+void comment (void)
+{
+   zzline++;
+   zzskip();
+}
+   
+
+void at_sign (void)
+{
+   if (EntryState == toplevel)
+   {
+      EntryState = after_at;
+      zzmode (LEX_ENTRY);
+      if (JunkCount > 0)
+      {
+         lexical_warning ("%d characters of junk seen at toplevel", JunkCount);
+         JunkCount = 0;
+      }
+   }
+   else
+   {
+   /* internal_error ("lexer recognized \"@\" at other than top-level"); */
+      lexical_warning ("\"@\" in strange place -- should get syntax error");
+   }
+   report_state ("at_sign");
+}
+
+
+void toplevel_junk (void)
+{
+   JunkCount += strlen (zzlextext);
+   zzskip ();
+}
+
+
+void name (void)
+{
+   report_state ("name (pre)");
+
+   switch (EntryState)
+   {
+      case toplevel:
+      {
+         internal_error ("junk at toplevel (\"%s\")", zzlextext); 
+         break;
+      }
+      case after_at: 
+      {
+         char * etype = zzlextext;
+         EntryState = after_type;
+
+         if (strcasecmp (etype, "comment") == 0)
+         {
+            EntryMetatype = BTE_COMMENT;
+            EntryState = in_comment;
+         }
+
+         else if (strcasecmp (etype, "preamble") == 0)
+            EntryMetatype = BTE_PREAMBLE;
+
+         else if (strcasecmp (etype, "string") == 0)
+            EntryMetatype = BTE_MACRODEF;
+/*
+         else if (strcasecmp (etype, "alias") == 0)
+            EntryMetatype = BTE_ALIAS;
+
+         else if (strcasecmp (etype, "modify") == 0)
+            EntryMetatype = BTE_MODIFY;
+*/
+         else
+            EntryMetatype = BTE_REGULAR;
+
+         break;
+      }
+      case after_type:
+      case in_comment:
+      case in_entry:
+         break;                         /* do nothing */
+   }
+
+   report_state ("name (post)");
+
+}
+
+
+void lbrace (void)
+{
+   /* 
+    * Currently takes a restrictive view of "when an lbrace is an entry
+    * opener" -- ie. *only* after '@name' (as determined by EntryState),
+    * where name is not 'comment'.  This means that lbrace usually
+    * determines a string (in particular, when it's seen at toplevel --
+    * which will happen under certain error situations), which in turn
+    * means that some unexpected things can become strings (like whole
+    * entries).
+    */
+
+   if (EntryState == in_entry || EntryState == in_comment)
+   {
+      start_string ('{');
+   }
+   else if (EntryState == after_type)
+   {
+      EntryState = in_entry;
+      EntryOpener = '{';
+      NLA = ENTRY_OPEN;
+   }
+   else
+   {
+      lexical_warning ("\"{\" in strange place -- should get a syntax error");
+   }
+
+   report_state ("lbrace");
+}
+
+
+void rbrace (void)
+{
+   if (EntryState == in_entry)
+   {
+      if (EntryOpener == '(')
+         lexical_warning ("entry started with \"(\", but ends with \"}\"");
+      NLA = ENTRY_CLOSE;
+      initialize_lexer_state ();
+   }
+   else
+   {
+      lexical_warning ("\"}\" in strange place -- should get a syntax error");
+   }
+   report_state ("rbrace");
+}
+
+
+void lparen (void)
+{
+   if (EntryState == in_comment)
+   {
+      start_string ('(');
+   }
+   else if (EntryState == after_type)
+   {
+      EntryState = in_entry;
+      EntryOpener = '(';
+   }
+   else
+   {
+      lexical_warning ("\"(\" in strange place -- should get a syntax error");
+   }
+   report_state ("lparen");
+}
+
+
+void rparen (void)
+{
+   if (EntryState == in_entry)
+   {
+      if (EntryOpener == '{')
+         lexical_warning ("entry started with \"{\", but ends with \")\"");
+      initialize_lexer_state ();
+   }
+   else
+   {
+      lexical_warning ("\")\" in strange place -- should get a syntax error");
+   }
+   report_state ("rparen");
+}
+
+
+/* ----------------------------------------------------------------------
+ * Stuff for processing strings.
+ */
+
+
+/*
+ * start_string ()
+ *
+ * Called when we see a '{' or '"' in the field data.  Records which quote
+ * character was used, and calls open_brace() to increment the depth
+ * counter if it was a '{'.  Switches to LEX_STRING mode, and tells the
+ * lexer to continue slurping characters into the same buffer.
+ */
+void start_string (char start_char)
+{
+   StringOpener = start_char;
+   BraceDepth = 0;
+   ParenDepth = 0;
+   StringStart = zzline;
+   ApparentRunaway = 0;
+   QuoteWarned = 0;
+   if (start_char == '{')
+      open_brace ();
+   if (start_char == '(')
+      ParenDepth++;
+   if (start_char == '"' && EntryState == in_comment)
+   {
+      lexical_error ("comment entries must be delimited by either braces or parentheses");
+      EntryState = toplevel;
+      zzmode (START);
+      return;
+   }
+
+#ifdef USER_ZZMODE_STACK
+   if (zzauto != LEX_ENTRY || EntryState != in_entry)
+#else
+   if (EntryState != in_entry && EntryState != in_comment)
+#endif
+   {
+      lexical_warning ("start of string seen at weird place");
+   }
+
+   zzmore ();
+   zzmode (LEX_STRING);
+}
+
+
+/*
+ * end_string ()
+ *
+ * Called when we see either a '"' (at depth 0) or '}' (if it brings us
+ * down to depth 0) in a quoted string.  Just makes sure that braces are
+ * balanced, and then goes back to the LEX_FIELD mode.
+ */
+void end_string (char end_char)
+{
+   char   match;
+
+#ifndef ALLOW_WARNINGS
+   match = (char) 0;                    /* silence "might be used" */
+                                        /* uninitialized" warning */
+#endif
+
+   switch (end_char)
+   {
+      case '}': match = '{'; break;
+      case ')': match = '('; break;
+      case '"': match = '"'; break;
+      default: 
+         internal_error ("end_string(): invalid end_char \"%c\"", end_char);
+   }
+
+   assert (StringOpener == match);
+
+   /*
+    * If we're at non-zero BraceDepth, that probably means mismatched braces
+    * somewhere -- complain about it and reset BraceDepth to minimize future
+    * confusion.
+    */
+
+   if (BraceDepth > 0)
+   {
+      lexical_error ("unbalanced braces: too many {'s");
+      BraceDepth = 0;
+   }
+
+   StringOpener = (char) 0;
+   StringStart = -1;
+   NLA = STRING;
+
+   if (EntryState == in_comment)
+   {
+      int   len = strlen (zzlextext);
+
+      /* 
+       * ARG! no, this is wrong -- what if unbalanced braces in the string 
+       * and we try to output put it later? 
+       *
+       * ARG! again, this is no more wrong than when we strip quotes in
+       * post_parse.c, and blithely assume that we can put them back on
+       * later for output in BibTeX syntax.  Hmmm.
+       *
+       * Actually, it looks like this isn't a problem after all: you
+       * can't have unbalanced braces in a BibTeX string (at least
+       * not as parsed by btparse).
+       */
+
+      if (zzlextext[0] == '(')          /* convert to standard quote delims */
+      {
+         zzlextext[    0] = '{';
+         zzlextext[len-1] = '}';
+      }
+
+      EntryState = toplevel;
+      zzmode (START);
+   }
+   else
+   {
+      zzmode (LEX_ENTRY);
+   }
+      
+   report_state ("string");
+}
+
+
+/*
+ * open_brace ()
+ * 
+ * Called when we see a '{', either to start a string (in which case 
+ * it's called from start_string()) or inside a string (called directly
+ * from the lexer).
+ */
+void open_brace (void)
+{
+   BraceDepth++;
+   zzmore ();
+   report_state ("open_brace");
+}
+
+
+/*
+ * close_brace ()
+ *
+ * Called when we see a '}' inside a string.  Decrements the depth counter
+ * and checks to see if we are down to depth 0, in which case the string is
+ * ended and the current lookahead token is set to STRING.  Otherwise,
+ * just tells the lexer to keep slurping characters into the buffer.
+ */
+void close_brace (void)
+{
+   BraceDepth--;
+   if (StringOpener == '{' && BraceDepth == 0)
+   {
+      end_string ('}');
+   }
+
+   /* 
+    * This could happen if some bonehead puts an unmatched right-brace
+    * in a quote-delimited string (eg. "Hello}").  To attempt to recover,
+    * we reset the depth to zero and continue slurping into the string.
+    */
+   else if (BraceDepth < 0)
+   {
+      lexical_error ("unbalanced braces: too many }'s");
+      BraceDepth = 0;
+      zzmore ();
+   }
+
+   /* Otherwise, it's just any old right brace in a string -- keep eating */
+   else
+   {
+      zzmore ();
+   }
+   report_state ("close_brace");
+}
+
+
+void lparen_in_string (void)
+{
+   ParenDepth++;
+   zzmore ();
+}
+
+
+void rparen_in_string (void)
+{
+   ParenDepth--;
+   if (StringOpener == '(' && ParenDepth == 0)
+   {
+      end_string (')');
+   }
+   else
+   {
+      zzmore ();
+   }
+}
+
+
+/* 
+ * quote_in_string ()
+ * 
+ * Called when we see '"' in a string.  Ends the string if the quote is at
+ * depth 0 and the string was started with a quote, otherwise instructs the
+ * lexer to continue munching happily along.  (Also prints a warning,
+ * assuming that input is destined for processing by TeX and you really
+ * want either `` or '' rather than ".)
+ */
+void quote_in_string (void)
+{
+   if (StringOpener == '"' && BraceDepth == 0)
+   {
+      end_string ('"');
+   }
+   else
+   {
+      boolean at_top = FALSE;;
+
+      /* 
+       * Note -- this warning assumes that strings are destined 
+       * to be processed by TeX, so it should be optional.  Hmmm.
+       */
+
+      if (StringOpener == '"' || StringOpener == '(')
+         at_top = (BraceDepth == 0);
+      else if (StringOpener == '{')
+         at_top = (BraceDepth == 1);
+      else
+         internal_error ("Illegal string opener \"%c\"", StringOpener);
+
+      if (!QuoteWarned && at_top)
+      {
+         lexical_warning ("found \" at brace-depth zero in string "
+                          "(TeX accents in BibTeX should be inside braces)");
+         QuoteWarned = 1;
+      }
+      zzmore ();
+   }
+}
+
+
+/*
+ * check_runaway_string ()
+ *
+ * Called from the lexer whenever we see a newline in a string.  See 
+ * bibtex.g for a detailed explanation; basically, this function
+ * looks for an entry start ("@name{") or new field ("name=") immediately
+ * after a newline (with possible whitespace).  This is a heuristic 
+ * check for runaway strings, under the assumption that text that looks
+ * like a new entry or new field won't actually occur inside a string
+ * very often.
+ */
+void check_runaway_string (void)
+{
+   int      len;
+   int      i;
+
+   /* 
+    * could these be made significantly more efficient by a 256-element
+    * lookup table instead of calling strchr()?
+    */
+   static const char *alpha_chars = "abcdefghijklmnopqrstuvwxyz";
+   static const char *name_chars = "abcdefghijklmnopqrstuvwxyz0123456789:+/'.-";
+
+   /* 
+    * on entry: zzlextext contains the whole string, starting with {
+    * and with newlines/tabs converted to space; zzbegexpr points to
+    * a chunk of the string starting with newline (newlines and 
+    * tabs have not yet been converted)
+    */
+
+#if DEBUG > 1
+   printf ("check_runaway_string(): zzline=%d\n", zzline);
+   printf ("zzlextext=>%s<\nzzbegexpr=>%s<\n", 
+           zzlextext, zzbegexpr);
+#endif
+      
+
+   /* 
+    * increment zzline to take the leading newline into account -- but
+    * first a sanity check to be sure that newline is there!
+    */
+
+   if (zzbegexpr[0] != '\n')
+   {
+      lexical_warning ("huh? something's wrong (buffer overflow?) near "
+                       "offset %d (line %d)", zzendcol, zzline);
+   /* internal_error ("zzbegexpr (line %d, offset %d-%d, "
+                      "text >%s<, expr >%s<)"
+                      "should start with a newline",
+                      zzline, zzbegcol, zzendcol, zzlextext, zzbegexpr);
+   */
+   }
+   else
+   {
+      zzline++;
+   }
+
+   /* standardize whitespace (convert all to space) */
+
+   len = strlen (zzbegexpr);
+   for (i = 0; i < len; i++)
+   {
+      if (isspace (zzbegexpr[i]))
+         zzbegexpr[i] = ' ';
+   }
+   
+
+   if (!ApparentRunaway)                /* haven't already warned about it */
+   {
+      enum { none, entry, field, giveup } guess;
+
+      i = 1;
+      guess = none;
+      while (i < len && zzbegexpr[i] == ' ') i++;
+
+      if (zzbegexpr[i] == '@')
+      {
+         i++;
+         while (i < len && zzbegexpr[i] == ' ') i++;
+         guess = entry;
+      }
+
+      if (strchr (alpha_chars, tolower (zzbegexpr[i])) != NULL)
+      {
+         while (i < len && strchr (name_chars, tolower (zzbegexpr[i])) != NULL)
+            i++;
+         while (i < len && zzbegexpr[i] == ' ') i++;
+         if (i == len)
+         {
+            guess = giveup;
+         }
+         else
+         {
+            if (guess == entry)
+            {
+               if (zzbegexpr[i] != '{' && zzbegexpr[i] != '(')
+                  guess = giveup;
+            }
+            else                        /* assume it's a field */
+            {
+               if (zzbegexpr[i] == '=')
+                  guess = field;
+               else
+                  guess = giveup;
+            }               
+         }
+      }
+      else                              /* no name seen after WS or @ */
+      {
+         guess = giveup;
+      }
+
+      if (guess == none)
+         internal_error ("gee, I should have made a guess by now");
+
+      if (guess != giveup)
+      {
+         lexical_warning ("possible runaway string started at line %d", 
+                          StringStart);
+         ApparentRunaway = 1;
+      }
+   }
+
+   zzmore();
+}
+