#include <config.h>
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <sys/types.h>
#include <stddef.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <locale.h>
#define STREQ(a, b) (strcmp (a, b) == 0)
#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
#include "gettext.h"
#define _(str) gettext (str)
#include "mbsupport.h"  
#include <wchar.h>
#include <wctype.h>
#if HAVE_LANGINFO_CODESET
# include <langinfo.h>
#endif
#include "regex.h"
#include "dfa.h"
#include "hard-locale.h"
#include "xalloc.h"
#ifdef setbit
# undef setbit
#endif
#ifdef clrbit
# undef clrbit
#endif
#ifndef CHARBITS
# define CHARBITS 8
#endif
#define NOTCHAR (1 << CHARBITS)
#ifndef INTBITS
# define INTBITS (CHARBITS * sizeof (int))
#endif
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
typedef int charclass[CHARCLASS_INTS];
#define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
  ((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))
#define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
  ((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))
#define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
  (MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn)		     \
   && MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
#define PREV_NEWLINE_DEPENDENT(constraint) \
  (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
#define PREV_LETTER_DEPENDENT(constraint) \
  (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
#define NO_CONSTRAINT 0xff
#define BEGLINE_CONSTRAINT 0xcf
#define ENDLINE_CONSTRAINT 0xaf
#define BEGWORD_CONSTRAINT 0xf2
#define ENDWORD_CONSTRAINT 0xf4
#define LIMWORD_CONSTRAINT 0xf6
#define NOTLIMWORD_CONSTRAINT 0xf9
typedef enum
{
  END = -1,			
  EMPTY = NOTCHAR,		
  BACKREF,			
  BEGLINE,			
  ENDLINE,			
  BEGWORD,			
  ENDWORD,			
  LIMWORD,			
  NOTLIMWORD,			
  QMARK,			
  STAR,				
  PLUS,				
  REPMN,			
  CAT,				
  OR,				
  LPAREN,			
  RPAREN,			
  ANYCHAR,                     
  MBCSET,			
  WCHAR,			
  CSET				
} token;
typedef struct
{
  unsigned int index;		
  unsigned int constraint;	
} position;
typedef struct
{
  position *elems;		
  int nelem;			
} position_set;
typedef struct
{
  int hash;			
  position_set elems;		
  char newline;			
  char letter;			
  char backref;			
  unsigned char constraint;	
  int first_end;		
#if MBS_SUPPORT
  position_set mbps;           
#endif
} dfa_state;
#if MBS_SUPPORT
struct mb_char_classes
{
  int cset;
  int invert;
  wchar_t *chars;		
  int nchars;
  wctype_t *ch_classes;		
  int nch_classes;
  wchar_t *range_sts;		
  wchar_t *range_ends;		
  int nranges;
  char **equivs;		
  int nequivs;
  char **coll_elems;
  int ncoll_elems;		
};
#endif
struct dfa
{
  charclass *charclasses;	
  int cindex;			
  int calloc;			
  token *tokens;		
  int tindex;			
  int talloc;			
  int depth;			
  int nleaves;			
  int nregexps;			
  unsigned int mb_cur_max;	
  int utf8_anychar_classes[5];	
  int nmultibyte_prop;
  int *multibyte_prop;
  struct mb_char_classes *mbcsets;
  int nmbcsets;
  int mbcsets_alloc;
  dfa_state *states;		
  int sindex;			
  int salloc;			
  position_set *follows;	
  int searchflag;		
  int tralloc;			
  int trcount;			
  int **trans;			
  int **realtrans;		
  int **fails;			
  int *success;			
  int *newlines;		
  struct dfamust *musts;	
};
#define ACCEPTING(s, r) ((r).states[s].constraint)
#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \
  SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint,		   \
                       prevn, currn, prevl, currl)
static void dfamust (struct dfa *dfa);
static void regexp (void);
#define CALLOC(p, t, n) ((p) = xcalloc((size_t)(n), sizeof (t)))
#define MALLOC(p, t, n) ((p) = xmalloc((n) * sizeof (t)))
#define REALLOC(p, t, n) ((p) = xrealloc((p), (n) * sizeof (t)))
#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \
  if ((index) >= (nalloc))			  \
    {						  \
      do					  \
        (nalloc) *= 2;				  \
      while ((index) >= (nalloc));		  \
      REALLOC(p, t, nalloc);			  \
    }
#ifdef DEBUG
static void
prtok (token t)
{
  char const *s;
  if (t < 0)
    fprintf(stderr, "END");
  else if (t < NOTCHAR)
    fprintf(stderr, "%c", t);
  else
    {
      switch (t)
        {
        case EMPTY: s = "EMPTY"; break;
        case BACKREF: s = "BACKREF"; break;
        case BEGLINE: s = "BEGLINE"; break;
        case ENDLINE: s = "ENDLINE"; break;
        case BEGWORD: s = "BEGWORD"; break;
        case ENDWORD: s = "ENDWORD"; break;
        case LIMWORD: s = "LIMWORD"; break;
        case NOTLIMWORD: s = "NOTLIMWORD"; break;
        case QMARK: s = "QMARK"; break;
        case STAR: s = "STAR"; break;
        case PLUS: s = "PLUS"; break;
        case CAT: s = "CAT"; break;
        case OR: s = "OR"; break;
        case LPAREN: s = "LPAREN"; break;
        case RPAREN: s = "RPAREN"; break;
#if MBS_SUPPORT
        case ANYCHAR: s = "ANYCHAR"; break;
        case MBCSET: s = "MBCSET"; break;
#endif 
        default: s = "CSET"; break;
        }
      fprintf(stderr, "%s", s);
    }
}
#endif 
static int
tstbit (unsigned int b, charclass const c)
{
  return c[b / INTBITS] & 1 << b % INTBITS;
}
static void
setbit (unsigned int b, charclass c)
{
  c[b / INTBITS] |= 1 << b % INTBITS;
}
static void
clrbit (unsigned int b, charclass c)
{
  c[b / INTBITS] &= ~(1 << b % INTBITS);
}
static void
copyset (charclass const src, charclass dst)
{
  memcpy (dst, src, sizeof (charclass));
}
static void
zeroset (charclass s)
{
  memset (s, 0, sizeof (charclass));
}
static void
notset (charclass s)
{
  int i;
  for (i = 0; i < CHARCLASS_INTS; ++i)
    s[i] = ~s[i];
}
static int
equal (charclass const s1, charclass const s2)
{
  return memcmp (s1, s2, sizeof (charclass)) == 0;
}
static struct dfa *dfa;
static int
charclass_index (charclass const s)
{
  int i;
  for (i = 0; i < dfa->cindex; ++i)
    if (equal(s, dfa->charclasses[i]))
      return i;
  REALLOC_IF_NECESSARY(dfa->charclasses, charclass, dfa->calloc, dfa->cindex);
  ++dfa->cindex;
  copyset(s, dfa->charclasses[i]);
  return i;
}
static reg_syntax_t syntax_bits, syntax_bits_set;
static int case_fold;
static unsigned char eolbyte;
void
dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
{
  syntax_bits_set = 1;
  syntax_bits = bits;
  case_fold = fold;
  eolbyte = eol;
}
static void
setbit_case_fold (
#if MBS_SUPPORT
                  wint_t b,
#else
                  unsigned int b,
#endif
                  charclass c)
{
  if (case_fold)
    {
#if MBS_SUPPORT
      if (MB_CUR_MAX > 1)
        {
          wint_t b1 = iswupper(b) ? towlower(b) : b;
          wint_t b2 = iswlower(b) ? towupper(b) : b;
          if (wctob ((unsigned char)b1) == b1)
            setbit (b1, c);
          if (b2 != b1 && wctob ((unsigned char)b2) == b2)
            setbit (b2, c);
        }
      else
#endif
        {
          unsigned char b1 = isupper(b) ? tolower(b) : b;
          unsigned char b2 = islower(b) ? toupper(b) : b;
          setbit (b1, c);
          if (b2 != b1)
            setbit (b2, c);
        }
    }
  else
    {
#if MBS_SUPPORT
      if (wctob ((unsigned char)b) == b)
#endif
        setbit (b, c);
    }
}
static inline int
using_utf8 (void)
{
  static int utf8 = -1;
  if (utf8 == -1)
    {
#if defined HAVE_LANGINFO_CODESET && defined MBS_SUPPORT
      utf8 = (STREQ (nl_langinfo (CODESET), "UTF-8"));
#else
      utf8 = 0;
#endif
    }
  return utf8;
}
static char const *lexptr;	
static int lexleft;		
static token lasttok;		
static int laststart;		
static int parens;		
static int minrep, maxrep;	
static int hard_LC_COLLATE;	
static int cur_mb_len = 1;	
#if MBS_SUPPORT
static mbstate_t mbs;		
static wchar_t wctok;		
static unsigned char *mblen_buf;
static wchar_t *inputwcs;	
static unsigned char const *buf_begin;	
static unsigned char const *buf_end;	
#endif 
#if MBS_SUPPORT
# define FETCH_WC(c, wc, eoferr)		\
  do {						\
    if (! lexleft)				\
      {						\
        if ((eoferr) != 0)			\
          dfaerror (eoferr);			\
        else					\
          return lasttok = END;			\
      }						\
    else					\
      {						\
        wchar_t _wc;				\
        cur_mb_len = mbrtowc(&_wc, lexptr, lexleft, &mbs); \
        if (cur_mb_len <= 0)			\
          {					\
            cur_mb_len = 1;			\
            --lexleft;				\
            (wc) = (c) = (unsigned char) *lexptr++; \
          }					\
        else					\
          {					\
            lexptr += cur_mb_len;		\
            lexleft -= cur_mb_len;		\
            (wc) = _wc;				\
            (c) = wctob(wc);			\
          }					\
      }						\
  } while(0)
# define FETCH(c, eoferr)			\
  do {						\
    wint_t wc;					\
    FETCH_WC(c, wc, eoferr);			\
  } while(0)
#else
# define FETCH(c, eoferr)	      \
  do {				      \
    if (! lexleft)		      \
      {				      \
        if ((eoferr) != 0)	      \
          dfaerror (eoferr);	      \
        else			      \
          return lasttok = END;	      \
      }				      \
    (c) = (unsigned char) *lexptr++;  \
    --lexleft;			      \
  } while(0)
# define FETCH_WC(c, unused, eoferr) FETCH (c, eoferr)
#endif 
static int
in_coll_range (char ch, char from, char to)
{
  char c[6] = { from, 0, ch, 0, to, 0 };
  return strcoll (&c[0], &c[2]) <= 0 && strcoll (&c[2], &c[4]) <= 0;
}
typedef int predicate (int);
struct dfa_ctype {
  const char *name;
  predicate *func;
  bool single_byte_only;
};
static const struct dfa_ctype prednames[] = {
  { "alpha", isalpha, false },
  { "upper", isupper, false },
  { "lower", islower, false },
  { "digit", isdigit, true },
  { "xdigit", isxdigit, true },
  { "space", isspace, false },
  { "punct", ispunct, false },
  { "alnum", isalnum, false },
  { "print", isprint, false },
  { "graph", isgraph, false },
  { "cntrl", iscntrl, false },
  { "blank", isblank, false },
  { NULL, NULL, false }
};
static const struct dfa_ctype *
find_pred (const char *str)
{
  unsigned int i;
  for (i = 0; prednames[i].name; ++i)
    if (STREQ (str, prednames[i].name))
      break;
  return &prednames[i];
}
static token
parse_bracket_exp (void)
{
  int invert;
  int c, c1, c2;
  charclass ccl;
  int colon_warning_state;
#if MBS_SUPPORT
  wint_t wc, wc1, wc2;
  struct mb_char_classes *work_mbc;
  int chars_al, range_sts_al, range_ends_al, ch_classes_al,
    equivs_al, coll_elems_al;
  chars_al = 1;
  range_sts_al = range_ends_al = 0;
  ch_classes_al = equivs_al = coll_elems_al = 0;
  if (MB_CUR_MAX > 1)
    {
      REALLOC_IF_NECESSARY(dfa->mbcsets, struct mb_char_classes,
                           dfa->mbcsets_alloc, dfa->nmbcsets + 1);
      work_mbc = &(dfa->mbcsets[dfa->nmbcsets++]);
      memset (work_mbc, 0, sizeof *work_mbc);
    }
  else
    work_mbc = NULL;
#endif
  memset (ccl, 0, sizeof ccl);
  FETCH_WC (c, wc, _("unbalanced ["));
  if (c == '^')
    {
      FETCH_WC (c, wc, _("unbalanced ["));
      invert = 1;
    }
  else
    invert = 0;
  colon_warning_state = (c == ':');
  do
    {
      c1 = EOF; 
      colon_warning_state &= ~2;
      if (c == '[' && (syntax_bits & RE_CHAR_CLASSES))
        {
#define BRACKET_BUFFER_SIZE 128
          char str[BRACKET_BUFFER_SIZE];
          FETCH_WC (c1, wc1, _("unbalanced ["));
          if (c1 == ':'
#if MBS_SUPPORT
              || (MB_CUR_MAX > 1 && (c1 == '.' || c1 == '='))
#endif
              )
            {
              size_t len = 0;
              for (;;)
                {
                  FETCH_WC (c, wc, _("unbalanced ["));
                  if ((c == c1 && *lexptr == ']') || lexleft == 0)
                    break;
                  if (len < BRACKET_BUFFER_SIZE)
                    str[len++] = c;
                  else
                    str[0] = '\0';
                }
              str[len] = '\0';
              FETCH_WC (c, wc, _("unbalanced ["));
              if (c1 == ':')
                {
                  char const *class
                    = (case_fold && (STREQ  (str, "upper")
                                     || STREQ  (str, "lower"))
                                       ? "alpha"
                                       : str);
                  const struct dfa_ctype *pred = find_pred (class);
                  if (!pred)
                    dfaerror(_("invalid character class"));
#if MBS_SUPPORT
                  if (MB_CUR_MAX > 1 && !pred->single_byte_only)
                    {
                      wctype_t wt = wctype (class);
                      if (ch_classes_al == 0)
                        MALLOC(work_mbc->ch_classes, wctype_t, ++ch_classes_al);
                      REALLOC_IF_NECESSARY(work_mbc->ch_classes, wctype_t,
                                           ch_classes_al,
                                           work_mbc->nch_classes + 1);
                      work_mbc->ch_classes[work_mbc->nch_classes++] = wt;
                    }
#endif
                  for (c2 = 0; c2 < NOTCHAR; ++c2)
                    if (pred->func(c2))
                      setbit_case_fold (c2, ccl);
                }
#if MBS_SUPPORT
              else if (c1 == '=' || c1 == '.')
                {
                  char *elem;
                  MALLOC(elem, char, len + 1);
                  strncpy(elem, str, len + 1);
                  if (c1 == '=')
                    {
                      if (equivs_al == 0)
                        MALLOC(work_mbc->equivs, char*, ++equivs_al);
                      REALLOC_IF_NECESSARY(work_mbc->equivs, char*,
                                           equivs_al,
                                           work_mbc->nequivs + 1);
                      work_mbc->equivs[work_mbc->nequivs++] = elem;
                    }
                  if (c1 == '.')
                    {
                      if (coll_elems_al == 0)
                        MALLOC(work_mbc->coll_elems, char*, ++coll_elems_al);
                      REALLOC_IF_NECESSARY(work_mbc->coll_elems, char*,
                                           coll_elems_al,
                                           work_mbc->ncoll_elems + 1);
                      work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
                    }
                }
#endif
              colon_warning_state |= 8;
              FETCH_WC (c1, wc1, _("unbalanced ["));
              continue;
            }
        }
      if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
        FETCH_WC(c, wc, _("unbalanced ["));
      if (c1 == EOF)
        FETCH_WC(c1, wc1, _("unbalanced ["));
      if (c1 == '-')
        {
          FETCH_WC(c2, wc2, _("unbalanced ["));
          if (c2 == ']')
            {
              lexptr -= cur_mb_len;
              lexleft += cur_mb_len;
            }
        }
      if (c1 == '-' && c2 != ']')
        {
          if (c2 == '\\'
              && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
            FETCH_WC(c2, wc2, _("unbalanced ["));
#if MBS_SUPPORT
          if (MB_CUR_MAX > 1)
            {
              if (range_sts_al == 0)
                {
                  MALLOC(work_mbc->range_sts, wchar_t, ++range_sts_al);
                  MALLOC(work_mbc->range_ends, wchar_t, ++range_ends_al);
                }
              REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
                                   range_sts_al, work_mbc->nranges + 1);
              REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
                                   range_ends_al, work_mbc->nranges + 1);
              work_mbc->range_sts[work_mbc->nranges] =
                case_fold ? towlower(wc) : (wchar_t)wc;
              work_mbc->range_ends[work_mbc->nranges++] =
                case_fold ? towlower(wc2) : (wchar_t)wc2;
#ifndef GREP
              if (case_fold && (iswalpha(wc) || iswalpha(wc2)))
                {
                  REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
                                       range_sts_al, work_mbc->nranges + 1);
                  work_mbc->range_sts[work_mbc->nranges] = towupper(wc);
                  REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
                                       range_ends_al, work_mbc->nranges + 1);
                  work_mbc->range_ends[work_mbc->nranges++] = towupper(wc2);
                }
#endif
            }
          else
#endif
            {
              c1 = c;
              if (case_fold)
                {
                  c1 = tolower (c1);
                  c2 = tolower (c2);
                }
              if (!hard_LC_COLLATE)
                for (c = c1; c <= c2; c++)
                  setbit_case_fold (c, ccl);
              else
                for (c = 0; c < NOTCHAR; ++c)
                  if (!(case_fold && isupper (c))
                      && in_coll_range (c, c1, c2))
                    setbit_case_fold (c, ccl);
            }
          colon_warning_state |= 8;
          FETCH_WC(c1, wc1, _("unbalanced ["));
          continue;
        }
      colon_warning_state |= (c == ':') ? 2 : 4;
#if MBS_SUPPORT
      setbit_case_fold (wc, ccl);
      if (MB_CUR_MAX > 1)
        {
          if (case_fold && iswalpha(wc))
            {
              wc = towlower(wc);
              c = wctob(wc);
              if (c == EOF || (wint_t)c != (wint_t)wc)
                {
                  REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
                                       work_mbc->nchars + 1);
                  work_mbc->chars[work_mbc->nchars++] = wc;
                }
#ifdef GREP
              continue;
#else
              wc = towupper(wc);
              c = wctob(wc);
#endif
            }
          if (c == EOF || (wint_t)c != (wint_t)wc)
            {
              REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
                                   work_mbc->nchars + 1);
              work_mbc->chars[work_mbc->nchars++] = wc;
            }
        }
#else
      setbit_case_fold (c, ccl);
#endif
    }
  while ((
#if MBS_SUPPORT
         wc = wc1,
#endif
         (c = c1) != ']'));
  if (colon_warning_state == 7)
    dfawarn (_("character class syntax is [[:space:]], not [:space:]"));
#if MBS_SUPPORT
  if (MB_CUR_MAX > 1
      && (!using_utf8()
          || invert
          || work_mbc->nchars != 0
          || work_mbc->nch_classes != 0
          || work_mbc->nranges != 0
          || work_mbc->nequivs != 0
          || work_mbc->ncoll_elems != 0))
    {
      static charclass zeroclass;
      work_mbc->invert = invert;
      work_mbc->cset = equal(ccl, zeroclass) ? -1 : charclass_index(ccl);
      return MBCSET;
    }
#endif
  if (invert)
    {
#if MBS_SUPPORT
      assert(MB_CUR_MAX == 1);
#endif
      notset(ccl);
      if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
        clrbit(eolbyte, ccl);
    }
  return CSET + charclass_index(ccl);
}
#define IS_WORD_CONSTITUENT(C) (isalnum(C) || (C) == '_')
static token
lex (void)
{
  unsigned int c, c2;
  int backslash = 0;
  charclass ccl;
  int i;
  for (i = 0; i < 2; ++i)
    {
#if MBS_SUPPORT
      if (MB_CUR_MAX > 1)
        {
          FETCH_WC (c, wctok, NULL);
          if ((int)c == EOF)
            goto normal_char;
        }
      else
#endif 
        FETCH(c, NULL);
      switch (c)
        {
        case '\\':
          if (backslash)
            goto normal_char;
          if (lexleft == 0)
            dfaerror(_("unfinished \\ escape"));
          backslash = 1;
          break;
        case '^':
          if (backslash)
            goto normal_char;
          if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
              || lasttok == END
              || lasttok == LPAREN
              || lasttok == OR)
            return lasttok = BEGLINE;
          goto normal_char;
        case '$':
          if (backslash)
            goto normal_char;
          if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
              || lexleft == 0
              || (syntax_bits & RE_NO_BK_PARENS
                  ? lexleft > 0 && *lexptr == ')'
                  : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')')
              || (syntax_bits & RE_NO_BK_VBAR
                  ? lexleft > 0 && *lexptr == '|'
                  : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|')
              || ((syntax_bits & RE_NEWLINE_ALT)
                  && lexleft > 0 && *lexptr == '\n'))
            return lasttok = ENDLINE;
          goto normal_char;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          if (backslash && !(syntax_bits & RE_NO_BK_REFS))
            {
              laststart = 0;
              return lasttok = BACKREF;
            }
          goto normal_char;
        case '`':
          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
            return lasttok = BEGLINE;	
          goto normal_char;
        case '\'':
          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
            return lasttok = ENDLINE;	
          goto normal_char;
        case '<':
          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
            return lasttok = BEGWORD;
          goto normal_char;
        case '>':
          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
            return lasttok = ENDWORD;
          goto normal_char;
        case 'b':
          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
            return lasttok = LIMWORD;
          goto normal_char;
        case 'B':
          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
            return lasttok = NOTLIMWORD;
          goto normal_char;
        case '?':
          if (syntax_bits & RE_LIMITED_OPS)
            goto normal_char;
          if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
            goto normal_char;
          if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
            goto normal_char;
          return lasttok = QMARK;
        case '*':
          if (backslash)
            goto normal_char;
          if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
            goto normal_char;
          return lasttok = STAR;
        case '+':
          if (syntax_bits & RE_LIMITED_OPS)
            goto normal_char;
          if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
            goto normal_char;
          if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
            goto normal_char;
          return lasttok = PLUS;
        case '{':
          if (!(syntax_bits & RE_INTERVALS))
            goto normal_char;
          if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
            goto normal_char;
          if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
            goto normal_char;
          if (syntax_bits & RE_NO_BK_BRACES)
            {
              int lo = -1, hi = -1;
              char const *p = lexptr;
              char const *lim = p + lexleft;
              for (;  p != lim && ISASCIIDIGIT (*p);  p++)
                lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
              if (p != lim && *p == ',')
                while (++p != lim && ISASCIIDIGIT (*p))
                  hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
              else
                hi = lo;
              if (p == lim || *p != '}'
                  || lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo))
                goto normal_char;
            }
          minrep = 0;
          FETCH(c, _("unfinished repeat count"));
          if (ISASCIIDIGIT (c))
            {
              minrep = c - '0';
              for (;;)
                {
                  FETCH(c, _("unfinished repeat count"));
                  if (! ISASCIIDIGIT (c))
                    break;
                  minrep = 10 * minrep + c - '0';
                }
            }
          else
            dfaerror(_("malformed repeat count"));
          if (c == ',')
            {
              FETCH (c, _("unfinished repeat count"));
              if (! ISASCIIDIGIT (c))
                maxrep = -1;
              else
                {
                  maxrep = c - '0';
                  for (;;)
                    {
                      FETCH (c, _("unfinished repeat count"));
                      if (! ISASCIIDIGIT (c))
                        break;
                      maxrep = 10 * maxrep + c - '0';
                    }
                  if (0 <= maxrep && maxrep < minrep)
                    dfaerror (_("malformed repeat count"));
                }
            }
          else
            maxrep = minrep;
          if (!(syntax_bits & RE_NO_BK_BRACES))
            {
              if (c != '\\')
                dfaerror(_("malformed repeat count"));
              FETCH(c, _("unfinished repeat count"));
            }
          if (c != '}')
            dfaerror(_("malformed repeat count"));
          laststart = 0;
          return lasttok = REPMN;
        case '|':
          if (syntax_bits & RE_LIMITED_OPS)
            goto normal_char;
          if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
            goto normal_char;
          laststart = 1;
          return lasttok = OR;
        case '\n':
          if (syntax_bits & RE_LIMITED_OPS
              || backslash
              || !(syntax_bits & RE_NEWLINE_ALT))
            goto normal_char;
          laststart = 1;
          return lasttok = OR;
        case '(':
          if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
            goto normal_char;
          ++parens;
          laststart = 1;
          return lasttok = LPAREN;
        case ')':
          if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
            goto normal_char;
          if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
            goto normal_char;
          --parens;
          laststart = 0;
          return lasttok = RPAREN;
        case '.':
          if (backslash)
            goto normal_char;
#if MBS_SUPPORT
          if (MB_CUR_MAX > 1)
            {
              laststart = 0;
              return lasttok = ANYCHAR;
            }
#endif 
          zeroset(ccl);
          notset(ccl);
          if (!(syntax_bits & RE_DOT_NEWLINE))
            clrbit(eolbyte, ccl);
          if (syntax_bits & RE_DOT_NOT_NULL)
            clrbit('\0', ccl);
          laststart = 0;
          return lasttok = CSET + charclass_index(ccl);
#ifndef GAWK
        case 's':
        case 'S':
          if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
            goto normal_char;
          zeroset(ccl);
          for (c2 = 0; c2 < NOTCHAR; ++c2)
            if (isspace(c2))
              setbit(c2, ccl);
          if (c == 'S')
            notset(ccl);
          laststart = 0;
          return lasttok = CSET + charclass_index(ccl);
#endif
        case 'w':
        case 'W':
          if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
            goto normal_char;
          zeroset(ccl);
          for (c2 = 0; c2 < NOTCHAR; ++c2)
            if (IS_WORD_CONSTITUENT(c2))
              setbit(c2, ccl);
          if (c == 'W')
            notset(ccl);
          laststart = 0;
          return lasttok = CSET + charclass_index(ccl);
        case '[':
          if (backslash)
            goto normal_char;
          laststart = 0;
          return lasttok = parse_bracket_exp();
        default:
        normal_char:
          laststart = 0;
#if MBS_SUPPORT
          if (MB_CUR_MAX > 1)
            return lasttok = WCHAR;
#endif
          if (case_fold && isalpha(c))
            {
              zeroset(ccl);
              setbit_case_fold (c, ccl);
              return lasttok = CSET + charclass_index(ccl);
            }
          return lasttok = c;
        }
    }
  abort();
  return END;	
}
static token tok;		
static int depth;		
static void
addtok_mb (token t, int mbprop)
{
#if MBS_SUPPORT
  if (MB_CUR_MAX > 1)
    {
      REALLOC_IF_NECESSARY(dfa->multibyte_prop, int, dfa->nmultibyte_prop,
                           dfa->tindex);
      dfa->multibyte_prop[dfa->tindex] = mbprop;
    }
#else
  (void) mbprop;
#endif
  REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
  dfa->tokens[dfa->tindex++] = t;
  switch (t)
    {
    case QMARK:
    case STAR:
    case PLUS:
      break;
    case CAT:
    case OR:
      --depth;
      break;
    default:
      ++dfa->nleaves;
    case EMPTY:
      ++depth;
      break;
    }
  if (depth > dfa->depth)
    dfa->depth = depth;
}
static void
addtok (token t)
{
#if MBS_SUPPORT
  if (MB_CUR_MAX > 1 && t == MBCSET)
    addtok_mb (MBCSET, ((dfa->nmbcsets - 1) << 2) + 3);
  else
#endif
    addtok_mb (t, 3);
}
#if MBS_SUPPORT
static void
addtok_wc (wint_t wc)
{
  unsigned char buf[MB_LEN_MAX];
  mbstate_t s;
  int i;
  memset (&s, 0, sizeof s);
  cur_mb_len = wcrtomb ((char *) buf, wc, &s);
  if (cur_mb_len <= 0)
    buf[0] = 0;
  addtok_mb(buf[0], cur_mb_len == 1 ? 3 : 1);
  for (i = 1; i < cur_mb_len; i++)
    {
      addtok_mb(buf[i], i == cur_mb_len - 1 ? 2 : 0);
      addtok(CAT);
    }
}
#endif
static void
add_utf8_anychar (void)
{
  static const charclass utf8_classes[5] = {
      {  0,  0,  0,  0, ~0, ~0, 0, 0 },            
      { ~0, ~0, ~0, ~0, 0, 0, 0, 0 },              
      {  0,  0,  0,  0,  0,  0, 0xfffffffcU, 0 },  
      {  0,  0,  0,  0,  0,  0,  0, 0xffff },      
      {  0,  0,  0,  0,  0,  0,  0, 0xff0000 }     
  };
  const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]);
  unsigned int i;
  if (dfa->utf8_anychar_classes[0] == 0)
    for (i = 0; i < n; i++)
      {
        charclass c;
        memcpy (c, utf8_classes[i], sizeof c);
        if (i == 1)
          {
            if (!(syntax_bits & RE_DOT_NEWLINE))
              clrbit (eolbyte, c);
            if (syntax_bits & RE_DOT_NOT_NULL)
              clrbit ('\0', c);
          }
        dfa->utf8_anychar_classes[i] = CSET + charclass_index(c);
      }
  for (i = 1; i < n; i++)
    addtok (dfa->utf8_anychar_classes[i]);
  while (--i > 1)
    {
      addtok (dfa->utf8_anychar_classes[0]);
      addtok (CAT);
      addtok (OR);
    }
}
static void
atom (void)
{
  if (0)
    {
    }
#if MBS_SUPPORT
  else if (tok == WCHAR)
    {
      addtok_wc (case_fold ? towlower(wctok) : wctok);
#ifndef GREP
      if (case_fold && iswalpha(wctok))
        {
          addtok_wc (towupper(wctok));
          addtok (OR);
        }
#endif
      tok = lex();
    }
  else if (tok == ANYCHAR && using_utf8())
    {
      add_utf8_anychar();
      tok = lex();
    }
#endif 
  else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
           || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
#if MBS_SUPPORT
           || tok == ANYCHAR || tok == MBCSET
#endif 
           || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
    {
      addtok(tok);
      tok = lex();
    }
  else if (tok == LPAREN)
    {
      tok = lex();
      regexp();
      if (tok != RPAREN)
        dfaerror(_("unbalanced ("));
      tok = lex();
    }
  else
    addtok(EMPTY);
}
static int
nsubtoks (int tindex)
{
  int ntoks1;
  switch (dfa->tokens[tindex - 1])
    {
    default:
      return 1;
    case QMARK:
    case STAR:
    case PLUS:
      return 1 + nsubtoks(tindex - 1);
    case CAT:
    case OR:
      ntoks1 = nsubtoks(tindex - 1);
      return 1 + ntoks1 + nsubtoks(tindex - 1 - ntoks1);
    }
}
static void
copytoks (int tindex, int ntokens)
{
  int i;
  for (i = 0; i < ntokens; ++i)
    {
      addtok(dfa->tokens[tindex + i]);
#if MBS_SUPPORT
      if (MB_CUR_MAX > 1 && dfa->tokens[tindex + i] == MBCSET)
        dfa->multibyte_prop[dfa->tindex - 1] = dfa->multibyte_prop[tindex + i];
#endif
    }
}
static void
closure (void)
{
  int tindex, ntokens, i;
  atom();
  while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
    if (tok == REPMN && (minrep || maxrep))
      {
        ntokens = nsubtoks(dfa->tindex);
        tindex = dfa->tindex - ntokens;
        if (maxrep < 0)
          addtok(PLUS);
        if (minrep == 0)
          addtok(QMARK);
        for (i = 1; i < minrep; ++i)
          {
            copytoks(tindex, ntokens);
            addtok(CAT);
          }
        for (; i < maxrep; ++i)
          {
            copytoks(tindex, ntokens);
            addtok(QMARK);
            addtok(CAT);
          }
        tok = lex();
      }
    else if (tok == REPMN)
      {
        dfa->tindex -= nsubtoks(dfa->tindex);
        tok = lex();
        closure();
      }
    else
      {
        addtok(tok);
        tok = lex();
      }
}
static void
branch (void)
{
  closure();
  while (tok != RPAREN && tok != OR && tok >= 0)
    {
      closure();
      addtok(CAT);
    }
}
static void
regexp (void)
{
  branch();
  while (tok == OR)
    {
      tok = lex();
      branch();
      addtok(OR);
    }
}
void
dfaparse (char const *s, size_t len, struct dfa *d)
{
  dfa = d;
  lexptr = s;
  lexleft = len;
  lasttok = END;
  laststart = 1;
  parens = 0;
#ifdef LC_COLLATE
  hard_LC_COLLATE = hard_locale (LC_COLLATE);
#endif
#if MBS_SUPPORT
  if (MB_CUR_MAX > 1)
    {
      cur_mb_len = 0;
      memset(&mbs, 0, sizeof mbs);
    }
#endif 
  if (! syntax_bits_set)
    dfaerror(_("no syntax specified"));
  tok = lex();
  depth = d->depth;
  regexp();
  if (tok != END)
    dfaerror(_("unbalanced )"));
  addtok(END - d->nregexps);
  addtok(CAT);
  if (d->nregexps)
    addtok(OR);
  ++d->nregexps;
}
static void
copy (position_set const *src, position_set *dst)
{
  int i;
  for (i = 0; i < src->nelem; ++i)
    dst->elems[i] = src->elems[i];
  dst->nelem = src->nelem;
}
static void
insert (position p, position_set *s)
{
  int count = s->nelem;
  int lo = 0, hi = count;
  while (lo < hi)
    {
      int mid = ((unsigned) lo + (unsigned) hi) >> 1;
      if (s->elems[mid].index < p.index)
        lo = mid + 1;
      else
        hi = mid;
    }
  if (lo < count && p.index == s->elems[lo].index)
    s->elems[lo].constraint |= p.constraint;
  else
    {
      int i;
      for (i = count; i > lo; i--)
        s->elems[i] = s->elems[i - 1];
      s->elems[lo] = p;
      ++s->nelem;
    }
}
static void
merge (position_set const *s1, position_set const *s2, position_set *m)
{
  int i = 0, j = 0;
  m->nelem = 0;
  while (i < s1->nelem && j < s2->nelem)
    if (s1->elems[i].index > s2->elems[j].index)
      m->elems[m->nelem++] = s1->elems[i++];
    else if (s1->elems[i].index < s2->elems[j].index)
      m->elems[m->nelem++] = s2->elems[j++];
    else
      {
        m->elems[m->nelem] = s1->elems[i++];
        m->elems[m->nelem++].constraint |= s2->elems[j++].constraint;
      }
  while (i < s1->nelem)
    m->elems[m->nelem++] = s1->elems[i++];
  while (j < s2->nelem)
    m->elems[m->nelem++] = s2->elems[j++];
}
static void
delete (position p, position_set *s)
{
  int i;
  for (i = 0; i < s->nelem; ++i)
    if (p.index == s->elems[i].index)
      break;
  if (i < s->nelem)
    for (--s->nelem; i < s->nelem; ++i)
      s->elems[i] = s->elems[i + 1];
}
static int
state_index (struct dfa *d, position_set const *s, int newline, int letter)
{
  int hash = 0;
  int constraint;
  int i, j;
  newline = newline ? 1 : 0;
  letter = letter ? 1 : 0;
  for (i = 0; i < s->nelem; ++i)
    hash ^= s->elems[i].index + s->elems[i].constraint;
  for (i = 0; i < d->sindex; ++i)
    {
      if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem
          || newline != d->states[i].newline || letter != d->states[i].letter)
        continue;
      for (j = 0; j < s->nelem; ++j)
        if (s->elems[j].constraint
            != d->states[i].elems.elems[j].constraint
            || s->elems[j].index != d->states[i].elems.elems[j].index)
          break;
      if (j == s->nelem)
        return i;
    }
  REALLOC_IF_NECESSARY(d->states, dfa_state, d->salloc, d->sindex);
  d->states[i].hash = hash;
  MALLOC(d->states[i].elems.elems, position, s->nelem);
  copy(s, &d->states[i].elems);
  d->states[i].newline = newline;
  d->states[i].letter = letter;
  d->states[i].backref = 0;
  d->states[i].constraint = 0;
  d->states[i].first_end = 0;
#if MBS_SUPPORT
  d->states[i].mbps.nelem = 0;
  d->states[i].mbps.elems = NULL;
#endif
  for (j = 0; j < s->nelem; ++j)
    if (d->tokens[s->elems[j].index] < 0)
      {
        constraint = s->elems[j].constraint;
        if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0)
            || SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1)
            || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0)
            || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1))
          d->states[i].constraint |= constraint;
        if (! d->states[i].first_end)
          d->states[i].first_end = d->tokens[s->elems[j].index];
      }
    else if (d->tokens[s->elems[j].index] == BACKREF)
      {
        d->states[i].constraint = NO_CONSTRAINT;
        d->states[i].backref = 1;
      }
  ++d->sindex;
  return i;
}
static void
epsclosure (position_set *s, struct dfa const *d)
{
  int i, j;
  char *visited;	
  position p, old;
  CALLOC(visited, char, d->tindex);
  for (i = 0; i < s->nelem; ++i)
    if (d->tokens[s->elems[i].index] >= NOTCHAR
        && d->tokens[s->elems[i].index] != BACKREF
#if MBS_SUPPORT
        && d->tokens[s->elems[i].index] != ANYCHAR
        && d->tokens[s->elems[i].index] != MBCSET
#endif
        && d->tokens[s->elems[i].index] < CSET)
      {
        old = s->elems[i];
        p.constraint = old.constraint;
        delete(s->elems[i], s);
        if (visited[old.index])
          {
            --i;
            continue;
          }
        visited[old.index] = 1;
        switch (d->tokens[old.index])
          {
          case BEGLINE:
            p.constraint &= BEGLINE_CONSTRAINT;
            break;
          case ENDLINE:
            p.constraint &= ENDLINE_CONSTRAINT;
            break;
          case BEGWORD:
            p.constraint &= BEGWORD_CONSTRAINT;
            break;
          case ENDWORD:
            p.constraint &= ENDWORD_CONSTRAINT;
            break;
          case LIMWORD:
            p.constraint &= LIMWORD_CONSTRAINT;
            break;
          case NOTLIMWORD:
            p.constraint &= NOTLIMWORD_CONSTRAINT;
            break;
          default:
            break;
          }
        for (j = 0; j < d->follows[old.index].nelem; ++j)
          {
            p.index = d->follows[old.index].elems[j].index;
            insert(p, s);
          }
        i = -1;
      }
  free(visited);
}
void
dfaanalyze (struct dfa *d, int searchflag)
{
  int *nullable;		
  int *nfirstpos;		
  position *firstpos;		
  int *nlastpos;		
  position *lastpos;		
  int *nalloc;			
  position_set tmp;		
  position_set merged;		
  int wants_newline;		
  int *o_nullable;
  int *o_nfirst, *o_nlast;
  position *o_firstpos, *o_lastpos;
  int i, j;
  position *pos;
#ifdef DEBUG
  fprintf(stderr, "dfaanalyze:\n");
  for (i = 0; i < d->tindex; ++i)
    {
      fprintf(stderr, " %d:", i);
      prtok(d->tokens[i]);
    }
  putc('\n', stderr);
#endif
  d->searchflag = searchflag;
  MALLOC(nullable, int, d->depth);
  o_nullable = nullable;
  MALLOC(nfirstpos, int, d->depth);
  o_nfirst = nfirstpos;
  MALLOC(firstpos, position, d->nleaves);
  o_firstpos = firstpos, firstpos += d->nleaves;
  MALLOC(nlastpos, int, d->depth);
  o_nlast = nlastpos;
  MALLOC(lastpos, position, d->nleaves);
  o_lastpos = lastpos, lastpos += d->nleaves;
  CALLOC(nalloc, int, d->tindex);
  MALLOC(merged.elems, position, d->nleaves);
  CALLOC(d->follows, position_set, d->tindex);
  for (i = 0; i < d->tindex; ++i)
#ifdef DEBUG
    {				
#endif
    switch (d->tokens[i])
      {
      case EMPTY:
        *nullable++ = 1;
        *nfirstpos++ = *nlastpos++ = 0;
        break;
      case STAR:
      case PLUS:
        tmp.nelem = nfirstpos[-1];
        tmp.elems = firstpos;
        pos = lastpos;
        for (j = 0; j < nlastpos[-1]; ++j)
          {
            merge(&tmp, &d->follows[pos[j].index], &merged);
            REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
                                 nalloc[pos[j].index], merged.nelem - 1);
            copy(&merged, &d->follows[pos[j].index]);
          }
      case QMARK:
        if (d->tokens[i] != PLUS)
          nullable[-1] = 1;
        break;
      case CAT:
        tmp.nelem = nfirstpos[-1];
        tmp.elems = firstpos;
        pos = lastpos + nlastpos[-1];
        for (j = 0; j < nlastpos[-2]; ++j)
          {
            merge(&tmp, &d->follows[pos[j].index], &merged);
            REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
                                 nalloc[pos[j].index], merged.nelem - 1);
            copy(&merged, &d->follows[pos[j].index]);
          }
        if (nullable[-2])
          nfirstpos[-2] += nfirstpos[-1];
        else
          firstpos += nfirstpos[-1];
        --nfirstpos;
        if (nullable[-1])
          nlastpos[-2] += nlastpos[-1];
        else
          {
            pos = lastpos + nlastpos[-2];
            for (j = nlastpos[-1] - 1; j >= 0; --j)
              pos[j] = lastpos[j];
            lastpos += nlastpos[-2];
            nlastpos[-2] = nlastpos[-1];
          }
        --nlastpos;
        nullable[-2] = nullable[-1] && nullable[-2];
        --nullable;
        break;
      case OR:
        nfirstpos[-2] += nfirstpos[-1];
        --nfirstpos;
        nlastpos[-2] += nlastpos[-1];
        --nlastpos;
        nullable[-2] = nullable[-1] || nullable[-2];
        --nullable;
        break;
      default:
        *nullable++ = d->tokens[i] == BACKREF;
        *nfirstpos++ = *nlastpos++ = 1;
        --firstpos, --lastpos;
        firstpos->index = lastpos->index = i;
        firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
        nalloc[i] = 1;
        MALLOC(d->follows[i].elems, position, nalloc[i]);
        break;
      }
#ifdef DEBUG
      fprintf(stderr, "node %d:", i);
      prtok(d->tokens[i]);
      putc('\n', stderr);
      fprintf(stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
      fprintf(stderr, " firstpos:");
      for (j = nfirstpos[-1] - 1; j >= 0; --j)
        {
          fprintf(stderr, " %d:", firstpos[j].index);
          prtok(d->tokens[firstpos[j].index]);
        }
      fprintf(stderr, "\n lastpos:");
      for (j = nlastpos[-1] - 1; j >= 0; --j)
        {
          fprintf(stderr, " %d:", lastpos[j].index);
          prtok(d->tokens[lastpos[j].index]);
        }
      putc('\n', stderr);
    }
#endif
  for (i = 0; i < d->tindex; ++i)
    if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
#if MBS_SUPPORT
        || d->tokens[i] == ANYCHAR
        || d->tokens[i] == MBCSET
#endif
        || d->tokens[i] >= CSET)
      {
#ifdef DEBUG
        fprintf(stderr, "follows(%d:", i);
        prtok(d->tokens[i]);
        fprintf(stderr, "):");
        for (j = d->follows[i].nelem - 1; j >= 0; --j)
          {
            fprintf(stderr, " %d:", d->follows[i].elems[j].index);
            prtok(d->tokens[d->follows[i].elems[j].index]);
          }
        putc('\n', stderr);
#endif
        copy(&d->follows[i], &merged);
        epsclosure(&merged, d);
        if (d->follows[i].nelem < merged.nelem)
          REALLOC(d->follows[i].elems, position, merged.nelem);
        copy(&merged, &d->follows[i]);
      }
  merged.nelem = 0;
  for (i = 0; i < nfirstpos[-1]; ++i)
    insert(firstpos[i], &merged);
  epsclosure(&merged, d);
  wants_newline = 0;
  for (i = 0; i < merged.nelem; ++i)
    if (PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint))
      wants_newline = 1;
  d->salloc = 1;
  d->sindex = 0;
  MALLOC(d->states, dfa_state, d->salloc);
  state_index(d, &merged, wants_newline, 0);
  free(o_nullable);
  free(o_nfirst);
  free(o_firstpos);
  free(o_nlast);
  free(o_lastpos);
  free(nalloc);
  free(merged.elems);
}
void
dfastate (int s, struct dfa *d, int trans[])
{
  position_set *grps;		
  charclass *labels;		
  int ngrps = 0;		
  position pos;			
  charclass matches;		
  int matchesf;			
  charclass intersect;		
  int intersectf;		
  charclass leftovers;		
  int leftoversf;		
  static charclass letters;	
  static charclass newline;	
  position_set follows;		
  position_set tmp;		
  int state;			
  int wants_newline;		
  int state_newline;		
  int wants_letter;		
  int state_letter;		
  static int initialized;	
#if MBS_SUPPORT
  int next_isnt_1st_byte = 0;	
#endif
  int i, j, k;
  grps = xnmalloc (NOTCHAR, sizeof *grps);
  labels = xnmalloc (NOTCHAR, sizeof *labels);
  if (! initialized)
    {
      initialized = 1;
      for (i = 0; i < NOTCHAR; ++i)
        if (IS_WORD_CONSTITUENT(i))
          setbit(i, letters);
      setbit(eolbyte, newline);
    }
  zeroset(matches);
  for (i = 0; i < d->states[s].elems.nelem; ++i)
    {
      pos = d->states[s].elems.elems[i];
      if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
        setbit(d->tokens[pos.index], matches);
      else if (d->tokens[pos.index] >= CSET)
        copyset(d->charclasses[d->tokens[pos.index] - CSET], matches);
#if MBS_SUPPORT
      else if (d->tokens[pos.index] == ANYCHAR
               || d->tokens[pos.index] == MBCSET)
        {
          if (d->states[s].mbps.nelem == 0)
            {
              MALLOC(d->states[s].mbps.elems, position,
                     d->states[s].elems.nelem);
            }
          insert(pos, &(d->states[s].mbps));
          continue;
        }
#endif 
      else
        continue;
      if (pos.constraint != 0xFF)
        {
          if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
                                         d->states[s].newline, 1))
            clrbit(eolbyte, matches);
          if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
                                         d->states[s].newline, 0))
            for (j = 0; j < CHARCLASS_INTS; ++j)
              matches[j] &= newline[j];
          if (! MATCHES_LETTER_CONTEXT(pos.constraint,
                                        d->states[s].letter, 1))
            for (j = 0; j < CHARCLASS_INTS; ++j)
              matches[j] &= ~letters[j];
          if (! MATCHES_LETTER_CONTEXT(pos.constraint,
                                        d->states[s].letter, 0))
            for (j = 0; j < CHARCLASS_INTS; ++j)
              matches[j] &= letters[j];
          for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
            continue;
          if (j == CHARCLASS_INTS)
            continue;
        }
      for (j = 0; j < ngrps; ++j)
        {
          if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR
              && !tstbit(d->tokens[pos.index], labels[j]))
            continue;
          intersectf = 0;
          for (k = 0; k < CHARCLASS_INTS; ++k)
            (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0;
          if (! intersectf)
            continue;
          leftoversf = matchesf = 0;
          for (k = 0; k < CHARCLASS_INTS; ++k)
            {
              int match = matches[k], label = labels[j][k];
              (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0;
              (matches[k] = match & ~label) ? (matchesf = 1) : 0;
            }
          if (leftoversf)
            {
              copyset(leftovers, labels[ngrps]);
              copyset(intersect, labels[j]);
              MALLOC(grps[ngrps].elems, position, d->nleaves);
              copy(&grps[j], &grps[ngrps]);
              ++ngrps;
            }
          grps[j].elems[grps[j].nelem++] = pos;
          if (! matchesf)
            break;
        }
      if (j == ngrps)
        {
          copyset(matches, labels[ngrps]);
          zeroset(matches);
          MALLOC(grps[ngrps].elems, position, d->nleaves);
          grps[ngrps].nelem = 1;
          grps[ngrps].elems[0] = pos;
          ++ngrps;
        }
    }
  MALLOC(follows.elems, position, d->nleaves);
  MALLOC(tmp.elems, position, d->nleaves);
  if (d->searchflag)
    {
      wants_newline = 0;
      wants_letter = 0;
      for (i = 0; i < d->states[0].elems.nelem; ++i)
        {
          if (PREV_NEWLINE_DEPENDENT(d->states[0].elems.elems[i].constraint))
            wants_newline = 1;
          if (PREV_LETTER_DEPENDENT(d->states[0].elems.elems[i].constraint))
            wants_letter = 1;
        }
      copy(&d->states[0].elems, &follows);
      state = state_index(d, &follows, 0, 0);
      if (wants_newline)
        state_newline = state_index(d, &follows, 1, 0);
      else
        state_newline = state;
      if (wants_letter)
        state_letter = state_index(d, &follows, 0, 1);
      else
        state_letter = state;
      for (i = 0; i < NOTCHAR; ++i)
        trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
      trans[eolbyte] = state_newline;
    }
  else
    for (i = 0; i < NOTCHAR; ++i)
      trans[i] = -1;
  for (i = 0; i < ngrps; ++i)
    {
      follows.nelem = 0;
      for (j = 0; j < grps[i].nelem; ++j)
        for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k)
          insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
#if MBS_SUPPORT
      if (d->mb_cur_max > 1)
        {
          next_isnt_1st_byte = 0;
          for (j = 0; j < follows.nelem; ++j)
            {
              if (!(d->multibyte_prop[follows.elems[j].index] & 1))
                {
                  next_isnt_1st_byte = 1;
                  break;
                }
            }
        }
#endif
#if MBS_SUPPORT
      if (d->searchflag && (d->mb_cur_max == 1 || !next_isnt_1st_byte))
#else
      if (d->searchflag)
#endif
        for (j = 0; j < d->states[0].elems.nelem; ++j)
          insert(d->states[0].elems.elems[j], &follows);
      wants_newline = 0;
      if (tstbit(eolbyte, labels[i]))
        for (j = 0; j < follows.nelem; ++j)
          if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
            wants_newline = 1;
      wants_letter = 0;
      for (j = 0; j < CHARCLASS_INTS; ++j)
        if (labels[i][j] & letters[j])
          break;
      if (j < CHARCLASS_INTS)
        for (j = 0; j < follows.nelem; ++j)
          if (PREV_LETTER_DEPENDENT(follows.elems[j].constraint))
            wants_letter = 1;
      state = state_index(d, &follows, 0, 0);
      if (wants_newline)
        state_newline = state_index(d, &follows, 1, 0);
      else
        state_newline = state;
      if (wants_letter)
        state_letter = state_index(d, &follows, 0, 1);
      else
        state_letter = state;
      for (j = 0; j < CHARCLASS_INTS; ++j)
        for (k = 0; k < INTBITS; ++k)
          if (labels[i][j] & 1 << k)
            {
              int c = j * INTBITS + k;
              if (c == eolbyte)
                trans[c] = state_newline;
              else if (IS_WORD_CONSTITUENT(c))
                trans[c] = state_letter;
              else if (c < NOTCHAR)
                trans[c] = state;
            }
    }
  for (i = 0; i < ngrps; ++i)
    free(grps[i].elems);
  free(follows.elems);
  free(tmp.elems);
  free(grps);
  free(labels);
}
static void
build_state (int s, struct dfa *d)
{
  int *trans;			
  int i;
  if (d->trcount >= 1024)
    {
      for (i = 0; i < d->tralloc; ++i)
        {
          free(d->trans[i]);
          free(d->fails[i]);
          d->trans[i] = d->fails[i] = NULL;
        }
      d->trcount = 0;
    }
  ++d->trcount;
  d->success[s] = 0;
  if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 1, d->states[s].letter, 0,
      s, *d))
    d->success[s] |= 4;
  if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 1,
      s, *d))
    d->success[s] |= 2;
  if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 0,
      s, *d))
    d->success[s] |= 1;
  MALLOC(trans, int, NOTCHAR);
  dfastate(s, d, trans);
  for (i = 0; i < NOTCHAR; ++i)
    if (trans[i] >= d->tralloc)
      {
        int oldalloc = d->tralloc;
        while (trans[i] >= d->tralloc)
          d->tralloc *= 2;
        REALLOC(d->realtrans, int *, d->tralloc + 1);
        d->trans = d->realtrans + 1;
        REALLOC(d->fails, int *, d->tralloc);
        REALLOC(d->success, int, d->tralloc);
        REALLOC(d->newlines, int, d->tralloc);
        while (oldalloc < d->tralloc)
          {
            d->trans[oldalloc] = NULL;
            d->fails[oldalloc++] = NULL;
          }
      }
  d->newlines[s] = trans[eolbyte];
  trans[eolbyte] = -1;
  if (ACCEPTING(s, *d))
    d->fails[s] = trans;
  else
    d->trans[s] = trans;
}
static void
build_state_zero (struct dfa *d)
{
  d->tralloc = 1;
  d->trcount = 0;
  CALLOC(d->realtrans, int *, d->tralloc + 1);
  d->trans = d->realtrans + 1;
  CALLOC(d->fails, int *, d->tralloc);
  MALLOC(d->success, int, d->tralloc);
  MALLOC(d->newlines, int, d->tralloc);
  build_state(0, d);
}
#if MBS_SUPPORT
#define SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p)		\
  if (s == 0)						\
    {							\
      while (inputwcs[p - buf_begin] == 0		\
            && mblen_buf[p - buf_begin] > 0		\
            && (unsigned char const *) p < buf_end)	\
        ++p;						\
      if ((char *) p >= end)				\
        {						\
          free(mblen_buf);				\
          free(inputwcs);				\
          *end = saved_end;				\
          return NULL;					\
        }						\
    }
static void
realloc_trans_if_necessary(struct dfa *d, int new_state)
{
  if (new_state >= d->tralloc)
    {
      int oldalloc = d->tralloc;
      while (new_state >= d->tralloc)
        d->tralloc *= 2;
      REALLOC(d->realtrans, int *, d->tralloc + 1);
      d->trans = d->realtrans + 1;
      REALLOC(d->fails, int *, d->tralloc);
      REALLOC(d->success, int, d->tralloc);
      REALLOC(d->newlines, int, d->tralloc);
      while (oldalloc < d->tralloc)
        {
          d->trans[oldalloc] = NULL;
          d->fails[oldalloc++] = NULL;
        }
    }
}
typedef enum
{
  TRANSIT_STATE_IN_PROGRESS,	
  TRANSIT_STATE_DONE,		
  TRANSIT_STATE_END_BUFFER	
} status_transit_state;
static status_transit_state
transit_state_singlebyte (struct dfa *d, int s, unsigned char const *p,
                                  int *next_state)
{
  int *t;
  int works = s;
  status_transit_state rval = TRANSIT_STATE_IN_PROGRESS;
  while (rval == TRANSIT_STATE_IN_PROGRESS)
    {
      if ((t = d->trans[works]) != NULL)
        {
          works = t[*p];
          rval = TRANSIT_STATE_DONE;
          if (works < 0)
            works = 0;
        }
      else if (works < 0)
        {
          if (p == buf_end)
            {
              abort ();
            }
          works = 0;
        }
      else if (d->fails[works])
        {
          works = d->fails[works][*p];
          rval = TRANSIT_STATE_DONE;
        }
      else
        {
          build_state(works, d);
        }
    }
  *next_state = works;
  return rval;
}
static int
match_anychar (struct dfa *d, int s, position pos, int idx)
{
  int newline = 0;
  int letter = 0;
  wchar_t wc;
  int mbclen;
  wc = inputwcs[idx];
  mbclen = (mblen_buf[idx] == 0)? 1 : mblen_buf[idx];
  if (wc == (wchar_t)eolbyte)
    {
      if (!(syntax_bits & RE_DOT_NEWLINE))
        return 0;
      newline = 1;
    }
  else if (wc == (wchar_t)'\0')
    {
      if (syntax_bits & RE_DOT_NOT_NULL)
        return 0;
      newline = 1;
    }
  if (iswalnum(wc) || wc == L'_')
    letter = 1;
  if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].newline,
                           newline, d->states[s].letter, letter))
    return 0;
  return mbclen;
}
static int
match_mb_charset (struct dfa *d, int s, position pos, int idx)
{
  int i;
  int match;		
  int match_len;	
  int op_len;		
  char buffer[128];
  wchar_t wcbuf[6];
  struct mb_char_classes *work_mbc;
  int newline = 0;
  int letter = 0;
  wchar_t wc;		
  wc = inputwcs[idx];
  if (wc == (wchar_t)eolbyte)
    {
      if (!(syntax_bits & RE_DOT_NEWLINE))
        return 0;
      newline = 1;
    }
  else if (wc == (wchar_t)'\0')
    {
      if (syntax_bits & RE_DOT_NOT_NULL)
        return 0;
      newline = 1;
    }
  if (iswalnum(wc) || wc == L'_')
    letter = 1;
  if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].newline,
                           newline, d->states[s].letter, letter))
    return 0;
  work_mbc = &(d->mbcsets[(d->multibyte_prop[pos.index]) >> 2]);
  match = !work_mbc->invert;
  match_len = (mblen_buf[idx] == 0)? 1 : mblen_buf[idx];
  if (wc < NOTCHAR && work_mbc->cset != -1
      && tstbit((unsigned char)wc, d->charclasses[work_mbc->cset]))
    goto charset_matched;
  for (i = 0; i<work_mbc->nch_classes; i++)
    {
      if (iswctype((wint_t)wc, work_mbc->ch_classes[i]))
        goto charset_matched;
    }
  strncpy(buffer, (char const *) buf_begin + idx, match_len);
  buffer[match_len] = '\0';
  for (i = 0; i<work_mbc->nequivs; i++)
    {
      op_len = strlen(work_mbc->equivs[i]);
      strncpy(buffer, (char const *) buf_begin + idx, op_len);
      buffer[op_len] = '\0';
      if (strcoll(work_mbc->equivs[i], buffer) == 0)
        {
          match_len = op_len;
          goto charset_matched;
        }
    }
  for (i = 0; i<work_mbc->ncoll_elems; i++)
    {
      op_len = strlen(work_mbc->coll_elems[i]);
      strncpy(buffer, (char const *) buf_begin + idx, op_len);
      buffer[op_len] = '\0';
      if (strcoll(work_mbc->coll_elems[i], buffer) == 0)
        {
          match_len = op_len;
          goto charset_matched;
        }
    }
  wcbuf[0] = wc;
  wcbuf[1] = wcbuf[3] = wcbuf[5] = '\0';
  for (i = 0; i<work_mbc->nranges; i++)
    {
      wcbuf[2] = work_mbc->range_sts[i];
      wcbuf[4] = work_mbc->range_ends[i];
      if (wcscoll(wcbuf, wcbuf+2) >= 0 &&
          wcscoll(wcbuf+4, wcbuf) >= 0)
        goto charset_matched;
    }
  for (i = 0; i<work_mbc->nchars; i++)
    {
      if (wc == work_mbc->chars[i])
        goto charset_matched;
    }
  match = !match;
 charset_matched:
  return match ? match_len : 0;
}
static int*
check_matching_with_multibyte_ops (struct dfa *d, int s, int idx)
{
  int i;
  int* rarray;
  MALLOC(rarray, int, d->states[s].mbps.nelem);
  for (i = 0; i < d->states[s].mbps.nelem; ++i)
    {
      position pos = d->states[s].mbps.elems[i];
      switch(d->tokens[pos.index])
        {
        case ANYCHAR:
          rarray[i] = match_anychar(d, s, pos, idx);
          break;
        case MBCSET:
          rarray[i] = match_mb_charset(d, s, pos, idx);
          break;
        default:
          break; 
        }
    }
  return rarray;
}
static status_transit_state
transit_state_consume_1char (struct dfa *d, int s, unsigned char const **pp,
                             int *match_lens, int *mbclen, position_set *pps)
{
  int i, j;
  int s1, s2;
  int* work_mbls;
  status_transit_state rs = TRANSIT_STATE_DONE;
  *mbclen = (mblen_buf[*pp - buf_begin] == 0)? 1
    : mblen_buf[*pp - buf_begin];
  s1 = s;
  for (i = 0; i < *mbclen; i++)
    {
      s2 = s1;
      rs = transit_state_singlebyte(d, s2, (*pp)++, &s1);
    }
  copy(&(d->states[s1].elems), pps);
  if (match_lens == NULL && d->states[s].mbps.nelem != 0)
    work_mbls = check_matching_with_multibyte_ops(d, s, *pp - buf_begin);
  else
    work_mbls = match_lens;
  for (i = 0; i < d->states[s].mbps.nelem ; i++)
   {
      if (work_mbls[i] == *mbclen)
        for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
             j++)
          insert(d->follows[d->states[s].mbps.elems[i].index].elems[j],
                 pps);
    }
  if (match_lens == NULL && work_mbls != NULL)
    free(work_mbls);
  return rs;
}
static int
transit_state (struct dfa *d, int s, unsigned char const **pp)
{
  int s1;
  int mbclen;		
  int maxlen = 0;
  int i, j;
  int *match_lens = NULL;
  int nelem = d->states[s].mbps.nelem; 
  position_set follows;
  unsigned char const *p1 = *pp;
  wchar_t wc;
  if (nelem > 0)
    {
      match_lens = check_matching_with_multibyte_ops(d, s, *pp - buf_begin);
      for (i = 0; i < nelem; i++)
        {
          if (match_lens[i] > maxlen)
            maxlen = match_lens[i];
        }
    }
  if (nelem == 0 || maxlen == 0)
    {
      status_transit_state rs;
      rs = transit_state_singlebyte(d, s, *pp, &s1);
      if (rs == TRANSIT_STATE_DONE)
        ++*pp;
      free(match_lens);
      return s1;
    }
  follows.nelem = 0;
  MALLOC(follows.elems, position, d->nleaves);
  transit_state_consume_1char(d, s, pp, match_lens, &mbclen, &follows);
  wc = inputwcs[*pp - mbclen - buf_begin];
  s1 = state_index(d, &follows, wc == L'\n', iswalnum(wc));
  realloc_trans_if_necessary(d, s1);
  while (*pp - p1 < maxlen)
    {
      follows.nelem = 0;
      transit_state_consume_1char(d, s1, pp, NULL, &mbclen, &follows);
      for (i = 0; i < nelem ; i++)
        {
          if (match_lens[i] == *pp - p1)
            for (j = 0;
                 j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++)
              insert(d->follows[d->states[s1].mbps.elems[i].index].elems[j],
                     &follows);
        }
      wc = inputwcs[*pp - mbclen - buf_begin];
      s1 = state_index(d, &follows, wc == L'\n', iswalnum(wc));
      realloc_trans_if_necessary(d, s1);
    }
  free(match_lens);
  free(follows.elems);
  return s1;
}
#endif 
static void
prepare_wc_buf (const char *begin, const char *end)
{
  unsigned char eol = eolbyte;
  size_t remain_bytes, i;
  buf_begin = (unsigned char *) begin;
  remain_bytes = 0;
  for (i = 0; i < end - begin + 1; i++)
    {
      if (remain_bytes == 0)
        {
          remain_bytes
            = mbrtowc(inputwcs + i, begin + i, end - begin - i + 1, &mbs);
          if (remain_bytes < 1
              || remain_bytes == (size_t) -1
              || remain_bytes == (size_t) -2
              || (remain_bytes == 1 && inputwcs[i] == (wchar_t)begin[i]))
            {
              remain_bytes = 0;
              inputwcs[i] = (wchar_t)begin[i];
              mblen_buf[i] = 0;
              if (begin[i] == eol)
                break;
            }
          else
            {
              mblen_buf[i] = remain_bytes;
              remain_bytes--;
            }
        }
      else
        {
          mblen_buf[i] = remain_bytes;
          inputwcs[i] = 0;
          remain_bytes--;
        }
    }
  buf_end = (unsigned char *) (begin + i);
  mblen_buf[i] = 0;
  inputwcs[i] = 0; 
}
char *
dfaexec (struct dfa *d, char const *begin, char *end,
         int newline, int *count, int *backref)
{
  int s, s1, tmp;	
  unsigned char const *p; 
  int **trans, *t;	
  unsigned char eol = eolbyte;	
  unsigned char saved_end;
  static int sbit[NOTCHAR];	
  static int sbit_init;
  if (! sbit_init)
    {
      unsigned int i;
      sbit_init = 1;
      for (i = 0; i < NOTCHAR; ++i)
        sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
      sbit[eol] = 4;
    }
  if (! d->tralloc)
    build_state_zero(d);
  s = s1 = 0;
  p = (unsigned char const *) begin;
  trans = d->trans;
  saved_end = *(unsigned char *) end;
  *end = eol;
#if MBS_SUPPORT
  if (d->mb_cur_max > 1)
    {
      MALLOC(mblen_buf, unsigned char, end - begin + 2);
      MALLOC(inputwcs, wchar_t, end - begin + 2);
      memset(&mbs, 0, sizeof(mbstate_t));
      prepare_wc_buf ((const char *) p, end);
    }
#endif 
  for (;;)
    {
#if MBS_SUPPORT
      if (d->mb_cur_max > 1)
        while ((t = trans[s]))
          {
            if (p > buf_end)
              break;
            s1 = s;
            SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
            if (d->states[s].mbps.nelem == 0)
              {
                s = t[*p++];
                continue;
              }
            if (backref)
              {
                *backref = 1;
                free(mblen_buf);
                free(inputwcs);
                *end = saved_end;
                return (char *) p;
              }
            s = transit_state(d, s, &p);
            trans = d->trans;
          }
      else
#endif 
      while ((t = trans[s]) != 0) { 
        s1 = t[*p++];
        if ((t = trans[s1]) == 0) {
          tmp = s ; s = s1 ; s1 = tmp ; 
          break;
        }
        s = t[*p++];
      }
      if (s >= 0 && (char *) p <= end && d->fails[s])
        {
          if (d->success[s] & sbit[*p])
            {
              if (backref)
                *backref = (d->states[s].backref != 0);
#if MBS_SUPPORT
              if (d->mb_cur_max > 1)
                {
                  free(mblen_buf);
                  free(inputwcs);
                }
#endif 
              *end = saved_end;
              return (char *) p;
            }
          s1 = s;
#if MBS_SUPPORT
          if (d->mb_cur_max > 1)
            {
              s = transit_state(d, s, &p);
              trans = d->trans;
            }
          else
#endif 
          s = d->fails[s][*p++];
          continue;
        }
      if ((char *) p <= end && p[-1] == eol)
        {
          if (count)
            ++*count;
#if MBS_SUPPORT
          if (d->mb_cur_max > 1)
            prepare_wc_buf ((const char *) p, end);
#endif
        }
      if ((char *) p > end)
        {
#if MBS_SUPPORT
          if (d->mb_cur_max > 1)
            {
              free(mblen_buf);
              free(inputwcs);
            }
#endif 
          *end = saved_end;
          return NULL;
        }
      if (s >= 0)
        {
          build_state(s, d);
          trans = d->trans;
          continue;
        }
      if (p[-1] == eol && newline)
        {
          s = d->newlines[s1];
          continue;
        }
      s = 0;
    }
}
#if MBS_SUPPORT
static void
free_mbdata (struct dfa *d)
{
  unsigned int i;
  free(d->multibyte_prop);
  d->multibyte_prop = NULL;
  for (i = 0; i < d->nmbcsets; ++i)
    {
      unsigned int j;
      struct mb_char_classes *p = &(d->mbcsets[i]);
      free(p->chars);
      free(p->ch_classes);
      free(p->range_sts);
      free(p->range_ends);
      for (j = 0; j < p->nequivs; ++j)
        free(p->equivs[j]);
      free(p->equivs);
      for (j = 0; j < p->ncoll_elems; ++j)
        free(p->coll_elems[j]);
      free(p->coll_elems);
    }
  free(d->mbcsets);
  d->mbcsets = NULL;
  d->nmbcsets = 0;
}
#endif
void
dfainit (struct dfa *d)
{
  memset (d, 0, sizeof *d);
  d->calloc = 1;
  MALLOC(d->charclasses, charclass, d->calloc);
  d->talloc = 1;
  MALLOC(d->tokens, token, d->talloc);
#if MBS_SUPPORT
  d->mb_cur_max = MB_CUR_MAX;
  if (d->mb_cur_max > 1)
    {
      d->nmultibyte_prop = 1;
      MALLOC(d->multibyte_prop, int, d->nmultibyte_prop);
      d->mbcsets_alloc = 1;
      MALLOC(d->mbcsets, struct mb_char_classes, d->mbcsets_alloc);
    }
#endif
}
#if MBS_SUPPORT
static void
dfaoptimize (struct dfa *d)
{
  unsigned int i;
  if (!using_utf8())
    return;
  for (i = 0; i < d->tindex; ++i)
    {
      switch(d->tokens[i])
        {
        case ANYCHAR:
          abort ();
        case MBCSET:
          return;
        default:
          break;
        }
    }
  free_mbdata (d);
  d->mb_cur_max = 1;
}
#endif
void
dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
{
  dfainit(d);
  dfaparse(s, len, d);
  dfamust(d);
#if MBS_SUPPORT
  dfaoptimize(d);
#endif
  dfaanalyze(d, searchflag);
}
void
dfafree (struct dfa *d)
{
  int i;
  struct dfamust *dm, *ndm;
  free(d->charclasses);
  free(d->tokens);
#if MBS_SUPPORT
  if (d->mb_cur_max > 1)
    free_mbdata(d);
#endif 
  for (i = 0; i < d->sindex; ++i) {
    free(d->states[i].elems.elems);
#if MBS_SUPPORT
    free(d->states[i].mbps.elems);
#endif 
  }
  free(d->states);
  for (i = 0; i < d->tindex; ++i)
    free(d->follows[i].elems);
  free(d->follows);
  for (i = 0; i < d->tralloc; ++i)
    {
      free(d->trans[i]);
      free(d->fails[i]);
    }
  free(d->realtrans);
  free(d->fails);
  free(d->newlines);
  free(d->success);
  for (dm = d->musts; dm; dm = ndm)
    {
      ndm = dm->next;
      free(dm->must);
      free(dm);
    }
}
static char *
icatalloc (char const *old, char const *new)
{
  char *result;
  size_t oldsize, newsize;
  newsize = (new == NULL) ? 0 : strlen(new);
  if (old == NULL)
    oldsize = 0;
  else if (newsize == 0)
    return (char *) old;
  else	oldsize = strlen(old);
  if (old == NULL)
    result = malloc(newsize + 1);
  else
    result = realloc((void *) old, oldsize + newsize + 1);
  if (result != NULL && new != NULL)
    (void) strcpy(result + oldsize, new);
  return result;
}
static char *
icpyalloc (char const *string)
{
  return icatalloc (NULL, string);
}
static char *
istrstr (char const *lookin, char const *lookfor)
{
  char const *cp;
  size_t len;
  len = strlen(lookfor);
  for (cp = lookin; *cp != '\0'; ++cp)
    if (strncmp(cp, lookfor, len) == 0)
      return (char *) cp;
  return NULL;
}
static void
freelist (char **cpp)
{
  int i;
  if (cpp == NULL)
    return;
  for (i = 0; cpp[i] != NULL; ++i)
    {
      free(cpp[i]);
      cpp[i] = NULL;
    }
}
static char **
enlist (char **cpp, char *new, size_t len)
{
  int i, j;
  if (cpp == NULL)
    return NULL;
  if ((new = icpyalloc(new)) == NULL)
    {
      freelist(cpp);
      return NULL;
    }
  new[len] = '\0';
  for (i = 0; cpp[i] != NULL; ++i)
    if (istrstr(cpp[i], new) != NULL)
      {
        free(new);
        return cpp;
      }
  j = 0;
  while (cpp[j] != NULL)
    if (istrstr(new, cpp[j]) == NULL)
      ++j;
    else
      {
        free(cpp[j]);
        if (--i == j)
          break;
        cpp[j] = cpp[i];
        cpp[i] = NULL;
      }
  cpp = realloc((char *) cpp, (i + 2) * sizeof *cpp);
  if (cpp == NULL)
    return NULL;
  cpp[i] = new;
  cpp[i + 1] = NULL;
  return cpp;
}
static char **
comsubs (char *left, char const *right)
{
  char **cpp;
  char *lcp;
  char *rcp;
  size_t i, len;
  if (left == NULL || right == NULL)
    return NULL;
  cpp = malloc(sizeof *cpp);
  if (cpp == NULL)
    return NULL;
  cpp[0] = NULL;
  for (lcp = left; *lcp != '\0'; ++lcp)
    {
      len = 0;
      rcp = strchr (right, *lcp);
      while (rcp != NULL)
        {
          for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
            continue;
          if (i > len)
            len = i;
          rcp = strchr (rcp + 1, *lcp);
        }
      if (len == 0)
        continue;
      if ((cpp = enlist(cpp, lcp, len)) == NULL)
        break;
    }
  return cpp;
}
static char **
addlists (char **old, char **new)
{
  int i;
  if (old == NULL || new == NULL)
    return NULL;
  for (i = 0; new[i] != NULL; ++i)
    {
      old = enlist(old, new[i], strlen(new[i]));
      if (old == NULL)
        break;
    }
  return old;
}
static char **
inboth (char **left, char **right)
{
  char **both;
  char **temp;
  int lnum, rnum;
  if (left == NULL || right == NULL)
    return NULL;
  both = malloc(sizeof *both);
  if (both == NULL)
    return NULL;
  both[0] = NULL;
  for (lnum = 0; left[lnum] != NULL; ++lnum)
    {
      for (rnum = 0; right[rnum] != NULL; ++rnum)
        {
          temp = comsubs(left[lnum], right[rnum]);
          if (temp == NULL)
            {
              freelist(both);
              return NULL;
            }
          both = addlists(both, temp);
          freelist(temp);
          free(temp);
          if (both == NULL)
            return NULL;
        }
    }
  return both;
}
typedef struct
{
  char **in;
  char *left;
  char *right;
  char *is;
} must;
static void
resetmust (must *mp)
{
  mp->left[0] = mp->right[0] = mp->is[0] = '\0';
  freelist(mp->in);
}
static void
dfamust (struct dfa *d)
{
  must *musts;
  must *mp;
  char *result;
  int ri;
  int i;
  int exact;
  token t;
  static must must0;
  struct dfamust *dm;
  static char empty_string[] = "";
  result = empty_string;
  exact = 0;
  musts = malloc((d->tindex + 1) * sizeof *musts);
  if (musts == NULL)
    return;
  mp = musts;
  for (i = 0; i <= d->tindex; ++i)
    mp[i] = must0;
  for (i = 0; i <= d->tindex; ++i)
    {
      mp[i].in = malloc(sizeof *mp[i].in);
      mp[i].left = malloc(2);
      mp[i].right = malloc(2);
      mp[i].is = malloc(2);
      if (mp[i].in == NULL || mp[i].left == NULL ||
          mp[i].right == NULL || mp[i].is == NULL)
        goto done;
      mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
      mp[i].in[0] = NULL;
    }
#ifdef DEBUG
  fprintf(stderr, "dfamust:\n");
  for (i = 0; i < d->tindex; ++i)
    {
      fprintf(stderr, " %d:", i);
      prtok(d->tokens[i]);
    }
  putc('\n', stderr);
#endif
  for (ri = 0; ri < d->tindex; ++ri)
    {
      switch (t = d->tokens[ri])
        {
        case LPAREN:
        case RPAREN:
          assert (!"neither LPAREN nor RPAREN may appear here");
        case EMPTY:
        case BEGLINE:
        case ENDLINE:
        case BEGWORD:
        case ENDWORD:
        case LIMWORD:
        case NOTLIMWORD:
        case BACKREF:
          resetmust(mp);
          break;
        case STAR:
        case QMARK:
          assert (musts < mp);
          --mp;
          resetmust(mp);
          break;
        case OR:
          assert (&musts[2] <= mp);
          {
            char **new;
            must *lmp;
            must *rmp;
            int j, ln, rn, n;
            rmp = --mp;
            lmp = --mp;
            if (!STREQ (lmp->is, rmp->is))
              lmp->is[0] = '\0';
            i = 0;
            while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
              ++i;
            lmp->left[i] = '\0';
            ln = strlen(lmp->right);
            rn = strlen(rmp->right);
            n = ln;
            if (n > rn)
              n = rn;
            for (i = 0; i < n; ++i)
              if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1])
                break;
            for (j = 0; j < i; ++j)
              lmp->right[j] = lmp->right[(ln - i) + j];
            lmp->right[j] = '\0';
            new = inboth(lmp->in, rmp->in);
            if (new == NULL)
              goto done;
            freelist(lmp->in);
            free(lmp->in);
            lmp->in = new;
          }
          break;
        case PLUS:
          assert (musts < mp);
          --mp;
          mp->is[0] = '\0';
          break;
        case END:
          assert (mp == &musts[1]);
          for (i = 0; musts[0].in[i] != NULL; ++i)
            if (strlen(musts[0].in[i]) > strlen(result))
              result = musts[0].in[i];
          if (STREQ (result, musts[0].is))
            exact = 1;
          goto done;
        case CAT:
          assert (&musts[2] <= mp);
          {
            must *lmp;
            must *rmp;
            rmp = --mp;
            lmp = --mp;
            lmp->in = addlists(lmp->in, rmp->in);
            if (lmp->in == NULL)
              goto done;
            if (lmp->right[0] != '\0' &&
                rmp->left[0] != '\0')
              {
                char *tp;
                tp = icpyalloc(lmp->right);
                if (tp == NULL)
                  goto done;
                tp = icatalloc(tp, rmp->left);
                if (tp == NULL)
                  goto done;
                lmp->in = enlist(lmp->in, tp,
                                 strlen(tp));
                free(tp);
                if (lmp->in == NULL)
                  goto done;
              }
            if (lmp->is[0] != '\0')
              {
                lmp->left = icatalloc(lmp->left,
                                      rmp->left);
                if (lmp->left == NULL)
                  goto done;
              }
            if (rmp->is[0] == '\0')
              lmp->right[0] = '\0';
            lmp->right = icatalloc(lmp->right, rmp->right);
            if (lmp->right == NULL)
              goto done;
            if (lmp->is[0] != '\0' && rmp->is[0] != '\0')
              {
                lmp->is = icatalloc(lmp->is, rmp->is);
                if (lmp->is == NULL)
                  goto done;
              }
            else
              lmp->is[0] = '\0';
          }
          break;
        default:
          if (t < END)
            {
              assert (!"oops! t >= END");
            }
          else if (t == '\0')
            {
              goto done;
            }
          else if (t >= CSET
#if MBS_SUPPORT
                   || t == ANYCHAR
                   || t == MBCSET
#endif 
                   )
            {
              resetmust(mp);
            }
          else
            {
              resetmust(mp);
              mp->is[0] = mp->left[0] = mp->right[0] = t;
              mp->is[1] = mp->left[1] = mp->right[1] = '\0';
              mp->in = enlist(mp->in, mp->is, (size_t)1);
              if (mp->in == NULL)
                goto done;
            }
          break;
        }
#ifdef DEBUG
      fprintf(stderr, " node: %d:", ri);
      prtok(d->tokens[ri]);
      fprintf(stderr, "\n  in:");
      for (i = 0; mp->in[i]; ++i)
        fprintf(stderr, " \"%s\"", mp->in[i]);
      fprintf(stderr, "\n  is: \"%s\"\n", mp->is);
      fprintf(stderr, "  left: \"%s\"\n", mp->left);
      fprintf(stderr, "  right: \"%s\"\n", mp->right);
#endif
      ++mp;
    }
 done:
  if (strlen(result))
    {
      MALLOC(dm, struct dfamust, 1);
      dm->exact = exact;
      MALLOC(dm->must, char, strlen(result) + 1);
      strcpy(dm->must, result);
      dm->next = d->musts;
      d->musts = dm;
    }
  mp = musts;
  for (i = 0; i <= d->tindex; ++i)
    {
      freelist(mp[i].in);
      free(mp[i].in);
      free(mp[i].left);
      free(mp[i].right);
      free(mp[i].is);
    }
  free(mp);
}
struct dfa *
dfaalloc (void)
{
  return xmalloc (sizeof (struct dfa));
}
struct dfamust *
dfamusts (struct dfa const *d)
{
  return d->musts;
}
