/* ,file-id archive://[lord]/449/rx/spencer.c/1998-05-18
 */
/*	Copyright (C) 1997 Tom Lord
 * 
 * This program is provided to you under the terms of the Liberty Software
 * License.  You are NOT permitted to redistribute, modify, or use it
 * except in very specific ways described by that license.
 *
 * This software comes with NO WARRANTY.
 * 
 * You should have received a copy of the Liberty Software License
 * along with this software; see the file =LICENSE.  If not, write to
 * the Tom Lord, 1810 Francisco St. #2, Berkeley CA, 94703, USA.  
 */





#include <stdio.h>
#include "vu/bitset.h"
#include "vu/hashtab.h"
#include "vu/dstr.h"
#include "rexp.h"
#include "nfa.h"
#include "dfa.h"
#include "super.h"
#include "unfa.h"
#include "match-regexp.h"


/* struct rx_solutions
 *
 * The state of one stream of solutions, as built by
 * rx_make_solutions and advanced by rx_next_solution.
 */
struct rx_solutions
{
  /* Resume point for rx_next_solution.  0 means nothing has been
   * tried yet, -1 means the stream is exhausted, -2 means a paren
   * solution was returned and the saved register values must be
   * put back on the next call.  Positive values are node-type
   * specific.
   */
  int step;

  /* Character set size; handed to rx_unfa and rx_simplify_rexp. */
  int cset_size;
  /* Expression being matched (may be 0).  A reference is taken with
   * rx_save_rexp and dropped with rx_free_rexp.
   */
  struct rx_exp_node * exp;
  /* Handed to rx_simplify_rexp and to every sub-stream. */
  struct rx_exp_node ** subexps;
  /* Registers; paren nodes write rm_so/rm_eo at index exp->intval. */
  struct rx_registers * regs;

  /* The region being matched, as passed to rx_make_solutions. */
  int start;
  int end;

  /* Callbacks: vmfn supplies bursts of input, contextfn decides
   * r_context nodes.  `closure' is passed to both.
   */
  rx_vmfn vmfn;
  rx_contextfn contextfn;
  void * closure;

  /* Automaton for `exp' (or its simplified form) and the engine
   * initialized from it; used for the rough fit test.
   */
  struct rx_unfa * dfa;
  struct rx_dfa match_engine;
  /* The same pair for exp->left; built only for concat, star and
   * interval nodes.
   */
  struct rx_unfa * left_dfa;
  struct rx_dfa left_match_engine;

  /* Position currently tried as the boundary between the left and
   * right parts; starts at `end' and is decremented toward `start'.
   */
  int split_guess;
  /* Sub-streams for the left and right parts (0 when not active). */
  struct rx_solutions * left;
  struct rx_solutions * right;

  /* Interval nodes: number of iterations already matched. */
  int interval_x;
  /* Paren nodes: register values saved before they are overwritten. */
  int saved_rm_so;
  int saved_rm_eo;
  /* Value reported by rx_solutions_final_tag. */
  int final_tag;
};

/* One-element cache: a released struct kept by rx_free_solutions
 * for reuse by the next rx_make_solutions call.
 */
static struct rx_solutions * cached_solutions_storage = 0;



/* rx_make_solutions
 *
 * Build a stream of solutions for matching `expression' against
 * the region [start, end) of the input reachable through `vmfn'.
 *
 * Returns 0 when the expression has a known length (len >= 0)
 * that differs from the length of the region; otherwise returns
 * a stream to be advanced with rx_next_solution and released
 * with rx_free_solutions.
 */

struct rx_solutions *
rx_make_solutions (struct rx_registers * regs,
		   int cset_size,
		   struct rx_exp_node * expression,
		   struct rx_exp_node ** subexps,
		   int start,
		   int end,
		   rx_vmfn vmfn,
		   rx_contextfn contextfn,
		   void * closure)
{
  struct rx_solutions * fresh;

  /* A fixed-length expression can only fit a region of that length. */
  if (expression && (expression->len >= 0))
    {
      if (expression->len != (end - start))
	return 0;
    }

  /* Take the cached struct if there is one, else allocate. */
  fresh = cached_solutions_storage;
  if (fresh)
    cached_solutions_storage = 0;
  else
    fresh = (struct rx_solutions *)xmalloc (sizeof (*fresh));
  memset0 ((char *)fresh, sizeof (*fresh));

  fresh->step = 0;
  fresh->cset_size = cset_size;
  fresh->subexps = subexps;
  fresh->exp = expression;
  rx_save_rexp (expression);
  fresh->regs = regs;
  fresh->start = start;
  fresh->end = end;
  fresh->vmfn = vmfn;
  fresh->contextfn = contextfn;
  fresh->closure = closure;

  /* Automaton for the whole expression.  Observed expressions are
   * simplified first; the simplified copy is released once the
   * engine has been set up.
   */
  {
    int observed;
    struct rx_exp_node * reduced;
    struct rx_exp_node * source;

    observed = (expression && expression->observed);
    reduced = 0;
    source = expression;

    if (observed)
      {
	rx_simplify_rexp (&reduced, cset_size, expression, subexps);
	source = reduced;
      }

    fresh->dfa = rx_unfa (source, cset_size);
    rx_init_dfa_from_rx (&fresh->match_engine, fresh->dfa->nfa);
    rx_dfa_goto_start_superstate (&fresh->match_engine);

    if (observed)
      rx_free_rexp (reduced);
  }

  /* Nodes that are split into a left and a right part also get an
   * automaton for the left operand.
   */
  if (expression)
    {
      switch (expression->type)
	{
	case r_concat:
	case r_star:
	case r_interval:
	  {
	    struct rx_exp_node * operand;

	    operand = expression->left;

	    if (operand && operand->observed)
	      {
		struct rx_exp_node * reduced;
		rx_simplify_rexp (&reduced, cset_size, operand, subexps);
		fresh->left_dfa = rx_unfa (reduced, cset_size);
		rx_free_rexp (reduced);
	      }
	    else
	      {
		fresh->left_dfa = rx_unfa (operand, cset_size);
	      }

	    memset0 ((char *)&fresh->left_match_engine,
		     sizeof (fresh->left_match_engine));
	    rx_init_dfa_from_rx (&fresh->left_match_engine,
				 fresh->left_dfa->nfa);
	  }
	  break;

	default:
	  break;
	}
    }

  return fresh;
}



/* rx_solution_fit_p
 *
 * Run the match engine of `solns' over the region
 * [solns->start, solns->end), fetching the input one burst at a
 * time through solns->vmfn.
 *
 * Returns the vmfn status if it is not 1, the rx_dfa_advance
 * status if it is not 1, and otherwise the result of rx_dfa_fit_p
 * on the burst that reaches the end of the region.
 */
static int
rx_solution_fit_p (struct rx_solutions * solns)
{
  int pos;

  pos = solns->start;

  for (;;)
    {
      unsigned const char * chunk;
      int chunk_addr;
      int chunk_len;
      int offset;
      int usable;
      int status;

      status = solns->vmfn (solns->closure,
			    &chunk, &chunk_len, &chunk_addr,
			    pos, solns->end,
			    pos);
      if (status != 1)
	return status;

      offset = pos - chunk_addr;

      /* This burst reaches the end of the region: finish here. */
      if ((chunk_addr + chunk_len) >= solns->end)
	return rx_dfa_fit_p (&solns->match_engine,
			     chunk + offset,
			     solns->end - pos);

      /* Otherwise consume the rest of the burst and fetch more. */
      usable = chunk_len - offset;
      status = rx_dfa_advance (&solns->match_engine,
			       chunk + offset,
			       usable);
      if (status != 1)
	return status;

      pos += usable;
    }
}


/* rx_solution_fit_str_p
 *
 * Compare the literal string of an r_string node
 * (solns->exp->cstr) against the input starting at solns->start,
 * fetching the input one burst at a time through solns->vmfn.
 *
 * Returns 1 if every character is equal, 0 at the first
 * difference, or the vmfn status if that is not 1.
 */
static int
rx_solution_fit_str_p (struct rx_solutions * solns)
{
  unsigned char * want;
  int want_left;
  int pos;

  pos = solns->start;
  want_left = solns->exp->cstr.len;
  want = (unsigned char *)solns->exp->cstr.chr;

  for (;;)
    {
      unsigned const char * chunk;
      unsigned const char * have;
      int chunk_addr;
      int chunk_len;
      int offset;
      int is_last;
      int span;
      int todo;
      int status;

      status = solns->vmfn (solns->closure,
			    &chunk, &chunk_len, &chunk_addr,
			    pos, solns->end,
			    pos);
      if (status != 1)
	return status;

      offset = pos - chunk_addr;
      have = chunk + offset;

      /* In the burst that reaches the end of the region, compare
       * whatever is left of the key; in any earlier burst, compare
       * up to the end of the burst.
       */
      is_last = ((chunk_addr + chunk_len) >= solns->end);
      span = (is_last ? want_left : (chunk_len - offset));

      for (todo = span; todo; --todo)
	{
	  if (*have != *want)
	    return 0;
	  ++have;
	  ++want;
	}

      if (is_last)
	return 1;

      want_left -= span;
      pos += span;
    }
}




/* rx_next_solution
 *
 * Advance a stream of solutions to its next solution.
 *
 * Returns 1 when another solution was found and 0 when there is
 * none (also when `solns' is 0).  Other values originate from the
 * rough fit test, the context function or a sub-stream and are
 * returned as received.
 *
 * Paths that report a solution record a value in
 * solns->final_tag on the way.
 *
 * The position within the search is kept in solns->step:
 *
 *	 0	nothing has been tried yet
 *	-1	the stream is exhausted
 *	-2	(paren nodes) a solution was returned with the
 *		register set; the next call puts the saved
 *		register values back and returns 0
 *	>0	node-type specific resume points (the case labels
 *		of the inner switches below)
 */
int
rx_next_solution (struct rx_solutions * solns)
{
  if (solns == 0)
    {
      return 0;
    }

  /* A null expression yields at most one solution, and only for
   * an empty region.
   */
  if (!solns->exp)
    {
      if (solns->step != 0)
	{
	  return 0;
	}
      else
	{
	  solns->step = 1;
	  solns->final_tag = 1;
	  return (solns->start == solns->end
		  ? 1
		  : 0);
	}
    }
  /* An expression of known length can not fit a region of a
   * different length.
   */
  else if (   (solns->exp->len >= 0)
	   && (solns->exp->len != (solns->end - solns->start)))
    {
      return 0;
    }
  /* Expressions not marked `observed' are decided by a single fit
   * test and yield at most one solution.
   */
  else if (!solns->exp->observed)
    {
      if (solns->step != 0)
	{
	  return 0;
	}
      else if (solns->exp->type == r_string)
	{
	  int ans;
	  ans = rx_solution_fit_str_p (solns);
	  solns->final_tag = 1;
	  solns->step = -1;
	  return ans;
	}
      else
	{
	  int ans;
	  ans = rx_solution_fit_p (solns);
	  solns->final_tag = solns->match_engine.final_tag;
	  solns->step = -1;
	  return ans;
	}
    }
  else /* if (solns->exp->observed) */
    {
      int fit_p;
      switch (solns->step)
	{
	/* A paren solution was handed out on the previous call;
	 * put the saved register values back.
	 */
	case -2:
	  if (solns->exp->intval)
	    {
	      solns->regs[solns->exp->intval].rm_so = solns->saved_rm_so;
	      solns->regs[solns->exp->intval].rm_eo = solns->saved_rm_eo;
	    }
	  return 0;

	case -1:
	  return 0;

	/* First call: rough fit test over the whole region. */
	case 0:
	  fit_p = rx_solution_fit_p (solns);
	  /* Set final_tag here because this rough fit test
	   * may be all the matching that gets done.
	   * For example, consider a paren node containing
	   * a true regular expression ending with a cut
	   * operator.
	   */
	  solns->final_tag = solns->match_engine.final_tag;
	  switch (fit_p)
	    {
	    case 0:
	      solns->step = -1;
	      return 0;
	    case 1:
	      solns->step = 1;
	      goto resolve_fit;
	    default:
	      solns->step = -1;
	      return fit_p;
	    }

	/* step > 0: resume the node-type specific search. */
	default:
	resolve_fit:
	  switch (solns->exp->type)
	    {
	    case r_cset:
	    case r_string:
	    case r_cut:
	      return panic ("bogus regexp in rx_next_solution");
	      
	    case r_parens:
	      {
		int paren_stat;
		switch (solns->step)
		  {
		  case 1:
		    if (solns->exp->intval)
		      {
			/* BUG: and save nested,
			 *      and set to -1,
			 *      and set nested to -1,
			 */
			solns->saved_rm_so = solns->regs[solns->exp->intval].rm_so;
			solns->saved_rm_eo = solns->regs[solns->exp->intval].rm_eo;
		      }

		    /* Nothing observed inside the parens: the rough
		     * fit test above already decided the match.
		     */
		    if (   !solns->exp->left
			|| !solns->exp->left->observed)
		      {
			if (solns->exp->intval)
			  {
			    solns->regs[solns->exp->intval].rm_so = solns->start;
			    solns->regs[solns->exp->intval].rm_eo = solns->end;
			  }
			solns->step = -2;
			/* Keep the final_tag from the fit_p test. */
			return 1;
		      }
		    else
		      {
			solns->left = rx_make_solutions (solns->regs,
							 solns->cset_size,
							 solns->exp->left,
							 solns->subexps,
							 solns->start,
							 solns->end,
							 solns->vmfn,
							 solns->contextfn,
							 solns->closure);
		      }
		    solns->step = 2;
		    /* fall through */

		  case 2:
		    if (solns->exp->intval)
		      {
			/* BUG: and restore nested,
			 */
			solns->regs[solns->exp->intval].rm_so = solns->saved_rm_so;
			solns->regs[solns->exp->intval].rm_eo = solns->saved_rm_eo;
		      }

		    paren_stat = rx_next_solution (solns->left);
		    if (paren_stat == 1)
		      {
			if (solns->exp->intval)
			  {
			    solns->regs[solns->exp->intval].rm_so = solns->start;
			    solns->regs[solns->exp->intval].rm_eo = solns->end;
			  }
			solns->final_tag = solns->left->final_tag;
			return 1;
		      }
		    else 
		      {
			solns->step = -1;
			rx_free_solutions (solns->left);
			solns->left = 0;
			if (solns->exp->intval)
			  {
			    /* BUG: and restore nested,
			     */
			    solns->regs[solns->exp->intval].rm_so = solns->saved_rm_so;
			    solns->regs[solns->exp->intval].rm_eo = solns->saved_rm_eo;
			  }
			return paren_stat;
		      }
		  }
	      }

	    /* Alternation: all solutions of the left branch, then
	     * all solutions of the right branch.
	     */
	    case r_alternate:
	      {
		int alt_stat;
		switch (solns->step)
		  {
		  case 1:
		    solns->left = rx_make_solutions (solns->regs,
						     solns->cset_size,
						     solns->exp->left,
						     solns->subexps,
						     solns->start,
						     solns->end,
						     solns->vmfn,
						     solns->contextfn,
						     solns->closure);
		    solns->step = 2;
		    /* fall through */
		    
		  case 2:
		    alt_stat = rx_next_solution (solns->left);

		    if (alt_stat == 1)
		      {
			solns->final_tag = solns->left->final_tag;
			return alt_stat;
		      }
		    else 
		      {
			solns->step = 3;
			rx_free_solutions (solns->left);
			solns->left = 0;
			/* fall through */
		      }

		  case 3:
		    solns->right = rx_make_solutions (solns->regs,
						      solns->cset_size,
						      solns->exp->right,
						      solns->subexps,
						      solns->start,
						      solns->end,
						      solns->vmfn,
						      solns->contextfn,
						      solns->closure);
		    solns->step = 4;
		    /* fall through */
		    
		  case 4:
		    alt_stat = rx_next_solution (solns->right);

		    if (alt_stat == 1)
		      {
			solns->final_tag = solns->right->final_tag;
			return alt_stat;
		      }
		    else 
		      {
			solns->step = -1;
			rx_free_solutions (solns->right);
			solns->right = 0;
			return alt_stat;
		      }
		  }
	     }

	    /* Concatenation: try each split point from `end' down to
	     * `start'; for each solution of the left part, enumerate
	     * the solutions of the right part.
	     */
	    case r_concat:
	      {
		switch (solns->step)
		  {
		    int concat_stat;
		  case 1:
		    solns->split_guess = solns->end;

		  concat_split_guess_loop:
		    solns->left = rx_make_solutions (solns->regs,
						     solns->cset_size,
						     solns->exp->left,
						     solns->subexps,
						     solns->start,
						     solns->split_guess,
						     solns->vmfn,
						     solns->contextfn,
						     solns->closure);
		    solns->step = 2;

		  case 2:
		  concat_try_next_left_match:

		    concat_stat = rx_next_solution (solns->left);
		    if (concat_stat != 1)
		      {
			rx_free_solutions (solns->left);
			rx_free_solutions (solns->right);
			solns->left = solns->right = 0;
			solns->split_guess = solns->split_guess - 1;
			if (solns->split_guess >= solns->start)
			  goto concat_split_guess_loop;
			else
			  {
			    solns->step = -1;
			    return concat_stat;
			  }
		      }
		    else
		      {
			solns->step = 3;
			/* fall through */
		      }

		  case 3:
		    solns->right = rx_make_solutions (solns->regs,
						      solns->cset_size,
						      solns->exp->right,
						      solns->subexps,
						      solns->split_guess,
						      solns->end,
						      solns->vmfn,
						      solns->contextfn,
						      solns->closure);
		    solns->step = 4;
		    /* fall through */

		  case 4:
		  /* concat_try_next_right_match: */

		    concat_stat = rx_next_solution (solns->right);
		    if (concat_stat == 1)
		      {
			solns->final_tag = solns->right->final_tag;
			return concat_stat;
		      }
		    else 
		      {
			rx_free_solutions (solns->right);
			solns->right = 0;
			solns->step = 2;
			goto concat_try_next_left_match;
		      }
		  }
	      }

	    /* Star: like concatenation, except that the right part
	     * is this same star expression applied to the rest of
	     * the region.
	     */
	    case r_star:
	      {
		switch (solns->step)
		  {
		    int star_stat;
		  case 1:
		    solns->split_guess = solns->end;

		  star_split_guess_loop:
		    solns->left = rx_make_solutions (solns->regs,
						     solns->cset_size,
						     solns->exp->left,
						     solns->subexps,
						     solns->start,
						     solns->split_guess,
						     solns->vmfn,
						     solns->contextfn,
						     solns->closure);
		    solns->step = 2;

		  case 2:
		  star_try_next_left_match:

		    star_stat = rx_next_solution (solns->left);
		    if (star_stat != 1)
		      {
			rx_free_solutions (solns->left);
			rx_free_solutions (solns->right);
			solns->left = solns->right = 0;
			solns->split_guess = solns->split_guess - 1;
			if (solns->split_guess >= solns->start)
			  goto star_split_guess_loop;
			else
			  {
			    solns->step = -1;

			    /* Every split point failed; zero iterations
			     * still match an empty region.
			     */
			    if (   (solns->exp->type == r_star)
				&& (solns->start == solns->end)
				&& (star_stat == 0))
			      {
				solns->final_tag = 1;
				return 1;
			      }
			    else
			      return star_stat;
			  }
		      }
		    else
		      {
			solns->step = 3;
			/* fall through */
		      }


		    /* The left part covered the whole region: report
		     * that as a solution.  step is already 3, so the
		     * next call resumes at case 3.
		     */
		    if (solns->split_guess == solns->end)
		      {
			solns->final_tag = solns->left->final_tag;
			return 1;
		      }
		    
		  case 3:
		    solns->right = rx_make_solutions (solns->regs,
						      solns->cset_size,
						      solns->exp,
						      solns->subexps,
						      solns->split_guess,
						      solns->end,
						      solns->vmfn,
						      solns->contextfn,
						      solns->closure);

		    solns->step = 4;
		    /* fall through */

		  case 4:
		  /* star_try_next_right_match: */
		    
		    star_stat = rx_next_solution (solns->right);
		    if (star_stat == 1)
		      {
			solns->final_tag = solns->right->final_tag;
			return star_stat;
		      }
		    else
		      {
			rx_free_solutions (solns->right);
			solns->right = 0;
			solns->step = 2;
			goto star_try_next_left_match;
		      }
		  }
	      }

	    case r_interval:
	      {
		switch (solns->step)
		  {
		    int interval_stat;

		  case 1:
		    /* If the interval permits nothing, 
		     * return immediately.
		     */
		    if (solns->exp->intval2 < solns->interval_x)
		      {
			solns->step = -1;
			return 0;
		      }

		    /* If the interval permits only 0 iterations,
		     * return immediately.  Success depends on the
		     * emptiness of the match.
		     */
		    if (   (solns->exp->intval2 == solns->interval_x)
			&& (solns->exp->intval <= solns->interval_x))
		      {
			solns->step = -1;
			solns->final_tag = 1;
			return ((solns->start == solns->end)
				? 1
				: 0);
		      }
		    solns->split_guess = solns->end;

		    /* The interval permits more than 0 iterations.
		     * If it permits 0 and the match is to be empty, 
		     * the trivial match is the most preferred answer. 
		     */
		    if (solns->exp->intval <= solns->interval_x)
		      {
			solns->step = 2;
			if (solns->start == solns->end)
			  {
			    solns->final_tag = 1;
			    return 1;
			  }
			/* If this isn't a trivial match, or if the trivial match
			 * is rejected, look harder. 
			 */
		      }
		    
		  case 2:
		  interval_split_guess_loop:
		    /* The match requires at least one iteration, either because
		     * there are characters to match, or because the interval starts
		     * above 0.
		     *
		     * Look for the first iteration:
		     */
		    solns->left = rx_make_solutions (solns->regs,
						     solns->cset_size,
						     solns->exp->left,
						     solns->subexps,
						     solns->start,
						     solns->split_guess,
						     solns->vmfn,
						     solns->contextfn,
						     solns->closure);
		    solns->step = 3;

		  case 3:
		  interval_try_next_left_match:

		    interval_stat = rx_next_solution (solns->left);
		    if (interval_stat != 1)
		      {
			rx_free_solutions (solns->left);
			rx_free_solutions (solns->right);
			solns->left = solns->right = 0;
			solns->split_guess = solns->split_guess - 1;
			if (solns->split_guess >= solns->start)
			  goto interval_split_guess_loop;
			else
			  {
			    solns->step = -1;
			    return interval_stat;
			  }
		      }
		    else
		      {
			solns->step = 4;
			/* fall through */
		      }

		  case 4:
		    {
		      /* After matching one required iteration, construct a smaller
		       * interval and try to match that against the rest.
		       *
		       * To avoid thwarting unfa caching, instead of building a new
		       * rexp node with different interval extents, we keep interval_x
		       * in each solns structure to keep track of the number of 
		       * iterations matched so far.
		       */
		      solns->right = rx_make_solutions (solns->regs,
							solns->cset_size,
							solns->exp,
							solns->subexps,
							solns->split_guess,
							solns->end,
							solns->vmfn,
							solns->contextfn,
							solns->closure);
		      solns->right->interval_x = solns->interval_x + 1;
		    }

		    solns->step = 5;
		    /* fall through */

		  case 5:
		  /* interval_try_next_right_match: */
		    
		    interval_stat = rx_next_solution (solns->right);
		    if (interval_stat == 1)
		      {
			solns->final_tag = solns->right->final_tag;
			return interval_stat;
		      }
		    else
		      {
			rx_free_solutions (solns->right);
			solns->right = 0;
			/* NOTE(review): the jump target sits under case 3,
			 * not case 2.  The value stored here is replaced
			 * on every path before the next return, so it has
			 * no visible effect.
			 */
			solns->step = 2;
			goto interval_try_next_left_match;
		      }
		  }
	      }

	    /* Context nodes are decided entirely by the caller's
	     * context function; at most one answer.
	     */
	    case r_context:
	      {
		solns->step = -1;
		solns->final_tag = 1;
		return solns->contextfn (solns->closure,
					 solns->exp,
					 solns->start, solns->end,
					 solns->regs);
	      }


	    }
	}
      return panic ("unreached in rx_next_solution");
    }
}



/* rx_solutions_final_tag
 *
 * Return the `final_tag' field of `solns'.  `solns' must not
 * be 0.
 */
int
rx_solutions_final_tag (struct rx_solutions * solns)
{
  int tag;

  tag = solns->final_tag;
  return tag;
}

/* rx_solutions_closure
 *
 * Return the closure pointer that was handed to
 * rx_make_solutions for `solns'.  `solns' must not be 0.
 */
void *
rx_solutions_closure (struct rx_solutions * solns)
{
  void * user_data;

  user_data = solns->closure;
  return user_data;
}



/* rx_free_solutions
 *
 * Release a stream of solutions together with its sub-streams,
 * its automata and its reference to the expression.  A 0
 * argument is ignored.
 *
 * The struct itself is parked in cached_solutions_storage for
 * reuse by rx_make_solutions if that slot is empty; otherwise it
 * is released.  The closure is not touched.
 */
void
rx_free_solutions (struct rx_solutions * solns)
{
  if (!solns)
    return;

  if (solns->left)
    {
      rx_free_solutions (solns->left);
      solns->left = 0;
    }

  if (solns->right)
    {
      rx_free_solutions (solns->right);
      solns->right = 0;
    }

  /* Release each engine before the automaton it was initialized
   * from, using the same order for both pairs.
   */
  rx_free_dfa_storage (&solns->match_engine);
  if (solns->dfa)
    {
      rx_free_unfa (solns->dfa);
      solns->dfa = 0;
    }

  if (solns->left_dfa)
    {
      rx_free_dfa_storage (&solns->left_match_engine);
      rx_free_unfa (solns->left_dfa);
      solns->left_dfa = 0;
    }

  if (solns->exp)
    {
      rx_free_rexp (solns->exp);
      solns->exp = 0;
    }

  /* The struct came from xmalloc (or from the cache), so it goes
   * back through xfree rather than plain free.
   */
  if (!cached_solutions_storage)
    cached_solutions_storage = solns;
  else
    xfree ((void *)solns);
}



/* rx_str_vmfn
 *
 * A vmfn for input held in a single in-memory string
 * (`closure' is a struct rx_str_closure).
 *
 * If `need' lies within [0, len], store the whole string as one
 * burst at offset 0 and return 1; otherwise return 0 and leave
 * the output parameters alone.  `start' and `end' are not used.
 */
int
rx_str_vmfn (void * closure,
	     unsigned const char ** burstp,
	     int * lenp,
	     int * offsetp,
	     int start,
	     int end,
	     int need)
{
  struct rx_str_closure * strc = (struct rx_str_closure *)closure;

  if ((0 <= need) && (need <= strc->len))
    {
      *burstp = strc->str;
      *lenp = strc->len;
      *offsetp = 0;
      return 1;
    }

  return 0;
}

/* rx_str_contextfn
 *
 * A contextfn for input held in a single in-memory string
 * (`closure' is a struct rx_str_closure).  The kind of context
 * test is selected by node->intval:
 *
 *	'1'..'9'  back-reference: the region [start, end) must have
 *		  the same length and contents as the text recorded
 *		  in that register (compared without regard to case
 *		  if rules.case_indep is set).
 *	'^'	  empty region at position 0 (unless rules.not_bol),
 *		  or just after a newline if rules.newline_anchor.
 *	'$'	  empty region at the end of the string (unless
 *		  rules.not_eol), or just before a newline if
 *		  rules.newline_anchor.
 *
 * Returns 1 if the test succeeds and 0 if it fails; any other
 * node->intval panics.
 */
int
rx_str_contextfn (void * closure,
		  struct rx_exp_node * node,
		  int start, int end,
		  struct rx_registers * regs)
{
  struct rx_str_closure * strc = (struct rx_str_closure *)closure;

  switch (node->intval)
    {
    case '1': case '2': case '3': case '4': case '5':
    case '6': case '7': case '8': case '9':
      {
	int regn = node->intval - '0';
	int differ;

	/* An unset register matches nothing. */
	if (regs[regn].rm_so == -1)
	  return 0;

	if ((end - start) != (regs[regn].rm_eo - regs[regn].rm_so))
	  return 0;

	if (strc->rules.case_indep)
	  differ = strncasecmp (strc->str + start,
				strc->str + regs[regn].rm_so,
				end - start);
	else
	  differ = strncmp (strc->str + start,
			    strc->str + regs[regn].rm_so,
			    end - start);

	if (differ)
	  return 0;
	return 1;
      }

    case '^':
      {
	if (start != end)
	  return 0;

	if (start == 0)
	  return (strc->rules.not_bol ? 0 : 1);

	if (start < 0)
	  return 0;

	if (   strc->rules.newline_anchor
	    && (strc->str[start - 1] == '\n'))
	  return 1;
	return 0;
      }

    case '$':
      {
	if (start != end)
	  return 0;

	if (start == strc->len)
	  return (strc->rules.not_eol ? 0 : 1);

	if (start > strc->len)
	  return 0;

	if (   strc->rules.newline_anchor
	    && (strc->str[start] == '\n'))
	  return 1;
	return 0;
      }

    default:
      return panic ("unrecognized context function in rx_str_contextfn");
    }
}



/* One-element cache of a released string closure; filled by
 * rx_basic_free_solutions and emptied by rx_basic_make_solutions.
 */
static char * silly_hack = 0;

/* rx_basic_make_solutions
 *
 * Build a stream of solutions for matching `expression' against
 * the region [start, end) of the in-memory string `str', using
 * rx_str_vmfn and rx_str_contextfn with a character set size of
 * 256.
 *
 * Returns 0 when the expression has a known length (len >= 0)
 * that differs from the length of the region.  Streams made here
 * are to be released with rx_basic_free_solutions.
 */
struct rx_solutions *
rx_basic_make_solutions (struct rx_registers * regs,
			 struct rx_exp_node * expression,
			 struct rx_exp_node ** subexps,
			 int start,
			 int end,
			 struct rx_context_rules * rules,
			 const unsigned char * str)
{
  struct rx_str_closure * closure;

  if (expression && (expression->len >= 0))
    {
      if (expression->len != (end - start))
	return 0;
    }

  /* Reuse the cached closure if there is one, else allocate. */
  closure = (struct rx_str_closure *)silly_hack;
  if (closure)
    silly_hack = 0;
  else
    closure = (struct rx_str_closure *)xmalloc (sizeof (*closure));

  closure->str = str;
  closure->len = end;
  closure->rules = *rules;

  return rx_make_solutions (regs,
			    256, expression, subexps,
			    start, end,
			    rx_str_vmfn, rx_str_contextfn, (void *)closure);
}

/* rx_basic_free_solutions
 *
 * Release a stream made by rx_basic_make_solutions, including
 * the string closure that was allocated for it.  A 0 argument
 * is ignored.
 *
 * The closure is parked in `silly_hack' for reuse if that slot
 * is empty; otherwise it is released.
 */
void
rx_basic_free_solutions (struct rx_solutions * solns)
{
  void * closure;

  if (solns == 0)
    return;

  /* Fetch the closure from the stream itself.  Handing the closure
   * to rx_solutions_closure would read a field out of the wrong
   * kind of struct and release an unrelated pointer.
   */
  closure = rx_solutions_closure (solns);

  if (!silly_hack)
    silly_hack = (char *)closure;
  else
    xfree (closure);

  rx_free_solutions (solns);
}
