/* ,file-id archive://[lord]/433/rx/nfa.c/1998-05-18
 */
/*	Copyright (C) 1997 Tom Lord
 * 
 * This program is provided to you under the terms of the Liberty Software
 * License.  You are NOT permitted to redistribute, modify, or use it
 * except in very specific ways described by that license.
 *
 * This software comes with NO WARRANTY.
 * 
 * You should have received a copy of the Liberty Software License
 * along with this software; see the file =LICENSE.  If not, write to
 * the Tom Lord, 1810 Francisco St. #2, Berkeley CA, 94703, USA.  
 */





#include "vu/bitset.h"
#include "vu/hashtab.h"
#include "vu/xmalloc.h"
#include "vu/dstr.h"
#include "vu/str.h"
#include "rexp.h"
#include "nfa.h"
#include "super.h"


/* Allocation and Initialization
 */

/* Next id to hand out to a newly allocated NFA.  Read and advanced
 * only by rx_nfa_xalloc.
 */
static unsigned long rx_id = 0;

/* Allocate a zero-filled `struct rx_nfa', give it the next value of the
 * file-level id counter, and record `cset_size' in its
 * `local_cset_size' field.
 *
 * Panics if advancing the id counter wraps it around to 0.
 */
struct rx_nfa *
rx_nfa_xalloc (int cset_size)
{
  struct rx_nfa * nfa;

  nfa = (struct rx_nfa *)xmalloc (sizeof (struct rx_nfa));
  memset0 ((char *)nfa, sizeof (struct rx_nfa));

  /* Take the current id, then advance the counter and check for wrap. */
  nfa->rx_id = rx_id;
  rx_id += 1;
  if (!rx_id)
    panic ("rx id wraparound!");

  nfa->local_cset_size = cset_size;
  return nfa;
}


static void
rx_free_nfa_graph (struct rx_nfa *rx)
{
  while (rx->nfa_states)
    {
      while (rx->nfa_states->edges)
	{
	  switch (rx->nfa_states->edges->type)
	    {
	    case ne_cset:
	      xfree (rx->nfa_states->edges->cset);
	      break;
	    default:
	      break;
	    }
	  {
	    struct rx_nfa_edge * e;
	    e = rx->nfa_states->edges;
	    rx->nfa_states->edges = rx->nfa_states->edges->next;
	    xfree (e);
	  }
	}
      {
	struct rx_nfa_state *n;
	n = rx->nfa_states;
	rx->nfa_states = rx->nfa_states->next;
	xfree (n);
      }
    }
}

/* Per-item callback passed to hashtab_free_static by rx_free_nfa:
 * releases the object stored in the item's `data' field.
 */
static void 
nfa_set_freer (struct hashtab_item * node)
{
  char * payload;

  payload = (char *)node->data;
  xfree (payload);
}

/* Forward declaration: the initialized definition appears further down
 * in this file, but rx_free_nfa needs to take its address before then.
 */
static struct hashtab_rules nfa_set_hash_rules;

/* Release an NFA and everything this file hangs off it.
 *
 * If the NFA has a `start_set', that set's `starts_for' back-pointer is
 * cleared first.  The state-set memo table is then emptied (each stored
 * set is released by nfa_set_freer) and zeroed, the state/edge graph is
 * freed, and finally the `struct rx_nfa' itself is released.
 *
 * The structure comes from xmalloc (see rx_nfa_xalloc), so it is
 * returned with xfree, like every other allocation in this file.
 * A null `rx' is accepted and ignored.
 */
void
rx_free_nfa (struct rx_nfa * rx)
{
  if (!rx)
    return;

  if (rx->start_set)
    rx->start_set->starts_for = 0;
  hashtab_free_static (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
  memset0 ((char *)&rx->set_list_memo, sizeof (rx->set_list_memo));
  rx_free_nfa_graph (rx);
  xfree (rx);
}


/* {Translating a Syntax Tree into an NFA}
 *
 */

/* Translate the syntax tree `rexp' into states and edges of `rx'.
 *
 * `start' and `end' are in/out parameters.  If either points at a null
 * pointer, a fresh state is allocated and stored through it; otherwise
 * the state supplied by the caller is used.  The edges added here hang
 * the fragment for `rexp' between *start and *end (the `r_cut' case is
 * the exception: it only adds an edge from *start to a new labeled
 * state).
 *
 * A null `rexp' and an empty string literal both become a single
 * epsilon edge from *start to *end.
 *
 * Panics on a node type that is not handled below.
 */
void
rx_build_nfa (struct rx_nfa *rx,
	      struct rx_exp_node *rexp,
	      struct rx_nfa_state **start,
	      struct rx_nfa_state **end)
{
  struct rx_nfa_edge *edge;

  if (!*start)
    *start = rx_nfa_state (rx);

  if (!*end)
    *end = rx_nfa_state (rx);

  if (!rexp)
    {
      rx_nfa_edge (rx, ne_epsilon, *start, *end);
      return;
    }

  switch (rexp->type)
    {
    case r_cset:
      /* One edge labeled with a private copy of the node's character set. */
      (*start)->has_cset_edges = 1;
      edge = rx_nfa_edge (rx, ne_cset, *start, *end);
      edge->cset = bitset_dup (rx->local_cset_size, rexp->cset);
      return;

    case r_string:
      {
	if (rexp->cstr.len < 1)
	  {
	    /* Empty literal.  Without this guard the general case below
	     * would decrement the length past zero and recurse without
	     * bound while stepping `chr' off the end of the string.
	     */
	    rx_nfa_edge (rx, ne_epsilon, *start, *end);
	    return;
	  }
	else if (rexp->cstr.len == 1)
	  {
	    /* Single character: one edge whose set holds just that character. */
	    (*start)->has_cset_edges = 1;
	    edge = rx_nfa_edge (rx, ne_cset, *start, *end);
	    edge->cset = bitset_xalloc (rx->local_cset_size);
	    bitset_adjoin (edge->cset, rexp->cstr.chr[0]);
	    return;
	  }
	else
	  {
	    /* Longer string: work on a shallow copy of the node.  Build
	     * the tail (everything after the first character) from a new
	     * intermediate state to *end, then the first character from
	     * *start to that intermediate state.
	     */
	    struct rx_exp_node copied;
	    struct rx_nfa_state * shared;

	    copied = *rexp;
	    shared = 0;

	    copied.cstr.len--;
	    copied.cstr.chr++;
	    rx_build_nfa (rx, &copied, &shared, end);

	    copied.cstr.len = 1;
	    copied.cstr.chr--;
	    rx_build_nfa (rx, &copied, start, &shared);

	    return;
	  }
      }
 
    /* NOTE(review): an interval node is built exactly like a star node
     * here; presumably interval bounds are dealt with elsewhere -- confirm.
     */
    case r_interval:
    case r_star:
      {
	struct rx_nfa_state * star_start;
	struct rx_nfa_state * star_end;

	star_start = 0;
	star_end = 0;
	rx_build_nfa (rx, rexp->left, &star_start, &star_end);
	/* Skip the body, enter it, leave it, and loop back into it. */
	rx_nfa_edge (rx, ne_epsilon, star_start, star_end);
	rx_nfa_edge (rx, ne_epsilon, *start, star_start);
	rx_nfa_edge (rx, ne_epsilon, star_end, *end);
	rx_nfa_edge (rx, ne_epsilon, star_end, star_start);
	return;
      }

    case r_cut:
      {
	/* A new state labeled with the node's `intval', reachable from
	 * *start by an epsilon edge.  No edge to *end is added.
	 */
	struct rx_nfa_state * cut_end;

	cut_end = rx_nfa_state (rx);
	rx_nfa_edge (rx, ne_epsilon, *start, cut_end);
	cut_end->state_label = rexp->intval;
	return;
      }

    case r_parens:
      /* Parentheses add nothing of their own: build the sub-expression. */
      rx_build_nfa (rx, rexp->left, start, end);
      return;

    case r_concat:
      {
	/* The left operand ends where the right operand begins. */
	struct rx_nfa_state *shared;

	shared = 0;
	rx_build_nfa (rx, rexp->left, start, &shared);
	rx_build_nfa (rx, rexp->right, &shared, end);
	return;
      }

    case r_alternate:
      {
	/* Each branch gets its own start/end pair, joined to the outer
	 * start and end by epsilon edges.
	 */
	struct rx_nfa_state *ls = 0;
	struct rx_nfa_state *le = 0;
	struct rx_nfa_state *rs = 0;
	struct rx_nfa_state *re = 0;

	rx_build_nfa (rx, rexp->left, &ls, &le);
	rx_build_nfa (rx, rexp->right, &rs, &re);
	rx_nfa_edge (rx, ne_epsilon, *start, ls);
	rx_nfa_edge (rx, ne_epsilon, *start, rs);
	rx_nfa_edge (rx, ne_epsilon, le, *end);
	rx_nfa_edge (rx, ne_epsilon, re, *end);
	return;
      }

    case r_context:
      /* Represented in the NFA by a plain epsilon edge. */
      rx_nfa_edge (rx, ne_epsilon, *start, *end);
      return;

    default:
      panic ("unreconized node type in rx_build_nfa");
    }
}

/* Make `n' the start state of `rx': flag the state itself as a start
 * state and record it in `rx->start_nfa_state'.
 */
void
rx_set_start_state (struct rx_nfa * rx, struct rx_nfa_state * n)
{
  n->is_start = 1;
  rx->start_nfa_state = n;
}


/* Building Nodes and Edges One By One
 */

/* Allocate a zero-filled state for `rx'.  The state takes the next
 * value of `rx->nfa_state_id' as its id and is pushed on the front of
 * the `rx->nfa_states' list.  Returns the new state.
 */
struct rx_nfa_state *
rx_nfa_state (struct rx_nfa *rx)
{
  struct rx_nfa_state * fresh;

  fresh = (struct rx_nfa_state *)xmalloc (sizeof (struct rx_nfa_state));
  memset0 ((char *)fresh, sizeof (struct rx_nfa_state));

  fresh->id = rx->nfa_state_id;
  rx->nfa_state_id += 1;

  /* Link in at the head of the NFA's state list. */
  fresh->next = rx->nfa_states;
  rx->nfa_states = fresh;

  return fresh;
}


/* Allocate an edge of the given `type' leading to `dest' and push it on
 * the front of `start's edge list.  Returns the new edge.
 *
 * The edge is zero-filled before its fields are set, matching
 * rx_nfa_state and rx_nfa_xalloc, so fields not assigned here (such as
 * `cset' on an epsilon edge) are null rather than indeterminate.  For
 * an `ne_cset' edge the caller is expected to fill in `cset' afterwards,
 * as rx_build_nfa does.
 *
 * `rx' is not used by this function.
 */
struct rx_nfa_edge * 
rx_nfa_edge (struct rx_nfa *rx,
	     enum rx_nfa_etype type,
	     struct rx_nfa_state *start,
	     struct rx_nfa_state *dest)
{
  struct rx_nfa_edge *e;

  e = (struct rx_nfa_edge *)xmalloc (sizeof (*e));
  memset0 ((char *)e, sizeof (*e));
  e->next = start->edges;
  e->type = type;
  e->dest = dest;
  start->edges = e;
  return e;
}



/* Managing State Sets
 */

/* Three-way comparison of two state sets (lists linked through `cdr').
 *
 * The lists are walked in step.  Identical pointers (including two
 * nulls) compare equal; a list that runs out first sorts lower;
 * otherwise the first pair of elements whose state ids differ decides.
 * Returns -1, 0 or 1.
 */
static int 
nfa_set_cmp (void * va, void * vb)
{
  struct rx_nfa_state_set * left;
  struct rx_nfa_state_set * right;

  left = (struct rx_nfa_state_set *)va;
  right = (struct rx_nfa_state_set *)vb;

  while (left != right)
    {
      if (!left)
        return -1;
      if (!right)
        return 1;

      if (left->car->id < right->car->id)
        return -1;
      if (left->car->id > right->car->id)
        return 1;

      left = left->cdr;
      right = right->cdr;
    }

  return 0;
}

/* Equality predicate over state sets: non-zero exactly when
 * nfa_set_cmp reports the two sets as equal.
 */
static int 
nfa_set_equal (void * va, void * vb)
{
  return nfa_set_cmp (va, vb) == 0;
}

/* Rules passed to the hashtab routines whenever this file touches a
 * state-set memo table.  Only the first slot is filled in (with
 * nfa_set_equal); the remaining four are zero.
 * NOTE(review): what each slot means is defined by struct hashtab_rules
 * in vu/hashtab.h -- confirm there.
 */
static struct hashtab_rules nfa_set_hash_rules = { nfa_set_equal, 0, 0, 0, 0 };


static struct rx_nfa_state_set * 
nfa_set_cons (struct rx_nfa * rx,
	      struct hashtab * memo,
	      struct rx_nfa_state * state,
	      struct rx_nfa_state_set * set)
{
  struct rx_nfa_state_set template;
  struct hashtab_item * node;
  unsigned long hash_value;

  template.car = state;
  template.cdr = set;
  hash_value = (  (unsigned long)set
		^ (((unsigned long)state) << 19)
		^ (((unsigned long)state) >> (8 * sizeof (unsigned long) - 19)));

  node = hashtab_store (memo,
			hash_value, 
			&template,
			&nfa_set_hash_rules);
  
  if (node->data == &template)
    {
      struct rx_nfa_state_set * l;

      l = (struct rx_nfa_state_set *) xmalloc (sizeof (*l));
      *l = template;
      node->data = (void *) l;
    }

  return (struct rx_nfa_state_set *)node->data;
}


/* Return the set obtained by adding `state' to `set', keeping the list
 * ordered by ascending state id.  If a state with the same id is
 * already present, `set' itself is returned.  New cells are obtained
 * through nfa_set_cons, so they are shared via `memo'.
 */
static struct rx_nfa_state_set * 
nfa_set_adjoin (struct rx_nfa * rx,
		struct hashtab * memo,
		struct rx_nfa_state * state,
		struct rx_nfa_state_set * set)
{
  struct rx_nfa_state_set * tail;

  /* Empty set, or `state' sorts before the current head: prepend. */
  if (!set || (state->id < set->car->id))
    return nfa_set_cons (rx, memo, state, set);

  /* Already at the head: nothing to add. */
  if (state->id == set->car->id)
    return set;

  /* Otherwise it belongs somewhere in the tail. */
  tail = nfa_set_adjoin (rx, memo, state, set->cdr);
  if (tail == set->cdr)
    return set;

  return nfa_set_cons (rx, memo, set->car, tail);
}


/* Return the union of the state sets `a' and `b'.
 *
 * Both inputs are lists ordered by ascending state id (the order
 * nfa_set_adjoin maintains); the result is ordered the same way and is
 * built from cells obtained through nfa_set_cons, so it is shared via
 * `memo'.  A null set is the empty set.
 *
 * This is a full merge: the smaller head is kept and the remainder of
 * BOTH lists is merged recursively.  Prepending the smaller head to the
 * other list unchanged would silently drop the rest of the list that
 * head came from (e.g. {1,3} with {2} would give {1,2}).
 */
static struct rx_nfa_state_set * 
nfa_set_union (struct rx_nfa * rx,
	       struct hashtab * memo,
	       struct rx_nfa_state_set * a,
	       struct rx_nfa_state_set * b)
{
  struct rx_nfa_state * head;
  struct rx_nfa_state_set * rest;

  if (!a)
    return b;
  if (!b)
    return a;
  /* The very same list on both sides: the union is that list. */
  if (a == b)
    return a;

  if (a->car->id < b->car->id)
    {
      head = a->car;
      rest = nfa_set_union (rx, memo, a->cdr, b);
    }
  else if (b->car->id < a->car->id)
    {
      head = b->car;
      rest = nfa_set_union (rx, memo, a, b->cdr);
    }
  else
    {
      /* Same id at both heads: keep one copy and advance both lists. */
      head = a->car;
      rest = nfa_set_union (rx, memo, a->cdr, b->cdr);
    }

  return nfa_set_cons (rx, memo, head, rest);
}


/* {Computing Epsilon Closures.}
 */


/* One step of the traversal that computes the epsilon closure of
 * `outnode'; `node' is the state currently being visited.
 *
 * If `node' already has a computed closure, that closure is merged into
 * `outnode->closure'.  Then, unless `node' is marked, it is added to
 * `outnode->closure' and every epsilon edge leaving it is followed.
 *
 * `mark' is set while a state's epsilon edges are being followed and
 * cleared afterwards, so a cycle of epsilon edges stops when it gets
 * back to a state that is still being visited.
 */

static void
eclose_node (struct rx_nfa *rx,
	     struct rx_nfa_state *outnode,
	     struct rx_nfa_state *node)
{
  struct rx_nfa_edge * edge;

  if (node->closure_computed)
    outnode->closure = nfa_set_union (rx, &rx->set_list_memo,
                                      outnode->closure, node->closure);

  if (node->mark)
    return;

  node->mark = 1;

  outnode->closure = nfa_set_adjoin (rx, &rx->set_list_memo,
                                     node, outnode->closure);

  for (edge = node->edges; edge; edge = edge->next)
    {
      if (edge->type != ne_epsilon)
        continue;
      eclose_node (rx, outnode, edge->dest);
    }

  node->mark = 0;
}

/* Return the epsilon closure of state `n'.  The closure is computed on
 * the first request and cached in `n->closure'; `n->closure_computed'
 * records that the cached value is valid.
 */
struct rx_nfa_state_set *
rx_state_closure (struct rx_nfa * rx, struct rx_nfa_state * n)
{
  if (n->closure_computed)
    return n->closure;

  eclose_node (rx, n, n);
  n->closure_computed = 1;
  return n->closure;
}

