/* relational.c:
 *
 * vim:smartindent ts=8:sts=2:sta:et:ai:shiftwidth=2
 ****************************************************************
 * Copyright (C) 2003 Tom Lord
 *
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */


#include "hackerlab/bugs/panic.h"
#include "hackerlab/bugs/exception.h"
#include "hackerlab/os/errno.h"
#include "hackerlab/os/stdarg.h"
#include "hackerlab/vu/safe.h"
#include "hackerlab/vu/safe-vu-utils-vfdbuf.h"
#include "hackerlab/mem/talloc.h"
#include "hackerlab/sort/qsort.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/char/pika-escaping-utils.h"
#define _IN_RELATIONAL_
#include "libawk/relational.h"


/* Array type instantiations used throughout this file.
 * NOTE(review): AR_TYPEDEF is defined outside this file; presumably each
 * line declares the named array type together with the ar_*_<name>
 * helpers (ar_size_rel_table, ar_push_rel_record, ...) called below --
 * confirm against the ar header.
 */
AR_TYPEDEF (int, rel_cut_spec);
AR_TYPEDEF (rel_field, rel_record);
AR_TYPEDEF (rel_record, rel_table);
AR_TYPEDEF (struct rel_join_output_spec, rel_join_output_spec);
AR_TYPEDEF (struct rel_join_output_spec *, rel_join_output_specs);

/* __STDC__ prototypes for static functions */
static int rec_cmp_by_field (void * va, void * vb, void * vdata);
static int rec_cmp_by_field_fn (void * va, void * vb, void * vdata);
static int rec_cmp_by_fields (void * va, void * vb, void * vdata);
static rel_record rel_read_record (int fd,
                                   int n_fields,
                                   char * err_name,
                                   char * err_src,
                                   char const * name);
static rel_record rel_read_pika_unescape_iso8859_1_record (int fd,
                                                           int n_fields,
                                                           char * err_name,
                                                           char * err_src,
                                                           char const * name);
static void rel_print_record (int fd, rel_record rec);
static void rel_print_pika_escape_iso8859_1_record (int fd, int escape_classes, rel_record rec);
static void rel_print_record_sp (int fd, rel_record rec);
static void rel_print_pika_escape_iso8859_1_record_sp (int fd, int escape_classes, rel_record rec);


/************************************************************************
 *(h0 "Relational Tables")
 * 
 * Tla makes heavy use of a simple data structure for storing two
 * dimensional tables of strings: the `rel_table' type.
 * 
 * In general, these functions will cause the process to exit with 
 * non-0 status and an error message to the standard error descriptor
 * if an allocation failure occurs.
 */
/*(menu)
 */


/************************************************************************
 *(h1 "Table Types")
 * 
 * Tables should be declared to be of type `rel_table' and initialized 
 * to 0, as in:
 *  
 *    rel_table table = 0;
 * 
 * 
 * Individual records (each an array of fields) and individual fields
 * can be read using ordinary, 0-based array subscripting:
 * 
 *    table[4][1]
 * 
 * refers to the second field (or column) of the fifth row (or record)
 * of `table'.
 */

/*(c rel_field :category type)
 * typedef t_uchar * rel_field;
 * 
 * A single field within a relational table -- a 0-terminated string.
 */
/*(c rel_record :category type)
 * typedef rel_field * rel_record;
 * 
 * A single row within a relational table;  an array of fields.
 */
/*(c rel_table :category type)
 * typedef rel_record * rel_table;
 * 
 * A relational table;  an array of records.
 */


/************************************************************************
 *(h1 "Table Sizes")
 * 
 */


/*(c rel_n_records)
 * int rel_n_records (rel_table r);
 * 
 * Return the number of records (rows) within a table.
 */
int
rel_n_records (rel_table r)
{
  /* The row count is simply the size of the underlying record array. */
  int const n_rows = ar_size_rel_table (r);

  return n_rows;
}


/*(c rel_n_fields)
 * int rel_n_fields (rel_record r);
 * 
 * Return the number of fields (columns) within a record.
 */
int
rel_n_fields (rel_record r)
{
  /* The column count is simply the size of the underlying field array. */
  int const n_columns = ar_size_rel_record (r);

  return n_columns;
}


/************************************************************************
 *(h1 "Adding Fields and Records")
 * 
 * 
 * 
 */


/*(c rel_make_record)
 * rel_record rel_make_record (t_uchar const * const field0, ...);
 * 
 * Allocate a new record containing the indicated fields.
 * 
 * The list of field values may not itself contain a 0 (null) field
 * and must be terminated by a 0, as in this call:
 *  
 *     r = rel_make_record ("apples", "oranges", 0);
 * 
 * which creates a record with two fields.
 * 
 * Note that this function allocates private copies of the fields.
 * They will be freed by `ar_free_rel_record' or `rel_free_table'.
 */
rel_record
_rel_make_record (char const * name, t_uchar const * const field0, ...)
{
  rel_record rec = 0;
  va_list rest;
  t_uchar * next;

  /* A null first field means "no fields at all": no record is built. */
  if (!field0)
    return 0;

  /* The first field creates the record, so it carries `name'. */
  _rel_add_field (&rec, field0, name);

  /* Remaining fields are read up to (not including) the 0 terminator. */
  va_start (rest, field0);
  for (next = va_arg (rest, t_uchar *); next; next = va_arg (rest, t_uchar *))
    {
      rel_add_field (&rec, next);
    }
  va_end (rest);

  return rec;
}

/* Append the single record `r' to `*table' and hand ownership of the
 * record's storage to the table via talloc_steal.  `name' is passed
 * through to the array push helper.
 */
void
_rel_add_record (rel_table * table, rel_record r, char const * name)
{
    ar_push_ext_rel_table (table, r, name);
    /* `*table' is read only after the push, which may have updated it. */
    talloc_steal (ar_base (*table), ar_base (r));
}

/*(c rel_add_records)
 * void rel_add_records (rel_table * table, ...);
 * 
 * Append records to `*table'.
 * 
 * The list of records must be terminated by 0 (null).
 * 
 * This procedure may move the table itself in memory.   If 
 * it does, `*table' will be updated to point to the relocated
 * table.
 * 
 * This procedure does ^not^ copy its argument records but uses
 * them directly.   If the table is later passed to `rel_free_table',
 * those records will be freed.   (Thus, in general, tables should  
 * not share records.)
 * 
 * A typical usage, creating a table of the form:
 * 
 *      apples    trees
 *      oranges   trees
 *      grapes    vines
 * 
 * is the call (note with care the 0 values passed to terminate 
 * argument lists):
 * 
 *    rel_table t = 0;
 * 
 *    rel_add_records (&t, rel_make_record ("apples", "trees", 0),
 *                         rel_make_record ("oranges", "trees", 0),
 *                         rel_make_record ("grapes", "vines", 0),
 *                         0);
 */
void
_rel_add_records (rel_table * table, char const * name, ...)
{
  va_list args;

  va_start (args, name);
  while (1)
    {
      rel_record next = va_arg (args, rel_record);

      /* A 0 (null) record terminates the argument list. */
      if (!next)
        break;

      _rel_add_record (table, next, name);
    }
  va_end (args);
}

/** 
 * \brief insert records into a rel_table. 
 *
 * if the index is not <= rel_n_records table, an assertion will trigger
 * \param table the table to insert into
 * \param index the line to insert at
 * \param record, ...
 * \return void
 */
void
rel_insert_records (rel_table *table, int index, ...)
{
  va_list rp;
  rel_record r;

  /* The insertion point must lie within [0, number of records]. */
  invariant (index >= 0);
  invariant (index <= rel_n_records (*table));

  va_start (rp, index);
  for (r = va_arg (rp, rel_record); r; r = va_arg (rp, rel_record))
    {
      ar_insert_rel_table (table, index, r);

      /* As in _rel_add_record and rel_replace_record, the table takes
       * ownership of the record it now holds.
       */
      talloc_steal (ar_base (*table), ar_base (r));

      /* Advance past the record just inserted so that several records
       * end up in argument order rather than reversed.
       */
      ++index;
    }
  va_end (rp);
}

/**
 * \brief replace a record in a rel table
 * \param table the table to replace in
 * \param index the line to replace
 * \param record the line to replace with
 * \return void
 */
void
rel_replace_record (rel_table table, int index, rel_record record)
{
  /* Refuse to touch a slot outside the table. */
  invariant (index >= 0 && index < rel_n_records (table));

  /* Replacing a slot with the record it already holds is a no-op;
   * freeing it first would leave a dangling pointer in the table.
   */
  if (table[index] == record)
    return;

  ar_free_rel_record (&table[index]);
  table[index] = record;
  /* The table becomes the owner of the replacement record. */
  talloc_steal (ar_base (table), ar_base (record));
}

/**
 * \brief remove records from a rel_table.
 *
 * \param table the table to remove from
 * \param from_index the first record to remove
 * \param to_index the last record to remove
 * \return void
 */
void
rel_remove_records (rel_table *table, int from_index, int to_index)
{
    int const last = rel_n_records (*table) - 1;
    int doomed;
    int i;

    /* Clamp both ends of the range to the last record. */
    if (from_index > last)
        from_index = last;
    if (to_index > last)
        to_index = last;

    /* An inverted range collapses to the single record at from_index. */
    if (to_index < from_index)
        to_index = from_index;

    /* Negative start (including the empty-table case): nothing to do. */
    if (from_index < 0)
        return;

    doomed = to_index - from_index + 1;

    /* First release every record in the range ... */
    for (i = 0; i < doomed; ++i)
      {
        ar_free_rel_record (&(*table)[from_index + i]);
      }

    /* ... then drop that many slots, always at from_index. */
    for (i = 0; i < doomed; ++i)
      {
        ar_remove_rel_table (table, from_index);
      }
}


/*(c rel_add_field)
 * void rel_add_field (rel_record * r, t_uchar const * field);
 * 
 * Append a single field (the string `field') to the record `*r'.
 * 
 * A private copy of `field' is allocated.
 * 
 * The record may be relocated in memory in which case the value of
 * `*r' will be updated.
 */
void
_rel_add_field (rel_record * r, t_uchar const * field, char const * name)
{
  if (!*r)
    {
      /* No record exists yet to act as talloc parent: copy the string
       * unparented, create the record (tagged with `name'), then
       * re-parent the new last field onto the record.
       */
      ar_push_ext_rel_record (r, talloc_strdup (NULL, field), name);
      talloc_steal (ar_base (*r), (*r)[ar_size_rel_record (*r) - 1]);
      return;
    }

  /* Existing record: the copy is allocated directly under it. */
  ar_push_rel_record (r, talloc_strdup (ar_base(*r), field));
}


/*(c rel_singleton_record_n)
 * rel_record rel_singleton_record_n (t_uchar const * start, size_t len);
 * 
 * Create a new record containing a single field which is a copy of
 * the `len' characters beginning at `start' with a final 0 appended.
 */
rel_record
rel_singleton_record_n (t_uchar const * start, size_t len)
{
  rel_record rec = 0;

  /* Copy at most `len' characters; the copy starts out unparented
   * because the record does not exist until the push completes.
   */
  ar_push_rel_record (&rec, talloc_strndup (NULL, start, len));

  /* Now that the record exists, make it the owner of its only field. */
  talloc_steal (ar_base (rec), rec[0]);

  return rec;
}


/************************************************************************
 *(h1 "Freeing Records and Tables")
 * 
 * 
 * 
 */


/*(c rel_free_table)
 * void rel_free_table (rel_table t);
 * 
 * Free the entire table `t'. 
 * 
 * This function will also free all records which are part
 * of `t' -- there is no need to separately call `ar_free_rel_record'.
 */
void
rel_free_table (rel_table t)
{
  /* `t' is this function's own by-value copy, so the caller's variable
   * is not modified by this call.
   */
  ar_free_rel_table (&t);
}


/************************************************************************
 *(h1 "Parsing Tables from Strings")
 * 
 * 
 * 
 */

/* Separator predicate used by rel_callback_split: called with the caller's
 * context pointer and one character, returns non-0 if that character
 * separates rows.
 */
typedef int split_callback(void const * , char);

/*(c rel_callback_split)
 * static rel_table rel_callback_split (t_uchar const * string,
 *                                      split_callback * split_fn,
 *                                      void const * split_context);
 * 
 * Allocate and return a new table formed by parsing rows,
 * each containing a single field, from `string'.   Rows 
 * are separated by runs of characters for which `split_fn',
 * called with `split_context' and the character, returns non-0.
 */
static rel_table
rel_callback_split (t_uchar const * string, split_callback * split_fn, void const *split_context)
{
  rel_table rows = 0;
  t_uchar const * cursor;

  if (!string)
    return 0;

  cursor = string;

  for (;;)
    {
      t_uchar const * token;

      /* Skip any run of separator characters. */
      while (split_fn (split_context, *cursor))
        ++cursor;

      /* End of input: whatever has been collected is the result. */
      if (!*cursor)
        break;

      /* Scan forward to the end of the token. */
      token = cursor;
      while (*cursor && !split_fn (split_context, *cursor))
        ++cursor;

      /* Each token becomes its own one-field row. */
      rel_add_records (&rows, rel_singleton_record_n ((t_uchar *)token, cursor - token), 0);
    }

  return rows;
}

/* split_callback implementation for rel_delim_split.
 *
 * `context' is a 0-terminated string of delimiter bytes.  Returns 1 if
 * `ch' is one of them and 0 otherwise; the terminating 0 never matches.
 *
 * Both sides are compared as unsigned char.  Comparing an unsigned byte
 * with a plain `char' would never match bytes >= 0x80 on platforms where
 * `char' is signed, because the two promote to different int values.
 */
static int
split_delim (void const * context, char ch)
{
    unsigned char const *delimiters = (unsigned char const *) context;
    unsigned char const wanted = (unsigned char) ch;

    for (; *delimiters != '\0'; ++delimiters)
      {
        if (*delimiters == wanted)
            return 1;
      }
    return 0;
}

/*(c rel_delim_split)
 * rel_table rel_delim_split (t_uchar * string, t_uchar * delimiters);
 * 
 * Allocate and return a new table formed by parsing rows,
 * each containing a single field, from `string'.   Rows 
 * are separated by any char in delimiters.
 */
rel_table
rel_delim_split (t_uchar const * string, t_uchar const * delimiters)
{
    /* The delimiter string is handed to split_delim as its context. */
    return rel_callback_split (string, split_delim, delimiters);
}

/* split_callback implementation for rel_ws_split: reports whether `ch'
 * is whitespace according to char_is_space.  The context argument is
 * ignored; it exists only to match the split_callback signature.
 */
static int 
split_is_space (void const *unused, char ch)
{
    return char_is_space (ch);
}

/*(c rel_ws_split)
 * rel_table rel_ws_split (t_uchar const * string);
 * 
 * Allocate and return a new table formed by parsing rows,
 * each containing a single field, from `string'.   Rows 
 * are separated by arbitrary whitespace.
 */
rel_table
rel_ws_split (t_uchar const * string)
{
  /* split_is_space needs no context, hence NULL. */
  return rel_callback_split (string, split_is_space, NULL);
}


/*(c rel_nl_split)
 * rel_table rel_nl_split (t_uchar const * string);
 * 
 * Allocate and return a new table formed by parsing rows,
 * each containing a single field, from `string'.   Rows 
 * are separated by newlines.
 * FIXME: audit the callers to this to see if it can be integrated
 * into the callback method above, which would strip empty lines.
 */
rel_table
rel_nl_split (t_uchar const * string)
{
  rel_table lines = 0;
  t_uchar const * line_start;

  if (!string)
    return 0;

  line_start = string;

  while (*line_start)
    {
      t_uchar const * line_end = line_start;

      /* Find the newline (or the end of the string) ending this line. */
      while (*line_end != '\0' && *line_end != '\n')
        ++line_end;

      /* Empty lines are kept: they become rows with an empty field. */
      rel_add_records (&lines, rel_singleton_record_n (line_start, line_end - line_start), 0);

      /* Step over the newline, if there was one. */
      line_start = (*line_end ? line_end + 1 : line_end);
    }

  return lines;
}

/**
 * \brief convert a string of whitespace separated tokens to a two column table
 * Throws EINVAL if the token count is not even.
 * \param string the input
 * \return rel_table the result.
 */
rel_table
rel_ws_split_pairs (t_uchar const * string)
{
  rel_table answer = NULL;
  rel_table singles = NULL;
  int index = 0;
  int lim = 0;

  /* One single-field row per whitespace-separated token. */
  singles = rel_ws_split (string);
  lim = rel_n_records (singles);

  /* Make sure there is an even number of entries. */
  if (lim % 2)
    {
      /* Release the temporary table before raising so it does not leak.
       * NOTE(review): assumes Throw does not return to this point.
       */
      rel_free_table (singles);
      Throw (exception (EINVAL, "rel_ws_split_pairs: string has an uneven number of tokens"));
    }

  /* Tokens 2k and 2k+1 form row k; step by two so pairs do not overlap. */
  for (index = 0; index + 1 < lim; index += 2)
    {
      rel_add_records (&answer,
                       rel_make_record (singles[index][0], singles[index + 1][0], 0),
                       0);
    }

  rel_free_table (singles);
  return answer;
}



/************************************************************************
 *(h1 "Copying Tables")
 * 
 * 
 * 
 */



/*(c rel_copy_table)
 * rel_table rel_copy_table (rel_table t);
 * 
 * Return a freshly allocated copy of table `t'.
 */
rel_table
_rel_copy_table (rel_table t, char const * name)
{
  rel_table copy = 0;
  int const n_rows = rel_n_records (t);
  int row = 0;

  /* Size the new table up front, then fill each slot. */
  ar_setsize_ext_rel_table (&copy, n_rows, name);

  while (row < n_rows)
    {
      copy[row] = _rel_copy_record (t[row], name);

      /* The new table owns each copied record. */
      talloc_steal (ar_base (copy), ar_base (copy[row]));
      ++row;
    }

  return copy;
}


/*(c rel_copy_record)
 * rel_record rel_copy_record (rel_record r);
 * 
 * Return a freshly allocated copy of record `r'.
 */
rel_record
_rel_copy_record (rel_record r, char const * name)
{
  rel_record copy = 0;
  int const n_fields = rel_n_fields (r);
  int col = 0;

  ar_setsize_ext_rel_record (&copy, n_fields, name);

  /* Each slot receives a talloc reference to the source field, taken
   * under the new record, rather than a duplicated string.
   */
  while (col < n_fields)
    {
      copy[col] = talloc_reference (ar_base (copy), r[col]);
      ++col;
    }

  return copy;
}


/************************************************************************
 *(h1 "Appending Tables")
 * 
 * 
 * 
 */



/*(c rel_append_x)
 * void rel_append_x (rel_table * out, rel_table t);
 * 
 * Append copies of all records in table `t' to the 
 * table `*out'.
 * 
 * This procedure may move the output table in memory.   If 
 * it does, `*out' will be updated to point to the relocated
 * table.
 */
void
_rel_append_x (rel_table * out, rel_table t, char const * name)
{
  int const n_rows = rel_n_records (t);
  int row = 0;

  /* Append a copy of every source row, in order. */
  while (row < n_rows)
    {
      _rel_add_record (out, rel_copy_record (t[row]), name);
      ++row;
    }
}


/************************************************************************
 *(h1 "Reordering Tables")
 * 
 * 
 * 
 */


/*(c rel_reverse_table)
 * void rel_reverse_table (rel_table t);
 * 
 * Reverse the order of records in table `t'.
 */
void
rel_reverse_table (rel_table t)
{
  int lo;
  int hi;

  /* Swap rows pairwise from both ends until the indices meet. */
  for (lo = 0, hi = rel_n_records (t) - 1; lo < hi; ++lo, --hi)
    {
      rel_record const held = t[lo];

      t[lo] = t[hi];
      t[hi] = held;
    }
}


/* Sort parameters passed to rec_cmp_by_field through quicksort's
 * user-data pointer.
 */
struct rel_sort_spec
{
  int reverse_p;        /* non-0: negate the comparison (descending order) */
  int field;            /* index of the field used as the sort key */
};


/*(c rel_sort_table_by_field)
 * void rel_sort_table_by_field (int reverse_p,
 *                               rel_table t,
 *                               int field_n);
 * 
 * Sort table `t' lexically according to the contents of 
 * `field_n' within each record.
 * 
 * If `reverse_p' is not 0, then sort in descending rather
 * than ascending order.
 */
void
rel_sort_table_by_field (int reverse_p,
                         rel_table t,
                         int field_n)
{
  struct rel_sort_spec how;

  /* Bundle the key and direction for the comparison callback. */
  how.field = field_n;
  how.reverse_p = reverse_p;

  quicksort ((void *)t, rel_n_records (t), sizeof (rel_record), rec_cmp_by_field, (void *)&how);
}


static int
rec_cmp_by_field (void * va, void * vb, void * vdata)
{
  rel_record * a;
  rel_record * b;
  struct rel_sort_spec * spec;

  a = (rel_record *)va;
  b = (rel_record *)vb;
  spec = (struct rel_sort_spec *)vdata;

  if (spec->reverse_p)
    {
      return -str_cmp ((*a)[spec->field], (*b)[spec->field]);
    }
  else
    {
      return str_cmp ((*a)[spec->field], (*b)[spec->field]);
    }
}


/* Sort parameters passed to rec_cmp_by_field_fn through quicksort's
 * user-data pointer.
 */
struct rel_sort_by_fn_spec
{
  int reverse_p;                                /* non-0: negate the comparison */
  int field;                                    /* index of the field used as the sort key */
  int (*fn) (t_uchar * va, t_uchar * vb);       /* caller-supplied field comparison */
};


/*(c rel_sort_table_by_field_fn)
 * void rel_sort_table_by_field_fn (int reverse_p,
 *                                  rel_table t,
 *                                  int field_n, 
 *                                  int (*fn)(t_uchar *, t_uchar *));
 * 
 * Sort table `t' according to the contents of 
 * `field_n' within each record.
 * 
 * If `reverse_p' is not 0, then sort in descending rather
 * than ascending order.
 * 
 * The ordering is determined by `fn' which should accept 
 * two arguments, both field values, and return -1, 0, or 1
 * depending on whether the first is less than, equal to, or
 * greater than the second.
 */
void
rel_sort_table_by_field_fn (int reverse_p,
                            rel_table t,
                            int field_n, 
                            int (*fn)(t_uchar *, t_uchar *))
{
  struct rel_sort_by_fn_spec how;

  /* Bundle the comparator, key and direction for the callback. */
  how.fn = fn;
  how.field = field_n;
  how.reverse_p = reverse_p;

  quicksort ((void *)t, rel_n_records (t), sizeof (rel_record), rec_cmp_by_field_fn, (void *)&how);
}

static int
rec_cmp_by_field_fn (void * va, void * vb, void * vdata)
{
  rel_record * a;
  rel_record * b;
  struct rel_sort_by_fn_spec * spec;

  a = (rel_record *)va;
  b = (rel_record *)vb;
  spec = (struct rel_sort_by_fn_spec *)vdata;

  if (spec->reverse_p)
    {
      return -spec->fn ((*a)[spec->field], (*b)[spec->field]);
    }
  else
    {
      return spec->fn ((*a)[spec->field], (*b)[spec->field]);
    }
}



/* Sort parameters passed to rec_cmp_by_fields through quicksort's
 * user-data pointer.
 */
struct rel_nsort_spec
{
  int reverse_p;        /* non-0: negate each per-field comparison */
  int * fields;         /* key field indices, ended by a negative value */
};


/*(c rel_sort_table_by_fields)
 * void rel_sort_table_by_fields (int reverse_p,
 *                                rel_table t,
 *                                int * fields);
 * 
 * Sort table `t' lexically, according to the contents of the indicated
 * fields.
 * 
 * If `reverse_p' is not 0, then sort in descending rather
 * than ascending order.
 *
 * `fields' is a list of fields created by `rel_sort_fields' (see below)
 *  and lists the sort keys, from highest to lowest priority.
 */
void
rel_sort_table_by_fields (int reverse_p,
                          rel_table t,
                          int * fields)
{
  struct rel_nsort_spec how;

  /* Bundle the key list and direction for the comparison callback. */
  how.fields = fields;
  how.reverse_p = reverse_p;

  quicksort ((void *)t, rel_n_records (t), sizeof (rel_record), rec_cmp_by_fields, (void *)&how);
}


/*(c rel_sort_fields)
 * int * rel_sort_fields (int f, ...);
 * 
 * Construct a list of fields suitable for use with
 * `rel_sort_table_by_fields'.
 * 
 * The arguments should be terminated by an argument which is less
 * than 0.
 * 
 * It is not necessary to free the value returned by this
 * function (but ^note^ that, at the moment, the table is 
 * simply space-leaked).
 */
AR_TYPEDEF(int, legint);
int *
rel_sort_fields (int f, ...)
{
  va_list fp;
  int * answer;

  answer = 0;
  ar_push_legint (&answer, f);

  /* If the first argument is already the negative terminator, the list
   * is complete.  Reading further would fetch an argument the caller
   * never passed, which is undefined behaviour.
   */
  if (f < 0)
    return answer;

  va_start (fp, f);
  while (1)
    {
      f = va_arg (fp, int);
      /* The terminator is stored too: rec_cmp_by_fields stops on it. */
      ar_push_legint (&answer, f);
      if (f < 0)
        break;
    }
  va_end (fp);
  return answer;
}


static int
rec_cmp_by_fields (void * va, void * vb, void * vdata)
{
  rel_record * a;
  rel_record * b;
  struct rel_nsort_spec * spec;
  int nth;

  a = (rel_record *)va;
  b = (rel_record *)vb;
  spec = (struct rel_nsort_spec *)vdata;

  for (nth = 0; spec->fields[nth] >= 0; ++nth)
    {
      int cmp;

      if (spec->reverse_p)
        {
          cmp = -str_cmp ((*a)[spec->fields[nth]], (*b)[spec->fields[nth]]);
        }
      else
        {
          cmp = str_cmp ((*a)[spec->fields[nth]], (*b)[spec->fields[nth]]);
        }

      if (cmp)
        return cmp;
    }

  return 0;
}


/************************************************************************
 *(h1 "Eliminating Duplicate Fields")
 * 
 * 
 * 
 */


/*(c rel_uniq_by_field)
 * void rel_uniq_by_field (rel_table * table,
 *                         int field);
 * 
 * Discard from `table' the second and subsequent
 * consecutive occurrences of contiguous records sharing
 * equal values for the indicated `field'.
 * 
 * This procedure may move the `table' in memory.   If 
 * it does, `*table' will be updated to point to the 
 * relocated table.
 */
void
_rel_uniq_by_field (rel_table * table,
                   int field, char const * name)
{
  int const total = rel_n_records (*table);
  int kept = 0;
  int scan = 0;

  while (scan < total)
    {
      /* Keep the first row of each run, compacting toward the front. */
      (*table)[kept] = (*table)[scan];
      ++scan;

      /* Free every immediately following row whose key equals the kept row's. */
      while ((scan < total) && (str_cmp ((*table)[kept][field], (*table)[scan][field]) == 0))
        {
          ar_free_rel_record (&(*table)[scan]);
          ++scan;
        }

      ++kept;
    }

  /* Shrink the table to the number of rows kept. */
  ar_setsize_ext_rel_table (table, kept, name);
}



/************************************************************************
 *(h1 "Table Cuts")
 * 
 * 
 * 
 */


/*(c rel_cut)
 * rel_table rel_cut (rel_cut_spec fields, rel_table t);
 * 
 * Create a new, freshly allocated table formed by removing
 * from `t' the fields indicated by `fields'.
 * 
 * `fields' should be a value returned by `rel_cut_list' (see below).
 */
rel_table
_rel_cut (rel_cut_spec fields, rel_table t, char const * name)
{
  rel_table result = 0;
  int const n_rows = ar_size_rel_table (t);
  int row = 0;

  /* Build one cut-down record per source row, in order. */
  while (row < n_rows)
    {
      _rel_add_record (&result, rel_cut_record (fields, t[row]), name);
      ++row;
    }

  return result;
}


/*(c rel_cut_record)
 * rel_record rel_cut_record (rel_cut_spec fields, rel_record r);
 * 
 * Create a new, freshly allocated record formed by removing
 * from `r' the fields indicated by `fields'.
 * 
 * `fields' should be a value returned by `rel_cut_list' (see below).
 */
rel_record
rel_cut_record (rel_cut_spec fields, rel_record r)
{
  rel_record picked = 0;
  int k = 0;

  /* Copy the listed fields in spec order; a negative entry ends the list. */
  while (fields[k] >= 0)
    {
      rel_add_field (&picked, r[fields[k]]);
      ++k;
    }

  return picked;
}


/*(c rel_cut_list)
 * rel_cut_spec rel_cut_list (int field, ...);
 * 
 * Construct a list of fields suitable for use with
 * `rel_cut'.
 * 
 * The arguments should be terminated by an argument which is less
 * than 0.
 * 
 * It is not necessary to free the value returned by this
 * function (but ^note^ that, at the moment, the table is 
 * simply space-leaked).
 */
rel_cut_spec
rel_cut_list (int field, ...)
{
  va_list fp;
  rel_cut_spec answer;

  answer = 0;
  ar_push_rel_cut_spec (&answer, field);

  /* If the first argument is already the negative terminator, the list
   * is complete.  Reading further would fetch an argument the caller
   * never passed, which is undefined behaviour.
   */
  if (field < 0)
    return answer;

  va_start (fp, field);
  while (1)
    {
      field = va_arg (fp, int);
      /* The terminator is stored too: rel_cut_record stops on it. */
      ar_push_rel_cut_spec (&answer, field);
      if (field < 0)
        break;
    }
  va_end (fp);
  return answer;
}

/* Release a cut spec by passing `spec' to ar_free_rel_cut_spec. */
void
rel_cut_spec_finalise (rel_cut_spec *spec)
{
    ar_free_rel_cut_spec (spec);
}





/************************************************************************
 *(h1 "The Relational Join Operation")
 * 
 * 
 * 
 */


/*(c rel_join)
 * rel_table rel_join (int absence_table,
 *                     struct rel_join_output_spec * output,
 *                     int table1_field,
 *                     int table2_field,
 *                     rel_table table1,
 *                     rel_table table2);
 * 
 * Perform a relational join on `table1' and `table2' as
 * specified by the other arguments.
 * 
 * `table1_field' and `table2_field' indicate the fields to compare
 * for the join.  Both tables should be lexically sorted by that
 * field, in increasing order.
 * 
 * If `absence_table' is -1, then the output table contains an entry
 * for each row of `table1' and `table2' having the indicated fields
 * in common.  If `absence_table' is 1, then output is produced only
 * for rows unique to table 1, if 2, then for rows unique to table2.
 * 
 * `output' describes which field values to copy into the output table.
 * See `rel_join_output' below.
 */
rel_table
_rel_join (int absence_table,
          struct rel_join_output_spec * output,
          int table1_field,
          int table2_field,
          rel_table table1,
          rel_table table2,
          char const * name)
{
  rel_table joined = 0;
  int const n1 = rel_n_records (table1);
  int const n2 = rel_n_records (table2);
  int n_out = 0;
  int i1 = 0;
  int i2 = 0;

  /* Count the output columns; the spec list ends at table == -1. */
  while (output[n_out].table != -1)
    ++n_out;

  /* Merge-style walk over both tables. */
  while ((i1 < n1) || (i2 < n2))
    {
      int order;
      int emit;

      /* An exhausted table compares as "greater" than whatever remains
       * in the other one.
       */
      if (i2 == n2)
        order = -1;
      else if (i1 == n1)
        order = 1;
      else
        order = str_cmp (table1[i1][table1_field], table2[i2][table2_field]);

      /* Negative absence_table: emit matches.  1: emit rows found only
       * in table1.  Anything else: emit rows found only in table2.
       */
      if (absence_table < 0)
        emit = (order == 0);
      else if (absence_table == 1)
        emit = (order < 0);
      else
        emit = (order > 0);

      if (emit)
        {
          rel_record row = 0;
          rel_record const rec1 = ((i1 < n1) ? table1[i1] : 0);
          rel_record const rec2 = ((i2 < n2) ? table2[i2] : 0);
          int col;

          for (col = 0; col < n_out; ++col)
            {
              /* Table number 1 selects the table1 row; any other value
               * selects the table2 row.
               */
              rel_record const from = ((output[col].table == 1) ? rec1 : rec2);

              _rel_add_field (&row, from[output[col].field], name);
            }
          _rel_add_record (&joined, row, name);
        }

      /* Advance whichever side is not ahead; both on a match. */
      if ((i1 < n1) && (order <= 0))
        ++i1;

      if ((i2 < n2) && (order >= 0))
        ++i2;
    }

  return joined;
}


/*(c rel_join_output)
 * struct rel_join_output_spec * rel_join_output (int table,
 *                                                int field, ...);
 * 
 * Construct a list of fields to be included in the output of
 * a `rel_join' call.
 * 
 * The argument list is a sequence of pairs, terminated by a single -1.
 * 
 * Each pair names a `table' (1 or 2) and a `field' (0 based).
 */
struct rel_join_output_spec *
_rel_join_output (char const * name,
                 int table,
                 int field, ...)
{
  va_list ap;
  struct rel_join_output_spec * answer;
  struct rel_join_output_spec * item;
  int x;

  /* Every spec list ever built by this function.  It persists across
   * calls and nothing in this function removes entries from it.
   */
  static struct rel_join_output_spec ** cache = 0;

  answer = 0;

  /* Look for a cached list identical to the requested one. */
  for (x = 0; !answer && x < ar_size_rel_join_output_specs (cache); ++x)
    {
      item = cache[x];

      /* Cheap rejection on the first (table, field) pair. */
      if (item->table != table || item->field != field)
        continue;

      /* Walk the variadic arguments from the start for each candidate. */
      va_start (ap, field);
      while (1)
        {
          int tmp_table;
          int tmp_field;

          ++item;
          tmp_table = va_arg (ap, int);
          /* A negative table ends the argument list and has no field
           * after it; use -1 as its field for the comparison below.
           */
          if (tmp_table < 0)
            tmp_field = -1;
          else
            tmp_field = va_arg (ap, int);

          if (item->table != tmp_table || item->field != tmp_field)
            break;
          /* Both lists reached their terminator together: cache hit. */
          if (item->table == -1)
            {
              answer = cache[x];
              break;
            }
        }
      va_end (ap);
    }
    
  /* Cache miss: build a new list and remember it. */
  if (!answer)
    {
      /* NOTE: this local shadows the pointer `item' declared above. */
      struct rel_join_output_spec item;
      item.table = table;
      item.field = field;
      
      ar_push_ext_rel_join_output_spec (&answer, item, name);

      va_start (ap, field);
      while (1)
        {
          table = va_arg (ap, int);
          if (table < 0)
            break;
          field = va_arg (ap, int);

          item.table = table;
          item.field = field;
          ar_push_ext_rel_join_output_spec (&answer, item, name);
        }
      va_end (ap);

      /* Terminator entry; _rel_join stops counting at table == -1. */
      item.table = -1;
      item.field = -1;
      ar_push_ext_rel_join_output_spec (&answer, item, name);

      ar_push_ext_rel_join_output_specs (&cache, answer, name);
    }

  return answer;
}




/************************************************************************
 *(h1 "Reading Tables from Streams")
 * 
 * 
 * 
 */


/*(c rel_read_table)
 * rel_table rel_read_table (int fd,
 *                           int n_fields,
 *                           char * err_name,
 *                           char * err_src);
 * 
 * Read a table with `n_fields' per row from descriptor
 * `fd'.  (Fields are whitespace-separated strings, rows
 * are separated by newlines.)
 * 
 * In the event of an I/O or syntax error, report an error 
 * from program `err_name' concerning input from `err_src'
 * and exit with status 2.
 */
rel_table
_rel_read_table (int fd,
                int n_fields,
                char * err_name,
                char * err_src,
                char const * name)
{
  rel_table rows = 0;
  rel_record row;

  /* Read one record per line until the reader reports no more input. */
  for (row = rel_read_record (fd, n_fields, err_name, err_src, name);
       row;
       row = rel_read_record (fd, n_fields, err_name, err_src, name))
    {
      _rel_add_record (&rows, row, name);
    }

  return rows;
}


/* Read one line from `fd' and split it into `n_fields' whitespace
 * separated fields.  Returns 0 when safe_next_line yields no line.
 * If a field is empty or the line runs out, and `err_name' is non-null,
 * a diagnostic is printed to descriptor 2 and the process exits with
 * status 2; with a null `err_name' the condition is not reported.
 */
static rel_record
rel_read_record (int fd,
                 int n_fields,
                 char * err_name,
                 char * err_src,
                 char const * name)
{
  t_uchar * field_start;
  t_uchar * cursor;
  long remaining;
  rel_record rec = 0;
  int f = 0;

  safe_next_line (&field_start, &remaining, fd);
  if (!field_start)
    return 0;

  ar_setsize_ext_rel_record (&rec, n_fields, name);

  cursor = field_start;
  while (f < n_fields)
    {
      /* Advance over the field's non-space characters. */
      for (; remaining && !char_is_space (*cursor); --remaining)
        ++cursor;

      /* Line exhausted, or the field is empty. */
      if ((!remaining || (cursor == field_start)) && err_name)
        {
          safe_printfmt (2, "%s: ill formated input\n", err_name);
          safe_printfmt (2, "   input source: %s\n", err_src);
          exit (2);
        }

      rec[f] = talloc_strndup (ar_base(rec), field_start, cursor - field_start);

      /* Skip the whitespace separating this field from the next. */
      for (; remaining && char_is_space (*cursor); --remaining)
        ++cursor;

      field_start = cursor;
      ++f;
    }

  return rec;
}

/*(c rel_read_pika_unescape_iso8859_1_table)
 * rel_table rel_read_pika_unescape_iso8859_1_table (int fd,
 *                                                   int n_fields,
 *                                                   char * err_name,
 *                                                   char * err_src);
 * 
 * Read an escaped table with `n_fields' per row from descriptor
 * `fd'.  (Fields are whitespace-separated strings, rows
 * are separated by newlines.)
 *
 * Escape sequences will be unescaped
 *
 * In the event of an I/O or syntax error, report an error 
 * from program `err_name' concerning input from `err_src'
 * and exit with status 2.
 */
rel_table
_rel_read_pika_unescape_iso8859_1_table (int fd,
                                        int n_fields,
                                        char * err_name,
                                        char * err_src,
                                        char const * name)
{
  rel_table rows = 0;
  rel_record row;

  /* Read one unescaped record per line until the reader reports no
   * more input.
   */
  for (row = rel_read_pika_unescape_iso8859_1_record (fd, n_fields, err_name, err_src, name);
       row;
       row = rel_read_pika_unescape_iso8859_1_record (fd, n_fields, err_name, err_src, name))
    {
      _rel_add_record (&rows, row, name);
    }

  return rows;
}


/* rel_read_pika_unescape_iso8859_1_record:
 *
 * Read one line from `fd' and split it into `n_fields' fields, each
 * a run of non-space characters followed by a run of space
 * characters.  Returns 0 when `safe_next_line' yields no line.
 *
 * Each field is unescaped with `pika_save_unescape_iso8859_1_n';
 * the record keeps a `talloc_strdup' copy made under
 * `ar_base (answer)' and the temporary is released with `lim_free'.
 *
 * A field is considered ill-formed when it is empty or when the
 * line is exhausted immediately after it (no space character, such
 * as the line terminator, follows).  If `err_name' is non-null this
 * prints a diagnostic naming `err_name' and `err_src' on descriptor
 * 2 and exits with status 2; if `err_name' is null the condition is
 * ignored and parsing continues.
 */
static rel_record
rel_read_pika_unescape_iso8859_1_record (int fd,
                                         int n_fields,
                                         char * err_name,
                                         char * err_src,
                                         char const * name)
{
  t_uchar * line;
  long remaining;
  t_uchar * cursor;
  rel_record answer = 0;
  int field;

  safe_next_line (&line, &remaining, fd);
  if (!line)
    return 0;

  ar_setsize_ext_rel_record (&answer, n_fields, name);

  cursor = line;
  for (field = 0; field < n_fields; ++field)
    {
      t_uchar * field_start = cursor;
      t_uchar * unescaped;

      /* Advance over the field's own characters. */
      for (; remaining && !char_is_space (*cursor); --remaining)
        ++cursor;

      if (err_name && (!remaining || (cursor == field_start)))
        {
          safe_printfmt (2, "%s: ill formated input\n", err_name);
          safe_printfmt (2, "   input source: %s\n", err_src);
          exit (2);
        }

      unescaped = pika_save_unescape_iso8859_1_n (0, 0, field_start, cursor - field_start);
      answer[field] = talloc_strdup (ar_base (answer), unescaped);
      lim_free (0, unescaped);

      /* Skip the separator so the cursor rests on the next field. */
      for (; remaining && char_is_space (*cursor); --remaining)
        ++cursor;
    }

  return answer;
}


/************************************************************************
 *(h1 "Printing Tables to Streams")
 * 
 * 
 * 
 */


/*(c rel_print_table)
 * void rel_print_table (int fd, rel_table table);
 * 
 * Write every record of `table' to descriptor `fd', in table
 * order, one record per line with tab-separated fields.
 * 
 * In the event of an I/O error, exit with non-0, non-1 status.
 */
void
rel_print_table (int fd, rel_table table)
{
  int n_rows = ar_size_rel_table (table);
  int row = 0;

  while (row < n_rows)
    {
      rel_print_record (fd, table[row]);
      ++row;
    }
}

/* rel_print_pika_escape_iso8859_1_table:
 *
 * Write every record of `table' to descriptor `fd', in table order,
 * using `rel_print_pika_escape_iso8859_1_record' with the given
 * `escape_classes' for each one.
 */
void
rel_print_pika_escape_iso8859_1_table (int fd, int escape_classes, rel_table table)
{
  int n_rows = ar_size_rel_table (table);
  int row = 0;

  while (row < n_rows)
    {
      rel_print_pika_escape_iso8859_1_record (fd, escape_classes, table[row]);
      ++row;
    }
}


/*(c rel_print_table_sp)
 * void rel_print_table_sp (int fd, rel_table file);
 * 
 * Write every record of `file' to descriptor `fd', in table
 * order, one record per line with fields separated by single
 * spaces.
 * 
 * In the event of an I/O error, exit with non-0, non-1 status.
 */
void
rel_print_table_sp (int fd, rel_table file)
{
  int n_rows = ar_size_rel_table (file);
  int row = 0;

  while (row < n_rows)
    {
      rel_print_record_sp (fd, file[row]);
      ++row;
    }
}

/* rel_print_pika_escape_iso8859_1_table_sp:
 *
 * Write every record of `file' to descriptor `fd', in table order,
 * using `rel_print_pika_escape_iso8859_1_record_sp' with the given
 * `escape_classes' for each one.
 */
void
rel_print_pika_escape_iso8859_1_table_sp (int fd, int escape_classes, rel_table file)
{
  int n_rows = ar_size_rel_table (file);
  int row = 0;

  while (row < n_rows)
    {
      rel_print_pika_escape_iso8859_1_record_sp (fd, escape_classes, file[row]);
      ++row;
    }
}


/* rel_print_record:
 *
 * Write the fields of `rec' to descriptor `fd' on one line, with a
 * tab between consecutive fields and a newline after the last one.
 * A record with no fields produces no output at all.
 */
static void
rel_print_record (int fd, rel_record rec)
{
  int n_fields = ar_size_rel_record (rec);
  int i;

  if (n_fields <= 0)
    return;

  for (i = 0; i < n_fields; ++i)
    {
      /* No separator in front of the first field. */
      char const * sep = (i == 0) ? "" : "\t";

      safe_printfmt (fd, "%s%s", sep, rec[i]);
    }

  safe_printfmt (fd, "\n");
}

/* rel_print_pika_escape_iso8859_1_record:
 *
 * Write the fields of `rec' to descriptor `fd' on one line, with a
 * tab between consecutive fields and a newline after the last one.
 * Each field is first passed through `pika_save_escape_iso8859_1'
 * with `escape_classes'; the escaped copy is released with
 * `lim_free' once printed.  A record with no fields produces no
 * output at all.
 */
static void
rel_print_pika_escape_iso8859_1_record (int fd, int escape_classes, rel_record rec)
{
  int n_fields = ar_size_rel_record (rec);
  int i;

  if (n_fields <= 0)
    return;

  for (i = 0; i < n_fields; ++i)
    {
      /* No separator in front of the first field. */
      char const * sep = (i == 0) ? "" : "\t";
      t_uchar * escaped = pika_save_escape_iso8859_1 (0, 0, escape_classes, rec[i]);

      safe_printfmt (fd, "%s%s", sep, escaped);
      lim_free (0, escaped);
    }

  safe_printfmt (fd, "\n");
}



/* rel_print_record_sp:
 *
 * Write the fields of `rec' to descriptor `fd' on one line, with a
 * single space between consecutive fields and a newline after the
 * last one.  A record with no fields produces no output at all.
 */
static void
rel_print_record_sp (int fd, rel_record rec)
{
  int n_fields = ar_size_rel_record (rec);
  int i;

  if (n_fields <= 0)
    return;

  for (i = 0; i < n_fields; ++i)
    {
      /* No separator in front of the first field. */
      char const * sep = (i == 0) ? "" : " ";

      safe_printfmt (fd, "%s%s", sep, rec[i]);
    }

  safe_printfmt (fd, "\n");
}

/* rel_print_pika_escape_iso8859_1_record_sp:
 *
 * Write the fields of `rec' to descriptor `fd' on one line, with a
 * single space between consecutive fields and a newline after the
 * last one.  Each field is first passed through
 * `pika_save_escape_iso8859_1' with `escape_classes'; the escaped
 * copy is released with `lim_free' once printed.  A record with no
 * fields produces no output at all.
 */
static void
rel_print_pika_escape_iso8859_1_record_sp (int fd, int escape_classes, rel_record rec)
{
  int n_fields = ar_size_rel_record (rec);
  int i;

  if (n_fields <= 0)
    return;

  for (i = 0; i < n_fields; ++i)
    {
      /* No separator in front of the first field. */
      char const * sep = (i == 0) ? "" : " ";
      t_uchar * escaped = pika_save_escape_iso8859_1 (0, 0, escape_classes, rec[i]);

      safe_printfmt (fd, "%s%s", sep, escaped);
      lim_free (0, escaped);
    }

  safe_printfmt (fd, "\n");
}

/* rel_set_subtract:
 * Return a new table holding the elements of `left' that do not
 * appear in `right'.  Identity is decided by the first column
 * (field 0) of each table.
 *
 * Both `left' and `right' are passed to `rel_sort_table_by_field'
 * on field 0 before the join, so the caller's tables may be
 * mutated.  The answer is not sorted.
 *
 * NOTE(review): the output spec handed to `rel_join' names only
 * (table 1, field 0), although this function was previously
 * documented as copying entire records -- confirm against
 * `rel_join' which of the two is intended.
 */
rel_table
rel_set_subtract (rel_table left, rel_table right)
{
  rel_sort_table_by_field (0, left, 0);
  rel_sort_table_by_field (0, right, 0);

  return rel_join (1, rel_join_output (1, 0, -1), 0, 0, left, right);
}

/**
 * \brief convert from a single list of strings to a rel_table with
 * the supplied number of elements per row.
 *
 * width must be greater than 0 and count % width must be 0, or a
 * panic will be caused.  (width is checked first: evaluating
 * count % width or count / width with width == 0 would be undefined
 * behaviour.)
 * \return rel_table with count / width rows; NULL when no rows
 * are produced.
 * \param count the number of strings in the supplied array
 * \param width how wide the output rel table should be.
 * \param strings the strings to convert; consumed in row-major
 * order, i.e. row r, column c is strings[r * width + c].
 */
rel_table 
rel_unflatten (int count, int width, char * strings[])
{
  rel_table result = NULL;
  int n_rows;
  int row;

  /* Must precede any use of width as a divisor. */
  invariant (width > 0);
  invariant (count % width == 0);

  n_rows = count / width;
  for (row = 0; row < n_rows; ++row)
    {
      rel_record record = NULL;
      int col;

      for (col = 0; col < width; ++col)
        rel_add_field (&record, strings[row * width + col]);

      rel_add_record (&result, record);
    }

  return result;
}



/* tag: Tom Lord Mon May  5 12:50:00 2003 (relational.c)
 */
