#include "lss.h"
#include "alloc.h"
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
#include <stdio.h>
#include <assert.h>
#include <stdarg.h>
#include <errno.h>
#include <string.h>

/* Return the SCCS identification string for this source file. */
char *lss_sccs_id()
{
  static char sccs_id[] = "@(#)lss.c 1.8";

  return sccs_id;
}

/*
 *  Prototypes for system calls used below, declared here directly.
 *  flock() is only declared when HAVE_FLOCK is set.
 */
#if HAVE_FLOCK
int flock( int fd, int style );
#endif

int fsync( int fd );

/* forward declarations for helpers that are defined later in this file */
static struct LSS_Record *find_entry( LSS *lss, UINT_32 rec_num );
static void lss_log_entry( LSS *lss, char *fmt, ... );

/* number of entries in index_capacities[] */
#define NUM_CAPACITIES  (9)

/*
 *  Preferred index capacities, in ascending order.  The first entry
 *  is the capacity given to a newly created index (see lss_create());
 *  next_higher_capacity() scans this table when the index has to grow.
 *  (The values look like primes -- not verified here.)
 */
static UINT_32 index_capacities[NUM_CAPACITIES] =
	{11, 71, 541, 1223, 2741, 6133, 13499, 29443, 63809};

/*
 *  Try to take an exclusive, non-blocking lock on `fd'.
 *
 *  Uses flock() when HAVE_FLOCK is set, otherwise lockf() when
 *  HAVE_LOCKF is set.  When neither is available the lock is stubbed
 *  out: a warning goes to stderr and success (0) is reported, so the
 *  caller proceeds without a real lock.
 *
 *  Returns the result of the underlying call, or 0 from the stub;
 *  the caller treats a negative value as "could not lock".
 */
static int lss_lock( int fd )
{
#if HAVE_FLOCK
  return flock( fd, LOCK_EX|LOCK_NB );
#else
# if HAVE_LOCKF
  return lockf( fd, F_TLOCK, 0 );
# else
  (void)fd;
  fprintf( stderr, "*** lss_lock() stubbed out; file not really locked!\n" );
  /* previously control fell off the end here, leaving the result undefined */
  return 0;
# endif
#endif
}


/*
 *  Report an error with the given code and message.
 *
 *  If the LSS has an error handler installed, it is called with the
 *  LSS, the client's `info' pointer, the code and the message.
 *  Otherwise the message is printed to stderr and the process aborts.
 */
static void signal_error( LSS *lss, int code, char *msg )
{
  if (!lss->error_handler)
    {
      fprintf( stderr, "Unrecoverable LSS error: %s (%d)\n", msg, code );
      abort();
    }

  lss->error_handler( lss, lss->info, code, msg );
}

/*
 *  Report a system-level error (code LSSERR_SYS_ERR).
 *
 *  `msg' is a printf-style format followed by its arguments.  When
 *  errno is non-zero on entry, the text of strerror(errno) and ": "
 *  are put in front of the formatted message.  The result is handed
 *  to signal_error().
 *
 *  The message is built in a fixed 200-byte buffer; anything that
 *  does not fit is truncated rather than written past the buffer.
 */
static void sys_error( LSS *lss, char *msg, ... )
{
  char temp[200];
  size_t used = 0;
  int saved_errno = errno;  /* snapshot: the calls below may change errno */
  va_list args;

  temp[0] = 0;

  if (saved_errno)
    {
      int n = snprintf( temp, sizeof temp, "%s: ", strerror(saved_errno) );

      if (n < 0)
	temp[0] = 0;
      else if ((size_t)n >= sizeof temp)
	used = sizeof temp - 1;     /* prefix alone filled the buffer */
      else
	used = (size_t)n;
    }

  va_start( args, msg );
  vsnprintf( temp + used, sizeof temp - used, msg, args );
  va_end( args );

  signal_error( lss, LSSERR_SYS_ERR, temp );
}


/*
 *  Choose the capacity an index of size `old_cap' should grow to.
 *
 *  The first entry of index_capacities[] that is larger than
 *  `old_cap' is used.  Note that if the old capacity did not come
 *  from that table, the increase may be small (but it is at least 1).
 *
 *  Past the end of the table, the capacity is multiplied by 3/2
 *  (integer arithmetic) and the low four bits are forced on, which
 *  makes the result odd.
 */
static UINT_32 next_higher_capacity( UINT_32 old_cap )
{
  const UINT_32 *cap = index_capacities;
  const UINT_32 *end = index_capacities + NUM_CAPACITIES;

  while (cap < end)
    {
      if (*cap > old_cap)
	return *cap;
      cap++;
    }

  /* off the end of the table: grow by half, and make it fairly odd */
  return ((old_cap * 3) / 2) | 15;
}

/*
 *  Give `lss' a fresh, zero-filled index with room for `new_cap'
 *  records and record the new capacity.  Whatever lss->index pointed
 *  to before is neither freed nor copied; that is up to the caller.
 */
static void alloc_new_index( LSS *lss, UINT_32 new_cap )
{
  struct LSS_Record *fresh = ALLOCN( struct LSS_Record, new_cap );

  memset( fresh, 0, sizeof(struct LSS_Record) * new_cap );

  lss->index = fresh;
  lss->index_capacity = new_cap;
}


/*
 *  Grow the index.  find_entry() calls this when it is about to add
 *  an entry and the population has reached its 80% threshold.
 *
 *  A larger, empty index is allocated, every occupied record of the
 *  old index (number != 0) is re-inserted through find_entry(), and
 *  the old index is freed.  Pointers into the old index are invalid
 *  once this returns.
 */
static void increase_index_capacity( LSS *lss )
{
  struct LSS_Record *const old_index = lss->index;
  const UINT_32 old_cap = lss->index_capacity;
  const UINT_32 old_count = lss->index_count;
  const UINT_32 new_cap = next_higher_capacity( old_cap );
  struct LSS_Record *src;
  UINT_32 remaining, spotted = 0;

  alloc_new_index( lss, new_cap );

  /* entries are re-added one at a time, so the count restarts at 0 */
  lss->index_count = 0;

  for (remaining = old_cap, src = old_index; remaining > 0; remaining--, src++)
    {
      if (!src->number)
	continue;               /* empty slot */

      *find_entry( lss, src->number ) = *src;
      spotted++;
    }

  /* every record that was counted before must have been carried over */
  assert( old_count == lss->index_count );
  assert( spotted == lss->index_count );

  lss_log_entry( lss,
		 "increased index capacity from %u to %u (count = %u)\n",
		 old_cap, new_cap, old_count );
  free( old_index );
}

/*
 *  Close an LSS and release what this file allocated for it: the
 *  debug log descriptor (if one is open), the data file descriptor,
 *  the in-memory index and the LSS structure itself.  `lss' must not
 *  be used after this returns.
 *
 *  If lss_access() calls are still outstanding, that is reported as
 *  LSSERR_UNRELEASED_ACCESS first; the close then goes ahead if the
 *  error handler returns.  Buffers handed out by lss_access() are
 *  owned by the caller and are not freed here.
 *
 *  Nothing is committed here; see lss_commit().
 */
void lss_close( LSS *lss )
{
  if (lss->num_accesses)
    {
      signal_error( lss, LSSERR_UNRELEASED_ACCESS,
		    "outstanding lss_access() calls" );
    }

  /* logfd is negative when no debug log is open */
  if (lss->logfd >= 0)
    close( lss->logfd );

  close( lss->fd );

  free( lss->index );
  free( lss );
}

/*
 *  Install `handler' as the error handler of `lss', together with the
 *  `info' pointer that signal_error() passes back to it.
 */
void set_error_handler( LSS *lss, 
		        lss_error_handler_t *handler,
		        void *info )
{
  lss->error_handler = handler;
  lss->info = info;
}

/*
 * I'm factoring these two parts out of lss_open() in an
 * attempt to get around a gcc compiler bug:

	% gcc -O2 -DUSE_HW_REGS -DNDEBUG -DINLINES -DGC_MACROS -Wall
	-Wno-unused -I. -I- -I/u/rscheme/inst/0.6.1/4/6/include -ffixed-27
	-ffixed-26 -ffixed-25 -ffixed-24 -ffixed-23 -ffixed-22 -ffixed-21
	-ffixed-20 -ffixed-19 -ffixed-18 -ffixed-17 -ffixed-16 -ffixed-15
	-ffixed-14 -ffixed-13 -S -c lss.c
	gcc: Internal compiler error: program cc1 got fatal signal 6

  The assembly output ends just before the code for lss_open().

  This doesn't happen if I omit the "-ffixed-" decls (note that this
  file doesn't include <rscheme/regs.h>, so w/o the -ffixed, the compiler
  doesn't know to leave those registers alone), so it seems to
  have something to do with register pressure.
*/

/*
 *  Initialize `lss' as a brand new, empty store (the LSS_CREATE half
 *  of lss_open()).
 *
 *  Allocates the smallest index, zeroes the commit record and the
 *  client commit area, stamps the format version and creation time,
 *  and writes both areas at the current position of lss->fd.
 *  lss_magic is left at 0 here; lss_commit() is what sets it.
 *
 *  A failed or short write is reported through sys_error().
 *  Always returns `lss'.
 */
static LSS *lss_create( LSS *lss )
{
  commit_info_t *ci = &lss->last_commit;
  ssize_t rc;

  alloc_new_index( lss, index_capacities[0] );
  
  lss->index_count = 0;

  memset( ci, 0, sizeof(commit_info_t) );
  memset( lss->client_commit_info, 0, MAX_LSS_CLIENT_SIZE );

  ci->lss_fmt_version = LSS_FMT_VERSION;
  ci->create_time = time(NULL);
  
  /* clear errno so that a short write is not blamed on a stale error */
  errno = 0;

  rc = write( lss->fd, ci, sizeof(commit_info_t) );
  if (rc != (ssize_t)sizeof(commit_info_t))
    {
      sys_error( lss, "writing initial commit record" );
    }
  else
    {
      rc = write( lss->fd, lss->client_commit_info, MAX_LSS_CLIENT_SIZE );
      if (rc != (ssize_t)MAX_LSS_CLIENT_SIZE)
	sys_error( lss, "writing initial client commit info" );
    }

  lss->spare_commit_at = 0;
  return lss;
}

/*
 *  Load an existing store into `lss' (the non-create half of
 *  lss_open()).
 *
 *  Reads the commit record found at `CR_offset', the client commit
 *  info that follows it, and then the index at the offset named by
 *  the commit record.
 *
 *  Returns `lss' on success.  If the commit record cannot be read,
 *  has the wrong magic number or format version, or carries sizes
 *  that cannot be valid (client info larger than
 *  MAX_LSS_CLIENT_SIZE, or an index capacity of zero), the file
 *  descriptor is closed, `lss' is freed, errno is set to
 *  LSSERR_NOT_LSS or LSSERR_BAD_VER, and NULL is returned.
 *
 *  Later I/O failures are reported through sys_error(); if the error
 *  handler returns, loading carries on.
 */
static LSS *lss_open_existing( LSS *lss, off_t CR_offset )
{
  commit_info_t *ci = &lss->last_commit;
  size_t index_bytes;
  ssize_t n;          /* signed, so that a failed read() is recognizable */
  int why;

  ci->lss_magic = 0;
  errno = 0;

  lseek( lss->fd, CR_offset, SEEK_SET );
  n = read( lss->fd, ci, sizeof(commit_info_t) );

  if ((n != (ssize_t)sizeof(commit_info_t)) || (ci->lss_magic != LSS_MAGIC))
    {
      why = LSSERR_NOT_LSS;
      goto reject;
    }
  if (ci->lss_fmt_version != LSS_FMT_VERSION)
    {
      why = LSSERR_BAD_VER;
      goto reject;
    }

  /*
   * These values come straight from the file.  An oversized client
   * length would overrun client_commit_info below, and a zero
   * capacity would make find_entry() divide by zero.
   */
  if ((ci->client_info_len > MAX_LSS_CLIENT_SIZE)
      || (ci->index_capacity == 0))
    {
      why = LSSERR_NOT_LSS;
      goto reject;
    }

  n = read( lss->fd, lss->client_commit_info, ci->client_info_len );
  if (n < 0)
    {
      sys_error( lss, "reading client commit info" );
    }
  else if ((size_t)n != ci->client_info_len)
    {
      sys_error( lss, "only read %d of %d for client commit info", 
		 (int)n, (int)ci->client_info_len );
    }
  
  lss->index_capacity = ci->index_capacity;
  lss->index_count = ci->index_count;
  lss->index = ALLOCN( struct LSS_Record, ci->index_capacity );
  
  if (lseek( lss->fd, ci->index_offset, SEEK_SET ) < 0)
    sys_error( lss, "seek to index" );

  index_bytes = sizeof( struct LSS_Record ) * lss->index_capacity;
  n = read( lss->fd, lss->index, index_bytes );
  if ((n < 0) || ((size_t)n != index_bytes))
    sys_error( lss, "reading index" );

  /*  
   *  this commit record was copied just after the index 
   */
  lss->spare_commit_at = lseek( lss->fd, 0, SEEK_CUR );
  return lss;

 reject:
  close(lss->fd);
  free(lss);
  errno = why;      /* set last, so that close()/free() cannot disturb it */
  return NULL;
}

/*
 *  Open the debug log for `lss' and write the opening entries.
 *
 *  The log file is "<path>.log", opened for appending and created if
 *  necessary.  If the name does not fit the local buffer, or the
 *  open fails, lss->logfd is left negative and
 *  LSSERR_DEBUG_OPEN_FAILED is signalled; the log entries below are
 *  then no-ops, because lss_log_entry() ignores a negative logfd.
 *
 *  `mode' is the mode given to lss_open(); it is only used to
 *  describe the open in the log.
 */
static void debug_log_open( LSS *lss, const char *path, int mode )
{
  char log_path[2000];
  char mode_desc[32];   /* longest text is "(WRITE, LOCK, CREATE)" */
  int n;

  n = snprintf( log_path, sizeof log_path, "%s.log", path );

  if ((n >= 0) && ((size_t)n < sizeof log_path))
    lss->logfd = open( log_path, O_WRONLY|O_APPEND|O_CREAT, 0666 );
  else
    lss->logfd = -1;    /* name was truncated; don't open the wrong file */

  if (lss->logfd < 0)
    {
      signal_error( lss,
		    LSSERR_DEBUG_OPEN_FAILED, 
		    "open of debug log failed" );
    }

  snprintf( mode_desc, sizeof mode_desc, "(%s%s%s)",
	    (mode & LSS_WRITE) ? "WRITE" : "READ",
	    (mode & LSS_LOCK) ? ", LOCK" : "",
	    (mode & LSS_CREATE) ? ", CREATE" : "" );

  lss_log_entry( lss, "opened %s %s\n", path, mode_desc );
  lss_log_entry( lss, ">> index is %u/%u (%.1f%%) at +%u\n", 
		 lss->index_count, lss->index_capacity,
		 (100.0 * lss->index_count) / lss->index_capacity,
		 lss->last_commit.index_offset );
  lss_log_entry( lss, ">> previous commit record at +%u\n",
		 lss->last_commit.prev_commit_at );
}

/*
 *  Open (or create) the store in file `path'.
 *
 *  mode       bit mask of LSS_CREATE, LSS_WRITE, LSS_LOCK and
 *             LSS_KEEP_LOG.  Without LSS_CREATE or LSS_WRITE the file
 *             is opened read-only.
 *  handler,
 *  info       error handler and its client data (see signal_error())
 *  CR_offset  file offset of the commit record to load; a non-zero
 *             offset is rejected together with LSS_CREATE or LSS_WRITE
 *
 *  Returns the new LSS, or NULL with errno set: to
 *  LSSERR_INVALID_ROLLBACK or LSSERR_LOCKED, to whatever open() left
 *  there, or to the code chosen by lss_open_existing().
 */
LSS *lss_open( const char *path, int mode,
	       lss_error_handler_t *handler, 
	       void *info,
	       off_t CR_offset )
{
int fd;
LSS *s;

    if (CR_offset && (mode & (LSS_CREATE | LSS_WRITE)))
      {
	errno = LSSERR_INVALID_ROLLBACK;
	return NULL;
      }

    if (mode & LSS_CREATE)
	fd = open( path, O_CREAT | O_RDWR, 0666 );
    else if (mode & LSS_WRITE)
	fd = open( path, O_RDWR );
    else
	fd = open( path, O_RDONLY );

    if (fd < 0)
	return NULL;

    if ((mode & LSS_LOCK) && (lss_lock( fd ) < 0))
      {
	close(fd);
	errno = LSSERR_LOCKED;  /* set after close(), which may change errno */
	return NULL;
      }

    s = ALLOC(LSS);
    s->fd = fd;
    s->lock_held = (mode & LSS_LOCK) ? 1 : 0;

    s->info = info;
    s->error_handler = handler;
    s->num_accesses = 0;

    /* no debug log yet; set this before the helpers below can hand
       `s' to the error handler */
    s->logfd = -1;

    if (mode & LSS_CREATE)
      s = lss_create( s );
    else
      s = lss_open_existing( s, CR_offset );

    if (!s)
      return NULL;

    if (mode & LSS_KEEP_LOG)
      debug_log_open( s, path, mode );

    return s;
}

/*
 *  Byte-indexed table used by find_entry() to hash record numbers:
 *  each byte of the record number selects one entry, and the four
 *  selected entries are multiplied together.
 *
 *  It is a random permutation with the 0 value replaced by 255
 *  (which is why 255 occurs twice) -- presumably so that no factor
 *  can force the product to zero.
 */

static UINT_8 hash_permutation[256] = {
240, 235, 36, 105, 218, 102, 186, 24, 61, 255, 252, 65, 16, 177, 48,
120, 32, 88, 234, 150, 178, 176, 229, 154, 33, 41, 30, 130, 137, 163,
107, 98, 93, 126, 58, 171, 106, 147, 192, 115, 132, 129, 180, 230,
124, 217, 231, 221, 156, 44, 118, 8, 225, 99, 42, 140, 17, 182, 172,
91, 158, 179, 103, 239, 63, 9, 6, 233, 54, 157, 159, 162, 169, 86, 95,
175, 104, 210, 2, 117, 57, 201, 167, 134, 82, 125, 74, 119, 241, 143,
25, 114, 75, 181, 62, 78, 53, 165, 136, 64, 66, 67, 109, 80, 247, 246,
245, 21, 213, 5, 168, 222, 148, 188, 4, 23, 145, 212, 244, 15, 43,
242, 227, 116, 208, 141, 71, 3, 52, 135, 139, 26, 85, 14, 40, 205,
133, 243, 149, 110, 216, 146, 34, 128, 152, 204, 37, 68, 214, 73, 198,
200, 27, 142, 174, 11, 122, 197, 87, 164, 203, 127, 96, 166, 50, 100,
19, 153, 251, 184, 215, 12, 108, 144, 20, 151, 22, 113, 121, 29, 72,
191, 255, 237, 35, 10, 94, 59, 236, 223, 253, 89, 238, 155, 211, 31,
224, 131, 185, 193, 49, 56, 38, 249, 79, 70, 160, 47, 228, 219, 97,
170, 45, 161, 55, 226, 39, 46, 101, 76, 1, 207, 112, 250, 220, 84, 81,
206, 232, 173, 183, 51, 199, 196, 190, 248, 209, 7, 111, 90, 202, 13,
189, 69, 195, 60, 28, 92, 254, 83, 187, 138, 194, 123, 77, 18 };

/*
 *  Look up the index record for `rec_num', adding one if it is not
 *  there yet.
 *
 *  The index is an open-addressed hash table: the starting slot comes
 *  from hashing the four bytes of `rec_num' through
 *  hash_permutation[], and the search then moves forward one slot at
 *  a time, wrapping around at the end, until it meets either the
 *  record or an empty slot (number == 0).
 *
 *  On reaching an empty slot, the index is first grown if the 80%
 *  check fires (and the lookup restarted, since growing replaces the
 *  whole table); otherwise the slot is claimed for `rec_num' and
 *  index_count goes up by one.
 *
 *  The returned pointer is only good until the index next grows.
 */
static struct LSS_Record *find_entry( LSS *lss, UINT_32 rec_num )
{
  struct LSS_Record *entry;
  UINT_32 slot;

  /* hash: the product is at most 4,228,250,625 (0xFC05FC01), so it
     fits in 32 bits, but its distribution is not flat */

  slot = hash_permutation[ rec_num & 0xFF ];
  slot *= hash_permutation[ (rec_num >> 8) & 0xFF ];
  slot *= hash_permutation[ (rec_num >> 16) & 0xFF ];
  slot *= hash_permutation[ (rec_num >> 24) & 0xFF ];

  slot %= lss->index_capacity;

  /* probe sequentially; the table is never 100% full, so this ends
     at the record or at an empty slot */

  for (;;)
    {
      entry = &lss->index[slot];

      if (entry->number == rec_num)
	return entry;

      if (!entry->number)
	break;

      slot++;
      if (slot >= lss->index_capacity)
	slot = 0;               /* wrapped around... back to the start */
    }

  /* not present: check for overflow at 80% before adding */

  if (((lss->index_count * 10) / 8) >= lss->index_capacity)
    {
      increase_index_capacity( lss );

      /* `entry' points into the old table now, so look again */
      return find_entry( lss, rec_num );
    }

  lss_log_entry( lss,
		 "adding new entry for rec[%#x] in index[%u]\n", 
		 rec_num, slot );
  lss->index_count++;
  entry->number = rec_num;
  return entry;
}

/*
 *  Write `bytes' bytes from `data' at the current position of
 *  lss->fd and report anything other than a complete write.
 *
 *  `rec' is only used in the error text: a non-zero value names the
 *  record being written, 0 means the data belongs to a commit.
 *  A failed write goes through sys_error(); a short write is
 *  signalled as LSSERR_SYS_ERR.
 */
static void do_write( LSS *lss, void *data, UINT_32 bytes, UINT_32 rec )
{
  char temp[100];
  int n = write( lss->fd, data, bytes );

  if (n == bytes)
    return;                     /* everything went out */

  if (n < 0)
    {
      if (rec)
	sys_error( lss, "writing record %08x (%u bytes)", rec, bytes );
      else
	sys_error( lss, "writing commit record (%u bytes)", bytes );
      return;
    }

  if (rec)
    {
      sprintf( temp, "partial write of record %08x (%u of %u)",
	       rec, n, bytes );
    }
  else
    {
      sprintf( temp, "partial write of commit record (%u of %u)",
	       n, bytes );
    }
  signal_error( lss, LSSERR_SYS_ERR, temp );
}


/*
 *  Append `bytes' bytes from `data' to the file as the new contents
 *  of record `record_num', and point the record's index entry (which
 *  is created if necessary) at them.
 *
 *  The index entry keeps the exact length.  On disk the data is
 *  followed by 0 to 3 zero bytes so that the amount written is a
 *  multiple of 4.  The padding comes from a local buffer, so nothing
 *  beyond data[bytes-1] is ever read from the caller's buffer.
 *
 *  Write failures are reported by do_write().
 */
void lss_write( LSS *lss, UINT_32 record_num, void *data, UINT_32 bytes )
{
struct LSS_Record *r = find_entry( lss, record_num );
char zeros[4] = { 0, 0, 0, 0 };
UINT_32 pad = (4-bytes) & 3;    /* bytes needed to reach a multiple of 4 */

    r->offset = lseek( lss->fd, 0, SEEK_END );
    r->length = bytes;

    lss_log_entry(lss,
		  "writing (RECORD %#x) at +%u for %u bytes\n", 
		  record_num, r->offset, r->length );

    do_write( lss, data, bytes, record_num );

    /* actually write out a word-aligned amount of stuff */
    if (pad)
      do_write( lss, zeros, pad, record_num );
}

/*
 *  Read record `record_num' into a freshly malloc'ed buffer.
 *
 *  On success a->addr, a->bytes and a->record_num describe the
 *  buffer, and the count of outstanding accesses goes up by one; the
 *  caller gives the buffer back with lss_release().  Read problems
 *  are reported through sys_error().
 *
 *  If the record has never been written (its index entry has no
 *  offset), LSSERR_NO_RECORD is signalled and `a' is left untouched.
 *  find_entry() will have added an index entry for the lookup, so
 *  that entry is withdrawn again -- both the slot and the population
 *  count, so that index_count keeps matching the table contents.
 */
void lss_access( LSS *lss, UINT_32 record_num, access_t *a )
{
  struct LSS_Record *r = find_entry( lss, record_num );
  int nb;

  if (!r->offset)
    {
      char temp[50];

      /* r->number is 0 when find_entry() added nothing (record
	 number 0); there is nothing to take back in that case */
      if (r->number)
	{
	  r->number = 0;
	  if (lss->index_count > 0)
	    lss->index_count--;
	}

      sprintf( temp, "Record not found (%08x)", record_num );
      signal_error( lss, LSSERR_NO_RECORD, temp );
      return;
    }

  lseek( lss->fd, r->offset, SEEK_SET );
  a->addr = malloc( a->bytes = r->length );
  nb = read( lss->fd, a->addr, a->bytes );
  if (nb != a->bytes)
    {
      if (nb < 0)
	sys_error( lss, "accessing record %#x", record_num );
      else
	sys_error( lss, "accessing record %#x (only read %d of %u bytes)",
		   record_num, nb, a->bytes );
    }

  a->record_num = record_num;
  lss->num_accesses++;
  lss_log_entry(lss,
		"accessing (RECORD %#x) at +%u for %u bytes\n", 
		record_num, r->offset, r->length );
}

/*
 *  Give back a buffer obtained from lss_access(): one outstanding
 *  access is counted off and the buffer at a->addr is freed.
 */
void lss_release( LSS *lss, access_t *a )
{
  char *buffer = (char *)a->addr;

  assert( lss->num_accesses > 0 );
  lss->num_accesses -= 1;

  free( buffer );
}


/*
 *  Commit the current state of the store.
 *
 *  In order:
 *    1. the whole index is appended to the file;
 *    2. a spare copy of the commit record and `client_info' is
 *       written right behind it, and the file is fsync'ed;
 *    3. the commit record and `client_info' are written at the start
 *       of the file, and the file is fsync'ed again.
 *
 *  client_info, client_len   client data stored with the commit
 *
 *  If the commit record plus the client data does not fit the
 *  512-byte staging buffer, LSSERR_TOO_BIG is signalled and nothing
 *  is written.
 *
 *  NOTE(review): this limit comes from the staging buffer, not from
 *  MAX_LSS_CLIENT_SIZE -- confirm that the two agree.
 */
void lss_commit( LSS *lss, void *client_info, UINT_32 client_len )
{
  char temp[512];
  
  /* phrased so that the sum cannot wrap around */
  if ((sizeof(commit_info_t) > sizeof temp)
      || (client_len > sizeof temp - sizeof(commit_info_t)))
    {
      signal_error( lss, LSSERR_TOO_BIG, "client commit info too big" );
      /* the handler may return; going on would overrun temp[] below */
      return;
    }
  
  lss->last_commit.lss_magic = LSS_MAGIC;
  lss->last_commit.index_capacity = lss->index_capacity;
  lss->last_commit.index_count = lss->index_count;
  lss->last_commit.index_offset = lseek( lss->fd, 0, SEEK_END );
  lss->last_commit.client_info_len = client_len;


  do_write( lss, 
	    lss->index, 
	    sizeof(struct LSS_Record) * lss->index_capacity,
	    0 );

  lss->last_commit.commit_version++;
  lss->last_commit.commit_time = time(NULL);
  lss->last_commit.prev_commit_at = lss->spare_commit_at;

  lss->spare_commit_at = lseek( lss->fd, 0, SEEK_CUR );

  /*
   * write out a copy of the commit record, in case the first
   * block is lost or you want to roll back multiple versions
   */
  do_write( lss,
	    &lss->last_commit, 
	    sizeof(commit_info_t),
	    0 );
  do_write( lss,
	    client_info,
	    client_len,
	    0 );

  lseek( lss->fd, 0, SEEK_SET );
  if (fsync( lss->fd ) < 0)
    {
      sys_error( lss, "fsync()'ing pre-commit record" );
    }
    
  lss_log_entry(lss,
		"committing cv %u, index (%u/%u) at %u\n",
		lss->last_commit.commit_version,
		lss->last_commit.index_count, 
		lss->last_commit.index_capacity,
		lss->last_commit.index_offset );
  
  /* stage record + client data together, so that the head of the
     file is rewritten with a single write */
  memcpy( temp, &lss->last_commit, sizeof( commit_info_t ) );
  memcpy( temp + sizeof( commit_info_t ), client_info, client_len );

  do_write( lss, 
	    temp,
	    sizeof(commit_info_t) + client_len,
	    0 );

  if (fsync( lss->fd ) < 0)
    {
      sys_error( lss, "fsync()'ing commit record" );
    }
}


/*
 *  Return the LSS's client commit info buffer.  When `len' is not
 *  NULL, the client_info_len of the last commit record is stored
 *  through it.
 */
void *lss_get_client_commit_info( LSS *lss, UINT_32 *len )
{
  void *info = lss->client_commit_info;

  if (!len)
    return info;

  *len = lss->last_commit.client_info_len;
  return info;
}

/* Copy the LSS's last commit record into `*ci'. */
void lss_get_lss_commit_info( LSS *lss, commit_info_t *ci )
{
  const commit_info_t *latest = &lss->last_commit;

  *ci = *latest;
}

/*
 *  Append one line to the debug log; does nothing when no log is
 *  open (lss->logfd < 0).
 *
 *  The line consists of a local timestamp, the value of $USER (or
 *  "unknown"), "lss[<pid>]", and then the printf-style message given
 *  by `fmt' and its arguments.  The caller supplies the trailing
 *  newline.
 *
 *  The line is built in a fixed 5000-byte buffer and truncated if it
 *  does not fit.  Logging is best effort: a failed write is ignored.
 */
static void lss_log_entry( LSS *lss, char *fmt, ... )
{
  char temp[5000];
  size_t used, room;
  int n;
  va_list a;
  time_t t;
  struct tm *lt;
  const char *user;

  if (lss->logfd < 0)
    return;

  t = time(NULL);
  lt = localtime(&t);

  /* strftime() returns 0 (buffer contents unspecified) if the stamp
     does not fit; start from an empty line then */
  used = lt ? strftime( temp, 100, "%Y.%m.%d %H:%M:%S", lt ) : 0;
  temp[used] = 0;

  user = getenv("USER");
  room = sizeof temp - used;
  n = snprintf( temp + used, room, " %s lss[%d] ",
		user ? user : "unknown",
		(int)getpid() );
  if (n > 0)
    used += ((size_t)n < room) ? (size_t)n : room - 1;

  room = sizeof temp - used;
  va_start( a, fmt );
  n = vsnprintf( temp + used, room, fmt, a );
  va_end( a );
  if (n > 0)
    used += ((size_t)n < room) ? (size_t)n : room - 1;

  if (write( lss->logfd, temp, used ) < 0)
    {
      /* best effort only; a lost log line is not worth an error */
    }

  /* write() gets it outside the process, but
     for this purpose, we don't care about getting
     these messages to stable store.  WHICH IMPLIES
     that in the event of a crash, the database may
     have newer data than the log file has!

     (hence no fsync() of lss->logfd here)
  */
}
