/*
COMPARE.C
**********************************************************************

   file compare program, similar to unix diff


Source code is hereby donated to the Public Domain by the author
Nick Ramsay 5 August 1996.

email : nick@sycom1.demon.co.uk

Program was originally written by combining source code from several
PD diff-type programs + the author's own routines.

**********************************************************************

This COMPARE program implements Paul Heckel's algorithm from the
Communications of the Association for Computing Machinery,
April 1978 for detecting the differences between two files.
This algorithm has the advantage over more commonly used compare
algorithms that it is fast and can detect differences of an
arbitrary number of lines.  It has the defect that it reads both
files twice if there are differences.

It also uses getopt() - a UNIX-like command-line parser.



Compiling & linking
-------------------
You must compile & link with a model which supports > 64K of DATA.
The program allocates large amounts of data for string space & its
hash tables.  Stats follow:
 Code : 5956  Constants : 630  Data : 56484

The program should compile OK using either Microsoft, TopSpeed or Turbo C
compilers.  Any changes needed should be minimal, if any.

**********************************************************************

The command format is: COMPARE [options] filespec1 filespec2

Options:
         /f                  show full lines.
         /t                  expand tabs before comparing.
         /b                  ignore trailing blanks.
         /w                  ignore spaces and tabs.
         /y                  case insensitive compare.

Defaults are:
         Brief         show first 34 characters of lines.
         No tabs         don't expand tabs.
         No trim         don't ignore trailing blanks.
         White         compare spaces and tabs.
         Case                  case sensitive compare.

Arbitrary path qualification is allowed in the filespecs. In addition,
the output can be redirected to the printer or a file with normal DOS
redirection conventions (e.g. > PRN).

The design limitations of the program are that only the first 256
characters of each record are compared, and only the first 5000 lines
of each file are read; any contents beyond that are ignored.

*********************************************************************

*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <io.h>
#include <dos.h>
#include <ctype.h>                  /* for toupper */
#include <string.h>                  /* for strcpy, strupr */

/* Maximum number of lines hashed per file (dimension of hash1/hash2). */
#define MAXLINES     5000

/* Option bits stored in flag1. */
#define FULL         0x80         /* /f: show full lines */
#define TABS         0x40         /* /t: expand tabs */
#define TRIM         0x20         /* /b: ignore trailing blanks */
#define WHITE        0x10         /* set by default in flag1; cleared by /w */
#define CASE         0x08         /* /y: lines are upper-cased before hashing */
#define BLANK        0x04         /* not referenced by the code visible in this file */

#define TRUE            1
#define FALSE           0

/* BIOS screen helpers, defined after main(). */
void clrscr(void);
void homescr(void);

/* Command-line parser and its state, defined at the end of this file. */
int getopt(int argc, char *argv[], char *optionS);
int    optind         = 1;        /* index of which argument is next */
char   *optarg;                   /* pointer to argument of current option */
int    opterr         = 1;        /* allow error message */

static char   *letP = NULL;       /* remember next option char's location */
static char      SW = 0;          /* DOS switch character, either '-' or '/' */

/*
 * Per-line tables for file 1 and file 2.  input() stores the negated
 * (always negative) hash of each line; main() later overwrites an entry
 * with the non-negative index of the matching line in the other file.
 */
long hash1[MAXLINES], hash2[MAXLINES];
/*
 * Occurrence vectors: 2-bit slots, four per byte, accessed through
 * getbits()/setbits() (8192 bytes = 32768 slots).
 */
unsigned char occ1[8192], occ2[8192];
int n1, n2;                       /* line counts returned by input() for each file */
FILE *file1, *file2;              /* the two files being compared */
char name[80], *s;                /* name: path of file 2 built in main(); s: base name of file 1 */
int different = 0;                /* set to 1 by output(); returned by main() */

/* Active option bits (see the flag definitions above). */
unsigned char flag1 = WHITE;

/* Print the usage summary and option list on standard output. */
void givehelp(void)
{
    static const char *const help_text[] = {
        "Usage is: COMPARE [options] FileSpec1 FileSpec2\n",
        "Options are :-\n",
        "\t/f\tShow full lines.\n",
        "\t/t\tExpand tabs before comparing.\n",
        "\t/b\tIgnore trailing blanks.\n",
        "\t/w\tIgnore spaces and tabs.\n",
        "\t/y\tCase insensitive compare.\n",
        "\nFilespec2 can be a drive or directory name.\n",
        "NOTE: Files of over 5,000 lines are truncated\n"
    };
    unsigned int row;

    for (row = 0; row < sizeof help_text / sizeof help_text[0]; row++)
        printf("%s", help_text[row]);
}

/*
 * Expand tabs to spaces, with tab stops every 8 columns.
 *
 * s1: NUL-terminated input line.
 * s2: output buffer; must hold at least 256 bytes (every caller in this
 *     file passes a 256-byte array).
 *
 * The expanded text is truncated to 255 characters plus the terminating
 * NUL.  The output index used to be an unsigned char, so an expansion
 * that reached 256 columns wrapped back to column 0 and destroyed the
 * result; an int index with an explicit bound avoids that.
 */
void tabex(unsigned char *s1, unsigned char *s2)
{
    enum { OUT_MAX = 255, TAB_WIDTH = 8 };
    int in;
    int col = 0;

    for (in = 0; s1[in] != 0 && col < OUT_MAX; in++) {
        if (s1[in] != '\t') {
            s2[col++] = s1[in];
            continue;
        }
        /* pad to the next tab stop, but never past the end of s2 */
        do {
            s2[col++] = ' ';
        } while (col % TAB_WIDTH != 0 && col < OUT_MAX);
    }
    s2[col] = 0;
}

/*
 * Copy s1 to s2 with every space and tab character removed.
 * s2 must be large enough to hold s1 including its terminating NUL.
 */
void zapwhite(unsigned char *s1, unsigned char *s2)
{
    const unsigned char *src = s1;
    unsigned char *dst = s2;

    while (*src != 0) {
        unsigned char c = *src++;

        if (c == ' ' || c == '\t')
            continue;
        *dst++ = c;
    }
    *dst = 0;
}

/*
 * Read one 2-bit slot from an occurrence vector.
 *
 * The vector packs four slots per byte, the first slot in the two most
 * significant bits.  Only the low 15 bits of indx select the slot.
 * Returns the slot value (0..3).
 */
unsigned char getbits(unsigned char *array, unsigned long indx)
{
    unsigned slot = (unsigned)(indx & 32767);
    unsigned byte_no = slot / 4;
    unsigned shift = (3 - slot % 4) * 2;

    return (unsigned char)((array[byte_no] >> shift) & 0x03);
}

/*
 * Store a 2-bit value in one slot of an occurrence vector.
 *
 * The vector packs four slots per byte, the first slot in the two most
 * significant bits.  Only the low 15 bits of indx select the slot.
 * Only the low two bits of x are stored; masking keeps an out-of-range
 * value from spilling into the neighbouring slot.
 */
void setbits(unsigned char *array, unsigned long indx, unsigned char x)
{
    unsigned slot = (unsigned)(indx & 32767);
    unsigned byte_no = slot >> 2;
    unsigned shift = (3 - (slot & 3)) << 1;

    array[byte_no] &= ~(0x03 << shift);
    array[byte_no] |= (x & 0x03) << shift;
}

/*
 * Read a file line by line, building its hash and occurrence tables.
 *
 * file:     stream to read (up to 255 characters are taken per fgets call).
 * hashvect: receives one entry per line read; the value stored is always
 *           negative (the hash, negated if necessary) so that main() can
 *           later tell unlinked lines from non-negative line indices.
 * occ:      occurrence vector; the slot selected by each line's hash is
 *           advanced 0 -> 1 -> 2 and then stays at 2.
 *
 * Line preprocessing follows the option bits in flag1:
 *   WHITE clear (/w given)  strip all spaces and tabs,
 *   TABS  set   (/t)        expand tabs,
 *   CASE  set   (/y)        upper-case the line,
 *   TRIM  set   (/b)        cut the line at its newline and drop
 *                           trailing spaces.
 * The WHITE test used to be inverted: white space was stripped by default
 * and preserved with /w, the opposite of the documented behaviour.
 *
 * Returns the number of lines read.  When MAXLINES lines have been read a
 * truncation message is printed and MAXLINES-1 is returned.
 */
int input(FILE *file, long hashvect[], unsigned char occ[])
{
    int i, j;
    long h;
    unsigned long slot;
    unsigned char bits, buffer[256], temp[256];
    long hash(unsigned char *s);

    for (i = 0; i < MAXLINES; i++) {
        if ((flag1 & WHITE) == 0) {
            /* white space is to be ignored */
            if (fgets((char *)temp, 256, file) == NULL) return i;
            zapwhite(temp, buffer);
        } else if (flag1 & TABS) {
            if (fgets((char *)temp, 256, file) == NULL) return i;
            tabex(temp, buffer);
        } else {
            if (fgets((char *)buffer, 256, file) == NULL) return i;
        }

        if (flag1 & CASE) strupr((char *)buffer);

        if (flag1 & TRIM) {
            /* find the end of the text, then back up over trailing spaces */
            for (j = 0; j < 256 && buffer[j] && buffer[j] != '\n'; j++)
                ;
            for (j = j - 1; j >= 0 && buffer[j] == ' '; j--)
                ;
            buffer[j + 1] = 0;
        }

        h = hash(buffer);
        hashvect[i] = (h < 0) ? h : -h;

        /* negate in unsigned arithmetic so the most negative long is safe */
        slot = 0UL - (unsigned long)hashvect[i];
        bits = getbits(occ, slot);
        if (bits == 0)
            setbits(occ, slot, 1);
        else if (bits == 1)
            setbits(occ, slot, 2);
    }
    printf("File truncated at %d lines.\n", MAXLINES);
    return i - 1;
}

/*
 * Hash a NUL-terminated character string.
 *
 * For every character the running value is rotated left by one bit
 * (across the full width of a long) and then XORed with the character.
 * The rotate is done on an unsigned long: left-shifting a negative signed
 * long, as the code used to do, is undefined behaviour in C.
 *
 * Returns the hash; never 0 (a result of 0 is replaced by 1).  The value
 * may be negative.
 */
long hash(unsigned char *s)
{
    const unsigned long top_bit = ~(~0UL >> 1);   /* only the highest bit set */
    unsigned long h = 0;

    while (*s) {
        unsigned long carry = (h & top_bit) ? 1UL : 0UL;

        h = (h << 1) | carry;
        h ^= *s++;
    }
    if (h == 0) h = 1;
    return (long)h;
}

/*
 * Display one row of the side-by-side difference listing.
 *
 * l1, l2: zero-based line numbers in file1 / file2 to show in the left and
 *         right halves of the row; a negative value leaves that half empty.
 *
 * Row layout: left line number in columns 0-3 and text from column 5,
 * right line number in columns 40-43 and text from column 45; each half
 * shows at most 34 characters.  With the FULL option the rest of a longer
 * line is printed on continuation rows, 34 characters at a time.
 *
 * Lines are re-read sequentially from the global streams file1 and file2.
 * The static counters cl1/cl2 hold the next line number to be read from
 * each stream; if l1 (or l2) is below that counter no line is read and the
 * corresponding buffer is left unset.  Every call sets the global
 * `different` to 1.
 */
void output(int l1,int l2)
{
         static int cl1 = 0, cl2 = 0;     /* next unread line of file1 / file2 */
         char line[81];                   /* one 80-column output row + NUL */
         unsigned int bi1=0, bi2=0;       /* offset of the next unprinted char in buffer1 / buffer2 */
         unsigned char end1=1, end2=1;    /* 1 = nothing left to print for that side */
         int i;
         char buffer1[256], buffer2[256], temp[256];

         different = 1;

         /* start from a blank row */
         for (i=0; i<80; i++) line[i] = ' ';
         line[80] = 0;
         if (l1>=0) {
                  /* read forward to line l1; only the last line read is kept */
                  for (i=cl1; i<=l1; i++)
                           if((flag1&TABS)==0) fgets(buffer1,256,file1);
                           else {
                                    fgets(temp,256,file1);
                                    tabex(temp,buffer1);
                           }
                  cl1 = l1 + 1;
                  sprintf(line,"%4d ",l1+1);
                  line[5] = ' ';           /* overwrite the NUL written by sprintf */
                  for (i=0; buffer1[i+bi1] && buffer1[i+bi1]!='\n' && i<34; i++)
                           line[i+5] = buffer1[i+bi1];
                  if (i==34) {
                           /* column filled: more text may remain for continuation rows */
                           bi1 += 34;
                           end1 = 0;
                  }
         }
         if (l2>=0) {
                  /* same procedure for the right-hand file */
                  for (i=cl2; i<=l2; i++)
                           if((flag1&TABS)==0) fgets(buffer2,256,file2);
                           else {
                                    fgets(temp,256,file2);
                                    tabex(temp,buffer2);
                           }
                  cl2 = l2 + 1;
                  sprintf(line+40,"%4d ",l2+1);
                  line[45] = ' ';          /* overwrite the NUL written by sprintf */
                  for (i=0; buffer2[i+bi2] && buffer2[i+bi2]!='\n' && i<34; i++)
                           line[i+45] = buffer2[i+bi2];
                  if (i==34) {
                           bi2 += 34;
                           end2 = 0;
                  }
         }
         /*
          * i still holds the count left by the last loop that ran (the right
          * column if l2>=0, otherwise the left column), so the row is cut
          * 45+i characters in.
          */
         line[45+i] = '\n';
         line[46+i] = 0;
         fwrite(line,1,46+i,stdout);
         /* FULL option: keep printing continuation rows until both sides are done */
         if (flag1 & FULL) while (!end1 || !end2) {
                  for (i=0; i<80; i++) line[i] = ' ';
                  if (!end1) {
                           for (i=0; buffer1[i+bi1] && buffer1[i+bi1]!='\n' && i<34; i++)
                                    line[i+5] = buffer1[i+bi1];
                           if (i==34) bi1 += 34; else end1 = 1;
                  }
                  if (!end2) {
                           for (i=0; buffer2[i+bi2] && buffer2[i+bi2]!='\n' && i<34; i++)
                                    line[i+45] = buffer2[i+bi2];
                           if (i==34) bi2 += 34; else end2 = 1;
                  }
                  /* as above, i comes from whichever copy loop ran last */
                  line[45+i] = '\n';
                  line[46+i] = 0;
                  fwrite(line,1,46+i,stdout);
         }
}

/*
 * Test whether s1, folded to upper case, is a prefix of s2.
 *
 * Returns 1 when all of s1 was consumed without a mismatch and at least
 * `min` characters were compared; returns 0 otherwise (including when s2
 * ends before s1 does).
 */
int match(unsigned char *s1, unsigned char *s2, unsigned char min)
{
    unsigned int matched = 0;

    while (*s1 != 0 && *s2 != 0) {
        int folded = toupper(*s1);

        s1++;
        if (folded != *s2++)
            return 0;
        matched++;
    }
    return *s1 == 0 && matched >= min;
}

/* main program */
int main(int argc, char *argv[])
{
    int i,j,k,opt,ifile;
    unsigned char linked;

    clrscr();
    homescr();

    fprintf(stderr, "\nCOMPARE - A Public Domain Source File Diff Utility\n\n");

    if (argc<3)
    {
      givehelp();
      return 0;
    }

    /* get options */

    opterr = FALSE;                  /* handle errors ourself */
    while ((opt = getopt(argc, argv, "ftbwy")) != EOF)
    {
      switch (opt)
      {
        case '?':
          printf("Invalid command line option\n");
          givehelp();
          return(1);
        case 'f':
          flag1 |= FULL; break;
        case 't':
          flag1 |= TABS; break;
        case 'b':
          flag1 |= TRIM; break;
        case 'w':
          flag1 &= ~WHITE; break;
        case 'y':
          flag1 |= CASE; break;
      }
    }

    ifile = optind;                  /* index of first file parm */

    /* step 1: read first file and hash it */
    file1 = fopen(argv[ifile],"r");
    if (file1==0) {
                  printf("Unable to open file '%s'\n",argv[ifile]);
                  return 2;
         }

         printf("Reading file '%s'.\n",argv[ifile]);
         n1 = input(file1,hash1,occ1);
         fseek(file1,0L,0);

         /* get the file name, with dir name stripped off */
         for (i=j=0; (k=argv[ifile][i]) != 0; ++i)
                  if (k == ':' || k == '\\' || k == '/') j = i + 1;
         s = argv[1] + j;

         /* if argv[ifile+1] ends in : or \, tack on 1st file name */
         for (i=j=0; (k=argv[ifile+1][i]) != 0; ++i)
                  if (k == ':' || k == '\\' || k == '/') j = i + 1;
         strcpy(name,argv[ifile+1]);
         if (j == i) strcpy(name+j,s);

         /* step 2: read second file and hash it */
         file2 = fopen(name,"r");
         if (file2==0) {
                  /* maybe argv[ifile] was a directory, so try again */
                  if (j != i) {
                           name[i] = '\\';
                           strcpy(name+i+1,s);
                           file2 = fopen(name,"r");
                  }
         }

         if (file2==0) {
                  printf("Unable to open file '%s'.\n",name);
                  return 2;
         }

         printf("Reading file '%s'.\n",name);
         n2 = input(file2,hash2,occ2);
         fseek(file2,0L,0);

         /* step 3: identify lines that are unique in both files */
         for (i=0; i<8192; i++) occ1[i] &= occ2[i];

         /* step 4: link together matching unique lines */
         for (i=0; i<n1; i++) {
                  if (getbits(occ1,-hash1[i])!=1) continue;
                  for (j=0; i+j<n2 || i-j>=0; j++) {
                           if (i+j<n2) if (hash2[i+j]==hash1[i]) {
                                    hash1[i] = i+j;
                                    hash2[i+j] = i;
                                    break;
                           }
                           if (i-j>=0) if (hash2[i-j]==hash1[i]) {
                                    hash1[i] = i-j;
                                    hash2[i-j] = i;
                                    break;
                           }
                  }
         }

         /* step 5: link the first and last lines, if possible */
         if (hash1[0]<0 && hash1[0]==hash2[0]) hash1[0] = hash2[0] = 0;
         if (hash1[n1-1]<0 && hash1[n1-1]==hash2[n2-1]) {
                  hash1[n1-1] = n2-1;
                  hash2[n2-1] = n1-1;
         }

         /* step 6: starting from linked lines, link following lines that match */
         linked = 0;
         for (i=0; i<n1; i++) {
                  if (hash1[i]>=0) linked = 1;
                  else if (linked==1) {
                           if (hash1[i]==hash2[hash1[i-1]+1]) {
                                    hash1[i] = hash1[i-1]+1;
                                    hash2[hash1[i]] = i;
                           }
                           else linked = 0;
                  }
         }

         /* step 7: link matching lines that precede linked lines */
         linked = 0;
         for (i=n1-1; i>=0; i--) {
                  if (hash1[i]>=0) linked = 1;
                  else if (linked==1) {
                           if (hash1[i]==hash2[hash1[i+1]-1]) {
                                    hash1[i] = hash1[i+1] - 1;
                                    hash2[hash1[i]] = i;
                           } else linked = 0;
                  }
         }

         /* step 8: display the results */
         for (i=j=0; i<n1 && j<n2;) {
                  if (hash1[i]<j && hash2[j]<i) {
                           output(i++,j++);
                           continue;
                  }
                  if (hash1[i]<j) {
                           output(i++,-1);
                           continue;
                  }
                  if (hash2[j]<i) {
                           output(-1,j++);
                           continue;
                  }
                  if (hash1[i]==j) {
                           for (k=1; i+k<=n1 && j+k<=n2 && hash1[i+k]==j+k; k++);
                           printf("\n*** COMPARE ==> Found %d line(s) which match. ***\n\n",k);
                           i += k;
                           j += k;
                           continue;
                  }
                  if (hash1[i]-j <= hash2[j]-i) {
                           for (k=j; k<hash1[i]; k++) output(-1,k);
                           j = (int)hash1[i];
                           continue;
                  } else {
                           for (k=i; k<hash2[j]; k++) output(k,-1);
                           i = (int)hash2[j];
                           continue;
                  }
         }
         if (i<n1) for (k=i; k<n1; k++) output(k,-1);
         if (j<n2) for (k=j; k<n2; k++) output(-1,k);
         fclose(file1);
         fclose(file2);
         return different;
}

void clrscr(void)
{
  int atrib;
  {                               /* get char attrib @ cursor pos */
  union REGS regs;
  regs.h.ah = 8;
  regs.h.bh = 0;
  int86(0x10, &regs, &regs);
  atrib = regs.h.ah;
  }
  {                               /* scroll 0 - clears screen */
  union REGS regs;
  regs.h.ah = 6;
  regs.h.al = 0;
  regs.h.ch = 0;
  regs.h.cl = 0;
  regs.h.dh = 24;
  regs.h.dl = 79;
  regs.h.bh = atrib;
  int86(0x10, &regs, &regs);
  }
}

void homescr(void)
{
  {                                    /* move cursor to "home" position */
  union REGS regs;
  regs.h.ah = 2;
  regs.h.dh = 0;
  regs.h.dl = 0;
  regs.h.bh = 0;
  int86(0x10, &regs, &regs);
  }
}

/*
  Parse the command line options, System V style.

  Standard option syntax is:

    option ::= SW [optLetter]* [argLetter space* argument]

  where
    - SW is either '/' or '-', according to the current setting
      of the MSDOS switchar (int 21h function 37h).
    - there is no space before any optLetter or argLetter.
    - opt/arg letters are alphabetic, not punctuation characters.
    - optLetters, if present, must be matched in optionS.
    - argLetters, if present, are found in optionS followed by ':'.
    - argument is any white-space delimited string.  Note that it
      can include the SW character.
    - upper and lower case letters are distinct.

  There may be multiple option clusters on a command line, each
  beginning with a SW, but all must appear before any non-option
  arguments (arguments not introduced by SW).  Opt/arg letters may
  be repeated: it is up to the caller to decide if that is an error.

  The character SW appearing alone as the last argument is an error.
  The lead-in sequence SWSW ("--" or "//") causes itself and all the
  rest of the line to be ignored (allowing non-options which begin
  with the switch char).

  The string *optionS allows valid opt/arg letters to be recognized.
  argLetters are followed with ':'.  Getopt () returns the value of
  the option character found, or EOF if no more options are in the
  command line.          If option is an argLetter then the global optarg is
  set to point to the argument string (having skipped any white-space).

  The global optind is initially 1 and is always left as the index
  of the next argument of argv[] which getopt has not taken.  Note
  that if "--" or "//" are used then optind is stepped to the next
  argument before getopt() returns EOF.

  If an error occurs, that is an SW char precedes an unknown letter,
  then getopt() will return a '?' character and normally prints an
  error message via perror().  If the global variable opterr is set
  to false (zero) before calling getopt() then the error message is
  not printed.

  For example, if the MSDOS switch char is '/' (the MSDOS norm) and

    *optionS == "A:F:PuU:wXZ:"

  then 'P', 'u', 'w', and 'X' are option letters and 'F', 'U', 'Z'
  are followed by arguments.  A valid command line may be:

    aCommand  /uPFPi /X /A L someFile

  where:
    - 'u' and 'P' will be returned as isolated option letters.
    - 'F' will return with "Pi" as its argument string.
    - 'X' is an isolated option.
    - 'A' will return with "L" as its argument.
    - "someFile" is not an option, and terminates getOpt.  The
      caller may collect remaining arguments using argv pointers.
*/

/*  If you want to make getopt a .lib routine, uncomment the following

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <io.h>
#include <dos.h>

int    optind         = 1;        // index of which argument is next
char   *optarg;                   // pointer to argument of current option
int    opterr         = 1;        // allow error message


static char   *letP = NULL;       // remember next option char's location
static char      SW = 0;          // DOS switch character, either '-' or '/'

*/
/*
 * Return the next option letter from argv, EOF when the options are
 * exhausted, or '?' on an unknown letter or a missing option argument.
 *
 * State carried between calls: optind (next argv index), letP (position
 * inside the option cluster currently being scanned, NULL when a new
 * argv word must be started) and SW (the switch character, fetched once).
 */
int getopt(int argc, char *argv[], char *optionS)
{
         unsigned char ch;
         char *optP;

         if (SW == 0) {
                  /* first call: get SW using dos call 0x37 (result taken from DL) */
                  _AX = 0x3700;
                  geninterrupt(0x21);
                  SW = _DL;
         }

         if (argc > optind) {
                  if (letP == NULL) {
                           /* starting a new word: it must begin with the switch char */
                           if ((letP = argv[optind]) == NULL ||
                                    *(letP++) != SW)  goto gopEOF;
                           /* doubled switch char: consume the word and stop parsing */
                           if (*letP == SW) {
                                    optind++;  goto gopEOF;
                           }
                  }
                  /* nothing follows the switch char: consume the word and stop */
                  if (0 == (ch = *(letP++))) {
                           optind++;  goto gopEOF;
                  }
                  /* ':' is never a valid letter; anything not in optionS is an error */
                  if (':' == ch  ||  (optP = strchr(optionS, ch)) == NULL)
                           goto gopError;
                  if (':' == *(++optP)) {
                           /* letter takes an argument: rest of this word, else the next word */
                           optind++;
                           if (0 == *letP) {
                                    if (argc <= optind)  goto  gopError;
                                    letP = argv[optind++];
                           }
                           optarg = letP;
                           letP = NULL;
                  } else {
                           /* plain letter: move to the next word once this cluster is used up */
                           if (0 == *letP) {
                                    optind++;
                                    letP = NULL;
                           }
                           optarg = NULL;
                  }
                  return ch;
         }
gopEOF:
         /* no more options; the next call starts on a fresh word */
         optarg = letP = NULL;
         return EOF;

gopError:
         /* letP is left unchanged here; errno is set and perror runs only if opterr is non-zero */
         optarg = NULL;
         errno  = EINVAL;
         if (opterr)
                  perror ("get command line option");
         return ('?');
}

