/*
 * This file is for just setting up the structs, etc
 */
/* stdio SHOULD get included by Xos.h or something */
/* but it doesn't with sunos, at least */

#include <errno.h>
#include <stdio.h>	 /* for popen, and other things */
#include <stdlib.h>
#include <ctype.h>
#include <Xfuncs.h>
#include <Xlib.h>
#include <Xatom.h>
#include <Xutil.h>
#include <Intrinsic.h>
#include <StringDefs.h>
#include <Xos.h>

#include "defs.h"
#include "externs.h"

/* translations[] keeps track of which kanji it is okay to test the
 *	user on. It is indexed by the kanji index that readstructs()
 *	parses from each dictionary line; an entry that is NULL means
 *	that kanji is not usable.
 *	highestkanji and lowestkanji are set by readstructs() to the
 *	largest and smallest index it loaded. numberofkanji is only
 *	declared here; it is not assigned by the code in this section.
 *	YES, it is best to keep in a large array, otherwise
 *	it would be difficult to switch between grade levels.
 */
struct translationstruct *translations[MAXKANJIALLOWED];
int numberofkanji,highestkanji,lowestkanji;

/* Name of the dictionary file; filled in by readstructs() from the
 * "dictfile" resource.
 */
static char *dictname=NULL;


/* printline:
 *	Debugging helper. Writes the bytes of the NUL-terminated
 *	string s to stdout, followed by a newline.
 */
void printline(unsigned char *s)
{
	unsigned char *cursor;

	for(cursor = s; *cursor != '\0'; cursor++)
		putchar(*cursor);
	putchar('\n');
}

/* xtoi:
 *	Converts the hexadecimal ASCII number at the start of s to an int.
 *	Leading whitespace is skipped and conversion stops at the first
 *	character that cannot be part of a hex number.
 *	Returns 0 when s does not start with a hex number.
 *
 *	Uses strtoul() rather than sscanf("%x"): sscanf wants a
 *	(const char *) string and an (unsigned *) result, neither of
 *	which matches what we have here.
 *	only used in readstructs()
 */
int xtoi(unsigned char * s)
{
	return (int) strtoul((const char *) s, NULL, 16);
}

/* getline:
 *	reads a line (from dictionary) into s.
 *	returns true (1) if a line was read, otherwise
 *	returns false (0) at end of input.
 *
 *	A line ends at a linefeed (10) or carriage return (13); the
 *	terminator is not stored. A CR-LF pair therefore produces one
 *	extra empty line. A NUL byte in the data is treated like end
 *	of input.
 *
 *	s must have room for MAXLINELEN bytes. Longer lines are
 *	truncated to MAXLINELEN-1 characters; the rest of the line is
 *	read and thrown away. A last line that has no line terminator
 *	is still returned.
 *
 *	Input is fetched 100 bytes at a time into the global instring[]
 *	buffers. That state is global and never reset, so this only
 *	supports reading one stream.
 *
 *	NOTE(review): POSIX <stdio.h> also declares a getline() with a
 *	different signature; this may clash depending on feature-test
 *	macros. Renaming would have to be done together with callers.
 *
 *	used in "readstructs", below.
 */

unsigned char instring[2][MAXLINELEN];
unsigned char *inptr=NULL;
int whichstring;

int getline(FILE *fp,unsigned char *s)
{
	int stored = 0;		/* characters placed in s so far */

	if(inptr==NULL){
		/* first call: pretend buffer 0 is used up, to force a read */
		inptr= &instring[0][100];
		whichstring=0;
	}
	for(;;){
		if(inptr == &instring[whichstring][100]){
			size_t got;

			/* current buffer exhausted: refill the other one */
			whichstring = 1-whichstring;
			inptr=instring[whichstring];
			got=fread(instring[whichstring],1,100,fp);
			if(got<100){
				/* short read: mark where the data stops.
				 * A complete line before the mark is still
				 * delivered by the code below.
				 */
				instring[whichstring][got]='\0';
			}
		}
		switch(*inptr){
			case 0:
				/* end of input; inptr stays on the NUL so that
				 * every later call also reports the end
				 */
				s[stored] = '\0';
				return stored > 0;
			case 10:
			case 13:
				inptr++;
				s[stored] = '\0';
				return 1;
			default:
				if(stored < MAXLINELEN-1)
					s[stored++] = *inptr;
				inptr++;
		}
	}
}

/* nextword:
 *	Goes to first whitespace, then sets pointer to
 *	beginning of non-white-space.
 *
 *	Returns 1 on success, 0 on fail (end of string reached before
 *	another word was found). On failure *stringp is left pointing
 *	at the terminating NUL, and further calls will not move it, so
 *	callers that loop on nextword() must stop when it returns 0.
 */
int nextword(unsigned char **stringp)
{
	unsigned char *p = *stringp;

	/* skip what is left of the current word */
	while((*p != '\0') && !isspace(*p))
		p++;
	/* now on space (or at the end); skip the separator */
	while(isspace(*p))
		p++;

	*stringp = p;
	return *p != '\0';
}

/* nextchar:
 *	Skips leading whitespace (as judged by isspace()) and returns
 *	a pointer to the first character that is not whitespace. That
 *	may be the terminating NUL.
 */
unsigned char *nextchar(unsigned char *c)
{
	unsigned char *scan;

	for(scan = c; isspace(*scan); scan++)
		;
	return scan;
}

/* StripBrackets:
 *	Gets rid of those annoying {english}{english2} brackets.
 *	"{one}{two}" becomes "one: two": the leading '{' is skipped,
 *	every later '{' is replaced by ": ", and every '}' is dropped.
 *	Copying stops at a newline or at the end of the string.
 *
 *	If source does not start with '{', dest is set to the empty
 *	string.
 *
 *	dest must be large enough: for properly paired brackets the
 *	result is never longer than source, but each stray '{' adds
 *	two characters, so the worst case is 2*strlen(source) bytes
 *	including the terminator.
 */
void StripBrackets(char *dest,unsigned char *source)
{
	unsigned char *parse;

	if(source[0] != '{'){
		dest[0] = '\0';
		return;
	}
	/* (*dest) is always assumed to be needing a write */

	/* test for the end BEFORE looking at a character, so that a
	 * lone "{" does not make us step over the terminator
	 */
	for(parse = &source[1]; (*parse != '\n') && (*parse != '\0'); parse++){
		switch(*parse){
			case '{':
				*dest++ = ':';
				*dest++ = ' ';
				break;
			case '}':
				break;
			default:
				*dest++ = *parse;
		}
	}
	*dest = '\0';
}


/* dup_16:
 *	Returns a freshly allocated copy of the two-byte string in
 *	kanabuffer. The length is measured with strlen(), i.e. up to
 *	the first zero byte, so the string must not contain a
 *	character with a zero byte in it.
 *
 *	The copy is followed by four zero bytes, so that it always
 *	ends in a complete (0,0) XChar2b terminator.
 *
 *	Exits the program if memory cannot be allocated. The caller
 *	owns the returned memory.
 */
XChar2b *dup_16(XChar2b *kanabuffer){
	size_t nbytes;
	XChar2b *ret_str;

	nbytes = strlen((char *) kanabuffer);

	/* calloc, so the padding after the copied bytes is already zero */
	ret_str = (XChar2b *) calloc(1, nbytes+4);
	if(ret_str== NULL){
		fprintf(stderr,"Not enough memory to read in dictionary\n");
		exit(EXIT_FAILURE);
	}
#ifdef NOMEMSET
	strncpy((char *) ret_str,(char *) kanabuffer, nbytes);
#else
	memcpy(ret_str,kanabuffer,nbytes);
#endif
	return ret_str;
}




/* Okay, it's not actually pronunciation we're reading in
 * We are reading the "on-yomi" and "kun-yomi" readings
 * in kanjidic. Also, the optional okurigana.
 *
 * Format:
 *     reading{.oku} [reading{.oku}] ...
 */


/* 0x2500 stuff is katakana? (ON)
 * 0x2400 is hiragana?  (KUN)
 */

/* put16:
 *	Stores one two-byte character at kptr and returns the
 *	position following it.
 */
static XChar2b *put16(XChar2b *kptr,unsigned char b1,unsigned char b2)
{
	kptr->byte1 = b1;
	kptr->byte2 = b2;
	return kptr+1;
}

/* ReadPronunciation:
 *	Parses the readings starting at *Pstring and stores them, as
 *	a (0,0)-terminated XChar2b string, in
 *	translations[kanjinum]->pronunciation (allocated by dup_16()).
 *	On return *Pstring points at the character that ended the
 *	readings: '{', newline, carriage return or the NUL.
 *
 *	If *Pstring already starts with '{' there are no readings at
 *	all. The entry is then released, translations[kanjinum] is set
 *	to NULL and *Pstring is left alone. translations[kanjinum]
 *	must therefore be NULL or point to malloc()ed memory.
 *
 *	Translation of the input:
 *	  two bytes, first one >= 127  -> one character, high bits cleared
 *	  '.'   -> (0x21,0x4a), starts okurigana
 *	  '-'   -> (0x21,0x41)
 *	  ' '   -> (0x21,0x21)
 *	  end of okurigana -> (0x21,0x4b)
 *	  other ASCII is dropped
 *	NOTE(review): those codes look like the JIS X 0208 opening
 *	parenthesis, wave dash, blank and closing parenthesis -
 *	confirm against the font in use.
 *
 *	Exits the program if the readings do not fit the work buffer.
 */
void ReadPronunciation(unsigned char **Pstring,int kanjinum)
{

	XChar2b kbuff[MAXLINELEN];
	XChar2b *kptr = kbuff;
	unsigned char *parse = *Pstring;
	enum {READING, OKURIGANA,BLANK, DONE};
	/* state has to live outside the loop: declared with an
	 * initializer inside the loop body it would be reset on
	 * every pass
	 */
	int state=BLANK;

	if(*parse == '{'){
		/* only english exists,
		 *  (no kanji, even)
		 *   so set character to be unusable.
		 * Free the entry first, nobody else points to it.
		 */
		free(translations[kanjinum]);
		translations[kanjinum] = NULL;
		return;
	}
	while(*parse == ' ')
		parse++;

	/* THIS is going to get yeuky.
	 *  We are going to parse a line segment which has
	 *  reading.oku  pairs.
	 * This is REALLY annoying, because the line jumps between
	 * 8 -bit and 16-bit chars
	 */

	while(state != DONE){

		/* One pass stores at most two characters, and the
		 * terminator written after the loop needs one more
		 * slot, so insist on three free slots here.
		 */
		if(kptr >&kbuff[MAXLINELEN-3]){
			fprintf(stderr,"ERROR! overflow reading in kanjidic\n");
			fprintf(stderr,"%s\n",(char *) *Pstring);
			exit(-1);
		}

		switch(*parse){
			case '.':
				parse++;

				/* we ALWAYS need to close this off later */
				state = OKURIGANA;
				kptr = put16(kptr,0x21,0x4a);

				/* let the next pass classify what follows,
				 * rather than blindly taking two bytes
				 */
				continue;
			case '-':
				parse++;
#ifdef USEEXTRABLANKS
				if(state == BLANK)
					kptr = put16(kptr,0x21,0x21);
#endif
				kptr = put16(kptr,0x21,0x41);
#ifdef USEEXTRABLANKS
				if(state != BLANK)
					kptr = put16(kptr,0x21,0x21);
#endif
				continue;

			case '\0':
			case '\n':
			case '\r':
			case '{':
				if(state == OKURIGANA)
					kptr = put16(kptr,0x21,0x4b);
				state = DONE;
				continue;	/* ends the loop */

			case ' ':
				if(state == OKURIGANA)
					kptr = put16(kptr,0x21,0x4b);
				state = BLANK;

				parse++;
				kptr = put16(kptr,0x21,0x21);
				continue;

			default:
				if(*parse <127){
					/* stray ASCII: skip it */
					if(state == OKURIGANA){
						kptr = put16(kptr,0x21,0x4b);
						puts("error.. error on kana read-in... ");
						printf("on kanji %x, we got char %c\n",kanjinum,*parse);
					}
					state = BLANK;
					parse++;
					continue;
				}
				if(state != OKURIGANA)
					state = READING;
				break;
		}

		/* here *parse starts a two-byte character */
		if(parse[1] == '\0'){
			/* line ends in the middle of a character: drop
			 * the stray byte, next pass sees the NUL
			 */
			parse++;
			continue;
		}
		kptr = put16(kptr,parse[0] & 0x7f,parse[1] & 0x7f);
		parse += 2;

	} /* while(state != DONE) */

	/* copy out to struct, and exit */
	kptr->byte1 = 0;
	kptr->byte2 = 0;
	translations[kanjinum]->pronunciation =
		dup_16(kbuff);

	*Pstring = parse;
	return;

}



/* readstructs:
 *	the main dictionary reading routine.
 *	Fills in the global translations[] table with
 *	all that is available for each selected kanji, in
 *	Grade, "pronunciation", english translation, and
 *	frequency of use (by native speakers)
 *
 *	The dictionary name comes from the "dictfile" resource. When
 *	UNCOMPRESS is defined and the name ends in UNCOMPRESSEXT, the
 *	file is read through a pipe from the UNCOMPRESS command.
 *	NOTE(review): the name is pasted into a shell command line
 *	unquoted; names with blanks or shell characters will misbehave.
 *
 *	Every line is expected to look like
 *	    <2 bytes of kanji> <hex index> [Fn] [Gn] [Hn] [Nn] [Uhex] ...
 *	        readings {english}{english}
 *	Lines shorter than 10 characters, and lines whose index is
 *	outside MINKANJIALLOWED..MAXKANJIALLOWED-1, are skipped.
 *	lowestkanji and highestkanji are set to the range of indexes seen.
 *
 *	Exits the program when the dictionary cannot be opened or
 *	memory runs out.
 */
void readstructs(){
	unsigned char line[MAXLINELEN];
	/* static, because dictname keeps pointing here after we return */
	static char dict[200];
	FILE *fp;
	int i;
#ifdef UNCOMPRESS
	char command_string[sizeof(dict) + 100];
	int namelen;
	int extlen;
	int piped = 0;	/* set when fp comes from popen() */
#endif

	GetXtString("dictfile","Dictfile",dict);
	dictname = dict;
#ifdef DEBUG
	printf("dictfile from resources is\" %s\"\n",dictname);
#endif

	if(access(dictname,R_OK)!= 0){
		fprintf(stderr,"Cannot open dict file %s\n",dictname);
		exit(EXIT_FAILURE);
	}
#ifdef UNCOMPRESS

	namelen = strlen(dictname);
	extlen = strlen(UNCOMPRESSEXT);
	/* a name shorter than the extension cannot end in it */
	if((namelen < extlen) ||
	   (strncmp(&dictname[namelen-extlen],UNCOMPRESSEXT,extlen) != 0)){
		fp = fopen(dictname,"r");
	} else {
		if(strlen(UNCOMPRESS) + namelen + 2 > sizeof(command_string)){
			fprintf(stderr,"Dictionary name too long: %s\n",dictname);
			exit(EXIT_FAILURE);
		}
		sprintf(command_string,"%s %s",UNCOMPRESS,dictname);
		fp = (FILE *) popen(command_string,"r");
		piped = 1;
	}
#else
	fp = fopen(dictname,"r");
#endif /* UNCOMPRESS */
	if(fp == NULL){
		perror("cannot open kanji translation file");
		fprintf(stderr,"Looking for %s\n",dictname);
#ifdef UNCOMPRESS
		if(piped)
			fprintf(stderr,"Using uncompression method \"%s\"\n",
				UNCOMPRESS);
#endif
		exit(EXIT_FAILURE);
	}

	printf("opened dictionary %s \n",dictname);
	lowestkanji = highestkanji = 0;

	/* plain loop: needs neither memset nor bzero */
	for(i=0;i<MAXKANJIALLOWED;i++)
		translations[i] = NULL;

	while (getline(fp,line) != 0) {
		int Kanji;
		int freq,grade,N,U,H;
		unsigned char *parse;
		size_t englen;	/* room needed for the english text */

		if(strlen((char *) line) <10) continue;

		/*try to get kanji Index right away */

#define BROKENFONTS 0
		
		Kanji = xtoi(&line[2]) + (BROKENFONTS);


		/* skip comments, and kanji not specified in
		 * the usefile. Also skip anything that would
		 * index beyond translations[].
		 */	
		if((Kanji < MINKANJIALLOWED) || (Kanji >= MAXKANJIALLOWED)) {
			continue;
		}

		parse = &line[2];
		/* now parse for grade level, frequency, and english */
		freq = grade = N = U = H = 0;

		nextword(&parse);

		/* Check for high bit set, which means
		 * start of kana definition of kana.
		 * We cheat a bit, and let this loop skip over
		 * numbers by the fact that they don't match
		 * the case statements.
		 * Also stop at the end of the line.
		 */
		while ( (*parse != '\0') && (*parse < 127)  && (*parse != '{') ) {
			
			switch(*parse){
				case 'F':
					freq = atoi((char *) ++parse);
					break;
				case 'G':
					grade = atoi((char *) ++parse);
					break;
				case 'H':
					H = atoi((char *) ++parse);
					break;
				case 'N':
					N = atoi((char *) ++parse);
					break;
				case 'U':
					U = xtoi(++parse);
					break;
				default:
					break;
			}
			if(!nextword(&parse))
				break;
		}
		
		
		/**********************************************
		 *  Now we know that we have a useable/wanted *
		 *  dictionary definition                     *
		 *********************************************/
		if((lowestkanji==highestkanji) && (highestkanji==0)){
			lowestkanji = highestkanji = Kanji;
		} else{
			if(Kanji < lowestkanji) lowestkanji = Kanji;
			if (Kanji > highestkanji) highestkanji = Kanji;
		}
		
		translations[Kanji] = (struct translationstruct *)
			malloc(sizeof(struct translationstruct));
		if (translations[Kanji] == NULL){
			perror("Cannot allocate memory for translation table\n");
			exit(EXIT_FAILURE);
		}
		translations[Kanji]->Uindex=U;
		translations[Kanji]->Hindex=H;
		translations[Kanji]->Nindex=N;
		translations[Kanji]->frequency = freq;
		translations[Kanji]->grade_level = grade;
		translations[Kanji]->incorrect=0;
		translations[Kanji]->ON_extra=NULL;

		/* sets translations[Kanji] to NULL if there are no readings */
		ReadPronunciation(&parse,Kanji);
		if(translations[Kanji] == NULL) continue;

		/* StripBrackets() turns each '{' after the first into
		 * two characters, so allow for twice the input length
		 */
		englen = 2*strlen((char *) parse)+1;
		translations[Kanji]->english = (char *) malloc(englen);
		if(translations[Kanji]->english == NULL){
			perror("Cannot allocate memory for translation table\n");
			exit(EXIT_FAILURE);
		}

		StripBrackets(translations[Kanji]->english,parse);

	} /* and repeat until end of file */

	/* a stream is closed exactly once: pclose() for a pipe,
	 * fclose() for a plain file
	 */
#ifdef UNCOMPRESS
	if(piped){
		pclose(fp);
		return;
	}
#endif	

	fclose(fp);
}

