<copyright> XL -- TOM (archiving) lexer.
    Written by <a href="mailto:tiggr@gerbil.org">Pieter J. Schoenmakers</a>

    Copyright &copy; 1998 Pieter J. Schoenmakers.

    This file is part of TOM.  TOM is distributed under the terms of the
    TOM License, a copy of which can be found in the TOM distribution; see
    the file LICENSE.

    <id>$Id: XL.t,v 1.5 1998/05/08 10:12:14 tiggr Exp $</id>
    </copyright>

<c>
#include <tom/util.h>
#include <stdlib.h>
</c>

/******************** XLTokens ********************/

<doc> Token codes handed out by the {XL} lexer.  All codes are negative
    integers.  </doc>
implementation class
XLTokens
{
  /* Stream status.  */
  const XLT_EOF = -1;		/* End of the input stream.  */
  const XLT_EPSILON = -2;	/* `Nothing read yet' / `no valid value'.  */

  /* Anything that does not lex as a number.  */
  const XLT_SYMBOL = -3;

  /* Integers: one that the lexer judged to fit an int, or a long.  */
  const XLT_INT = -4;
  const XLT_LONG = -5;

  /* Floating point numbers: written with an `e' or without an exponent
     (FLOAT), or with a `d' exponent marker (DOUBLE).  */
  const XLT_FLOAT = -6;
  const XLT_DOUBLE = -7;

  /* The characters `(' and `)'.  */
  const XLT_PAR_OPEN = -8;
  const XLT_PAR_CLOSE = -9;
}

end;

implementation instance XLTokens end;

/******************** XL ********************/

implementation class
XL: State, XLTokens
{
  <doc> Different states of the lexer state machine.  Basically, these
      states are the states of reading a floating point number, with a
      prefix for an integer, and an escape for a non-numeric input.
      </doc>
  /* The text read so far is not (or no longer) a number.  */
  const XLS_SYMBOL = 0;
  /* Only a leading `-' has been read.  */
  const XLS_SIGN = 1;
  /* Reading the digits of the integer part.  */
  const XLS_INT = 2;
  /* A `.' was read (leading, or after the integer digits) and no
     fraction digit has followed yet.  */
  const XLS_DOT = 3;
  /* Reading the digits of the fraction.  */
  const XLS_FRAC = 4;
  /* The exponent marker (`e', `E', `d' or `D') was just read.  */
  const XLS_EXP_E = 5;
  /* The sign of the exponent was just read.  */
  const XLS_EXP_SIGN = 6;
  /* Reading the digits of the exponent.  */
  const XLS_EXP = 7;
}

end;

implementation instance
XL
{
  <doc> The stream being lexed.  </doc>
  /* Set by `initWithStream'; consumed one character at a time through
     `[stream read]', which is expected to return -1 at end of stream.  */
  public InputStream stream;

  <doc> The buffer used for building the text of the token.  </doc>
  /* Created by `initWithStream', truncated by `nextToken' at the start of
     every symbol or number, and handed out as-is by `matched'.  */
  MutableByteString buffer;

  <doc> The most recent integer value retrieved.  </doc>
  /* Only assigned when `nextToken' returns XLT_INT or XLT_LONG.  */
  public long int_value;

  <doc> The most recent floating value retrieved.  </doc>
  /* Only assigned when `nextToken' returns XLT_FLOAT or XLT_DOUBLE.  */
  public double float_value;

  <doc> The current line.  </doc>
  /* Set to 1 by the first `nextToken' and incremented for newlines seen
     while skipping whitespace or reading a quoted byte string.  */
  public int current_line;

  <doc> The current token.  </doc>
  /* Starts out as XLT_EPSILON (see `initWithStream'); afterwards it holds
     the value most recently returned by `nextToken'.  */
  public int token;

  <doc> The next character, i.e. the first character of the next token.
      This is {XLT_EOF} for end of stream, or {XLT_EPSILON} if this should
      be considered invalid (and read before starting the next token).
      </doc>
  int next_char;
}

<doc> Designated initializer: prepare the receiver for lexing the stream
    {s}.  Nothing is read from the stream here; the {token} is set to
    {XLT_EPSILON}, which makes the first {nextToken} fetch the first
    character.  </doc>
id
  initWithStream InputStream s
{
  stream = s;
  token = XLT_EPSILON;
  buffer = [MutableByteString new];

  return [super init];
}

<doc> Answer the most recent {int_value}, converted to an int.  When the
    value does not fit, the excess bits are silently dropped.  </doc>
int
  intValue
{
  return int (int_value);
}

<doc> Answer the text of the most recent symbol or number.  This is the
    lexer's own {buffer}, not a copy: it is overwritten when the next
    symbol or number is read.  </doc>
MutableString
  matched
{
  return buffer;
}

<doc> Skip whitespace and return the next token, which is also stored in
    {token}.

    The result is {XLT_EOF} at the end of the stream (and forever after),
    {XLT_PAR_OPEN} or {XLT_PAR_CLOSE} for a parenthesis, {XLT_INT} or
    {XLT_LONG} for an integer (value in {int_value}), {XLT_FLOAT} or
    {XLT_DOUBLE} for a floating point number (value in {float_value}),
    and {XLT_SYMBOL} for anything else.  For symbols and numbers, the text
    is available from {matched}; a `d' or `D' exponent marker is stored
    in that text as `e'.

    A token ends at whitespace, a parenthesis, or the end of the stream.
    The terminating character is kept in {next_char}, so that a newline
    ending a token is counted in {current_line} when it is skipped by the
    next invocation of this method or of {readBytes}.  </doc>
int
  nextToken
{
  if (token == XLT_EOF)
    return token;

  if (token == XLT_EPSILON)
    {
      /* First invocation.  The value assigned to the token can be
         anything, just not epsilon or eof.  */
      token = XLT_DOUBLE;
      current_line = 1;
      next_char = [stream read];
    }
  else if (next_char == XLT_EPSILON)
    next_char = [stream read];

  /* Skip whitespace, counting lines.  */
  while (next_char != -1 && [buffer isSpace byte (next_char)])
    {
      if (next_char == '\n')
        current_line++;
      next_char = [stream read];
    }

  if (next_char == -1)
    return token = XLT_EOF;

  /* A parenthesis is a token by itself; it has been consumed, hence the
     next character is yet to be read.  */
  if (next_char == '(')
    {
      next_char = XLT_EPSILON;
      return token = XLT_PAR_OPEN;
    }
  if (next_char == ')')
    {
      next_char = XLT_EPSILON;
      return token = XLT_PAR_CLOSE;
    }

  /* Whether the exponent was introduced by `d' instead of `e'.  */
  boolean double_d = FALSE;
  boolean neg = next_char == '-';
  int state = (neg ? XLS_SIGN : next_char == '.' ? XLS_DOT :
               [buffer isDigit byte (next_char)] ? XLS_INT : XLS_SYMBOL);

  [buffer truncate 0];
  for (;;)
    {
      [buffer add byte (next_char)];
      next_char = [stream read];

      /* The token ends at eof, a parenthesis, or whitespace.  The
         terminator is left in next_char: a parenthesis is the next
         token, and whitespace must still pass the whitespace skipping
         loop to have a newline counted in current_line.  */
      if (next_char == -1 || next_char == ')' || next_char == '('
          || [buffer isSpace byte (next_char)])
        break;

      /* Once a symbol, always a symbol.  Otherwise, advance the state
         machine recognizing `[-]digits[.digits][(e|d)[+|-]digits]'.  */
      if (state != XLS_SYMBOL)
        {
          if ([buffer isDigit byte (next_char)])
            {
              if (state == XLS_DOT)
                state = XLS_FRAC;
              else if (state == XLS_EXP_E || state == XLS_EXP_SIGN)
                state = XLS_EXP;
              else if (state == XLS_SIGN)
                state = XLS_INT;
            }
          else if (state == XLS_INT && next_char == '.')
            state = XLS_DOT;
          else if (state == XLS_INT || state == XLS_FRAC)
            {
              /* Store a `d' exponent marker as `e', to have the text in
                 the buffer understood by atof.  */
              if (next_char == 'd' || next_char == 'D')
                (state, double_d, next_char) = (XLS_EXP_E, TRUE, 'e');
              else if (next_char == 'e' || next_char == 'E')
                state = XLS_EXP_E;
              else
                state = XLS_SYMBOL;
            }
          else if (state == XLS_EXP_E
                   && (next_char == '-' || next_char == '+'))
            state = XLS_EXP_SIGN;
          else
            state = XLS_SYMBOL;
        }
    }

  /* An incomplete number (`-', `.', `1.', `1e', `1e+') is a symbol.  */
  if (state == XLS_SYMBOL || state == XLS_SIGN || state == XLS_DOT
      || state == XLS_EXP_E || state == XLS_EXP_SIGN)
    return token = XLT_SYMBOL;

  if (state == XLS_INT)
    {
      int i, n = [buffer length];

      /* Accumulate a negative number downward, to be able to reach the
         most negative value.  */
      int_value = 0;
      for (i = neg ? 1 : 0; i < n; i++)
        {
          int v = [[USASCIIEncoding shared] digitValue buffer[i]];

          if (neg)
            int_value = 10 * int_value - v;
          else
            int_value = 10 * int_value + v;
        }

      /* It is an int only if it survives the conversion performed by
         intValue.  Testing the upper 32 bits for being all 0 or all 1
         is not sufficient: that misclassifies, for instance, 3000000000,
         of which the truncated value is negative.  */
      return token = (int_value == long (int (int_value))
                      ? XLT_INT : XLT_LONG);
    }
  else
    {
      All b = buffer;
      double d;

<c>
      {
	DECL_SEL (_pi__byteStringContents);
	tom_int len;
	char *s;

	C_STRING_WITH_TOM_STRING (s, len, b);
	d = atof (s);
      }
</c>

      float_value = d;
      return token = double_d ? XLT_DOUBLE : XLT_FLOAT;
    }
}

<doc> Skip whitespace, then read a double-quoted string of exactly
    {expected_length} bytes and return those bytes in freshly allocated
    memory, together with their number.

    Within the quotes, a backslash causes the byte following it to be
    taken literally (as needed for a double quote or a backslash); the
    backslash itself is not stored and does not count towards the
    length.  Newlines within the string are counted in {current_line}.

    Anything unexpected (a negative {expected_length}, no opening quote,
    the end of the stream, or no closing quote directly after the
    expected number of bytes) results in the memory being released and
    the return values being left unset, i.e. presumably a {NULL} pointer
    and a zero length.  NOTE(review): in that case the postcondition can
    not hold for a non-zero {expected_length}; to be confirmed whether
    that is intended.  </doc>
(pointer, int) (contents, length)
  readBytes int expected_length
post
  length == expected_length
{
  /* Do not hand a negative size to the allocator.  */
  if (expected_length < 0)
    return;

<c>
  {
    char *s = xmalloc (expected_length);
</c>

    /* Skip to the first non-space character, counting lines.  An epsilon
       next_char means the next character is yet to be read.  */
    while (next_char != -1
           && (next_char == XLT_EPSILON || [buffer isSpace byte (next_char)]))
      {
        if (next_char == '\n')
          current_line++;
        next_char = [stream read];
      }

    if (next_char != '"')
      {
        <c> xfree (s); </c>
        return;
      }

    /* The number of bytes stored, and whether the previous character was
       an unescaped backslash.  Note that `i' is also used from C.  */
    int i = 0;
    boolean escape = FALSE;
    for (;;)
      {
        next_char = [stream read];

        /* With all bytes read, only the closing quote is acceptable.  */
        if (i == expected_length)
          {
            if (escape || next_char != '"')
              {
                <c> xfree (s); </c>
                return;
              }
            break;
          }

        /* The end of the stream is fatal, also directly after a
           backslash: it must neither be stored as a byte, nor must the
           stream be read beyond it.  */
        if (next_char == -1)
          {
            <c> xfree (s); </c>
            return;
          }

        if (escape)
          escape = FALSE;
        else if (next_char == '\\')
          {
            escape = TRUE;
            continue;
          }

        if (next_char == '\n')
          current_line++;

        int b = next_char;
        <c> s[i++] = b; </c>
      }

    /* The closing quote has been consumed.  */
    next_char = XLT_EPSILON;
    length = expected_length;
<c>
    contents = s;
  }
</c>
}

end;
