lex.c [plain text]

/* Language lexer for the GNU compiler for the Java(TM) language.
   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
   Free Software Foundation, Inc.
   Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. 

Java and all Java-based marks are trademarks or registered trademarks
of Sun Microsystems, Inc. in the United States and other countries.
The Free Software Foundation is independent of Sun Microsystems, Inc.  */

/* It defines java_lex (yylex) that reads a Java ASCII source file
   possibly containing Unicode escape sequences or UTF-8 encoded
   characters and returns a token for everything found but comments,
   white spaces and line terminators. When necessary, it also fills
   the java_lval (yylval) union. It's implemented to be called by a
   re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */

#include "keyword.h"
#include "flags.h"
#include "chartables.h"
#ifndef JC1_LITE
#include "timevar.h"
#endif

/* Function declarations.  */
static char *java_sprint_unicode (int);
static void java_unicode_2_utf8 (unicode_t);
static void java_lex_error (const char *, int);
#ifndef JC1_LITE
static int do_java_lex (YYSTYPE *);
static int java_lex (YYSTYPE *);
static int java_is_eol (FILE *, int);
static tree build_wfl_node (tree);
#endif
static int java_parse_escape_sequence (void);
static int java_start_char_p (unicode_t);
static int java_part_char_p (unicode_t);
static int java_space_char_p (unicode_t);
static void java_parse_doc_section (int);
static void java_parse_end_comment (int);
static int java_read_char (java_lexer *);
static int java_get_unicode (void);
static int java_peek_unicode (void);
static void java_next_unicode (void);
static int java_read_unicode (java_lexer *, int *);
#ifndef JC1_LITE
static int utf8_cmp (const unsigned char *, int, const char *);
#endif

java_lexer *java_new_lexer (FILE *, const char *);
#ifndef JC1_LITE
static void error_if_numeric_overflow (tree);
#endif

#ifdef HAVE_ICONV
/* This is nonzero if we have initialized `need_byteswap'.  */
static int byteswap_init = 0;

/* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
   big-endian order -- not native endian order.  We handle this by
   doing a conversion once at startup and seeing what happens.  This
   flag holds the results of this determination.  */
static int need_byteswap = 0;
#endif

void
java_init_lex (FILE *finput, const char *encoding)
{
#ifndef JC1_LITE
  int java_lang_imported = 0;

  if (!java_lang_id)
    java_lang_id = get_identifier ("java.lang");
  if (!inst_id)
    inst_id = get_identifier ("inst$");
  if (!wpv_id)
    wpv_id = get_identifier ("write_parm_value$");

  if (!java_lang_imported)
    {
      tree node = build_tree_list (build_unknown_wfl (java_lang_id),
				   NULL_TREE);
      read_import_dir (TREE_PURPOSE (node));
      TREE_CHAIN (node) = ctxp->import_demand_list;
      ctxp->import_demand_list = node;
      java_lang_imported = 1;
    }

  if (!wfl_operator)
    {
#ifndef JC1_LITE
#ifdef USE_MAPPED_LOCATION
      wfl_operator = build_expr_wfl (NULL_TREE, input_location);
#else
      wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
#endif
#endif
    }
  if (!label_id)
    label_id = get_identifier ("$L");
  if (!wfl_append) 
    wfl_append = build_unknown_wfl (get_identifier ("append"));
  if (!wfl_string_buffer)
    wfl_string_buffer = 
      build_unknown_wfl (get_identifier (flag_emit_class_files
				      ? "java.lang.StringBuffer"
					 : "gnu.gcj.runtime.StringBuffer"));
  if (!wfl_to_string)
    wfl_to_string = build_unknown_wfl (get_identifier ("toString"));

  CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
    CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;

  memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
  ctxp->current_parsed_class = NULL;
  ctxp->package = NULL_TREE;
#endif

#ifndef JC1_LITE
  ctxp->save_location = input_location;
#endif
  ctxp->java_error_flag = 0;
  ctxp->lexer = java_new_lexer (finput, encoding);
}

/* Return a printable rendering of the character C for use in
   diagnostics.  A printable ASCII character (' ' through '~') is
   returned as a one-character string; any other value is rendered as
   a `\u' escape followed by at least four lowercase hexadecimal
   digits.  The result lives in a static buffer which is overwritten
   by the next call.  */

static char *
java_sprint_unicode (int c)
{
  /* Large enough for "\u", eight hexadecimal digits and the
     terminating NUL, so that values outside the 16-bit range
     (including negative ones) cannot overrun the buffer.  */
  static char buffer [16];

  if (c < ' ' || c >= 127)
    /* snprintf keeps the write bounded whatever the width of int;
       the cast matches the unsigned argument that %x expects.  */
    snprintf (buffer, sizeof (buffer), "\\u%04x", (unsigned int) c);
  else
    {
      buffer [0] = c;
      buffer [1] = '\0';
    }
  return buffer;
}

/* Create a new lexer object reading from FINPUT, whose bytes are to
   be decoded according to ENCODING.  When HAVE_ICONV is defined an
   iconv descriptor converting ENCODING to UCS-2 is opened; if that
   fails, or iconv is not compiled in, the built-in UTF-8 decoder is
   used instead, but only when ENCODING is DEFAULT_ENCODING or "646".
   Any other encoding is a fatal error.  The lexer is allocated with
   xmalloc and is released by java_destroy_lexer.  */

java_lexer *
java_new_lexer (FILE *finput, const char *encoding)
{
  java_lexer *lex = xmalloc (sizeof (java_lexer));
  int enc_error = 0;

  /* Start with no pushed-back or peeked character, positioned at
     line 1, column 1.  */
  lex->finput = finput;
  lex->bs_count = 0;
  lex->unget_value = 0;
  lex->next_unicode = 0;
  lex->avail_unicode = 0;
  lex->next_columns = 1;
  lex->encoding = encoding;
  lex->position.line = 1;
  lex->position.col = 1;
#ifndef JC1_LITE
#ifdef USE_MAPPED_LOCATION
      input_location
	= linemap_line_start (&line_table, 1, 120);
#else
      input_line = 1;
#endif
#endif

#ifdef HAVE_ICONV
  lex->handle = iconv_open ("UCS-2", encoding);
  if (lex->handle != (iconv_t) -1)
    {
      /* -1 marks the input and output conversion buffers as not yet
	 used; java_read_char fills them on demand.  */
      lex->first = -1;
      lex->last = -1;
      lex->out_first = -1;
      lex->out_last = -1;
      lex->read_anything = 0;
      lex->use_fallback = 0;

      /* Work around broken iconv() implementations by doing checking at
	 runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
	 then all UCS-2 encoders will be broken.  Perhaps not a valid
	 assumption.  */
      if (! byteswap_init)
	{
	  iconv_t handle;

	  /* The probe is attempted at most once, whatever its outcome.  */
	  byteswap_init = 1;

	  handle = iconv_open ("UCS-2", "UTF-8");
	  if (handle != (iconv_t) -1)
	    {
	      unicode_t result;
	      unsigned char in[3];
	      char *inp, *outp;
	      size_t inc, outc, r;

	      /* This is the UTF-8 encoding of \ufeff.  */
	      in[0] = 0xef;
	      in[1] = 0xbb;
	      in[2] = 0xbf;

	      /* Convert the three input bytes into exactly two output
		 bytes stored in RESULT.  */
	      inp = (char *) in;
	      inc = 3;
	      outp = (char *) &result;
	      outc = 2;

	      r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
			 &outp, &outc);
	      iconv_close (handle);
	      /* Conversion must be complete for us to use the result.  */
	      if (r != (size_t) -1 && inc == 0 && outc == 0)
		need_byteswap = (result != 0xfeff);
	    }
	}

      /* Every lexer starts from the process-wide probe result;
	 java_read_char may override it when the input has a BOM.  */
      lex->byte_swap = need_byteswap;
    }
  else
#endif /* HAVE_ICONV */
    {
      /* If iconv failed, use the internal decoder if the default
	 encoding was requested.  This code is used on platforms where
	 iconv exists but is insufficient for our needs.  For
	 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.

	 On Solaris the default encoding, as returned by nl_langinfo(),
	 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
	 understand that.  We work around that by pretending
	 `646' to be the same as UTF-8.   */
      if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
	enc_error = 1;
#ifdef HAVE_ICONV
      else
        {
	  /* Switch java_read_char to the built-in UTF-8 decoder.  */
	  lex->use_fallback = 1;
	  lex->encoding = "UTF-8";
	}
#endif /* HAVE_ICONV */
    }

  /* Neither iconv nor the built-in decoder can handle ENCODING.  */
  if (enc_error)
    fatal_error ("unknown encoding: %qs\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n%<--encoding=UTF-8%> option", encoding);

  return lex;
}

/* Release LEX.  When iconv support is compiled in and LEX is not
   using the built-in fallback decoder, its conversion descriptor is
   closed before the lexer itself is freed.  */

void
java_destroy_lexer (java_lexer *lex)
{
#ifdef HAVE_ICONV
  /* Only a lexer that is not on the fallback path closes its
     descriptor.  */
  if (lex->use_fallback == 0)
    iconv_close (lex->handle);
#endif /* HAVE_ICONV */

  free (lex);
}

/* Read and return the next character from LEX's input file, decoded
   from the source encoding but before any `\u' escape processing.

   When iconv is in use, raw bytes are read into LEX->buffer and
   converted in batches into LEX->out_buffer as two-byte units; a
   leading byte-order mark is skipped and may switch byte swapping on
   or off.  Otherwise the built-in decoder accepts one-, two- and
   three-byte UTF-8 sequences.

   Returns UEOF at end of input, and also after a decoding error has
   been reported through java_lex_error.  */

static int
java_read_char (java_lexer *lex)
{
#ifdef HAVE_ICONV
  if (! lex->use_fallback)
    {
      size_t ir, inbytesleft, in_save, out_count, out_save;
      char *inp, *outp;
      unicode_t result;

      /* If there is data which has already been converted, use it.  */
      if (lex->out_first == -1 || lex->out_first >= lex->out_last)
	{
	  /* The output buffer is drained: restart it from the front.  */
	  lex->out_first = 0;
	  lex->out_last = 0;

	  while (1)
	    {
	      /* See if we need to read more data.  If FIRST == 0 then
		 the previous conversion attempt ended in the middle of
		 a character at the end of the buffer.  Otherwise we
		 only have to read if the buffer is empty.  */
	      if (lex->first == 0 || lex->first >= lex->last)
		{
		  int r;

		  if (lex->first >= lex->last)
		    {
		      lex->first = 0;
		      lex->last = 0;
		    }
		  if (feof (lex->finput))
		    return UEOF;
		  /* Append to whatever unconverted bytes remain.  */
		  r = fread (&lex->buffer[lex->last], 1,
			     sizeof (lex->buffer) - lex->last,
			     lex->finput);
		  lex->last += r;
		}

	      inbytesleft = lex->last - lex->first;
	      out_count = sizeof (lex->out_buffer) - lex->out_last;

	      if (inbytesleft == 0)
		{
		  /* We've tried to read and there is nothing left.  */
		  return UEOF;
		}

	      /* Remember the counts so that we can tell afterwards how
		 much iconv consumed and produced.  */
	      in_save = inbytesleft;
	      out_save = out_count;
	      inp = &lex->buffer[lex->first];
	      outp = (char *) &lex->out_buffer[lex->out_last];
	      ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
			  &inbytesleft, &outp, &out_count);

	      /* If we haven't read any bytes, then look to see if we
		 have read a BOM.  */
	      if (! lex->read_anything && out_save - out_count >= 2)
		{
		  unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
		  if (uc == 0xfeff)
		    {
		      /* BOM in the expected order: no swapping needed.  */
		      lex->byte_swap = 0;
		      lex->out_first += 2;
		    }
		  else if (uc == 0xfffe)
		    {
		      /* Byte-reversed BOM: swap everything that follows.  */
		      lex->byte_swap = 1;
		      lex->out_first += 2;
		    }
		  lex->read_anything = 1;
		}

	      /* Swap the two bytes of every unit just produced.  */
	      if (lex->byte_swap)
		{
		  unsigned int i;
		  for (i = 0; i < out_save - out_count; i += 2)
		    {
		      char t = lex->out_buffer[lex->out_last + i];
		      lex->out_buffer[lex->out_last + i]
			= lex->out_buffer[lex->out_last + i + 1];
		      lex->out_buffer[lex->out_last + i + 1] = t;
		    }
		}

	      lex->first += in_save - inbytesleft;
	      lex->out_last += out_save - out_count;

	      /* If we converted anything at all, move along.  */
	      if (out_count != out_save)
		break;

	      if (ir == (size_t) -1)
		{
		  if (errno == EINVAL)
		    {
		      /* This is ok.  This means that the end of our buffer
			 is in the middle of a character sequence.  We just
			 move the valid part of the buffer to the beginning
			 to force a read.  */
		      memmove (&lex->buffer[0], &lex->buffer[lex->first],
			       lex->last - lex->first);
		      lex->last -= lex->first;
		      lex->first = 0;
		    }
		  else
		    {
		      /* A more serious error.  The encoding name comes
			 from the user, so bound the formatting to the
			 size of the local buffer.  */
		      char buffer[128];
		      snprintf (buffer, sizeof (buffer),
				"Unrecognized character for encoding '%s'",
				lex->encoding);
		      java_lex_error (buffer, 0);
		      return UEOF;
		    }
		}
	    }
	}

      if (lex->out_first == -1 || lex->out_first >= lex->out_last)
	{
	  /* Don't have any data.  */
	  return UEOF;
	}

      /* Success.  Hand out the next two-byte unit.  */
      result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
      lex->out_first += 2;
      return result;
    }
  else
#endif /* HAVE_ICONV */
    {
      int c, c1, c2;
      c = getc (lex->finput);

      if (c == EOF)
	return UEOF;
      if (c < 128)
	return (unicode_t) c;
      else
	{
	  if ((c & 0xe0) == 0xc0)
	    {
	      /* Two-byte sequence: 110xxxxx 10xxxxxx.  */
	      c1 = getc (lex->finput);
	      if ((c1 & 0xc0) == 0x80)
		{
		  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
		  /* Check for valid 2-byte characters.  We explicitly
		     allow \0 because this encoding is common in the
		     Java world.  */
		  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
		    return r;
		}
	    }
	  else if ((c & 0xf0) == 0xe0)
	    {
	      /* Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.  */
	      c1 = getc (lex->finput);
	      if ((c1 & 0xc0) == 0x80)
		{
		  c2 = getc (lex->finput);
		  if ((c2 & 0xc0) == 0x80)
		    {
		      unicode_t r =  (unicode_t)(((c & 0xf) << 12) +
						 (( c1 & 0x3f) << 6)
						 + (c2 & 0x3f));
		      /* Check for valid 3-byte characters.
			 Don't allow surrogate, \ufffe or \uffff.  */
		      if (IN_RANGE (r, 0x800, 0xffff)
			  && ! IN_RANGE (r, 0xd800, 0xdfff)
			  && r != 0xfffe && r != 0xffff)
			return r;
		    }
		}
	    }

	  /* We simply don't support invalid characters.  We also
	     don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
	     cannot be valid Java characters.  */
	  java_lex_error ("malformed UTF-8 character", 0);
	}
    }

  /* We only get here on error.  */
  return UEOF;
}

/* Read one character from LEX, translating a `\uXXXX' escape into the
   character it denotes.  A character previously stored in
   LEX->unget_value is consumed before the input is read.

   A backslash only introduces an escape when it is preceded by an
   even number of backslashes (tracked in LEX->bs_count) and followed
   by one or more `u's.  *UNICODE_ESCAPE_P is set to 1 when the
   returned character came from an escape and to 0 otherwise.  Returns
   UEOF when the input ends inside an escape.  */

static int
java_read_unicode (java_lexer *lex, int *unicode_escape_p)
{
  int ch;
  int bits;
  unicode_t code_point;

  /* Take the pushed-back character if there is one.  */
  ch = lex->unget_value;
  if (ch)
    lex->unget_value = 0;
  else
    ch = java_read_char (lex);

  *unicode_escape_p = 0;

  if (ch != '\\')
    {
      lex->bs_count = 0;
      return ch;
    }

  /* An even count means this backslash is itself escaped.  */
  ++lex->bs_count;
  if ((lex->bs_count) % 2 != 1)
    return (unicode_t) '\\';

  /* Odd number of \ seen: look for the `u'.  */
  ch = java_read_char (lex);
  if (ch != 'u')
    {
      /* Not an escape; keep the character for the next call.  */
      lex->unget_value = ch;
      return (unicode_t) '\\';
    }

  /* Recognize any number of `u's in \u.  */
  do
    ch = java_read_char (lex);
  while (ch == 'u');

  /* Accumulate up to four hex digits, most significant first.  */
  code_point = 0;
  for (bits = 12; bits >= 0; bits -= 4)
    {
      if (ch == UEOF)
	{
	  java_lex_error ("prematurely terminated \\u sequence", 0);
	  return UEOF;
	}

      if (! hex_p (ch))
	{
	  java_lex_error ("non-hex digit in \\u sequence", 0);
	  break;
	}

      code_point |= (unicode_t)(hex_value (ch) << bits);
      ch = java_read_char (lex);
    }

  /* CH is the first character after the escape; push it back.  */
  if (ch != UEOF)
    lex->unget_value = ch;

  lex->bs_count = 0;
  *unicode_escape_p = 1;
  return code_point;
}

/* Fetch the next Unicode character (after Unicode escape handling)
   and advance the current position past it.  */

static int
java_get_unicode (void)
{
  const int ch = java_peek_unicode ();

  java_next_unicode ();
  return ch;
}

/* Look at the next Unicode character (after Unicode escape handling)
   without consuming it; the current position stays just before the
   returned character.  A `\r', with or without a following `\n', is
   reported as a single '\n'.  The character is cached in the lexer
   together with the column adjustment that java_next_unicode applies
   when it is consumed.  */

static int
java_peek_unicode (void)
{
  java_lexer *lex = ctxp->lexer;
  int from_escape;
  int ch;
  int width;

  /* A character has already been peeked: hand it out again.  */
  if (lex->avail_unicode)
    return lex->next_unicode;

  ch = java_read_unicode (lex, &from_escape);

  if (ch == '\r')
    {
      /* We have to read ahead to see if we got \r\n.
	 In that case we return a single line terminator.  */
      int ignored;
      int following = java_read_unicode (lex, &ignored);

      /* NOTE(review): java_read_unicode may itself have stored a
	 character in unget_value (after a backslash or a \u escape);
	 this assignment would then replace it -- confirm.  */
      if (following != '\n' && following != UEOF)
	lex->unget_value = following;
      /* In either case we must return a newline.  */
      ch = '\n';
    }

  lex->next_unicode = ch;
  lex->avail_unicode = 1;

  if (ch == UEOF)
    {
      lex->next_columns = 0;
      return ch;
    }

  if (from_escape)
    /* A character written as an escape always counts 6 columns.  */
    width = 6;
  else if (ch == '\n')
    /* Consuming a newline brings the column back to 1.  */
    width = 1 - lex->position.col;
  else if (ch == '\t')
    {
      /* Advance to the column following the next multiple of 8.  */
      int col = lex->position.col;
      width = ((col + 7) & ~7) + 1 - col;
    }
  else
    width = 1;

  lex->next_columns = width;
  return ch;
}

/* Consume the character last returned by java_peek_unicode: apply its
   column adjustment and, if it was a newline, advance the line number
   and the global input location.  Only allowed after
   java_peek_unicode; the pair java_peek_unicode + java_next_unicode
   is equivalent to java_get_unicode.  */

static void java_next_unicode (void)
{
  struct java_lexer *lex = ctxp->lexer;

  /* The peeked character is no longer pending.  */
  lex->avail_unicode = 0;
  lex->position.col += lex->next_columns;

  if (lex->next_unicode != '\n')
    return;

  lex->position.line++;
#ifndef JC1_LITE
#ifdef USE_MAPPED_LOCATION
  input_location
    = linemap_line_start (&line_table, lex->position.line, 120);
#else
  input_line = lex->position.line;
#endif
#endif
}

#if 0
/* The inverse of java_next_unicode: mark the cached character as
   available again and move the column back by its recorded width.
   It is a fatal internal error to call this while a character is
   already pending.
   Compiled out (#if 0): not currently used, but could be if it would
   be cleaner or faster.
   java_peek_unicode == java_get_unicode + java_unget_unicode.
   java_get_unicode == java_peek_unicode + java_next_unicode.
   NOTE(review): only the column is restored here, not the line number
   that java_next_unicode increments on a newline -- check before
   enabling.
*/
static void java_unget_unicode ()
{
  struct java_lexer *lex = ctxp->lexer;
  if (lex->avail_unicode)
    fatal_error ("internal error - bad unget");
  lex->avail_unicode = 1;
  lex->position.col -= lex->next_columns;
}
#endif

/* Skip the rest of a C style comment.  C is the first character
   following the opening '/' and '*'.  Returns once the closing star
   and slash have been consumed, or after reporting an error if the
   input ends first.  */
static void
java_parse_end_comment (int c)
{
  while (1)
    {
      if (c == UEOF)
	{
	  java_lex_error ("Comment not terminated at end of input", 0);
	  return;
	}

      if (c == '*')
	{
	  /* Peek rather than read, so that a run of '*'s is examined
	     one star at a time.  */
	  int ahead = java_peek_unicode ();

	  if (ahead == UEOF)
	    {
	      java_lex_error ("Comment not terminated at end of input", 0);
	      return;
	    }
	  if (ahead == '/')
	    {
	      java_next_unicode ();
	      return;
	    }
	}

      c = java_get_unicode ();
    }
}

/* Scan a documentation comment, starting with character C, up to and
   including its terminator.  A keyword is only recognized at the
   start of a comment line, ignoring white space and any `*'
   characters.  The only keyword handled is @deprecated, which sets
   ctxp->deprecated.  An error is reported if the input ends before
   the comment does.  */

static void
java_parse_doc_section (int c)
{
  static const char tag[] = "@deprecated";
  int prev_was_star;

  /* Only the most recent doc comment applies to the following
     declaration, so start from a clean state.  */
  ctxp->deprecated = 0;

  /* One iteration per comment line.  The loop is left by `return'
     when the terminator is seen and by `break' at end of input.  */
  for (;;)
    {
      /* Once the tag has been seen there is no point in looking for
	 it again on later lines.  */
      if (! ctxp->deprecated)
	{
	  /* Skip leading white space and '*'s, watching for the end
	     of the comment.  */
	  while (JAVA_WHITE_SPACE_P (c) || c == '*')
	    {
	      prev_was_star = (c == '*');
	      c = java_get_unicode ();
	      if (prev_was_star && c == '/')
		return;
	    }

	  if (c == UEOF)
	    break;

	  if (c == '@')
	    {
	      int matched = 0;

	      /* Consume input for as long as it spells the tag.  On
		 exit C holds the first character not consumed.  */
	      while (tag[matched] && c == tag[matched])
		{
		  c = java_get_unicode ();
		  ++matched;
		}

	      if (c == UEOF)
		break;

	      /* The complete tag must be followed by a space or
		 newline.  A '*' is also allowed in case it appears
		 just before the end of the comment, and in this
		 position only any Unicode space character.  */
	      if (! tag[matched]
		  && (c == ' ' || c == '\n' || c == '*'
		      || java_space_char_p (c)))
		ctxp->deprecated = 1;
	    }
	}

      /* Skip the remainder of the line, again watching for the end
	 of the comment.  */
      while (c != '\n' && c != UEOF)
	{
	  prev_was_star = (c == '*');
	  c = java_get_unicode ();
	  if (prev_was_star && c == '/')
	    return;
	}

      if (c == UEOF)
	break;

      /* Step over the newline.  */
      c = java_get_unicode ();
      if (c == UEOF)
	break;
    }

  java_lex_error ("Comment not terminated at end of input", 0);
}

/* Return nonzero if C may start a Java identifier.  Callers only use
   this for C >= 128 (smaller values are handled inline), but every
   value is handled here.  */
static int
java_start_char_p (unicode_t c)
{
  /* The entry for C's 256-character page is either a small value,
     with no bits outside LETTER_MASK, giving the flags shared by the
     whole page, or a pointer to a per-character table.  */
  const char *const row = type_table[c / 256];
  unsigned long encoded = (unsigned long) row;
  int attrs = ((encoded & ~ LETTER_MASK) != 0
	       ? row[c & 255]
	       : (int) encoded);

  return attrs & LETTER_START;
}

/* Return nonzero if C may appear in a Java identifier after the
   first character.  Callers only use this for C >= 128 (smaller
   values are handled inline), but every value is handled here.  */
static int
java_part_char_p (unicode_t c)
{
  /* The entry for C's 256-character page is either a small value,
     with no bits outside LETTER_MASK, giving the flags shared by the
     whole page, or a pointer to a per-character table.  */
  const char *const row = type_table[c / 256];
  unsigned long encoded = (unsigned long) row;
  int attrs = ((encoded & ~ LETTER_MASK) != 0
	       ? row[c & 255]
	       : (int) encoded);

  return attrs & LETTER_PART;
}

/* Return nonzero if the character tables classify C as a space
   character.  */
static int
java_space_char_p (unicode_t c)
{
  /* The entry for C's 256-character page is either a small value,
     with no bits outside LETTER_MASK, giving the flags shared by the
     whole page, or a pointer to a per-character table.  */
  const char *const row = type_table[c / 256];
  unsigned long encoded = (unsigned long) row;
  int attrs = ((encoded & ~ LETTER_MASK) != 0
	       ? row[c & 255]
	       : (int) encoded);

  return attrs & LETTER_SPACE;
}

static int
java_parse_escape_sequence (void)
{
  int c;

  switch (c = java_get_unicode ())
    {
    case 'b':
      return (unicode_t)0x8;
    case 't':
      return (unicode_t)0x9;
    case 'n':
      return (unicode_t)0xa;
    case 'f':
      return (unicode_t)0xc;
    case 'r':
      return (unicode_t)0xd;
    case '"':
      return (unicode_t)0x22;
    case '\'':
      return (unicode_t)0x27;
    case '\\':
      return (unicode_t)0x5c;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
      {
	int more = 3;
	unicode_t char_lit = 0;

	if (c > '3')
	  {
	    /* According to the grammar, `\477' has a well-defined
	       meaning -- it is `\47' followed by `7'.  */
	    --more;
	  }
	char_lit = 0;
	for (;;)
	  {
	    char_lit = 8 * char_lit + c - '0';
	    if (--more == 0)
	      break;
	    c = java_peek_unicode ();
	    if (! RANGE (c, '0', '7'))
	      break;
	    java_next_unicode ();
	  }

	return char_lit;
      }
    default:
      java_lex_error ("Invalid character in escape sequence", -1);
      return JAVA_CHAR_ERROR;
    }
}

#ifndef JC1_LITE
#define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)

/* Subroutine of java_lex: converts floating-point literals to tree
   nodes.  LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
   store the result.  FFLAG indicates whether the literal was tagged
   with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
   is the column at which the literal begins, used to position any
   error report.  */

static void java_perform_atof (YYSTYPE *, char *, int, int);

static void
java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
		   int number_beginning)
{
  /* Convert the text LITERAL_TOKEN to a real constant of type float
     (FFLAG nonzero) or double, report range errors, and store the
     resulting node through SET_LVAL_NODE.  */
  REAL_VALUE_TYPE value;
  tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);

  /* The conversion is done for the machine mode of the chosen type.  */
  SET_REAL_VALUE_ATOF (value,
		       REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));

  if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
    {
      /* Out of range: report it and carry on with zero.  */
      JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
      value = DCONST0;
    }
  else if (IS_ZERO (value))
    {
      /* We check to see if the value is really 0 or if we've found an
	 underflow.  We do this in the most primitive imaginable way.  */
      int really_zero = 1;
      char *p = literal_token;
      if (*p == '-')
	++p;
      /* Scan the mantissa only (up to any exponent marker): a
	 character other than '0' or '.' means the literal was not
	 written as zero.  */
      while (*p && *p != 'e' && *p != 'E')
	{
	  if (*p != '0' && *p != '.')
	    {
	      really_zero = 0;
	      break;
	    }
	  ++p;
	}
      if (! really_zero)
	{
	  /* Temporarily move the lexer column to the start of the
	     literal while the error is reported, then restore it.  */
	  int save_col = ctxp->lexer->position.col;
	  ctxp->lexer->position.col = number_beginning;
	  java_lex_error ("Floating point literal underflow", 0);
	  ctxp->lexer->position.col = save_col;
	}
    }

  SET_LVAL_NODE (build_real (type, value));
}
#endif

static int yylex (YYSTYPE *);

static int
#ifdef JC1_LITE
yylex (YYSTYPE *java_lval)
#else
do_java_lex (YYSTYPE *java_lval)
#endif
{
  int c;
  char *string;

  /* Translation of the Unicode escape in the raw stream of Unicode
     characters. Takes care of line terminator.  */
 step1:
  /* Skip white spaces: SP, TAB and FF or ULT.  */ 
  for (;;)
    {
      c = java_peek_unicode ();
      if (c != '\n' && ! JAVA_WHITE_SPACE_P (c))
	break;
      java_next_unicode ();
    }

  /* Handle EOF here.  */
  if (c == UEOF)	/* Should probably do something here...  */
    return 0;

#ifndef JC1_LITE
#ifdef USE_MAPPED_LOCATION
  LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table,
			       ctxp->lexer->position.col);
#else
  ctxp->lexer->token_start = ctxp->lexer->position;
#endif
#endif

  /* Numeric literals.  */
  if (JAVA_ASCII_DIGIT (c) || (c == '.'))
    {
      /* This section of code is borrowed from gcc/c-lex.c.  */
#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
      int parts[TOTAL_PARTS];
      HOST_WIDE_INT high, low;
      /* End borrowed section.  */
      char literal_token [256];
      int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
      int  found_hex_digits = 0, found_non_octal_digits = -1;
      int  i;
#ifndef JC1_LITE
      int  number_beginning = ctxp->lexer->position.col;
      tree value;
#endif
     
      for (i = 0; i < TOTAL_PARTS; i++)
	parts [i] = 0;

      if (c == '0')
	{
	  java_next_unicode ();
	  c = java_peek_unicode ();
	  if (c == 'x' || c == 'X')
	    {
	      radix = 16;
	      java_next_unicode ();
	      c = java_peek_unicode ();
	    }
	  else if (JAVA_ASCII_DIGIT (c))
	    {
	      literal_token [literal_index++] = '0';
	      radix = 8;
	    }
	  else if (c == '.' || c == 'e' || c =='E')
	    {
	      literal_token [literal_index++] = '0';
	      /* Handle C during floating-point parsing.  */
	    }
	  else
	    {
	      /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}.  */
              switch (c)
		{		
		case 'L': case 'l':
		  java_next_unicode ();
		  SET_LVAL_NODE (long_zero_node);
		  return (INT_LIT_TK);
		case 'f': case 'F':
		  java_next_unicode ();
		  SET_LVAL_NODE (float_zero_node);
		  return (FP_LIT_TK);
		case 'd': case 'D':
		  java_next_unicode ();
		  SET_LVAL_NODE (double_zero_node);
		  return (FP_LIT_TK);
		default:
		  SET_LVAL_NODE (integer_zero_node);
		  return (INT_LIT_TK);
		}
	    }
	}
      /* Parse the first part of the literal, until we find something
	 which is not a number.  */
      while (radix == 16 ? JAVA_ASCII_HEXDIGIT (c) : JAVA_ASCII_DIGIT (c))
	{
	  /* We store in a string (in case it turns out to be a FP) and in
	     PARTS if we have to process a integer literal.  */
	  int numeric = hex_value (c);
	  int count;

	  /* Remember when we find a valid hexadecimal digit.  */
	  if (radix == 16)
	    found_hex_digits = 1;
          /* Remember when we find an invalid octal digit.  */
          else if (radix == 8 && numeric >= 8 && found_non_octal_digits < 0)
	    found_non_octal_digits = literal_index;

	  literal_token [literal_index++] = c;
	  /* This section of code is borrowed from gcc/c-lex.c.  */
	  for (count = 0; count < TOTAL_PARTS; count++)
	    {
	      parts[count] *= radix;
	      if (count)
		{
		  parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
		  parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
		}
	      else
		parts[0] += numeric;
	    }
	  if (parts [TOTAL_PARTS-1] != 0)
	    overflow = 1;
	  /* End borrowed section.  */
	  java_next_unicode ();
	  c = java_peek_unicode ();
	}

      /* If we have something from the FP char set but not a digit, parse
	 a FP literal.  */
      if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
	{
	  /* stage==0: seen digits only
	   * stage==1: seen '.'
	   * stage==2: seen 'e' or 'E'.
	   * stage==3: seen '+' or '-' after 'e' or 'E'.
	   * stage==4: seen type suffix ('f'/'F'/'d'/'D')
	   */
	  int stage = 0;
	  int seen_digit = (literal_index ? 1 : 0);
	  int seen_exponent = 0;
	  int fflag = 0;	/* 1 for {f,F}, 0 for {d,D}. FP literal are
				   double unless specified.  */

	  /* It is ok if the radix is 8 because this just means we've
	     seen a leading `0'.  However, radix==16 is invalid.  */
	  if (radix == 16)
	    java_lex_error ("Can't express non-decimal FP literal", 0);
	  radix = 10;

	  for (;;)
	    {
	      if (c == '.')
		{
		  if (stage < 1)
		    {
		      stage = 1;
		      literal_token [literal_index++ ] = c;
		      java_next_unicode ();
		      c = java_peek_unicode ();
		      if (literal_index == 1 && !JAVA_ASCII_DIGIT (c))
			BUILD_OPERATOR (DOT_TK);
		    }
		  else
		    java_lex_error ("Invalid character in FP literal", 0);
		}

	      if (c == 'e' || c == 'E')
		{
		  if (stage < 2)
		    {
		      /* {E,e} must have seen at least a digit.  */
		      if (!seen_digit)
			java_lex_error
                          ("Invalid FP literal, mantissa must have digit", 0);
		      seen_digit = 0;
		      seen_exponent = 1;
		      stage = 2;
		      literal_token [literal_index++] = c;
		      java_next_unicode ();
		      c = java_peek_unicode ();
		    }
		  else
		    java_lex_error ("Invalid character in FP literal", 0);
		}
	      if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
		{
		  fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
		  stage = 4;	/* So we fall through.  */
		}

	      if ((c=='-' || c =='+') && stage == 2)
		{
		  stage = 3;
		  literal_token [literal_index++] = c;
		  java_next_unicode ();
		  c = java_peek_unicode ();
		}

	      if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
		  (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
		  (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
		  (stage == 3 && JAVA_ASCII_DIGIT (c)))
		{
		  if (JAVA_ASCII_DIGIT (c))
		    seen_digit = 1;
                  if (stage == 2)
                    stage = 3;
		  literal_token [literal_index++ ] = c;
		  java_next_unicode ();
		  c = java_peek_unicode ();
		}
	      else
		{
		  if (stage == 4) /* Don't push back fF/dD.  */
		    java_next_unicode ();
		  
		  /* An exponent (if any) must have seen a digit.  */
		  if (seen_exponent && !seen_digit)
		    java_lex_error
                      ("Invalid FP literal, exponent must have digit", 0);

		  literal_token [literal_index] = '\0';

#ifndef JC1_LITE
		  java_perform_atof (java_lval, literal_token,
				     fflag, number_beginning);
#endif
		  return FP_LIT_TK;
		}
	    }
	} /* JAVA_ASCII_FPCHAR (c) */

      /* Here we get back to converting the integral literal.  */
      if (radix == 16 && ! found_hex_digits)
	java_lex_error
	  ("0x must be followed by at least one hexadecimal digit", 0);
      else if (radix == 8 && found_non_octal_digits >= 0)
	{
	  int back = literal_index - found_non_octal_digits;
	  ctxp->lexer->position.col -= back;
	  java_lex_error ("Octal literal contains digit out of range", 0);
	  ctxp->lexer->position.col += back;
	}
      else if (c == 'L' || c == 'l')
	{
	  java_next_unicode ();
	  long_suffix = 1;
	}

      /* This section of code is borrowed from gcc/c-lex.c.  */
      if (!overflow)
	{
	  bytes = GET_TYPE_PRECISION (long_type_node);
	  for (i = bytes; i < TOTAL_PARTS; i++)
	    if (parts [i])
	      {
	        overflow = 1;
		break;
	      }
	}
      high = low = 0;
      for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
	{
	  high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
					      / HOST_BITS_PER_CHAR)]
		   << (i * HOST_BITS_PER_CHAR));
	  low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
	}
      /* End borrowed section.  */

#ifndef JC1_LITE
      /* Range checking.  */
      /* Temporarily set type to unsigned.  */
      value = build_int_cst_wide (long_suffix
				  ? unsigned_long_type_node
				  : unsigned_int_type_node, low, high);
      SET_LVAL_NODE (value);

      /* For base 10 numbers, only values up to the highest value
	 (plus one) can be written.  For instance, only ints up to
	 2147483648 can be written.  The special case of the largest
	 negative value is handled elsewhere.  For other bases, any
	 number can be represented.  */
      if (overflow || (radix == 10
		       && tree_int_cst_lt (long_suffix
					   ? decimal_long_max
					   : decimal_int_max,
					   value)))
	{
	  if (long_suffix)
	    JAVA_RANGE_ERROR ("Numeric overflow for 'long' literal");
	  else
	    JAVA_RANGE_ERROR ("Numeric overflow for 'int' literal");
	}

      /* Sign extend the value.  */
      value = build_int_cst_wide (long_suffix ? long_type_node : int_type_node,
				  low, high);
      value = force_fit_type (value, 0, false, false);

      if (radix != 10)
	{
	  value = copy_node (value);
	  JAVA_NOT_RADIX10_FLAG (value) = 1;
	}
      
      SET_LVAL_NODE (value);
#endif
      return INT_LIT_TK;
    }

  /* We may have an ID here.  */
  if (JAVA_START_CHAR_P (c))
    {
      int ascii_index = 0, all_ascii = 1;

      /* Keyword, boolean literal or null literal.  */
      while (c != UEOF && JAVA_PART_CHAR_P (c))
	{
	  java_unicode_2_utf8 (c);
	  if (c >= 128)
	    all_ascii = 0;
	  java_next_unicode ();
	  ascii_index++;
	  c = java_peek_unicode ();
	}

      obstack_1grow (&temporary_obstack, '\0');
      string = obstack_finish (&temporary_obstack);

      /* If we have something all ascii, we consider a keyword, a boolean
	 literal, a null literal or an all ASCII identifier.  Otherwise,
	 this is an identifier (possibly not respecting formation rule).  */
      if (all_ascii)
	{
	  const struct java_keyword *kw;
	  if ((kw=java_keyword (string, ascii_index)))
	    {
	      switch (kw->token)
		{
		case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
		case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
		case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
		case PRIVATE_TK:      case STRICT_TK:
		  SET_MODIFIER_CTX (kw->token);
		  return MODIFIER_TK;
		case FLOAT_TK:
		  SET_LVAL_NODE (float_type_node);
		  return FP_TK;
		case DOUBLE_TK:
		  SET_LVAL_NODE (double_type_node);
		  return FP_TK;
		case BOOLEAN_TK:
		  SET_LVAL_NODE (boolean_type_node);
		  return BOOLEAN_TK;
		case BYTE_TK:
		  SET_LVAL_NODE (byte_type_node);
		  return INTEGRAL_TK;
		case SHORT_TK:
		  SET_LVAL_NODE (short_type_node);
		  return INTEGRAL_TK;
		case INT_TK:
		  SET_LVAL_NODE (int_type_node);
		  return INTEGRAL_TK;
		case LONG_TK:
		  SET_LVAL_NODE (long_type_node);
		  return INTEGRAL_TK;
		case CHAR_TK:
		  SET_LVAL_NODE (char_type_node);
		  return INTEGRAL_TK;

		  /* Keyword based literals.  */
		case TRUE_TK:
		case FALSE_TK:
		  SET_LVAL_NODE ((kw->token == TRUE_TK ? 
				  boolean_true_node : boolean_false_node));
		  return BOOL_LIT_TK;
		case NULL_TK:
		  SET_LVAL_NODE (null_pointer_node);
		  return NULL_TK;

		case ASSERT_TK:
		  if (flag_assert)
		    {
		      BUILD_OPERATOR (kw->token);
		      return kw->token;
		    }
		  else
		    break;

		  /* Some keyword we want to retain information on the location
		     they where found.  */
		case CASE_TK:
		case DEFAULT_TK:
		case SUPER_TK:
		case THIS_TK:
		case RETURN_TK:
		case BREAK_TK:
		case CONTINUE_TK:
		case TRY_TK:
		case CATCH_TK:
		case THROW_TK:
		case INSTANCEOF_TK:
		  BUILD_OPERATOR (kw->token);

		default:
		  return kw->token;
		}
	    }
	}

      java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
      return ID_TK;
    }

  java_next_unicode ();

  /* Character literals.  */
  if (c == '\'')
    {
      int char_lit;
      
      if ((c = java_get_unicode ()) == '\\')
	char_lit = java_parse_escape_sequence ();
      else
	{
	  if (c == '\n' || c == '\'')
	    java_lex_error ("Invalid character literal", 0);
	  char_lit = c;
	}

      c = java_get_unicode ();

      if ((c == '\n') || (c == UEOF))
	java_lex_error ("Character literal not terminated at end of line", 0);
      if (c != '\'')
	java_lex_error ("Syntax error in character literal", 0);

      if (char_lit == JAVA_CHAR_ERROR)
        char_lit = 0;		/* We silently convert it to zero.  */

      SET_LVAL_NODE (build_int_cst (char_type_node, char_lit));
      return CHAR_LIT_TK;
    }

  /* String literals.  */
  if (c == '"')
    {
      int no_error = 1;
      char *string;

      for (;;)
	{
	  c = java_peek_unicode ();
	  if (c == '\n' || c == UEOF) /* ULT.  */
	    {
	      java_lex_error ("String not terminated at end of line", 0);
	      break;
	    }
	  java_next_unicode ();
	  if (c == '"')
	    break;
	  if (c == '\\')
	    c = java_parse_escape_sequence ();
	  if (c == JAVA_CHAR_ERROR)
	    {
	      no_error = 0;
	      c = 0;		/* We silently convert it to zero.  */
	    }
	  java_unicode_2_utf8 (c);
	}

      obstack_1grow (&temporary_obstack, '\0');
      string = obstack_finish (&temporary_obstack);
#ifndef JC1_LITE
      if (!no_error || (c != '"'))
	java_lval->node = error_mark_node; /* FIXME: Requires further
                                              testing.  */
      else
	java_lval->node = build_string (strlen (string), string);
#endif
      obstack_free (&temporary_obstack, string);
      return STRING_LIT_TK;
    }

  switch (c)
    {
    case '/':
      /* Check for comment.  */
      switch (c = java_peek_unicode ())
	{
	case '/':
	  java_next_unicode ();
	  for (;;)
	    {
	      c = java_get_unicode ();
	      if (c == UEOF)
		{
		  /* It is ok to end a `//' comment with EOF, unless
		     we're being pedantic.  */
		  if (pedantic)
		    java_lex_error ("Comment not terminated at end of input",
				    0);
		  return 0;
		}
	      if (c == '\n')	/* ULT */
		goto step1;
	    }
	  break;

	case '*':
	  java_next_unicode ();
	  if ((c = java_get_unicode ()) == '*')
	    {
	      c = java_get_unicode ();
	      if (c == '/')
		{
		  /* Empty documentation comment.  We have to reset
		     the deprecation marker as only the most recent
		     doc comment applies.  */
		  ctxp->deprecated = 0;
		}
	      else
		java_parse_doc_section (c);
	    }
	  else
	    java_parse_end_comment ((c = java_get_unicode ()));
	  goto step1;
	  break;

	case '=':
	  java_next_unicode ();
	  BUILD_OPERATOR2 (DIV_ASSIGN_TK);

	default:
	  BUILD_OPERATOR (DIV_TK);
	}

    case '(':
      BUILD_OPERATOR (OP_TK);
    case ')':
      return CP_TK;
    case '{':
#ifndef JC1_LITE
      java_lval->operator.token = OCB_TK;
      java_lval->operator.location = BUILD_LOCATION();
#ifdef USE_MAPPED_LOCATION
      if (ctxp->ccb_indent == 1)
	ctxp->first_ccb_indent1 = input_location;
#else
      if (ctxp->ccb_indent == 1)
	ctxp->first_ccb_indent1 = input_line;
#endif
#endif
      ctxp->ccb_indent++;
      return OCB_TK;
    case '}':
      ctxp->ccb_indent--;
#ifndef JC1_LITE
      java_lval->operator.token = CCB_TK;
      java_lval->operator.location = BUILD_LOCATION();
#ifdef USE_MAPPED_LOCATION
      if (ctxp->ccb_indent == 1)
        ctxp->last_ccb_indent1 = input_location;
#else
      if (ctxp->ccb_indent == 1)
        ctxp->last_ccb_indent1 = input_line;
#endif
#endif
      return CCB_TK;
    case '[':
      BUILD_OPERATOR (OSB_TK);
    case ']':
      return CSB_TK;
    case ';':
      return SC_TK;
    case ',':
      return C_TK;
    case '.':
      BUILD_OPERATOR (DOT_TK);

      /* Operators.  */
    case '=':
      c = java_peek_unicode ();
      if (c == '=')
	{
	  java_next_unicode ();
	  BUILD_OPERATOR (EQ_TK);
	}
      else
	{
	  /* Equals is used in two different locations. In the 
	     variable_declarator: rule, it has to be seen as '=' as opposed
	     to being seen as an ordinary assignment operator in
	     assignment_operators: rule.  */
	  BUILD_OPERATOR (ASSIGN_TK);
	}
      
    case '>':
      switch ((c = java_peek_unicode ()))
	{
	case '=':
	  java_next_unicode ();
	  BUILD_OPERATOR (GTE_TK);
	case '>':
	  java_next_unicode ();
	  switch ((c = java_peek_unicode ()))
	    {
	    case '>':
	      java_next_unicode ();
	      c = java_peek_unicode ();
	      if (c == '=')
		{
		  java_next_unicode ();
		  BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
		}
	      else
		{
		  BUILD_OPERATOR (ZRS_TK);
		}
	    case '=':
	      java_next_unicode ();
	      BUILD_OPERATOR2 (SRS_ASSIGN_TK);
	    default:
	      BUILD_OPERATOR (SRS_TK);
	    }
	default:
	  BUILD_OPERATOR (GT_TK);
	}
	
    case '<':
      switch ((c = java_peek_unicode ()))
	{
	case '=':
	  java_next_unicode ();
	  BUILD_OPERATOR (LTE_TK);
	case '<':
	  java_next_unicode ();
	  if ((c = java_peek_unicode ()) == '=')
	    {
	      java_next_unicode ();
	      BUILD_OPERATOR2 (LS_ASSIGN_TK);
	    }
	  else
	    {
	      BUILD_OPERATOR (LS_TK);
	    }
	default:
	  BUILD_OPERATOR (LT_TK);
	}

    case '&':
      switch ((c = java_peek_unicode ()))
	{
	case '&':
	  java_next_unicode ();
	  BUILD_OPERATOR (BOOL_AND_TK);
	case '=':
	  java_next_unicode ();
	  BUILD_OPERATOR2 (AND_ASSIGN_TK);
	default:
	  BUILD_OPERATOR (AND_TK);
	}

    case '|':
      switch ((c = java_peek_unicode ()))
	{
	case '|':
	  java_next_unicode ();
	  BUILD_OPERATOR (BOOL_OR_TK);
	case '=':
	  java_next_unicode ();
	  BUILD_OPERATOR2 (OR_ASSIGN_TK);
	default:
	  BUILD_OPERATOR (OR_TK);
	}

    case '+':
      switch ((c = java_peek_unicode ()))
	{
	case '+':
	  java_next_unicode ();
	  BUILD_OPERATOR (INCR_TK);
	case '=':
	  java_next_unicode ();
	  BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
	default:
	  BUILD_OPERATOR (PLUS_TK);
	}

    case '-':
      switch ((c = java_peek_unicode ()))
	{
	case '-':
	  java_next_unicode ();
	  BUILD_OPERATOR (DECR_TK);
	case '=':
	  java_next_unicode ();
	  BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
	default:
	  BUILD_OPERATOR (MINUS_TK);
	}

    case '*':
      if ((c = java_peek_unicode ()) == '=')
	{
	  java_next_unicode ();
	  BUILD_OPERATOR2 (MULT_ASSIGN_TK);
	}
      else
	{
	  BUILD_OPERATOR (MULT_TK);
	}

    case '^':
      if ((c = java_peek_unicode ()) == '=')
	{
	  java_next_unicode ();
	  BUILD_OPERATOR2 (XOR_ASSIGN_TK);
	}
      else
	{
	  BUILD_OPERATOR (XOR_TK);
	}

    case '%':
      if ((c = java_peek_unicode ()) == '=')
	{
	  java_next_unicode ();
	  BUILD_OPERATOR2 (REM_ASSIGN_TK);
	}
      else
	{
	  BUILD_OPERATOR (REM_TK);
	}

    case '!':
      if ((c = java_peek_unicode()) == '=')
	{
	  java_next_unicode ();
	  BUILD_OPERATOR (NEQ_TK);
	}
      else
	{
	  BUILD_OPERATOR (NEG_TK);
	}
	  
    case '?':
      BUILD_OPERATOR (REL_QM_TK);
    case ':':
      BUILD_OPERATOR (REL_CL_TK);
    case '~':
      BUILD_OPERATOR (NOT_TK);
    }
  
  if (c == 0x1a)		/* CTRL-Z.  */
    {
      if ((c = java_peek_unicode ()) == UEOF)
	return 0;		/* Ok here.  */
    }

  /* Everything else is an invalid character in the input.  */
  {
    char lex_error_buffer [128];
    sprintf (lex_error_buffer, "Invalid character '%s' in input", 
	     java_sprint_unicode (c));
    java_lex_error (lex_error_buffer, -1);
  }
  return 0;
}

#ifndef JC1_LITE

/* Lexer entry point.  Runs do_java_lex on JAVA_LVAL with the TV_LEX
   timer pushed for the duration of the call, and hands back whatever
   do_java_lex returned.  */
static int
java_lex (YYSTYPE *java_lval)
{
  int token;

  timevar_push (TV_LEX);
  token = do_java_lex (java_lval);
  timevar_pop (TV_LEX);

  return token;
}

/* Called by the parser to decide whether a numeric overflow error is
   due.  Only the largest negative value is handled here, and the
   parser only calls this when that value is not preceded by `-'.

   An error is reported when VALUE is an INTEGER_CST that is not
   flagged with JAVA_NOT_RADIX10_FLAG and whose sign is negative.  The
   message names 'long' when VALUE's type is long_type_node and 'int'
   otherwise.  */
static void
error_if_numeric_overflow (tree value)
{
  const char *msg;

  if (TREE_CODE (value) != INTEGER_CST)
    return;
  if (JAVA_NOT_RADIX10_FLAG (value))
    return;
  if (tree_int_cst_sgn (value) >= 0)
    return;

  msg = (TREE_TYPE (value) == long_type_node
	 ? "Numeric overflow for 'long' literal"
	 : "Numeric overflow for 'int' literal");
  java_lex_error (msg, 0);
}

#endif /* JC1_LITE */

/* Append the UTF-8 encoding of UNICODE to temporary_obstack.

   Values 0x01-0x7f are written as a single byte.  Zero and values
   0x80-0x7ff are written as two bytes, so zero comes out as 0xc0 0x80
   and never as a plain NUL byte.  Every other value is written as
   three bytes (the 0x800-0xffff range).  */
static void
java_unicode_2_utf8 (unicode_t unicode)
{
  unsigned char lead, middle, trail;

  /* Plain ASCII, except NUL.  */
  if (RANGE (unicode, 0x01, 0x7f))
    {
      obstack_1grow (&temporary_obstack, (char)unicode);
      return;
    }

  /* The last byte has the same layout in the two and three byte
     forms: the low six bits behind a 10xxxxxx marker.  */
  trail = (unsigned char)(0x80 | (unicode & 0x3f));

  if (unicode == 0 || RANGE (unicode, 0x80, 0x7ff))
    {
      lead = (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6));
      obstack_1grow (&temporary_obstack, lead);
      obstack_1grow (&temporary_obstack, trail);
      return;
    }

  /* Range 0x800-0xffff.  */
  lead = (unsigned char)(0xe0 | ((unicode & 0xf000) >> 12));
  middle = (unsigned char)(0x80 | ((unicode & 0x0fc0) >> 6));
  obstack_1grow (&temporary_obstack, lead);
  obstack_1grow (&temporary_obstack, middle);
  obstack_1grow (&temporary_obstack, trail);
}

#ifndef JC1_LITE
/* Wrap NODE with build_expr_wfl and return the wrapper.  The location
   is input_location when USE_MAPPED_LOCATION is defined; otherwise it
   is the current file name together with the line and column recorded
   in the lexer's token_start.  The wrapper's TREE_TYPE is cleared
   before it is returned.  */
static tree
build_wfl_node (tree node)
{
  tree wfl;

#ifdef USE_MAPPED_LOCATION
  wfl = build_expr_wfl (node, input_location);
#else
  wfl = build_expr_wfl (node, ctxp->filename,
			ctxp->lexer->token_start.line,
			ctxp->lexer->token_start.col);
#endif

  /* Prevent java_complete_lhs from short-circuiting node (if constant).  */
  TREE_TYPE (wfl) = NULL_TREE;

  return wfl;
}
#endif

/* Report the lexical error MSG through java_error.

   The column attached to the error is the lexer's current column plus
   FORWARD times ctxp->lexer->next_columns, so a FORWARD of 0 reports
   at the current column and a negative FORWARD moves the report
   backwards.  The location state that has to be overwritten to make
   the report (input_location with mapped locations, the lexer's
   token_start otherwise) is saved first and restored before
   returning.

   When JC1_LITE is defined this function does nothing.  */
static void
java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
{
#ifndef JC1_LITE
  int col = (ctxp->lexer->position.col
	     + forward * ctxp->lexer->next_columns);
  /* Tested with #ifdef, like every other use of USE_MAPPED_LOCATION in
     this file, so that all sites always select the same variant.  */
#ifdef USE_MAPPED_LOCATION
  source_location save_location = input_location;
  LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table, col);
  
  /* Might be caught in the middle of some error report.  */
  ctxp->java_error_flag = 0;
  java_error (NULL);
  java_error (msg);
  input_location = save_location;
#else
  java_lc save = ctxp->lexer->token_start;
  ctxp->lexer->token_start.line = ctxp->lexer->position.line;
  ctxp->lexer->token_start.col = col;

  /* Might be caught in the middle of some error report.  */
  ctxp->java_error_flag = 0;
  java_error (NULL);
  java_error (msg);
  ctxp->lexer->token_start = save;
#endif
#endif
}

#ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, ends a line, and 0
   otherwise.  A '\n' is a line end by itself.  A '\r' is a line end
   too; in that case one more character is read from FP so that a
   following '\n' is swallowed as part of the same line end, and any
   other character (but not EOF) is pushed back with ungetc.  FP is
   only touched when C is '\r'.  */
static int
java_is_eol (FILE *fp, int c)
{
  int following;

  if (c == '\n')
    return 1;
  if (c != '\r')
    return 0;

  /* Carriage return: absorb the line feed of a CR/LF pair.  */
  following = getc (fp);
  if (following != EOF && following != '\n')
    ungetc (following, fp);
  return 1;
}
#endif

/* Build the two-line text used to point at a source position: the
   contents of line LINE of FILENAME, a newline, and then padding
   followed by a `^' marker.  The text is NUL terminated, built on
   temporary_obstack and returned with obstack_finish.

   The first line of the file is line 1.  COL selects where the marker
   goes; two extra padding characters are always added in front of it:
     COL >= 0   that many padding characters (all spaces);
     COL == -1  at the end of the line (the CR/LF position);
     COL == -2  at the first non white space character of the line; in
		this case tabs found in the source line are repeated in
		the padding.

   If the file ends before LINE is reached, a "file too short" notice
   is used in place of the line contents.  Failure to open FILENAME is
   reported with fatal_error.

   When JC1_LITE is defined this function just returns 0.  */
char *
java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
		   int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
{
#ifdef JC1_LITE
  return 0;
#else
  /* Dumb implementation. Doesn't try to cache or optimize things.  */

  FILE *fp;
  int c, ccol, cline = 1;
  int current_line_col = 0;
  int first_non_space = 0;
  /* Separate flag: column 0 is a valid value for first_non_space, so
     the column itself cannot also mean "not found yet".  */
  int seen_non_space = 0;

  if (!(fp = fopen (filename, "r")))
    fatal_error ("can't open %s: %m", filename);

  /* Skip everything up to the start of the requested line.  */
  while (cline != line)
    {
      c = getc (fp);
      if (c == EOF)
	{
	  static const char msg[] = "<<file too short - unexpected EOF>>";
	  obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
	  goto have_line;
	}
      if (java_is_eol (fp, c))
	cline++;
    }

  /* Gather the chars of the current line in a buffer.  */
  for (;;)
    {
      c = getc (fp);
      if (c < 0 || java_is_eol (fp, c))
	break;
      if (!seen_non_space && !JAVA_WHITE_SPACE_P (c))
	{
	  first_non_space = current_line_col;
	  seen_non_space = 1;
	}
      obstack_1grow (&temporary_obstack, c);
      current_line_col++;
    }
 have_line:

  obstack_1grow (&temporary_obstack, '\n');

  if (col == -1)
    {
      col = current_line_col;
      first_non_space = 0;
    }
  else if (col == -2)
    col = first_non_space;
  else
    first_non_space = 0;

  /* Place the '^' at the right position.  */
  for (col += 2, ccol = 0; ccol < col; ccol++)
    {
      char pad = ' ';

      /* Compute \t when reaching first_non_space.  */
      if (first_non_space)
	{
	  /* Growing the obstack may move the object under construction,
	     so its base address is fetched again on every iteration
	     instead of being cached across the obstack_1grow calls.  */
	  const char *base = obstack_base (&temporary_obstack);
	  if (base [ccol] == '\t')
	    pad = '\t';
	}
      obstack_1grow (&temporary_obstack, pad);
    }
  obstack_grow0 (&temporary_obstack, "^", 1);

  fclose (fp);
  return obstack_finish (&temporary_obstack);
#endif
}

#ifndef JC1_LITE
/* Compare the LENGTH bytes at STR against the NUL terminated string
   NAME.  One character is fetched from STR with UTF8_GET for each
   character of NAME (presumably decoding UTF-8 and advancing STR --
   see the macro's definition).

   Returns the difference between the fetched character and the NAME
   character at the first mismatch.  If all of NAME matched, returns 0
   when STR was consumed exactly up to its limit and 1 otherwise.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; expected++)
    {
      int decoded = UTF8_GET (str, limit);
      int delta = decoded - *expected;

      if (delta != 0)
	return delta;
    }

  return (str != limit) ? 1 : 0;
}

/* A sorted list of all C++ keywords, including the double-underscore
   spellings listed below.  cxx_keyword_p looks names up in this table
   with a binary search, so entries must be kept in ascending byte
   order when the table is edited.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof",
  "__alignof__",
  "__asm",
  "__asm__",
  "__attribute",
  "__attribute__",
  "__builtin_va_arg",
  "__complex",
  "__complex__",
  "__const",
  "__const__",
  "__extension__",
  "__imag",
  "__imag__",
  "__inline",
  "__inline__",
  "__label__",
  "__null",
  "__real",
  "__real__",
  "__restrict",
  "__restrict__",
  "__signed",
  "__signed__",
  "__typeof",
  "__typeof__",
  "__volatile",
  "__volatile__",
  "and",
  "and_eq",
  "asm",
  "auto",
  "bitand",
  "bitor",
  "bool",
  "break",
  "case",
  "catch",
  "char",
  "class",
  "compl",
  "const",
  "const_cast",
  "continue",
  "default",
  "delete",
  "do",
  "double",
  "dynamic_cast",
  "else",
  "enum",
  "explicit",
  "export",
  "extern",
  "false",
  "float",
  "for",
  "friend",
  "goto",
  "if",
  "inline",
  "int",
  "long",
  "mutable",
  "namespace",
  "new",
  "not",
  "not_eq",
  "operator",
  "or",
  "or_eq",
  "private",
  "protected",
  "public",
  "register",
  "reinterpret_cast",
  "return",
  "short",
  "signed",
  "sizeof",
  "static",
  "static_cast",
  "struct",
  "switch",
  "template",
  "this",      
  "throw",
  "true",
  "try",
  "typedef",
  "typeid",
  "typename",
  "typeof",
  "union",
  "unsigned",
  "using",
  "virtual",
  "void",
  "volatile",
  "wchar_t",
  "while",
  "xor",
  "xor_eq"
};

/* Return 1 if the LENGTH bytes at NAME spell an entry of cxx_keywords,
   optionally followed by any number of `$' characters, and 0
   otherwise.  */

int
cxx_keyword_p (const char *name, int length)
{
  int lo = 0;
  int hi = ARRAY_SIZE (cxx_keywords);
  int probe = (lo + hi) / 2;
  int prev_probe = -1;

  /* Binary search over the sorted table.  LO is only ever moved onto
     the probed slot, never past it, so the search is over once the
     midpoint stops moving.  */
  while (probe != prev_probe)
    {
      const char *keyword = cxx_keywords[probe];
      int keyword_len = strlen (keyword);
      int cmp_len = length < keyword_len ? length : keyword_len;
      int order = utf8_cmp ((const unsigned char *) name, cmp_len, keyword);

      if (order == 0)
	{
	  /* We've found a match if all the remaining characters are `$'.  */
	  int tail = cmp_len;

	  while (tail < length && name[tail] == '$')
	    tail++;
	  if (tail == length)
	    return 1;

	  /* NAME continues past the keyword, so it sorts after it.  */
	  order = 1;
	}

      if (order < 0)
	hi = probe;
      else
	lo = probe;

      prev_probe = probe;
      probe = (lo + hi) / 2;
    }

  return 0;
}
#endif /* JC1_LITE */