/* cppfiles.c */


/* Part of CPP library.  (include file handling)
   Copyright (C) 1986, 87, 89, 92 - 95, 98, 1999 Free Software Foundation, Inc.
   Written by Per Bothner, 1994.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Split out of cpplib.c, Zack Weinberg, Oct 1998

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

 In other words, you are welcome to use, share and improve this program.
 You are forbidden to forbid anyone else to use, share and improve
 what you give them.   Help stamp out software-hoarding!  */

#include "config.h"
#include "system.h"
#include "cpplib.h"

/* The entry points to this file are: find_include_file, finclude,
   include_hash, append_include_chain, deps_output, and file_cleanup.
   file_cleanup is only called through CPP_BUFFER(pfile)->cleanup,
   so it's static anyway. */

/* Forward declarations for the file-local helpers defined below.
   The parameter lists are wrapped in PROTO (), which is not defined
   in this file.  */
static struct include_hash *redundant_include_p
					PROTO ((cpp_reader *,
						struct include_hash *,
						struct file_name_list *));
static struct file_name_map *read_name_map	PROTO ((cpp_reader *,
							const char *));
static char *read_filename_string	PROTO ((int, FILE *));
static char *remap_filename 		PROTO ((cpp_reader *, char *,
						struct file_name_list *));
static long read_and_prescan		PROTO ((cpp_reader *, cpp_buffer *,
						int, size_t));
static struct file_name_list *actual_directory PROTO ((cpp_reader *, char *));

static void initialize_input_buffer	PROTO ((cpp_reader *, int,
						struct stat *));

/* Compiled out: this declaration is currently disabled.  */
#if 0
static void hack_vms_include_specification PROTO ((char *));
#endif

/* INO_T_EQ (A, B) tests whether two inode values are equal.
   Windows does not natively support inodes, and neither does MSDOS,
   so there the macro is always 0 (no two directories ever compare
   equal).  VMS has non-numeric inodes, so they are compared bytewise.
   Everywhere else a plain == is used.  */
#ifdef VMS
#define INO_T_EQ(a, b) (!bcmp((char *) &(a), (char *) &(b), sizeof (a)))
#elif (defined _WIN32 && !defined CYGWIN && ! defined (_UWIN)) \
       || defined __MSDOS__
#define INO_T_EQ(a, b) 0
#else
#define INO_T_EQ(a, b) ((a) == (b))
#endif

/* Merge the four include chains together in the order quote, bracket,
   system, after.  Remove duplicate dirs (as determined by
   INO_T_EQ()).  The system_include and after_include chains are never
   referred to again after this function; all access is through the
   bracket_include path.

   For the future: Check if the directory is empty (but
   how?) and possibly preload the include hash. */

void
merge_include_chains (opts)
     struct cpp_options *opts;
{
  struct file_name_list *prev, *cur, *other;
  struct file_name_list *quote, *brack, *systm, *after;
  struct file_name_list *qtail, *btail, *stail, *atail;

  qtail = opts->pending->quote_tail;
  btail = opts->pending->brack_tail;
  stail = opts->pending->systm_tail;
  atail = opts->pending->after_tail;

  quote = opts->pending->quote_head;
  brack = opts->pending->brack_head;
  systm = opts->pending->systm_head;
  after = opts->pending->after_head;

  /* Paste together bracket, system, and after include chains. */
  if (stail)
    stail->next = after;
  else
    systm = after;
  if (btail)
    btail->next = systm;
  else
    brack = systm;

  /* This is a bit tricky.
     First we drop dupes from the quote-include list.
     Then we drop dupes from the bracket-include list.
     Finally, if qtail and brack are the same directory,
     we cut out qtail.

     We can't just merge the lists and then uniquify them because
     then we may lose directories from the <> search path that should
     be there; consider -Ifoo -Ibar -I- -Ifoo -Iquux. It is however
     safe to treat -Ibar -Ifoo -I- -Ifoo -Iquux as if written
     -Ibar -I- -Ifoo -Iquux.

     Note that this algorithm is quadratic in the number of -I switches,
     which is acceptable since there aren't usually that many of them.  */

  for (cur = quote, prev = NULL; cur; cur = cur->next)
    {
      for (other = quote; other != cur; other = other->next)
        if (INO_T_EQ (cur->ino, other->ino)
	    && cur->dev == other->dev)
          {
	    if (opts->verbose)
	      cpp_notice ("ignoring duplicate directory `%s'\n", cur->name);

	    prev->next = cur->next;
	    free (cur->name);
	    free (cur);
	    cur = prev;
	    break;
	  }
      prev = cur;
    }
  qtail = prev;

  for (cur = brack; cur; cur = cur->next)
    {
      for (other = brack; other != cur; other = other->next)
        if (INO_T_EQ (cur->ino, other->ino)
	    && cur->dev == other->dev)
          {
	    if (opts->verbose)
	      cpp_notice ("ignoring duplicate directory `%s'\n", cur->name);

	    prev->next = cur->next;
	    free (cur->name);
	    free (cur);
	    cur = prev;
	    break;
	  }
      prev = cur;
    }

  if (quote)
    {
      if (INO_T_EQ (qtail->ino, brack->ino) && qtail->dev == brack->dev)
        {
	  if (quote == qtail)
	    {
	      if (opts->verbose)
		cpp_notice ("ignoring duplicate directory `%s'\n",
			    quote->name);

	      free (quote->name);
	      free (quote);
	      quote = brack;
	    }
	  else
	    {
	      cur = quote;
	      while (cur->next != qtail)
		  cur = cur->next;
	      cur->next = brack;
	      if (opts->verbose)
		cpp_notice ("ignoring duplicate directory `%s'\n",
			    qtail->name);

	      free (qtail->name);
	      free (qtail);
	    }
	}
      else
	  qtail->next = brack;
    }
  else
      quote = brack;

  opts->quote_include = quote;
  opts->bracket_include = brack;
}

/* Find the entry for FNAME in PFILE's table of all includes, which is
   keyed by the name exactly as written on the #include line.  If no
   entry exists, return 0 when ADD is zero; otherwise append a fresh
   entry to the end of its bucket and return it.  A fresh entry has
   its next, next_this_file, foundhere, buf and limit fields cleared;
   nshort and control_macro are NOT set here and must be filled in by
   the caller.  The ->next_this_file chain stores all different files
   with the same #include name.  The hash is a plain sum of the
   characters and could probably be improved a bit. */

struct include_hash *
include_hash (pfile, fname, add)
     cpp_reader *pfile;
     char *fname;
     int add;
{
  struct include_hash **slot;
  struct include_hash *entry;
  unsigned int sum = 0;
  char *p;

  for (p = fname; *p; p++)
    sum += *p;

  /* Walk the bucket through a pointer to the link field, so that on
     a miss SLOT is exactly where the new entry has to be stored.  */
  slot = &pfile->all_include_files[sum % ALL_INCLUDE_HASHSIZE];
  for (; *slot; slot = &(*slot)->next)
    if (strcmp ((*slot)->nshort, fname) == 0)
      return *slot;

  if (!add)
    return 0;

  entry = (struct include_hash *) xmalloc (sizeof (struct include_hash));
  entry->next = NULL;
  entry->next_this_file = NULL;
  entry->foundhere = NULL;
  entry->buf = NULL;
  entry->limit = NULL;
  *slot = entry;

  return entry;
}

/* Decide whether the file described by IHASH has to be read again.
   Returns 0 if it has never been included before,
         -1 (cast to a pointer) if it has been and need not be again,
	 or the IHASH chain entry for the file that must be reread.
   "Never before" is with respect to the position in ILIST: only
   entries whose foundhere is one of the directories on ILIST count.

   This will not detect redundancies involving odd uses of the
   `current directory' rule for "" includes.  They aren't quite
   pathological, but I think they are rare enough not to worry about.
   The simplest example is:

   top.c:
   #include "a/a.h"
   #include "b/b.h"

   a/a.h:
   #include "../b/b.h"

   and the problem is that for `current directory' includes,
   ihash->foundhere is not on any of the global include chains,
   so the pointer comparison below may fail even when
   the directories are in fact the same.  */

static struct include_hash *
redundant_include_p (pfile, ihash, ilist)
     cpp_reader *pfile;
     struct include_hash *ihash;
     struct file_name_list *ilist;
{
  struct include_hash *candidate;
  struct file_name_list *dir;

  if (ihash->foundhere == NULL)
    return 0;

  for (candidate = ihash; candidate; candidate = candidate->next_this_file)
    for (dir = ilist; dir; dir = dir->next)
      {
	if (candidate->foundhere != dir)
	  continue;

	/* The control_macro works like this: If it's NULL, the file
	   is to be included again.  If it's "", the file is never to
	   be included again.  If it's a string, the file is not to be
	   included again if the string is the name of a defined macro. */
	if (candidate->control_macro == NULL)
	  return candidate;
	if (candidate->control_macro[0] == '\0')
	  return (struct include_hash *)-1;
	if (cpp_lookup (pfile, candidate->control_macro, -1, -1))
	  return (struct include_hash *)-1;
	return candidate;
      }

  return 0;
}

/* Cleanup hook installed on file buffers by finclude.  Releases the
   text held in PBUF->buf and, if PFILE's system_include_depth is
   nonzero, decrements it.  Always returns 0.  */
static int
file_cleanup (pbuf, pfile)
     cpp_buffer *pbuf;
     cpp_reader *pfile;
{
  if (pbuf->buf != 0)
    {
      free (pbuf->buf);
      pbuf->buf = 0;
    }

  /* Never let the depth counter drop below zero.  */
  if (pfile->system_include_depth != 0)
    pfile->system_include_depth -= 1;

  return 0;
}

/* Search for include file FNAME in the include chain starting at
   SEARCH_START.  Return -2 if this file doesn't need to be included
   (because it was included already and it's marked idempotent),
   -1 if an error occurred, or a file descriptor open on the file.
   *IHASH is set to point to the include hash entry for this file, and
   *BEFORE is 1 if the file was included before (but needs to be read
   again).  When the search fails (file not found in any directory, or
   found but unreadable), *IHASH is set to (struct include_hash *)-1.

   The scratch buffer used to build candidate path names is owned by
   this function; strings returned by remap_filename may belong to a
   file name map and are therefore only ever read or copied here,
   never written into, reallocated or freed.  */
int
find_include_file (pfile, fname, search_start, ihash, before)
     cpp_reader *pfile;
     char *fname;
     struct file_name_list *search_start;
     struct include_hash **ihash;
     int *before;
{
  struct file_name_list *l;
  struct include_hash *ih, *jh;
  int f, len;
  char *name, *path;
  
  ih = include_hash (pfile, fname, 1);
  jh = redundant_include_p (pfile, ih,
			    fname[0] == '/' ? ABSOLUTE_PATH : search_start);

  if (jh != 0)
    {
      *before = 1;
      *ihash = jh;

      if (jh == (struct include_hash *)-1)
	return -2;
      else
	return open (jh->name, O_RDONLY, 0666);
    }

  if (ih->foundhere)
    /* A file is already known by this name, but it's not the same file.
       Allocate another include_hash block and add it to the next_this_file
       chain.  From here on JH is the predecessor of the new block, so
       that the block can be unlinked again if the search fails. */
    {
      jh = (struct include_hash *)xmalloc (sizeof (struct include_hash));
      while (ih->next_this_file) ih = ih->next_this_file;

      ih->next_this_file = jh;
      jh = ih;
      ih = ih->next_this_file;

      ih->next = NULL;
      ih->next_this_file = NULL;
      ih->foundhere = NULL;
      ih->buf = NULL;
      ih->limit = NULL;
    }
  *before = 0;
  *ihash = ih;
  ih->nshort = xstrdup (fname);
  ih->control_macro = NULL;
  
  /* If the pathname is absolute, just open it. */ 
  if (fname[0] == '/')
    {
      ih->foundhere = ABSOLUTE_PATH;
      ih->name = ih->nshort;
      return open (ih->name, O_RDONLY, 0666);
    }

  /* Search directory path, trying to open the file. */

  len = strlen (fname);
  name = xmalloc (len + pfile->max_include_len + 2 + INCLUDE_LEN_FUDGE);

  for (l = search_start; l; l = l->next)
    {
      bcopy (l->name, name, l->nlen);
      name[l->nlen] = '/';
      strcpy (&name[l->nlen+1], fname);
      simplify_pathname (name);

      /* PATH is what we actually try to open.  It is either our own
	 scratch buffer or a string owned by the file name map.  */
      path = name;
      if (CPP_OPTIONS (pfile)->remap)
	path = remap_filename (pfile, name, l);

      f = open (path, O_RDONLY|O_NONBLOCK|O_NOCTTY, 0666);
#ifdef EACCES
      if (f == -1 && errno == EACCES)
	{
	  cpp_error(pfile, "included file `%s' exists but is not readable",
		    path);
	  goto fail;
	}
#endif

      if (f >= 0)
        {
	  ih->foundhere = l;
	  if (path == name)
	    ih->name = xrealloc (name, strlen (name)+1);
	  else
	    {
	      /* Keep a private copy; the map keeps its own string.  */
	      ih->name = xstrdup (path);
	      free (name);
	    }
	  return f;
        }
    }

 fail:
  /* Undo the next_this_file link made above, if any.  An entry that
     lives directly in the hash table stays there (with foundhere
     still NULL) so the name remains known.  */
  if (jh)
    {
      jh->next_this_file = NULL;
      free (ih->nshort);
      free (ih);
    }
  free (name);
  *ihash = (struct include_hash *)-1;
  return -1;
}

/* The file_name_map structure holds a mapping of file names for a
   particular directory.  This mapping is read from the file named
   FILE_NAME_MAP_FILE in that directory.  Such a file can be used to
   map filenames on a file system with severe filename restrictions,
   such as DOS.  The format of the file name map file is just a series
   of lines with two tokens on each line.  The first token is the name
   to map, and the second token is the actual name to use.  */

struct file_name_map
{
  /* Next mapping read from the same map file, or NULL at the end.  */
  struct file_name_map *map_next;
  /* First token on the line: the name to map.  */
  char *map_from;
  /* Second token on the line: the name to use instead.  read_name_map
     prefixes it with the directory name unless it starts with '/'.  */
  char *map_to;
};

/* Name of the map file looked for in each directory.  */
#define FILE_NAME_MAP_FILE "header.gcc"

/* Read a space delimited string of unlimited length from a stdio
   file.  CH is the first character of the string, already read from
   F by the caller; it may be EOF or a space character, in which case
   the result is the empty string.  The character that terminated the
   string is pushed back onto F.  Returns a freshly allocated,
   NUL-terminated string that the caller must free.  */

static char *
read_filename_string (ch, f)
     int ch;
     FILE *f;
{
  char *alloc, *set;
  int len;

  len = 20;
  set = alloc = xmalloc (len + 1);
  /* EOF must be tested first: it is negative and therefore not a
     valid index into is_space.  */
  if (ch != EOF && ! is_space[ch])
    {
      *set++ = ch;
      while ((ch = getc (f)) != EOF && ! is_space[ch])
	{
	  if (set - alloc == len)
	    {
	      /* Buffer full: double it, always leaving one spare byte
		 for the terminating NUL.  */
	      len *= 2;
	      alloc = xrealloc (alloc, len + 1);
	      set = alloc + len / 2;
	    }
	  *set++ = ch;
	}
    }
  *set = '\0';
  /* Pushing back EOF is a harmless no-op.  */
  ungetc (ch, f);
  return alloc;
}

/* This structure holds a linked list of file name maps, one per directory.  */

struct file_name_map_list
{
  /* Next directory's entry in the list.  */
  struct file_name_map_list *map_list_next;
  /* Directory name this entry was read for (a private copy).  */
  char *map_list_name;
  /* Head of the mappings for that directory; read_name_map stores
     (struct file_name_map *)-1 here when the map file cannot be
     opened.  */
  struct file_name_map *map_list_map;
};

/* Read the file name map file for DIRNAME.

   Results are cached on CPP_OPTIONS (pfile)->map_list, keyed by
   DIRNAME, so each map file is read at most once.  Returns the head
   of the list of mappings for the directory, which is NULL if the
   map file exists but contains no entries, or the sentinel value
   (struct file_name_map *)-1 if the map file could not be opened.
   Callers must check for the sentinel before walking the list.  */

static struct file_name_map *
read_name_map (pfile, dirname)
     cpp_reader *pfile;
     const char *dirname;
{
  struct file_name_map_list *map_list_ptr;
  char *name;
  FILE *f;

  for (map_list_ptr = CPP_OPTIONS (pfile)->map_list; map_list_ptr;
       map_list_ptr = map_list_ptr->map_list_next)
    if (! strcmp (map_list_ptr->map_list_name, dirname))
      return map_list_ptr->map_list_map;

  map_list_ptr = ((struct file_name_map_list *)
		  xmalloc (sizeof (struct file_name_map_list)));
  map_list_ptr->map_list_name = xstrdup (dirname);
  /* Start with an empty list; entries are pushed onto the front
     below, so this value becomes the terminator of the chain.  */
  map_list_ptr->map_list_map = NULL;

  name = (char *) alloca (strlen (dirname) + strlen (FILE_NAME_MAP_FILE) + 2);
  strcpy (name, dirname);
  if (*dirname)
    strcat (name, "/");
  strcat (name, FILE_NAME_MAP_FILE);
  f = fopen (name, "r");
  if (!f)
    map_list_ptr->map_list_map = (struct file_name_map *)-1;
  else
    {
      int ch;
      int dirlen = strlen (dirname);

      while ((ch = getc (f)) != EOF)
	{
	  char *from, *to;
	  struct file_name_map *ptr;

	  /* Skip leading whitespace and blank lines.  */
	  if (is_space[ch])
	    continue;
	  from = read_filename_string (ch, f);
	  while ((ch = getc (f)) != EOF && is_hor_space[ch])
	    ;
	  to = read_filename_string (ch, f);

	  ptr = ((struct file_name_map *)
		 xmalloc (sizeof (struct file_name_map)));
	  ptr->map_from = from;

	  /* Make the real filename absolute.  */
	  if (*to == '/')
	    ptr->map_to = to;
	  else
	    {
	      ptr->map_to = xmalloc (dirlen + strlen (to) + 2);
	      strcpy (ptr->map_to, dirname);
	      ptr->map_to[dirlen] = '/';
	      strcpy (ptr->map_to + dirlen + 1, to);
	      free (to);
	    }	      

	  ptr->map_next = map_list_ptr->map_list_map;
	  map_list_ptr->map_list_map = ptr;

	  /* Ignore anything else on the line.  */
	  while ((ch = getc (f)) != '\n')
	    if (ch == EOF)
	      break;
	}
      fclose (f);
    }
  
  map_list_ptr->map_list_next = CPP_OPTIONS (pfile)->map_list;
  CPP_OPTIONS (pfile)->map_list = map_list_ptr;

  return map_list_ptr->map_list_map;
}  

/* Remap NAME based on the file_name_map (if any) for LOC.

   NAME is the full candidate path (LOC's directory, a slash, and the
   name from the #include line).  Returns either NAME itself when no
   mapping applies, or the map_to string of the matching map entry.
   A returned map_to string is owned by the map: the caller must not
   modify or free it.  */

static char *
remap_filename (pfile, name, loc)
     cpp_reader *pfile;
     char *name;
     struct file_name_list *loc;
{
  struct file_name_map *map;
  const char *from, *p, *dir;

  if (! loc->name_map)
    loc->name_map = read_name_map (pfile,
				   loc->name
				   ? loc->name : ".");

  /* No map file in the search directory itself: leave NAME alone.  */
  if (loc->name_map == (struct file_name_map *)-1)
    return name;
  
  /* First check the mapping for the search directory, using the part
     of NAME that follows the directory prefix.  */
  from = name + strlen (loc->name) + 1;
  
  for (map = loc->name_map; map; map = map->map_next)
    if (!strcmp (map->map_from, from))
      return map->map_to;

  /* Try to find a mapping file for the particular directory we are
     looking in.  Thus #include <sys/types.h> will look up sys/types.h
     in /usr/include/header.gcc and look up types.h in
     /usr/include/sys/header.gcc.  */
  p = rindex (name, '/');
  if (!p)
    p = name;
  if (loc->name
      && strlen (loc->name) == (size_t) (p - name)
      && !strncmp (loc->name, name, p - name))
    /* FILENAME is in SEARCHPTR, which we've already checked.  */
    return name;

  if (p == name)
    {
      dir = ".";
      from = name;
    }
  else
    {
      char * newdir = (char *) alloca (p - name + 1);
      bcopy (name, newdir, p - name);
      newdir[p - name] = '\0';
      dir = newdir;
      from = p + 1;
    }
  
  /* read_name_map returns the -1 sentinel, not NULL, when DIR has no
     map file; that value must not be walked as a list.  The lookup
     key is the final path component FROM, which is what the map file
     in DIR lists.  */
  map = read_name_map (pfile, dir);
  if (map != (struct file_name_map *)-1)
    for (; map; map = map->map_next)
      if (! strcmp (map->map_from, from))
	return map->map_to;

  return name;
}

/* Load the file open on FD into the buffer on the top of PFILE's
   stack and set that buffer up for scanning.  IHASH is the include
   hash entry for the file; its name is used for diagnostics and as
   the buffer's file name.

   Returns 1 on success (FD has been closed).  On failure an error is
   reported, the top buffer is popped, FD is closed and 0 is returned.

   The caller is responsible for the cpp_push_buffer.  */

int
finclude (pfile, fd, ihash)
     cpp_reader *pfile;
     int fd;
     struct include_hash *ihash;
{
  struct stat sb;
  size_t guess;
  long nbytes;
  cpp_buffer *fp;

  /* The second call turns off nonblocking mode.  */
  if (fstat (fd, &sb) < 0
      || fcntl (fd, F_SETFL, 0) == -1)
    goto report_errno;

  fp = CPP_BUFFER (pfile);

  /* For a plain file we know the size up front and can allocate the
     whole buffer at once.  For a pipe or terminal we cannot, so we
     start with 4k.  Anything else (block device, directory, ...) is
     refused.

     Systems disagree on how pipes look in st_mode: some use S_ISFIFO,
     some S_ISSOCK, and some buggy ones zero nearly the whole struct
     stat.  All of those are accepted below.

     In every case read_and_prescan grows the buffer if the guess
     turns out to be too small.  */

  if (S_ISREG (sb.st_mode))
    {
      /* off_t may be wider than size_t, i.e. a file can be larger
	 than the address space.  Such a file cannot be handled, so
	 check that the size survives the conversion.  */
      guess = (size_t) sb.st_size;
      if ((unsigned HOST_WIDEST_INT) guess
	  != (unsigned HOST_WIDEST_INT) sb.st_size)
	{
	  cpp_error (pfile, "file `%s' is too large", ihash->name);
	  goto bail;
	}
    }
  else
    {
      /* Any character device is permitted: the sensible ones are
	 ttys and /dev/null, but weeding out the others is too hard.
	 The last test catches 4.x (x<4) derivatives whose fstat() of
	 a socket or pipe returns a mostly zeroed stat struct.  */
      int streamlike = (S_ISFIFO (sb.st_mode) || S_ISSOCK (sb.st_mode)
			|| S_ISCHR (sb.st_mode)
			|| (sb.st_mode == 0 && sb.st_nlink == 0
			    && sb.st_size == 0));

      if (!streamlike)
	{
	  cpp_error (pfile, "`%s' is not a file, pipe, or tty", ihash->name);
	  goto bail;
	}

      /* Size unknown until read; 4k is a decent first guess.  */
      guess = 4096;
    }

  if (pfile->input_buffer == NULL)
    initialize_input_buffer (pfile, fd, &sb);

  /* Read the file, converting end-of-line characters and trigraphs
     (if enabled). */
  fp->ihash = ihash;
  fp->fname = ihash->name;
  fp->nominal_fname = fp->fname;
  nbytes = read_and_prescan (pfile, fp, fd, guess);
  if (nbytes < 0)
    goto bail;
  if (nbytes == 0)
    ihash->control_macro = "";  /* never re-include */

  close (fd);

  fp->alimit = fp->buf + nbytes;
  fp->rlimit = fp->alimit;
  fp->cur = fp->buf;
  if (ihash->foundhere != ABSOLUTE_PATH)
    fp->system_header_p = ihash->foundhere->sysp;
  fp->lineno = 1;
  fp->colno = 1;
  fp->line_base = fp->buf;
  fp->cleanup = file_cleanup;

  /* The ->actual_dir field is only used when ignore_srcdir is not in effect;
     see do_include */
  if (!CPP_OPTIONS (pfile)->ignore_srcdir)
    fp->actual_dir = actual_directory (pfile, fp->fname);

  pfile->input_stack_listing_current = 0;
  return 1;

 report_errno:
  cpp_error_from_errno (pfile, ihash->name);
 bail:
  cpp_pop_buffer (pfile);
  close (fd);
  return 0;
}

/* Return the file_name_list entry for the directory part of the path
   FNAME, creating it and pushing it onto PFILE's actual_dirs list if
   it is not there yet.  A path without any slash maps to ".", and a
   path whose only slash is the leading one maps to "/".  As a side
   effect, PFILE's max_include_len is raised if the directory name is
   longer.  These entries are used to implement current-directory ""
   include searching; a new entry's next pointer is the head of the
   quote include chain. */

static struct file_name_list *
actual_directory (pfile, fname)
     cpp_reader *pfile;
     char *fname;
{
  struct file_name_list *ent;
  char *dirname = xstrdup (fname);
  char *slash = rindex (dirname, '/');
  size_t dirlen;

  /* Cut the copy of FNAME down to its directory component.  */
  if (slash == NULL)
    {
      dirname[0] = '.';
      dirname[1] = '\0';
      dirlen = 1;
    }
  else if (slash == dirname)
    {
      slash[1] = '\0';
      dirlen = 1;
    }
  else
    {
      *slash = '\0';
      dirlen = slash - dirname;
    }

  if (dirlen > pfile->max_include_len)
    pfile->max_include_len = dirlen;

  /* Reuse an existing entry for this directory if there is one.  */
  for (ent = pfile->actual_dirs; ent; ent = ent->alloc)
    if (strcmp (ent->name, dirname) == 0)
      break;

  if (ent)
    {
      free (dirname);
      return ent;
    }

  /* Not found, make a new one; it takes ownership of DIRNAME. */
  ent = (struct file_name_list *) xmalloc (sizeof (struct file_name_list));
  ent->name = dirname;
  ent->nlen = dirlen;
  ent->next = CPP_OPTIONS (pfile)->quote_include;
  ent->alloc = pfile->actual_dirs;
  ent->sysp = CPP_BUFFER (pfile)->system_header_p;
  ent->name_map = NULL;

  pfile->actual_dirs = ent;
  return ent;
}

/* Work out the line and column reached after scanning the bytes from
   START up to (not including) LIMIT.  *LINEP holds the line number
   at START on entry and the updated number on exit; it is advanced
   once per '\n' or '\r'.  *COLP is an output only: it counts from 0
   if no line break is seen, and restarts at 1 after each line break.
   Used only by read_and_prescan. */
static void
find_position (start, limit, linep, colp)
     U_CHAR *start;
     U_CHAR *limit;
     unsigned long *linep;
     unsigned long *colp;
{
  unsigned long nline = *linep;
  unsigned long ncol = 0;
  U_CHAR *p;

  for (p = start; p < limit; p++)
    {
      if (*p == '\n' || *p == '\r')
	{
	  nline++;
	  ncol = 1;
	}
      else
	ncol++;
    }

  *linep = nline;
  *colp = ncol;
}

/* Read the entire contents of file DESC into buffer BUF.  LEN is how
   much memory to allocate initially; more will be allocated if
   necessary.  Convert end-of-line markers (\n, \r, \r\n, \n\r) to
   canonical form (\n).  If enabled, convert and/or warn about
   trigraphs.  Convert backslash-newline to a one-character escape
   (\r) and remove it from "embarrassing" places (i.e. the middle of a
   token).  If there is no newline at the end of the file, add one and
   warn.  Returns -1 on failure, or the actual length of the data to
   be scanned.

   This function does a lot of work, and can be a serious performance
   bottleneck.  It has been tuned heavily; make sure you understand it
   before hacking.  The common case - no trigraphs, Unix style line
   breaks, backslash-newline set off by whitespace, newline at EOF -
   has been optimized at the expense of the others.  The performance
   penalty for DOS style line breaks (\r\n) is about 15%.
   
   Warnings lose particularly heavily since we have to determine the
   line number, which involves scanning from the beginning of the file
   or from the last warning.  The penalty for the absence of a newline
   at the end of reload1.c is about 60%.  (reload1.c is 329k.)

   If your file has more than one kind of end-of-line marker, you
   will get messed-up line numbering.  */

/* Table of characters that can't be handled in the inner loop.
   Keep these contiguous to optimize the performance of the code generated
   for the switch that uses them.  These are the values stored in the
   input_speccase table, which is indexed by character:
   SPECCASE_EMPTY marks an ordinary character that is copied verbatim;
   the others mark NUL, carriage return, backslash and question mark
   respectively.  */
#define SPECCASE_EMPTY     0
#define SPECCASE_NUL       1
#define SPECCASE_CR        2
#define SPECCASE_BACKSLASH 3
#define SPECCASE_QUESTION  4

/* PFILE supplies the shared input_buffer and input_speccase tables,
   FP receives the finished text in fp->buf, DESC is the descriptor to
   read from, and LEN is the initial size of the output buffer.
   Returns the number of bytes to be scanned, 0 for an empty file (in
   which case fp->buf is not touched and nothing stays allocated), or
   -1 after reporting an error.  */
static long
read_and_prescan (pfile, fp, desc, len)
     cpp_reader *pfile;
     cpp_buffer *fp;
     int desc;
     size_t len;
{
  U_CHAR *buf = (U_CHAR *) xmalloc (len);
  U_CHAR *ip, *op, *line_base;
  U_CHAR *ibase;
  U_CHAR *speccase = pfile->input_speccase;
  unsigned long line;
  unsigned int deferred_newlines;
  int count;
  size_t offset;

  offset = 0;
  op = buf;
  line_base = buf;
  line = 1;
  ibase = pfile->input_buffer + 2;
  deferred_newlines = 0;

  for (;;)
    {
    read_next:

      count = read (desc, pfile->input_buffer + 2, pfile->input_buffer_len);
      if (count < 0)
	goto error;
      else if (count == 0)
	break;

      /* IBASE may point up to two bytes before the start of the data
	 just read, at characters pushed back from the previous block.
	 Scanning starts there; IBASE is then reset for this block.  */
      offset += count;
      ip = ibase;
      ibase = pfile->input_buffer + 2;
      /* Two NULs mark the end of the block for the scanning loops.  */
      ibase[count] = ibase[count+1] = '\0';

      if (offset > len)
	{
	  size_t delta_op;
	  size_t delta_line_base;
	  len *= 2;
	  if (offset > len)
	    /* len overflowed.
	       This could happen if the file is larger than half the
	       maximum address space of the machine. */
	    goto too_big;

	  /* The buffer may move; re-derive the pointers into it.  */
	  delta_op = op - buf;
	  delta_line_base = line_base - buf;
	  buf = (U_CHAR *) xrealloc (buf, len);
	  op = buf + delta_op;
	  line_base = buf + delta_line_base;
	}

      for (;;)
	{
	  unsigned int span = 0;

	  /* Deal with \-newline in the middle of a token. */
	  if (deferred_newlines)
	    {
	      while (speccase[ip[span]] == SPECCASE_EMPTY
		     && ip[span] != '\n'
		     && ip[span] != '\t'
		     && ip[span] != ' ')
		span++;
	      memcpy (op, ip, span);
	      op += span;
	      ip += span;
	      /* The token has ended at whitespace: emit the escapes
		 that were held back.  */
	      if (*ip == '\n' || *ip == '\t' || *ip == ' ')
		while (deferred_newlines)
		  deferred_newlines--, *op++ = '\r';
	      span = 0;
	    }

	  /* Copy as much as we can without special treatment. */
	  while (speccase[ip[span]] == SPECCASE_EMPTY) span++;
	  memcpy (op, ip, span);
	  op += span;
	  ip += span;

	  switch (speccase[*ip++])
	    {
	    case SPECCASE_NUL:  /* \0 */
	      ibase[-1] = op[-1];
	      goto read_next;

	    case SPECCASE_CR:  /* \r */
	      if (*ip == '\n')
		ip++;
	      else if (*ip == '\0')
		{
		  /* End of block: push the \r back and look at it
		     again together with the next block.  */
		  *--ibase = '\r';
		  goto read_next;
		}
	      else if (ip[-2] == '\n')
		continue;
	      *op++ = '\n';
	      break;

	    case SPECCASE_BACKSLASH:  /* \ */
	    backslash:
	    {
	      /* If we're at the end of the intermediate buffer,
		 we have to shift the backslash down to the start
		 and come back next pass. */
	      if (*ip == '\0')
		{
		  *--ibase = '\\';
		  goto read_next;
		}
	      else if (*ip == '\n')
		{
		  ip++;
		  if (*ip == '\r') ip++;
		  if (*ip == '\n' || *ip == '\t' || *ip == ' ')
		    *op++ = '\r';
		  else if (op[-1] == '\t' || op[-1] == ' '
			   || op[-1] == '\r' || op[-1] == '\n')
		    *op++ = '\r';
		  else
		    deferred_newlines++;
		  line++;
		  line_base = op;
		}
	      else if (*ip == '\r')
		{
		  ip++;
		  if (*ip == '\n') ip++;
		  else if (*ip == '\0')
		    {
		      *--ibase = '\r';
		      *--ibase = '\\';
		      goto read_next;
		    }
		  else if (*ip == '\r' || *ip == '\t' || *ip == ' ')
		    *op++ = '\r';
		  else
		    deferred_newlines++;
		  line++;
		  line_base = op;
		}
	      else
		*op++ = '\\';
	    }
	    break;

	    case SPECCASE_QUESTION: /* ? */
	      {
		unsigned int d;
		/* If we're at the end of the intermediate buffer,
		   we have to shift the ?'s down to the start and
		   come back next pass. */
		d = ip[0];
		if (d == '\0')
		  {
		    *--ibase = '?';
		    goto read_next;
		  }
		if (d != '?')
		  {
		    *op++ = '?';
		    break;
		  }
		d = ip[1];
		if (d == '\0')
		  {
		    *--ibase = '?';
		    *--ibase = '?';
		    goto read_next;
		  }
		if (!trigraph_table[d])
		  {
		    *op++ = '?';
		    break;
		  }

		if (CPP_OPTIONS (pfile)->warn_trigraphs)
		  {
		    unsigned long col;
		    find_position (line_base, op, &line, &col);
		    line_base = op - col;
		    cpp_warning_with_line (pfile, line, col,
					   "trigraph ??%c encountered", d);
		  }
		if (CPP_OPTIONS (pfile)->trigraphs)
		  {
		    if (trigraph_table[d] == '\\')
		      goto backslash;
		    else
		      *op++ = trigraph_table[d];
		  }
		else
		  {
		    *op++ = '?';
		    *op++ = '?';
		    *op++ = d;
		  }
		ip += 2;
	      }
	    }
	}
    }

  if (offset == 0)
    {
      /* Empty file: nothing is handed to FP, so release the buffer.  */
      free (buf);
      return 0;
    }

  /* Deal with pushed-back chars at true EOF.
     This may be any of:  ?? ? \ \r \n \\r \\n.
     \r must become \n, \\r or \\n must become \r.
     We know we have space already. */
  if (ibase == pfile->input_buffer)
    {
      if (*ibase == '?')
	{
	  *op++ = '?';
	  *op++ = '?';
	}
      else
	*op++ = '\r';
    }
  else if (ibase == pfile->input_buffer + 1)
    {
      if (*ibase == '\r')
	*op++ = '\n';
      else
	*op++ = *ibase;
    }

  if (op[-1] != '\n')
    {
      unsigned long col;
      /* Bytes actually written so far.  This can be smaller than
	 OFFSET (the number of bytes read), e.g. when \r\n pairs were
	 collapsed, so it is the value that must be used both for the
	 space check and to re-derive OP after the buffer moves.  */
      size_t used = op - buf;

      find_position (line_base, op, &line, &col);
      cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
      if (used + 1 > len)
	{
	  len += 1;
	  if (used + 1 > len)
	    goto too_big;
	  buf = (U_CHAR *) xrealloc (buf, len);
	  op = buf + used;
	}
      *op++ = '\n';
    }

  /* Give back the slack unless it is too small to be worth it.  */
  fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
  return op - buf;

 too_big:
  cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
  free (buf);
  return -1;

 error:
  cpp_error_from_errno (pfile, fp->fname);
  free (buf);
  return -1;
}

/* Allocate and install PFILE's `input_speccase' table and
   `input_buffer'.  Only read_and_prescan uses them, but they are
   large and somewhat expensive to set up, so they are created once
   and kept for the whole cpp run.  FD and ST describe the first file
   being read; they are consulted only to size the buffer when that
   file is not a regular file.  */

static void
initialize_input_buffer (pfile, fd, st)
     cpp_reader *pfile;
     int fd;
     struct stat *st;
{
  U_CHAR *speccase, *inbuf;
  long bufsize = -1;

  /* Build the table of characters that the read_and_prescan inner
     loop cannot copy blindly.  Keep the number of non-EMPTY entries
     as small as possible.  '?' is special only when trigraphs are
     converted or warned about.  */
  speccase = xmalloc (1 << CHAR_BIT);
  memset (speccase, SPECCASE_EMPTY, 1 << CHAR_BIT);
  speccase['\0'] = SPECCASE_NUL;
  speccase['\r'] = SPECCASE_CR;
  speccase['\\'] = SPECCASE_BACKSLASH;
  if (CPP_OPTIONS (pfile)->trigraphs || CPP_OPTIONS (pfile)->warn_trigraphs)
    speccase['?'] = SPECCASE_QUESTION;

  pfile->input_speccase = speccase;

  /* Pick the input buffer size.  Normal C source files are smaller
     than eight K, so that is the minimum.  When reading something
     other than a regular file, ask for the pipe buffer size so that
     the input buffer is at least that big.  */
  if (! S_ISREG (st->st_mode))
    {
#ifdef _PC_PIPE_BUF
      bufsize = fpathconf (fd, _PC_PIPE_BUF);
#endif
      if (bufsize == -1)
#ifdef PIPE_BUF
	bufsize = PIPE_BUF;
#else
	bufsize = 8192;
#endif
    }

  if (bufsize < 8192)
    bufsize = 8192;

  /* BUFSIZE bytes of buffer proper, 2 to detect running off the end
     without address arithmetic all the time, and 2 for pushback in
     the case there's a potential trigraph or end-of-line digraph at
     the end of a block. */
  inbuf = xmalloc (bufsize + 2 + 2);
  pfile->input_buffer = inbuf;
  pfile->input_buffer_len = bufsize;
}

/* Append STRING to PFILE's `deps_buffer' (the text produced for the
   -M switch), growing the buffer as needed and keeping it
   NUL-terminated.  SPACER is ':' for targets (a colon is appended),
   ' ' for dependencies (a space is prepended unless at the start of
   a line), and zero for text to be inserted literally.  If STRING
   would run past MAX_OUTPUT_COLUMNS, a backslash-newline and an
   indent are emitted first.  An empty STRING is ignored.  */

void
deps_output (pfile, string, spacer)
     cpp_reader *pfile;
     char *string;
     int spacer;
{
  int len;
  int wrap = 0;

  if (*string == '\0')
    return;

  len = strlen (string);

#ifndef MAX_OUTPUT_COLUMNS
#define MAX_OUTPUT_COLUMNS 72
#endif
  /* WRAP is the length of the line-continuation sequence, if one is
     needed before STRING.  */
  if (pfile->deps_column > 0
      && (pfile->deps_column + len) > MAX_OUTPUT_COLUMNS)
    {
      wrap = 5;
      pfile->deps_column = 0;
    }

  /* The 8 covers the separator characters and the final NUL.  */
  if (pfile->deps_size + len + wrap + 8 > pfile->deps_allocated_size)
    {
      pfile->deps_allocated_size = (pfile->deps_size + len + 50) * 2;
      pfile->deps_buffer = (char *) xrealloc (pfile->deps_buffer,
					      pfile->deps_allocated_size);
    }

  if (wrap)
    {
      memcpy (pfile->deps_buffer + pfile->deps_size, " \\\n  ", 5);
      pfile->deps_size += 5;
    }

  if (spacer == ' ' && pfile->deps_column > 0)
    {
      pfile->deps_buffer[pfile->deps_size] = ' ';
      pfile->deps_size++;
    }

  memcpy (pfile->deps_buffer + pfile->deps_size, string, len);
  pfile->deps_size += len;
  pfile->deps_column += len;

  if (spacer == ':')
    {
      pfile->deps_buffer[pfile->deps_size] = ':';
      pfile->deps_size++;
    }

  pfile->deps_buffer[pfile->deps_size] = 0;
}

/* Simplify a path name in place, deleting redundant components.  This
   reduces OS overhead and guarantees that equivalent paths compare
   the same (modulo symlinks).

   Transforms made:
   foo/bar/../quux	foo/quux
   foo/./bar		foo/bar
   foo//bar		foo/bar
   /../quux		/quux
   //quux		//quux  (POSIX allows leading // as a namespace escape)
   foo/			foo
   foo/bar/..		foo

   Guarantees no trailing slashes, except that a path consisting only
   of the root ("/" or "//") is left alone.  All transforms reduce the
   length of the string, with one exception: the empty string becomes
   ".", so PATH must have room for at least two bytes.
 */
void
simplify_pathname (path)
    char *path;
{
    char *from, *to;
    char *base;
    char *root;
    int absolute = 0;

#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
    /* Convert all backslashes to slashes. */
    for (from = path; *from; from++)
	if (*from == '\\') *from = '/';
    
    /* Skip over leading drive letter if present. */
    if (ISALPHA (path[0]) && path[1] == ':')
	from = to = &path[2];
    else
	from = to = path;
#else
    from = to = path;
#endif
    
    /* Remove redundant initial /s.  */
    if (*from == '/')
    {
	absolute = 1;
	to++;
	from++;
	if (*from == '/')
	{
	    if (*++from == '/')
		/* 3 or more initial /s are equivalent to 1 /.  */
		while (*++from == '/');
	    else
		/* On some hosts // differs from /; Posix allows this.  */
		to++;
	}
    }
    /* BASE is the point that ".." may not back up past; it moves
       forward when a leading "../" has to be kept in a relative
       path.  ROOT stays put: it marks the end of the root prefix (or
       the start of a relative path) and is used when trimming.  */
    base = root = to;
    
    for (;;)
    {
	while (*from == '/')
	    from++;

	if (from[0] == '.' && from[1] == '/')
	    from += 2;
	else if (from[0] == '.' && from[1] == '\0')
	    goto done;
	else if (from[0] == '.' && from[1] == '.' && from[2] == '/')
	{
	    if (base == to)
	    {
		if (absolute)
		    from += 3;
		else
		{
		    *to++ = *from++;
		    *to++ = *from++;
		    *to++ = *from++;
		    base = to;
		}
	    }
	    else
	    {
		/* Drop the previous component: step over its trailing
		   slash and back up to the slash before it.  */
		to -= 2;
		while (to > base && *to != '/') to--;
		if (*to == '/')
		    to++;
		from += 3;
	    }
	}
	else if (from[0] == '.' && from[1] == '.' && from[2] == '\0')
	{
	    if (base == to)
	    {
		if (!absolute)
		{
		    *to++ = *from++;
		    *to++ = *from++;
		}
	    }
	    else
	    {
		to -= 2;
		while (to > base && *to != '/') to--;
		if (*to == '/')
		    to++;
	    }
	    goto done;
	}
	else
	    /* Copy this component and trailing /, if any.  */
	    while ((*to++ = *from++) != '/')
	    {
		if (!to[-1])
		{
		    to--;
		    goto done;
		}
	    }
	
    }
    
 done:
    /* Trim trailing slash.  TO points one past the last character
       kept, so the slash to examine is to[-1]; whatever is at to[0]
       is leftover input and says nothing about the result.  The
       to > root test protects the root prefix itself.  */
    if (to > root && to[-1] == '/')
	to--;

    /* Change the empty string to "." so that stat() on the result
       will always work. */
    if (to == path)
      *to++ = '.';
    
    *to = '\0';

    return;
}

/* It is not clear when this should be used if at all, so I've
   disabled it until someone who understands VMS can look at it. */
#if 0

/* Under VMS we need to fix up the "include" specification filename.

   Rules for possible conversions

	fullname		tried paths

	name			name
	./dir/name		[.dir]name
	/dir/name		dir:name
	/name			[000000]name, name
	dir/name		dir:[000000]name, dir:name, dir/name
	dir1/dir2/name		dir1:[dir2]name, dir1:[000000.dir2]name
	path:/name		path:[000000]name, path:name
	path:/dir/name		path:[000000.dir]name, path:[dir]name
	path:dir/name		path:[dir]name
	[path]:[dir]name	[path.dir]name
	path/[dir]name		[path.dir]name

   The path:/name input is constructed when expanding <> includes.

   FULLNAME is rewritten in place and nothing is returned.  A name
   without any '/' is left untouched.

   NOTE(review): the converted name is assembled in a fixed 512-byte
   local buffer with no bounds checks and is then copied back over
   FULLNAME, where it can be longer than the input (e.g. when
   "[000000]" is inserted).  Presumably callers must guarantee short
   names and spare room in FULLNAME -- confirm before re-enabling.

   NOTE(review): `local_ptr[-1]' is examined in several places; when
   the VMS part already ends in `]' or `>' nothing has been stored in
   the local buffer at that point, so that read looks like it falls
   before the start of the buffer -- confirm before re-enabling.  */


static void
hack_vms_include_specification (fullname)
     char *fullname;
{
  register char *basename, *unixname, *local_ptr, *first_slash;
  int f, check_filename_before_returning, must_revert;
  char Local[512];

  check_filename_before_returning = 0;
  must_revert = 0;
  /* See if we can find a 1st slash. If not, there's no path information.  */
  first_slash = index (fullname, '/');
  if (first_slash == 0)
    return;				/* Nothing to do!!! */

  /* construct device spec if none given.  */

  if (index (fullname, ':') == 0)
    {

      /* If fullname has a slash, take it as device spec.  */

      if (first_slash == fullname)
	{
	  first_slash = index (fullname+1, '/');	/* 2nd slash ? */
	  if (first_slash)
	    *first_slash = ':';				/* make device spec  */
	  for (basename = fullname; *basename != 0; basename++)
	    *basename = *(basename+1);			/* remove leading slash  */
	}
      else if ((first_slash[-1] != '.')		/* keep ':/', './' */
	    && (first_slash[-1] != ':')
	    && (first_slash[-1] != ']'))	/* or a vms path  */
	{
	  *first_slash = ':';
	}
      else if ((first_slash[1] == '[')		/* skip './' in './[dir'  */
	    && (first_slash[-1] == '.'))
	fullname += 2;
    }

  /* Get part after first ':' (basename[-1] == ':')
     or last '/' (basename[-1] == '/').  */

  basename = base_name (fullname);

  local_ptr = Local;			/* initialize */

  /* We are trying to do a number of things here.  First of all, we are
     trying to hammer the filenames into a standard format, such that later
     processing can handle them.
     
     If the file name contains something like [dir.], then it recognizes this
     as a root, and strips the ".]".  Later processing will add whatever is
     needed to get things working properly.
     
     If no device is specified, then the first directory name is taken to be
     a device name (or a rooted logical).  */

  /* Point to the UNIX filename part (which needs to be fixed!)
     but skip vms path information.
     [basename != fullname since first_slash != 0].  */

  if ((basename[-1] == ':')		/* vms path spec.  */
      || (basename[-1] == ']')
      || (basename[-1] == '>'))
    unixname = basename;
  else
    unixname = fullname;

  if (*unixname == '/')
    unixname++;

  /* If the directory spec is not rooted, we can just copy
     the UNIX filename part and we are done.  */

  if (((basename - fullname) > 1)
     && (  (basename[-1] == ']')
        || (basename[-1] == '>')))
    {
      if (basename[-2] != '.')
	{

	/* The VMS part ends in a `]', and the preceding character is not a `.'.
	   -> PATH]:/name (basename = '/name', unixname = 'name')
	   We strip the `]', and then splice the two parts of the name in the
	   usual way.  Given the default locations for include files in cccp.c,
	   we will only use this code if the user specifies alternate locations
	   with the /include (-I) switch on the command line.  */

	  basename -= 1;	/* Strip "]" */
	  unixname--;		/* backspace */
	}
      else
	{

	/* The VMS part has a ".]" at the end, and this will not do.  Later
	   processing will add a second directory spec, and this would be a syntax
	   error.  Thus we strip the ".]", and thus merge the directory specs.
	   We also backspace unixname, so that it points to a '/'.  This inhibits the
	   generation of the 000000 root directory spec (which does not belong here
	   in this case).  */

	  basename -= 2;	/* Strip ".]" */
	  unixname--;		/* backspace */
	}
    }

  else

    {

      /* We drop in here if there is no VMS style directory specification yet.
         If there is no device specification either, we make the first dir a
         device and try that.  If we do not do this, then we will be essentially
         searching the users default directory (as if they did a #include "asdf.h").
        
         Then all we need to do is to push a '[' into the output string. Later
         processing will fill this in, and close the bracket.  */

      if ((unixname != fullname)	/* vms path spec found.  */
	 && (basename[-1] != ':'))
	*local_ptr++ = ':';		/* dev not in spec.  take first dir */

      *local_ptr++ = '[';		/* Open the directory specification */
    }

    if (unixname == fullname)		/* no vms dir spec.  */
      {
	must_revert = 1;
	if ((first_slash != 0)		/* unix dir spec.  */
	    && (*unixname != '/')	/* not beginning with '/'  */
	    && (*unixname != '.'))	/* or './' or '../'  */
	  *local_ptr++ = '.';		/* dir is local !  */
      }

  /* at this point we assume that we have the device spec, and (at least
     the opening "[" for a directory specification.  We may have directories
     specified already.

     If there are no other slashes then the filename will be
     in the "root" directory.  Otherwise, we need to add
     directory specifications.  */

  if (index (unixname, '/') == 0)
    {
      /* if no directories specified yet and none are following.  */
      if (local_ptr[-1] == '[')
	{
	  /* Just add "000000]" as the directory string */
	  strcpy (local_ptr, "000000]");
	  local_ptr += strlen (local_ptr);
	  check_filename_before_returning = 1; /* we might need to fool with this later */
	}
    }
  else
    {

      /* As long as there are still subdirectories to add, do them.  */
      while (index (unixname, '/') != 0)
	{
	  /* If this token is "." we can ignore it
	       if it's not at the beginning of a path.  */
	  if ((unixname[0] == '.') && (unixname[1] == '/'))
	    {
	      /* remove it at beginning of path.  */
	      if (  ((unixname == fullname)		/* no device spec  */
		    && (fullname+2 != basename))	/* starts with ./ */
							/* or  */
		 || ((basename[-1] == ':')		/* device spec  */
		    && (unixname-1 == basename)))	/* and ./ afterwards  */
		*local_ptr++ = '.';		 	/* make '[.' start of path.  */
	      unixname += 2;
	      continue;
	    }

	  /* Add a subdirectory spec. Do not duplicate "." */
	  if (  local_ptr[-1] != '.'
	     && local_ptr[-1] != '['
	     && local_ptr[-1] != '<')
	    *local_ptr++ = '.';

	  /* If this is ".." then the spec becomes "-" */
	  if (  (unixname[0] == '.')
	     && (unixname[1] == '.')
	     && (unixname[2] == '/'))
	    {
	      /* Add "-" and skip the ".." */
	      if ((local_ptr[-1] == '.')
		  && (local_ptr[-2] == '['))
		local_ptr--;			/* prevent [.-  */
	      *local_ptr++ = '-';
	      unixname += 3;
	      continue;
	    }

	  /* Copy the subdirectory */
	  while (*unixname != '/')
	    *local_ptr++= *unixname++;

	  unixname++;			/* Skip the "/" */
	}

      /* Close the directory specification */
      if (local_ptr[-1] == '.')		/* no trailing periods */
	local_ptr--;

      if (local_ptr[-1] == '[')		/* no dir needed */
	local_ptr--;
      else
	*local_ptr++ = ']';
    }

  /* Now add the filename.  */

  while (*unixname)
    *local_ptr++ = *unixname++;
  *local_ptr = 0;

  /* Now append it to the original VMS spec.  */

  strcpy ((must_revert==1)?fullname:basename, Local);

  /* If we put a [000000] in the filename, try to open it first. If this fails,
     remove the [000000], and return that name.  This provides flexibility
     to the user in that they can use both rooted and non-rooted logical names
     to point to the location of the file.  */

  if (check_filename_before_returning)
    {
      f = open (fullname, O_RDONLY, 0666);
      if (f >= 0)
	{
	  /* The file name is OK as it is, so return it as is.  */
	  close (f);
	  return;
	}

      /* The filename did not work.  Try to remove the [000000] from the name,
	 and return it.  */

      basename = index (fullname, '[');
      local_ptr = index (fullname, ']') + 1;
      /* Source and destination lie in the same string and overlap once
	 the tail is longer than the "[000000]" being removed, so strcpy
	 is not usable here; bcopy copes with overlap.  */
      bcopy (local_ptr, basename, strlen (local_ptr) + 1);

    }

  return;
}
#endif	/* 0 -- VMS include-name fixup, disabled */