archive_read_support_format_ar.c   [plain text]


/*-
 * Copyright (c) 2007 Kai Wang
 * Copyright (c) 2007 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"
__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_ar.c 201101 2009-12-28 03:06:27Z kientzle $");

#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif

#include "archive.h"
#include "archive_entry.h"
#include "archive_private.h"
#include "archive_read_private.h"

struct ar {
	off_t	 entry_bytes_remaining;
	off_t	 entry_offset;
	off_t	 entry_padding;
	char	*strtab;
	size_t	 strtab_size;
};

/*
 * Define structure of the "ar" header.
 */
#define AR_name_offset 0
#define AR_name_size 16
#define AR_date_offset 16
#define AR_date_size 12
#define AR_uid_offset 28
#define AR_uid_size 6
#define AR_gid_offset 34
#define AR_gid_size 6
#define AR_mode_offset 40
#define AR_mode_size 8
#define AR_size_offset 48
#define AR_size_size 10
#define AR_fmag_offset 58
#define AR_fmag_size 2

static int	archive_read_format_ar_bid(struct archive_read *a);
static int	archive_read_format_ar_cleanup(struct archive_read *a);
static int	archive_read_format_ar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_ar_skip(struct archive_read *a);
static int	archive_read_format_ar_read_header(struct archive_read *a,
		    struct archive_entry *e);
static uint64_t	ar_atol8(const char *p, unsigned char_cnt);
static uint64_t	ar_atol10(const char *p, unsigned char_cnt);
static int	ar_parse_gnu_filename_table(struct archive_read *a);
static int	ar_parse_common_header(struct ar *ar, struct archive_entry *,
		    const char *h);

int
archive_read_support_format_ar(struct archive *_a)
{
	struct archive_read *a = (struct archive_read *)_a;
	struct ar *ar;
	int r;

	ar = (struct ar *)malloc(sizeof(*ar));
	if (ar == NULL) {
		archive_set_error(&a->archive, ENOMEM,
		    "Can't allocate ar data");
		return (ARCHIVE_FATAL);
	}
	memset(ar, 0, sizeof(*ar));
	ar->strtab = NULL;

	r = __archive_read_register_format(a,
	    ar,
	    "ar",
	    archive_read_format_ar_bid,
	    NULL,
	    archive_read_format_ar_read_header,
	    archive_read_format_ar_read_data,
	    archive_read_format_ar_skip,
	    archive_read_format_ar_cleanup);

	if (r != ARCHIVE_OK) {
		free(ar);
		return (r);
	}
	return (ARCHIVE_OK);
}

static int
archive_read_format_ar_cleanup(struct archive_read *a)
{
	struct ar *ar;

	ar = (struct ar *)(a->format->data);
	if (ar->strtab)
		free(ar->strtab);
	free(ar);
	(a->format->data) = NULL;
	return (ARCHIVE_OK);
}

static int
archive_read_format_ar_bid(struct archive_read *a)
{
	const void *h;

	if (a->archive.archive_format != 0 &&
	    (a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) !=
	    ARCHIVE_FORMAT_AR)
		return(0);

	/*
	 * Verify the 8-byte file signature.
	 * TODO: Do we need to check more than this?
	 */
	if ((h = __archive_read_ahead(a, 8, NULL)) == NULL)
		return (-1);
	if (strncmp((const char*)h, "!<arch>\n", 8) == 0) {
		return (64);
	}
	return (-1);
}

static int
archive_read_format_ar_read_header(struct archive_read *a,
    struct archive_entry *entry)
{
	char filename[AR_name_size + 1];
	struct ar *ar;
	uint64_t number; /* Used to hold parsed numbers before validation. */
	ssize_t bytes_read;
	size_t bsd_name_length, entry_size;
	char *p, *st;
	const void *b;
	const char *h;
	int r;

	ar = (struct ar*)(a->format->data);

	if (a->archive.file_position == 0) {
		/*
		 * We are now at the beginning of the archive,
		 * so we need first consume the ar global header.
		 */
		__archive_read_consume(a, 8);
		/* Set a default format code for now. */
		a->archive.archive_format = ARCHIVE_FORMAT_AR;
	}

	/* Read the header for the next file entry. */
	if ((b = __archive_read_ahead(a, 60, &bytes_read)) == NULL)
		/* Broken header. */
		return (ARCHIVE_EOF);
	__archive_read_consume(a, 60);
	h = (const char *)b;

	/* Verify the magic signature on the file header. */
	if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
		archive_set_error(&a->archive, EINVAL,
		    "Incorrect file header signature");
		return (ARCHIVE_WARN);
	}

	/* Copy filename into work buffer. */
	strncpy(filename, h + AR_name_offset, AR_name_size);
	filename[AR_name_size] = '\0';

	/*
	 * Guess the format variant based on the filename.
	 */
	if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
		/* We don't already know the variant, so let's guess. */
		/*
		 * Biggest clue is presence of '/': GNU starts special
		 * filenames with '/', appends '/' as terminator to
		 * non-special names, so anything with '/' should be
		 * GNU except for BSD long filenames.
		 */
		if (strncmp(filename, "#1/", 3) == 0)
			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
		else if (strchr(filename, '/') != NULL)
			a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
		else if (strncmp(filename, "__.SYMDEF", 9) == 0)
			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
		/*
		 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
		 * if name exactly fills 16-byte field?  If so, we
		 * can't assume entries without '/' are BSD. XXX
		 */
	}

	/* Update format name from the code. */
	if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
		a->archive.archive_format_name = "ar (GNU/SVR4)";
	else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
		a->archive.archive_format_name = "ar (BSD)";
	else
		a->archive.archive_format_name = "ar";

	/*
	 * Remove trailing spaces from the filename.  GNU and BSD
	 * variants both pad filename area out with spaces.
	 * This will only be wrong if GNU/SVR4 'ar' implementations
	 * omit trailing '/' for 16-char filenames and we have
	 * a 16-char filename that ends in ' '.
	 */
	p = filename + AR_name_size - 1;
	while (p >= filename && *p == ' ') {
		*p = '\0';
		p--;
	}

	/*
	 * Remove trailing slash unless first character is '/'.
	 * (BSD entries never end in '/', so this will only trim
	 * GNU-format entries.  GNU special entries start with '/'
	 * and are not terminated in '/', so we don't trim anything
	 * that starts with '/'.)
	 */
	if (filename[0] != '/' && *p == '/')
		*p = '\0';

	/*
	 * '//' is the GNU filename table.
	 * Later entries can refer to names in this table.
	 */
	if (strcmp(filename, "//") == 0) {
		/* This must come before any call to _read_ahead. */
		ar_parse_common_header(ar, entry, h);
		archive_entry_copy_pathname(entry, filename);
		archive_entry_set_filetype(entry, AE_IFREG);
		/* Get the size of the filename table. */
		number = ar_atol10(h + AR_size_offset, AR_size_size);
		if (number > SIZE_MAX) {
			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
			    "Filename table too large");
			return (ARCHIVE_FATAL);
		}
		entry_size = (size_t)number;
		if (entry_size == 0) {
			archive_set_error(&a->archive, EINVAL,
			    "Invalid string table");
			return (ARCHIVE_WARN);
		}
		if (ar->strtab != NULL) {
			archive_set_error(&a->archive, EINVAL,
			    "More than one string tables exist");
			return (ARCHIVE_WARN);
		}

		/* Read the filename table into memory. */
		st = malloc(entry_size);
		if (st == NULL) {
			archive_set_error(&a->archive, ENOMEM,
			    "Can't allocate filename table buffer");
			return (ARCHIVE_FATAL);
		}
		ar->strtab = st;
		ar->strtab_size = entry_size;
		if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
			return (ARCHIVE_FATAL);
		memcpy(st, b, entry_size);
		__archive_read_consume(a, entry_size);
		/* All contents are consumed. */
		ar->entry_bytes_remaining = 0;
		archive_entry_set_size(entry, ar->entry_bytes_remaining);

		/* Parse the filename table. */
		return (ar_parse_gnu_filename_table(a));
	}

	/*
	 * GNU variant handles long filenames by storing /<number>
	 * to indicate a name stored in the filename table.
	 * XXX TODO: Verify that it's all digits... Don't be fooled
	 * by "/9xyz" XXX
	 */
	if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
		number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
		/*
		 * If we can't look up the real name, warn and return
		 * the entry with the wrong name.
		 */
		if (ar->strtab == NULL || number > ar->strtab_size) {
			archive_set_error(&a->archive, EINVAL,
			    "Can't find long filename for entry");
			archive_entry_copy_pathname(entry, filename);
			/* Parse the time, owner, mode, size fields. */
			ar_parse_common_header(ar, entry, h);
			return (ARCHIVE_WARN);
		}

		archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
		/* Parse the time, owner, mode, size fields. */
		return (ar_parse_common_header(ar, entry, h));
	}

	/*
	 * BSD handles long filenames by storing "#1/" followed by the
	 * length of filename as a decimal number, then prepends the
	 * the filename to the file contents.
	 */
	if (strncmp(filename, "#1/", 3) == 0) {
		/* Parse the time, owner, mode, size fields. */
		/* This must occur before _read_ahead is called again. */
		ar_parse_common_header(ar, entry, h);

		/* Parse the size of the name, adjust the file size. */
		number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
		bsd_name_length = (size_t)number;
		/* Guard against the filename + trailing NUL
		 * overflowing a size_t and against the filename size
		 * being larger than the entire entry. */
		if (number > (uint64_t)(bsd_name_length + 1)
		    || (off_t)bsd_name_length > ar->entry_bytes_remaining) {
			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
			    "Bad input file size");
			return (ARCHIVE_FATAL);
		}
		ar->entry_bytes_remaining -= bsd_name_length;
		/* Adjust file size reported to client. */
		archive_entry_set_size(entry, ar->entry_bytes_remaining);

		/* Read the long name into memory. */
		if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
			    "Truncated input file");
			return (ARCHIVE_FATAL);
		}
		__archive_read_consume(a, bsd_name_length);

		/* Store it in the entry. */
		p = (char *)malloc(bsd_name_length + 1);
		if (p == NULL) {
			archive_set_error(&a->archive, ENOMEM,
			    "Can't allocate fname buffer");
			return (ARCHIVE_FATAL);
		}
		strncpy(p, b, bsd_name_length);
		p[bsd_name_length] = '\0';
		archive_entry_copy_pathname(entry, p);
		free(p);
		return (ARCHIVE_OK);
	}

	/*
	 * "/" is the SVR4/GNU archive symbol table.
	 */
	if (strcmp(filename, "/") == 0) {
		archive_entry_copy_pathname(entry, "/");
		/* Parse the time, owner, mode, size fields. */
		r = ar_parse_common_header(ar, entry, h);
		/* Force the file type to a regular file. */
		archive_entry_set_filetype(entry, AE_IFREG);
		return (r);
	}

	/*
	 * "__.SYMDEF" is a BSD archive symbol table.
	 */
	if (strcmp(filename, "__.SYMDEF") == 0) {
		archive_entry_copy_pathname(entry, filename);
		/* Parse the time, owner, mode, size fields. */
		return (ar_parse_common_header(ar, entry, h));
	}

	/*
	 * Otherwise, this is a standard entry.  The filename
	 * has already been trimmed as much as possible, based
	 * on our current knowledge of the format.
	 */
	archive_entry_copy_pathname(entry, filename);
	return (ar_parse_common_header(ar, entry, h));
}

static int
ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
    const char *h)
{
	uint64_t n;

	/* Copy remaining header */
	archive_entry_set_mtime(entry,
	    (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
	archive_entry_set_uid(entry,
	    (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
	archive_entry_set_gid(entry,
	    (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
	archive_entry_set_mode(entry,
	    (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
	n = ar_atol10(h + AR_size_offset, AR_size_size);

	ar->entry_offset = 0;
	ar->entry_padding = n % 2;
	archive_entry_set_size(entry, n);
	ar->entry_bytes_remaining = n;
	return (ARCHIVE_OK);
}

static int
archive_read_format_ar_read_data(struct archive_read *a,
    const void **buff, size_t *size, off_t *offset)
{
	ssize_t bytes_read;
	struct ar *ar;

	ar = (struct ar *)(a->format->data);

	if (ar->entry_bytes_remaining > 0) {
		*buff = __archive_read_ahead(a, 1, &bytes_read);
		if (bytes_read == 0) {
			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
			    "Truncated ar archive");
			return (ARCHIVE_FATAL);
		}
		if (bytes_read < 0)
			return (ARCHIVE_FATAL);
		if (bytes_read > ar->entry_bytes_remaining)
			bytes_read = (ssize_t)ar->entry_bytes_remaining;
		*size = bytes_read;
		*offset = ar->entry_offset;
		ar->entry_offset += bytes_read;
		ar->entry_bytes_remaining -= bytes_read;
		__archive_read_consume(a, (size_t)bytes_read);
		return (ARCHIVE_OK);
	} else {
		while (ar->entry_padding > 0) {
			*buff = __archive_read_ahead(a, 1, &bytes_read);
			if (bytes_read <= 0)
				return (ARCHIVE_FATAL);
			if (bytes_read > ar->entry_padding)
				bytes_read = (ssize_t)ar->entry_padding;
			__archive_read_consume(a, (size_t)bytes_read);
			ar->entry_padding -= bytes_read;
		}
		*buff = NULL;
		*size = 0;
		*offset = ar->entry_offset;
		return (ARCHIVE_EOF);
	}
}

static int
archive_read_format_ar_skip(struct archive_read *a)
{
	off_t bytes_skipped;
	struct ar* ar;

	ar = (struct ar *)(a->format->data);

	bytes_skipped = __archive_read_skip(a,
	    ar->entry_bytes_remaining + ar->entry_padding);
	if (bytes_skipped < 0)
		return (ARCHIVE_FATAL);

	ar->entry_bytes_remaining = 0;
	ar->entry_padding = 0;

	return (ARCHIVE_OK);
}

static int
ar_parse_gnu_filename_table(struct archive_read *a)
{
	struct ar *ar;
	char *p;
	size_t size;

	ar = (struct ar*)(a->format->data);
	size = ar->strtab_size;

	for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
		if (*p == '/') {
			*p++ = '\0';
			if (*p != '\n')
				goto bad_string_table;
			*p = '\0';
		}
	}
	/*
	 * GNU ar always pads the table to an even size.
	 * The pad character is either '\n' or '`'.
	 */
	if (p != ar->strtab + size && *p != '\n' && *p != '`')
		goto bad_string_table;

	/* Enforce zero termination. */
	ar->strtab[size - 1] = '\0';

	return (ARCHIVE_OK);

bad_string_table:
	archive_set_error(&a->archive, EINVAL,
	    "Invalid string table");
	free(ar->strtab);
	ar->strtab = NULL;
	return (ARCHIVE_WARN);
}

static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	uint64_t l, limit, last_digit_limit;
	unsigned int digit, base;

	base = 8;
	limit = UINT64_MAX / base;
	last_digit_limit = UINT64_MAX % base;

	while ((*p == ' ' || *p == '\t') && char_cnt-- > 0)
		p++;

	l = 0;
	digit = *p - '0';
	while (*p >= '0' && digit < base  && char_cnt-- > 0) {
		if (l>limit || (l == limit && digit > last_digit_limit)) {
			l = UINT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		digit = *++p - '0';
	}
	return (l);
}

static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	uint64_t l, limit, last_digit_limit;
	unsigned int base, digit;

	base = 10;
	limit = UINT64_MAX / base;
	last_digit_limit = UINT64_MAX % base;

	while ((*p == ' ' || *p == '\t') && char_cnt-- > 0)
		p++;
	l = 0;
	digit = *p - '0';
	while (*p >= '0' && digit < base  && char_cnt-- > 0) {
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = UINT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		digit = *++p - '0';
	}
	return (l);
}