archive_read_support_compression_uu.c   [plain text]


/*-
 * Copyright (c) 2009 Michihiro NAKAJIMA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"
__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_compression_uu.c 201248 2009-12-30 06:12:03Z kientzle $");

#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "archive.h"
#include "archive_private.h"
#include "archive_read_private.h"

struct uudecode {
	int64_t		 total;
	unsigned char	*in_buff;
#define IN_BUFF_SIZE	(1024)
	int		 in_cnt;
	size_t		 in_allocated;
	unsigned char	*out_buff;
#define OUT_BUFF_SIZE	(64 * 1024)
	int		 state;
#define ST_FIND_HEAD	0
#define ST_READ_UU	1
#define ST_UUEND	2
#define ST_READ_BASE64	3
};

static int	uudecode_bidder_bid(struct archive_read_filter_bidder *,
		    struct archive_read_filter *filter);
static int	uudecode_bidder_init(struct archive_read_filter *);

static ssize_t	uudecode_filter_read(struct archive_read_filter *,
		    const void **);
static int	uudecode_filter_close(struct archive_read_filter *);

int
archive_read_support_compression_uu(struct archive *_a)
{
	struct archive_read *a = (struct archive_read *)_a;
	struct archive_read_filter_bidder *bidder;

	bidder = __archive_read_get_bidder(a);
	archive_clear_error(_a);
	if (bidder == NULL)
		return (ARCHIVE_FATAL);

	bidder->data = NULL;
	bidder->bid = uudecode_bidder_bid;
	bidder->init = uudecode_bidder_init;
	bidder->options = NULL;
	bidder->free = NULL;
	return (ARCHIVE_OK);
}

static const unsigned char ascii[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\n', 0, 0, '\r', 0, 0, /* 00 - 0F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 30 - 3F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
};

static const unsigned char uuchar[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 30 - 3F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60 - 6F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70 - 7F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
};

static const unsigned char base64[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, /* 20 - 2F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 30 - 3F */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 50 - 5F */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 70 - 7F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
};

static const int base64num[128] = {
	 0,  0,  0,  0,  0,  0,  0,  0,
	 0,  0,  0,  0,  0,  0,  0,  0, /* 00 - 0F */
	 0,  0,  0,  0,  0,  0,  0,  0,
	 0,  0,  0,  0,  0,  0,  0,  0, /* 10 - 1F */
	 0,  0,  0,  0,  0,  0,  0,  0,
	 0,  0,  0, 62,  0,  0,  0, 63, /* 20 - 2F */
	52, 53, 54, 55, 56, 57, 58, 59,
	60, 61,  0,  0,  0,  0,  0,  0, /* 30 - 3F */
	 0,  0,  1,  2,  3,  4,  5,  6,
	 7,  8,  9, 10, 11, 12, 13, 14, /* 40 - 4F */
	15, 16, 17, 18, 19, 20, 21, 22,
	23, 24, 25,  0,  0,  0,  0,  0, /* 50 - 5F */
	 0, 26, 27, 28, 29, 30, 31, 32,
	33, 34, 35, 36, 37, 38, 39, 40, /* 60 - 6F */
	41, 42, 43, 44, 45, 46, 47, 48,
	49, 50, 51,  0,  0,  0,  0,  0, /* 70 - 7F */
};

static ssize_t
get_line(const unsigned char *b, ssize_t avail, ssize_t *nlsize)
{
	ssize_t len;

	len = 0;
	while (len < avail) {
		switch (ascii[*b]) {
		case 0:	/* Non-ascii character or control character. */
			if (nlsize != NULL)
				*nlsize = 0;
			return (-1);
		case '\r':
			if (avail-len > 1 && b[1] == '\n') {
				if (nlsize != NULL)
					*nlsize = 2;
				return (len+2);
			}
			/* FALL THROUGH */
		case '\n':
			if (nlsize != NULL)
				*nlsize = 1;
			return (len+1);
		case 1:
			b++;
			len++;
			break;
		}
	}
	if (nlsize != NULL)
		*nlsize = 0;
	return (avail);
}

static ssize_t
bid_get_line(struct archive_read_filter *filter,
    const unsigned char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl)
{
	ssize_t len;
	int quit;
	
	quit = 0;
	if (*avail == 0) {
		*nl = 0;
		len = 0;
	} else
		len = get_line(*b, *avail, nl);
	/*
	 * Read bytes more while it does not reach the end of line.
	 */
	while (*nl == 0 && len == *avail && !quit) {
		ssize_t diff = *ravail - *avail;

		*b = __archive_read_filter_ahead(filter, 160 + *ravail, avail);
		if (*b == NULL) {
			if (*ravail >= *avail)
				return (0);
			/* Reading bytes reaches the end of file. */
			*b = __archive_read_filter_ahead(filter, *avail, avail);
			quit = 1;
		}
		*ravail = *avail;
		*b += diff;
		*avail -= diff;
		len = get_line(*b, *avail, nl);
	}
	return (len);
}

#define UUDECODE(c) (((c) - 0x20) & 0x3f)

static int
uudecode_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	const unsigned char *b;
	ssize_t avail, ravail;
	ssize_t len, nl;
	int l;
	int firstline;

	(void)self; /* UNUSED */

	b = __archive_read_filter_ahead(filter, 1, &avail);
	if (b == NULL)
		return (0);

	firstline = 20;
	ravail = avail;
	for (;;) {
		len = bid_get_line(filter, &b, &avail, &ravail, &nl);
		if (len < 0 || nl == 0)
			return (0);/* Binary data. */
		if (memcmp(b, "begin ", 6) == 0 && len - nl >= 11)
			l = 6;
		else if (memcmp(b, "begin-base64 ", 13) == 0 && len - nl >= 18)
			l = 13;
		else
			l = 0;

		if (l > 0 && (b[l] < '0' || b[l] > '7' ||
		    b[l+1] < '0' || b[l+1] > '7' ||
		    b[l+2] < '0' || b[l+2] > '7' || b[l+3] != ' '))
			l = 0;

		b += len;
		avail -= len;
		if (l)
			break;
		firstline = 0;
	}
	if (!avail)
		return (0);
	len = bid_get_line(filter, &b, &avail, &ravail, &nl);
	if (len < 0 || nl == 0)
		return (0);/* There are non-ascii characters. */
	avail -= len;

	if (l == 6) {
		if (!uuchar[*b])
			return (0);
		/* Get a length of decoded bytes. */
		l = UUDECODE(*b++); len--;
		if (l > 45)
			/* Normally, maximum length is 45(character 'M'). */
			return (0);
		while (l && len-nl > 0) {
			if (l > 0) {
				if (!uuchar[*b++])
					return (0);
				if (!uuchar[*b++])
					return (0);
				len -= 2;
				--l;
			}
			if (l > 0) {
				if (!uuchar[*b++])
					return (0);
				--len;
				--l;
			}
			if (l > 0) {
				if (!uuchar[*b++])
					return (0);
				--len;
				--l;
			}
		}
		if (len-nl < 0)
			return (0);
		if (len-nl == 1 &&
		    (uuchar[*b] ||		 /* Check sum. */
		     (*b >= 'a' && *b <= 'z'))) {/* Padding data(MINIX). */
			++b;
			--len;
		}
		b += nl;
		if (avail && uuchar[*b])
			return (firstline+30);
	}
	if (l == 13) {
		while (len-nl > 0) {
			if (!base64[*b++])
				return (0);
			--len;
		}
		b += nl;

		if (avail >= 5 && memcmp(b, "====\n", 5) == 0)
			return (firstline+40);
		if (avail >= 6 && memcmp(b, "====\r\n", 6) == 0)
			return (firstline+40);
		if (avail > 0 && base64[*b])
			return (firstline+30);
	}

	return (0);
}

static int
uudecode_bidder_init(struct archive_read_filter *self)
{
	struct uudecode   *uudecode;
	void *out_buff;
	void *in_buff;

	self->code = ARCHIVE_COMPRESSION_UU;
	self->name = "uu";
	self->read = uudecode_filter_read;
	self->skip = NULL; /* not supported */
	self->close = uudecode_filter_close;

	uudecode = (struct uudecode *)calloc(sizeof(*uudecode), 1);
	out_buff = malloc(OUT_BUFF_SIZE);
	in_buff = malloc(IN_BUFF_SIZE);
	if (uudecode == NULL || out_buff == NULL || in_buff == NULL) {
		archive_set_error(&self->archive->archive, ENOMEM,
		    "Can't allocate data for uudecode");
		free(uudecode);
		free(out_buff);
		free(in_buff);
		return (ARCHIVE_FATAL);
	}

	self->data = uudecode;
	uudecode->in_buff = in_buff;
	uudecode->in_cnt = 0;
	uudecode->in_allocated = IN_BUFF_SIZE;
	uudecode->out_buff = out_buff;
	uudecode->state = ST_FIND_HEAD;

	return (ARCHIVE_OK);
}

static int
ensure_in_buff_size(struct archive_read_filter *self,
    struct uudecode *uudecode, size_t size)
{

	if (size > uudecode->in_allocated) {
		unsigned char *ptr;
		size_t newsize;

		newsize = uudecode->in_allocated << 1;
		ptr = malloc(newsize);
		if (ptr == NULL ||
		    newsize < uudecode->in_allocated) {
			free(ptr);
			archive_set_error(&self->archive->archive,
			    ENOMEM,
    			    "Can't allocate data for uudecode");
			return (ARCHIVE_FATAL);
		}
		if (uudecode->in_cnt)
			memmove(ptr, uudecode->in_buff,
			    uudecode->in_cnt);
		free(uudecode->in_buff);
		uudecode->in_buff = ptr;
		uudecode->in_allocated = newsize;
	}
	return (ARCHIVE_OK);
}

static ssize_t
uudecode_filter_read(struct archive_read_filter *self, const void **buff)
{
	struct uudecode *uudecode;
	const unsigned char *b, *d;
	unsigned char *out;
	ssize_t avail_in, ravail;
	ssize_t used;
	ssize_t total;
	ssize_t len, llen, nl;

	uudecode = (struct uudecode *)self->data;

read_more:
	d = __archive_read_filter_ahead(self->upstream, 1, &avail_in);
	if (d == NULL && avail_in < 0)
		return (ARCHIVE_FATAL);
	/* Quiet a code analyzer; make sure avail_in must be zero
	 * when d is NULL. */
	if (d == NULL)
		avail_in = 0;
	used = 0;
	total = 0;
	out = uudecode->out_buff;
	ravail = avail_in;
	if (uudecode->in_cnt) {
		/*
		 * If there is remaining data which is saved by
		 * previous calling, use it first.
		 */
		if (ensure_in_buff_size(self, uudecode,
		    avail_in + uudecode->in_cnt) != ARCHIVE_OK)
			return (ARCHIVE_FATAL);
		memcpy(uudecode->in_buff + uudecode->in_cnt,
		    d, avail_in);
		d = uudecode->in_buff;
		avail_in += uudecode->in_cnt;
		uudecode->in_cnt = 0;
	}
	for (;used < avail_in; d += llen, used += llen) {
		int l, body;

		b = d;
		len = get_line(b, avail_in - used, &nl);
		if (len < 0) {
			/* Non-ascii character is found. */
			archive_set_error(&self->archive->archive,
			    ARCHIVE_ERRNO_MISC,
			    "Insufficient compressed data");
			return (ARCHIVE_FATAL);
		}
		llen = len;
		if (nl == 0) {
			/*
			 * Save remaining data which does not contain
			 * NL('\n','\r').
			 */
			if (ensure_in_buff_size(self, uudecode, len)
			    != ARCHIVE_OK)
				return (ARCHIVE_FATAL);
			if (uudecode->in_buff != b)
				memmove(uudecode->in_buff, b, len);
			uudecode->in_cnt = len;
			if (total == 0) {
				/* Do not return 0; it means end-of-file.
				 * We should try to read bytes more. */
				__archive_read_filter_consume(
				    self->upstream, ravail);
				goto read_more;
			}
			break;
		}
		if (total + len * 2 > OUT_BUFF_SIZE)
			break;
		switch (uudecode->state) {
		default:
		case ST_FIND_HEAD:
			if (len - nl > 13 && memcmp(b, "begin ", 6) == 0)
				l = 6;
			else if (len - nl > 18 &&
			    memcmp(b, "begin-base64 ", 13) == 0)
				l = 13;
			else
				l = 0;
			if (l != 0 && b[l] >= '0' && b[l] <= '7' &&
			    b[l+1] >= '0' && b[l+1] <= '7' &&
			    b[l+2] >= '0' && b[l+2] <= '7' && b[l+3] == ' ') {
				if (l == 6)
					uudecode->state = ST_READ_UU;
				else
					uudecode->state = ST_READ_BASE64;
			}
			break;
		case ST_READ_UU:
			body = len - nl;
			if (!uuchar[*b] || body <= 0) {
				archive_set_error(&self->archive->archive,
				    ARCHIVE_ERRNO_MISC,
				    "Insufficient compressed data");
				return (ARCHIVE_FATAL);
			}
			/* Get length of undecoded bytes of curent line. */
			l = UUDECODE(*b++);
			body--;
			if (l > body) {
				archive_set_error(&self->archive->archive,
				    ARCHIVE_ERRNO_MISC,
				    "Insufficient compressed data");
				return (ARCHIVE_FATAL);
			}
			if (l == 0) {
				uudecode->state = ST_UUEND;
				break;
			}
			while (l > 0) {
				int n = 0;

				if (l > 0) {
					if (!uuchar[b[0]] || !uuchar[b[1]])
						break;
					n = UUDECODE(*b++) << 18;
					n |= UUDECODE(*b++) << 12;
					*out++ = n >> 16; total++;
					--l;
				}
				if (l > 0) {
					if (!uuchar[b[0]])
						break;
					n |= UUDECODE(*b++) << 6;
					*out++ = (n >> 8) & 0xFF; total++;
					--l;
				}
				if (l > 0) {
					if (!uuchar[b[0]])
						break;
					n |= UUDECODE(*b++);
					*out++ = n & 0xFF; total++;
					--l;
				}
			}
			if (l) {
				archive_set_error(&self->archive->archive,
				    ARCHIVE_ERRNO_MISC,
				    "Insufficient compressed data");
				return (ARCHIVE_FATAL);
			}
			break;
		case ST_UUEND:
			if (len - nl == 3 && memcmp(b, "end ", 3) == 0)
				uudecode->state = ST_FIND_HEAD;
			else {
				archive_set_error(&self->archive->archive,
				    ARCHIVE_ERRNO_MISC,
				    "Insufficient compressed data");
				return (ARCHIVE_FATAL);
			}
			break;
		case ST_READ_BASE64:
			l = len - nl;
			if (l >= 3 && b[0] == '=' && b[1] == '=' &&
			    b[2] == '=') {
				uudecode->state = ST_FIND_HEAD;
				break;
			}
			while (l > 0) {
				int n = 0;

				if (l > 0) {
					if (!base64[b[0]] || !base64[b[1]])
						break;
					n = base64num[*b++] << 18;
					n |= base64num[*b++] << 12;
					*out++ = n >> 16; total++;
					l -= 2;
				}
				if (l > 0) {
					if (*b == '=')
						break;
					if (!base64[*b])
						break;
					n |= base64num[*b++] << 6;
					*out++ = (n >> 8) & 0xFF; total++;
					--l;
				}
				if (l > 0) {
					if (*b == '=')
						break;
					if (!base64[*b])
						break;
					n |= base64num[*b++];
					*out++ = n & 0xFF; total++;
					--l;
				}
			}
			if (l && *b != '=') {
				archive_set_error(&self->archive->archive,
				    ARCHIVE_ERRNO_MISC,
				    "Insufficient compressed data");
				return (ARCHIVE_FATAL);
			}
			break;
		}
	}

	__archive_read_filter_consume(self->upstream, ravail);

	*buff = uudecode->out_buff;
	uudecode->total += total;
	return (total);
}

static int
uudecode_filter_close(struct archive_read_filter *self)
{
	struct uudecode *uudecode;

	uudecode = (struct uudecode *)self->data;
	free(uudecode->in_buff);
	free(uudecode->out_buff);
	free(uudecode);

	return (ARCHIVE_OK);
}