#include "file.h"
#include "magic.h"
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <string.h>
#include <ctype.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/param.h>
#ifdef QUICK
#include <sys/mman.h>
#endif
#ifndef lint
FILE_RCSID("@(#)$Id: apprentice.c,v 1.1 2003/07/02 18:01:22 eseidel Exp $")
#endif
/*
 * EATAB: advance the character pointer named `l' in the enclosing scope
 * past any run of ASCII whitespace.  The macro expands to a braced block and
 * refers to `l' by name, so it only works where such a variable is visible.
 */
#define EATAB {while (isascii((unsigned char) *l) && \
isspace((unsigned char) *l)) ++l;}
/*
 * LOWCASE: yield the lower-case form of an upper-case character and any
 * other value unchanged.  The argument is expanded more than once, so it
 * must be free of side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
tolower((unsigned char) (l)) : (l))
/*
 * With the DEC C compiler on OSF, discard whatever MAP_FAILED the system
 * headers supplied so that the fallback definition below is used instead.
 */
#if defined(__osf__) && defined(__DECC)
#ifdef MAP_FAILED
#undef MAP_FAILED
#endif
#endif
/* Fallback for systems whose headers do not define MAP_FAILED. */
#ifndef MAP_FAILED
#define MAP_FAILED (void *) -1
#endif
/* Fallback for systems without MAP_FILE: a flag value of 0 adds nothing. */
#ifndef MAP_FILE
#define MAP_FILE 0
#endif
/*
 * Character separating the individual file names in a magic file list
 * (file_apprentice splits its argument on it): ';' on EMX, ':' elsewhere.
 */
#ifdef __EMX__
char PATHSEP=';';
#else
char PATHSEP=':';
#endif
/* Forward declarations of the file-local helpers defined below. */
private int getvalue(struct magic_set *ms, struct magic *, char **);
private int hextoint(int);
private char *getstr(struct magic_set *, char *, char *, int, int *);
private int parse(struct magic_set *, struct magic **, uint32_t *, char *, int);
private void eatsize(char **);
private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
private int apprentice_file(struct magic_set *, struct magic **, uint32_t *,
const char *, int);
private void byteswap(struct magic *, uint32_t);
private void bs1(struct magic *);
private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private char *mkdbname(const char *, char *, size_t);
private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
const char *);
private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
const char *);
/*
 * Number of entries currently allocated for the magic array being built:
 * set by apprentice_file() and enlarged by parse() when the array fills up.
 */
private size_t maxmagic = 0;
#ifdef COMPILE_ONLY
/* Magic source file named on the command line. */
const char *magicfile;
/* Last path component of argv[0]; used in the usage message. */
char *progname;
int lineno;
int main(int, char *[]);

/*
 * Stand-alone driver, built only when COMPILE_ONLY is defined.
 * Expects exactly one argument (the magic file), hands it to apprentice()
 * in COMPILE mode and exits with that call's result.  A wrong argument
 * count prints a usage line on stderr and exits with status 1.
 */
int
main(int argc, char *argv[])
{
	char *slash = strrchr(argv[0], '/');

	/* Strip any leading directory from the program name. */
	progname = (slash != NULL) ? slash + 1 : argv[0];

	if (argc != 2) {
		(void)fprintf(stderr, "usage: %s file\n", progname);
		exit(1);
	}

	magicfile = argv[1];
	exit(apprentice(magicfile, COMPILE, MAGIC_CHECK));
}
#endif
/*
 * Load a single magic file and append it to the list headed by `mlist'.
 *
 * ms     - magic set used for flags and error reporting
 * fn     - name of the magic file
 * action - FILE_COMPILE parses `fn' and writes its compiled form; any other
 *          value loads the entries (compiled database first, text file as
 *          fallback) and links them onto `mlist'
 * mlist  - head of the circular doubly-linked list to append to
 *
 * Returns 0 on success and -1 on failure.  In FILE_COMPILE mode the result
 * of apprentice_file()/apprentice_compile() is returned as is.
 */
private int
apprentice_1(struct magic_set *ms, const char *fn, int action,
    struct mlist *mlist)
{
	struct magic *magic = NULL;
	uint32_t nmagic = 0;
	struct mlist *ml;
	int rv = -1;
	int mapped;

	/* The on-disk format depends on the exact size of struct magic. */
	if (sizeof(*magic) != FILE_MAGICSIZE) {
		file_error(ms, "Magic element size %lu != %lu",
		    (unsigned long)sizeof(*magic),
		    (unsigned long)FILE_MAGICSIZE);
		return -1;
	}

	if (action == FILE_COMPILE) {
		rv = apprentice_file(ms, &magic, &nmagic, fn, action);
		if (rv == 0) {
			rv = apprentice_compile(ms, &magic, &nmagic, fn);
			free(magic);
		}
		return rv;
	}
#ifndef COMPILE_ONLY
	/* Prefer the compiled database; fall back to parsing the text file. */
	if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
		if (ms->flags & MAGIC_CHECK)
			file_magwarn("Using regular magic file `%s'", fn);
		rv = apprentice_file(ms, &magic, &nmagic, fn, action);
	}
	if (rv == -1)
		return rv;
	mapped = rv;

	/*
	 * Reject an empty result before allocating the list node, so this
	 * failure path has nothing to release (the node used to be leaked).
	 */
	if (magic == NULL || nmagic == 0)
		return -1;

	if ((ml = malloc(sizeof(*ml))) == NULL) {
		file_oomem(ms);
		return -1;
	}

	ml->magic = magic;
	ml->nmagic = nmagic;
	ml->mapped = mapped;

	/* Insert at the tail of the circular list, just before the head. */
	mlist->prev->next = ml;
	ml->prev = mlist->prev;
	ml->next = mlist;
	mlist->prev = ml;
	return 0;
#else
	/* Only compilation is supported in a COMPILE_ONLY build. */
	(void)ml;
	(void)mapped;
	(void)mlist;
	return -1;
#endif
}
/*
 * Load every magic file named in `fn', a list of file names separated by
 * PATHSEP, and return the head of the resulting circular list.
 *
 * The list is processed on a private copy of `fn', so the caller's string
 * is not modified.  Returns NULL when memory runs out or when every file
 * in the list failed to load (reported through file_error()).
 */
protected struct mlist *
file_apprentice(struct magic_set *ms, const char *fn, int action)
{
	char *paths, *cur, *sep;
	int worst = -1;
	struct mlist *head;

	paths = strdup(fn);
	if (paths == NULL) {
		file_oomem(ms);
		return NULL;
	}

	head = malloc(sizeof(*head));
	if (head == NULL) {
		free(paths);
		file_oomem(ms);
		return NULL;
	}
	/* An empty circular list points at itself in both directions. */
	head->prev = head;
	head->next = head;

	for (cur = paths; cur != NULL; cur = sep) {
		int rc;

		/* Cut the current name off at the separator, if there is one. */
		sep = strchr(cur, PATHSEP);
		if (sep != NULL)
			*sep++ = '\0';

		rc = apprentice_1(ms, cur, action, head);
		if (rc > worst)
			worst = rc;
	}

	free(paths);

	/* Still -1 means that not a single file could be loaded. */
	if (worst == -1) {
		free(head);
		file_error(ms, "Couldn't find any magic files!");
		return NULL;
	}
	return head;
}
/*
 * Parse the text magic file `fn' into a freshly allocated array.
 *
 * ms      - magic set used for flags and error reporting
 * magicp  - receives the allocated array of entries
 * nmagicp - entry counter, advanced by parse() for every accepted line
 * fn      - name of the magic file to read
 * action  - FILE_CHECK additionally prints a column header; the value is
 *           also passed on to parse()
 *
 * Returns 0 when every line parsed, 1 when at least one line was rejected
 * (the array is then freed and *magicp / *nmagicp are reset), and -1 when
 * the file cannot be opened or the initial allocation fails.
 */
private int
apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
    const char *fn, int action)
{
	private const char hdr[] =
		"cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
	FILE *f;
	char line[BUFSIZ+1];
	int lineno;
	int errs = 0;
	size_t len;

	f = fopen(fn, "r");
	if (f == NULL) {
		/* A missing file is not worth a message; anything else is. */
		if (errno != ENOENT)
			file_error(ms, "Can't read magic file %s (%s)",
			    fn, strerror(errno));
		return -1;
	}

	maxmagic = MAXMAGIS;
	*magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic));
	if (*magicp == NULL) {
		(void)fclose(f);
		file_oomem(ms);
		return -1;
	}

	if (action == FILE_CHECK)
		(void) printf("%s\n", hdr);

	for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++) {
		if (line[0] == '#')	/* comment */
			continue;
		len = strlen(line);
		if (len <= 1)		/* empty or single-character line */
			continue;
		/*
		 * Drop the trailing newline only when there is one: the last
		 * line of a file may lack it, and blindly chopping the final
		 * character would eat part of the entry.
		 */
		if (line[len - 1] == '\n')
			line[len - 1] = '\0';
		if (parse(ms, magicp, nmagicp, line, action) != 0)
			errs = 1;
	}

	(void)fclose(f);
	if (errs) {
		free(*magicp);
		*magicp = NULL;
		*nmagicp = 0;
	}
	return errs;
}
/*
 * Sign-extend `v' according to the width of the magic entry's type.
 *
 * Entries flagged UNSIGNED, and the string-like types, return `v'
 * unchanged.  An unknown type yields ~0U (with a warning when MAGIC_CHECK
 * is set).
 *
 * The narrowing casts use explicitly signed fixed-width types: plain `char'
 * is unsigned on some ABIs, where a `(char)' cast would silently skip the
 * sign extension, and `short' is not guaranteed to be 16 bits wide.
 */
protected uint32_t
file_signextend(struct magic_set *ms, struct magic *m, uint32_t v)
{
	if (m->flag & UNSIGNED)
		return v;

	switch (m->type) {
	case FILE_BYTE:
		v = (uint32_t)(int32_t)(signed char) v;
		break;
	case FILE_SHORT:
	case FILE_BESHORT:
	case FILE_LESHORT:
		v = (uint32_t)(int32_t)(int16_t) v;
		break;
	case FILE_DATE:
	case FILE_BEDATE:
	case FILE_LEDATE:
	case FILE_LDATE:
	case FILE_BELDATE:
	case FILE_LELDATE:
	case FILE_LONG:
	case FILE_BELONG:
	case FILE_LELONG:
		v = (int32_t) v;
		break;
	case FILE_STRING:
	case FILE_PSTRING:
	case FILE_REGEX:
		/* No numeric value to extend. */
		break;
	default:
		if (ms->flags & MAGIC_CHECK)
			file_magwarn("can't happen: m->type=%d\n",
			    m->type);
		return ~0U;
	}
	return v;
}
/*
 * Parse one line of a magic file into the next free slot of *magicp.
 *
 * ms      - magic set used for flags and error reporting
 * magicp  - the entry array; reallocated here when it is full
 * nmagicp - number of entries in use; incremented on success
 * l       - the NUL-terminated line, without its trailing newline
 * action  - FILE_CHECK dumps the parsed entry (unless COMPILE_ONLY)
 *
 * Returns 0 on success and -1 on failure.  On failure the array stays valid
 * and is still owned by the caller, and *nmagicp is unchanged.
 *
 * Line layout: [>...][(][&]offset[.type][op value][)]  type[mask]
 *              [relation]value  description
 */
private int
parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l,
    int action)
{
	int i = 0;
	struct magic *m;
	char *t;
	private const char *fops = FILE_OPS;
	uint32_t val;

#define ALLOC_INCR 200
	if (*nmagicp + 1 >= maxmagic) {
		size_t newmax = maxmagic + ALLOC_INCR;

		m = (struct magic *) realloc(*magicp,
		    sizeof(struct magic) * newmax);
		if (m == NULL) {
			/*
			 * realloc left the old array intact.  Keep it (the
			 * caller frees it) and keep maxmagic in step with the
			 * real allocation; freeing here would leave the caller
			 * with a dangling pointer that it frees again.
			 */
			file_oomem(ms);
			return -1;
		}
		*magicp = m;
		maxmagic = newmax;
		memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
		    * ALLOC_INCR);
	}
	m = &(*magicp)[*nmagicp];
	m->flag = 0;
	m->cont_level = 0;

	/* Each leading '>' is one continuation level. */
	while (*l == '>') {
		++l;
		m->cont_level++;
	}

	/* Indirect and relative offsets are only valid on continuations. */
	if (m->cont_level != 0 && *l == '(') {
		++l;
		m->flag |= INDIR;
	}
	if (m->cont_level != 0 && *l == '&') {
		++l;
		m->flag |= OFFADD;
	}

	/* The offset itself. */
	m->offset = (int) strtoul(l, &t, 0);
	if (l == t)
		if (ms->flags & MAGIC_CHECK)
			file_magwarn("offset %s invalid", l);
	l = t;

	if (m->flag & INDIR) {
		m->in_type = FILE_LONG;
		m->in_offset = 0;
		m->in_op = 0;
		/* Optional ".X" selects the type read at the indirect offset. */
		if (*l == '.') {
			l++;
			switch (*l) {
			case 'l':
				m->in_type = FILE_LELONG;
				break;
			case 'L':
				m->in_type = FILE_BELONG;
				break;
			case 'h':
			case 's':
				m->in_type = FILE_LESHORT;
				break;
			case 'H':
			case 'S':
				m->in_type = FILE_BESHORT;
				break;
			case 'c':
			case 'b':
			case 'C':
			case 'B':
				m->in_type = FILE_BYTE;
				break;
			default:
				if (ms->flags & MAGIC_CHECK)
					file_magwarn(
					    "indirect offset type %c invalid",
					    *l);
				break;
			}
			/* Never step over the end of the line. */
			if (*l != '\0')
				l++;
		}
		/* Optional operator applied to the value that was read. */
		if (*l == '~') {
			m->in_op = FILE_OPINVERSE;
			l++;
		}
		switch (*l) {
		case '&':
			m->in_op |= FILE_OPAND;
			l++;
			break;
		case '|':
			m->in_op |= FILE_OPOR;
			l++;
			break;
		case '^':
			m->in_op |= FILE_OPXOR;
			l++;
			break;
		case '+':
			m->in_op |= FILE_OPADD;
			l++;
			break;
		case '-':
			m->in_op |= FILE_OPMINUS;
			l++;
			break;
		case '*':
			m->in_op |= FILE_OPMULTIPLY;
			l++;
			break;
		case '/':
			m->in_op |= FILE_OPDIVIDE;
			l++;
			break;
		case '%':
			m->in_op |= FILE_OPMODULO;
			l++;
			break;
		}
		if (isdigit((unsigned char)*l))
			m->in_offset = (uint32_t)strtoul(l, &t, 0);
		else
			t = l;
		if (*t == ')') {
			t++;
		} else {
			if (ms->flags & MAGIC_CHECK)
				file_magwarn("missing ')' in indirect offset");
			/* Skip the stray character, but never the NUL. */
			if (*t != '\0')
				t++;
		}
		l = t;
	}

	while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
		++l;
	EATAB;

/* Lengths of the type keywords matched below. */
#define NBYTE 4
#define NSHORT 5
#define NLONG 4
#define NSTRING 6
#define NDATE 4
#define NBESHORT 7
#define NBELONG 6
#define NBEDATE 6
#define NLESHORT 7
#define NLELONG 6
#define NLEDATE 6
#define NPSTRING 7
#define NLDATE 5
#define NBELDATE 7
#define NLELDATE 7
#define NREGEX 5

	/* A leading 'u' marks the type as unsigned. */
	if (*l == 'u') {
		++l;
		m->flag |= UNSIGNED;
	}

	/* The type keyword. */
	if (strncmp(l, "char", NBYTE)==0) {
		m->type = FILE_BYTE;
		l += NBYTE;
	} else if (strncmp(l, "byte", NBYTE)==0) {
		m->type = FILE_BYTE;
		l += NBYTE;
	} else if (strncmp(l, "short", NSHORT)==0) {
		m->type = FILE_SHORT;
		l += NSHORT;
	} else if (strncmp(l, "long", NLONG)==0) {
		m->type = FILE_LONG;
		l += NLONG;
	} else if (strncmp(l, "string", NSTRING)==0) {
		m->type = FILE_STRING;
		l += NSTRING;
	} else if (strncmp(l, "date", NDATE)==0) {
		m->type = FILE_DATE;
		l += NDATE;
	} else if (strncmp(l, "beshort", NBESHORT)==0) {
		m->type = FILE_BESHORT;
		l += NBESHORT;
	} else if (strncmp(l, "belong", NBELONG)==0) {
		m->type = FILE_BELONG;
		l += NBELONG;
	} else if (strncmp(l, "bedate", NBEDATE)==0) {
		m->type = FILE_BEDATE;
		l += NBEDATE;
	} else if (strncmp(l, "leshort", NLESHORT)==0) {
		m->type = FILE_LESHORT;
		l += NLESHORT;
	} else if (strncmp(l, "lelong", NLELONG)==0) {
		m->type = FILE_LELONG;
		l += NLELONG;
	} else if (strncmp(l, "ledate", NLEDATE)==0) {
		m->type = FILE_LEDATE;
		l += NLEDATE;
	} else if (strncmp(l, "pstring", NPSTRING)==0) {
		m->type = FILE_PSTRING;
		l += NPSTRING;
	} else if (strncmp(l, "ldate", NLDATE)==0) {
		m->type = FILE_LDATE;
		l += NLDATE;
	} else if (strncmp(l, "beldate", NBELDATE)==0) {
		m->type = FILE_BELDATE;
		l += NBELDATE;
	} else if (strncmp(l, "leldate", NLELDATE)==0) {
		m->type = FILE_LELDATE;
		l += NLELDATE;
	} else if (strncmp(l, "regex", NREGEX)==0) {
		m->type = FILE_REGEX;
		/*
		 * Skip exactly the keyword: sizeof("regex") counts the NUL
		 * too and would swallow the character after it.
		 */
		l += NREGEX;
	} else {
		if (ms->flags & MAGIC_CHECK)
			file_magwarn("type %s invalid", l);
		return -1;
	}

	/* Optional mask (numeric types) or flags (string types). */
	if (*l == '~') {
		if (FILE_STRING != m->type && FILE_PSTRING != m->type)
			m->mask_op = FILE_OPINVERSE;
		++l;
	}
	/*
	 * strchr() also matches the terminating NUL of fops, so the end of
	 * the line must be excluded explicitly.
	 */
	if (*l != '\0' && (t = strchr(fops, *l)) != NULL) {
		uint32_t op = (uint32_t)(t - fops);
		if (op != FILE_OPDIVIDE ||
		    (FILE_STRING != m->type && FILE_PSTRING != m->type)) {
			++l;
			m->mask_op |= op;
			val = (uint32_t)strtoul(l, &l, 0);
			m->mask = file_signextend(ms, m, val);
			eatsize(&l);
		} else {
			/* "string/flags": collect the comparison flags. */
			m->mask = 0L;
			while (!isspace((unsigned char)*++l)) {
				switch (*l) {
				case CHAR_IGNORE_LOWERCASE:
					m->mask |= STRING_IGNORE_LOWERCASE;
					break;
				case CHAR_COMPACT_BLANK:
					m->mask |= STRING_COMPACT_BLANK;
					break;
				case CHAR_COMPACT_OPTIONAL_BLANK:
					m->mask |=
					    STRING_COMPACT_OPTIONAL_BLANK;
					break;
				default:
					/* Also reached at the end of the line. */
					if (ms->flags & MAGIC_CHECK)
						file_magwarn(
						"string extension %c invalid",
						*l);
					return -1;
				}
			}
		}
	}
	EATAB;

	/* The relation operator. */
	switch (*l) {
	case '>':
	case '<':
	case '&':
	case '^':
	case '=':
		m->reln = *l;
		++l;
		/* A doubled "==" is accepted as well. */
		if (*l == '=') {
			++l;
		}
		break;
	case '!':
		if (m->type != FILE_STRING && m->type != FILE_PSTRING) {
			m->reln = *l;
			++l;
			break;
		}
		/* FALLTHROUGH: for strings '!' is part of the value */
	default:
		/* A lone 'x' means "match anything"; there is no value. */
		if (*l == 'x' && isascii((unsigned char)l[1]) &&
		    isspace((unsigned char)l[1])) {
			m->reln = *l;
			++l;
			goto GetDesc;
		}
		m->reln = '=';
		break;
	}
	EATAB;

	if (getvalue(ms, m, &l))
		return -1;

GetDesc:
	/* The description; a leading backspace suppresses the blank. */
	EATAB;
	if (l[0] == '\b') {
		++l;
		m->nospflag = 1;
	} else if ((l[0] == '\\') && (l[1] == 'b')) {
		++l;
		++l;
		m->nospflag = 1;
	} else
		m->nospflag = 0;
	while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC)
		;
	/* A description that filled the buffer still needs a terminator. */
	if (i == MAXDESC)
		m->desc[MAXDESC - 1] = '\0';

#ifndef COMPILE_ONLY
	if (action == FILE_CHECK) {
		file_mdump(m);
	}
#endif
	++(*nmagicp);
	return 0;
}
/*
 * Read the comparison value of a magic entry from *p and store it in `m'.
 *
 * String-like types (FILE_STRING, FILE_PSTRING, FILE_REGEX) are decoded by
 * getstr() into m->value.s, with the length recorded in m->vallen.  Every
 * other type is read as a number, unless the relation is 'x', in which
 * case nothing is consumed.  *p is advanced past whatever was read.
 *
 * Returns 0 on success, -1 when getstr() rejects the string.
 */
private int
getvalue(struct magic_set *ms, struct magic *m, char **p)
{
	uint32_t raw;

	if (m->type == FILE_STRING || m->type == FILE_PSTRING ||
	    m->type == FILE_REGEX) {
		int len;
		char *rest;

		rest = getstr(ms, *p, m->value.s, sizeof(m->value.s), &len);
		*p = rest;
		if (rest == NULL)
			return -1;
		m->vallen = len;
		return 0;
	}

	/* 'x' matches anything, so there is no value to read. */
	if (m->reln == 'x')
		return 0;

	raw = (uint32_t)strtoul(*p, p, 0);
	m->value.l = file_signextend(ms, m, raw);
	/* Skip an optional size suffix after the number. */
	eatsize(p);
	return 0;
}
/*
 * Copy one whitespace-delimited token from `s' into `p', converting C-style
 * escape sequences (\n, \r, \b, \t, \f, \v, up to three octal digits, and
 * \x with up to two hex digits) on the way.  Any other escaped character
 * stands for itself.
 *
 * ms   - magic set used for error reporting
 * s    - input text
 * p    - output buffer
 * plen - size of the output buffer; at most plen - 1 characters are stored,
 *        followed by a NUL
 * slen - receives the number of characters stored
 *
 * Returns a pointer to the unread remainder of the input: just past the
 * delimiting whitespace character, or at the terminating NUL when the input
 * ended.  Returns NULL (after file_error()) if the token does not fit.
 */
private char *
getstr(struct magic_set *ms, char *s, char *p, int plen, int *slen)
{
	char *origs = s, *origp = p;
	char *pmax = p + plen - 1;
	int c;
	int val;

	while ((c = *s++) != '\0') {
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			file_error(ms, "String too long: `%s'", origs);
			return NULL;
		}
		if (c == '\\') {
			switch (c = *s++) {

			case '\0':
				/* Lone backslash at the end of the input. */
				goto out;

			default:
				*p++ = (char) c;
				break;

			case 'n':
				*p++ = '\n';
				break;

			case 'r':
				*p++ = '\r';
				break;

			case 'b':
				*p++ = '\b';
				break;

			case 't':
				*p++ = '\t';
				break;

			case 'f':
				*p++ = '\f';
				break;

			case 'v':
				*p++ = '\v';
				break;

			/* \ followed by up to three octal digits */
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				val = c - '0';
				c = *s++;	/* try for a second digit */
				if (c >= '0' && c <= '7') {
					val = (val << 3) | (c - '0');
					c = *s++;	/* and for a third */
					if (c >= '0' && c <= '7')
						val = (val << 3) | (c - '0');
					else
						--s;
				}
				else
					--s;
				*p++ = (char)val;
				break;

			/* \x followed by up to two hex digits */
			case 'x':
				val = 'x';	/* default if no digits follow */
				c = hextoint(*s++);
				if (c >= 0) {
					val = c;
					c = hextoint(*s++);
					if (c >= 0)
						val = (val << 4) + c;
					else
						--s;
				} else
					--s;
				*p++ = (char)val;
				break;
			}
		} else
			*p++ = (char)c;
	}
out:
	/*
	 * If the input's NUL stopped us, `s' has already moved one past it.
	 * Step back so the caller resumes at the terminator instead of
	 * scanning whatever bytes happen to follow the string.
	 */
	if (c == '\0')
		--s;
	*p = '\0';
	*slen = (int)(p - origp);
	return s;
}
/*
 * Convert a single hexadecimal digit character to its numeric value.
 * Returns -1 for anything that is not an ASCII hex digit.
 */
private int
hextoint(int c)
{
	int value = -1;

	if (isascii((unsigned char) c)) {
		if (isdigit((unsigned char) c))
			value = c - '0';
		else if (c >= 'a' && c <= 'f')
			value = c + 10 - 'a';
		else if (c >= 'A' && c <= 'F')
			value = c + 10 - 'A';
	}
	return value;
}
/*
 * Write the string `s' to `fp' in printable form.
 *
 * Printable ASCII (040..0176) is written as is.  Newline, carriage return,
 * backspace, tab, form feed and vertical tab are written as the usual
 * backslash escapes, and every other byte as a backslash followed by three
 * octal digits.
 *
 * len - number of bytes to print, or ~0U to print up to the terminating NUL
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	char c;

	for (;;) {
		/*
		 * Decide whether to stop BEFORE fetching the next byte: with
		 * an explicit length, s[len] need not be readable memory.
		 */
		if (len == ~0U) {
			if (*s == '\0')
				break;
		}
		else {
			if (len-- == 0)
				break;
		}
		c = *s++;
		if (c >= 040 && c <= 0176)	/* printable ASCII */
			(void) fputc(c, fp);
		else {
			(void) fputc('\\', fp);
			switch (c) {

			case '\n':
				(void) fputc('n', fp);
				break;

			case '\r':
				(void) fputc('r', fp);
				break;

			case '\b':
				(void) fputc('b', fp);
				break;

			case '\t':
				(void) fputc('t', fp);
				break;

			case '\f':
				(void) fputc('f', fp);
				break;

			case '\v':
				(void) fputc('v', fp);
				break;

			default:
				(void) fprintf(fp, "%.3o", c & 0377);
				break;
			}
		}
	}
}
/*
 * Skip an optional size suffix at *p: an optional 'u' followed by at most
 * one of 'l', 's', 'h', 'b' or 'c', all matched case-insensitively.
 * *p is left pointing at the first character that is not part of it.
 */
private void
eatsize(char **p)
{
	char *cur = *p;
	int ch;

	ch = LOWCASE(*cur);
	if (ch == 'u')
		cur++;

	ch = LOWCASE(*cur);
	if (ch == 'l' || ch == 's' || ch == 'h' || ch == 'b' || ch == 'c')
		cur++;

	*p = cur;
}
/*
 * Load the compiled magic database that belongs to `fn'.
 *
 * The database name is derived with mkdbname().  The file is mapped
 * privately (QUICK) or read into a malloc'd buffer.  Its first
 * sizeof(struct magic) bytes are a header that starts with MAGICNO and
 * VERSIONNO; the entries follow.  A header whose magic number only matches
 * after byte reversal marks a database of the opposite byte order, whose
 * entries are then byte-swapped in place.
 *
 * ms      - magic set used for error reporting
 * magicp  - receives a pointer to the first entry (just past the header)
 * nmagicp - receives the number of entries
 * fn      - name of the magic source file
 *
 * Returns 0 on success.  On failure returns -1 with *magicp set to NULL
 * and *nmagicp to 0; a database that cannot be opened fails silently.
 */
private int
apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
    const char *fn)
{
	int fd;
	struct stat st;
	uint32_t *ptr;
	uint32_t version;
	int needsbyteswap;
	char buf[MAXPATHLEN];
	char *dbname = mkdbname(fn, buf, sizeof(buf));
	/* Must start out NULL: the error path tests it. */
	void *mm = NULL;

	if (dbname == NULL)
		return -1;

	if ((fd = open(dbname, O_RDONLY)) == -1)
		return -1;

	if (fstat(fd, &st) == -1) {
		file_error(ms, "Cannot stat `%s' (%s)", dbname,
		    strerror(errno));
		goto error;
	}

	/*
	 * The header alone occupies one struct magic.  Anything shorter
	 * cannot be a database, and would make the entry count below wrap.
	 */
	if (st.st_size < (off_t)sizeof(struct magic)) {
		file_error(ms, "File `%s' is too small", dbname);
		goto error;
	}

#ifdef QUICK
	/* Private and writable, so byteswap() can patch the entries. */
	if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
		mm = NULL;	/* nothing to unmap */
		file_error(ms, "Cannot map `%s' (%s)", dbname, strerror(errno));
		goto error;
	}
#else
	if ((mm = malloc((size_t)st.st_size)) == NULL) {
		file_oomem(ms);
		goto error;
	}
	if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
		file_error(ms, "Read failed (%s)", strerror(errno));
		goto error;
	}
#endif
	*magicp = mm;
	(void)close(fd);
	fd = -1;

	ptr = (uint32_t *)(void *)*magicp;
	if (*ptr != MAGICNO) {
		if (swap4(*ptr) != MAGICNO) {
			file_error(ms, "Bad magic in `%s'", dbname);
			goto error;
		}
		needsbyteswap = 1;
	} else
		needsbyteswap = 0;

	if (needsbyteswap)
		version = swap4(ptr[1]);
	else
		version = ptr[1];
	if (version != VERSIONNO) {
		file_error(ms, "version mismatch (%d != %d) in `%s'",
		    version, VERSIONNO, dbname);
		goto error;
	}

	/* The first struct-sized slot is the header, not an entry. */
	*nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)) - 1;
	(*magicp)++;
	if (needsbyteswap)
		byteswap(*magicp, *nmagicp);
	return 0;

error:
	if (fd != -1)
		(void)close(fd);
	if (mm != NULL) {
#ifdef QUICK
		(void)munmap(mm, (size_t)st.st_size);
#else
		free(mm);
#endif
	}
	/* Never leave the caller holding a pointer into released memory. */
	*magicp = NULL;
	*nmagicp = 0;
	return -1;
}
/*
 * Header words written at the start of a compiled magic file by
 * apprentice_compile() and checked by apprentice_map(): the magic number
 * followed by the format version.
 */
private const uint32_t ar[] = {
MAGICNO, VERSIONNO
};
/*
 * Write the parsed magic entries to the compiled database for `fn'.
 *
 * The output starts with the two header words in `ar'.  The entries begin
 * at offset sizeof(struct magic), so the header takes up exactly one
 * entry-sized slot.
 *
 * ms      - magic set used for error reporting
 * magicp  - the entries to write
 * nmagicp - number of entries
 * fn      - name of the magic source file; the output name is derived from
 *           it with mkdbname()
 *
 * Returns 0 on success and -1 on failure.  The file descriptor is closed on
 * every path.
 */
private int
apprentice_compile(struct magic_set *ms, struct magic **magicp,
    uint32_t *nmagicp, const char *fn)
{
	int fd;
	int rv = -1;
	char buf[MAXPATHLEN];
	char *dbname = mkdbname(fn, buf, sizeof(buf));

	if (dbname == NULL)
		return -1;

	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
		file_error(ms, "Cannot open `%s' (%s)", dbname, strerror(errno));
		return -1;
	}

	if (write(fd, ar, sizeof(ar)) != sizeof(ar)) {
		file_error(ms, "Error writing `%s' (%s)", dbname,
		    strerror(errno));
		goto out;
	}

	/* Pad the header out to the size of one entry. */
	if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
	    != sizeof(struct magic)) {
		file_error(ms, "Error seeking `%s' (%s)", dbname,
		    strerror(errno));
		goto out;
	}

	if (write(fd, *magicp, sizeof(struct magic) * *nmagicp)
	    != sizeof(struct magic) * *nmagicp) {
		file_error(ms, "Error writing `%s' (%s)", dbname,
		    strerror(errno));
		goto out;
	}

	rv = 0;
out:
	/* Single exit, so the descriptor is never leaked on an error. */
	(void)close(fd);
	return rv;
}
/* Suffix that mkdbname() appends to form a compiled database name. */
private const char ext[] = ".mgc";
/*
 * Build the name of the compiled database for the magic file `fn': the
 * last path component of `fn' with `ext' appended.
 *
 * fn     - name of the magic source file
 * buf    - buffer that receives the result
 * bufsiz - size of `buf'
 *
 * Returns `buf', or NULL when the name does not fit.  Both callers already
 * treat NULL as failure; a silently truncated name would make them open
 * the wrong file.
 */
private char *
mkdbname(const char *fn, char *buf, size_t bufsiz)
{
	const char *p;
	int n;

	/* Keep only what follows the last '/', if there is one. */
	if ((p = strrchr(fn, '/')) != NULL)
		p++;
	else
		p = fn;

	n = snprintf(buf, bufsiz, "%s%s", p, ext);
	if (n < 0 || (size_t)n >= bufsiz)
		return NULL;
	return buf;
}
private void
byteswap(struct magic *magic, uint32_t nmagic)
{
uint32_t i;
for (i = 0; i < nmagic; i++)
bs1(&magic[i]);
}
/*
 * Return `sv' with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
	unsigned int lo = sv & 0xffU;
	unsigned int hi = (sv >> 8) & 0xffU;

	return (uint16_t)((lo << 8) | hi);
}
/*
 * Return `sv' with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
	return ((sv & 0x000000ffU) << 24) |
	       ((sv & 0x0000ff00U) << 8) |
	       ((sv & 0x00ff0000U) >> 8) |
	       ((sv & 0xff000000U) >> 24);
}
private void
bs1(struct magic *m)
{
m->cont_level = swap2(m->cont_level);
m->offset = swap4((uint32_t)m->offset);
m->in_offset = swap4((uint32_t)m->in_offset);
if (m->type != FILE_STRING)
m->value.l = swap4(m->value.l);
m->mask = swap4(m->mask);
}