Blob Blame History Raw
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1985-2011 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                 Eclipse Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
#pragma prototyped
/*
 * Glenn Fowler
 * AT&T Research
 *
 * library interface to file
 *
 * the sum of the hacks {s5,v10,planix} is _____ than the parts
 */

static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2011-03-09 $\0\n";

static const char lib[] = "libast:magic";

#include <ast.h>
#include <ctype.h>
#include <limits.h>
#include <ccode.h>
#include <dt.h>
#include <modex.h>
#include <error.h>
#include <regex.h>
#include <swap.h>

/*
 * translate message m through ERROR_translate() using the lib catalog name;
 * an empty string is passed through untouched
 * NOTE: m is evaluated more than once and is not parenthesized
 */
#define T(m)		(*m?ERROR_translate(NiL,NiL,lib,m):m)

/*
 * strgrpmatch() of string s against pattern p with STR_LEFT|STR_RIGHT|STR_ICASE
 * (presumably: anchored at both ends, ignoring case -- see strgrpmatch docs)
 */
#define match(s,p)	strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)

#define MAXNEST		10		/* { ... } nesting limit	*/
#define MINITEM		4		/* magic buffer rounding	*/

/*
 * name => small integer value dictionary entry
 * the dict[] and info[] tables below are positional initializers for this
 * struct, so name and value must remain the first two members in this order
 */
typedef struct				/* identifier dictionary entry	*/
{
	const char	name[16];	/* identifier name		*/
	int		value;		/* identifier value		*/
	Dtlink_t	link;		/* dictionary link		*/
} Info_t;

/*
 * singly linked list node holding one compiled edit pattern
 * (not referenced by the functions in this part of the file)
 */
typedef struct Edit			/* edit substitution		*/
{
	struct Edit*	next;		/* next in list			*/
	regex_t*	from;		/* from pattern			*/
} Edit_t;

struct Entry;				/* forward reference for Loop_t	*/

/*
 * state for the 'l' (loop) op in ckmagic(): lab is entered once per
 * iteration; count and offset are scratch values updated while the loop runs
 */
typedef struct				/* loop info			*/
{
	struct Entry*	lab;		/* call this function		*/
	int		start;		/* start here			*/
	int		size;		/* increment by this amount	*/
	int		count;		/* dynamic loop count		*/
	int		offset;		/* dynamic offset		*/
} Loop_t;

/*
 * one parsed magic table entry; ckmagic() walks a list of these
 * which member of value is live depends on type and op:
 * num for numeric compares, str for string types, sub for regex types,
 * lab for function calls and loop for the loop op
 * when expr is set, offset holds 0 or an INFO_* code instead of a byte offset
 */
typedef struct Entry			/* magic file entry		*/
{
	struct Entry*	next;		/* next in list			*/
	char*		expr;		/* offset expression		*/
	union
	{
	unsigned long	num;
	char*		str;
	struct Entry*	lab;
	regex_t*	sub;
	Loop_t*		loop;
	}		value;		/* comparison value		*/
	char*		desc;		/* file description		*/
	char*		mime;		/* file mime type		*/
	unsigned long	offset;		/* offset in bytes		*/
	unsigned long	mask;		/* mask before compare		*/
	char		cont;		/* continuation operation	*/
	char		type;		/* datum type			*/
	char		op;		/* comparison operation		*/
	char		nest;		/* { or } nesting operation	*/
	char		swap;		/* forced swap order		*/
} Entry_t;

/*
 * character code classification
 *
 * a Cctype_t packs CC_MAPS fields of CC_BIT bits each; cklang() ORs the
 * per-character values together and then shifts through the fields one
 * code set at a time to pick the best fitting code set
 */

#define CC_BIT		5		/* bits per code set field	*/

/* use the narrow type when every field fits in 2*CHAR_BIT bits */
#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
typedef unsigned short Cctype_t;
#else
typedef unsigned long Cctype_t;
#endif

/* one bit per character class within a field */
#define CC_text		0x01
#define CC_control	0x02
#define CC_latin	0x04
#define CC_binary	0x08
#define CC_utf_8	0x10

#define CC_notext	CC_text		/* CC_text is flipped before checking */

/* classes compared when ranking code sets (CC_utf_8 is excluded) */
#define CC_MASK		(CC_binary|CC_latin|CC_control|CC_text)

/*
 * class of character code c:
 *	above 0240			CC_binary
 *	0200 through 0240		CC_latin
 *	below 040 except 007 011 012 013 015	CC_control
 *	everything else			CC_text
 */
#define CCTYPE(c)	(((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text)

/*
 * identifier classes: Info_t.value for dict[] entries and the index into
 * the identifier[] counters that cklang() tallies
 */

#define ID_NONE		0
#define ID_ASM		1
#define ID_C		2
#define ID_COBOL	3
#define ID_COPYBOOK	4
#define ID_CPLUSPLUS	5
#define ID_FORTRAN	6
#define ID_HTML		7
#define ID_INCL1	8
#define ID_INCL2	9
#define ID_INCL3	10
#define ID_MAM1		11
#define ID_MAM2		12
#define ID_MAM3		13
#define ID_NOTEXT	14
#define ID_PL1		15
#define ID_YACC		16

/* largest ID_* value -- identifier[] is sized ID_MAX + 1 */
#define ID_MAX		ID_YACC

/*
 * info keyword codes: Info_t.value for info[] entries; ckmagic() switches
 * on these (stored in Entry_t.offset when expr is set) to pick a value from
 * the file name or its stat info instead of the file data
 * 0 is not used here because ckmagic() treats offset 0 as "evaluate expr"
 */

#define INFO_atime	1
#define INFO_blocks	2
#define INFO_ctime	3
#define INFO_fstype	4
#define INFO_gid	5
#define INFO_mode	6
#define INFO_mtime	7
#define INFO_name	8
#define INFO_nlink	9
#define INFO_size	10
#define INFO_uid	11

/*
 * private members of the magic handle; defined before <magic.h> is included
 * (presumably so that header can splice them into Magic_t -- see magic.h)
 */

#define _MAGIC_PRIVATE_ \
	Magicdisc_t*	disc;			/* discipline		*/ \
	Vmalloc_t*	vm;			/* vmalloc region	*/ \
	Entry_t*	magic;			/* parsed magic table	*/ \
	Entry_t*	magiclast;		/* last entry in magic	*/ \
	char*		mime;			/* MIME type		*/ \
	unsigned char*	x2n;			/* CC_ALIEN=>CC_NATIVE	*/ \
	char		fbuf[SF_BUFSIZE + 1];	/* file data		*/ \
	char		xbuf[SF_BUFSIZE + 1];	/* indirect file data	*/ \
	char		nbuf[256];		/* !CC_NATIVE data	*/ \
	char		mbuf[64];		/* mime string		*/ \
	char		sbuf[64];		/* type suffix string	*/ \
	char		tbuf[2 * PATH_MAX];	/* type string		*/ \
	Cctype_t	cctype[UCHAR_MAX + 1];	/* char code types	*/ \
	unsigned int	count[UCHAR_MAX + 1];	/* char frequency count	*/ \
	unsigned int	multi[UCHAR_MAX + 1];	/* multi char count	*/ \
	int		keep[MAXNEST];		/* ckmagic nest stack	*/ \
	char*		cap[MAXNEST];		/* ckmagic mime stack	*/ \
	char*		msg[MAXNEST];		/* ckmagic text stack	*/ \
	Entry_t*	ret[MAXNEST];		/* ckmagic return stack	*/ \
	int		fbsz;			/* fbuf size		*/ \
	int		fbmx;			/* fbuf max size	*/ \
	int		xbsz;			/* xbuf size		*/ \
	int		swap;			/* swap() operation	*/ \
	unsigned long	flags;			/* disc+open flags	*/ \
	long		xoff;			/* xbuf offset		*/ \
	int		identifier[ID_MAX + 1];	/* Info_t identifier	*/ \
	Sfio_t*		fp;			/* fbuf fp		*/ \
	Sfio_t*		tmp;			/* tmp string		*/ \
	regdisc_t	redisc;			/* regex discipline	*/ \
	Dtdisc_t	dtdisc;			/* dict discipline	*/ \
	Dt_t*		idtab;			/* identifier dict	*/ \
	Dt_t*		infotab;		/* info keyword dict	*/

#include <magic.h>

/*
 * identifiers that hint at the language of a text file
 * cklang() inserts every entry into mp->idtab on first use and, for each
 * identifier found in the file data, bumps mp->identifier[value]
 * lookups are exact, so upper and lower case spellings are separate entries
 */

static Info_t		dict[] =		/* keyword dictionary	*/
{
	{ 	"COMMON",	ID_FORTRAN	},
	{ 	"COMPUTE",	ID_COBOL	},
	{ 	"COMP",		ID_COPYBOOK	},
	{ 	"COMPUTATIONAL",ID_COPYBOOK	},
	{ 	"DCL",		ID_PL1		},
	{ 	"DEFINED",	ID_PL1		},
	{ 	"DIMENSION",	ID_FORTRAN	},
	{ 	"DIVISION",	ID_COBOL	},
	{ 	"FILLER",	ID_COPYBOOK	},
	{ 	"FIXED",	ID_PL1		},
	{ 	"FUNCTION",	ID_FORTRAN	},
	{ 	"HTML",		ID_HTML		},
	{ 	"INTEGER",	ID_FORTRAN	},
	{ 	"MAIN",		ID_PL1		},
	{ 	"OPTIONS",	ID_PL1		},
	{ 	"PERFORM",	ID_COBOL	},
	{ 	"PIC",		ID_COPYBOOK	},
	{ 	"REAL",		ID_FORTRAN	},
	{ 	"REDEFINES",	ID_COPYBOOK	},
	{ 	"S9",		ID_COPYBOOK	},
	{ 	"SECTION",	ID_COBOL	},
	{ 	"SELECT",	ID_COBOL	},
	{ 	"SUBROUTINE",	ID_FORTRAN	},
	{ 	"TEXT",		ID_ASM		},
	{ 	"VALUE",	ID_COPYBOOK	},
	{ 	"attr",		ID_MAM3		},
	{ 	"binary",	ID_YACC		},
	{ 	"block",	ID_FORTRAN	},
	{ 	"bss",		ID_ASM		},
	{ 	"byte",		ID_ASM		},
	{ 	"char",		ID_C		},
	{ 	"class",	ID_CPLUSPLUS	},
	{ 	"clr",		ID_NOTEXT	},
	{ 	"comm",		ID_ASM		},
	{ 	"common",	ID_FORTRAN	},
	{ 	"data",		ID_ASM		},
	{ 	"dimension",	ID_FORTRAN	},
	{ 	"done",		ID_MAM2		},
	{ 	"double",	ID_C		},
	{ 	"even",		ID_ASM		},
	{ 	"exec",		ID_MAM3		},
	{ 	"extern",	ID_C		},
	{ 	"float",	ID_C		},
	{ 	"function",	ID_FORTRAN	},
	{ 	"globl",	ID_ASM		},
	{ 	"h",		ID_INCL3	},
	{ 	"html",		ID_HTML		},
	{ 	"include",	ID_INCL1	},
	{ 	"int",		ID_C		},
	{ 	"integer",	ID_FORTRAN	},
	{ 	"jmp",		ID_NOTEXT	},
	{ 	"left",		ID_YACC		},
	{ 	"libc",		ID_INCL2	},
	{ 	"long",		ID_C		},
	{ 	"make",		ID_MAM1		},
	{ 	"mov",		ID_NOTEXT	},
	{ 	"private",	ID_CPLUSPLUS	},
	{ 	"public",	ID_CPLUSPLUS	},
	{ 	"real",		ID_FORTRAN	},
	{ 	"register",	ID_C		},
	{ 	"right",	ID_YACC		},
	{ 	"sfio",		ID_INCL2	},
	{ 	"static",	ID_C		},
	{ 	"stdio",	ID_INCL2	},
	{ 	"struct",	ID_C		},
	{ 	"subroutine",	ID_FORTRAN	},
	{ 	"sys",		ID_NOTEXT	},
	{ 	"term",		ID_YACC		},
	{ 	"text",		ID_ASM		},
	{ 	"tst",		ID_NOTEXT	},
	{ 	"type",		ID_YACC		},
	{ 	"typedef",	ID_C		},
	{ 	"u",		ID_INCL2	},
	{ 	"union",	ID_YACC		},
	{ 	"void",		ID_C		},
};

/*
 * info keyword names and their INFO_* codes
 * (presumably loaded into mp->infotab by the magic file parser, which is
 * not part of this section -- confirm against the loader)
 */

static Info_t		info[] =
{
	{	"atime",	INFO_atime		},
	{	"blocks",	INFO_blocks		},
	{	"ctime",	INFO_ctime		},
	{	"fstype",	INFO_fstype		},
	{	"gid",		INFO_gid		},
	{	"mode",		INFO_mode		},
	{	"mtime",	INFO_mtime		},
	{	"name",		INFO_name		},
	{	"nlink",	INFO_nlink		},
	{	"size",		INFO_size		},
	{	"uid",		INFO_uid		},
};

/*
 * return pointer to data at offset off and size siz
 *
 * a range that lies wholly inside the initial buffer mp->fbuf is returned
 * directly; otherwise the indirect buffer mp->xbuf is (re)filled from
 * mp->fp at an SF_BUFSIZE/2 aligned position at or before off
 *
 * returns 0 if off or siz is negative, if off+siz would overflow, if the
 * range extends past mp->fbmx, or if the seek or read fails or is too short
 */

static char*
getdata(register Magic_t* mp, register long off, register int siz)
{
	register long	n;

	/*
	 * callers pass sizes derived from the magic table and from file
	 * data; a negative size would slip through every range check below
	 * and yield a pointer outside of the buffers
	 */

	if (off < 0 || siz < 0 || off > LONG_MAX - siz)
		return 0;
	if (off + siz <= mp->fbsz)
		return mp->fbuf + off;
	if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
	{
		if (off + siz > mp->fbmx)
			return 0;

		/*
		 * half buffer alignment leaves at least SF_BUFSIZE/2 bytes
		 * after off in a full read
		 */

		n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
		if (sfseek(mp->fp, n, SEEK_SET) != n)
			return 0;
		if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
		{
			/* read error: invalidate the indirect buffer */
			mp->xoff = 0;
			mp->xbsz = 0;
			return 0;
		}
		mp->xbuf[mp->xbsz] = 0;
		mp->xoff = n;
		if (off + siz > mp->xoff + mp->xbsz)
			return 0;
	}
	return mp->xbuf + off - mp->xoff;
}

/*
 * @... evaluator for strexpr()
 *
 * cs points to the operand text and *e receives the position after it;
 * handle is the Magic_t
 * for "@offset" or "@(expr)" the value is the datum stored in the file at
 * that offset; an optional suffix picks the width: b/B 1 byte, h/H 2 bytes,
 * q/Q 8 bytes, otherwise 4 bytes (one alphanumeric suffix char is skipped)
 * multi-byte values are read with swapget() using the current mp->swap
 * if the data is not available *e is set back to cs
 * cs==0 is the error callback: *e is the message, reported when verbose
 */

static long
indirect(const char* cs, char** e, void* handle)
{
	Magic_t*	mp = (Magic_t*)handle;
	char*		cursor;
	char*		data;
	long		val = 0;
	int		width;

	if (!cs)
	{
		if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
			(*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
		return val;
	}
	cursor = (char*)cs;
	if (*cursor == '@')
	{
		/* the offset is either a parenthesized expression or a number */
		cursor++;
		if (*cursor == '(')
			val = strexpr(cursor, e, indirect, mp);
		else
			val = strtol(cursor, e, 0);
		cursor = *e;

		/* the suffix selects the datum width */
		switch (*cursor)
		{
		case 'b':
		case 'B':
			width = 1;
			cursor++;
			break;
		case 'h':
		case 'H':
			width = 2;
			cursor++;
			break;
		case 'q':
		case 'Q':
			width = 8;
			cursor++;
			break;
		default:
			width = 4;
			if (isalnum(*cursor))
				cursor++;
			break;
		}
		data = getdata(mp, val, width);
		if (!data)
			cursor = (char*)cs;
		else if (width == 1)
			val = *(unsigned char*)data;
		else
			val = swapget(mp->swap, data, width);
	}
	*e = cursor;
	return val;
}

/*
 * emit regex error message
 *
 * the text for code is obtained from regerror() and passed to the
 * discipline error function at level 3; nothing happens unless
 * MAGIC_VERBOSE is set and an error function is defined
 */

static void
regmessage(Magic_t* mp, regex_t* re, int code)
{
	char	text[128];

	if (!(mp->flags & MAGIC_VERBOSE))
		return;
	if (!mp->disc->errorf)
		return;
	regerror(code, re, text, sizeof(text));
	(*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", text);
}

/*
 * decompose vcodex(3) method composition
 *
 * m up to (not including) x is the encoded method list
 * the method names are written to b, never at or past e, with '^' between
 * consecutive methods; the output is not 0 terminated here
 * returns the updated output pointer
 */

static char*
vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
{
	unsigned char*	map;
	const char*	o;
	int		c;
	int		n;
	int		i;
	int		a;

	map = CCMAP(CC_ASCII, CC_NATIVE);
	a = 0;
	i = 1;
	for (;;)
	{
		/* '^' separates methods: skipped before the first one */
		if (i)
			i = 0;
		else
			*b++ = '^';
		if (m < (x - 1) && !*(m + 1))
		{
			/*
			 * obsolete indices
			 */

			if (!a)
			{
				a = 1;
				o = "old, ";
				while (b < e && (c = *o++))
					*b++ = c;
			}
			switch (*m)
			{
			case 0:		o = "delta"; break;
			case 1:		o = "huffman"; break;
			case 2:		o = "huffgroup"; break;
			case 3:		o = "arith"; break;
			case 4:		o = "bwt"; break;
			case 5:		o = "rle"; break;
			case 6:		o = "mtf"; break;
			case 7:		o = "transpose"; break;
			case 8:		o = "table"; break;
			case 9:		o = "huffpart"; break;
			case 50:	o = "map"; break;
			case 100:	o = "recfm"; break;
			case 101:	o = "ss7"; break;
			default:	o = "UNKNOWN"; break;
			}
			m += 2;
			while (b < e && (c = *o++))
				*b++ = c;
		}
		else
			/* 0 terminated name, mapped from ascii if needed */
			while (b < e && m < x && (c = *m++))
			{
				if (map)
					c = map[c];
				*b++ = c;
			}
		if (b >= e)
			break;

		/*
		 * a variable length size follows (7 bits per byte, 0x80
		 * means more) along with that many bytes to skip
		 * (presumably the method arguments)
		 */

		n = 0;
		while (m < x)
		{
			/*
			 * one more shift would overflow int: a negative n
			 * would pass the range check below and move m
			 * backwards, so treat the list as malformed
			 */

			if (n > (INT_MAX >> 7))
				return b;
			n = (n<<7) | (*m & 0x7f);
			if (!(*m++ & 0x80))
				break;
		}
		if (n >= (x - m))
			break;
		m += n;
	}
	return b;
}

/*
 * check for magic table match in buf
 *
 * walks the parsed magic table mp->magic, fetching each datum with
 * getdata(), and appends the descriptions of matching entries to buf
 * (bounded by end); the mime type of the last matching entry that has
 * one is left in mp->mime
 *
 * file and st supply the values for info keyword entries
 * off is added to entry offsets (the 'm' op recurses with a new off and
 * the 'l' op steps it)
 *
 * returns buf if a description was generated, 0 otherwise
 */

static char*
ckmagic(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st, unsigned long off)
{
	register Entry_t*	ep;
	register char*		p;
	register char*		b;
	register int		level = 0;
	int			call = -1;
	int			all = 0;
	int			c;
	int			str;
	char*			q;
	char*			t;
	char*			cur;
	char*			base = 0;
	unsigned long		num;
	unsigned long		mask;
	regmatch_t		matches[10];

	mp->swap = 0;
	b = mp->msg[0] = cur = buf;
	mp->mime = mp->cap[0] = 0;
	mp->keep[0] = 0;
	for (ep = mp->magic; ep; ep = ep->next)
	{
	fun:
		if (ep->nest == '{')
		{
			if (++level >= MAXNEST)
			{
				/* nesting too deep: drop back to the top level state */
				call = -1;
				level = 0;
				mp->keep[0] = 0;
				b = mp->msg[0];
				mp->mime = mp->cap[0];
				continue;
			}
			mp->keep[level] = mp->keep[level - 1] != 0;
			mp->msg[level] = b;
			mp->cap[level] = mp->mime;
		}
		switch (ep->cont)
		{
		case '#':
			/*
			 * new top level entry: if the previous one produced
			 * text we are done, unless MAGIC_ALL asks for more
			 */

			if (mp->keep[level] && b > cur)
			{
				if ((mp->flags & MAGIC_ALL) && b < (end - 3))
				{
					all = 1;
					*b++ = '\n';
					cur = b;
					continue;
				}
				*b = 0;
				return buf;
			}
			mp->swap = 0;
			b = mp->msg[0] = cur;
			mp->mime = mp->cap[0] = 0;
			if (ep->type == ' ')
				continue;
			break;
		case '$':
			/* function call: remember where to return */
			if (mp->keep[level] && call < (MAXNEST - 1))
			{
				mp->ret[++call] = ep;
				ep = ep->value.lab;
				goto fun;
			}
			continue;
		case ':':
			/*
			 * function return: ignore it when no call is pending
			 * rather than index ret[] with -1
			 */

			if (call < 0)
				continue;
			ep = mp->ret[call--];
			if (ep->op == 'l')
				goto fun;
			continue;
		case '|':
			if (mp->keep[level] > 1)
				goto checknest;
			/*FALLTHROUGH*/
		default:
			if (!mp->keep[level])
			{
				b = mp->msg[level];
				mp->mime = mp->cap[level];
				goto checknest;
			}
			break;
		}

		/*
		 * determine the datum offset, or for info keywords the value
		 * itself (num or p) along with the type used to format it
		 */

		p = "";
		num = 0;
		if (!ep->expr)
			num = ep->offset + off;
		else
			switch (ep->offset)
			{
			case 0:
				num = strexpr(ep->expr, NiL, indirect, mp) + off;
				break;
			case INFO_atime:
				num = st->st_atime;
				ep->type = 'D';
				break;
			case INFO_blocks:
				num = iblocks(st);
				ep->type = 'N';
				break;
			case INFO_ctime:
				num = st->st_ctime;
				ep->type = 'D';
				break;
			case INFO_fstype:
				p = fmtfs(st);
				ep->type = toupper(ep->type);
				break;
			case INFO_gid:
				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
				{
					p = fmtgid(st->st_gid);
					ep->type = toupper(ep->type);
				}
				else
				{
					num = st->st_gid;
					ep->type = 'N';
				}
				break;
			case INFO_mode:
				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
				{
					p = fmtmode(st->st_mode, 0);
					ep->type = toupper(ep->type);
				}
				else
				{
					num = modex(st->st_mode);
					ep->type = 'N';
				}
				break;
			case INFO_mtime:
				/* modification time, not the status change time */
				num = st->st_mtime;
				ep->type = 'D';
				break;
			case INFO_name:
				if (!base)
				{
					if (base = strrchr(file, '/'))
						base++;
					else
						base = (char*)file;
				}
				p = base;
				ep->type = toupper(ep->type);
				break;
			case INFO_nlink:
				num = st->st_nlink;
				ep->type = 'N';
				break;
			case INFO_size:
				num = st->st_size;
				ep->type = 'N';
				break;
			case INFO_uid:
				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
				{
					p = fmtuid(st->st_uid);
					ep->type = toupper(ep->type);
				}
				else
				{
					num = st->st_uid;
					ep->type = 'N';
				}
				break;
			}

		/*
		 * fetch the datum: numeric types fall out of the switch with
		 * the value in num, regex and string types are matched and
		 * formatted right here
		 */

		switch (ep->type)
		{

		case 'b':
			if (!(p = getdata(mp, num, 1)))
				goto next;
			num = *(unsigned char*)p;
			break;

		case 'h':
			if (!(p = getdata(mp, num, 2)))
				goto next;
			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
			break;

		case 'd':
		case 'l':
		case 'v':
			if (!(p = getdata(mp, num, 4)))
				goto next;
			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
			break;

		case 'q':
			if (!(p = getdata(mp, num, 8)))
				goto next;
			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
			break;

		case 'e':
			if (!(p = getdata(mp, num, 0)))
				goto next;
			/*FALLTHROUGH*/
		case 'E':
			if (!ep->value.sub)
				goto next;
			if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
			{
				/* no match: retry on a copy mapped through mp->x2n */
				c = mp->fbsz;
				if (c >= sizeof(mp->nbuf))
					c = sizeof(mp->nbuf) - 1;
				p = (char*)memcpy(mp->nbuf, p, c);
				p[c] = 0;
				ccmapstr(mp->x2n, p, c);
				if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
				{
					if (c != REG_NOMATCH)
						regmessage(mp, ep->value.sub, c);
					goto next;
				}
			}
			p = ep->value.sub->re_sub->re_buf;
			q = T(ep->desc);
			t = *q ? q : p;
			if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
				*b++ = ' ';
			b += sfsprintf(b, end - b, *q ? q : "%s", p + (*p == '\b'));
			if (ep->mime)
				mp->mime = ep->mime;
			goto checknest;

		case 's':
			if (!(p = getdata(mp, num, ep->mask)))
				goto next;
			goto checkstr;
		case 'm':
			if (!(p = getdata(mp, num, 0)))
				goto next;
			/*FALLTHROUGH*/
		case 'M':
		case 'S':
		checkstr:
			/*
			 * at most two passes: the raw data, then a copy
			 * mapped through mp->x2n in mp->nbuf
			 */

			for (;;)
			{
				if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
					break;
				if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
					break;
				if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
					goto next;
				p = (char*)memcpy(mp->nbuf, p, ep->mask);
				p[ep->mask] = 0;
				ccmapstr(mp->x2n, p, ep->mask);
			}
			q = T(ep->desc);
			if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
				*b++ = ' ';

			/* temporarily cut the string at the first non-printable */
			for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
			*t = 0;
			b += sfsprintf(b, end - b, q + (*q == '\b'), p);
			*t = c;
			if (ep->mime)
				mp->mime = ep->mime;
			goto checknest;

		}

		/*
		 * numeric compare and special ops
		 */

		if (mask = ep->mask)
			num &= mask;
		switch (ep->op)
		{

		case '=':
		case '@':
			if (num == ep->value.num)
				break;
			if (ep->cont != '#')
				goto next;

			/*
			 * a top level entry may still match in another byte
			 * order: try the alternatives and leave the one that
			 * works in mp->swap for the entries that follow
			 */

			if (!mask)
				mask = ~mask;
			if (ep->type == 'h')
			{
				if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
				{
					if (!(mp->swap & (mp->swap + 1)))
						mp->swap = 7;
					goto swapped;
				}
			}
			else if (ep->type == 'l')
			{
				for (c = 1; c < 4; c++)
					if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
					{
						if (!(mp->swap & (mp->swap + 1)))
							mp->swap = 7;
						goto swapped;
					}
			}
			else if (ep->type == 'q')
			{
				for (c = 1; c < 8; c++)
					if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
						goto swapped;
			}
			goto next;

		case '!':
			if (num != ep->value.num)
				break;
			goto next;

		case '^':
			if (num ^ ep->value.num)
				break;
			goto next;

		case '>':
			if (num > ep->value.num)
				break;
			goto next;

		case '<':
			if (num < ep->value.num)
				break;
			goto next;

		case 'l':
			/*
			 * loop: the first visit records the count and the
			 * current off, later visits (via the ':' return) step
			 * off by size until the count runs out
			 */

			if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
			{
				if (!ep->value.loop->count)
				{
					ep->value.loop->count = num;
					ep->value.loop->offset = off;
					off = ep->value.loop->start;
				}
				else if (!--ep->value.loop->count)
				{
					off = ep->value.loop->offset;
					goto next;
				}
				else
					off += ep->value.loop->size;
				mp->ret[++call] = ep;
				ep = ep->value.loop->lab;
				goto fun;
			}
			goto next;

		case 'm':
			/* recursive match with num as the new base offset */
			c = mp->swap;
			t = ckmagic(mp, file, b + (b > cur), end, st, num);
			mp->swap = c;
			if (t)
			{
				if (b > cur && b < end)
					*b = ' ';
				b += strlen(b);
			}
			else if (ep->cont == '&')
				goto next;
			break;

		case 'r':
#if _UWIN
		{
			char*			e;
			Sfio_t*			rp;
			Sfio_t*			gp;

			if (!(t = strrchr(file, '.')))
				goto next;
			sfprintf(mp->tmp, "/reg/classes_root/%s", t);
			if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
				goto next;
			*ep->desc = 0;
			*ep->mime = 0;
			gp = 0;
			while (t = sfgetr(rp, '\n', 1))
			{
				if (strneq(t, "Content Type=", 13))
				{
					ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
					strcpy(ep->mime, t + 13);
					if (gp)
						break;
				}
				else
				{
					sfprintf(mp->tmp, "/reg/classes_root/%s", t);
					if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
					{
						ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
						strcpy(ep->desc, t);
						if (*ep->mime)
							break;
					}
				}
			}
			sfclose(rp);
			if (!gp)
				goto next;
			if (!*ep->mime)
			{
				t = T(ep->desc);
				if (!strncasecmp(t, "microsoft", 9))
					t += 9;
				while (isspace(*t))
					t++;
				e = "application/x-ms-";
				ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
				e = strcopy(ep->mime, e);
				while ((c = *t++) && c != '.' && c != ' ')
					*e++ = isupper(c) ? tolower(c) : c;
				*e = 0;
			}
			while (t = sfgetr(gp, '\n', 1))
				if (*t && !streq(t, "\"\""))
				{
					ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
					strcpy(ep->desc, t);
					break;
				}
			sfclose(gp);
			if (!*ep->desc)
				goto next;
			if (!t)
				for (t = T(ep->desc); *t; t++)
					if (*t == '.')
						*t = ' ';
			if (!mp->keep[level])
				mp->keep[level] = 2;
			mp->mime = ep->mime;
			break;
		}
#else
			if (ep->cont == '#' && !mp->keep[level])
				mp->keep[level] = 1;
			goto next;
#endif

		case 'v':
			if (!(p = getdata(mp, num, 4)))
				goto next;

			/*
			 * variable length size (7 bits per byte, 0x80 means
			 * more) followed by that many bytes of method list
			 */

			c = 0;
			do
			{
				/* a size that would overflow int cannot be valid */
				if (c > (INT_MAX >> 7))
					goto next;
				num++;
				c = (c<<7) | (*p & 0x7f);
			} while (*p++ & 0x80);
			if (!(p = getdata(mp, num, c)))
				goto next;
			if (mp->keep[level]++ && b > cur && b < (end - 1) && *(b - 1) != ' ')
			{
				*b++ = ',';
				*b++ = ' ';
			}

			/*
			 * limit the method list to PATH_MAX chars from cur
			 * but never write past the caller supplied end
			 */

			b = vcdecomp(b, (end - cur) > PATH_MAX ? cur + PATH_MAX : end, (unsigned char*)p, (unsigned char*)p + c);
			goto checknest;

		}
	swapped:
		q = T(ep->desc);
		if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
			*b++ = ' ';
		if (*q == '\b')
			q++;

		/*
		 * scan the description for its first conversion: a %s gets a
		 * formatted string argument, the integer conversions get num
		 */

		str = 0;
		for (t = q; *t; t++)
			if (*t == '%' && (c = *(t + 1)))
			{
				if (c == '%')
					t++;
				else
					while (c && c != '%')
					{
						if (c == 's')
						{
							str = 1;
							break;
						}
						else if (c == 'c' || c == 'd' || c == 'i' || c == 'u' || c == 'x' || c == 'X')
							goto format;
						t++;
						c = *(t + 1);
					}
			}
	format:
		if (!str)
			b += sfsprintf(b, end - b, q, num, num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0);
		else if (ep->type == 'd' || ep->type == 'D')
			b += sfsprintf(b, end - b, q, fmttime("%?%QL", (time_t)num), 0, 0, 0, 0, 0, 0, 0);
		else if (ep->type == 'v')
			b += sfsprintf(b, end - b, q, fmtversion(num), 0, 0, 0, 0, 0, 0, 0);
		else
			b += sfsprintf(b, end - b, q, fmtnum(num, 0), num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0);
		if (ep->mime && *ep->mime)
			mp->mime = ep->mime;
	checknest:
		if (ep->nest == '}')
		{
			/*
			 * leaving a nest: a nest without a match discards
			 * its text, otherwise the outer level inherits keep
			 */

			if (!mp->keep[level])
			{
				b = mp->msg[level];
				mp->mime = mp->cap[level];
			}
			else if (level > 0)
				mp->keep[level - 1] = mp->keep[level];
			if (--level < 0)
			{
				level = 0;
				mp->keep[0] = 0;
			}
		}
		continue;
	next:
		/* no match: a failed '&' entry cancels the current level */
		if (ep->cont == '&')
			mp->keep[level] = 0;
		goto checknest;
	}
	if (all && b-- || mp->keep[level] && b > cur)
	{
		*b = 0;
		return buf;
	}
	return 0;
}

/*
 * check english language stats
 *
 * uses the character counts in mp->count along with the punctuation
 * totals from cklang(): pun is the number of punctuation characters and
 * badpun the number of those not followed by a space or newline
 * returns non-zero if the data looks like english text
 */

static int
ckenglish(register Magic_t* mp, int pun, int badpun)
{
	const char*	set[3];
	int		sum[3];
	const char*	s;
	int		k;
	unsigned int	ees;

	/* too much punctuation that is not followed by white space */
	if (pun < 5 * badpun)
		return 0;

	/* too many ';' or markup/path characters compared to the letter e */
	ees = mp->count['E'] + mp->count['e'];
	if (2 * mp->count[';'] > ees)
		return 0;
	if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > ees)
		return 0;

	/* case insensitive totals for vowels, frequent and rare letters */
	set[0] = "aeiou";
	set[1] = "etaion";
	set[2] = "vjkqxz";
	for (k = 0; k < 3; k++)
	{
		sum[k] = 0;
		for (s = set[k]; *s; s++)
			sum[k] += mp->count[toupper(*s)] + mp->count[*s];
	}
	return 5 * sum[0] >= mp->fbsz - mp->count[' '] && sum[1] >= 10 * sum[2];
}

/*
 * check programming language stats
 *
 * classifies the data in mp->fbuf when no magic table entry matched:
 * first by character code type, then for text by a leading #! line, the
 * file name suffix and identifier/punctuation statistics, and for non-text
 * by a utf-8 validity check and the spread of the byte frequencies
 *
 * returns the description, which is either a constant/translated string,
 * mp->sbuf, or text formatted into buf (bounded by end); mp->mime is set
 * on the paths that know the mime type
 * NOTE: mp->fbuf may be converted to the native code set in place
 */

static char*
cklang(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st)
{
	register int		c;
	register unsigned char*	b;
	register unsigned char*	e;
	register int		q;
	register char*		s;
	char*			t;
	char*			base;
	char*			suff;
	char*			t1;
	char*			t2;
	char*			t3;
	int			n;
	int			badpun;
	int			code;
	int			pun;
	Cctype_t		flags;
	Info_t*			ip;

	b = (unsigned char*)mp->fbuf;
	e = b + mp->fbsz;
	memzero(mp->count, sizeof(mp->count));
	memzero(mp->multi, sizeof(mp->multi));
	memzero(mp->identifier, sizeof(mp->identifier));

	/*
	 * check character coding
	 */

	flags = 0;
	while (b < e)
		flags |= mp->cctype[*b++];
	b = (unsigned char*)mp->fbuf;
	code = 0;
	q = CC_ASCII;
	n = CC_MASK;

	/*
	 * flags holds one CC_BIT wide field per code set: choose the code
	 * set q whose field, after flipping CC_text, has the smallest value
	 */

	for (c = 0; c < CC_MAPS; c++)
	{
		flags ^= CC_text;
		if ((flags & CC_MASK) < n)
		{
			n = flags & CC_MASK;
			q = c;
		}
		flags >>= CC_BIT;
	}
	flags = n;
	if (!(flags & (CC_binary|CC_notext)))
	{
		/* text: convert to the native code set if necessary */
		if (q != CC_NATIVE)
		{
			code = q;
			ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
		}
		if (b[0] == '#' && b[1] == '!')
		{
			/* #! interpreter line: s is the interpreter path */
			for (b += 2; b < e && isspace(*b); b++);
			for (s = (char*)b; b < e && isprint(*b); b++);
			c = *b;
			*b = 0;
			if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
			{
				/* reduce s to the interpreter base name without arguments */
				if (t = strrchr(s, '/'))
					s = t + 1;
				for (t = s; *t; t++)
					if (isspace(*t))
					{
						*t = 0;
						break;
					}
				sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
				mp->mime = mp->mbuf;
				if (match(s, "*sh"))
				{
					t1 = T("command");
					if (streq(s, "sh"))
						*s = 0;
					else
					{
						*b++ = ' ';
						*b = 0;
					}
				}
				else
				{
					t1 = T("interpreter");
					*b++ = ' ';
					*b = 0;
				}
				sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
				s = mp->sbuf;
				goto qualify;
			}
			*b = c;
			b = (unsigned char*)mp->fbuf;
		}

		/*
		 * gather statistics: character counts in mp->count, multi
		 * character constructs in mp->multi and dictionary identifiers
		 * in mp->identifier
		 * q is the pending quote or comment terminator (0 if none),
		 * s the start of the current identifier (0 if none)
		 */

		badpun = 0;
		pun = 0;
		q = 0;
		s = 0;
		t = 0;
		while (b < e)
		{
			c = *b++;
			mp->count[c]++;
			if (c == q && (q != '*' || *b == '/' && b++))
			{
				/* end of quote or comment */
				mp->multi[q]++;
				q = 0;
			}
			else if (c == '\\')
			{
				/* skip the escaped character */
				s = 0;
				b++;
			}
			else if (!q)
			{
				if (isalpha(c) || c == '_')
				{
					if (!s)
						s = (char*)b - 1;
				}
				else if (!isdigit(c))
				{
					if (s)
					{
						/* end of identifier: check the char before it */
						if (s > mp->fbuf)
							switch (*(s - 1))
							{
							case ':':
								if (*b == ':')
									mp->multi[':']++;
								break;
							case '.':
								if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
									mp->multi['.']++;
								break;
							case '\n':
							case '\\':
								if (*b == '{')
									t = (char*)b + 1;
								break;
							case '{':
								if (s == t && *b == '}')
									mp->multi['X']++;
								break;
							}
							/* the identifier dictionary is created on first use */
							if (!mp->idtab)
							{
								if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dtset))
									for (q = 0; q < elementsof(dict); q++)
										dtinsert(mp->idtab, &dict[q]);
								else if (mp->disc->errorf)
									(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
								q = 0;
							}
							if (mp->idtab)
							{
								/* terminate the identifier in place for the lookup */
								*(b - 1) = 0;
								if (ip = (Info_t*)dtmatch(mp->idtab, s))
									mp->identifier[ip->value]++;
								*(b - 1) = c;
							}
							s = 0;
						}
					switch (c)
					{
					case '\t':
						if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
							mp->multi['\t']++;
						break;
					case '"':
					case '\'':
						q = c;
						break;
					case '/':
						if (*b == '*')
							q = *b++;
						else if (*b == '/')
							q = '\n';
						break;
					case '$':
						if (*b == '(' && *(b + 1) != ' ')
							mp->multi['$']++;
						break;
					case '{':
					case '}':
					case '[':
					case ']':
					case '(':
						mp->multi[c]++;
						break;
					case ')':
						mp->multi[c]++;
						goto punctuation;
					case ':':
						if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
							mp->multi[':']++;
						goto punctuation;
					case '.':
					case ',':
					case '%':
					case ';':
					case '?':
					punctuation:
						pun++;
						if (*b != ' ' && *b != '\n')
							badpun++;
						break;
					}
				}
			}
		}
	}
	else
		while (b < e)
			mp->count[*b++]++;
	base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
	suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
	if (!flags)
	{
		/*
		 * plain text: the file name decides first, then the
		 * statistics; the id_* labels are shared by both
		 */

		if (match(suff, "*sh|bat|cmd"))
			goto id_sh;
		if (match(base, "*@(mkfile)"))
			goto id_mk;
		if (match(base, "*@(makefile|.mk)"))
			goto id_make;
		if (match(base, "*@(mamfile|.mam)"))
			goto id_mam;
		if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
			goto id_c;
		if (match(suff, "f"))
			goto id_fortran;
		if (match(suff, "htm+(l)"))
			goto id_html;
		if (match(suff, "cpy"))
			goto id_copybook;
		if (match(suff, "cob|cbl|cb2"))
			goto id_cobol;
		if (match(suff, "pl[1i]"))
			goto id_pl1;
		if (match(suff, "tex"))
			goto id_tex;
		if (match(suff, "asm|s"))
			goto id_asm;
		if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
		{
		id_sh:
			s = T("command script");
			mp->mime = "application/sh";
			goto qualify;
		}
		if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
		{
			s = T("mail message");
			mp->mime = "message/rfc822";
			goto qualify;
		}
		if (match(base, "*@(mkfile)"))
		{
		id_mk:
			s = "mkfile";
			mp->mime = "application/mk";
			goto qualify;
		}
		if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
		{
		id_make:
			s = "makefile";
			mp->mime = "application/make";
			goto qualify;
		}
		if (mp->multi['.'] >= 3)
		{
			s = T("nroff input");
			mp->mime = "application/x-troff";
			goto qualify;
		}
		if (mp->multi['X'] >= 3)
		{
			s = T("TeX input");
			mp->mime = "application/x-tex";
			goto qualify;
		}
		/* brackets must balance unless the buffer is full (data may be cut off) */
		if (mp->fbsz < SF_BUFSIZE &&
		    (mp->multi['('] == mp->multi[')'] &&
		     mp->multi['{'] == mp->multi['}'] &&
		     mp->multi['['] == mp->multi[']']) ||
		    mp->fbsz >= SF_BUFSIZE &&
		    (mp->multi['('] >= mp->multi[')'] &&
		     mp->multi['{'] >= mp->multi['}'] &&
		     mp->multi['['] >= mp->multi[']']))
		{
			c = mp->identifier[ID_INCL1];
			if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
			    mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
			    mp->count['='] >= 20 && mp->count[';'] >= 20)
			{
			id_c:
				/* description is t1 (lex/yacc) t2 (c/c++) t3 (program/header) */
				t1 = "";
				t2 = "c ";
				t3 = T("program");
				switch (*suff)
				{
				case 'c':
				case 'C':
					mp->mime = "application/x-cc";
					break;
				case 'l':
				case 'L':
					t1 = "lex ";
					mp->mime = "application/x-lex";
					break;
				default:
					t3 = T("header");
					if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
					{
						mp->mime = "application/x-cc";
						break;
					}
					/*FALLTHROUGH*/
				case 'y':
				case 'Y':
					t1 = "yacc ";
					mp->mime = "application/x-yacc";
					break;
				}
				if (mp->identifier[ID_CPLUSPLUS] >= 3)
				{
					t2 = "c++ ";
					mp->mime = "application/x-c++";
				}
				sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
				s = mp->sbuf;
				goto qualify;
			}
		}
		if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
		    (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
		     mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
		{
		id_mam:
			s = T("mam program");
			mp->mime = "application/x-mam";
			goto qualify;
		}
		if (mp->identifier[ID_FORTRAN] >= 8)
		{
		id_fortran:
			s = T("fortran program");
			mp->mime = "application/x-fortran";
			goto qualify;
		}
		if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
		{
		id_html:
			s = T("html input");
			mp->mime = "text/html";
			goto qualify;
		}
		if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
		{
		id_copybook:
			s = T("cobol copybook");
			mp->mime = "application/x-cobol";
			goto qualify;
		}
		if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
		{
		id_cobol:
			s = T("cobol program");
			mp->mime = "application/x-cobol";
			goto qualify;
		}
		if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
		{
		id_pl1:
			s = T("pl1 program");
			mp->mime = "application/x-pl1";
			goto qualify;
		}
		if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
		{
		id_tex:
			s = T("TeX input");
			mp->mime = "text/tex";
			goto qualify;
		}
		if (mp->identifier[ID_ASM] >= 4)
		{
		id_asm:
			s = T("as program");
			mp->mime = "application/x-as";
			goto qualify;
		}
		if (ckenglish(mp, pun, badpun))
		{
			s = T("english text");
			mp->mime = "text/plain";
			goto qualify;
		}
	}
	else if (streq(base, "core"))
	{
		mp->mime = "x-system/core";
		return T("core dump");
	}
	if (flags & (CC_binary|CC_notext))
	{
		/*
		 * utf-8 check: q is the number of leading 1 bits in the lead
		 * byte; 2, 3 or 4 must be followed by q-1 bytes of the form
		 * 10xxxxxx, anything other than 0, 2, 3 or 4 ends the check
		 * the data is reclassified as utf-8 only if the scan reaches
		 * the end of the buffer and saw a multi-byte sequence (n)
		 */

		b = (unsigned char*)mp->fbuf;
		e = b + mp->fbsz;
		n = 0;
		for (;;)
		{
			c = *b++;
			q = 0;
			while (c & 0x80)
			{
				c <<= 1;
				q++;
			}
			switch (q)
			{
			case 4:
				if (b < e && (*b++ & 0xc0) != 0x80)
					break;
			case 3:
				if (b < e && (*b++ & 0xc0) != 0x80)
					break;
			case 2:
				if (b < e && (*b++ & 0xc0) != 0x80)
					break;
				n = 1;
			case 0:
				if (b >= e)
				{
					if (n)
					{
						flags &= ~(CC_binary|CC_notext);
						flags |= CC_utf_8;
					}
					break;
				}
				continue;
			}
			break;
		}
	}
	if (flags & (CC_binary|CC_notext))
	{
		unsigned long	d = 0;

		/* q is the expected count per byte value for evenly spread data */
		if ((q = mp->fbsz / UCHAR_MAX) >= 2)
		{
			/*
			 * compression/encryption via standard deviation
			 */


			for (c = 0; c < UCHAR_MAX; c++)
			{
				pun = mp->count[c] - q;
				d += pun * pun;
			}
			d /= mp->fbsz;
		}
		if (d <= 0)
			s = T("binary");
		else if (d < 4)
			s = T("encrypted");
		else if (d < 16)
			s = T("packed");
		else if (d < 64)
			s = T("compressed");
		else if (d < 256)
			s = T("delta");
		else
			s = T("data");
		mp->mime = "application/octet-stream";
		return s;
	}
	mp->mime = "text/plain";
	if (flags & CC_utf_8)
		s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
	else if (flags & CC_latin)
		s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
	else
		s = (flags & CC_control) ? T("text with control characters") : T("text");
 qualify:
	/*
	 * qualify the description s: "dos " when (nearly) every newline is
	 * paired with a carriage return, and the code set name when the
	 * data was not in the native code set
	 */

	if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
	{
		t = "dos ";
		mp->mime = "text/dos";
	}
	else
		t = "";
	if (code)
	{
		if (code == CC_ASCII)
			sfsprintf(buf, end - buf, "ascii %s%s", t, s);
		else
		{
			sfsprintf(buf, end - buf, "ebcdic%d %s%s", code - 1, t, s);
			mp->mime = "text/ebcdic";
		}
		s = buf;
	}
	else if (*t)
	{
		sfsprintf(buf, end - buf, "%s%s", t, s);
		s = buf;
	}
	return s;
}

/*
 * return the basic magic string for file,st
 *
 * buf..end is scratch space for descriptions that have to be formatted;
 * the return value is either buf or a string owned elsewhere
 * mp->mime is set to the mime type that goes with the description
 * a mime template that ends in %s is expanded into mp->fbuf using
 * words taken from the description
 */

static char*
type(register Magic_t* mp, const char* file, struct stat* st, char* buf, char* end)
{
	register char*	s;
	register char*	t;

	mp->mime = 0;
	if (!S_ISREG(st->st_mode))
	{
		/*
		 * non-regular files are identified by the stat mode alone
		 */

		if (S_ISDIR(st->st_mode))
		{
			mp->mime = "x-system/dir";
			return T("directory");
		}
		if (S_ISLNK(st->st_mode))
		{
			mp->mime = "x-system/lnk";
			s = buf;
			s += sfsprintf(s, end - s, T("symbolic link to "));
			if (pathgetlink(file, s, end - s) < 0)
				return T("cannot read symbolic link text");
			return buf;
		}
		if (S_ISBLK(st->st_mode))
		{
			/*
			 * bound the write by the caller's buffer, like
			 * every other formatter in this function
			 */

			mp->mime = "x-system/blk";
			sfsprintf(buf, end - buf, T("block special (%s)"), fmtdev(st));
			return buf;
		}
		if (S_ISCHR(st->st_mode))
		{
			mp->mime = "x-system/chr";
			sfsprintf(buf, end - buf, T("character special (%s)"), fmtdev(st));
			return buf;
		}
		if (S_ISFIFO(st->st_mode))
		{
			mp->mime = "x-system/fifo";
			return "fifo";
		}
#ifdef S_ISSOCK
		if (S_ISSOCK(st->st_mode))
		{
			mp->mime = "x-system/sock";
			return "socket";
		}
#endif
	}
	if (!(mp->fbmx = st->st_size))
		s = T("empty");
	else if (!mp->fp)
		s = T("cannot read");
	else
	{
		/*
		 * read the leading file data, leaving room for the
		 * terminating 0, then try the magic table and fall
		 * back to the language/text heuristics
		 */

		mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
		if (mp->fbsz < 0)
			s = fmterror(errno);
		else if (mp->fbsz == 0)
			s = T("empty");
		else
		{
			mp->fbuf[mp->fbsz] = 0;
			mp->xoff = 0;
			mp->xbsz = 0;
			if (!(s = ckmagic(mp, file, buf, end, st, 0)))
				s = cklang(mp, file, buf, end, st);
		}
	}
	if (!mp->mime)
		mp->mime = "application/unknown";
	else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
	{
		register char*	b;
		register char*	be;
		register char*	m;
		register char*	me;

		/*
		 * mime template "prefix%s": mp->fbuf is reused to hold
		 * the expanded mime type, starting with the prefix
		 */

		b = mp->mime;
		me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
		while (m < me && b < t)
			*m++ = *b++;

		/*
		 * select the description words for %s: scan space
		 * separated words, b trailing t by one word, and stop at
		 * the end of the description, after a word that ends
		 * with ',' or before a word that starts with "data" or
		 * "file"
		 */

		b = t = s;
		for (;;)
		{
			if (!(be = strchr(t, ' ')))
			{
				be = b + strlen(b);
				break;
			}

			/*
			 * be > s keeps the ',' test from reading the byte
			 * before the description when it starts with ' '
			 */

			if (be > s && *(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
				break;
			b = t;
			t = be + 1;
		}

		/*
		 * append the selected words with ' ' mapped to '-'
		 */

		while (m < me && b < be)
			if ((*m++ = *b++) == ' ')
				*(m - 1) = '-';
		*m = 0;
	}
	return s;
}

/*
 * low level for magicload()
 *
 * parse the magic table text on fp, one entry per line, and append the
 * resulting Entry_t chain to mp->magic / mp->magiclast
 * file only labels diagnostics (error_info.file)
 * diagnostics go through mp->disc->errorf when it is set; some are
 * issued only when MAGIC_VERBOSE is set in mp->flags
 * always returns 0
 */

static int
load(register Magic_t* mp, char* file, register Sfio_t* fp)
{
	register Entry_t*	ep;
	register char*		p;
	register char*		p2;
	char*			p3;
	char*			next;
	int			n;
	int			lge;
	int			lev;
	int			ent;
	int			old;
	int			cont;
	Info_t*			ip;
	Entry_t*		ret;
	Entry_t*		first;
	Entry_t*		last = 0;
	Entry_t*		fun['z' - 'a' + 1];

	/*
	 * fun[] maps a function letter (index letter - 'a') to the first
	 * entry of its body
	 * lev is the current { } nesting level and ent is the level at
	 * which the function being defined was entered (0 if none)
	 * ret is the [FUNCTION] entry still waiting for its closing }
	 * ep always points to an allocated entry that is not yet linked
	 * into the chain; the current line fills it in
	 */

	memzero(fun, sizeof(fun));
	cont = '$';
	ent = 0;
	lev = 0;
	old = 0;
	ret = 0;
	error_info.file = file;
	error_info.line = 0;
	first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
	while (p = sfgetr(fp, '\n', 1))
	{
		error_info.line++;
		for (; isspace(*p); p++);

		/*
		 * nesting
		 */

		switch (*p)
		{
		case 0:
		case '#':
			/*
			 * blank or comment line: a function entry that
			 * follows records cont '#' instead of '$'
			 */

			cont = '#';
			continue;
		case '{':
			if (++lev < MAXNEST)
				ep->nest = *p;
			else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
				(*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
			continue;
		case '}':
			if (!last || lev <= 0)
			{
				if (mp->disc->errorf)
					(*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
			}
			else if (lev-- == ent)
			{
				/*
				 * end of a function body: the pending entry
				 * becomes the [RETURN] entry and the chain
				 * resumes after the [FUNCTION] entry, so the
				 * body is reachable only via value.lab
				 */

				ent = 0;
				ep->cont = ':';
				ep->offset = ret->offset;
				ep->nest = ' ';
				ep->type = ' ';
				ep->op = ' ';
				ep->desc = "[RETURN]";
				last = ep;
				ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
				ret = 0;
			}
			else
				last->nest = *p;
			continue;
		default:
			/*
			 * x{ starts the definition of function x and x( is a
			 * call of function x; +( >( &( |( are excluded here
			 * and handled below as a continuation operator
			 * followed by an offset expression
			 */

			if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
			{
				n = *p++;
				if (n >= 'a' && n <= 'z')
					n -= 'a';
				else
				{
					if (mp->disc->errorf)
						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
					n = 0;
				}
				if (ret && mp->disc->errorf)
					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
				if (*p == '{')
				{
					ent = ++lev;
					ret = ep;
					ep->desc = "[FUNCTION]";
				}
				else
				{
					if (*(p + 1) != ')' && mp->disc->errorf)
						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
					ep->desc = "[CALL]";
				}
				ep->cont = cont;
				ep->offset = n;
				ep->nest = ' ';
				ep->type = ' ';
				ep->op = ' ';
				last = ep;
				ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);

				/*
				 * a definition labels the entry that follows
				 * as the function body; a call points at the
				 * body recorded by an earlier definition
				 */

				if (ret)
					fun[n] = last->value.lab = ep;
				else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
				continue;
			}
			if (!ep->nest)
				ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
			break;
		}

		/*
		 * continuation
		 */

		cont = '$';
		switch (*p)
		{
		case '>':
			old = 1;
			if (*(p + 1) == *p)
			{
				/*
				 * old style nesting push
				 */

				p++;
				old = 2;
				if (!lev && last)
				{
					lev = 1;
					last->nest = '{';
					if (last->cont == '>')
						last->cont = '&';
					ep->nest = '1';
				}
			}
			/*FALLTHROUGH*/
		case '+':
		case '&':
		case '|':
			ep->cont = *p++;
			break;
		default:
			if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
				(*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
			/*FALLTHROUGH*/
		case '*':
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			ep->cont = (lev > 0) ? '&' : '#';
			break;
		}

		/*
		 * a >> line leaves old == 1 so that the next line that is
		 * not itself a >> line pops the old style nest
		 */

		switch (old)
		{
		case 1:
			old = 0;
			if (lev)
			{
				/*
				 * old style nesting pop
				 */

				lev = 0;
				if (last)
					last->nest = '}';
				ep->nest = ' ';
				if (ep->cont == '&')
					ep->cont = '#';
			}
			break;
		case 2:
			old = 1;
			break;
		}
		if (isdigit(*p))
		{
			/*
			 * absolute offset
			 */

			ep->offset = strton(p, &next, NiL, 0);
			p2 = next;
		}
		else
		{
			for (p2 = p; *p2 && !isspace(*p2); p2++);
			if (!*p2)
			{
				if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
					(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
				continue;
			}

			/*
			 * offset expression
			 */

			*p2++ = 0;
			ep->expr = vmstrdup(mp->vm, p);
			if (isalpha(*p))
				ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
			else if (*p == '(' && ep->cont == '>')
			{
				/*
				 * convert old style indirection to @
				 */

				p = ep->expr + 1;

				/*
				 * advance p just past the first 0, @, ( or )
				 */

				for (;;)
				{
					switch (*p++)
					{
					case 0:
					case '@':
					case '(':
						break;
					case ')':
						break;
					default:
						continue;
					}
					break;
				}

				/*
				 * only a plain (expr) is rewritten: the ) is
				 * dropped and the leading ( becomes @
				 */

				if (*--p == ')')
				{
					*p = 0;
					*ep->expr = '@';
				}
			}
		}

		/*
		 * isolate the type field in p; p2 is left just past it
		 */

		for (; isspace(*p2); p2++);
		for (p = p2; *p2 && !isspace(*p2); p2++);
		if (!*p2)
		{
			if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
				(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
			continue;
		}
		*p2++ = 0;

		/*
		 * type
		 */

		/*
		 * a be or le prefix is recorded in ep->swap as ~0 (be) or
		 * ~7 (le)
		 */

		if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
		{
			ep->swap = ~(*p == 'l' ? 7 : 0);
			p += 2;
		}

		/*
		 * sh... is stored as 'h', any other s... and a... as 's';
		 * every other type is stored as its first character
		 */

		if (*p == 's')
		{
			if (*(p + 1) == 'h')
				ep->type = 'h';
			else
				ep->type = 's';
		}
		else if (*p == 'a')
			ep->type = 's';
		else
			ep->type = *p;
		if (p = strchr(p, '&'))
		{
			/*
			 * old style mask
			 */

			ep->mask = strton(++p, NiL, NiL, 0);
		}
		for (; isspace(*p2); p2++);

		/*
		 * with an old style mask the comparison field is given an
		 * explicit = prefix, written over the byte before it
		 */

		if (ep->mask)
			*--p2 = '=';

		/*
		 * comparison operation
		 */

		/*
		 * the field ends at the first tab; without a tab it ends at
		 * the first space that is outside quotes and brackets
		 */

		p = p2;
		if (p2 = strchr(p, '\t'))
			*p2++ = 0;
		else
		{
			int	qe = 0;
			int	qn = 0;

			/*
			 * assume balanced {}[]()\\""'' field
			 */

			for (p2 = p;;)
			{
				switch (n = *p2++)
				{
				case 0:
					break;
				case '{':
					if (!qe)
						qe = '}';
					if (qe == '}')
						qn++;
					continue;
				case '(':
					if (!qe)
						qe = ')';
					if (qe == ')')
						qn++;
					continue;
				case '[':
					if (!qe)
						qe = ']';
					if (qe == ']')
						qn++;
					continue;
				case '}':
				case ')':
				case ']':
					if (qe == n && qn > 0)
						qn--;
					continue;
				case '"':
				case '\'':
					if (!qe)
						qe = n;
					else if (qe == n)
						qe = 0;
					continue;
				case '\\':
					if (*p2)
						p2++;
					continue;
				default:
					if (!qe && isspace(n))
						break;
					continue;
				}

				/*
				 * terminate the field at the delimiter, or
				 * leave p2 on the terminating 0
				 */

				if (n)
					*(p2 - 1) = 0;
				else
					p2--;
				break;
			}
		}

		/*
		 * lge adjusts the comparison value by one so that >= and <=
		 * are stored as > and <
		 */

		lge = 0;
		if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
			ep->op = '=';
		else
		{
			if (*p == '&')
			{
				ep->mask = strton(++p, &next, NiL, 0);
				p = next;
			}
			switch (*p)
			{
			case '=':
			case '>':
			case '<':
			case '*':
				ep->op = *p++;
				if (*p == '=')
				{
					p++;
					switch (ep->op)
					{
					case '>':
						lge = -1;
						break;
					case '<':
						lge = 1;
						break;
					}
				}
				break;
			case '!':
			case '@':
				ep->op = *p++;
				if (*p == '=')
					p++;
				break;
			case 'x':
				p++;
				ep->op = '*';
				break;
			default:
				ep->op = '=';
				if (ep->mask)
					ep->value.num = ep->mask;
				break;
			}
		}

		/*
		 * comparison value, unless the op is * or the value was
		 * already taken from the mask above
		 */

		if (ep->op != '*' && !ep->value.num)
		{
			if (ep->type == 'e')
			{
				/*
				 * delimited regular expression followed by
				 * its substitution
				 */

				if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
				{
					ep->value.sub->re_disc = &mp->redisc;
					if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
					{
						p += ep->value.sub->re_npat;
						if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
							p += ep->value.sub->re_npat;
					}
					if (n)
					{
						regmessage(mp, ep->value.sub, n);
						ep->value.sub = 0;
					}
					else if (*p && mp->disc->errorf)
						(*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
				}
			}
			else if (ep->type == 'm')
			{
				/*
				 * match pattern: one extra byte is copied so
				 * that a trailing * can be written over the
				 * last copied byte
				 */

				ep->mask = stresc(p) + 1;
				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
				memcpy(ep->value.str, p, ep->mask);
				if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
					ep->value.str[ep->mask - 1] = '*';
			}
			else if (ep->type == 's')
			{
				/*
				 * string: ep->mask holds the length returned
				 * by stresc(); exactly that many bytes are
				 * copied
				 */

				ep->mask = stresc(p);
				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
				memcpy(ep->value.str, p, ep->mask);
			}
			else if (*p == '\'')
			{
				/*
				 * character constant
				 */

				stresc(p);
				ep->value.num = *(unsigned char*)(p + 1) + lge;
			}
			else if (strmatch(p, "+([a-z])\\(*\\)"))
			{
				char*	t;

				/*
				 * name(...) builtin: the entry becomes type
				 * 'V' with the first letter of the name as
				 * the op; p is left after the (
				 */

				t = p;
				ep->type = 'V';
				ep->op = *p;
				while (*p && *p++ != '(');
				switch (ep->op)
				{
				case 'l':
					/*
					 * the first argument is a function
					 * letter; start and size are parsed
					 * from the comma separated arguments
					 * that follow
					 */

					n = *p++;
					if (n < 'a' || n > 'z')
					{
						if (mp->disc->errorf)
							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
					}
					else if (!fun[n -= 'a'])
					{
						if (mp->disc->errorf)
							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
					}
					else
					{
						ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
						ep->value.loop->lab = fun[n];
						while (*p && *p++ != ',');
						ep->value.loop->start = strton(p, &t, NiL, 0);
						while (*t && *t++ != ',');
						ep->value.loop->size = strton(t, &t, NiL, 0);
					}
					break;
				case 'm':
				case 'r':
					/*
					 * 32 byte desc and mime buffers are
					 * allocated here; they may be replaced
					 * by the description field below
					 */

					ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
					ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
					break;
				case 'v':
					break;
				default:
					if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
						(*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
					break;
				}
			}
			else
			{
				/*
				 * numeric constant
				 */

				ep->value.num = strton(p, NiL, NiL, 0) + lge;
				if (ep->op == '@')
					ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
			}
		}

		/*
		 * file description
		 */

		if (p2)
		{
			for (; isspace(*p2); p2++);
			if (p = strchr(p2, '\t'))
			{
				/*
				 * check for message catalog index
				 */

				/*
				 * a leading field of the form name:digits is
				 * skipped and the description is taken from
				 * the field after it
				 */

				*p++ = 0;
				if (isalpha(*p2))
				{
					for (p3 = p2; isalnum(*p3); p3++);
					if (*p3++ == ':')
					{
						for (; isdigit(*p3); p3++);
						if (!*p3)
						{
							for (p2 = p; isspace(*p2); p2++);
							if (p = strchr(p2, '\t'))
								*p++ = 0;
						}
					}
				}
			}
			stresc(p2);
			ep->desc = vmstrdup(mp->vm, p2);

			/*
			 * whatever follows the description's tab is the mime
			 * type
			 */

			if (p)
			{
				for (; isspace(*p); p++);
				if (*p)
					ep->mime = vmstrdup(mp->vm, p);
			}
		}
		else
			ep->desc = "";

		/*
		 * get next entry
		 */

		last = ep;
		ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
	}

	/*
	 * append the new chain to the table in mp and release the
	 * trailing entry that was allocated but never filled in
	 */

	if (last)
	{
		last->next = 0;
		if (mp->magiclast)
			mp->magiclast->next = first;
		else
			mp->magic = first;
		mp->magiclast = last;
	}
	vmfree(mp->vm, ep);
	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
	{
		if (lev < 0)
			(*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
		else if (lev > 0)
			(*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
		if (ret)
			(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
	}
	error_info.file = 0;
	error_info.line = 0;
	return 0;
}

/*
 * load a magic file into mp
 */

int
magicload(register Magic_t* mp, const char* file, unsigned long flags)
{
	register char*		s;
	register char*		e;
	register char*		t;
	int			n;
	int			found;
	int			list;
	Sfio_t*			fp;

	mp->flags = mp->disc->flags | flags;
	found = 0;
	if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
	{
		if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
			s = MAGIC_FILE;
	}
	for (;;)
	{
		if (!list)
			e = 0;
		else if (e = strchr(s, ':'))
		{
			/*
			 * ok, so ~ won't work for the last list element
			 * we do it for MAGIC_FILES_ENV anyway
			 */

			if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
			{
				sfputr(mp->tmp, t, -1);
				s += n - 1;
			}
			sfwrite(mp->tmp, s, e - s);
			if (!(s = sfstruse(mp->tmp)))
				goto nospace;
		}
		if (!*s || streq(s, "-"))
			s = MAGIC_FILE;
		if (!(fp = sfopen(NiL, s, "r")))
		{
			if (list)
			{
				if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))) && !strchr(s, '/'))
				{
					strcpy(mp->fbuf, s);
					sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
					if (!(s = sfstruse(mp->tmp)))
						goto nospace;
					if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))))
						goto next;
				}
				if (!(fp = sfopen(NiL, t, "r")))
					goto next;
			}
			else
			{
				if (mp->disc->errorf)
					(*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
				return -1;
			}
		}
		found = 1;
		n = load(mp, s, fp);
		sfclose(fp);
		if (n && !list)
			return -1;
	next:
		if (!e)
			break;
		s = e + 1;
	}
	if (!found)
	{
		if (mp->flags & MAGIC_VERBOSE)
		{
			if (mp->disc->errorf)
				(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
		}
		return -1;
	}
	return 0;
 nospace:
	if (mp->disc->errorf)
		(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
	return -1;
}

/*
 * open a magic session
 *
 * the session handle and everything hanging off it live in a private
 * vmalloc region that magicclose() releases
 * returns 0 on failure
 */

Magic_t*
magicopen(Magicdisc_t* disc)
{
	register Magic_t*	mp;
	register int		i;
	register int		n;
	register int		f;
	register int		c;
	register Vmalloc_t*	vm;
	unsigned char*		map[CC_MAPS + 1];

	vm = vmopen(Vmdcheap, Vmbest, 0);
	if (!vm)
		return 0;
	mp = vmnewof(vm, 0, Magic_t, 1, 0);
	if (!mp)
	{
		vmclose(vm);
		return 0;
	}
	mp->id = lib;
	mp->disc = disc;
	mp->vm = vm;
	mp->flags = disc->flags;

	/*
	 * regex discipline: report via the caller's errorf and take
	 * memory from the session region
	 */

	mp->redisc.re_version = REG_VERSION;
	mp->redisc.re_flags = REG_NOFREE;
	mp->redisc.re_errorf = (regerror_t)disc->errorf;
	mp->redisc.re_resizef = (regresize_t)vmgetmem;
	mp->redisc.re_resizehandle = (void*)mp->vm;

	/*
	 * info name table, seeded from the static info[] array
	 */

	mp->dtdisc.key = offsetof(Info_t, name);
	mp->dtdisc.link = offsetof(Info_t, link);
	mp->tmp = sfstropen();
	if (mp->tmp)
		mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dtoset);
	if (!mp->tmp || !mp->infotab)
	{
		magicclose(mp);
		return 0;
	}
	n = 0;
	while (n < elementsof(info))
	{
		dtinsert(mp->infotab, &info[n]);
		n++;
	}

	/*
	 * mp->cctype[n] packs the CCTYPE() of byte n under each of the
	 * CC_MAPS code set maps, CC_BIT bits per map, map 0 lowest
	 */

	for (i = 0; i < CC_MAPS; i++)
		map[i] = ccmap(i, CC_ASCII);
	mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
	for (n = 0; n <= UCHAR_MAX; n++)
	{
		f = 0;
		for (i = CC_MAPS - 1; i >= 0; i--)
		{
			c = ccmapchr(map[i], n);
			f = (f << CC_BIT) | CCTYPE(c);
		}
		mp->cctype[n] = f;
	}
	return mp;
}

/*
 * close a magicopen() session
 *
 * closes the session's string stream and vmalloc region when they
 * are set
 * returns -1 if mp is null, 0 otherwise
 */

int
magicclose(register Magic_t* mp)
{
	if (mp)
	{
		if (mp->tmp)
			sfstrclose(mp->tmp);
		if (mp->vm)
			vmclose(mp->vm);
		return 0;
	}
	return -1;
}

/*
 * return the magic string for file with optional stat info st
 *
 * fp, if not null, is the open stream for file; its seek offset is
 * restored after the file data has been examined
 * the mime type is returned instead of the description when
 * MAGIC_MIME is set in the discipline flags
 * the result is never null
 */

char*
magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
{
	off_t	pos;
	char*	desc;

	mp->flags = mp->disc->flags;
	mp->mime = 0;
	if (st)
	{
		mp->fp = fp;
		if (mp->fp)
			pos = sfseek(mp->fp, (off_t)0, SEEK_CUR);
		desc = type(mp, file, st, mp->tbuf, &mp->tbuf[sizeof(mp->tbuf)-1]);
		if (mp->fp)
			sfseek(mp->fp, pos, SEEK_SET);
		if (!(mp->flags & (MAGIC_MIME|MAGIC_ALL)))
		{
			/*
			 * decorate the basic description with size and
			 * mode attributes, building the result in mp->tmp
			 */

			if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
				sfprintf(mp->tmp, "%s ", T("short"));
			sfprintf(mp->tmp, "%s", desc);
			if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
			{
				if (S_ISDIR(st->st_mode))
					sfprintf(mp->tmp, ", %s", T("searchable"));
				else
					sfprintf(mp->tmp, ", %s", T("executable"));
			}
			if (st->st_mode & S_ISUID)
				sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
			if (st->st_mode & S_ISGID)
				sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
			if (st->st_mode & S_ISVTX)
				sfprintf(mp->tmp, ", sticky");
			desc = sfstruse(mp->tmp);
			if (!desc)
				desc = T("out of space");
		}
	}
	else
		desc = T("cannot stat");
	if (mp->flags & MAGIC_MIME)
		desc = mp->mime;
	return desc ? desc : T("error");
}

/*
 * list the magic table in mp on sp
 *
 * a header line is followed by one tab separated line per entry
 * an entry whose cont is '$' refers to a function body through
 * value.lab; the body is listed in place the first time it is seen
 * and the mask of its first entry is then set to 1 so that later
 * references do not list it again
 * always returns 0
 */

int
magiclist(register Magic_t* mp, register Sfio_t* sp)
{
	register Entry_t*	ep = mp->magic;
	register Entry_t*	rp = 0;

	mp->flags = mp->disc->flags;
	sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
	while (ep)
	{
		sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
		if (ep->expr)
			sfprintf(sp, "%s", ep->expr);
		else
			sfprintf(sp, "%ld", ep->offset);

		/*
		 * load() stores ~7 for an le type and ~0 for a be type,
		 * so those are the values to test for the L and B prefix
		 */

		sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~7 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
		switch (ep->type)
		{
		case 'm':
		case 's':
			sfputr(sp, fmtesc(ep->value.str), -1);
			break;
		case 'V':
			switch (ep->op)
			{
			case 'l':
				/*
				 * load() leaves value.loop unset when the
				 * loop function is invalid or undefined
				 */

				if (ep->value.loop)
					sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
				else
					sfprintf(sp, "loop()");
				break;
			case 'v':
				sfprintf(sp, "vcodex()");
				break;
			default:
				sfprintf(sp, "%p", ep->value.str);
				break;
			}
			break;
		default:
			sfprintf(sp, "%lo", ep->value.num);
			break;
		}
		sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));

		/*
		 * value.lab is null for a call of an undefined function;
		 * such an entry is simply stepped over
		 */

		if (ep->cont == '$' && ep->value.lab && !ep->value.lab->mask)
		{
			/*
			 * descend into the function body, remembering the
			 * referring entry for the return
			 */

			rp = ep;
			ep = ep->value.lab;
		}
		else
		{
			if (ep->cont == ':' && rp)
			{
				/*
				 * end of body: mark it listed and resume
				 * after the referring entry
				 */

				ep = rp;
				ep->value.lab->mask = 1;
			}
			ep = ep->next;
		}
	}
	return 0;
}