%{
/*
 * Copyright (c) 2009, 2010 Nhat Minh Lê
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* LINTLIBRARY */

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <regxml/buffer.h>
#include <regxml/regxml.h>

#define DEFAULTTEXTSIZE 1024

/* Hooks used by the generated parser; both are defined further down. */
void regxml_yyerror(const char *);
int regxml_yylex(void);

/*
 * State of the compilation in progress. All of it is file-level, and
 * regxml_pattern_comp() (re)initialises most of it on entry.
 */

/* Error code recorded so far; 0 means none. Returned on failure. */
static int error;
/* Read cursor into the pattern text; advanced by regxml_yylex(). */
static const char *subject;
/* Root of the parsed tree, stored by the `start' grammar rule. */
static struct regxml_node *pattern;
/*
 * psize: number of node IDs handed out by assignids().
 * ncapture: index given to the next capture node by resolve().
 */
static regxml_id_t psize, ncapture = 1;
/*
 * autoentitize: non-zero when REGXML_ENTITIZE was requested; addc()
 * then writes quotes and angle brackets as entities.
 * reflags: extra regcomp() flags (REG_ICASE or 0).
 */
static int autoentitize, reflags;
/* Pattern delimiter when REGXML_PDELIM is set, '\0' otherwise. */
static char endc;
/* Scratch buffer in which the lexer accumulates token text. */
static struct regxml_buffer buffer;

/* Raw node allocation. */
static int new(regxml_op_t, struct regxml_node *, struct regxml_node *,
    struct regxml_node **);
static int newstr(regxml_op_t, char *, struct regxml_node **);

/* Node constructors used by the grammar actions; NULL on failure. */
static struct regxml_node *newop(regxml_op_t,
    struct regxml_node *, struct regxml_node *);
static struct regxml_node *newnext(struct regxml_node *);
static struct regxml_node *newsibling(struct regxml_node *);
static struct regxml_node *newchild(struct regxml_node *);
static struct regxml_node *newdesc(struct regxml_node *);

/* Turns a string value into a compiled regular expression. */
static int mkreval(union regxml_value *);

/* Appends one character to the lexer's text buffer. */
static int addc(char);

/* Passes run over the finished tree, and its teardown. */
static void resetids(struct regxml_node *);
static void assignids(int, struct regxml_node *);
static int childcycle(struct regxml_node *, struct regxml_node *);
static void resolve(struct regxml_node *[], struct regxml_node *);
static regxml_id_t marknullable(struct regxml_node *);
static int resolvenullable(struct regxml_node *);
static int mkthen(struct regxml_node *);
static int mkthennullable1(struct regxml_node *);
static int mkthennullable2(struct regxml_node *);
static int mkthennullable3(struct regxml_node *);
static void freenode(struct regxml_node *);
static void freepattern(struct regxml_node *);
%}

/*
 * Semantic values: a tree node for the nonterminals, and the text of
 * the token for T_STRING.
 */
%union {
	struct regxml_node *node;
	char *str;
}

/* T_STRING covers quoted strings as well as bare words. */
%token <str> T_STRING
/* Keywords that are written as function calls, e.g. node(). */
%token T_NODE T_TEXT T_COMMENT T_PI T_ATTRIBUTE T_NAME T_VALUE T_RELPOS
/*
 * Operator precedence and associativity. As usual with yacc, the
 * operators declared first bind the least tightly.
 */
%right '|'
%right T_AND T_OR
%right '/' '%' T_SLSHSLSH T_PRCTPRCT
%left '['
%left '=' T_NE '~' T_NR
%left '<' '>' T_LE T_GE
%left '+' '-'
%left '*' T_DIV T_MOD

%type <node> pattern constant

%%

/* Entry point: store the root of the parsed tree in the file-level `pattern'. */
start:	pattern			{ pattern = $1; }

/*
 * Every expression and pattern form of the language.
 *
 * The actions build their nodes through newop() and the newnext(),
 * newsibling(), newchild() and newdesc() helpers. Those take ownership
 * of their operands: on failure they release them, record an error and
 * yield NULL, so a semantic value may be NULL once an error has been
 * recorded and the actions must not free operands themselves.
 */
pattern:
	'(' pattern ')'		{ $$ = $2; }
	| T_NODE '(' ')'	{
		$$ = newop(REGXML_FNODE, NULL, NULL);
	}
	| T_TEXT '(' ')'	{
		$$ = newop(REGXML_FTEXT, NULL, NULL);
	}
	| T_COMMENT '(' ')'	{
		$$ = newop(REGXML_FCOMMENT, NULL, NULL);
	}
	| T_PI '(' constant ')'	{
		$$ = newop(REGXML_FPI, $3, NULL);
	}
	| '@' constant		{
		$$ = newop(REGXML_FATTR, $2, NULL);
	}
	| T_ATTRIBUTE '(' constant ')' {
		$$ = newop(REGXML_FATTR, $3, NULL);
	}
	| T_NAME '(' ')'	{
		$$ = newop(REGXML_FNAME, NULL, NULL);
	}
	| T_VALUE '(' ')'	{
		$$ = newop(REGXML_FVALUE, NULL, NULL);
	}
	| T_RELPOS '(' ')'	{
		$$ = newop(REGXML_FRELPOS, NULL, NULL);
	}
	| constant		{ $$ = $1; }
	| pattern '+' pattern	{
		$$ = newop(REGXML_ADD, $1, $3);
	}
	| pattern '-' pattern	{
		$$ = newop(REGXML_SUB, $1, $3);
	}
	| pattern '*' pattern	{
		$$ = newop(REGXML_MUL, $1, $3);
	}
	| pattern T_DIV pattern	{
		$$ = newop(REGXML_DIV, $1, $3);
	}
	| pattern T_MOD pattern	{
		$$ = newop(REGXML_MOD, $1, $3);
	}
	| pattern '<' pattern	{
		$$ = newop(REGXML_LT, $1, $3);
	}
	| pattern '>' pattern	{
		$$ = newop(REGXML_GT, $1, $3);
	}
	| pattern T_LE pattern	{
		$$ = newop(REGXML_LE, $1, $3);
	}
	| pattern T_GE pattern	{
		$$ = newop(REGXML_GE, $1, $3);
	}
	| pattern '=' pattern	{
		$$ = newop(REGXML_EQ, $1, $3);
	}
	| pattern T_NE pattern	{
		$$ = newop(REGXML_NE, $1, $3);
	}
	| pattern '~' pattern	{
		/*
		 * A constant right operand is compiled once, here. If
		 * that fails, mkreval() records the error and newop()
		 * below releases both operands; freeing them in this
		 * action as well would free them twice.
		 */
		if (error == 0 && $3->rxn_type == REGXML_CONSTANT)
			(void)mkreval(&$3->rxn_value);
		$$ = newop(REGXML_RE, $1, $3);
	}
	| pattern T_NR pattern	{
		/* Same ownership rules as for the '~' operator. */
		if (error == 0 && $3->rxn_type == REGXML_CONSTANT)
			(void)mkreval(&$3->rxn_value);
		$$ = newop(REGXML_NR, $1, $3);
	}
	| '{' pattern '}'	{
		$$ = newop(REGXML_CAPTURE, $2, NULL);
	}
	| '*'			{
		$$ = newop(REGXML_ANY, NULL, NULL);
	}
	| pattern '[' pattern ']' {
		$$ = newop(REGXML_THEN, $3, $1);
	}
	| pattern '/' pattern	{
		$$ = newop(REGXML_THENX, $1, newchild($3));
	}
	| pattern '%' pattern	{
		$$ = newop(REGXML_THENX, $1, newnext($3));
	}
	| pattern T_SLSHSLSH pattern {
		$$ = newop(REGXML_THENX, $1, newdesc($3));
	}
	| pattern T_PRCTPRCT pattern {
		$$ = newop(REGXML_THENX, $1, newsibling($3));
	}
	| pattern T_AND pattern	{
		$$ = newop(REGXML_THEN, $1, $3);
	}
	| pattern T_OR pattern	{
		$$ = newop(REGXML_ELSE, $1, $3);
	}
	| '/' pattern		{
		$$ = newop(REGXML_THEN,
		    newop(REGXML_SUBROOT, NULL, NULL), $2);
	}
	| '<' pattern		{
		$$ = newop(REGXML_THEN,
		    newop(REGXML_RELPOS1, NULL, NULL), $2);
	}
	| pattern '>'		{
		$$ = newop(REGXML_THEN,
		    newop(REGXML_RELPOSN, NULL, NULL), $1);
	}
	| T_SLSHSLSH pattern	{ $$ = $2; }
	| pattern '?'		{
		$$ = $1;
		/* The operand is NULL when an earlier action failed. */
		if ($$ != NULL)
			$$->rxn_nullable = 1;
	}
	| pattern '|' pattern	{
		$$ = newop(REGXML_OR, $1, $3);
	}
	;

/*
 * A quoted string or a bare word. The node made by newstr() takes
 * ownership of the token text; when no node is made (an error is
 * already pending, or the allocation fails) the text is released here
 * instead of being leaked.
 */
constant:
	T_STRING		{
		$$ = NULL;
		if (error != 0) {
			free($1);
		} else if (newstr(REGXML_CONSTANT, $1, &$$) != 0) {
			free($1);
			error = REGXML_ESYSTEM;
		}
	}
	;

%%

/*
 * Reserved words of the pattern language and the token each one lexes
 * to. regxml_yylex() looks bare (unquoted) words up in this table.
 */
static const struct {
	const char *name;
	int token;
} keywords[] = {
	{ .name = "and",			.token = T_AND },
	{ .name = "attribute",			.token = T_ATTRIBUTE },
	{ .name = "comment",			.token = T_COMMENT },
	{ .name = "div",			.token = T_DIV },
	{ .name = "mod",			.token = T_MOD },
	{ .name = "name",			.token = T_NAME },
	{ .name = "node",			.token = T_NODE },
	{ .name = "or",				.token = T_OR },
	{ .name = "processing-instruction",	.token = T_PI },
	{ .name = "relative-position",		.token = T_RELPOS },
	{ .name = "text",			.token = T_TEXT },
	{ .name = "value",			.token = T_VALUE },
};

/*
 * Allocate a node of the given type over the given children and hand
 * it back through *ptr. A fresh node has no ID yet (-1), no capture
 * index, is not nullable and carries a unit value.
 *
 * Returns 0 on success. Returns -1 when the allocation fails, in which
 * case *ptr is not written.
 */
static int
new(regxml_op_t type, struct regxml_node *left, struct regxml_node *right,
    struct regxml_node **ptr)
{
	struct regxml_node *n;

	if ((n = malloc(sizeof *n)) == NULL)
		return -1;

	/* Tree linkage. */
	n->rxn_type = type;
	n->rxn_left = left;
	n->rxn_right = right;

	/* Bookkeeping that the later passes fill in. */
	n->rxn_id = -1;
	n->rxn_mid = 0;
	n->rxn_nullable = 0;

	n->rxn_value.rxv_unit._type = REGXML_UNIT;

	*ptr = n;
	return 0;
}

/*
 * Allocate a childless node of the given type whose value is the
 * string s, and hand it back through *ptr. The string is stored by
 * pointer, not copied, and freenode() later frees it together with
 * the node.
 *
 * Returns 0 on success. Returns -1 when the allocation fails, in which
 * case *ptr is not written and s is left alone.
 */
static int
newstr(regxml_op_t type, char *s, struct regxml_node **ptr)
{
	struct regxml_node *n;

	if ((n = malloc(sizeof *n)) == NULL)
		return -1;

	/* A leaf: no children. */
	n->rxn_type = type;
	n->rxn_left = NULL;
	n->rxn_right = NULL;

	/* Bookkeeping that the later passes fill in. */
	n->rxn_id = -1;
	n->rxn_mid = 0;
	n->rxn_nullable = 0;

	/* The string value itself. */
	n->rxn_value.rxv_string._type = REGXML_STRING;
	n->rxn_value.rxv_string._shared = 0;
	n->rxn_value.rxv_string._len = strlen(s);
	n->rxn_value.rxv_string._data = s;

	*ptr = n;
	return 0;
}

/*
 * Build an operator node over left and right, taking ownership of
 * both subtrees.
 *
 * When an error is already pending, or the allocation fails, both
 * subtrees are released and NULL is returned. The first error recorded
 * wins: a code set earlier (for instance REGXML_EREGEX by mkreval())
 * is kept, and REGXML_ESYSTEM is recorded only when nothing was
 * pending.
 */
static struct regxml_node *
newop(regxml_op_t op, struct regxml_node *left, struct regxml_node *right)
{
	struct regxml_node *node;

	node = NULL;
	if (error == 0 && new(op, left, right, &node) == 0)
		return node;

	/* new() does not write node on failure: only the operands exist. */
	freepattern(left);
	freepattern(right);
	if (error == 0)
		error = REGXML_ESYSTEM;
	return NULL;
}

static struct regxml_node *
newnext(struct regxml_node *left)
{
	struct regxml_node *node;

	node = NULL;
	if (error != 0)
		goto bad;
	if (new(REGXML_NEXT, left, NULL, &node) != 0)
		goto bad;
	return node;

bad:
	freepattern(left);
	freenode(node);
	error = REGXML_ESYSTEM;
	return NULL;
}

/*
 * NEXT<1>(OR(left, <1>))
 *
 * Build the cyclic structure above, taking ownership of left, and
 * return its NEXT node.
 *
 * When an error is already pending, or an allocation fails, left and
 * whatever was built so far are released and NULL is returned. An
 * error code recorded earlier is kept; REGXML_ESYSTEM is recorded only
 * when nothing was pending.
 */
static struct regxml_node *
newsibling(struct regxml_node *left)
{
	struct regxml_node *node, *tmp;

	node = tmp = NULL;
	if (error != 0)
		goto bad;
	if (new(REGXML_OR, left, NULL, &tmp) != 0)
		goto bad;
	if (new(REGXML_NEXT, tmp, NULL, &node) != 0)
		goto bad;
	/* Close the cycle: the OR's alternative is the NEXT itself. */
	tmp->rxn_right = node;
	return node;

bad:
	/* node is never allocated when we get here; tmp may be. */
	freepattern(left);
	freenode(tmp);
	if (error == 0)
		error = REGXML_ESYSTEM;
	return NULL;
}

/*
 * CHILD0(OR<1>(left, NEXT(<1>)))
 *
 * Build the cyclic structure above, taking ownership of left, and
 * return its CHILD0 node.
 *
 * When an error is already pending, or an allocation fails, left and
 * whatever was built so far are released and NULL is returned. An
 * error code recorded earlier is kept; REGXML_ESYSTEM is recorded only
 * when nothing was pending.
 */
static struct regxml_node *
newchild(struct regxml_node *left)
{
	struct regxml_node *node, *tmp, *tmp1;

	node = tmp = tmp1 = NULL;
	if (error != 0)
		goto bad;
	if (new(REGXML_OR, left, NULL, &tmp) != 0)
		goto bad;
	if (new(REGXML_NEXT, tmp, NULL, &tmp1) != 0)
		goto bad;
	/* Close the cycle: the OR's alternative is a NEXT back to it. */
	tmp->rxn_right = tmp1;
	if (new(REGXML_CHILD0, tmp, NULL, &node) != 0)
		goto bad;
	return node;

bad:
	/* node is never allocated when we get here; tmp and tmp1 may be. */
	freepattern(left);
	freenode(tmp1);
	freenode(tmp);
	if (error == 0)
		error = REGXML_ESYSTEM;
	return NULL;
}

/*
 * CHILD0<1>(OR<2>(OR(left, <1>), NEXT(<2>)))
 *
 * Build the cyclic structure above, taking ownership of left, and
 * return its CHILD0 node.
 *
 * When an error is already pending, or an allocation fails, left and
 * whatever was built so far are released and NULL is returned. An
 * error code recorded earlier is kept; REGXML_ESYSTEM is recorded only
 * when nothing was pending.
 */
static struct regxml_node *
newdesc(struct regxml_node *left)
{
	struct regxml_node *node, *tmp;

	tmp = NULL;
	if (error != 0)
		goto bad;
	if (new(REGXML_OR, left, NULL, &tmp) != 0)
		goto bad;
	/*
	 * newchild() owns tmp from here on: should it fail, it has
	 * already released tmp (and left through it) and recorded the
	 * error.
	 */
	node = newchild(tmp);
	if (node == NULL)
		return NULL;
	/* Close the outer cycle: the inner OR's alternative is the CHILD0. */
	tmp->rxn_right = node;
	return node;

bad:
	/* Nothing has been allocated when we get here. */
	freepattern(left);
	if (error == 0)
		error = REGXML_ESYSTEM;
	return NULL;
}

/*
 * Convert the string value *val, in place, into a compiled extended
 * regular expression; reflags supplies any additional regcomp()
 * flags.
 *
 * Returns 0 on success, after releasing the original string. Returns
 * -1 on failure with error set to REGXML_ESYSTEM (out of memory) or
 * REGXML_EREGEX (the expression does not compile). On failure *val is
 * left as the untouched string value, so the owning node can still be
 * torn down by freenode() without leaking the string or the
 * half-built regex.
 */
static int
mkreval(union regxml_value *val)
{
	regex_t *re;
	char *s;

	_DIAGASSERT(val->rxv_unit._type == REGXML_STRING);
	s = val->rxv_string._data;

	re = malloc(sizeof *re);
	if (re == NULL) {
		error = REGXML_ESYSTEM;
		return -1;
	}
	if (regcomp(re, s, reflags|REG_EXTENDED) != 0) {
		/* Compilation failed, so only the storage needs releasing. */
		free(re);
		error = REGXML_EREGEX;
		return -1;
	}

	/*
	 * Commit only now. The members of the union share storage, so
	 * the string is released before the regex members are written.
	 */
	free(s);
	val->rxv_regex._type = REGXML_REGEX;
	val->rxv_regex._data = re;
	return 0;
}

/*
 * Error hook of the parser; regxml_yylex() also calls it for a bad
 * token. It currently ignores its message: nothing is reported or
 * recorded here.
 */
/* ARGSUSED */
void
regxml_yyerror(const char *s)
{
	/* XXX */
}

/*
 * Append one character of token text to the lexer buffer.
 *
 * When autoentitize is set, the characters ' " < > are written as the
 * entities &apos; &quot; &lt; &gt;. Every other character, and every
 * character when autoentitize is clear, is appended verbatim.
 *
 * Returns 0 on success and -1 when the append fails.
 */
static int
addc(char c)
{
	if (autoentitize) {
		switch (c) {
		case '\'':
			return regxml_buffer_append(&buffer, "&apos;") != 0 ?
			    -1 : 0;
		case '"':
			return regxml_buffer_append(&buffer, "&quot;") != 0 ?
			    -1 : 0;
		case '<':
			return regxml_buffer_append(&buffer, "&lt;") != 0 ?
			    -1 : 0;
		case '>':
			return regxml_buffer_append(&buffer, "&gt;") != 0 ?
			    -1 : 0;
		default:
			break;
		}
	}

	return regxml_buffer_appendraw(&buffer, &c, 1) != 0 ? -1 : 0;
}

int
regxml_yylex(void)
{
	size_t i;
	char quote;

re:
	switch (*subject) {
	case '\0':
		return 0;

	case ' ':
	case '\t':
	case '\r':
	case '\n':
	case '\f':
	case '\v':
		++subject;
		goto re;

	case '!':
		switch (*++subject) {
		case '=':
			++subject;
			return T_NE;
		case '~':
			++subject;
			return T_NR;
		default:
			goto bad;
		}

	case '<':
		if (*++subject == '=') {
			++subject;
			return T_LE;
		} else
			return '<';

	case '>':
		if (*++subject == '=') {
			++subject;
			return T_GE;
		} else
			return '>';

	case '/':
		if (*++subject == '/') {
			++subject;
			return T_SLSHSLSH;
		} else
			return '/';

	case '%':
		if (*++subject == '%') {
			++subject;
			return T_PRCTPRCT;
		} else
			return '%';

	case '(':
	case ')':
	case '@':
	case '+':
	case '-':
	case '=':
	case '~':
	case '{':
	case '}':
	case '*':
	case '[':
	case ']':
	case '?':
	case '|':
		return *subject++;

	case '\'':
	case '"':
		buffer.rxb_base[buffer.rxb_index = 0] = '\0';
		quote = *subject;
		while (*++subject != quote) {
			switch (*subject) {
			case '\0':
				goto bad;
			case '\\':
				if (addc(*++subject) != 0)
					return 0;
				break;
			default:
				if (addc(*subject) != 0)
					return 0;
			}
		}
		++subject;
		if (regxml_buffer_strdup(&buffer, &regxml_yylval.str) != 0) {
			error = REGXML_ESYSTEM;
			return 0;
		}
		return T_STRING;

	default:
		if (*subject == endc) {
			++subject;
			return 0;
		}

		buffer.rxb_base[buffer.rxb_index = 0] = '\0';
		do {
			if (addc(*subject++) != 0)
				return 0;
		} while (*subject != endc &&
		    strchr(" \t\r\n\f\v!<>/%()@+-=~{}*[]?|'\"",
			*subject) == NULL);

		for (i = 0; i < __arraycount(keywords); ++i) {
			if (strcmp(keywords[i].name, buffer.rxb_base) == 0)
				return keywords[i].token;
		}

		if (regxml_buffer_strdup(&buffer, &regxml_yylval.str) != 0) {
			error = REGXML_ESYSTEM;
			return 0;
		}
		return T_STRING;
	}

bad:
	regxml_yyerror("bad token");
	return 0;
}

/*
 * Set the ID of every node reachable from node back to -1.
 *
 * A node whose ID is already -1 stops the walk, which is what makes
 * it terminate on cyclic structures. All nodes must therefore carry an
 * ID other than -1 when this is called from the outside.
 */
static void
resetids(struct regxml_node *node)
{
	/* Recurse to the left, iterate to the right. */
	while (node != NULL && node->rxn_id != (regxml_id_t)-1) {
		node->rxn_id = -1;
		resetids(node->rxn_left);
		node = node->rxn_right;
	}
}

/*
 * Number the nodes reachable from node in pre-order, taking the IDs
 * from psize: a parent gets a lower ID than the nodes first reached
 * through it. Where the structure loops back, the node met first keeps
 * the lower ID, so a cycle shows up as a child whose ID is not higher
 * than its parent's.
 *
 * A node whose type is at most REGXML_CONSTANT gets an ID only when
 * exprctx is zero, that is, when it was not reached through another
 * such node; otherwise its ID stays -1.
 */
static void
assignids(int exprctx, struct regxml_node *node)
{
	int isexpr;

	if (node == NULL || node->rxn_id != (regxml_id_t)-1)
		return;

	isexpr = node->rxn_type <= REGXML_CONSTANT;
	if (!isexpr || !exprctx)
		node->rxn_id = psize++;

	/* The children see an expression context iff this node is one. */
	assignids(isexpr, node->rxn_left);
	assignids(isexpr, node->rxn_right);
}

/*
 * Return non-zero if child is an ancestor of node. This cycle
 * detection relies on the above grammar and the way IDs are assigned.
 */
static int
childcycle(struct regxml_node *node, struct regxml_node *child)
{
	return node->rxn_id != (regxml_id_t)-1 &&
	    child->rxn_id != (regxml_id_t)-1 &&
	    child->rxn_id <= node->rxn_id;
}

/*
 * Walk the nodes reachable from node and record each one that has an
 * ID in pvect, at the index given by that ID. Capture nodes are
 * numbered on the way, in visiting order, from ncapture. Back edges
 * are not followed.
 */
static void
resolve(struct regxml_node *pvect[], struct regxml_node *node)
{
	struct regxml_node *kid;
	int side;

	if (node == NULL)
		return;

	if (node->rxn_type == REGXML_CAPTURE)
		node->rxn_mid = ncapture++;
	if (node->rxn_id != (regxml_id_t)-1)
		pvect[node->rxn_id] = node;

	/* Left child first, then the right one. */
	for (side = 0; side < 2; ++side) {
		kid = side == 0 ? node->rxn_left : node->rxn_right;
		if (kid == NULL || childcycle(node, kid))
			continue;
		resolve(pvect, kid);
	}
}

/*
 * One pass of nullability propagation: mark every node that the
 * nullability of its children makes nullable, and return how many
 * nodes were newly marked. The caller repeats the pass until it
 * returns 0.
 *
 * The rules are:
 *	CAPTURE, CHILD0, NEXT, ELSE	nullable if the left child is
 *	THEN				nullable if the right child is
 *	THENX				nullable if both children are
 *	OR				nullable if either child is
 */
static regxml_id_t
marknullable(struct regxml_node *node)
{
	regxml_id_t count;
	int found;

	/* An rxn_id of 1 marks the nodes already visited in this pass. */
	if (node == NULL || node->rxn_id == 1)
		return 0;
	node->rxn_id = 1;

	/* Children first, so that their marks are seen below. */
	count = 0;
	if (node->rxn_left != NULL)
		count += marknullable(node->rxn_left);
	if (node->rxn_right != NULL)
		count += marknullable(node->rxn_right);

	if (node->rxn_nullable)
		return count;

	found = 0;
	switch (node->rxn_type) {
	case REGXML_CAPTURE:
	case REGXML_CHILD0:
	case REGXML_NEXT:
	case REGXML_ELSE:
		found = node->rxn_left->rxn_nullable;
		break;
	case REGXML_THEN:
		found = node->rxn_right->rxn_nullable;
		break;
	case REGXML_THENX:
		found = node->rxn_left->rxn_nullable &&
		    node->rxn_right->rxn_nullable;
		break;
	case REGXML_OR:
		found = node->rxn_left->rxn_nullable ||
		    node->rxn_right->rxn_nullable;
		break;
	default:
		break;
	}

	if (found) {
		node->rxn_nullable = 1;
		++count;
	}
	return count;
}

/*
 * Rewrite the nodes reachable from node into forms that no longer
 * need the rxn_nullable field, picking the form from the nullability
 * of each node's children:
 *
 *	THEN with a nullable left child becomes SELFR,
 *	ELSE with a nullable left child becomes SELFL,
 *	THENX is expanded by mkthen() or one of mkthennullable1..3().
 *
 * Returns 0 on success and -1 when one of the expansions fails.
 */
static int
resolvenullable(struct regxml_node *node)
{
	int r;

	/* An rxn_id of 1 marks the nodes already visited. */
	if (node == NULL || node->rxn_id == 1)
		return 0;
	node->rxn_id = 1;

	r = 0;
	switch (node->rxn_type) {
	case REGXML_THEN:
		if (node->rxn_left->rxn_nullable)
			node->rxn_type = REGXML_SELFR;
		break;
	case REGXML_ELSE:
		if (node->rxn_left->rxn_nullable)
			node->rxn_type = REGXML_SELFL;
		break;
	case REGXML_THENX:
		/* Bit 0: left child nullable; bit 1: right child nullable. */
		switch (node->rxn_left->rxn_nullable |
		    node->rxn_right->rxn_nullable << 1) {
		case 0x0:
			r = mkthen(node);
			break;
		case 0x1:
			r = mkthennullable1(node);
			break;
		case 0x2:
			r = mkthennullable2(node);
			break;
		case 0x3:
			r = mkthennullable3(node);
			break;
		}
		break;
	default:
		break;
	}
	if (r != 0)
		return -1;

	/* The children are read only now: an expansion may replace them. */
	if (resolvenullable(node->rxn_left) != 0)
		return -1;
	if (resolvenullable(node->rxn_right) != 0)
		return -1;
	return 0;
}

/*
 * THENX(a, X(b)) -> THEN(a, X(b))
 *
 * Neither operand is nullable, so the node only changes type; its
 * children are kept. Always returns 0.
 */
static int
mkthen(struct regxml_node *node)
{
	node->rxn_type = REGXML_THEN;
	return 0;
}

/*
 * THENX(a [nullable], X(b)) -> OR(THEN(a, X(b)), b)
 *
 * The node is rewritten in place: it becomes the OR, a new THEN node
 * takes over its two children, and the left child of X(b) becomes the
 * second alternative.
 *
 * Returns 0 on success, or -1 (node untouched) when the THEN node
 * cannot be allocated.
 */
static int
mkthennullable1(struct regxml_node *node)
{
	struct regxml_node *a, *xb, *seq;

	a = node->rxn_left;
	xb = node->rxn_right;

	if (new(REGXML_THEN, a, xb, &seq) != 0)
		return -1;

	node->rxn_left = seq;
	node->rxn_right = xb->rxn_left;
	node->rxn_type = REGXML_OR;
	return 0;
}

/*
 * THENX(a, X(b [nullable])) -> OR(a, THEN(a, X(b)))
 *
 * The node is rewritten in place: it becomes the OR and keeps a as
 * its first alternative, while a new THEN node over both original
 * children becomes the second one.
 *
 * Returns 0 on success, or -1 (node untouched) when the THEN node
 * cannot be allocated.
 */
static int
mkthennullable2(struct regxml_node *node)
{
	struct regxml_node *a, *xb, *seq;

	a = node->rxn_left;
	xb = node->rxn_right;

	if (new(REGXML_THEN, a, xb, &seq) != 0)
		return -1;

	node->rxn_right = seq;
	node->rxn_type = REGXML_OR;
	return 0;
}

/*
 * THENX(a [nullable], X(b [nullable])) -> OR(OR(a, THEN(a, X(b))), b)
 *
 * The node is rewritten in place: it becomes the outer OR. A new THEN
 * node takes over its two children, a new inner OR pairs a with that
 * THEN, and the left child of X(b) becomes the last alternative.
 *
 * Returns 0 on success, or -1 (node untouched, nothing leaked) when
 * one of the two new nodes cannot be allocated.
 */
static int
mkthennullable3(struct regxml_node *node)
{
	struct regxml_node *a, *xb, *seq, *alt;

	a = node->rxn_left;
	xb = node->rxn_right;

	if (new(REGXML_THEN, a, xb, &seq) != 0)
		return -1;
	if (new(REGXML_OR, a, seq, &alt) != 0) {
		free(seq);
		return -1;
	}

	node->rxn_left = alt;
	node->rxn_right = xb->rxn_left;
	node->rxn_type = REGXML_OR;
	return 0;
}

/*
 * Release a single node, together with the text it owns when its
 * value is a string. The children are not touched. NULL is accepted.
 *
 * NOTE(review): a value turned into REGXML_REGEX by mkreval() is not
 * released here; check whether the compiled expression is freed
 * elsewhere.
 */
static void
freenode(struct regxml_node *node)
{
	if (node != NULL) {
		if (node->rxn_value.rxv_unit._type == REGXML_STRING)
			free(node->rxn_value.rxv_string._data);
		free(node);
	}
}

/*
 * Release node and everything below it, children first. Links that
 * childcycle() reports as back edges are not followed. NULL is
 * accepted.
 */
static void
freepattern(struct regxml_node *node)
{
	struct regxml_node *kid;
	int side;

	if (node == NULL)
		return;

	/* Left child first, then the right one. */
	for (side = 0; side < 2; ++side) {
		kid = side == 0 ? node->rxn_left : node->rxn_right;
		if (kid == NULL || childcycle(node, kid))
			continue;
		freepattern(kid);
	}

	freenode(node);
}

/*
 * Compile the pattern text s into reg.
 *
 * reg     receives the tree (rx_pattern), the table of nodes indexed
 *         by ID (rx_pvect), its size (rx_psize) and rx_msize. It is
 *         written only on success.
 * s       the pattern text. With REGXML_PDELIM its first character is
 *         the delimiter that ends the pattern.
 * endptr  when not NULL, receives the position in s at which lexing
 *         stopped.
 * flags   REGXML_ENTITIZE (write quotes and angle brackets in token
 *         text as entities), REGXML_ICASE (compile regular expressions
 *         with REG_ICASE), REGXML_PDELIM (see s).
 *
 * Returns 0 on success. Otherwise returns the result of
 * regxml_buffer_init() if that failed, or the first error recorded:
 * REGXML_ESYNTAX when the parser rejects the text and nothing more
 * specific was recorded, or a code set while building the tree.
 *
 * The parser state is file-level, so everything that must not carry
 * over from a previous call is reset on entry. Notably `pattern' is
 * cleared, since the previous tree belongs to its caller.
 */
int
regxml_pattern_comp(struct regxml *reg, const char *s,
    char **endptr, int flags)
{
	int r;

	error = 0;
	pattern = NULL;
	ncapture = 1;
	subject = s;
	autoentitize = flags & REGXML_ENTITIZE;
	reflags = flags & REGXML_ICASE ? REG_ICASE : 0;
	endc = flags & REGXML_PDELIM ? *subject++ : '\0';

	r = regxml_buffer_init(&buffer, DEFAULTTEXTSIZE);
	if (r != 0)
		return r;

	r = regxml_yyparse();
	/* The text buffer is only needed while lexing. */
	regxml_buffer_free(&buffer);
	if (r != 0) {
		if (error == 0)
			error = REGXML_ESYNTAX;
		goto bad;
	}
	if (error != 0)
		goto bad;

	/* Propagate nullability until a pass finds nothing new. */
	while (marknullable(pattern) != 0)
		resetids(pattern);

	resetids(pattern);
	if (resolvenullable(pattern) != 0) {
		/* The expansions fail only when they cannot allocate. */
		error = REGXML_ESYSTEM;
		goto bad;
	}

	psize = 0;
	resetids(pattern);
	assignids(0, pattern);

	reg->rx_pvect = malloc(psize * sizeof *reg->rx_pvect);
	if (reg->rx_pvect == NULL) {
		error = REGXML_ESYSTEM;
		goto bad;
	}
	resolve(reg->rx_pvect, pattern);

	reg->rx_pattern = pattern;
	reg->rx_psize = psize;
	reg->rx_msize = 2 * ncapture;

	if (endptr != NULL)
		*endptr = __UNCONST(subject);
	return 0;

bad:
	freepattern(pattern);
	pattern = NULL;
	return error;
}

/*
 * Release what regxml_pattern_comp() stored in reg: the pattern tree
 * and the table of nodes. reg itself is not freed, and its fields are
 * left as they are.
 */
void
regxml_pattern_free(struct regxml *reg)
{
	freepattern(reg->rx_pattern);
	free(reg->rx_pvect);
}
