#include <stdlib.h>
#include <stdio.h>
#include "re.h"

/******************************************************************
 * regexp parsing functions
 *
 * This section is used to parse the regexp and create a tree that is
 * then used to look for the match. To support other RE syntaxes, it
 * is simple to replace the parser with one written for the desired
 * syntax, as long as the form of the parse tree is the same.
 *
 * The tree is composed of nodes. Nodes hold repeat count, and type
 * information, and other data specific to the particular type. They
 * are linked in chains which must be traversed to complete the
 * match. There are currently 6 kinds of nodes:
 *
 * - STRING nodes: these are literal strings that must match (though
 *     they also recognize special characters like '.')
 *
 * - MEMBER nodes: A character must match one of the members of the string
 *
 * - RANGE nodes: encode a range as used in OR nodes, ie [a-z]
 *
 * - AND nodes: forks the chain to insert a subchain that also needs
 *     to be executed
 *
 * - OR nodes: similar to AND, forks a chain, but only one of the
 *     subchain needs to match to succeed.
 *
 * - SUCCESS node: appended to the topmost chain, if it is reached 
 *     then the search has succeeded
 *
 * In addition, there are 2 modification bits that can be set for each node
 * type:
 * 
 * - QUOTE bit: matches for this node should be returned in a result structure
 *
 * - OR_CHILD bit: The parent of this node is an OR, when it succeeds
 *  it should not do linked nodes, but instead jump to the parent
 *
 * As an example, the tree for "abc*d\(efg\[hij\(lm\)\]?)+xyz" would
 * look like:
 *
 *  "AND - SUCCESS
 *   |
 *  ab - c{0, -1} - d - AND{1, -1} - xyz
 *                             |
 *                            efg - OR{0, 1}
 *                                     |
 *                                    hij - AND
 *                                             |
 *                                            lm
 *
 *      RE SYNTAX
 *
 * One of the advantages of this organization is it is easy to change
 * the syntax. This describes the syntax used here, by linking in a
 * different parser a different parser could be used.
 *
 * The syntax used here is more similar to standard (elisp or unix)
 * REs, but not identical - there are a few things I don't think are
 * done the best way which I changed, and some features are not
 * implemented.
 *
 * I tried to be more consistent in defining which characters are
 * escaped. The original thought was to escape all special characters,
 * and have all non-escaped characters match themselves. I still like
 * this, but compromised on the following: all iteration modifiers
 * ('*', '+', '?', '{') do not require escaping, everything else does.
 * This makes it easier to remember, only one type of character needs
 * escaping (and I considered replacing "*" with "\"*")
 *
 * The repeat instructions *, +, and ? function just like expected:
 * they apply to the unit they follow. In addition there is a counter
 * {a,b} which requires the number of matches to be >= a and <= b. If
 * b is not specified then it just requires there be >= a matches.
 *
 * I have made the [] construction into a more general OR than
 * standard. While "\[a-z12\]" will still match any small letter or the
 * digits 1 or 2, the RE "\[a-m\(no\)p\]" will match any character
 * from a to m, the string "no", or the character 'p'. However, if
 * '\~' is used to negate the OR only strings can be used. (it is easy
 * to provide the functionality, but it is not clear how to work it -
 * for instance, how many characters would match '\[~a-z\(123\)\]"?)
 *
 * Finally, in elisp the "\(\)" construction serves the 2 purposes of
 * grouping and recording. I have separated them by adding the '\"'
 * command, which records the following match in the results. The
 * results are represented in a nested list structure, with the top
 * level being the entire match.
 *
 * SPECIAL CHARACTERS
 * - \n : match any digit [0-9]
 * - \a : match any alphabetic [a-z] (or [A-Z] if case matching is turned off)
 * - \A : match any alphabetic [A-Z] (or [a-z] if case matching is turned off)
 * - \- : match any alphabetic [a-zA-Z]
 * - \_ : match any whitespace [ \t\n]
 * - \\ : match a single '\' char
 * - \. : match any character
 * - \~ : (at beginning of RE) anchor beginning
 * - \$ : (at end of RE) anchor end
 * - \g : (at beginning of RE) force greedy matching algorthm
 * - \G : (at beginning of RE) force lazy matching algorthm
 * - \C : require subsequent alphabetic matches have the same case
 * - \c : match alphabetic characters regardless of case
 * 
 *
 * There are 3 functions that need to be provided by this file, or by
 * its replacement. They are:
 * - parseRE: returns a parse tree or NULL for error
 * - cleanupTree: recursively frees a parse tree
 * - matchChar: tells if the current RE location matches the current string location
 *
 * In addition, there should be an array of tests for testing the 
 * searcher in case it is built standalone.
 *
 ******************************************************************/

// This assures that empty string match
static struct node empty = {N_STRING | N_STATIC, 1, 1, NULL, {"", 0}
#ifdef DEBUG
                            , 0
#endif
};

//////////////////////////////////////////////////
//
// Node manipulation
//
//////////////////////////////////////////////////

// make a new node of type TYPE
static struct node *getNewNode(VD_INT4 type) {
    struct node *node = malloc(sizeof(struct node));
    node->type = (unsigned char) type;
    node->min = node->max = 1;
    node->link = NULL;
    nextID(&node->id, type);
    return node;
}

// Returns a new STRING node
static struct node *makeDataNode(VD_UTF8 *re, VD_INT4 end, VD_INT4 string) {
    struct node *new = getNewNode((string) ? N_STRING : N_MEMBER);
    new->d.string.start = re;
    new->d.string.len = end;
    return new;
}

// returns a new LOGIC node
static struct node *makeLogicNode(VD_UINT4 command) {
    struct node *new = getNewNode(command);
    new->d.logic.root = NULL;
    return new;
}

// returns a new RANGE node
static struct node *makeRangeNode(VD_UCS4 low, VD_UCS4 high) {
    struct node *new = getNewNode(N_RANGE);     //default, reset later if OR
    new->d.range.low = low;
    new->d.range.high = high;
    return new;
}

////////////////////////////////////////////////
// 
// Special functions for setting up OR nodes
//
////////////////////////////////////////////////

// Splice a RANGE node into a STRING node. Be careful about manipulating
// the tree so that the iteration in andStringsToOrStrings continues
static VD_INT4 replaceRange(struct node *node, VD_UCS4 low, VD_INT4 plow, VD_INT4 ptr) {
    struct node *n = NULL, *link = NULL;
    VD_INT4 len = node->d.string.len;
    VD_UTF8 *start = node->d.string.start;
    VD_UCS4 high;
    if (plow) {     // if there are earlier characters leave them in the current node
        node->d.string.len = plow;
        link = node->link;
        if (!(node->link = makeDataNode(start + plow, len - plow, 0))) {
            return 0;
        }
        node = node->link;
        node->link = link;
		node->type |= N_ORCHILD;
    }
    // here there is a string node beginning with the range,
    // make it into a RANGE node
    high = VDI_nextChar(start, &ptr, len);    // get high char
    node->type = N_RANGE | N_ORCHILD;
    node->d.range.low = low;
    node->d.range.high = high;
    // if there are leftover characters splice in a STRING node
    if (ptr < len) {
        link = node->link;
        node->link = makeDataNode(start + ptr, len - ptr, 1);
        node->link->link = n;
		node->link->type |= N_ORCHILD;
    }
    return 1;
}

   
// Initially AND strings and OR strings are parsed the same. This
// subroutine converts plain strings into linked strings and ranges,
// when the '-' character ie encountered. It also marks each node
// either with the ORCHILD flag or the NOT flag
static VD_INT4 andStringsToOrStrings(struct node *node, VD_INT4 not) {
    VD_UCS4 ch, low;
    VD_INT4 ptr, p0, plow;

    while (node) {
        node->type |= N_ORCHILD;
        if (node->type & N_STRING) {
            node->type = (node->type & ~N_STRING) | N_MEMBER;
            plow = ptr = 0;
            ch = VDI_nextChar(node->d.string.start, &ptr, node->d.string.len);    // skip first char
            while (ptr < node->d.string.len) {
                low = ch;
                p0 = ptr;
                ch = VDI_nextChar(node->d.string.start, &ptr, node->d.string.len);
                if (ch == VD_RE_RANGE_CHAR) {
                    if (!replaceRange(node, low, plow, ptr)) {
                        return 0;
                    }
                    break;
                }
                plow = p0;
            }
        }
        else if ((node->type & N_LOGIC) && not) {
            return 0;
        }
        node = node->link;
    }
    return 1;
}

// Function used for \|. It wraps each side of the OR in an AND if
// there is more than one element to be matched
static struct node *makeOrChild(struct node *node) {
    struct node *and = NULL;
    if (node) {
        if (node->link) {   // if there is more than 1 in the chain enclose it in an AND
            if (!(and = makeLogicNode(N_AND))) {
                return NULL;
            }
            and->d.logic.root = node;
            node = and;
        }
        node->type |= N_ORCHILD;
    }
    return node;
}

////////////////////////////////////////////////////
//
// subroutines to set up repeat counts
//
////////////////////////////////////////////////////

// subroutine to handle multiplicity after strings: the count applies
// only to the last character (or 2 chars if it is an escape sequence)
static struct node *multAfterString(VD_UTF8 *re, VD_INT4 *pstart, VD_INT4 end, VD_INT4 prev, struct node *node) {
    if (*pstart < 0) {
        return node;
    }
    if (*pstart < prev) {
        node->link = makeDataNode(re + *pstart, prev - *pstart, 1);
        node = node->link;
    }
    if (prev < end) {
        node->link = makeDataNode(re + prev, end - prev, 1);
        node = node->link;
    }
    *pstart = -1;
    return node;
}

// common repeat code, used for * + ? {}
struct node *doRepeat(struct node *node, VD_UTF8 *re, VD_INT4 len, VD_INT4 *pptr, 
					  VD_UINT4 min, VD_UINT4 max, VD_UINT4 *pquote) {
	VD_INT4 ptr = *pptr;
    node->min = min;
    node->max = max;
    node->type |= *pquote;
    *pquote = 0;
	if (VD_Unicode_UTF8GetIncr(re, &ptr, len) == VD_RE_LAZY_MODIFIER) {
		*pptr = ptr;
		node->type |= N_LAZY;
	}
	return node;
}

/////////////////////////////////////////
//
// general parser help functions
//
/////////////////////////////////////////

// make a string node of any unprocessed characters after a special character is found
static VD_INT4 terminateString(struct node **pnode, VD_UTF8 *re, VD_INT4 *pstart, VD_INT4 end, VD_UINT4 *pquote) {
    if (*pstart >= 0) {
        if (!((*pnode)->link = makeDataNode(re + *pstart, end - *pstart, 1))) {
            return 0;
        }
        *pnode = (*pnode)->link;
        (*pnode)->type |= *pquote;
        *pquote = 0;
        *pstart = -1;
    }
    return 1;
}

// help do the bookkeeping for the return from parseRE: pass back the current pointer
// and clean up in case of error
static struct node *doReturn(VD_INT4 failed, struct node *node) {
    if (failed) {
        VDI_cleanupTree(node);
        return NULL;
    }
// I looked into simplifying some trees here, but I don't think the
// savings justify the trouble.
    return (node->type & N_STATIC) ? node->link : node;
}

// main parsing routine: this returns a node with the head of the
// parse chain for this level, or NULL if there is an error. It
// returns the current pointer location in the variable PRE
//RWY
static struct node *parseREInternal (VD_UTF8 *re, VD_INT4 len, VD_INT4 *pptr, VD_UCS4 terminate) {
    VD_INT4 p, p0, pprev;
    VD_UTF8 *pch;
    struct node *n = NULL;
    struct node top = {N_STATIC, 1, 1, &empty};
    struct node *node = &top;
    VD_INT4 start = -1;
    VD_UCS4 ch;
    VD_UINT4 min, max, command, quote = 0;

    while (1) {
        pprev = p0;
        p0 = *pptr;
        ch = VDI_nextChar(re, pptr, len);
        if (*pptr == p0) {
            break;
        }
        switch (ch) {
        case VD_RE_ESCAPE_CHAR:
            switch(ch = VDI_nextChar(re, pptr, len)) {
            case VD_RE_OR_CHAR:
                if (!terminateString(&node, re, &start, p0, &quote)
                    || !(node = makeLogicNode(N_OR))
                    || !(node->d.logic.root = makeOrChild(top.link))
                    || !(node->d.logic.root->link = makeOrChild(parseREInternal(re, len, pptr, terminate)))) {
                    return doReturn(1, &top);
                }
                top.link = node;
                return doReturn(0, &top);
            case VD_RE_OPEN_OR_CHAR:
                command = N_OR;
                p = *pptr;
                if (VD_RE_NOT_OR_CHAR == VDI_nextChar(re, &p, len)) {
                    *pptr = p;
                    command = N_NOTOR;
                }
                if (!terminateString(&node, re, &start, p0, &quote)
					// if it is NOT then change this node into an AND
					// note this has the result that each of the nodes is pushed on the stack
                    || !(node = node->link = makeLogicNode(command))
					|| !(node->d.logic.root = parseREInternal(re, len, pptr, VD_RE_CLOSE_OR_CHAR))
                    || !andStringsToOrStrings(node->d.logic.root, command == N_NOTOR)) {
                    return doReturn(1, &top);
                }
                node->type |= quote;
                quote = 0;
                break;
            case VD_RE_OPEN_AND_CHAR:
                if (!terminateString(&node, re, &start, p0, &quote)
                    || !(node->link = makeLogicNode(N_AND))) {
                    return doReturn(1, &top);
                }
                node = node->link;
                node->type |= quote;
                quote = 0;
                if (!(node->d.logic.root = parseREInternal(re, len, pptr, VD_RE_CLOSE_AND_CHAR))) {
                    return doReturn(1, &top);
                }
                break;
            case VD_RE_QUOTE_CHAR: 
                if (!terminateString(&node, re, &start, p0, &quote)) {
                    return doReturn(1, &top);
                }
                quote = N_QUOTE;
                break;
            case VD_RE_CLOSE_OR_CHAR:
            case VD_RE_CLOSE_AND_CHAR:
//            case 0:		// leftover from asciz strings
                terminateString(&node, re, &start, p0, &quote);
                return doReturn((terminate != ch), &top);
            default:	// keep track of non-special characters to match
                if (start < 0) {
                    start = p0;
                }
            }
            break;
        case VD_RE_STAR_REPEAT_CHAR:
			node = multAfterString(re, &start, p0, pprev, node);
			node = doRepeat(node, re, len, pptr, 0, -1, &quote);
            break;
        case VD_RE_PLUS_REPEAT_CHAR:
			node = multAfterString(re, &start, p0, pprev, node);
 			node = doRepeat(node, re, len, pptr, 1, -1, &quote);
           break;
        case VD_RE_QMARK_REPEAT_CHAR:
			node = multAfterString(re, &start, p0, pprev, node);
 			node = doRepeat(node, re, len, pptr, 0, 1, &quote);
            break;
        case VD_RE_OPEN_REPEAT_CHAR:
            node = multAfterString(re, &start, p0, pprev, node);
            max = -1;
			min = (unsigned short) strtol(re + *pptr, (char **) &pch, 0);
            *pptr += (pch - re - *pptr);
            ch = VDI_nextChar(re, pptr, -1);
            if (',' == ch) {
                max = (unsigned short) strtol(re + *pptr, (char **) &pch, 0);
                *pptr += (pch - re - *pptr);
                ch = VDI_nextChar(re, pptr, -1);
            }
            if ((VD_RE_CLOSE_REPEAT_CHAR != ch) || (min > max)) {
                return doReturn(1, &top);
            }
 			node = doRepeat(node, re, len, pptr, min, max, &quote);
            break;
        default:
            if (start < 0) {
                start = p0;
            }
        }
    }

    if (start >= 0) {
        node->link = makeDataNode(re + start, *pptr - start, 1);
        node->link->type |= quote;
    }
    return doReturn(terminate, &top);
}


// This creates the parse tree from the input RE. The tree always
// starts with an AND node that contains the tree contents, and this
// AND node is followed by a SUCCESS node:
//
//   "AND - SUCCESS
//    |
//   (re tree) ...
//      .
//      .
//      .
//
// The SUCCESS node assures the search terminates at the right time
// and the "AND makes sure the overall results are reported 


 struct node *VDI_parseRE(VD_UTF8 *str, VD_INT4 len, VD_UINT4 debug) {
     struct node *root = makeLogicNode(N_AND);
     VD_INT4 ptr = 0;
     root->type |= N_QUOTE;
     if (!(root->d.logic.root = parseREInternal(str, len, &ptr, 0))) {
         printf("error parsing regexp '%s' around position %d\n", str, ptr);
         return NULL;
     }
     root->link = getNewNode(N_SUCCESS);
     DODEBUG(debug, dbTree(root));
     return root;
 }

// return all memory allocated for the regexp tree
void VDI_cleanupTree(struct node *node) {
    if (node) {
        if (node->link) {
            VDI_cleanupTree(node->link);
        }
        if (node->type & N_LOGIC) {
            VDI_cleanupTree(node->d.logic.root);
        }
        if (!(node->type & N_STATIC)) {
            free(node); 
        }
    }
}

/******************************************************************
 *
 * Functions for matching characters
 *
 * Originally this was in the main file, but string matching is
 * related to parsing.  The idea is, special characters can be
 * redefined by the syntax, but need to be evaluated in the execution.
 * One way to do it would be to make special node types for special
 * characters and break up the STRING nodes into chains of regular
 * characters separated by special characters. That is ugly, and could
 * cause other problems with things like repeat counts. This is a
 * compromise solution: it is simple and flexible, but it does move
 * some of the traversal execution into the parser module.
 *
 * The RE is always updated to reflect its length. The return value is
 * the number of bytes to advance the string pointer if it succeeds,
 * or negative to indicate no match.
 *
 ******************************************************************/
// RWY TODO: this needs to use the unicode database
VD_INT4 VDI_matchChar(VD_UTF8 **re, VD_UTF8 *str, VD_INT4 strlen, VD_UINT4 *penv) {
    VD_INT4 pre = 0, pstr = 0;
    VD_INT4 cre = VDI_nextChar(*re, &pre, -1);
    VD_INT4 cstr = VDI_nextChar(str, &pstr, strlen);
    VD_INT4 ret = 0;
    if (cre == VD_RE_ESCAPE_CHAR) {
        cre = VDI_nextChar(*re, &pre, -1);
        switch (cre) {
        case 'n': ret = (('0' <= cstr) && (cstr <= '9')); break;
        case 'a': ret = (('a' <= cstr) && (cstr <= 'z')) || ((*penv & E_NOMATCHCASE) && ('A' <= cstr) && (cstr <= 'Z')); break;
        case 'A': ret = (('A' <= cstr) && (cstr <= 'Z')) || ((*penv & E_NOMATCHCASE) && ('a' <= cstr) && (cstr <= 'z')); break;
        case '-': ret = (('A' <= cstr) && (cstr <= 'Z')) || (('a' <= cstr) && (cstr <= 'z')); break;
        case 'C': *penv &= ~E_NOMATCHCASE; ret = 1; pstr = 0; break;
        case 'c': *penv |= E_NOMATCHCASE; ret = 1; pstr = 0; break;
        case '_': ret = ((cstr == ' ') || (cstr == '\t') || (cstr == '\n')); break;
        case '\\': ret = (cstr == '\\'); break;
        case '.': ret = (cstr != 0); break;
        case '~': ret = 0; break;		// processed elsewhere
        case '$': ret = (strlen == 0); break;
        default: ret = (cstr == cre);
        }
    }
    else {
        ret = (cstr == cre);
        if (!ret && (*penv & E_NOMATCHCASE)) {
            cstr -= cre;
            ret = ((cstr == 'A' - 'a') || (cstr == 'a' - 'A'));
        }
    }
    *re += pre;
    return (ret) ? pstr : -1;
}

#ifdef STANDALONE
// tests are run in the main file but defined in parser, since
// different parsers would use different test strings
//
// REGEXP, DATA, EXPECTED_RESULT
// The expected result simulates nested lists by using <> to surround 
// sublists and , to separate elements of a list. If the test is not 
// supposed to find a match use NULL for the expected result

char *tests[][3] = {
    {"a\\(bc\\)*d", "ad", "ad"},
    {"a\\(bc\\)*d", "abcd", "abcd"},
    {"a\\(bc\\)*d", "abcbcd", "abcbcd"},
    {"a\\(bc\\)*", "a", "a"},
    {"a\\(bc\\)*", "abc", "abc"},
    {"a\\(bc\\)*", "abcbc", "abcbc"}, 
    {"a\\\"\\(bc\\)*bcd", "abcbcbcd", "abcbcbcd<bc,bc>"},

    {"a\\(bc\\)+d", "ad", NULL},
    {"a\\(bc\\)+d", "abcd", "abcd"},
    {"a\\(bc\\)+d", "abcbcd", "abcbcd"},
    {"a\\(bc\\)+", "a", NULL},
    {"a\\(bc\\)+", "abc", "abc"},
    {"a\\(bc\\)+", "abcbc", "abcbc"}, 
    {"a\\\"\\(bc\\)+bcd", "abcbcbcd", "abcbcbcd<bc,bc>"},

    {"a\\(bc\\)?d", "ad", "ad"},
    {"a\\(bc\\)?d", "abcd", "abcd"},
    {"a\\(bc\\)?d", "abcbcd", NULL},
    {"a\\(bc\\)?", "a", "a"},
    {"a\\(bc\\)?", "abc", "abc"},
    {"a\\(bc\\)?", "abcbc", "abc"}, 
    {"a\\\"\\(bc\\)?bcd", "abcbcbcd", NULL},

	{"\\(bc\\)*", "ssss", ""},
    {"\\(bc\\)*", "bc", "bc"},
    {"\\(bc\\)*", "bcbc", "bcbc"},

    {"a\\(bc\\)?d", "ad", "ad"},
    {"a\\(bc\\)?d", "abcd", "abcd"},
    {"a\\(bc\\)?d", "abcbcd", NULL},

    {"a\\(bc\\){3,4}d", "abcbcd", NULL},
    {"a\\(bc\\){3,4}d", "abcbcbcd", "abcbcbcd"},
    {"a\\(bc\\){3,4}d", "abcbcbcbcd", "abcbcbcbcd"},
    {"a\\(bc\\){3,4}", "abcbcbcbcd", "abcbcbcbc"},
    {"a\\(bc\\){3,4}d", "abcbcbcbcbcd", NULL},

    {"a\\(bc\\)+d", "ad", NULL},
    {"a\\(bc\\)+d", "abcd", "abcd"},
    {"a\\(bc\\)+d", "abcbcd", "abcbcd"},

    {"a*b", "b", "b"},
    {"a+b", "b", NULL},
    {"\\\"a*", "aaaaab", "aaaaa<a,a,a,a,a>"},
    {"a*", "aaaaa", "aaaaa"},

    {"\\(ab\\)*abc", "ababababcx", "ababababc"},
    {"\\\"\\(ab\\)*abc", "ababababcx", "ababababc<ab,ab,ab>"},
    {"\\\"\\(a\\(bc\\)*\\)*z", "aaabcbcaz", "aaabcbcaz<a,a,abcbc,a>"},
    {"\\(a\\(b\\(c\\)*\\)*\\)*", "abbcccbcaa", "abbcccbcaa"},

    {"\\[ab\\]", "b", "b"},
    {"\\[ab\\]", "a", "a"},
    {"\\[ab\\]", "c", NULL},
    {"x\\[ab\\]*", "x", "x"},
    {"x\\[ab\\]+", "x", NULL},

    {"a\\\"\\[bcd\\(ef\\)\\]*de", "abefefde", "abefefde<b,ef,ef>"},
	// following is infinite loop - acceptable?
//    {"\\(a*\\)*", "aaaa", "aaaa"},         // bug: make match of empty subexpression terminate

    {"bc*\\|x+y", "bcccxxxybxydlk", "bccc"},
    {"a\\(bc*\\|x+y\\)+z", "abcccxxxybxyzzz", "abcccxxxybxyz"},
    {"\\(bc*\\|x+y\\)*", "bcccxxxybxydlk", "bcccxxxybxy"},

    {"a\\n*b", "a123b", "a123b"},
    {"a\\n*b", "a12x3b", NULL},
    {"\\\"\\[\\(ab\\)\\(cd\\)+\\]+ab", "cdcdababx", "cdcdabab<cdcd,ab>"},
    {"a\\\"\\.*z", "amnopqzzxy", "amnopqzz<m,n,o,p,q,z>"},
    {"a\\\"\\(\\.*\\)z", "amnopqzzxyzxx", "amnopqzzxyz<mnopqzzxy>"},
    {"a\\[abc\\]*", "acbABC", "acb"},
    {"a\\c\\[abc\\]*", "acbABC", "acbABC"},
    {"\\[~abc\\]*", "wxyzabc", "wxyz"},
    {"\\[~ab-de\\]*", "wxyzc", "wxyz"},
    {"\\[~ab-de\\]*", "wxyz", "wxyz"},
    {"\\\"\\[~ab-de\\]*z", "wxyz", "wxyz<w,x,y>"},
    {"\\[~\\na\\]*", "wxyzabc", "wxyz"},
    {"\\[~\\na\\]*", "wxy5zabc", "wxy"},
    {"\\[a-c\\]+", "ab-cbe", "ab"},
    {"abc", "", NULL},
    {"a*", "", ""},
    {"", "a", ""},
	{"\\.*", "abcd", "abcd"},

    {"\\[-ac\\]+", "ac-cbe", "ac-c"},
    {"a\\(\\){3,4}b", "abc", "ab"},
    {"\\~abcd", "abcde", "abcd"},
    {"abcd\\$", "abcd", "abcd"},
    {"abcd\\$", "abcde", NULL},
    {"\\[\\62-\\67\\]*x\\0104\\0x45z", "ABCxDEz", "ABCxDEz"},
    {"\\[a-ce-gik-m\\]+", "abcefgiklmd", "abcefgiklm"},

	{"a\\\"\\(bc\\)+bc", "abcbcbcbcbc", "abcbcbcbcbc<bc,bc,bc,bc>"},
	{"a\\\"\\(bc\\)+?bc", "abcbcbcbcbc", "abcbc<bc>"},
	{"\\\"a*?a*?b", "aaaaaab", "aaaaaab"},
	{"\\\"a*a*?b", "aaaaaab", "aaaaaab<a,a,a,a,a,a>"},
	{"\\\"\\[\\(ab\\)+?cd\\]*", "cdababc", "cdababc<c,d,ab,ab,c>"},
	{"\\\"\\[\\(ab\\)+cd\\]*", "cdababc", "cdababc<c,d,abab,c>"},
	{"ab\\\"\\(cd\\|ef*\\)+", "abeecdcdefffg", "abeecdcdefff<e,e,cd,cd,efff>"},
	{"\\\\|\"])[(\\}\\{\\*\\+\\?\\z", "\\|\"])[(}{*+?z", "\\|\"])[(}{*+?z"}, 
    {NULL, NULL, NULL}
};

#endif