/////////////////////////////////////////////////////
//
// DEBUGGING STUFF
//
// if DEBUG and/or STANDALONE are defined, these set up the
// program conditions
//
///////////////////////////////////////////////////////
#define DEBUG           1           // include and call debugging output functions

#define STANDALONE      1           // include the MAIN function

// used for interactive calls
#define RE_SIZE         100     // maximum size of RE
#define STRING_SIZE     150     // maximum size of string

///////////////////////////////////////////////////////////
//
// EXECUTION PARAMETERS
//
// Following are defines that adjust the execution environment
// They can be changed to make it faster or more efficient
//
////////////////////////////////////////////////////////////

// number of state frames to allocate at a time on the stack
#define ALLOCUNIT       10
// set the maximum depth of the stack.  Actually it only checks when
// new frames are allocated, so the maximum possible stack size is
// MAXSTACKDEPTH + ALLOCUNIT - 1
#define MAXSTACKDEPTH   200         // should be bigger? Should be runtime?


//////////////////////////////////////////////////////////////
//
// VD STUFF
//
//////////////////////////////////////////////////////////////
#include "unicode/vd_unicode.h"
// this seems not to be in the standalone VD files I have
typedef signed short         VD_INT2;

// vd return values
#define VD_ERROR_RESULT     -1
#define VD_INTERRUPTED_CALL 0
#define VD_COMPLETED_CALL   1

#define VD_RE_BREAK_ITERATIONS  200

#define VD_RE_RANGE_CHAR        '-'
#define VD_RE_ESCAPE_CHAR       '\\'
#define VD_RE_LOOKINGAT_CHAR    '~'
#define VD_RE_OR_CHAR           '|'
#define VD_RE_OPEN_OR_CHAR      '['
#define VD_RE_CLOSE_OR_CHAR     ']'
#define VD_RE_OPEN_AND_CHAR     '('
#define VD_RE_CLOSE_AND_CHAR    ')'
#define VD_RE_NOT_OR_CHAR       '~'
#define VD_RE_QUOTE_CHAR        '"'
#define VD_RE_OPEN_REPEAT_CHAR  '{'
#define VD_RE_CLOSE_REPEAT_CHAR '}'
#define VD_RE_STAR_REPEAT_CHAR  '*'
#define VD_RE_PLUS_REPEAT_CHAR  '+'
#define VD_RE_QMARK_REPEAT_CHAR '?'
#define VD_RE_LAZY_MODIFIER		'?'

/////////////////////////////////////////////////////////////////
//
//    PARSE TREE NODE STRUCTURE
//
// Used to build the parse tree when the RE is read. TRAVERSE steps
// along this , finishing either when the stack is empty or when it
// reaches the strategically placed SUCCESS node at the final leaf of 
// the parse tree
//
//////////////////////////////////////////////////////////////////

// Node types (unsigned short node.type)
#define N_SUCCESS       0x0
#define N_AND           0x1
#define N_OR            0x2
#define N_LOGIC         (N_OR | N_AND)    // For convenience
#define N_NOTOR			0x4
#define N_RANGE         0x8
#define N_STRING        0x10
#define N_MEMBER        0x20
#define TYPEMASK        0x3f        // used to assign numbers to the nodes for debugging    

// node type modifiers
#define N_LAZY          0x40		// use lazy evaluation (default is greedy)
#define N_QUOTE         0x80        // Mark a node for inclusion in the result
#define N_ORCHILD       0x100        // parent is an OR
#define N_NOT           0x200       // parent is an OR NOT

// mark a node as not to be returned in the result
// This is used for various system nodes like EMPTY or ROOT
#define N_STATIC        0x400

struct node {
    VD_UINT4 type;        // node type: N_*
    VD_UINT4 min, max;    // number of times node can be walked
    struct node *link;          // link to trailing unit
    union {
        struct string {         // string to be matched
            VD_UTF8 *start;
            VD_INT4 len;
        } string;
        struct logic {          // AND or OR nodes
            struct node *root;
        } logic;
        struct range {          // string to be a member of
            VD_UCS4 low;
            VD_UCS4 high;
        } range;
    } d;
#ifdef DEBUG
    VD_INT4 id;                     // used to identify the node in debugging output
#endif
};

/////////////////////////////////////////////////////////
//
// STATE STRUCTURE
//
// The STATE structure is kept on a stack during traversal and 
// represents the current state as the program walks over the 
// parse tree.
//
/////////////////////////////////////////////////////////

// values for the state.nextAction field

// This is neat - originally the code had to check in several places
// for lazy or not. Now, when properly initalized, both can be checked by 
// the common bit mask, and advanced by shifting right 1 bit.
#define A_REPEAT_LAZY		0x1
#define A_CHAIN_LAZY		0x2
#define A_CHAIN_GREEDY		0x10
#define A_REPEAT_GREEDY		0x20
// these definitions check for the desired state, for both LAZY and GREEDY
#define A_REPEAT			(A_REPEAT_LAZY | A_REPEAT_GREEDY)
#define A_CHAIN				(A_CHAIN_LAZY | A_CHAIN_GREEDY)
#define A_NOTFAIL			(A_CHAIN | A_REPEAT)

struct state {
    struct node *node;
    // when the frame is created PTR holds the starting point of the
    // match.  If the match succeeds it is replaced with the end point
    VD_UTF8 *ptr;
    VD_INT4 parent;        // parent frame
    VD_UINT4 count;        // number of SUCCESSFUL reps (so it starts with 0)
    VD_UINT4 nextAction;   // see comment for A_* 
};


/////////////////////////////////////////////////////////////
//
// STATE STACK STRUCTURE
// 
// The stack that holds the states representing the current RE
// match position. It is used to backtrack in case of failure,
// and to reconstruct the solution after successful completion
///////////////////////////////////////////////////////////////

// Environment codes, for debugging and state info, used in stack->env
#define E_STACK           0x1       // print out the stack on each rep (debug)
#define E_FINALSTACK      0x2       // print out the stack on finish (debug)
#define E_TREE            0x4       // print out the parse tree (debug)
#define E_SHOWDEBUG       0x8       // print out various debugging info (debug)
#define E_RESULTS        0x10       // print out the RESULTS structure (debug)
#define E_RUNTESTS       0x20       // run the tests
#define E_LOOKINGAT      0x40       // only match string starting in first position
#define E_INTERACTIVE    0x80       // accept interactive RE and string
#define E_STATS         0x100       // print out stats after completion (debug)
#define E_NOMATCHCASE   0x200       // do not require case match

struct stateStack {
    struct state *states;       // states stack
    VD_UTF8 *strEnd;   // pointer to the end of the string
#ifdef DEBUG        // debugging information
    VD_INT4 maxSize;    // maximum size of stack
    VD_INT4 pushed;     // total number of frames pushed on the stack
    VD_INT4 failed;     // number of frames that failed compare
#endif
    VD_INT4 iterations; // number of iterations (use to break for VD)
    VD_INT4 ptr;                  // pointer to next location (1 over current top)
    VD_INT4 allocated;            // number of stack frames allocated 
    VD_UINT4 env;         // bits for debugging
};

/////////////////////////////////////////////////////////////////
//
// RESULT STRUCTURE
//
// Used to represent the output of a successful search. This
// forms a nested list where the top level is the entire matched
// search string, and lower layers represent quoted expressions
//
/////////////////////////////////////////////////////////////////

struct VD_RE_Result {
    struct VD_RE_Result *chain;       // nested results list
    struct VD_RE_Result *contains;    // next member of this result list
    VD_UTF8 *start;                // start of this matched string
    VD_INT4 len;                    // length of this matched string
};

// predeclarations see documentation in re1.c 
// (probably there should be 2 include files, a public one and
// a private one)

// internal use
//gets the requested state from off the stack. If WHICH < 0 counts
//down from the top (-1 is the top frame)
struct state *VDI_getState(struct stateStack *stack, VD_INT4 which);

// gets the number of the top frame on the stack
VD_INT4 VDI_topFrame(struct stateStack *stack);

// Gets the next character. It uses VD_UNICODE_UTF8GETINCR, but also
// handles literal numbers (that is, \0xabc)
VD_UCS4 VDI_nextChar(const VD_UTF8 *, VD_INT4 *, VD_INT4);

// see documentation in re-parse.c
// builds the parse tree for the regexp
struct node *VDI_parseRE(VD_UTF8 *, VD_INT4, VD_UINT4);

// returns resources used by the tree
void VDI_cleanupTree(struct node *node);

// called to check for character matches - since this can be
// syntax dependent it needs to be in the parse file
VD_INT4 VDI_matchChar(VD_UTF8 **, VD_UTF8 *, VD_INT4, VD_UINT4 *);

//int strlen(char *);

// in re-debug.c
// If debugging is enabled predeclare the functions, otherwise
// macro them out
#ifdef DEBUG
    #define DODEBUG(xx, yy) {if (xx) {yy;}}
    void dbSummary(struct stateStack *stack);
    void dbStack(struct stateStack *stack);
    void dbTree(struct node *root);
    void dbState(struct stateStack *stack, VD_INT4 depth);
    void dbNode(struct node *node, VD_INT4 recurse);
    void nextID(VD_INT4 *, VD_INT4);
    void dbResults(struct VD_RE_Result *, VD_INT4);
#else
    #define DODEBUG(xx, yy) 
    #define dbStack(xxx)
    #define dbSummary(xxx)
    #define dbTree(xxx)
    #define dbState(xxx, yyy)
    #define dbNode(xxx, yyy)
    #define nextID(xxx, yyy)
    #define dbResults(xxx, yyy)
#endif

