|
|
@@ -0,0 +1,1012 @@
|
|
|
+/*
|
|
|
+ * SPDX-FileCopyrightText: 1998-2001,2003-2011,2013 Stewart Heitmann
|
|
|
+ *
|
|
|
+ * SPDX-License-Identifier: BSD-3-Clause
|
|
|
+ */
|
|
|
+/*******************************************************************************
|
|
|
+ * arg_rex: Implements the regex command-line option
|
|
|
+ *
|
|
|
+ * This file is part of the argtable3 library.
|
|
|
+ *
|
|
|
+ * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann
|
|
|
+ * <sheitmann@users.sourceforge.net>
|
|
|
+ * All rights reserved.
|
|
|
+ *
|
|
|
+ * Redistribution and use in source and binary forms, with or without
|
|
|
+ * modification, are permitted provided that the following conditions are met:
|
|
|
+ * * Redistributions of source code must retain the above copyright
|
|
|
+ * notice, this list of conditions and the following disclaimer.
|
|
|
+ * * Redistributions in binary form must reproduce the above copyright
|
|
|
+ * notice, this list of conditions and the following disclaimer in the
|
|
|
+ * documentation and/or other materials provided with the distribution.
|
|
|
+ * * Neither the name of STEWART HEITMANN nor the names of its contributors
|
|
|
+ * may be used to endorse or promote products derived from this software
|
|
|
+ * without specific prior written permission.
|
|
|
+ *
|
|
|
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
+ * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT,
|
|
|
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
|
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
|
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
|
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
+ ******************************************************************************/
|
|
|
+
|
|
|
+#include "argtable3.h"
|
|
|
+
|
|
|
+#ifndef ARG_AMALGAMATION
|
|
|
+#include "argtable3_private.h"
|
|
|
+#endif
|
|
|
+
|
|
|
+#include <stdlib.h>
|
|
|
+#include <string.h>
|
|
|
+
|
|
|
+#ifndef _TREX_H_
|
|
|
+#define _TREX_H_
|
|
|
+
|
|
|
+/*
|
|
|
+ * This module uses the T-Rex regular expression library to implement the regex
|
|
|
+ * logic. Here is the copyright notice of the library:
|
|
|
+ *
|
|
|
+ * Copyright (C) 2003-2006 Alberto Demichelis
|
|
|
+ *
|
|
|
+ * This software is provided 'as-is', without any express
|
|
|
+ * or implied warranty. In no event will the authors be held
|
|
|
+ * liable for any damages arising from the use of this software.
|
|
|
+ *
|
|
|
+ * Permission is granted to anyone to use this software for
|
|
|
+ * any purpose, including commercial applications, and to alter
|
|
|
+ * it and redistribute it freely, subject to the following restrictions:
|
|
|
+ *
|
|
|
+ * 1. The origin of this software must not be misrepresented;
|
|
|
+ * you must not claim that you wrote the original software.
|
|
|
+ * If you use this software in a product, an acknowledgment
|
|
|
+ * in the product documentation would be appreciated but
|
|
|
+ * is not required.
|
|
|
+ *
|
|
|
+ * 2. Altered source versions must be plainly marked as such,
|
|
|
+ * and must not be misrepresented as being the original software.
|
|
|
+ *
|
|
|
+ * 3. This notice may not be removed or altered from any
|
|
|
+ * source distribution.
|
|
|
+ */
|
|
|
+
|
|
|
+#ifdef __cplusplus
|
|
|
+extern "C" {
|
|
|
+#endif
|
|
|
+
|
|
|
+#define TRexChar char
|
|
|
+#define MAX_CHAR 0xFF
|
|
|
+#define _TREXC(c) (c)
|
|
|
+#define trex_strlen strlen
|
|
|
+#define trex_printf printf
|
|
|
+
|
|
|
+#ifndef TREX_API
|
|
|
+#define TREX_API extern
|
|
|
+#endif
|
|
|
+
|
|
|
+#define TRex_True 1
|
|
|
+#define TRex_False 0
|
|
|
+
|
|
|
+#define TREX_ICASE ARG_REX_ICASE
|
|
|
+
|
|
|
+typedef unsigned int TRexBool;
|
|
|
+typedef struct TRex TRex;
|
|
|
+
|
|
|
+typedef struct {
|
|
|
+ const TRexChar* begin;
|
|
|
+ int len;
|
|
|
+} TRexMatch;
|
|
|
+
|
|
|
+#ifdef __GNUC__
|
|
|
+TREX_API TRex* trex_compile(const TRexChar* pattern, const TRexChar** error, int flags) __attribute__((optimize(0)));
|
|
|
+#else
|
|
|
+TREX_API TRex* trex_compile(const TRexChar* pattern, const TRexChar** error, int flags);
|
|
|
+#endif
|
|
|
+TREX_API void trex_free(TRex* exp);
|
|
|
+TREX_API TRexBool trex_match(TRex* exp, const TRexChar* text);
|
|
|
+TREX_API TRexBool trex_search(TRex* exp, const TRexChar* text, const TRexChar** out_begin, const TRexChar** out_end);
|
|
|
+TREX_API TRexBool
|
|
|
+trex_searchrange(TRex* exp, const TRexChar* text_begin, const TRexChar* text_end, const TRexChar** out_begin, const TRexChar** out_end);
|
|
|
+TREX_API int trex_getsubexpcount(TRex* exp);
|
|
|
+TREX_API TRexBool trex_getsubexp(TRex* exp, int n, TRexMatch* subexp);
|
|
|
+
|
|
|
+#ifdef __cplusplus
|
|
|
+}
|
|
|
+#endif
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
/*
 * Private data attached to every arg_rex option.
 *
 * arg_rexn() places one of these directly after the arg_rex struct and
 * the scan callback reads it back to recompile the expression.
 */
struct privhdr {
    const char* pattern; /* regular expression text supplied by the caller (not copied) */
    int flags;           /* flags handed to trex_compile() */
};
|
|
|
+
|
|
|
+static void arg_rex_resetfn(struct arg_rex* parent) {
|
|
|
+ ARG_TRACE(("%s:resetfn(%p)\n", __FILE__, parent));
|
|
|
+ parent->count = 0;
|
|
|
+}
|
|
|
+
|
|
|
+static int arg_rex_scanfn(struct arg_rex* parent, const char* argval) {
|
|
|
+ int errorcode = 0;
|
|
|
+ const TRexChar* error = NULL;
|
|
|
+ TRex* rex = NULL;
|
|
|
+ TRexBool is_match = TRex_False;
|
|
|
+
|
|
|
+ if (parent->count == parent->hdr.maxcount) {
|
|
|
+ /* maximum number of arguments exceeded */
|
|
|
+ errorcode = ARG_ERR_MAXCOUNT;
|
|
|
+ } else if (!argval) {
|
|
|
+ /* a valid argument with no argument value was given. */
|
|
|
+ /* This happens when an optional argument value was invoked. */
|
|
|
+ /* leave parent argument value unaltered but still count the argument. */
|
|
|
+ parent->count++;
|
|
|
+ } else {
|
|
|
+ struct privhdr* priv = (struct privhdr*)parent->hdr.priv;
|
|
|
+
|
|
|
+ /* test the current argument value for a match with the regular expression */
|
|
|
+ /* if a match is detected, record the argument value in the arg_rex struct */
|
|
|
+
|
|
|
+ rex = trex_compile(priv->pattern, &error, priv->flags);
|
|
|
+ is_match = trex_match(rex, argval);
|
|
|
+ if (!is_match)
|
|
|
+ errorcode = ARG_ERR_REGNOMATCH;
|
|
|
+ else
|
|
|
+ parent->sval[parent->count++] = argval;
|
|
|
+
|
|
|
+ trex_free(rex);
|
|
|
+ }
|
|
|
+
|
|
|
+ ARG_TRACE(("%s:scanfn(%p) returns %d\n", __FILE__, parent, errorcode));
|
|
|
+ return errorcode;
|
|
|
+}
|
|
|
+
|
|
|
+static int arg_rex_checkfn(struct arg_rex* parent) {
|
|
|
+ int errorcode = (parent->count < parent->hdr.mincount) ? ARG_ERR_MINCOUNT : 0;
|
|
|
+#if 0
|
|
|
+ struct privhdr *priv = (struct privhdr*)parent->hdr.priv;
|
|
|
+
|
|
|
+ /* free the regex "program" we constructed in resetfn */
|
|
|
+ regfree(&(priv->regex));
|
|
|
+
|
|
|
+ /*printf("%s:checkfn(%p) returns %d\n",__FILE__,parent,errorcode);*/
|
|
|
+#endif
|
|
|
+ return errorcode;
|
|
|
+}
|
|
|
+
|
|
|
+static void arg_rex_errorfn(struct arg_rex* parent, arg_dstr_t ds, int errorcode, const char* argval, const char* progname) {
|
|
|
+ const char* shortopts = parent->hdr.shortopts;
|
|
|
+ const char* longopts = parent->hdr.longopts;
|
|
|
+ const char* datatype = parent->hdr.datatype;
|
|
|
+
|
|
|
+ /* make argval NULL safe */
|
|
|
+ argval = argval ? argval : "";
|
|
|
+
|
|
|
+ arg_dstr_catf(ds, "%s: ", progname);
|
|
|
+ switch (errorcode) {
|
|
|
+ case ARG_ERR_MINCOUNT:
|
|
|
+ arg_dstr_cat(ds, "missing option ");
|
|
|
+ arg_print_option_ds(ds, shortopts, longopts, datatype, "\n");
|
|
|
+ break;
|
|
|
+
|
|
|
+ case ARG_ERR_MAXCOUNT:
|
|
|
+ arg_dstr_cat(ds, "excess option ");
|
|
|
+ arg_print_option_ds(ds, shortopts, longopts, argval, "\n");
|
|
|
+ break;
|
|
|
+
|
|
|
+ case ARG_ERR_REGNOMATCH:
|
|
|
+ arg_dstr_cat(ds, "illegal value ");
|
|
|
+ arg_print_option_ds(ds, shortopts, longopts, argval, "\n");
|
|
|
+ break;
|
|
|
+
|
|
|
+ default: {
|
|
|
+ #if 0
|
|
|
+ char errbuff[256];
|
|
|
+ regerror(errorcode, NULL, errbuff, sizeof(errbuff));
|
|
|
+ printf("%s\n", errbuff);
|
|
|
+ #endif
|
|
|
+ } break;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/*
 * Create a regex option that may appear at most once (zero or one
 * occurrence). All other arguments are forwarded to arg_rexn() unchanged.
 */
struct arg_rex* arg_rex0(const char* shortopts, const char* longopts, const char* pattern, const char* datatype, int flags, const char* glossary) {
    const int mincount = 0;
    const int maxcount = 1;

    return arg_rexn(shortopts, longopts, pattern, datatype, mincount, maxcount, flags, glossary);
}
|
|
|
+
|
|
|
/*
 * Create a regex option that must appear exactly once. All other
 * arguments are forwarded to arg_rexn() unchanged.
 */
struct arg_rex* arg_rex1(const char* shortopts, const char* longopts, const char* pattern, const char* datatype, int flags, const char* glossary) {
    const int mincount = 1;
    const int maxcount = 1;

    return arg_rexn(shortopts, longopts, pattern, datatype, mincount, maxcount, flags, glossary);
}
|
|
|
+
|
|
|
+struct arg_rex* arg_rexn(const char* shortopts,
|
|
|
+ const char* longopts,
|
|
|
+ const char* pattern,
|
|
|
+ const char* datatype,
|
|
|
+ int mincount,
|
|
|
+ int maxcount,
|
|
|
+ int flags,
|
|
|
+ const char* glossary) {
|
|
|
+ size_t nbytes;
|
|
|
+ struct arg_rex* result;
|
|
|
+ struct privhdr* priv;
|
|
|
+ int i;
|
|
|
+ const TRexChar* error = NULL;
|
|
|
+ TRex* rex = NULL;
|
|
|
+
|
|
|
+ if (!pattern) {
|
|
|
+ printf("argtable: ERROR - illegal regular expression pattern \"(NULL)\"\n");
|
|
|
+ printf("argtable: Bad argument table.\n");
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* foolproof things by ensuring maxcount is not less than mincount */
|
|
|
+ maxcount = (maxcount < mincount) ? mincount : maxcount;
|
|
|
+
|
|
|
+ nbytes = sizeof(struct arg_rex) /* storage for struct arg_rex */
|
|
|
+ + sizeof(struct privhdr) /* storage for private arg_rex data */
|
|
|
+ + (size_t)maxcount * sizeof(char*); /* storage for sval[maxcount] array */
|
|
|
+
|
|
|
+ /* init the arg_hdr struct */
|
|
|
+ result = (struct arg_rex*)xmalloc(nbytes);
|
|
|
+ result->hdr.flag = ARG_HASVALUE;
|
|
|
+ result->hdr.shortopts = shortopts;
|
|
|
+ result->hdr.longopts = longopts;
|
|
|
+ result->hdr.datatype = datatype ? datatype : pattern;
|
|
|
+ result->hdr.glossary = glossary;
|
|
|
+ result->hdr.mincount = mincount;
|
|
|
+ result->hdr.maxcount = maxcount;
|
|
|
+ result->hdr.parent = result;
|
|
|
+ result->hdr.resetfn = (arg_resetfn*)arg_rex_resetfn;
|
|
|
+ result->hdr.scanfn = (arg_scanfn*)arg_rex_scanfn;
|
|
|
+ result->hdr.checkfn = (arg_checkfn*)arg_rex_checkfn;
|
|
|
+ result->hdr.errorfn = (arg_errorfn*)arg_rex_errorfn;
|
|
|
+
|
|
|
+ /* store the arg_rex_priv struct immediately after the arg_rex struct */
|
|
|
+ result->hdr.priv = result + 1;
|
|
|
+ priv = (struct privhdr*)(result->hdr.priv);
|
|
|
+ priv->pattern = pattern;
|
|
|
+ priv->flags = flags;
|
|
|
+
|
|
|
+ /* store the sval[maxcount] array immediately after the arg_rex_priv struct */
|
|
|
+ result->sval = (const char**)(priv + 1);
|
|
|
+ result->count = 0;
|
|
|
+
|
|
|
+ /* foolproof the string pointers by initializing them to reference empty strings */
|
|
|
+ for (i = 0; i < maxcount; i++)
|
|
|
+ result->sval[i] = "";
|
|
|
+
|
|
|
+ /* here we construct and destroy a regex representation of the regular
|
|
|
+ * expression for no other reason than to force any regex errors to be
|
|
|
+ * trapped now rather than later. If we don't, then errors may go undetected
|
|
|
+ * until an argument is actually parsed.
|
|
|
+ */
|
|
|
+
|
|
|
+ rex = trex_compile(priv->pattern, &error, priv->flags);
|
|
|
+ if (rex == NULL) {
|
|
|
+ ARG_LOG(("argtable: %s \"%s\"\n", error ? error : _TREXC("undefined"), priv->pattern));
|
|
|
+ ARG_LOG(("argtable: Bad argument table.\n"));
|
|
|
+ }
|
|
|
+
|
|
|
+ trex_free(rex);
|
|
|
+
|
|
|
+ ARG_TRACE(("arg_rexn() returns %p\n", result));
|
|
|
+ return result;
|
|
|
+}
|
|
|
+
|
|
|
+/* see copyright notice in trex.h */
|
|
|
+#include <ctype.h>
|
|
|
+#include <setjmp.h>
|
|
|
+#include <stdlib.h>
|
|
|
+#include <string.h>
|
|
|
+
|
|
|
+#ifdef _UINCODE
|
|
|
+#define scisprint iswprint
|
|
|
+#define scstrlen wcslen
|
|
|
+#define scprintf wprintf
|
|
|
+#define _SC(x) L(x)
|
|
|
+#else
|
|
|
+#define scisprint isprint
|
|
|
+#define scstrlen strlen
|
|
|
+#define scprintf printf
|
|
|
+#define _SC(x) (x)
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef ARG_REX_DEBUG
|
|
|
+#include <stdio.h>
|
|
|
+
|
|
|
+static const TRexChar* g_nnames[] = {_SC("NONE"), _SC("OP_GREEDY"), _SC("OP_OR"), _SC("OP_EXPR"), _SC("OP_NOCAPEXPR"),
|
|
|
+ _SC("OP_DOT"), _SC("OP_CLASS"), _SC("OP_CCLASS"), _SC("OP_NCLASS"), _SC("OP_RANGE"),
|
|
|
+ _SC("OP_CHAR"), _SC("OP_EOL"), _SC("OP_BOL"), _SC("OP_WB")};
|
|
|
+
|
|
|
+#endif
|
|
|
+#define OP_GREEDY (MAX_CHAR + 1) /* * + ? {n} */
|
|
|
+#define OP_OR (MAX_CHAR + 2)
|
|
|
+#define OP_EXPR (MAX_CHAR + 3) /* parentesis () */
|
|
|
+#define OP_NOCAPEXPR (MAX_CHAR + 4) /* parentesis (?:) */
|
|
|
+#define OP_DOT (MAX_CHAR + 5)
|
|
|
+#define OP_CLASS (MAX_CHAR + 6)
|
|
|
+#define OP_CCLASS (MAX_CHAR + 7)
|
|
|
+#define OP_NCLASS (MAX_CHAR + 8) /* negates class the [^ */
|
|
|
+#define OP_RANGE (MAX_CHAR + 9)
|
|
|
+#define OP_CHAR (MAX_CHAR + 10)
|
|
|
+#define OP_EOL (MAX_CHAR + 11)
|
|
|
+#define OP_BOL (MAX_CHAR + 12)
|
|
|
+#define OP_WB (MAX_CHAR + 13)
|
|
|
+
|
|
|
+#define TREX_SYMBOL_ANY_CHAR ('.')
|
|
|
+#define TREX_SYMBOL_GREEDY_ONE_OR_MORE ('+')
|
|
|
+#define TREX_SYMBOL_GREEDY_ZERO_OR_MORE ('*')
|
|
|
+#define TREX_SYMBOL_GREEDY_ZERO_OR_ONE ('?')
|
|
|
+#define TREX_SYMBOL_BRANCH ('|')
|
|
|
+#define TREX_SYMBOL_END_OF_STRING ('$')
|
|
|
+#define TREX_SYMBOL_BEGINNING_OF_STRING ('^')
|
|
|
+#define TREX_SYMBOL_ESCAPE_CHAR ('\\')
|
|
|
+
|
|
|
/* Node kind: either one of the OP_* codes or a literal character value. */
typedef int TRexNodeType;

/*
 * One node of the compiled expression.
 *
 * Nodes live in a single growable array and refer to each other by array
 * index; -1 means "none". The meaning of `left` and `right` depends on
 * the node kind (child indices, a character-class letter, range bounds,
 * packed repeat counts or a capture-group number).
 */
typedef struct tagTRexNode {
    TRexNodeType type; /* OP_* code or literal character */
    int left;          /* first operand, -1 when unused */
    int right;         /* second operand, -1 when unused */
    int next;          /* index of the following node in sequence, or -1 */
} TRexNode;
|
|
|
+
|
|
|
+struct TRex {
|
|
|
+ const TRexChar* _eol;
|
|
|
+ const TRexChar* _bol;
|
|
|
+ const TRexChar* _p;
|
|
|
+ int _first;
|
|
|
+ int _op;
|
|
|
+ TRexNode* _nodes;
|
|
|
+ int _nallocated;
|
|
|
+ int _nsize;
|
|
|
+ int _nsubexpr;
|
|
|
+ TRexMatch* _matches;
|
|
|
+ int _currsubexp;
|
|
|
+ void* _jmpbuf;
|
|
|
+ const TRexChar** _error;
|
|
|
+ int _flags;
|
|
|
+};
|
|
|
+
|
|
|
+static int trex_list(TRex* exp);
|
|
|
+
|
|
|
+static int trex_newnode(TRex* exp, TRexNodeType type) {
|
|
|
+ TRexNode n;
|
|
|
+ int newid;
|
|
|
+ n.type = type;
|
|
|
+ n.next = n.right = n.left = -1;
|
|
|
+ if (type == OP_EXPR)
|
|
|
+ n.right = exp->_nsubexpr++;
|
|
|
+ if (exp->_nallocated < (exp->_nsize + 1)) {
|
|
|
+ exp->_nallocated *= 2;
|
|
|
+ exp->_nodes = (TRexNode*)xrealloc(exp->_nodes, (size_t)exp->_nallocated * sizeof(TRexNode));
|
|
|
+ }
|
|
|
+ exp->_nodes[exp->_nsize++] = n;
|
|
|
+ newid = exp->_nsize - 1;
|
|
|
+ return (int)newid;
|
|
|
+}
|
|
|
+
|
|
|
+static void trex_error(TRex* exp, const TRexChar* error) {
|
|
|
+ if (exp->_error)
|
|
|
+ *exp->_error = error;
|
|
|
+ longjmp(*((jmp_buf*)exp->_jmpbuf), -1);
|
|
|
+}
|
|
|
+
|
|
|
+static void trex_expect(TRex* exp, int n) {
|
|
|
+ if ((*exp->_p) != n)
|
|
|
+ trex_error(exp, _SC("expected paren"));
|
|
|
+ exp->_p++;
|
|
|
+}
|
|
|
+
|
|
|
+static TRexChar trex_escapechar(TRex* exp) {
|
|
|
+ if (*exp->_p == TREX_SYMBOL_ESCAPE_CHAR) {
|
|
|
+ exp->_p++;
|
|
|
+ switch (*exp->_p) {
|
|
|
+ case 'v':
|
|
|
+ exp->_p++;
|
|
|
+ return '\v';
|
|
|
+ case 'n':
|
|
|
+ exp->_p++;
|
|
|
+ return '\n';
|
|
|
+ case 't':
|
|
|
+ exp->_p++;
|
|
|
+ return '\t';
|
|
|
+ case 'r':
|
|
|
+ exp->_p++;
|
|
|
+ return '\r';
|
|
|
+ case 'f':
|
|
|
+ exp->_p++;
|
|
|
+ return '\f';
|
|
|
+ default:
|
|
|
+ return (*exp->_p++);
|
|
|
+ }
|
|
|
+ } else if (!scisprint((int)(*exp->_p)))
|
|
|
+ trex_error(exp, _SC("letter expected"));
|
|
|
+ return (*exp->_p++);
|
|
|
+}
|
|
|
+
|
|
|
+static int trex_charclass(TRex* exp, int classid) {
|
|
|
+ int n = trex_newnode(exp, OP_CCLASS);
|
|
|
+ exp->_nodes[n].left = classid;
|
|
|
+ return n;
|
|
|
+}
|
|
|
+
|
|
|
+static int trex_charnode(TRex* exp, TRexBool isclass) {
|
|
|
+ TRexChar t;
|
|
|
+ if (*exp->_p == TREX_SYMBOL_ESCAPE_CHAR) {
|
|
|
+ exp->_p++;
|
|
|
+ switch (*exp->_p) {
|
|
|
+ case 'n':
|
|
|
+ exp->_p++;
|
|
|
+ return trex_newnode(exp, '\n');
|
|
|
+ case 't':
|
|
|
+ exp->_p++;
|
|
|
+ return trex_newnode(exp, '\t');
|
|
|
+ case 'r':
|
|
|
+ exp->_p++;
|
|
|
+ return trex_newnode(exp, '\r');
|
|
|
+ case 'f':
|
|
|
+ exp->_p++;
|
|
|
+ return trex_newnode(exp, '\f');
|
|
|
+ case 'v':
|
|
|
+ exp->_p++;
|
|
|
+ return trex_newnode(exp, '\v');
|
|
|
+ case 'a':
|
|
|
+ case 'A':
|
|
|
+ case 'w':
|
|
|
+ case 'W':
|
|
|
+ case 's':
|
|
|
+ case 'S':
|
|
|
+ case 'd':
|
|
|
+ case 'D':
|
|
|
+ case 'x':
|
|
|
+ case 'X':
|
|
|
+ case 'c':
|
|
|
+ case 'C':
|
|
|
+ case 'p':
|
|
|
+ case 'P':
|
|
|
+ case 'l':
|
|
|
+ case 'u': {
|
|
|
+ t = *exp->_p;
|
|
|
+ exp->_p++;
|
|
|
+ return trex_charclass(exp, t);
|
|
|
+ }
|
|
|
+ case 'b':
|
|
|
+ case 'B':
|
|
|
+ if (!isclass) {
|
|
|
+ int node = trex_newnode(exp, OP_WB);
|
|
|
+ exp->_nodes[node].left = *exp->_p;
|
|
|
+ exp->_p++;
|
|
|
+ return node;
|
|
|
+ }
|
|
|
+ /* fall through */
|
|
|
+ default:
|
|
|
+ t = *exp->_p;
|
|
|
+ exp->_p++;
|
|
|
+ return trex_newnode(exp, t);
|
|
|
+ }
|
|
|
+ } else if (!scisprint((int)(*exp->_p))) {
|
|
|
+ trex_error(exp, _SC("letter expected"));
|
|
|
+ }
|
|
|
+ t = *exp->_p;
|
|
|
+ exp->_p++;
|
|
|
+ return trex_newnode(exp, t);
|
|
|
+}
|
|
|
+static int trex_class(TRex* exp) {
|
|
|
+ int ret = -1;
|
|
|
+ int first = -1, chain;
|
|
|
+ if (*exp->_p == TREX_SYMBOL_BEGINNING_OF_STRING) {
|
|
|
+ ret = trex_newnode(exp, OP_NCLASS);
|
|
|
+ exp->_p++;
|
|
|
+ } else
|
|
|
+ ret = trex_newnode(exp, OP_CLASS);
|
|
|
+
|
|
|
+ if (*exp->_p == ']')
|
|
|
+ trex_error(exp, _SC("empty class"));
|
|
|
+ chain = ret;
|
|
|
+ while (*exp->_p != ']' && exp->_p != exp->_eol) {
|
|
|
+ if (*exp->_p == '-' && first != -1) {
|
|
|
+ int r, t;
|
|
|
+ if (*exp->_p++ == ']')
|
|
|
+ trex_error(exp, _SC("unfinished range"));
|
|
|
+ r = trex_newnode(exp, OP_RANGE);
|
|
|
+ if (first > *exp->_p)
|
|
|
+ trex_error(exp, _SC("invalid range"));
|
|
|
+ if (exp->_nodes[first].type == OP_CCLASS)
|
|
|
+ trex_error(exp, _SC("cannot use character classes in ranges"));
|
|
|
+ exp->_nodes[r].left = exp->_nodes[first].type;
|
|
|
+ t = trex_escapechar(exp);
|
|
|
+ exp->_nodes[r].right = t;
|
|
|
+ exp->_nodes[chain].next = r;
|
|
|
+ chain = r;
|
|
|
+ first = -1;
|
|
|
+ } else {
|
|
|
+ if (first != -1) {
|
|
|
+ int c = first;
|
|
|
+ exp->_nodes[chain].next = c;
|
|
|
+ chain = c;
|
|
|
+ first = trex_charnode(exp, TRex_True);
|
|
|
+ } else {
|
|
|
+ first = trex_charnode(exp, TRex_True);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (first != -1) {
|
|
|
+ int c = first;
|
|
|
+ exp->_nodes[chain].next = c;
|
|
|
+ chain = c;
|
|
|
+ first = -1;
|
|
|
+ }
|
|
|
+ /* hack? */
|
|
|
+ exp->_nodes[ret].left = exp->_nodes[ret].next;
|
|
|
+ exp->_nodes[ret].next = -1;
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+static int trex_parsenumber(TRex* exp) {
|
|
|
+ int ret = *exp->_p - '0';
|
|
|
+ int positions = 10;
|
|
|
+ exp->_p++;
|
|
|
+ while (isdigit((int)(*exp->_p))) {
|
|
|
+ ret = ret * 10 + (*exp->_p++ - '0');
|
|
|
+ if (positions == 1000000000)
|
|
|
+ trex_error(exp, _SC("overflow in numeric constant"));
|
|
|
+ positions *= 10;
|
|
|
+ };
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+static int trex_element(TRex* exp) {
|
|
|
+ int ret = -1;
|
|
|
+ switch (*exp->_p) {
|
|
|
+ case '(': {
|
|
|
+ int expr, newn;
|
|
|
+ exp->_p++;
|
|
|
+
|
|
|
+ if (*exp->_p == '?') {
|
|
|
+ exp->_p++;
|
|
|
+ trex_expect(exp, ':');
|
|
|
+ expr = trex_newnode(exp, OP_NOCAPEXPR);
|
|
|
+ } else
|
|
|
+ expr = trex_newnode(exp, OP_EXPR);
|
|
|
+ newn = trex_list(exp);
|
|
|
+ exp->_nodes[expr].left = newn;
|
|
|
+ ret = expr;
|
|
|
+ trex_expect(exp, ')');
|
|
|
+ } break;
|
|
|
+ case '[':
|
|
|
+ exp->_p++;
|
|
|
+ ret = trex_class(exp);
|
|
|
+ trex_expect(exp, ']');
|
|
|
+ break;
|
|
|
+ case TREX_SYMBOL_END_OF_STRING:
|
|
|
+ exp->_p++;
|
|
|
+ ret = trex_newnode(exp, OP_EOL);
|
|
|
+ break;
|
|
|
+ case TREX_SYMBOL_ANY_CHAR:
|
|
|
+ exp->_p++;
|
|
|
+ ret = trex_newnode(exp, OP_DOT);
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ ret = trex_charnode(exp, TRex_False);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ {
|
|
|
+ TRexBool isgreedy = TRex_False;
|
|
|
+ unsigned short p0 = 0, p1 = 0;
|
|
|
+ switch (*exp->_p) {
|
|
|
+ case TREX_SYMBOL_GREEDY_ZERO_OR_MORE:
|
|
|
+ p0 = 0;
|
|
|
+ p1 = 0xFFFF;
|
|
|
+ exp->_p++;
|
|
|
+ isgreedy = TRex_True;
|
|
|
+ break;
|
|
|
+ case TREX_SYMBOL_GREEDY_ONE_OR_MORE:
|
|
|
+ p0 = 1;
|
|
|
+ p1 = 0xFFFF;
|
|
|
+ exp->_p++;
|
|
|
+ isgreedy = TRex_True;
|
|
|
+ break;
|
|
|
+ case TREX_SYMBOL_GREEDY_ZERO_OR_ONE:
|
|
|
+ p0 = 0;
|
|
|
+ p1 = 1;
|
|
|
+ exp->_p++;
|
|
|
+ isgreedy = TRex_True;
|
|
|
+ break;
|
|
|
+ case '{':
|
|
|
+ exp->_p++;
|
|
|
+ if (!isdigit((int)(*exp->_p)))
|
|
|
+ trex_error(exp, _SC("number expected"));
|
|
|
+ p0 = (unsigned short)trex_parsenumber(exp);
|
|
|
+ /*******************************/
|
|
|
+ switch (*exp->_p) {
|
|
|
+ case '}':
|
|
|
+ p1 = p0;
|
|
|
+ exp->_p++;
|
|
|
+ break;
|
|
|
+ case ',':
|
|
|
+ exp->_p++;
|
|
|
+ p1 = 0xFFFF;
|
|
|
+ if (isdigit((int)(*exp->_p))) {
|
|
|
+ p1 = (unsigned short)trex_parsenumber(exp);
|
|
|
+ }
|
|
|
+ trex_expect(exp, '}');
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ trex_error(exp, _SC(", or } expected"));
|
|
|
+ }
|
|
|
+ /*******************************/
|
|
|
+ isgreedy = TRex_True;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ if (isgreedy) {
|
|
|
+ int nnode = trex_newnode(exp, OP_GREEDY);
|
|
|
+ exp->_nodes[nnode].left = ret;
|
|
|
+ exp->_nodes[nnode].right = ((p0) << 16) | p1;
|
|
|
+ ret = nnode;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if ((*exp->_p != TREX_SYMBOL_BRANCH) && (*exp->_p != ')') && (*exp->_p != TREX_SYMBOL_GREEDY_ZERO_OR_MORE) &&
|
|
|
+ (*exp->_p != TREX_SYMBOL_GREEDY_ONE_OR_MORE) && (*exp->_p != '\0')) {
|
|
|
+ int nnode = trex_element(exp);
|
|
|
+ exp->_nodes[ret].next = nnode;
|
|
|
+ }
|
|
|
+
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+static int trex_list(TRex* exp) {
|
|
|
+ int ret = -1, e;
|
|
|
+ if (*exp->_p == TREX_SYMBOL_BEGINNING_OF_STRING) {
|
|
|
+ exp->_p++;
|
|
|
+ ret = trex_newnode(exp, OP_BOL);
|
|
|
+ }
|
|
|
+ e = trex_element(exp);
|
|
|
+ if (ret != -1) {
|
|
|
+ exp->_nodes[ret].next = e;
|
|
|
+ } else
|
|
|
+ ret = e;
|
|
|
+
|
|
|
+ if (*exp->_p == TREX_SYMBOL_BRANCH) {
|
|
|
+ int temp, tright;
|
|
|
+ exp->_p++;
|
|
|
+ temp = trex_newnode(exp, OP_OR);
|
|
|
+ exp->_nodes[temp].left = ret;
|
|
|
+ tright = trex_list(exp);
|
|
|
+ exp->_nodes[temp].right = tright;
|
|
|
+ ret = temp;
|
|
|
+ }
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+static TRexBool trex_matchcclass(int cclass, TRexChar c) {
|
|
|
+ switch (cclass) {
|
|
|
+ case 'a':
|
|
|
+ return isalpha(c) ? TRex_True : TRex_False;
|
|
|
+ case 'A':
|
|
|
+ return !isalpha(c) ? TRex_True : TRex_False;
|
|
|
+ case 'w':
|
|
|
+ return (isalnum(c) || c == '_') ? TRex_True : TRex_False;
|
|
|
+ case 'W':
|
|
|
+ return (!isalnum(c) && c != '_') ? TRex_True : TRex_False;
|
|
|
+ case 's':
|
|
|
+ return isspace(c) ? TRex_True : TRex_False;
|
|
|
+ case 'S':
|
|
|
+ return !isspace(c) ? TRex_True : TRex_False;
|
|
|
+ case 'd':
|
|
|
+ return isdigit(c) ? TRex_True : TRex_False;
|
|
|
+ case 'D':
|
|
|
+ return !isdigit(c) ? TRex_True : TRex_False;
|
|
|
+ case 'x':
|
|
|
+ return isxdigit(c) ? TRex_True : TRex_False;
|
|
|
+ case 'X':
|
|
|
+ return !isxdigit(c) ? TRex_True : TRex_False;
|
|
|
+ case 'c':
|
|
|
+ return iscntrl(c) ? TRex_True : TRex_False;
|
|
|
+ case 'C':
|
|
|
+ return !iscntrl(c) ? TRex_True : TRex_False;
|
|
|
+ case 'p':
|
|
|
+ return ispunct(c) ? TRex_True : TRex_False;
|
|
|
+ case 'P':
|
|
|
+ return !ispunct(c) ? TRex_True : TRex_False;
|
|
|
+ case 'l':
|
|
|
+ return islower(c) ? TRex_True : TRex_False;
|
|
|
+ case 'u':
|
|
|
+ return isupper(c) ? TRex_True : TRex_False;
|
|
|
+ }
|
|
|
+ return TRex_False; /*cannot happen*/
|
|
|
+}
|
|
|
+
|
|
|
+static TRexBool trex_matchclass(TRex* exp, TRexNode* node, TRexChar c) {
|
|
|
+ do {
|
|
|
+ switch (node->type) {
|
|
|
+ case OP_RANGE:
|
|
|
+ if (exp->_flags & TREX_ICASE) {
|
|
|
+ if (c >= toupper(node->left) && c <= toupper(node->right))
|
|
|
+ return TRex_True;
|
|
|
+ if (c >= tolower(node->left) && c <= tolower(node->right))
|
|
|
+ return TRex_True;
|
|
|
+ } else {
|
|
|
+ if (c >= node->left && c <= node->right)
|
|
|
+ return TRex_True;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case OP_CCLASS:
|
|
|
+ if (trex_matchcclass(node->left, c))
|
|
|
+ return TRex_True;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ if (exp->_flags & TREX_ICASE) {
|
|
|
+ if (c == tolower(node->type) || c == toupper(node->type))
|
|
|
+ return TRex_True;
|
|
|
+ } else {
|
|
|
+ if (c == node->type)
|
|
|
+ return TRex_True;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } while ((node->next != -1) && ((node = &exp->_nodes[node->next]) != NULL));
|
|
|
+ return TRex_False;
|
|
|
+}
|
|
|
+
|
|
|
+static const TRexChar* trex_matchnode(TRex* exp, TRexNode* node, const TRexChar* str, TRexNode* next) {
|
|
|
+ TRexNodeType type = node->type;
|
|
|
+ switch (type) {
|
|
|
+ case OP_GREEDY: {
|
|
|
+ /* TRexNode *greedystop = (node->next != -1) ? &exp->_nodes[node->next] : NULL; */
|
|
|
+ TRexNode* greedystop = NULL;
|
|
|
+ int p0 = (node->right >> 16) & 0x0000FFFF, p1 = node->right & 0x0000FFFF, nmaches = 0;
|
|
|
+ const TRexChar *s = str, *good = str;
|
|
|
+
|
|
|
+ if (node->next != -1) {
|
|
|
+ greedystop = &exp->_nodes[node->next];
|
|
|
+ } else {
|
|
|
+ greedystop = next;
|
|
|
+ }
|
|
|
+
|
|
|
+ while ((nmaches == 0xFFFF || nmaches < p1)) {
|
|
|
+ const TRexChar* stop;
|
|
|
+ if ((s = trex_matchnode(exp, &exp->_nodes[node->left], s, greedystop)) == NULL)
|
|
|
+ break;
|
|
|
+ nmaches++;
|
|
|
+ good = s;
|
|
|
+ if (greedystop) {
|
|
|
+ /* checks that 0 matches satisfy the expression(if so skips) */
|
|
|
+ /* if not would always stop(for instance if is a '?') */
|
|
|
+ if (greedystop->type != OP_GREEDY || (greedystop->type == OP_GREEDY && ((greedystop->right >> 16) & 0x0000FFFF) != 0)) {
|
|
|
+ TRexNode* gnext = NULL;
|
|
|
+ if (greedystop->next != -1) {
|
|
|
+ gnext = &exp->_nodes[greedystop->next];
|
|
|
+ } else if (next && next->next != -1) {
|
|
|
+ gnext = &exp->_nodes[next->next];
|
|
|
+ }
|
|
|
+ stop = trex_matchnode(exp, greedystop, s, gnext);
|
|
|
+ if (stop) {
|
|
|
+ /* if satisfied stop it */
|
|
|
+ if (p0 == p1 && p0 == nmaches)
|
|
|
+ break;
|
|
|
+ else if (nmaches >= p0 && p1 == 0xFFFF)
|
|
|
+ break;
|
|
|
+ else if (nmaches >= p0 && nmaches <= p1)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (s >= exp->_eol)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ if (p0 == p1 && p0 == nmaches)
|
|
|
+ return good;
|
|
|
+ else if (nmaches >= p0 && p1 == 0xFFFF)
|
|
|
+ return good;
|
|
|
+ else if (nmaches >= p0 && nmaches <= p1)
|
|
|
+ return good;
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+ case OP_OR: {
|
|
|
+ const TRexChar* asd = str;
|
|
|
+ TRexNode* temp = &exp->_nodes[node->left];
|
|
|
+ while ((asd = trex_matchnode(exp, temp, asd, NULL)) != NULL) {
|
|
|
+ if (temp->next != -1)
|
|
|
+ temp = &exp->_nodes[temp->next];
|
|
|
+ else
|
|
|
+ return asd;
|
|
|
+ }
|
|
|
+ asd = str;
|
|
|
+ temp = &exp->_nodes[node->right];
|
|
|
+ while ((asd = trex_matchnode(exp, temp, asd, NULL)) != NULL) {
|
|
|
+ if (temp->next != -1)
|
|
|
+ temp = &exp->_nodes[temp->next];
|
|
|
+ else
|
|
|
+ return asd;
|
|
|
+ }
|
|
|
+ return NULL;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ case OP_EXPR:
|
|
|
+ case OP_NOCAPEXPR: {
|
|
|
+ TRexNode* n = &exp->_nodes[node->left];
|
|
|
+ const TRexChar* cur = str;
|
|
|
+ int capture = -1;
|
|
|
+ if (node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) {
|
|
|
+ capture = exp->_currsubexp;
|
|
|
+ exp->_matches[capture].begin = cur;
|
|
|
+ exp->_currsubexp++;
|
|
|
+ }
|
|
|
+
|
|
|
+ do {
|
|
|
+ TRexNode* subnext = NULL;
|
|
|
+ if (n->next != -1) {
|
|
|
+ subnext = &exp->_nodes[n->next];
|
|
|
+ } else {
|
|
|
+ subnext = next;
|
|
|
+ }
|
|
|
+ if ((cur = trex_matchnode(exp, n, cur, subnext)) == NULL) {
|
|
|
+ if (capture != -1) {
|
|
|
+ exp->_matches[capture].begin = 0;
|
|
|
+ exp->_matches[capture].len = 0;
|
|
|
+ }
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+ } while ((n->next != -1) && ((n = &exp->_nodes[n->next]) != NULL));
|
|
|
+
|
|
|
+ if (capture != -1)
|
|
|
+ exp->_matches[capture].len = (int)(cur - exp->_matches[capture].begin);
|
|
|
+ return cur;
|
|
|
+ }
|
|
|
+ case OP_WB:
|
|
|
+ if ((str == exp->_bol && !isspace((int)(*str))) || (str == exp->_eol && !isspace((int)(*(str - 1)))) || (!isspace((int)(*str)) && isspace((int)(*(str + 1)))) ||
|
|
|
+ (isspace((int)(*str)) && !isspace((int)(*(str + 1))))) {
|
|
|
+ return (node->left == 'b') ? str : NULL;
|
|
|
+ }
|
|
|
+ return (node->left == 'b') ? NULL : str;
|
|
|
+ case OP_BOL:
|
|
|
+ if (str == exp->_bol)
|
|
|
+ return str;
|
|
|
+ return NULL;
|
|
|
+ case OP_EOL:
|
|
|
+ if (str == exp->_eol)
|
|
|
+ return str;
|
|
|
+ return NULL;
|
|
|
+ case OP_DOT: {
|
|
|
+ str++;
|
|
|
+ }
|
|
|
+ return str;
|
|
|
+ case OP_NCLASS:
|
|
|
+ case OP_CLASS:
|
|
|
+ if (trex_matchclass(exp, &exp->_nodes[node->left], *str) ? (type == OP_CLASS ? TRex_True : TRex_False)
|
|
|
+ : (type == OP_NCLASS ? TRex_True : TRex_False)) {
|
|
|
+ str++;
|
|
|
+ return str;
|
|
|
+ }
|
|
|
+ return NULL;
|
|
|
+ case OP_CCLASS:
|
|
|
+ if (trex_matchcclass(node->left, *str)) {
|
|
|
+ str++;
|
|
|
+ return str;
|
|
|
+ }
|
|
|
+ return NULL;
|
|
|
+ default: /* char */
|
|
|
+ if (exp->_flags & TREX_ICASE) {
|
|
|
+ if (*str != tolower(node->type) && *str != toupper(node->type))
|
|
|
+ return NULL;
|
|
|
+ } else {
|
|
|
+ if (*str != node->type)
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+ str++;
|
|
|
+ return str;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/* public api */
|
|
|
+TRex* trex_compile(const TRexChar* pattern, const TRexChar** error, int flags) {
|
|
|
+ TRex* exp = (TRex*)xmalloc(sizeof(TRex));
|
|
|
+ exp->_eol = exp->_bol = NULL;
|
|
|
+ exp->_p = pattern;
|
|
|
+ exp->_nallocated = (int)(scstrlen(pattern) * sizeof(TRexChar));
|
|
|
+ exp->_nodes = (TRexNode*)xmalloc((size_t)exp->_nallocated * sizeof(TRexNode));
|
|
|
+ exp->_nsize = 0;
|
|
|
+ exp->_matches = 0;
|
|
|
+ exp->_nsubexpr = 0;
|
|
|
+ exp->_first = trex_newnode(exp, OP_EXPR);
|
|
|
+ exp->_error = error;
|
|
|
+ exp->_jmpbuf = xmalloc(sizeof(jmp_buf));
|
|
|
+ exp->_flags = flags;
|
|
|
+ if (setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) {
|
|
|
+ int res = trex_list(exp);
|
|
|
+ exp->_nodes[exp->_first].left = res;
|
|
|
+ if (*exp->_p != '\0')
|
|
|
+ trex_error(exp, _SC("unexpected character"));
|
|
|
+#ifdef ARG_REX_DEBUG
|
|
|
+ {
|
|
|
+ int nsize, i;
|
|
|
+ nsize = exp->_nsize;
|
|
|
+ scprintf(_SC("\n"));
|
|
|
+ for (i = 0; i < nsize; i++) {
|
|
|
+ if (exp->_nodes[i].type > MAX_CHAR)
|
|
|
+ scprintf(_SC("[%02d] %10s "), i, g_nnames[exp->_nodes[i].type - MAX_CHAR]);
|
|
|
+ else
|
|
|
+ scprintf(_SC("[%02d] %10c "), i, exp->_nodes[i].type);
|
|
|
+ scprintf(_SC("left %02d right %02d next %02d\n"), exp->_nodes[i].left, exp->_nodes[i].right, exp->_nodes[i].next);
|
|
|
+ }
|
|
|
+ scprintf(_SC("\n"));
|
|
|
+ }
|
|
|
+#endif
|
|
|
+ exp->_matches = (TRexMatch*)xmalloc((size_t)exp->_nsubexpr * sizeof(TRexMatch));
|
|
|
+ memset(exp->_matches, 0, (size_t)exp->_nsubexpr * sizeof(TRexMatch));
|
|
|
+ } else {
|
|
|
+ trex_free(exp);
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+ return exp;
|
|
|
+}
|
|
|
+
|
|
|
+void trex_free(TRex* exp) {
|
|
|
+ if (exp) {
|
|
|
+ xfree(exp->_nodes);
|
|
|
+ xfree(exp->_jmpbuf);
|
|
|
+ xfree(exp->_matches);
|
|
|
+ xfree(exp);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+TRexBool trex_match(TRex* exp, const TRexChar* text) {
|
|
|
+ const TRexChar* res = NULL;
|
|
|
+ exp->_bol = text;
|
|
|
+ exp->_eol = text + scstrlen(text);
|
|
|
+ exp->_currsubexp = 0;
|
|
|
+ res = trex_matchnode(exp, exp->_nodes, text, NULL);
|
|
|
+ if (res == NULL || res != exp->_eol)
|
|
|
+ return TRex_False;
|
|
|
+ return TRex_True;
|
|
|
+}
|
|
|
+
|
|
|
+TRexBool trex_searchrange(TRex* exp, const TRexChar* text_begin, const TRexChar* text_end, const TRexChar** out_begin, const TRexChar** out_end) {
|
|
|
+ const TRexChar* cur = NULL;
|
|
|
+ int node = exp->_first;
|
|
|
+ if (text_begin >= text_end)
|
|
|
+ return TRex_False;
|
|
|
+ exp->_bol = text_begin;
|
|
|
+ exp->_eol = text_end;
|
|
|
+ do {
|
|
|
+ cur = text_begin;
|
|
|
+ while (node != -1) {
|
|
|
+ exp->_currsubexp = 0;
|
|
|
+ cur = trex_matchnode(exp, &exp->_nodes[node], cur, NULL);
|
|
|
+ if (!cur)
|
|
|
+ break;
|
|
|
+ node = exp->_nodes[node].next;
|
|
|
+ }
|
|
|
+ text_begin++;
|
|
|
+ } while (cur == NULL && text_begin != text_end);
|
|
|
+
|
|
|
+ if (cur == NULL)
|
|
|
+ return TRex_False;
|
|
|
+
|
|
|
+ --text_begin;
|
|
|
+
|
|
|
+ if (out_begin)
|
|
|
+ *out_begin = text_begin;
|
|
|
+ if (out_end)
|
|
|
+ *out_end = cur;
|
|
|
+ return TRex_True;
|
|
|
+}
|
|
|
+
|
|
|
+TRexBool trex_search(TRex* exp, const TRexChar* text, const TRexChar** out_begin, const TRexChar** out_end) {
|
|
|
+ return trex_searchrange(exp, text, text + scstrlen(text), out_begin, out_end);
|
|
|
+}
|
|
|
+
|
|
|
+int trex_getsubexpcount(TRex* exp) {
|
|
|
+ return exp->_nsubexpr;
|
|
|
+}
|
|
|
+
|
|
|
+TRexBool trex_getsubexp(TRex* exp, int n, TRexMatch* subexp) {
|
|
|
+ if (n < 0 || n >= exp->_nsubexpr)
|
|
|
+ return TRex_False;
|
|
|
+ *subexp = exp->_matches[n];
|
|
|
+ return TRex_True;
|
|
|
+}
|