summaryrefslogtreecommitdiff
path: root/utils/tfstats/regexp/include/jm/regcomp.h
diff options
context:
space:
mode:
Diffstat (limited to 'utils/tfstats/regexp/include/jm/regcomp.h')
-rw-r--r--utils/tfstats/regexp/include/jm/regcomp.h1888
1 files changed, 1888 insertions, 0 deletions
diff --git a/utils/tfstats/regexp/include/jm/regcomp.h b/utils/tfstats/regexp/include/jm/regcomp.h
new file mode 100644
index 0000000..0065bba
--- /dev/null
+++ b/utils/tfstats/regexp/include/jm/regcomp.h
@@ -0,0 +1,1888 @@
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+/*
+ *
+ * Copyright (c) 1998-9
+ * Dr John Maddock
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Dr John Maddock makes no representations
+ * about the suitability of this software for any purpose.
+ * It is provided "as is" without express or implied warranty.
+ *
+ */
+
+ /*
+ * FILE regcomp.h
+ * VERSION 2.12
+ * This is an internal header file, do not include directly
+ */
+
+JM_NAMESPACE(__JM)
+
+//
+// kmp_translator:
+// function object that forwards each character to traits::translate,
+// together with the case-insensitivity setting captured at construction.
+//
+template <class traits>
+struct kmp_translator
+{
+   typedef typename traits::char_type char_type;
+
+   // passed as the second argument of every traits::translate call:
+   bool icase;
+
+   kmp_translator(bool c)
+      : icase(c)
+   {
+   }
+
+   // the locale parameter only exists in RE_LOCALE_CPP builds:
+#ifdef RE_LOCALE_CPP
+   char_type operator()(char_type c, const __JM_STD::locale& l)
+#else
+   char_type operator()(char_type c)
+#endif
+   {
+      return traits::translate(c, icase MAYBE_PASS_LOCALE(l));
+   }
+};
+
+#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES)
+//
+// Ugly, ugly hack:
+// templates don't merge if they contain switch statements, so declare these
+// templates in an unnamed namespace (i.e. with internal linkage); each translation
+// unit then gets its own local copy. It works seamlessly but bloats the app.
+namespace{
+#endif
+
+//
+// can_start (wide character overload):
+// a character whose value is 256 or more has no slot in the 256 entry
+// __map and is always reported as a possible start; any other character
+// is looked up in __map and tested against mask.
+//
+template <class charT, class traits, class Allocator>
+inline bool RE_CALL reg_expression<charT, traits, Allocator>::can_start(charT c, const unsigned char* __map, unsigned char mask, const __wide_type&)
+{
+   const bool has_map_slot = ((traits_size_type)(traits_uchar_type)c < 256);
+   if(has_map_slot)
+      return JM_MAKE_BOOL(__map[(traits_uchar_type)c] & mask);
+   return true;
+}
+
+//
+// can_start (narrow character overload):
+// looks c up in __map and tests the entry against mask.
+//
+template <class charT, class traits, class Allocator>
+inline bool RE_CALL reg_expression<charT, traits, Allocator>::can_start(charT c, const unsigned char* __map, unsigned char mask, const __narrow_type&)
+{
+   const unsigned char entry = __map[(traits_uchar_type)c];
+   return JM_MAKE_BOOL(entry & mask);
+}
+
+//
+// construct an empty expression whose storage is created from allocator a:
+//
+template <class charT, class traits, class Allocator>
+CONSTRUCTOR_INLINE reg_expression<charT, traits, Allocator>::reg_expression(const Allocator& a)
+   : regbase(),
+     data(a),
+     pkmp(0)
+{
+   // no expression is compiled here
+}
+
+//
+// construct from the string p, compiled with flags f:
+//
+template <class charT, class traits, class Allocator>
+CONSTRUCTOR_INLINE reg_expression<charT, traits, Allocator>::reg_expression(const charT* p, jm_uintfast32_t f, const Allocator& a)
+   : data(a),
+     pkmp(0)
+{
+   this->set_expression(p, f);
+}
+
+//
+// construct from the character range [p1, p2), compiled with flags f:
+//
+template <class charT, class traits, class Allocator>
+CONSTRUCTOR_INLINE reg_expression<charT, traits, Allocator>::reg_expression(const charT* p1, const charT* p2, jm_uintfast32_t f, const Allocator& a)
+   : data(a),
+     pkmp(0)
+{
+   this->set_expression(p1, p2, f);
+}
+
+//
+// construct from the len characters starting at p, compiled with flags f:
+//
+template <class charT, class traits, class Allocator>
+CONSTRUCTOR_INLINE reg_expression<charT, traits, Allocator>::reg_expression(const charT* p, size_type len, jm_uintfast32_t f, const Allocator& a)
+   : data(a),
+     pkmp(0)
+{
+   const charT* const finish = p + len;
+   set_expression(p, finish, f);
+}
+
+//
+// copy constructor:
+// the base sub-object is copy constructed from e; the expression itself
+// is only recompiled (a deep copy) when error_code() then reports 0.
+//
+template <class charT, class traits, class Allocator>
+reg_expression<charT, traits, Allocator>::reg_expression(const reg_expression<charT, traits, Allocator>& e)
+   : regbase(e),
+     data(e.allocator()),
+     pkmp(0)
+{
+   if(error_code() != 0)
+      return;
+   set_expression(e.expression(), e.flags());
+}
+
+//
+// destructor:
+// hands pkmp, when one has been set, back through kmp_free.
+//
+template <class charT, class traits, class Allocator>
+reg_expression<charT, traits, Allocator>::~reg_expression()
+{
+   if(pkmp == 0)
+      return;
+   kmp_free(pkmp, data.allocator());
+}
+
+//
+// assignment:
+// self assignment is a no-op.  Otherwise the flags are cleared, the error
+// state of e is recorded through fail(), and the expression is recompiled
+// from e (a deep copy) only when that error state is 0.
+//
+template <class charT, class traits, class Allocator>
+reg_expression<charT, traits, Allocator>& RE_CALL reg_expression<charT, traits, Allocator>::operator=(const reg_expression<charT, traits, Allocator>& e)
+{
+   if(this != &e)
+   {
+      _flags = 0;
+      fail(e.error_code());
+      if(error_code() == 0)
+         set_expression(e.expression(), e.flags());
+   }
+   return *this;
+}
+
+//
+// equality:
+// two expressions are equal when their flags match and their
+// expression strings compare equal under re_strcmp.
+//
+template <class charT, class traits, class Allocator>
+inline bool RE_CALL reg_expression<charT, traits, Allocator>::operator==(const reg_expression<charT, traits, Allocator>& e)
+{
+   if(_flags != e.flags())
+      return false;
+   return re_strcmp(expression(), e.expression()) == 0;
+}
+
+//
+// ordering:
+// expressions are ordered by their strings (re_strcmp); equal strings
+// are ordered by their flags.
+//
+template <class charT, class traits, class Allocator>
+bool RE_CALL reg_expression<charT, traits, Allocator>::operator<(const reg_expression<charT, traits, Allocator>& e)
+{
+   const int order = re_strcmp(expression(), e.expression());
+   if(order != 0)
+      return order < 0;
+   return _flags < e.flags();
+}
+
+//
+// allocator:
+// returns, by value, the allocator reported by the data member.
+//
+template <class charT, class traits, class Allocator>
+Allocator RE_CALL reg_expression<charT, traits, Allocator>::allocator()const
+{
+   return data.allocator();
+}
+
+//
+// parse_inner_set:
+// on entry first points at the "[" of an inner [...] construct.
+// first is advanced to just past the first close-set character, or to
+// last when there is none.
+// Returns:
+//   0 when there is no close-set character, when the construct is shorter
+//     than 5 characters, when the characters just inside the two brackets
+//     differ, or when that character is not a colon, dot or equals;
+//   for a colon construct of exactly 5 characters, the syntax type of the
+//     single character between the colons;
+//   otherwise the syntax type (colon, dot or equal) of the delimiter.
+//
+template <class charT, class traits, class Allocator>
+unsigned int RE_CALL reg_expression<charT, traits, Allocator>::parse_inner_set(const charT*& first, const charT* last)
+{
+   jm_assert(traits_type::syntax_type((traits_size_type)(traits_uchar_type)*first MAYBE_PASS_LOCALE(locale_inst)) == syntax_open_set);
+   const charT* const base = first;
+
+   // scan forward to the first close-set character:
+   for(; first != last; ++first)
+   {
+      if(traits_type::syntax_type((traits_size_type)(traits_uchar_type)*first MAYBE_PASS_LOCALE(locale_inst)) == syntax_close_set)
+         break;
+   }
+   if(first == last)
+      return 0;
+   ++first;
+
+   // need at least "[xyx]", with matching delimiters inside the brackets:
+   if(((first-base) < 5) || (*(base+1) != *(first-2)))
+      return 0;
+
+   const unsigned int delimiter = traits_type::syntax_type((traits_size_type)(traits_uchar_type)*(base+1) MAYBE_PASS_LOCALE(locale_inst));
+   switch(delimiter)
+   {
+   case syntax_colon:
+      if((first-base) == 5)
+         return traits_type::syntax_type((traits_size_type)(traits_uchar_type)*(base+2) MAYBE_PASS_LOCALE(locale_inst));
+      return delimiter;
+   case syntax_dot:
+   case syntax_equal:
+      return delimiter;
+   default:
+      break;
+   }
+   return 0;
+}
+
+
+//
+// skip_space:
+// advances first over characters of class char_class_space.
+// Returns true if first reaches last, false if it stops on a
+// non-space character.
+//
+template <class charT, class traits, class Allocator>
+bool RE_CALL reg_expression<charT, traits, Allocator>::skip_space(const charT*& first, const charT* last)
+{
+   for(; first != last; ++first)
+   {
+      if(!(traits_type::is_class(*first, char_class_space MAYBE_PASS_LOCALE(locale_inst)) == true))
+         break;
+   }
+   return first == last;
+}
+
+//
+// parse_range:
+// parses the bounds of a {x}, {x,} or {x,y} repeat.
+// On input ptr points to "{".  Characters of class space around the
+// numbers and the comma are skipped.  On success ptr is left on the
+// closing brace, min holds x and max holds y; {x} gives max == min and
+// {x,} gives max == (unsigned)-1.
+// When bk_braces is set in the flags the closing brace must be preceded
+// by the slash syntax character.
+// Anything else is reported through fail() and the function returns at
+// once: REG_EBRACE when the input ends early, REG_BADBR for a malformed
+// bound or terminator, REG_ERANGE when max < min.
+//
+template <class charT, class traits, class Allocator>
+void RE_CALL reg_expression<charT, traits, Allocator>::parse_range(const charT*& ptr, const charT* end, unsigned& min, unsigned& max)
+{
+   // step over the "{":
+   ++ptr;
+   if(skip_space(ptr, end))
+   {
+      fail(REG_EBRACE);
+      return;
+   }
+
+   // lower bound (mandatory):
+   if(traits_type::syntax_type((traits_size_type)(traits_uchar_type)*ptr MAYBE_PASS_LOCALE(locale_inst)) != syntax_digit)
+   {
+      fail(REG_BADBR);
+      return;
+   }
+   min = traits_type::toi(ptr, end, 10 MAYBE_PASS_LOCALE(locale_inst));
+   if(skip_space(ptr, end))
+   {
+      fail(REG_EBRACE);
+      return;
+   }
+
+   // upper bound (optional):
+   if(traits_type::syntax_type((traits_size_type)(traits_uchar_type)*ptr MAYBE_PASS_LOCALE(locale_inst)) != syntax_comma)
+   {
+      max = min;
+   }
+   else
+   {
+      // we have a second interval:
+      ++ptr;
+      if(skip_space(ptr, end))
+      {
+         fail(REG_EBRACE);
+         return;
+      }
+      if(traits_type::syntax_type((traits_size_type)(traits_uchar_type)*ptr MAYBE_PASS_LOCALE(locale_inst)) == syntax_digit)
+         max = traits_type::toi(ptr, end, 10 MAYBE_PASS_LOCALE(locale_inst));
+      else
+         max = (unsigned)-1;
+   }
+
+   // validate input:
+   if(skip_space(ptr, end))
+   {
+      fail(REG_EBRACE);
+      return;
+   }
+   if(max < min)
+   {
+      fail(REG_ERANGE);
+      return;
+   }
+
+   if(_flags & bk_braces)
+   {
+      // the closing brace must be escaped:
+      if(traits_type::syntax_type((traits_size_type)(traits_uchar_type)*ptr MAYBE_PASS_LOCALE(locale_inst)) != syntax_slash)
+      {
+         fail(REG_BADBR);
+         return;
+      }
+      ++ptr;
+      if(ptr == end)
+      {
+         fail(REG_BADBR);
+         return;
+      }
+   }
+
+   // whatever we are looking at now has to be the closing brace:
+   if(traits_type::syntax_type((traits_size_type)(traits_uchar_type)*ptr MAYBE_PASS_LOCALE(locale_inst)) != syntax_close_brace)
+      fail(REG_BADBR);
+}
+
+//
+// parse_escape:
+// converts an escape sequence to the character it denotes.
+// first is expected to point at the character that selects the escape
+// (presumably the one following the escape character - set by the caller);
+// it is advanced past whatever the escape consumes.
+//
+// Recognised forms: the a/f/n/r/t/v/e control escapes, xHH and x{HHH}
+// (hexadecimal), cX (control character: X - '@'), digits (octal).
+// Any other character stands for itself.
+//
+// Errors are reported through fail(); in that case the return value is
+// (charT)0 unless a value had already been parsed, and first is never
+// moved beyond last.
+//
+template <class charT, class traits, class Allocator>
+charT RE_CALL reg_expression<charT, traits, Allocator>::parse_escape(const charT*& first, const charT* last)
+{
+   // start from a defined value so that error paths do not return an
+   // indeterminate character:
+   charT c = (charT)0;
+   if(first == last)
+   {
+      // nothing to read:
+      fail(REG_EESCAPE);
+      return c;
+   }
+   switch(traits_type::syntax_type(*first MAYBE_PASS_LOCALE(locale_inst)))
+   {
+   case syntax_a:
+      c = '\a';
+      ++first;
+      break;
+   case syntax_f:
+      c = '\f';
+      ++first;
+      break;
+   case syntax_n:
+      c = '\n';
+      ++first;
+      break;
+   case syntax_r:
+      c = '\r';
+      ++first;
+      break;
+   case syntax_t:
+      c = '\t';
+      ++first;
+      break;
+   case syntax_v:
+      c = '\v';
+      ++first;
+      break;
+   case syntax_x:
+      ++first;
+      if(first == last)
+      {
+         fail(REG_EESCAPE);
+         break;
+      }
+      // maybe have \x{ddd}
+      if(traits_type::syntax_type(*first MAYBE_PASS_LOCALE(locale_inst)) == syntax_open_brace)
+      {
+         ++first;
+         if(first == last)
+         {
+            fail(REG_EESCAPE);
+            break;
+         }
+         if(traits_type::is_class(*first, char_class_xdigit MAYBE_PASS_LOCALE(locale_inst)) == false)
+         {
+            fail(REG_BADBR);
+            break;
+         }
+         c = (charT)traits_type::toi(first, last, -16 MAYBE_PASS_LOCALE(locale_inst));
+         if(first == last)
+         {
+            // input ended before the closing brace: report it, but leave
+            // first at last rather than stepping beyond the end:
+            fail(REG_BADBR);
+            break;
+         }
+         if(traits_type::syntax_type(*first MAYBE_PASS_LOCALE(locale_inst)) != syntax_close_brace)
+         {
+            fail(REG_BADBR);
+         }
+         ++first;
+         break;
+      }
+      else
+      {
+         if(traits_type::is_class(*first, char_class_xdigit MAYBE_PASS_LOCALE(locale_inst)) == false)
+         {
+            fail(REG_BADBR);
+            break;
+         }
+         c = (charT)traits_type::toi(first, last, -16 MAYBE_PASS_LOCALE(locale_inst));
+      }
+      break;
+   case syntax_c:
+      ++first;
+      if(first == last)
+      {
+         fail(REG_EESCAPE);
+         break;
+      }
+      // only '@' .. 127 map onto a control character:
+      if(((traits_uchar_type)(*first) < (traits_uchar_type)'@')
+         || ((traits_uchar_type)(*first) > (traits_uchar_type)127) )
+      {
+         fail(REG_EESCAPE);
+         return (charT)0;
+      }
+      c = (charT)((traits_uchar_type)(*first) - (traits_uchar_type)'@');
+      ++first;
+      break;
+   case syntax_e:
+      c = (charT)27;
+      ++first;
+      break;
+   case syntax_digit:
+      c = (charT)traits_type::toi(first, last, -8 MAYBE_PASS_LOCALE(locale_inst));
+      break;
+   default:
+      // not a special escape, the character stands for itself:
+      c = *first;
+      ++first;
+   }
+   return c;
+}
+
+//
+// compile_maps:
+// fills in startmap for the whole expression, then walks the node chain
+// up to the terminating match node, resetting can_be_null on each node
+// and recomputing it through compile_map.  Alternative and repeat nodes
+// also get their own 256 entry __map built: mask_take for the branch at
+// next (bounded by the alt target), mask_skip for the branch at alt.
+// The match node itself is marked mask_all.
+//
+template <class charT, class traits, class Allocator>
+void RE_CALL reg_expression<charT, traits, Allocator>::compile_maps()
+{
+   re_syntax_base* record = (re_syntax_base*)data.data();
+   // always compile the first __map:
+   memset(startmap, 0, 256);
+   record->can_be_null = 0;
+   compile_map(record, startmap, NULL, mask_all);
+
+   for(; record->type != syntax_element_match; record = record->next.p)
+   {
+      record->can_be_null = 0;
+      if((record->type != syntax_element_alt) && (record->type != syntax_element_rep))
+      {
+         // no __map of its own, just the null flag:
+         compile_map(record, NULL, &(record->can_be_null), mask_all);
+         continue;
+      }
+      re_jump* jump = (re_jump*)record;
+      memset(&(jump->__map), 0, 256);
+      compile_map(record->next.p, jump->__map, &(record->can_be_null), mask_take, jump->alt.p);
+      compile_map(jump->alt.p, jump->__map, &(record->can_be_null), mask_skip);
+   }
+   record->can_be_null = mask_all;
+}
+
+//
+// re_maybe_set_member:
+// quick test of c against the single-character strings stored directly
+// after set: only the first character of each null terminated string is
+// compared with the translated c.  A hit returns the opposite of
+// set->isnot, a miss returns set->isnot.
+//
+template <class charT, class traits_type, class Allocator>
+bool RE_CALL re_maybe_set_member(charT c,
+                                 re_set_long* set,
+                                 const reg_expression<charT, traits_type, Allocator>& e)
+{
+   const charT* p = (const charT*)(set+1);
+   const bool icase = e.flags() & regbase::icase;
+   const charT col = traits_type::translate(c, icase MAYBE_PASS_LOCALE(e.locale()));
+   for(unsigned int i = 0; i < set->csingles; ++i)
+   {
+      if(*p == col)
+         return !(set->isnot);
+
+      // step over the rest of this string and its terminating null:
+      while(*p)
+         ++p;
+      ++p;
+   }
+   return set->isnot ? true : false;
+}
+
+template <class charT, class traits, class Allocator> /*
+ * probe_start:
+ * reports whether character cc could be the first character matched when
+ * matching begins at node.  Nodes that say nothing about the next
+ * character (marks, line/buffer start, word boundary, restart-continue)
+ * are skipped by recursing on node->next.p; alternatives and repeats take
+ * the OR of their branches.  terminal is compared against the successor
+ * of a backward jump: when the two are equal the probe stops and reports
+ * true.  Node types not listed in the switch yield false.
+ */
+bool RE_CALL reg_expression<charT, traits, Allocator>::probe_start(
+ re_syntax_base* node, charT cc, re_syntax_base* terminal) const
+{
+ unsigned int c;
+
+ switch(node->type)
+ {
+ case syntax_element_startmark:
+ case syntax_element_endmark:
+ case syntax_element_start_line:
+ case syntax_element_word_boundary:
+ case syntax_element_buffer_start:
+ case syntax_element_restart_continue:
+ // doesn't tell us anything about the next character, so:
+ return probe_start(node->next.p, cc, terminal);
+ case syntax_element_literal:
+ // only the first character of the literal can match:
+ // note these have already been translated:
+ if(*(charT*)(((re_literal*)node)+1) == traits_type::translate(cc, (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst)))
+ return true;
+ return false;
+ case syntax_element_end_line:
+ // next character (if there is one!) must be a newline:
+ if(traits_type::is_separator(traits_type::translate(cc, (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst))))
+ return true;
+ return false;
+ case syntax_element_wild:
+ return true;
+ case syntax_element_match:
+ return true;
+ case syntax_element_within_word:
+ case syntax_element_word_start:
+ return traits_type::is_class(traits_type::translate(cc, (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst)), char_class_word MAYBE_PASS_LOCALE(locale_inst));
+ case syntax_element_word_end:
+ // what follows must not be a word character,
+ return traits_type::is_class(traits_type::translate(cc, (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst)), char_class_word MAYBE_PASS_LOCALE(locale_inst)) ? false : true;
+ case syntax_element_buffer_end:
+ // we can be null, nothing must follow,
+ // NB we assume that this is followed by
+ // syntax_element_match, if it's not then we can
+ // never match anything anyway!!
+ return false;
+ case syntax_element_soft_buffer_end:
+ // we can be null, only newlines must follow,
+ // NB we assume that this is followed by
+ // syntax_element_match, if it's not then we can
+ // never match anything anyway!!
+ return traits_type::is_separator(traits_type::translate(cc, (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst)));
+ case syntax_element_backref:
+ // there's no easy way to determine this
+ // which is not to say it can't be done!
+ // for now:
+ return true;
+ case syntax_element_long_set:
+ // we can not be null,
+ // first try the quick first-character test, then fall back
+ // to the full set membership test on the one character [&cc, &cc+1):
+ return re_maybe_set_member(cc, (re_set_long*)node, *this) || re_is_set_member((const charT*)&cc, (const charT*)(&cc+1), (re_set_long*)node, *this) != &cc;
+ case syntax_element_set:
+ // look the translated character up in the set's own __map:
+ c = (traits_size_type)(traits_uchar_type)traits_type::translate(cc, (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst));
+ return ((re_set*)node)->__map[c] != 0;
+ case syntax_element_jump:
+ if(((re_jump*)node)->alt.p < node)
+ {
+ // backwards jump,
+ // caused only by end of repeat section, we'll treat this
+ // the same as a match, because the sub-expression has matched.
+ // this is only caused by NULL repeats as in "(a*)*" or "(\<)*"
+ // these are really nonsense and make the matching code much
+ // harder, it would be nice to get rid of them altogether.
+ if(node->next.p == terminal)
+ return true;
+ else
+ return probe_start(((re_jump*)node)->alt.p, cc, terminal);
+ }
+ else
+ // take the jump and compile:
+ return probe_start(((re_jump*)node)->alt.p, cc, terminal);
+ case syntax_element_alt:
+ // we need to take the OR of the two alternatives:
+ return probe_start(((re_jump*)node)->alt.p, cc, terminal) || probe_start(node->next.p, cc, terminal);
+ case syntax_element_rep:
+ // we need to take the OR of the two alternatives;
+ // the skip branch only counts when the repeat may match zero times:
+ if(((re_repeat*)node)->min == 0)
+ return probe_start(node->next.p, cc, ((re_jump*)node)->alt.p) || probe_start(((re_jump*)node)->alt.p, cc, terminal);
+ else
+ return probe_start(node->next.p, cc, ((re_jump*)node)->alt.p);
+ case syntax_element_combining:
+ return !traits_type::is_combining(traits_type::translate(cc, (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst)));
+ }
+ return false;
+}
+
+//
+// probe_start_null:
+// reports whether the node chain starting at node can reach a match (or
+// a buffer end / back reference node) without passing a node that has to
+// consume a character.  terminal plays the same role as in probe_start:
+// a backward jump whose successor is terminal counts as a match.
+// Node types not listed in the switch yield false.
+//
+template <class charT, class traits, class Allocator>
+bool RE_CALL reg_expression<charT, traits, Allocator>::probe_start_null(re_syntax_base* node, re_syntax_base* terminal)const
+{
+   switch(node->type)
+   {
+   case syntax_element_startmark:
+   case syntax_element_endmark:
+   case syntax_element_start_line:
+   case syntax_element_word_boundary:
+   case syntax_element_buffer_start:
+   case syntax_element_restart_continue:
+   case syntax_element_end_line:
+   case syntax_element_word_end:
+      // consumes nothing, so the answer depends on what follows:
+      return probe_start_null(node->next.p, terminal);
+
+   case syntax_element_match:
+   case syntax_element_buffer_end:
+   case syntax_element_soft_buffer_end:
+   case syntax_element_backref:
+      return true;
+
+   case syntax_element_jump:
+      {
+         re_syntax_base* target = ((re_jump*)node)->alt.p;
+         // a backwards jump is caused only by the end of a repeat
+         // section (NULL repeats as in "(a*)*" or "(\<)*"); when it
+         // closes the section we were asked about, treat it as a match
+         // because the sub-expression has matched:
+         if((target < node) && (node->next.p == terminal))
+            return true;
+         // otherwise take the jump, whichever direction it goes:
+         return probe_start_null(target, terminal);
+      }
+
+   case syntax_element_alt:
+      // either alternative will do, the jump target is tried first:
+      if(probe_start_null(((re_jump*)node)->alt.p, terminal))
+         return true;
+      return probe_start_null(node->next.p, terminal);
+
+   case syntax_element_rep:
+      // only need to consider skipping the repeat:
+      return probe_start_null(((re_jump*)node)->alt.p, terminal);
+
+   default:
+      break;
+   }
+   return false;
+}
+
+//
+// compile_map:
+// when __map is not null, ORs mask into __map[i] for each of the 256
+// character values i for which probe_start(node, i, terminal) is true.
+// When pnull is not null, ORs mask into *pnull if
+// probe_start_null(node, terminal) is true.
+//
+template <class charT, class traits, class Allocator>
+void RE_CALL reg_expression<charT, traits, Allocator>::compile_map(
+   re_syntax_base* node, unsigned char* __map,
+   unsigned int* pnull, unsigned char mask, re_syntax_base* terminal)const
+{
+   if(__map != 0)
+   {
+      unsigned int value = 0;
+      while(value < 256)
+      {
+         if(probe_start(node, (charT)value, terminal))
+            __map[value] |= mask;
+         ++value;
+      }
+   }
+   if(pnull == 0)
+      return;
+   if(probe_start_null(node, terminal))
+      *pnull |= mask;
+}
+
+//
+// move_offsets:
+// called after an insert: starting with the node that j links to, adds
+// size to the next offset of every node in the chain, and to the alt
+// offset of repeat, jump and alternative nodes.  The walk stops at the
+// node whose next offset was 0 before the adjustment.
+//
+template <class charT, class traits, class Allocator>
+void RE_CALL reg_expression<charT, traits, Allocator>::move_offsets(re_syntax_base* j, unsigned size)
+{
+   j = (re_syntax_base*)((const char*)data.data() + j->next.i);
+   for(;;)
+   {
+      const bool has_alt = (j->type == syntax_element_rep)
+                        || (j->type == syntax_element_jump)
+                        || (j->type == syntax_element_alt);
+      if(has_alt)
+         ((re_jump*)j)->alt.i += size;
+      j->next.i += size;
+
+      // an offset equal to size was 0 before we touched it: end of chain
+      if(j->next.i == size)
+         break;
+      j = (re_syntax_base*)((const char*)data.data() + j->next.i);
+   }
+}
+
+//
+// compile_set_simple:
+// builds a set node that holds only the character class(es) in cls,
+// negated when isnot is true.  When dat is not null the data buffer is
+// aligned and dat is linked to the position where the new node will go.
+// The node type (long or byte-map set) follows is_byte<charT>::width_type.
+//
+template <class charT, class traits, class Allocator>
+re_syntax_base* RE_CALL reg_expression<charT, traits, Allocator>::compile_set_simple(re_syntax_base* dat, unsigned long cls, bool isnot)
+{
+   // only the class stack gets an entry, the others stay empty:
+   jstack<re_str<charT>, Allocator> no_singles(64, data.allocator());
+   jstack<re_str<charT>, Allocator> no_ranges(64, data.allocator());
+   jstack<jm_uintfast32_t, Allocator> class_stack(64, data.allocator());
+   jstack<re_str<charT>, Allocator> no_equivalents(64, data.allocator());
+   class_stack.push(cls);
+
+   if(dat != 0)
+   {
+      data.align();
+      dat->next.i = data.size();
+   }
+   return compile_set_aux(no_singles, no_ranges, class_stack, no_equivalents, isnot, is_byte<charT>::width_type());
+}
+
+//
+// compile_set:
+// parses a bracket expression and compiles it into a set node.
+// On entry first points at the opening "[" (asserted); it is advanced
+// as the set is consumed.
+//
+// While parsing, four stacks are filled: single characters / collating
+// elements, range end points (pushed in pairs, start then end), character
+// class masks and equivalence class keys.  l records what the previous
+// item was, so that "a-z" can be told apart from a literal dash.
+//
+// Returns the node produced by compile_set_aux, a word start / word end
+// node for [[:<:]] / [[:>:]], or NULL on failure.  Most failures call
+// fail() first; running off the end of the input without a closing
+// bracket returns NULL without doing so.
+//
+// first is never dereferenced once it has reached last.
+//
+template <class charT, class traits, class Allocator>
+re_syntax_base* RE_CALL reg_expression<charT, traits, Allocator>::compile_set(const charT*& first, const charT* last)
+{
+   jstack<re_str<charT>, Allocator> singles(64, data.allocator());
+   jstack<re_str<charT>, Allocator> ranges(64, data.allocator());
+   jstack<jm_uintfast32_t, Allocator> classes(64, data.allocator());
+   jstack<re_str<charT>, Allocator> equivalents(64, data.allocator());
+   bool has_digraphs = false;
+   jm_assert(traits_type::syntax_type((traits_size_type)(traits_uchar_type)*first MAYBE_PASS_LOCALE(locale_inst)) == syntax_open_set);
+   ++first;
+   bool started = false;
+   bool done = false;
+   bool isnot = false;
+
+   enum last_type
+   {
+      last_single,
+      last_none,
+      last_dash
+   };
+
+   unsigned l = last_none;
+   re_str<charT> s;
+
+   while((first != last) && !done)
+   {
+      traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
+      switch(traits_type::syntax_type(c MAYBE_PASS_LOCALE(locale_inst)))
+      {
+      case syntax_caret:
+         // only a leading caret negates the set:
+         if(!started && !isnot)
+         {
+            isnot = true;
+         }
+         else
+         {
+            s = (charT)c;
+            goto char_set_literal;
+         }
+         break;
+      case syntax_open_set:
+      {
+         if((_flags & char_classes) == 0)
+         {
+            s = (charT)c;
+            goto char_set_literal;
+         }
+         // check to see if we really have a class:
+         const charT* base = first;
+         switch(parse_inner_set(first, last))
+         {
+         case syntax_colon:
+            {
+               if(l == last_dash)
+               {
+                  fail(REG_ERANGE);
+                  return NULL;
+               }
+               jm_uintfast32_t id = traits_type::lookup_classname(base+2, first-2 MAYBE_PASS_LOCALE(locale_inst));
+               if(_flags & regbase::icase)
+               {
+                  // case is ignored, so upper and lower both mean alpha:
+                  if((id == char_class_upper) || (id == char_class_lower))
+                  {
+                     id = char_class_alpha;
+                  }
+               }
+               if(id == 0)
+               {
+                  fail(REG_ECTYPE);
+                  return NULL;
+               }
+               classes.push(id);
+               started = true;
+               l = last_none;
+            }
+            break;
+         case syntax_dot:
+            //
+            // we have a collating element [.collating-name.]
+            //
+            if(traits_type::lookup_collatename(s, base+2, first-2 MAYBE_PASS_LOCALE(locale_inst)))
+            {
+               --first;
+               if(s.size() > 1)
+                  has_digraphs = true;
+               goto char_set_literal;
+            }
+            fail(REG_ECOLLATE);
+            return NULL;
+         case syntax_equal:
+            //
+            // we have an equivalence class [=collating-name=]
+            //
+            if(traits_type::lookup_collatename(s, base+2, first-2 MAYBE_PASS_LOCALE(locale_inst)))
+            {
+               unsigned i = 0;
+               while(s[i])
+               {
+                  s[i] = traits_type::translate(s[i], (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst));
+                  ++i;
+               }
+               re_str<charT> s2;
+               traits_type::transform_primary(s2, s MAYBE_PASS_LOCALE(locale_inst));
+               equivalents.push(s2);
+               started = true;
+               l = last_none;
+               break;
+            }
+            fail(REG_ECOLLATE);
+            return NULL;
+         case syntax_left_word:
+            // [[:<:]] - the inner construct must be the whole set, and
+            // there must be a character left to close it:
+            if((started == false) && (first != last)
+               && (traits_type::syntax_type((traits_size_type)(traits_uchar_type)*first MAYBE_PASS_LOCALE(locale_inst)) == syntax_close_set))
+            {
+               ++first;
+               return add_simple(0, syntax_element_word_start);
+            }
+            fail(REG_EBRACK);
+            return NULL;
+         case syntax_right_word:
+            // [[:>:]] - as above:
+            if((started == false) && (first != last)
+               && (traits_type::syntax_type((traits_size_type)(traits_uchar_type)*first MAYBE_PASS_LOCALE(locale_inst)) == syntax_close_set))
+            {
+               ++first;
+               return add_simple(0, syntax_element_word_end);
+            }
+            fail(REG_EBRACK);
+            return NULL;
+         default:
+            if(started == false)
+            {
+               // not a class: a "[" opening the set is a literal unless
+               // it is followed by one of the class delimiters.  When
+               // nothing follows at all there is no delimiter to read:
+               bool literal_bracket = (base + 1 == last);
+               if(!literal_bracket)
+               {
+                  unsigned int t = traits_type::syntax_type((traits_size_type)(traits_uchar_type)*(base+1) MAYBE_PASS_LOCALE(locale_inst));
+                  literal_bracket = (t != syntax_colon) && (t != syntax_dot) && (t != syntax_equal);
+               }
+               if(literal_bracket)
+               {
+                  first = base;
+                  s = (charT)c;
+                  goto char_set_literal;
+               }
+            }
+            fail(REG_EBRACK);
+            return NULL;
+         }
+         if(first == last)
+         {
+            fail(REG_EBRACK);
+            return NULL;
+         }
+         continue;
+      }
+      case syntax_close_set:
+         // a "]" straight after the opening is a literal:
+         if(started == false)
+         {
+            s = (charT)c;
+            goto char_set_literal;
+         }
+         done = true;
+         break;
+      case syntax_dash:
+         if(!started)
+         {
+            s = (charT)c;
+            goto char_set_literal;
+         }
+         ++first;
+         if(first == last)
+         {
+            // the set ends on a dash with no closing bracket:
+            fail(REG_EBRACK);
+            return NULL;
+         }
+         if(traits_type::syntax_type((traits_size_type)(traits_uchar_type)*first MAYBE_PASS_LOCALE(locale_inst)) == syntax_close_set)
+         {
+            // a dash just before the closing bracket is a literal:
+            --first;
+            s = (charT)c;
+            goto char_set_literal;
+         }
+         if((singles.empty() == true) || (l != last_single))
+         {
+            fail(REG_ERANGE);
+            return NULL;
+         }
+         ranges.push(singles.peek());
+         if(singles.peek().size() <= 1) // leave digraphs and ligatures in place
+            singles.pop();
+         l = last_dash;
+         continue;
+      case syntax_slash:
+         if(_flags & regbase::escape_in_lists)
+         {
+            ++first;
+            if(first == last)
+               continue;
+            // class escapes select a character class, anything else is
+            // handed to parse_escape:
+            jm_uintfast32_t escape_class = 0;
+            switch(traits_type::syntax_type(*first MAYBE_PASS_LOCALE(locale_inst)))
+            {
+            case syntax_w:
+               escape_class = char_class_word;
+               break;
+            case syntax_d:
+               escape_class = char_class_digit;
+               break;
+            case syntax_s:
+               escape_class = char_class_space;
+               break;
+            case syntax_l:
+               escape_class = char_class_lower;
+               break;
+            case syntax_u:
+               escape_class = char_class_upper;
+               break;
+            case syntax_W:
+            case syntax_D:
+            case syntax_S:
+            case syntax_U:
+            case syntax_L:
+               // negated classes are not supported inside a set:
+               fail(REG_EESCAPE);
+               return NULL;
+            default:
+               c = parse_escape(first, last);
+               --first;
+               s = (charT)c;
+               goto char_set_literal;
+            }
+            // only the class escapes get here; a class can not end a range:
+            if(l == last_dash)
+            {
+               fail(REG_ERANGE);
+               return NULL;
+            }
+            classes.push(escape_class);
+            started = true;
+            l = last_none;
+            ++first;
+            continue;
+         }
+         else
+         {
+            s = (charT)c;
+            goto char_set_literal;
+         }
+      default:
+         s = (charT)c;
+         char_set_literal:
+         // s holds a literal (possibly a multi-character collating
+         // element); translate it and file it as a single or a range end:
+         unsigned i = 0;
+         while(s[i])
+         {
+            s[i] = traits_type::translate(s[i], (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst));
+            ++i;
+         }
+         started = true;
+         if(l == last_dash)
+         {
+            ranges.push(s);
+            l = last_none;
+            if(s.size() > 1) // add ligatures to singles list as well
+               singles.push(s);
+         }
+         else
+         {
+            singles.push(s);
+            l = last_single;
+         }
+      }
+      ++first;
+   }
+   if(!done)
+      return NULL;
+
+   re_syntax_base* result;
+   if(has_digraphs)
+      result = compile_set_aux(singles, ranges, classes, equivalents, isnot, __wide_type());
+   else
+      result = compile_set_aux(singles, ranges, classes, equivalents, isnot, is_byte<charT>::width_type());
+#ifdef __BORLANDC__
+   // delayed throw:
+   if((result == 0) && (_flags & regbase::use_except))
+      fail(code);
+#endif
+   return result;
+}
+
+//
+// compile_set_aux (wide / long set version):
+// appends a re_set_long header to the data buffer, followed by
+//   - every single, as a null terminated string,
+//   - every range, as two null terminated strings (the end point that
+//     was on top of the stack first), transformed for collation unless
+//     nocollate is set,
+//   - every equivalence key, as a null terminated string.
+// The class masks are ORed together into the header.  All four stacks
+// are emptied.  The header is located again from data.data() once
+// everything has been appended, then filled in and returned.
+// A range whose top-of-stack end point orders before the other one is
+// reported as REG_ERANGE and NULL is returned.
+//
+template <class charT, class traits, class Allocator>
+re_syntax_base* RE_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(jstack<re_str<charT>, Allocator>& singles, jstack<re_str<charT>, Allocator>& ranges, jstack<jm_uintfast32_t, Allocator>& classes, jstack<re_str<charT>, Allocator>& equivalents, bool isnot, const __wide_type&)
+{
+   const size_type header_offset = data.size();
+   data.extend(sizeof(re_set_long));
+   unsigned int single_count = 0;
+   unsigned int range_count = 0;
+   jm_uintfast32_t class_mask = 0;
+   unsigned int equivalent_count = 0;
+   const bool nocollate_state = flags() & regbase::nocollate;
+
+   for(; !singles.empty(); singles.pop())
+   {
+      ++single_count;
+      const re_str<charT>& item = singles.peek();
+      unsigned bytes = (re_strlen(item.c_str()) + 1) * sizeof(charT);
+      memcpy((charT*)data.extend(bytes), item.c_str(), bytes);
+   }
+
+   while(!ranges.empty())
+   {
+      // the two end points come off the stack in reverse order:
+      re_str<charT> top, below;
+      if(nocollate_state)
+         top = ranges.peek();
+      else
+         traits_type::transform(top, ranges.peek() MAYBE_PASS_LOCALE(locale_inst));
+      ranges.pop();
+      if(nocollate_state)
+         below = ranges.peek();
+      else
+         traits_type::transform(below, ranges.peek() MAYBE_PASS_LOCALE(locale_inst));
+      ranges.pop();
+
+      if(top < below)
+      {
+         // for some reason bc5 crashes when throwing exceptions
+         // from here - probably an EH-compiler bug, but hard to
+         // be sure...
+         // delay throw to later:
+#ifdef __BORLANDC__
+         jm_uintfast32_t f = _flags;
+         _flags &= ~regbase::use_except;
+#endif
+         fail(REG_ERANGE);
+#ifdef __BORLANDC__
+         _flags = f;
+#endif
+         return NULL;
+      }
+      ++range_count;
+      unsigned bytes = (re_strlen(top.c_str()) + 1) * sizeof(charT);
+      memcpy(data.extend(bytes), top.c_str(), bytes);
+      bytes = (re_strlen(below.c_str()) + 1) * sizeof(charT);
+      memcpy(data.extend(bytes), below.c_str(), bytes);
+   }
+
+   for(; !classes.empty(); classes.pop())
+      class_mask |= classes.peek();
+
+   for(; !equivalents.empty(); equivalents.pop())
+   {
+      ++equivalent_count;
+      const re_str<charT>& item = equivalents.peek();
+      unsigned bytes = (re_strlen(item.c_str()) + 1) * sizeof(charT);
+      memcpy((charT*)data.extend(bytes), item.c_str(), bytes);
+   }
+
+   // now that nothing more is appended, find the header and fill it in:
+   re_set_long* dat = (re_set_long*)((unsigned char*)data.data() + header_offset);
+   dat->type = syntax_element_long_set;
+   dat->csingles = single_count;
+   dat->cranges = range_count;
+   dat->cclasses = class_mask;
+   dat->cequivalents = equivalent_count;
+   dat->isnot = isnot;
+   dat->next.i = -1;
+   return dat;
+}
+
+//
+// compile_set_aux (narrow / byte map version):
+// appends a zeroed re_set to the data buffer and marks, in its 256 entry
+// __map, every character value selected by the singles (first character
+// only), the ranges, the equivalence keys and the character classes.
+// Ranges are compared on collation-transformed strings unless nocollate
+// is set.  When isnot is true every entry is then logically inverted.
+// All four stacks are emptied.
+// A range whose top-of-stack end point orders before the other one is
+// reported as REG_ERANGE and NULL is returned.
+//
+template <class charT, class traits, class Allocator>
+re_syntax_base* RE_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(jstack<re_str<charT>, Allocator>& singles, jstack<re_str<charT>, Allocator>& ranges, jstack<jm_uintfast32_t, Allocator>& classes, jstack<re_str<charT>, Allocator>& equivalents, bool isnot, const __narrow_type&)
+{
+   re_set* dat = (re_set*)data.extend(sizeof(re_set));
+   memset(dat, 0, sizeof(re_set));
+   const bool nocollate_state = (flags() & regbase::nocollate) != 0;
+
+   for(; !singles.empty(); singles.pop())
+      dat->__map[(traits_size_type)(traits_uchar_type)*(singles.peek().c_str())] = mask_all;
+
+   while(!ranges.empty())
+   {
+      // the two end points come off the stack in reverse order:
+      re_str<charT> top, below, key, candidate;
+
+      if(nocollate_state)
+         top = ranges.peek();
+      else
+         traits_type::transform(top, ranges.peek() MAYBE_PASS_LOCALE(locale_inst));
+      ranges.pop();
+      if(nocollate_state)
+         below = ranges.peek();
+      else
+         traits_type::transform(below, ranges.peek() MAYBE_PASS_LOCALE(locale_inst));
+      ranges.pop();
+
+      if(top < below)
+      {
+         // for some reason bc5 crashes when throwing exceptions
+         // from here - probably an EH-compiler bug, but hard to
+         // be sure...
+         // delay throw to later:
+#ifdef __BORLANDC__
+         jm_uintfast32_t f = _flags;
+         _flags &= ~regbase::use_except;
+#endif
+         fail(REG_ERANGE);
+#ifdef __BORLANDC__
+         _flags = f;
+#endif
+         return NULL;
+      }
+
+      // mark every character value whose key lies within [below, top]:
+      for(unsigned int value = 0; value < 256; ++value)
+      {
+         candidate = (charT)value;
+         if(nocollate_state)
+            key = candidate;
+         else
+            traits_type::transform(key, candidate MAYBE_PASS_LOCALE(locale_inst));
+         if((key <= top) && (key >= below))
+            dat->__map[value] = mask_all;
+      }
+   }
+
+   for(; !equivalents.empty(); equivalents.pop())
+   {
+      re_str<charT> key, candidate;
+      for(unsigned int value = 0; value < 256; ++value)
+      {
+         candidate = (charT)value;
+         traits_type::transform_primary(key, candidate MAYBE_PASS_LOCALE(locale_inst));
+         if(key == equivalents.peek())
+            dat->__map[value] = mask_all;
+      }
+   }
+
+   // fold all the requested classes into one mask:
+   jm_uintfast32_t class_mask = 0;
+   for(; !classes.empty(); classes.pop())
+      class_mask |= classes.peek();
+
+   if(class_mask)
+   {
+      // the map is indexed by translated character:
+      for(unsigned int value = 0; value < 256; ++value)
+      {
+         if(traits_type::is_class(charT(value), class_mask MAYBE_PASS_LOCALE(locale_inst)))
+            dat->__map[(traits_uchar_type)traits_type::translate((charT)value, (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst))] = mask_all;
+      }
+   }
+
+   if(isnot)
+   {
+      for(unsigned int value = 0; value < 256; ++value)
+         dat->__map[value] = !dat->__map[value];
+   }
+
+   dat->type = syntax_element_set;
+   dat->next.i = -1;
+   return dat;
+}
+
+
+//
+// fixup_apply:
+// walks the node chain starting at b and converts the stored offsets
+// (next.i, and alt.i on repeat / jump / alternative nodes) into pointers
+// relative to b.  The walk stops at the node whose next offset is 0.
+// Along the way:
+//   - repeat nodes are numbered in order and the total is left in repeats,
+//   - each end-mark records its index as seen,
+//   - a back reference whose index is >= cbraces, or whose end-mark has
+//     not been seen yet, is reported as REG_ESUBREG and the walk is
+//     abandoned.
+// The temporary array of cbraces flags is released on every exit path,
+// including (when exceptions are enabled) an exception passing through.
+//
+template <class charT, class traits, class Allocator>
+void RE_CALL reg_expression<charT, traits, Allocator>::fixup_apply(re_syntax_base* b, unsigned cbraces)
+{
+   typedef JM_MAYBE_TYPENAME REBIND_TYPE(bool, Allocator) b_alloc;
+
+   unsigned char* base = (unsigned char*)b;
+   re_syntax_base* ptr = b;
+   bool* pb = 0;
+   b_alloc a(data.allocator());
+#ifndef JM_NO_EXCEPTIONS
+   try
+   {
+#endif
+      pb = a.allocate(cbraces);
+      for(unsigned i = 0; i < cbraces; ++i)
+         pb[i] = false;
+
+      repeats = 0;
+
+      while(ptr->next.i)
+      {
+         switch(ptr->type)
+         {
+         case syntax_element_rep:
+            ((re_jump*)ptr)->alt.p = (re_syntax_base*)(base + ((re_jump*)ptr)->alt.i);
+            ((re_repeat*)ptr)->id = repeats;
+            ++repeats;
+            break;
+         case syntax_element_jump:
+         case syntax_element_alt:
+            ((re_jump*)ptr)->alt.p = (re_syntax_base*)(base + ((re_jump*)ptr)->alt.i);
+            break;
+         case syntax_element_backref:
+            if((((re_brace*)ptr)->index >= cbraces) || (pb[((re_brace*)ptr)->index] == false) )
+            {
+               fail(REG_ESUBREG);
+               a.deallocate(pb, cbraces);
+               return;
+            }
+            break;
+         case syntax_element_endmark:
+            pb[((re_brace*)ptr)->index] = true;
+            break;
+         default:
+            break;
+         }
+         // every node has its next offset rebased, then we move on:
+         ptr->next.p = (re_syntax_base*)(base + ptr->next.i);
+         ptr = ptr->next.p;
+      }
+      a.deallocate(pb, cbraces);
+      pb = 0;
+#ifndef JM_NO_EXCEPTIONS
+   }
+   catch(...)
+   {
+      if(pb)
+         a.deallocate(pb, cbraces);
+      throw;
+   }
+#endif
+}
+
+
+//
+// set_expression:
+// Compiles the pattern [p, end) under the syntax flags f into the
+// raw state buffer "data".  States are appended with add_simple /
+// add_literal and linked by byte offsets while parsing; once the
+// whole pattern has been read the offsets are converted to pointers
+// by fixup_apply, the start maps are built by compile_maps, and the
+// restart strategy is chosen.
+// Errors are reported through fail(); the function returns the member
+// "code" on every exit path.
+//
+template <class charT, class traits, class Allocator>
+unsigned int RE_CALL reg_expression<charT, traits, Allocator>::set_expression(const charT* p, const charT* end, jm_uintfast32_t f)
+{
+ // the caller handed us our own stored expression: data.clear() below
+ // would invalidate it, so compile from a private copy instead
+ if(p == expression())
+ {
+ re_str<charT> s(p, end);
+ return set_expression(s.c_str(), f);
+ }
+#if defined(RE_LOCALE_C) || defined(RE_LOCALE_W32)
+ locale_initialiser.update();
+#else
+ if(JM_HAS_FACET(locale_inst, regfacet<charT>) == false)
+ {
+#ifdef _MSC_VER
+ locale_inst = __JM_STD::_ADDFAC(locale_inst, new regfacet<charT>());
+#else
+ locale_inst = __JM_STD::locale(locale_inst, new regfacet<charT>());
+#endif
+ }
+ JM_USE_FACET(locale_inst, regfacet<charT>).update(locale_inst);
+#endif
+ const charT* base = p;
+ data.clear();
+ _flags = f;
+ fail(REG_NOERROR); // clear any error
+
+ if(p >= end)
+ {
+ fail(REG_EMPTY);
+ return code;
+ }
+
+ const charT* ptr = p;
+ marks = 0;
+ // mark:   offsets of the open '(' states and of the jumps pushed by '|'
+ // markid: sub-expression index belonging to each open '('
+ jstack<unsigned int, Allocator> mark(64, data.allocator());
+ jstack<unsigned int, Allocator> markid(64, data.allocator());
+ unsigned int last_mark_popped = 0;
+ register traits_size_type c;
+ register re_syntax_base* dat;
+
+ unsigned rep_min, rep_max;
+
+ //
+ // set up header:
+ //
+ ++marks;
+ dat = 0;
+
+ // literal mode: every character is taken verbatim, which also leaves
+ // ptr == end so that the parsing loop below is skipped entirely
+ if(_flags & regbase::literal)
+ {
+ while(ptr != end)
+ {
+ dat = add_literal(dat, traits::translate(*ptr, (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst)));
+ ++ptr;
+ }
+ }
+
+ while (ptr < end)
+ {
+ c = (traits_size_type)(traits_uchar_type)*ptr;
+ switch(traits_type::syntax_type(c MAYBE_PASS_LOCALE(locale_inst)))
+ {
+ case syntax_open_bracket:
+ if(_flags & bk_parens)
+ {
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ continue;
+ }
+ open_bracked_jump:
+ // extend:
+ dat = add_simple(dat, syntax_element_startmark, sizeof(re_brace));
+ markid.push(marks);
+ ((re_brace*)dat)->index = marks++;
+ mark.push(data.index(dat));
+ ++ptr;
+ break;
+ case syntax_close_bracket:
+ if(_flags & bk_parens)
+ {
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ continue;
+ }
+
+ close_bracked_jump:
+ if(dat)
+ {
+ data.align();
+ dat->next.i = data.size();
+ }
+
+ if(mark.empty())
+ {
+ fail(REG_EPAREN);
+ return code;
+ }
+ // see if we have an empty alternative:
+ if(mark.peek() == data.index(dat) )
+ {
+ re_syntax_base* para = (re_syntax_base*)((char*)data.data() + mark.peek());
+ if(para->type == syntax_element_jump)
+ {
+ fail(REG_EMPTY);
+ return code;
+ }
+ }
+
+ // pop any pushed alternatives and set the target end destination:
+ dat = (re_syntax_base*)((unsigned char*)data.data() + mark.peek());
+ while(dat->type == syntax_element_jump)
+ {
+ ((re_jump*)dat)->alt.i = data.size();
+ mark.pop();
+ // test for exhaustion *before* peeking: an unmatched ')' that follows
+ // an alternation (e.g. "a|b)") empties the stack here, and the top of
+ // an empty stack must not be read
+ if(mark.empty())
+ {
+ fail(REG_EPAREN);
+ return code;
+ }
+ dat = (re_jump*)((unsigned char*)data.data() + mark.peek());
+ }
+
+ dat = add_simple(0, syntax_element_endmark, sizeof(re_brace));
+ ((re_brace*)dat)->index = markid.peek();
+ markid.pop();
+ // remembered so that a following repeat operator can wrap the whole group
+ last_mark_popped = mark.peek();
+ mark.pop();
+ ++ptr;
+ break;
+ case syntax_char:
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ break;
+ case syntax_slash:
+ if(++ptr == end)
+ {
+ fail(REG_EESCAPE);
+ return code;
+ }
+ c = (traits_size_type)(traits_uchar_type)*ptr;
+ // dispatch on the escaped character; a case that ends in "break"
+ // falls out to the add_literal() call after this inner switch
+ switch(traits_type::syntax_type(c MAYBE_PASS_LOCALE(locale_inst)))
+ {
+ case syntax_open_bracket:
+ if(_flags & bk_parens)
+ goto open_bracked_jump;
+ break;
+ case syntax_close_bracket:
+ if(_flags & bk_parens)
+ goto close_bracked_jump;
+ break;
+ case syntax_plus:
+ if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
+ {
+ rep_min = 1;
+ rep_max = (unsigned)-1;
+ goto repeat_jump;
+ }
+ break;
+ case syntax_question:
+ if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
+ {
+ rep_min = 0;
+ rep_max = 1;
+ goto repeat_jump;
+ }
+ break;
+ case syntax_or:
+ if(((_flags & bk_vbar) == 0) || (_flags & limited_ops))
+ break;
+ goto alt_string_jump;
+ case syntax_open_brace:
+ if( ((_flags & bk_braces) == 0) || ((_flags & intervals) == 0))
+ break;
+
+ // we have {x} or {x,} or {x,y}:
+ parse_range(ptr, end, rep_min, rep_max);
+ goto repeat_jump;
+
+ case syntax_digit:
+ if(_flags & bk_refs)
+ {
+ // update previous:
+ int i = traits_type::toi((charT)c MAYBE_PASS_LOCALE(locale_inst));
+ if(i == 0)
+ {
+ // we can have \025 which means take char whose
+ // code is 25 (octal), so parse string:
+ c = traits_type::toi(ptr, end, -8 MAYBE_PASS_LOCALE(locale_inst));
+ --ptr;
+ break;
+ }
+ dat = add_simple(dat, syntax_element_backref, sizeof(re_brace));
+ ((re_brace*)dat)->index = i;
+ ++ptr;
+ continue;
+ }
+ break;
+ case syntax_b: // syntax_element_word_boundary
+ dat = add_simple(dat, syntax_element_word_boundary);
+ ++ptr;
+ continue;
+ case syntax_B:
+ dat = add_simple(dat, syntax_element_within_word);
+ ++ptr;
+ continue;
+ case syntax_left_word:
+ dat = add_simple(dat, syntax_element_word_start);
+ ++ptr;
+ continue;
+ case syntax_right_word:
+ dat = add_simple(dat, syntax_element_word_end);
+ ++ptr;
+ continue;
+ case syntax_w: //syntax_element_word_char
+ dat = compile_set_simple(dat, char_class_word);
+ ++ptr;
+ continue;
+ case syntax_W:
+ dat = compile_set_simple(dat, char_class_word, true);
+ ++ptr;
+ continue;
+ case syntax_d: //syntax_element_word_char
+ dat = compile_set_simple(dat, char_class_digit);
+ ++ptr;
+ continue;
+ case syntax_D:
+ dat = compile_set_simple(dat, char_class_digit, true);
+ ++ptr;
+ continue;
+ case syntax_s: //syntax_element_word_char
+ dat = compile_set_simple(dat, char_class_space);
+ ++ptr;
+ continue;
+ case syntax_S:
+ dat = compile_set_simple(dat, char_class_space, true);
+ ++ptr;
+ continue;
+ case syntax_l: //syntax_element_word_char
+ dat = compile_set_simple(dat, char_class_lower);
+ ++ptr;
+ continue;
+ case syntax_L:
+ dat = compile_set_simple(dat, char_class_lower, true);
+ ++ptr;
+ continue;
+ case syntax_u: //syntax_element_word_char
+ dat = compile_set_simple(dat, char_class_upper);
+ ++ptr;
+ continue;
+ case syntax_U:
+ dat = compile_set_simple(dat, char_class_upper, true);
+ ++ptr;
+ continue;
+ case syntax_Q:
+ // quoted run: copy characters verbatim until an escaped syntax_E
+ ++ptr;
+ while(true)
+ {
+ if(ptr == end)
+ {
+ fail(REG_EESCAPE);
+ return code;
+ }
+ if(traits_type::syntax_type((traits_size_type)(traits_uchar_type)*ptr MAYBE_PASS_LOCALE(locale_inst)) == syntax_slash)
+ {
+ ++ptr;
+ if((ptr != end) && (traits_type::syntax_type((traits_size_type)(traits_uchar_type)*ptr MAYBE_PASS_LOCALE(locale_inst)) == syntax_E))
+ break;
+ else
+ {
+ dat = add_literal(dat, *(ptr-1));
+ continue;
+ }
+ }
+ dat = add_literal(dat, *ptr);
+ ++ptr;
+ }
+ ++ptr;
+ continue;
+ case syntax_C:
+ dat = add_simple(dat, syntax_element_wild);
+ ++ptr;
+ continue;
+ case syntax_X:
+ dat = add_simple(dat, syntax_element_combining);
+ ++ptr;
+ continue;
+ case syntax_Z:
+ dat = add_simple(dat, syntax_element_soft_buffer_end);
+ ++ptr;
+ continue;
+ case syntax_G:
+ dat = add_simple(dat, syntax_element_restart_continue);
+ ++ptr;
+ continue;
+ case syntax_start_buffer:
+ dat = add_simple(dat, syntax_element_buffer_start);
+ ++ptr;
+ continue;
+ case syntax_end_buffer:
+ dat = add_simple(dat, syntax_element_buffer_end);
+ ++ptr;
+ continue;
+ default:
+ c = (traits_size_type)(traits_uchar_type)parse_escape(ptr, end);
+ dat = add_literal(dat, (charT)c);
+ continue;
+ }
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ break;
+ case syntax_dollar:
+ dat = add_simple(dat, syntax_element_end_line, sizeof(re_syntax_base));
+ ++ptr;
+ continue;
+ case syntax_caret:
+ dat = add_simple(dat, syntax_element_start_line, sizeof(re_syntax_base));
+ ++ptr;
+ continue;
+ case syntax_dot:
+ dat = add_simple(dat, syntax_element_wild, sizeof(re_syntax_base));
+ ++ptr;
+ continue;
+ case syntax_star:
+ rep_min = 0;
+ rep_max = (unsigned)-1;
+
+ repeat_jump:
+ {
+ // offset: where in the buffer the item being repeated starts
+ unsigned offset;
+ if(dat == 0)
+ {
+ fail(REG_BADRPT);
+ return code;
+ }
+ switch(dat->type)
+ {
+ case syntax_element_endmark:
+ offset = last_mark_popped;
+ break;
+ case syntax_element_literal:
+ if(((re_literal*)dat)->length > 1)
+ {
+ // update previous:
+ // only the last character is repeated, so split it off
+ // into a literal state of its own
+ charT lit = *(charT*)((char*)dat + sizeof(re_literal) + ((((re_literal*)dat)->length-1)*sizeof(charT)));
+ --((re_literal*)dat)->length;
+ dat = add_simple(dat, syntax_element_literal, sizeof(re_literal) + sizeof(charT));
+ ((re_literal*)dat)->length = 1;
+ *((charT*)(((re_literal*)dat)+1)) = lit;
+ }
+ offset = (char*)dat - (char*)data.data();
+ break;
+ case syntax_element_backref:
+ case syntax_element_long_set:
+ case syntax_element_set:
+ case syntax_element_wild:
+ case syntax_element_combining:
+ // we're repeating a single item:
+ offset = (char*)dat - (char*)data.data();
+ break;
+ default:
+ fail(REG_BADRPT);
+ return code;
+ }
+ data.align();
+ dat->next.i = data.size();
+ //unsigned pos = (char*)dat - (char*)data.data();
+
+ // add the trailing jump:
+ add_simple(dat, syntax_element_jump, re_jump_size);
+
+ // now insert the leading repeater:
+ dat = (re_syntax_base*)data.insert(offset, re_repeater_size);
+ dat->next.i = ((char*)dat - (char*)data.data()) + re_repeater_size;
+ dat->type = syntax_element_rep;
+ ((re_repeat*)dat)->alt.i = data.size();
+ ((re_repeat*)dat)->min = rep_min;
+ ((re_repeat*)dat)->max = rep_max;
+ ((re_repeat*)dat)->leading = false;
+ move_offsets(dat, re_repeater_size);
+ // the trailing jump now sits at the very end of the buffer:
+ // point it back at the repeater
+ dat = (re_syntax_base*)((char*)data.data() + data.size() - re_jump_size);
+ ((re_repeat*)dat)->alt.i = offset;
+ ++ptr;
+ continue;
+ }
+ case syntax_plus:
+ if(_flags & (bk_plus_qm | limited_ops))
+ {
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ continue;
+ }
+ rep_min = 1;
+ rep_max = (unsigned)-1;
+ goto repeat_jump;
+ case syntax_question:
+ if(_flags & (bk_plus_qm | limited_ops))
+ {
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ continue;
+ }
+ rep_min = 0;
+ rep_max = 1;
+ goto repeat_jump;
+ case syntax_open_set:
+ // update previous:
+ if(dat)
+ {
+ data.align();
+ dat->next.i = data.size();
+ }
+ // extend:
+ dat = compile_set(ptr, end);
+ if(dat == 0)
+ {
+ if((_flags & regbase::failbit) == 0)
+ fail(REG_EBRACK);
+ return code;
+ }
+ break;
+ case syntax_or:
+ {
+ if(_flags & (bk_vbar | limited_ops))
+ {
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ continue;
+ }
+
+ alt_string_jump:
+
+ // update previous:
+ if(dat == 0)
+ {
+ // start of pattern can't have empty "|"
+ fail(REG_EMPTY);
+ return code;
+ }
+ // see if we have an empty alternative:
+ if(mark.empty() == false)
+ if(mark.peek() == data.index(dat))
+ {
+ fail(REG_EMPTY);
+ return code;
+ }
+ // extend:
+ /*dat = */add_simple(dat, syntax_element_jump, re_jump_size);
+ data.align();
+
+ // now work out where to insert:
+ unsigned int offset = 0;
+ if(mark.empty() == false)
+ {
+ // we have a '(' or '|' to go back to:
+ offset = mark.peek();
+ re_syntax_base* base = (re_syntax_base*)((unsigned char*)data.data() + offset);
+ offset = base->next.i;
+ }
+ re_jump* j = (re_jump*)data.insert(offset, re_jump_size);
+ j->type = syntax_element_alt;
+ j->next.i = offset + re_jump_size;
+ j->alt.i = data.size();
+ move_offsets(j, re_jump_size);
+ // continue from the jump that closes the first alternative, and
+ // record it so that its target can be filled in later
+ dat = (re_syntax_base*)((unsigned char*)data.data() + data.size() - re_jump_size);
+ mark.push(data.size() - re_jump_size);
+ ++ptr;
+ break;
+ }
+ case syntax_open_brace:
+ if((_flags & bk_braces) || ((_flags & intervals) == 0))
+ {
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ continue;
+ }
+ // we have {x} or {x,} or {x,y}:
+ parse_range(ptr, end, rep_min, rep_max);
+ goto repeat_jump;
+ case syntax_newline:
+ if(_flags & newline_alt)
+ goto alt_string_jump;
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ continue;
+ case syntax_close_brace:
+ if(_flags & bk_braces)
+ {
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ continue;
+ }
+ fail(REG_BADPAT);
+ return code;
+ default:
+ dat = add_literal(dat, (charT)c);
+ ++ptr;
+ break;
+ } // switch
+ } // while
+
+ //
+ // update previous:
+ if(dat)
+ {
+ data.align();
+ dat->next.i = data.size();
+ }
+
+ // see if we have an empty alternative:
+ if(mark.empty() == false)
+ if(mark.peek() == data.index(dat) )
+ {
+ re_syntax_base* para = (re_syntax_base*)((char*)data.data() + mark.peek());
+ if(para->type == syntax_element_jump)
+ {
+ fail(REG_EMPTY);
+ return code;
+ }
+ }
+ //
+ // set up tail:
+ //
+ if(mark.empty() == false)
+ {
+ // pop any pushed alternatives and set the target end destination:
+ dat = (re_syntax_base*)((unsigned char*)data.data() + mark.peek());
+ while(dat->type == syntax_element_jump)
+ {
+ ((re_jump*)dat)->alt.i = data.size();
+ mark.pop();
+ if(mark.empty() == true)
+ break;
+ dat = (re_jump*)((unsigned char*)data.data() + mark.peek());
+ }
+ }
+
+ dat = (re_brace*)data.extend(sizeof(re_syntax_base));
+ dat->type = syntax_element_match;
+ dat->next.i = 0;
+
+ // anything still on the stack at this point is an unclosed '('
+ if(mark.empty() == false)
+ {
+ fail(REG_EPAREN);
+ return code;
+ }
+
+ //
+ // allocate space for start __map:
+ startmap = (unsigned char*)data.extend(256 + ((end - base + 1) * sizeof(charT)));
+ //
+ // and copy the expression we just compiled:
+ _expression = (charT*)((const char*)startmap + 256);
+ memcpy(_expression, base, (end - base) * sizeof(charT));
+ *(_expression + (end - base)) = charT(0);
+
+ //
+ // now we need to apply fixups to the array
+ // so that we can use pointers and not indexes
+ fixup_apply((re_syntax_base*)data.data(), marks);
+
+ // check for error during fixup:
+ if(_flags & regbase::failbit)
+ return code;
+
+ //
+ // finally compile the maps so that we can make intelligent choices
+ // whenever we encounter an alternative:
+ compile_maps();
+ // discard any search table left over from a previous compile
+ if(pkmp)
+ {
+ kmp_free(pkmp, data.allocator());
+ pkmp = 0;
+ }
+ re_syntax_base* sbase = (re_syntax_base*)data.data();
+ _restart_type = probe_restart(sbase);
+ _leading_len = fixup_leading_rep(sbase, 0);
+ // the whole expression is one literal string followed by the match state
+ if((sbase->type == syntax_element_literal) && (sbase->next.p->type == syntax_element_match))
+ {
+ _restart_type = restart_fixed_lit;
+ if(0 == pkmp)
+ {
+ charT* p1 = (charT*)((char*)sbase + sizeof(re_literal));
+ charT* p2 = p1 + ((re_literal*)sbase)->length;
+ pkmp = kmp_compile(p1, p2, charT(), kmp_translator<traits>(_flags&regbase::icase), data.allocator() MAYBE_PASS_LOCALE(locale_inst));
+ }
+ }
+ return code;
+}
+
+//
+// add_simple:
+// Appends a new state of the given type to the raw buffer and returns it.
+// If dat is non-null it is linked to the new state first (its next offset
+// is set to the aligned end of the buffer).  At least
+// sizeof(re_syntax_base) bytes are always reserved, whatever size asks for.
+//
+template <class charT, class traits, class Allocator>
+re_syntax_base* RE_CALL reg_expression<charT, traits, Allocator>::add_simple(re_syntax_base* dat, syntax_element_type type, unsigned int size)
+{
+   if(dat != 0)
+   {
+      // the new state will start at the (aligned) end of the buffer
+      data.align();
+      dat->next.i = data.size();
+   }
+
+   const unsigned int bytes = (size < sizeof(re_syntax_base)) ? (unsigned int)sizeof(re_syntax_base) : size;
+
+   re_syntax_base* fresh = (re_syntax_base*)data.extend(bytes);
+   fresh->next.i = 0;
+   fresh->type = type;
+   return fresh;
+}
+
+//
+// add_literal:
+// Adds the character c (after traits::translate, honouring the icase
+// flag) to the state list.  If dat is already a literal state the
+// character is appended to it and its length is bumped; otherwise a
+// new one-character literal state is created after dat.
+// Returns the literal state that now holds the character.
+//
+template <class charT, class traits, class Allocator>
+re_syntax_base* RE_CALL reg_expression<charT, traits, Allocator>::add_literal(re_syntax_base* dat, charT c)
+{
+   const charT xc = traits::translate(c, (_flags & regbase::icase) MAYBE_PASS_LOCALE(locale_inst));
+
+   if((dat == 0) || (dat->type != syntax_element_literal))
+   {
+      // no literal to grow: start a fresh single-character one
+      re_syntax_base* fresh = add_simple(dat, syntax_element_literal, sizeof(re_literal) + sizeof(charT));
+      ((re_literal*)fresh)->length = 1;
+      *((charT*)(((re_literal*)fresh)+1)) = xc;
+      return fresh;
+   }
+
+   // growing the buffer may move it, so remember dat as an offset
+   // and rebuild the pointer afterwards
+   const __JM_STDC::ptrdiff_t off = (unsigned char*)dat - (unsigned char*)data.data();
+   *(charT*)data.extend(sizeof(charT)) = xc;
+   re_syntax_base* grown = (re_syntax_base*)((unsigned char*)data.data() + off);
+   ++(((re_literal*)grown)->length);
+   return grown;
+}
+
+//
+// probe_restart:
+// Looks at the first state that is not a start/end mark, beginning at
+// dat, and maps it to one of the regbase::restart_* values.
+// Any state type not listed below yields regbase::restart_any.
+//
+template <class charT, class traits, class Allocator>
+unsigned int RE_CALL reg_expression<charT, traits, Allocator>::probe_restart(re_syntax_base* dat)
+{
+   // step over sub-expression markers
+   while((dat->type == syntax_element_startmark) || (dat->type == syntax_element_endmark))
+      dat = dat->next.p;
+
+   if(dat->type == syntax_element_start_line)
+      return regbase::restart_line;
+   if(dat->type == syntax_element_word_start)
+      return regbase::restart_word;
+   if(dat->type == syntax_element_buffer_start)
+      return regbase::restart_buf;
+   if(dat->type == syntax_element_restart_continue)
+      return regbase::restart_continue;
+   return regbase::restart_any;
+}
+
+//
+// fixup_leading_rep:
+// Walks the states from dat up to (not including) end, adding up a
+// length: literals count their character count, wild/set/long-set
+// states count one each, other states not listed count nothing.
+// Returns the length reached at a match state or at end, and 0 when a
+// back-reference, alternative, combining state, or a long set holding
+// a multi-character element is met.
+// Side effects visible here:
+//  - on the top-level walk (end == 0) the first literal longer than two
+//    characters, if no wild/set state precedes it, is recorded as the
+//    leading string, switches _restart_type to restart_lit and gets a
+//    search table built by kmp_compile();
+//  - a repeat whose body measures exactly 1 is flagged as "leading".
+//
+template <class charT, class traits, class Allocator>
+unsigned int RE_CALL reg_expression<charT, traits, Allocator>::fixup_leading_rep(re_syntax_base* dat, re_syntax_base* end)
+{
+   unsigned int total = 0;
+   bool want_prefix = (end == 0);
+
+   for(; dat != end; dat = dat->next.p)
+   {
+      switch(dat->type)
+      {
+      case syntax_element_match:
+         return total;
+
+      case syntax_element_backref:
+      case syntax_element_alt:
+      case syntax_element_combining:
+         // (syntax_element_jump is deliberately not in this list)
+         return 0;
+
+      case syntax_element_literal:
+         {
+            re_literal* lit = (re_literal*)dat;
+            total += lit->length;
+            if(want_prefix && (lit->length > 2))
+            {
+               // we can do a literal search for the leading literal string
+               // using Knuth-Morris-Pratt (or whatever), and only then check
+               // for matches.  The string has to be of a decent length for
+               // that to be worth while.
+               _leading_string = (charT*)((char*)dat + sizeof(re_literal));
+               _leading_string_len = lit->length;
+               _restart_type = restart_lit;
+               want_prefix = false;
+               const charT* first = _leading_string;
+               const charT* last = first + _leading_string_len;
+               pkmp = kmp_compile(first, last, charT(), kmp_translator<traits>(_flags&regbase::icase), data.allocator() MAYBE_PASS_LOCALE(locale_inst));
+            }
+         }
+         break;
+
+      case syntax_element_long_set:
+         {
+            // make sure that there are no multi-character
+            // collating elements inside the repeat:
+            const charT* item = (const charT*)((const char*)dat + sizeof(re_set_long));
+            for(unsigned int left = ((re_set_long*)dat)->csingles; left > 0; --left)
+            {
+               if(re_strlen(item) > 1)
+                  return 0;
+               // skip this null-terminated entry and its terminator
+               while(*item)
+                  ++item;
+               ++item;
+            }
+            ++total;
+            want_prefix = false;
+         }
+         break;
+
+      case syntax_element_wild:
+      case syntax_element_set:
+         ++total;
+         want_prefix = false;
+         break;
+
+      case syntax_element_rep:
+         // measure the repeated body on its own
+         if(1 != fixup_leading_rep(dat->next.p, ((re_repeat*)dat)->alt.p))
+            return 0;
+         ((re_repeat*)dat)->leading = true;
+         return total;
+      }
+   }
+   return total;
+}
+
+#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES)
+} // namespace
+#endif
+
+JM_END_NAMESPACE
+
+
+
+
+
+
+