diff options
Diffstat (limited to 'utils/tfstats/regexp/include/jm/regmatch.h')
| -rw-r--r-- | utils/tfstats/regexp/include/jm/regmatch.h | 1707 |
1 files changed, 1707 insertions, 0 deletions
diff --git a/utils/tfstats/regexp/include/jm/regmatch.h b/utils/tfstats/regexp/include/jm/regmatch.h new file mode 100644 index 0000000..8acc0f5 --- /dev/null +++ b/utils/tfstats/regexp/include/jm/regmatch.h @@ -0,0 +1,1707 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// +/* + * + * Copyright (c) 1998-9 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + + /* + * FILE regmatch.h + * VERSION 2.12 + * regular expression matching algorithms + */ + + +#ifndef __REGMATCH_H +#define __REGMATCH_H + + +JM_NAMESPACE(__JM) + +template <class iterator, class charT, class traits_type, class Allocator> +iterator RE_CALL re_is_set_member(iterator next, + iterator last, + re_set_long* set, + const reg_expression<charT, traits_type, Allocator>& e) +{ + const charT* p = (const charT*)(set+1); + iterator ptr; + unsigned int i; + bool icase = e.flags() & regbase::icase; + + // try and match a single character, could be a multi-character + // collating element... + for(i = 0; i < set->csingles; ++i) + { + ptr = next; + while(*p && (ptr != last)) + { + if(traits_type::translate(*ptr, icase MAYBE_PASS_LOCALE(e.locale())) != *p) + break; + ++p; + ++ptr; + } + if(*p == 0) // if null we've matched + return set->isnot ? next : (ptr == next) ? ++next : ptr; + + while(*p)++p; + ++p; // skip null + } + + charT col = traits_type::translate(*next, icase MAYBE_PASS_LOCALE(e.locale())); + + + if(set->cranges || set->cequivalents) + { + re_str<charT> s2(col); + re_str<charT> s1; + // + // try and match a range, NB only a single character can match + if(set->cranges) + { + if(e.flags() & regbase::nocollate) + s1 = s2; + else + traits_type::transform(s1, s2 MAYBE_PASS_LOCALE(e.locale())); + for(i = 0; i < set->cranges; ++i) + { + if(s1 <= p) + { + while(*p)++p; + ++p; + if(s1 >= p) + return set->isnot ? next : ++next; + } + else + { + // skip first string + while(*p)++p; + ++p; + } + // skip second string + while(*p)++p; + ++p; + } + } + // + // try and match an equivalence class, NB only a single character can match + if(set->cequivalents) + { + traits_type::transform_primary(s1, s2 MAYBE_PASS_LOCALE(e.locale())); + for(i = 0; i < set->cequivalents; ++i) + { + if(s1 == p) + return set->isnot ? next : ++next; + // skip string + while(*p)++p; + ++p; + } + } + } + + if(traits_type::is_class(col, set->cclasses MAYBE_PASS_LOCALE(e.locale())) == true) + return set->isnot ? next : ++next; + return set->isnot ? ++next : next; +} + +template <class iterator, class Allocator> +class __priv_match_data +{ +public: + typedef JM_MAYBE_TYPENAME REBIND_TYPE(int, Allocator) i_alloc; + typedef JM_MAYBE_TYPENAME REBIND_TYPE(iterator, Allocator) it_alloc; + + reg_match_base<iterator, Allocator> temp_match; + // failure stacks: + jstack<reg_match_base<iterator, Allocator>, Allocator> matches; + jstack<iterator, Allocator> prev_pos; + jstack<const re_syntax_base*, Allocator> prev_record; + jstack<int, Allocator> prev_acc; + int* accumulators; + unsigned int caccumulators; + iterator* loop_starts; + + __priv_match_data(const reg_match_base<iterator, Allocator>&); + + ~__priv_match_data() + { + free(); + } + void free(); + void set_accumulator_size(unsigned int size); + int* get_accumulators() + { + return accumulators; + } + iterator* get_loop_starts() + { + return loop_starts; + } +}; + +template <class iterator, class Allocator> +__priv_match_data<iterator, Allocator>::__priv_match_data(const reg_match_base<iterator, Allocator>& m) + : temp_match(m), matches(64, m.allocator()), prev_pos(64, m.allocator()), prev_record(64, m.allocator()) +{ + accumulators = 0; + caccumulators = 0; + loop_starts = 0; +} + +template <class iterator, class Allocator> +void __priv_match_data<iterator, Allocator>::set_accumulator_size(unsigned int size) +{ + if(size > caccumulators) + { + free(); + caccumulators = size; + accumulators = i_alloc(temp_match.allocator()).allocate(caccumulators); + loop_starts = it_alloc(temp_match.allocator()).allocate(caccumulators); + for(unsigned i = 0; i < caccumulators; ++i) + new (loop_starts + i) iterator(); + } +} + +template <class iterator, class Allocator> +void __priv_match_data<iterator, Allocator>::free() +{ + if(caccumulators) + { + //REBIND_INSTANCE(int, Allocator, temp_match.allocator()).deallocate(accumulators, caccumulators); + i_alloc temp1(temp_match.allocator()); + temp1.deallocate(accumulators, caccumulators); + for(unsigned i = 0; i < caccumulators; ++i) + jm_destroy(loop_starts + i); + //REBIND_INSTANCE(iterator, Allocator, temp_match.allocator()).deallocate(loop_starts, caccumulators); + it_alloc temp2(temp_match.allocator()); + temp2.deallocate(loop_starts, caccumulators); + } +} + +// +// proc query_match +// returns true if the specified regular expression matches +// at position first. Fills in what matched in m. +// +template <class iterator, class Allocator, class charT, class traits, class Allocator2> +bool query_match(iterator first, iterator last, reg_match<iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default) +{ + // prepare m for failure: + if((flags & match_init) == 0) + { + m.set_size(e.mark_count(), first, last); + } + __priv_match_data<iterator, Allocator> pd(m); + iterator restart; + return query_match_aux(first, last, m, e, flags, pd, &restart); +} + +// +// query_match convenience interfaces: +#ifndef JM_NO_PARTIAL_FUNC_SPEC +// +// this isn't really a partial specialisation, but template function +// overloading - if the compiler doesn't support partial specialisation +// then it really won't support this either: +template <class charT, class Allocator, class traits, class Allocator2> +inline bool query_match(const charT* str, + reg_match<const charT*, Allocator>& m, + const reg_expression<charT, traits, Allocator2>& e, + unsigned flags = match_default) +{ + return query_match(str, str + traits::length(str), m, e, flags); +} + +#ifndef JM_NO_STRING_H +template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2> +inline bool query_match(const __JM_STD::basic_string<charT, ST, SA>& s, + reg_match<typename __JM_STD::basic_string<charT, ST, SA>::const_iterator, Allocator>& m, + const reg_expression<charT, traits, Allocator2>& e, + unsigned flags = match_default) +{ + return query_match(s.begin(), s.end(), m, e, flags); +} +#endif +#else // partial specialisation +inline bool query_match(const char* str, + cmatch& m, + const regex& e, + unsigned flags = match_default) +{ + return query_match(str, str + regex::traits_type::length(str), m, e, flags); +} +#ifndef JM_NO_WCSTRING +inline bool query_match(const wchar_t* str, + wcmatch& m, + const wregex& e, + unsigned flags = match_default) +{ + return query_match(str, str + wregex::traits_type::length(str), m, e, flags); +} +#endif +#ifndef JM_NO_STRING_H +inline bool query_match(const __JM_STD::string& s, + reg_match<__JM_STD::string::const_iterator, regex::alloc_type>& m, + const regex& e, + unsigned flags = match_default) +{ + return query_match(s.begin(), s.end(), m, e, flags); +} +#if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING) +inline bool query_match(const __JM_STD::basic_string<wchar_t>& s, + reg_match<__JM_STD::basic_string<wchar_t>::const_iterator, wregex::alloc_type>& m, + const wregex& e, + unsigned flags = match_default) +{ + return query_match(s.begin(), s.end(), m, e, flags); +} +#endif + +#endif + +#endif + + +#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) +// +// Ugly ugly hack, +// template don't merge if they contain switch statements so declare these +// templates in unnamed namespace (ie with internal linkage), each translation +// unit then gets its own local copy, it works seemlessly but bloats the app. +namespace{ +#endif + +template <class iterator, class Allocator, class charT, class traits, class Allocator2> +bool query_match_aux(iterator first, + iterator last, + reg_match<iterator, Allocator>& m, + const reg_expression<charT, traits, Allocator2>& e, + unsigned flags, + __priv_match_data<iterator, Allocator>& pd, + iterator* restart) +{ + if(e.flags() & regbase::failbit) + return false; + + typedef typename traits::size_type traits_size_type; + typedef typename traits::uchar_type traits_uchar_type; + typedef typename is_byte<charT>::width_type width_type; + + #ifdef RE_LOCALE_CPP + const __JM_STD::locale& locale_inst = e.locale(); + #endif + + // declare some local aliases to reduce pointer loads + // good optimising compilers should make this unnecessary!! + jstack<reg_match_base<iterator, Allocator>, Allocator>& matches = pd.matches; + jstack<iterator, Allocator>& prev_pos = pd.prev_pos; + jstack<const re_syntax_base*, Allocator>& prev_record = pd.prev_record; + jstack<int, Allocator>& prev_acc = pd.prev_acc; + reg_match_base<iterator, Allocator>& temp_match = pd.temp_match; + temp_match.set_first(first); + + //temp_match.set_size(e.mark_count(), first, last); + register const re_syntax_base* ptr = e.first(); + bool match_found = false; + bool need_push_match = (e.mark_count() > 1); + int cur_acc = -1; // no active accumulator + pd.set_accumulator_size(e.repeat_count()); + int* accumulators = pd.get_accumulators(); + iterator* start_loop = pd.get_loop_starts(); + int k; // for loops + bool icase = e.flags() & regbase::icase; + *restart = first; + iterator base = first; + + // prepare m for failure: + /* + if((flags & match_init) == 0) + { + m.init_fail(first, last); + } */ + + retry: + + while(first != last) + { + jm_assert(ptr); + switch(ptr->type) + { + case syntax_element_match: + match_jump: + { + // match found, save then fallback in case we missed a + // longer one. + if((flags & match_not_null) && (first == temp_match[0].first)) + goto failure; + temp_match.set_second(first); + m.maybe_assign(temp_match); + match_found = true; + if((flags & match_any) || ((first == last) && (need_push_match == false))) + { + // either we don't care what we match or we've matched + // the whole string and can't match anything longer. + while(matches.empty() == false) + matches.pop(); + while(prev_pos.empty() == false) + prev_pos.pop(); + while(prev_record.empty() == false) + prev_record.pop(); + while(prev_acc.empty() == false) + prev_acc.pop(); + return true; + } + } + goto failure; + case syntax_element_startmark: + temp_match.set_first(first, ((re_brace*)ptr)->index); + ptr = ptr->next.p; + break; + case syntax_element_endmark: + temp_match.set_second(first, ((re_brace*)ptr)->index); + ptr = ptr->next.p; + break; + case syntax_element_literal: + { + unsigned int len = ((re_literal*)ptr)->length; + charT* what = (charT*)(((re_literal*)ptr) + 1); + // + // compare string with what we stored in + // our records: + for(unsigned int i = 0; i < len; ++i, ++first) + { + if((first == last) || (traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)) != what[i])) + goto failure; + } + ptr = ptr->next.p; + break; + } + case syntax_element_start_line: + outer_line_check: + if(first == temp_match[0].first) + { + // we're at the start of the buffer + if(flags & match_prev_avail) + { + inner_line_check: + // check the previous value even though its before + // the start of our "buffer". + iterator t(first); + --t; + if(traits::is_separator(*t) && !((*t == '\r') && (*first == '\n')) ) + { + ptr = ptr->next.p; + continue; + } + goto failure; + } + if((flags & match_not_bol) == 0) + { + ptr = ptr->next.p; + continue; + } + goto failure; + } + // we're in the middle of the string + goto inner_line_check; + case syntax_element_end_line: + // we're not yet at the end so *first is always valid: + if(traits::is_separator(*first)) + { + if((first != base) || (flags & match_prev_avail)) + { + // check that we're not in the middle of \r\n sequence + iterator t(first); + --t; + if((*t == '\r') && (*first == '\n')) + { + goto failure; + } + } + ptr = ptr->next.p; + continue; + } + goto failure; + case syntax_element_wild: + // anything except possibly NULL or \n: + if(traits::is_separator(*first)) + { + if(flags & match_not_dot_newline) + goto failure; + ptr = ptr->next.p; + ++first; + continue; + } + if(*first == charT(0)) + { + if(flags & match_not_dot_null) + goto failure; + ptr = ptr->next.p; + ++first; + continue; + } + ptr = ptr->next.p; + ++first; + break; + case syntax_element_word_boundary: + { + // prev and this character must be opposites: + bool b = traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); + if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) + { + if(flags & match_not_bow) + b ^= true; + else + b ^= false; + } + else + { + --first; + b ^= traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); + ++first; + } + if(b) + { + ptr = ptr->next.p; + continue; + } + goto failure; + } + case syntax_element_within_word: + // both prev and this character must be char_class_word: + if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) + { + bool b; + if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) + b = false; + else + { + --first; + b = traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); + ++first; + } + if(b) + { + ptr = ptr->next.p; + continue; + } + } + goto failure; + case syntax_element_word_start: + if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) + { + // start of buffer: + if(flags & match_not_bow) + goto failure; + if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) + { + ptr = ptr->next.p; + continue; + } + goto failure; + } + // otherwise inside buffer: + if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) + { + iterator t(first); + --t; + if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false) + { + ptr = ptr->next.p; + continue; + } + } + goto failure; // if we fall through to here then we've failed + case syntax_element_word_end: + if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) + goto failure; // start of buffer can't be end of word + + // otherwise inside buffer: + if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false) + { + iterator t(first); + --t; + if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst))) + { + ptr = ptr->next.p; + continue; + } + } + goto failure; // if we fall through to here then we've failed + case syntax_element_buffer_start: + if((first != temp_match[0].first) || (flags & match_not_bob)) + goto failure; + // OK match: + ptr = ptr->next.p; + break; + case syntax_element_buffer_end: + if((first != last) || (flags & match_not_eob)) + goto failure; + // OK match: + ptr = ptr->next.p; + break; + case syntax_element_backref: + { + // compare with what we previously matched: + iterator i = temp_match[((re_brace*)ptr)->index].first; + iterator j = temp_match[((re_brace*)ptr)->index].second; + while(i != j) + { + if((first == last) || (traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)) != traits::translate(*i, icase MAYBE_PASS_LOCALE(locale_inst)))) + goto failure; + ++i; + ++first; + } + ptr = ptr->next.p; + break; + } + case syntax_element_long_set: + { + // let the traits class do the work: + iterator t = re_is_set_member(first, last, (re_set_long*)ptr, e); + if(t != first) + { + ptr = ptr->next.p; + first = t; + continue; + } + goto failure; + } + case syntax_element_set: + // lookup character in table: + if(((re_set*)ptr)->__map[(traits_uchar_type)traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))]) + { + ptr = ptr->next.p; + ++first; + continue; + } + goto failure; + case syntax_element_jump: + ptr = ((re_jump*)ptr)->alt.p; + continue; + case syntax_element_alt: + { + // alt_jump: + if(reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_jump*)ptr)->__map, (unsigned char)mask_take, width_type())) + { + // we can take the first alternative, + // see if we need to push next alternative: + if(reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_jump*)ptr)->__map, mask_skip, width_type())) + { + if(need_push_match) + matches.push(temp_match); + for(k = 0; k <= cur_acc; ++k) + prev_pos.push(start_loop[k]); + prev_pos.push(first); + prev_record.push(ptr); + for(k = 0; k <= cur_acc; ++k) + prev_acc.push(accumulators[k]); + prev_acc.push(cur_acc); + } + ptr = ptr->next.p; + continue; + } + if(reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_jump*)ptr)->__map, mask_skip, width_type())) + { + ptr = ((re_jump*)ptr)->alt.p; + continue; + } + goto failure; // neither option is possible + } + case syntax_element_rep: + { + // repeater_jump: + // if we're moving to a higher id (nested repeats etc) + // zero out our accumualtors: + if(cur_acc < ((re_repeat*)ptr)->id) + { + cur_acc = ((re_repeat*)ptr)->id; + accumulators[cur_acc] = 0; + start_loop[cur_acc] = iterator(); + } + + cur_acc = ((re_repeat*)ptr)->id; + + if(((re_repeat*)ptr)->leading) + *restart = first; + + //charT c = traits::translate(*first MAYBE_PASS_LOCALE(locale_inst)); + + // first of all test for special case where this is last element, + // if that is the case then repeat as many times as possible: + + if(((re_repeat*)ptr)->alt.p->type == syntax_element_match) + { + // see if we can take the repeat: + if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) + && reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type())) + { + // push terminating match as fallback: + if((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) + { + if((prev_record.empty() == false) && (prev_record.peek() == ((re_repeat*)ptr)->alt.p)) + { + // we already have the required fallback + // don't add any more, just update this one: + if(need_push_match) + matches.peek() = temp_match; + prev_pos.peek() = first; + } + else + { + if(need_push_match) + matches.push(temp_match); + prev_pos.push(first); + prev_record.push(((re_repeat*)ptr)->alt.p); + } + } + // move to next item in list: + if(first != start_loop[cur_acc]) + { + ++accumulators[cur_acc]; + ptr = ptr->next.p; + start_loop[cur_acc] = first; + continue; + } + goto failure; + } + // see if we can skip the repeat: + if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) + && reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_skip, width_type())) + { + ptr = ((re_repeat*)ptr)->alt.p; + continue; + } + // otherwise fail: + goto failure; + } + + // see if we can skip the repeat: + if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) + && reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_skip, width_type())) + { + // see if we can push failure info: + if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) + && reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type())) + { + // check to see if the last loop matched a NULL string + // if so then we really don't want to loop again: + if(((unsigned int)accumulators[cur_acc] == ((re_repeat*)ptr)->min) + || (first != start_loop[cur_acc])) + { + if(need_push_match) + matches.push(temp_match); + prev_pos.push(first); + prev_record.push(ptr); + for(k = 0; k <= cur_acc; ++k) + prev_acc.push(accumulators[k]); + //prev_acc.push(cur_acc); + } + } + ptr = ((re_repeat*)ptr)->alt.p; + continue; + } + + // otherwise see if we can take the repeat: + if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) + && reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type()) && + (first != start_loop[cur_acc])) + { + // move to next item in list: + ++accumulators[cur_acc]; + ptr = ptr->next.p; + start_loop[cur_acc] = first; + continue; + } + + // if we get here then neither option is allowed so fail: + goto failure; + + } + case syntax_element_combining: + if(traits::is_combining(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)))) + goto failure; + ++first; + while((first != last) && traits::is_combining(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))))++first; + ptr = ptr->next.p; + continue; + case syntax_element_soft_buffer_end: + { + if(flags & match_not_eob) + goto failure; + iterator p(first); + while((p != last) && traits::is_separator(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))))++p; + if(p != last) + goto failure; + ptr = ptr->next.p; + continue; + } + case syntax_element_restart_continue: + if(first != temp_match[-1].first) + goto failure; + ptr = ptr->next.p; + continue; + default: + jm_assert(0); // should never get to here!! + return false; + } + } + + // + // if we get to here then we've run out of characters to match against, + // we could however still have non-character regex items left + if(ptr->can_be_null == 0) + goto failure; + while(true) + { + jm_assert(ptr); + switch(ptr->type) + { + case syntax_element_match: + goto match_jump; + case syntax_element_startmark: + temp_match.set_first(first, ((re_brace*)ptr)->index); + ptr = ptr->next.p; + break; + case syntax_element_endmark: + temp_match.set_second(first, ((re_brace*)ptr)->index); + ptr = ptr->next.p; + break; + case syntax_element_start_line: + goto outer_line_check; + case syntax_element_end_line: + // we're at the end so *first is never valid: + if((flags & match_not_eol) == 0) + { + ptr = ptr->next.p; + continue; + } + goto failure; + case syntax_element_word_boundary: + case syntax_element_word_end: + if(((flags & match_not_eow) == 0) && (first != temp_match[0].first)) + { + iterator t(first); + --t; + if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst))) + { + ptr = ptr->next.p; + continue; + } + } + goto failure; + case syntax_element_buffer_end: + case syntax_element_soft_buffer_end: + if(flags & match_not_eob) + goto failure; + // OK match: + ptr = ptr->next.p; + break; + case syntax_element_jump: + ptr = ((re_jump*)ptr)->alt.p; + continue; + case syntax_element_alt: + if(ptr->can_be_null & mask_take) + { + // we can test the first alternative, + // see if we need to push next alternative: + if(ptr->can_be_null & mask_skip) + { + if(need_push_match) + matches.push(temp_match); + for(k = 0; k <= cur_acc; ++k) + prev_pos.push(start_loop[k]); + prev_pos.push(first); + prev_record.push(ptr); + for(k = 0; k <= cur_acc; ++k) + prev_acc.push(accumulators[k]); + prev_acc.push(cur_acc); + } + ptr = ptr->next.p; + continue; + } + if(ptr->can_be_null & mask_skip) + { + ptr = ((re_jump*)ptr)->alt.p; + continue; + } + goto failure; // neither option is possible + case syntax_element_rep: + // if we're moving to a higher id (nested repeats etc) + // zero out our accumualtors: + if(cur_acc < ((re_repeat*)ptr)->id) + { + cur_acc = ((re_repeat*)ptr)->id; + accumulators[cur_acc] = 0; + start_loop[cur_acc] = first; + } + + cur_acc = ((re_repeat*)ptr)->id; + + // see if we can skip the repeat: + if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) + && (ptr->can_be_null & mask_skip)) + { + // don't push failure info, there's no point: + ptr = ((re_repeat*)ptr)->alt.p; + continue; + } + + // otherwise see if we can take the repeat: + if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) + && ((ptr->can_be_null & (mask_take | mask_skip)) == (mask_take | mask_skip))) + { + // move to next item in list: + ++accumulators[cur_acc]; + ptr = ptr->next.p; + start_loop[cur_acc] = first; + continue; + } + + // if we get here then neither option is allowed so fail: + goto failure; + case syntax_element_restart_continue: + if(first != temp_match[-1].first) + goto failure; + ptr = ptr->next.p; + continue; + default: + goto failure; + } + } + + failure: + + if(prev_record.empty() == false) + { + ptr = prev_record.peek(); + switch(ptr->type) + { + case syntax_element_alt: + // get next alternative: + ptr = ((re_jump*)ptr)->alt.p; + if(need_push_match) + matches.pop(temp_match); + prev_acc.pop(cur_acc); + for(k = cur_acc; k >= 0; --k) + prev_acc.pop(accumulators[k]); + prev_pos.pop(first); + for(k = cur_acc; k >= 0; --k) + prev_pos.pop(start_loop[k]); + prev_record.pop(); + goto retry; + case syntax_element_rep: + // we're doing least number of repeats first, + // increment count and repeat again: + if(need_push_match) + matches.pop(temp_match); + prev_pos.pop(first); + cur_acc = ((re_repeat*)ptr)->id; + for(k = cur_acc; k >= 0; --k) + prev_acc.pop(accumulators[k]); + prev_record.pop(); + if((unsigned int)++accumulators[cur_acc] > ((re_repeat*)ptr)->max) + goto failure; // repetions exhausted. + ptr = ptr->next.p; + start_loop[cur_acc] = first; + goto retry; + case syntax_element_match: + if(need_push_match) + matches.pop(temp_match); + prev_pos.pop(first); + prev_record.pop(); + goto retry; + default: + jm_assert(0); + // mustn't get here!! + } + } + + if(match_found) + return true; + + // if we get to here then everything has failed + // and no match was found: + return false; +} +#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) +} // namespace +#endif + + +template <class iterator> +void __skip_and_inc(unsigned int& clines, iterator& last_line, iterator& first, const iterator last) +{ + while(first != last) + { + if(*first == '\n') + { + last_line = ++first; + ++clines; + } + else + ++first; + } +} + +template <class iterator> +void __skip_and_dec(unsigned int& clines, iterator& last_line, iterator& first, iterator base, unsigned int len) +{ + bool need_line = false; + for(unsigned int i = 0; i < len; ++i) + { + --first; + if(*first == '\n') + { + need_line = true; + --clines; + } + } + + if(need_line) + { + last_line = first; + + if(last_line != base) + --last_line; + else + return; + + while((last_line != base) && (*last_line != '\n')) + --last_line; + if(*last_line == '\n') + ++last_line; + } +} + +template <class iterator> +inline void __inc_one(unsigned int& clines, iterator& last_line, iterator& first) +{ + if(*first == '\n') + { + last_line = ++first; + ++clines; + } + else + ++first; +} + +template <class iterator, class Allocator> +struct grep_search_predicate +{ + reg_match<iterator, Allocator>* pm; + grep_search_predicate(reg_match<iterator, Allocator>* p) : pm(p) {} + bool operator()(const reg_match<iterator, Allocator>& m) + { + *pm = static_cast<const reg_match_base<iterator, Allocator>&>(m); + return false; + } +}; + +#if !defined(JM_NO_TEMPLATE_RETURNS) && !defined(JM_NO_PARTIAL_FUNC_SPEC) + +template <class iterator, class Allocator> +inline const reg_match_base<iterator, Allocator>& grep_out_type(const grep_search_predicate<iterator, Allocator>& o, const Allocator&) +{ + return *(o.pm); +} + +#endif + +template <class T, class Allocator> +inline const Allocator& grep_out_type(const T&, const Allocator& a) +{ + return a; +} + +#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) +// +// Ugly ugly hack, +// template don't merge if they contain switch statements so declare these +// templates in unnamed namespace (ie with internal linkage), each translation +// unit then gets its own local copy, it works seemlessly but bloats the app. +namespace{ +#endif + +// +// reg_grep2: +// find all non-overlapping matches within the sequence first last: +// +template <class Predicate, class I, class charT, class traits, class A, class A2> +unsigned int reg_grep2(Predicate foo, I first, I last, const reg_expression<charT, traits, A>& e, unsigned flags, A2 a) +{ + if(e.flags() & regbase::failbit) + return 0; + + typedef typename traits::size_type traits_size_type; + typedef typename traits::uchar_type traits_uchar_type; + typedef typename is_byte<charT>::width_type width_type; + + reg_match<I, A2> m(grep_out_type(foo, a)); + I restart; + m.set_size(e.mark_count(), first, last); + m.set_line(1, first); + + #ifdef RE_LOCALE_CPP + const __JM_STD::locale& locale_inst = e.locale(); + #endif + + unsigned int clines = 1; + unsigned int cmatches = 0; + I last_line = first; + I next_base; + I base = first; + bool need_init; + + flags |= match_init; + + __priv_match_data<I, A2> pd(m); + + const unsigned char* __map = e.get_map(); + unsigned int type; + + if(first == last) + { + // special case, only test if can_be_null, + // don't dereference any pointers!! + if(e.first()->can_be_null) + if(query_match_aux(first, last, m, e, flags, pd, &restart)) + { + foo(m); + ++cmatches; + } + return cmatches; + } + + // try one time whatever: + if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type() ) ) + { + if(query_match_aux(first, last, m, e, flags, pd, &restart)) + { + ++cmatches; + if(foo(m) == false) + return cmatches; + // update to end of what matched + // trying to match again with match_not_null set if this + // is a null match... + need_init = true; + if(first == m[0].second) + { + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) + { + ++cmatches; + if(foo(m) == false) + return cmatches; + } + else + { + need_init = false; + for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); + if(restart != last) + ++restart; + __skip_and_inc(clines, last_line, first, restart); + } + } + if(need_init) + { + __skip_and_inc(clines, last_line, first, m[0].second); + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + } + } + else + { + for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); + if(restart != last) + ++restart; + __skip_and_inc(clines, last_line, first, restart); + } + } + else + __inc_one(clines, last_line, first); + flags |= match_prev_avail | match_not_bob; + + + // depending on what the first record is we may be able to + // optimise the search: + type = (flags & match_continuous) ? regbase::restart_continue : e.restart_type(); + + if(type == regbase::restart_buf) + return cmatches; + + switch(type) + { + case regbase::restart_lit: + case regbase::restart_fixed_lit: + { + const kmp_info<charT>* info = e.get_kmp(); + int len = info->len; + const charT* x = info->pstr; + int j = 0; + bool icase = e.flags() & regbase::icase; + while (first != last) + { + while((j > -1) && (x[j] != traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)))) + j = info->kmp_next[j]; + __inc_one(clines, last_line, first); + ++j; + if(j >= len) + { + if(type == regbase::restart_fixed_lit) + { + __skip_and_dec(clines, last_line, first, base, j); + restart = first; + restart += len; + m.set_first(first); + m.set_second(restart); + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + __skip_and_inc(clines, last_line, first, restart); + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + j = 0; + } + else + { + restart = first; + __skip_and_dec(clines, last_line, first, base, j); + if(query_match_aux(first, last, m, e, flags, pd, &restart)) + { + + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + // update to end of what matched + __skip_and_inc(clines, last_line, first, m[0].second); + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + j = 0; + } + else + { + for(int k = 0; (restart != first) && (k < j); ++k, --restart); + if(restart != last) + ++restart; + __skip_and_inc(clines, last_line, first, restart); + j = 0; //we could do better than this... + } + } + } + } + break; + } + case regbase::restart_any: + { + while(first != last) + { + if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) + { + if(query_match_aux(first, last, m, e, flags, pd, &restart)) + { + + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + // update to end of what matched + // trying to match again with match_not_null set if this + // is a null match... + need_init = true; + if(first == m[0].second) + { + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) + { + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + } + else + { + need_init = false; + for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); + if(restart != last) + ++restart; + __skip_and_inc(clines, last_line, first, restart); + } + } + if(need_init) + { + __skip_and_inc(clines, last_line, first, m[0].second); + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + } + continue; + } + else + { + for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); + if(restart != last) + ++restart; + __skip_and_inc(clines, last_line, first, restart); + } + } + else + __inc_one(clines, last_line, first); + } + } + break; + case regbase::restart_word: + { + // do search optimised for word starts: + while(first != last) + { + --first; + if(*first == '\n') + --clines; + // skip the word characters: + while((first != last) && traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) + ++first; + // now skip the white space: + while((first != last) && (traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false)) + __inc_one(clines, last_line, first); + if(first == last) + break; + + if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) + { + if(query_match_aux(first, last, m, e, flags, pd, &restart)) + { + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + // update to end of what matched + // trying to match again with match_not_null set if this + // is a null match... + need_init = true; + if(first == m[0].second) + { + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) + { + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + } + else + { + need_init = false; + for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); + if(restart != last) + ++restart; + __skip_and_inc(clines, last_line, first, restart); + } + } + if(need_init) + { + __skip_and_inc(clines, last_line, first, m[0].second); + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + } + } + else + { + for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); + if(restart != last) + ++restart; + __skip_and_inc(clines, last_line, first, restart); + } + } + else + __inc_one(clines, last_line, first); + } + } + break; + case regbase::restart_line: + { + // do search optimised for line starts: + while(first != last) + { + // find first charcter after a line break: + --first; + if(*first == '\n') + --clines; + while((first != last) && (*first != '\n')) + ++first; + if(first == last) + break; + ++first; + if(first == last) + break; + + ++clines; + last_line = first; + + if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) + { + if(query_match_aux(first, last, m, e, flags, pd, &restart)) + { + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + // update to end of what matched + // trying to match again with match_not_null set if this + // is a null match... + need_init = true; + if(first == m[0].second) + { + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) + { + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + } + else + { + need_init = false; + for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); + if(restart != last) + ++restart; + __skip_and_inc(clines, last_line, first, restart); + } + } + if(need_init) + { + __skip_and_inc(clines, last_line, first, m[0].second); + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + } + } + else + { + for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); + if(restart != last) + ++restart; + __skip_and_inc(clines, last_line, first, restart); + } + } + else + __inc_one(clines, last_line, first); + } + } + break; + case regbase::restart_continue: + { + while(first != last) + { + if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) + { + if(query_match_aux(first, last, m, e, flags, pd, &restart)) + { + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + // update to end of what matched + // trying to match again with match_not_null set if this + // is a null match... + if(first == m[0].second) + { + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) + { + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + } + else + return cmatches; // can't continue from null match + } + __skip_and_inc(clines, last_line, first, m[0].second); + next_base = m[0].second; + pd.temp_match.init_fail(next_base, last); + m.init_fail(next_base, last); + continue; + } + } + return cmatches; + } + } + break; + } + + + // finally check trailing null string: + if(e.first()->can_be_null) + { + if(query_match_aux(first, last, m, e, flags, pd, &restart)) + { + m.set_line(clines, last_line); + ++cmatches; + if(foo(m) == false) + return cmatches; + } + } + + return cmatches; +} +#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) +} // namespace +#endif + + +template <class iterator, class Allocator, class charT, class traits, class Allocator2> +bool reg_search(iterator first, iterator last, reg_match<iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default) +{ + if(e.flags() & regbase::failbit) + return false; + + typedef typename traits::size_type traits_size_type; + typedef typename traits::uchar_type traits_uchar_type; + + // prepare m for failure: + if((flags & match_init) == 0) + { + m.set_size(e.mark_count(), first, last); + } + + flags |= match_init; + return reg_grep2(grep_search_predicate<iterator, Allocator>(&m), first, last, e, flags, m.allocator()); +} + +// +// reg_search convenience interfaces: +#ifndef JM_NO_PARTIAL_FUNC_SPEC +// +// this isn't really a partial specialisation, but template function +// overloading - if the compiler doesn't support partial specialisation +// then it really won't support this either: +template <class charT, class Allocator, class traits, class Allocator2> +inline bool reg_search(const charT* str, + reg_match<const charT*, Allocator>& m, + const reg_expression<charT, traits, Allocator2>& e, + unsigned flags = match_default) +{ + return reg_search(str, str + traits::length(str), m, e, flags); +} + +#ifndef JM_NO_STRING_H +template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2> +inline bool reg_search(const __JM_STD::basic_string<charT, ST, SA>& s, + reg_match<typename __JM_STD::basic_string<charT, ST, SA>::const_iterator, Allocator>& m, + const reg_expression<charT, traits, Allocator2>& e, + unsigned flags = match_default) +{ + return reg_search(s.begin(), s.end(), m, e, flags); +} +#endif +#else // partial specialisation +inline bool reg_search(const char* str, + cmatch& m, + const regex& e, + unsigned flags = match_default) +{ + return reg_search(str, str + regex::traits_type::length(str), m, e, flags); +} +#ifndef JM_NO_WCSTRING +inline bool reg_search(const wchar_t* str, + wcmatch& m, + const wregex& e, + unsigned flags = match_default) +{ + return reg_search(str, str + wregex::traits_type::length(str), m, e, flags); +} +#endif +#ifndef JM_NO_STRING_H +inline bool reg_search(const __JM_STD::string& s, + reg_match<__JM_STD::string::const_iterator, regex::alloc_type>& m, + const regex& e, + unsigned flags = match_default) +{ + return reg_search(s.begin(), s.end(), m, e, flags); +} +#if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING) +inline bool reg_search(const __JM_STD::basic_string<wchar_t>& s, + reg_match<__JM_STD::basic_string<wchar_t>::const_iterator, wregex::alloc_type>& m, + const wregex& e, + unsigned flags = match_default) +{ + return reg_search(s.begin(), s.end(), m, e, flags); +} +#endif + +#endif + +#endif + + +// +// reg_grep: +// find all non-overlapping matches within the sequence first last: +// +template <class Predicate, class iterator, class charT, class traits, class Allocator> +inline unsigned int reg_grep(Predicate foo, iterator first, iterator last, const reg_expression<charT, traits, Allocator>& e, unsigned flags = match_default) +{ + return reg_grep2(foo, first, last, e, flags, e.allocator()); +} + +// +// reg_grep convenience interfaces: +#ifndef JM_NO_PARTIAL_FUNC_SPEC +// +// this isn't really a partial specialisation, but template function +// overloading - if the compiler doesn't support partial specialisation +// then it really won't support this either: +template <class Predicate, class charT, class Allocator, class traits> +inline bool reg_grep(Predicate foo, const charT* str, + const reg_expression<charT, traits, Allocator>& e, + unsigned flags = match_default) +{ + return reg_grep(foo, str, str + traits::length(str), e, flags); +} + +#ifndef JM_NO_STRING_H +template <class Predicate, class ST, class SA, class Allocator, class charT, class traits> +inline bool reg_grep(Predicate foo, const __JM_STD::basic_string<charT, ST, SA>& s, + const reg_expression<charT, traits, Allocator>& e, + unsigned flags = match_default) +{ + return reg_grep(foo, s.begin(), s.end(), e, flags); +} +#endif +#else // partial specialisation +inline bool reg_grep(bool (*foo)(const cmatch&), const char* str, + const regex& e, + unsigned flags = match_default) +{ + return reg_grep(foo, str, str + regex::traits_type::length(str), e, flags); +} +#ifndef JM_NO_WCSTRING +inline bool reg_grep(bool (*foo)(const wcmatch&), const wchar_t* str, + const wregex& e, + unsigned flags = match_default) +{ + return reg_grep(foo, str, str + wregex::traits_type::length(str), e, flags); +} +#endif +#ifndef JM_NO_STRING_H +inline bool reg_grep(bool (*foo)(const reg_match<__JM_STD::string::const_iterator, regex::alloc_type>&), const __JM_STD::string& s, + const regex& e, + unsigned flags = match_default) +{ + return reg_grep(foo, s.begin(), s.end(), e, flags); +} +#if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING) +inline bool reg_grep(bool (*foo)(const reg_match<__JM_STD::basic_string<wchar_t>::const_iterator, wregex::alloc_type>&), + const __JM_STD::basic_string<wchar_t>& s, + const wregex& e, + unsigned flags = match_default) +{ + return reg_grep(foo, s.begin(), s.end(), e, flags); +} +#endif + +#endif + +#endif + + +// +// finally for compatablity with version 1.x of the library +// we need a form of reg_grep that takes an output iterator +// as its first argument: +// + +// +// struct grep_match: +// stores what matched during a reg_grep, +// the output iterator type passed to reg_grep must have an +// operator*() that returns a type with an +// operator=(const grep_match<iterator, Allocator>&); +// +template <class iterator, class Allocator> +struct grep_match +{ + unsigned int line; + iterator line_start; + reg_match<iterator, Allocator> what; + + grep_match(Allocator a = Allocator()) : what(a) {} + + grep_match(unsigned int l, iterator p1, const reg_match<iterator, Allocator>& m) + : what(m) { line = l; line_start = p1; } + + bool operator == (const grep_match& ) + { return false; } + + bool operator < (const grep_match&) + { return false; } +}; + +template <class O, class I, class A> +struct grep_adaptor +{ + O oi; + reg_match<I, A> m; + grep_adaptor(O i, A a) : m(a), oi(i) {} + bool operator()(const reg_match_base<I, A>& w) + { + m.what = w; + m.line = w.line(); + m.line_start = w.line_start(); + *oi = m; + ++oi; + return true; + } +}; + +template <class Out, class iterator, class charT, class traits, class Allocator> +inline unsigned int reg_grep_old(Out oi, iterator first, iterator last, const reg_expression<charT, traits, Allocator>& e, unsigned flags = match_default) +{ + return reg_grep2(grep_adaptor<Out, iterator, Allocator>(oi, e.allocator()), first, last, e, flags, e.allocator()); +} + + + +JM_END_NAMESPACE // namespace regex + +#endif // __REGMATCH_H + + + + + + + |