yann@1365: Original patch from: gentoo/src/patchsets/glibc/2.9/0052_all_glibc-2.9-regex-BZ9697.patch yann@1365: yann@1365: -= BEGIN original header =- yann@1365: http://sourceware.org/ml/libc-alpha/2009-01/msg00005.html yann@1365: yann@1365: From ea8ca0dfcbf2721bcf2c08ce3c01d5764b827572 Mon Sep 17 00:00:00 2001 yann@1365: From: Ulrich Drepper yann@1365: Date: Thu, 8 Jan 2009 00:42:28 +0000 yann@1365: Subject: [PATCH] (re_compile_fastmap_iter): Rewrite COMPLEX_BRACKET handling. yann@1365: yann@1365: -= END original header =- yann@1365: yann@1365: diff -durN glibc-2_9.orig/posix/regcomp.c glibc-2_9/posix/regcomp.c yann@1365: --- glibc-2_9.orig/posix/regcomp.c 2008-05-15 05:07:21.000000000 +0200 yann@1365: +++ glibc-2_9/posix/regcomp.c 2009-02-02 22:00:41.000000000 +0100 yann@1365: @@ -350,47 +350,67 @@ yann@1365: #ifdef RE_ENABLE_I18N yann@1365: else if (type == COMPLEX_BRACKET) yann@1365: { yann@1365: - int i; yann@1365: re_charset_t *cset = dfa->nodes[node].opr.mbcset; yann@1365: - if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes yann@1365: - || cset->nranges || cset->nchar_classes) yann@1365: - { yann@1365: + int i; yann@1365: + yann@1365: # ifdef _LIBC yann@1365: - if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) yann@1365: + /* See if we have to try all bytes which start multiple collation yann@1365: + elements. yann@1365: + e.g. In da_DK, we want to catch 'a' since "aa" is a valid yann@1365: + collation element, and don't catch 'b' since 'b' is yann@1365: + the only collation element which starts from 'b' (and yann@1365: + it is caught by SIMPLE_BRACKET). */ yann@1365: + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0 yann@1365: + && (cset->ncoll_syms || cset->nranges)) yann@1365: { yann@1365: - /* In this case we want to catch the bytes which are yann@1365: - the first byte of any collation elements. yann@1365: - e.g. In da_DK, we want to catch 'a' since "aa" yann@1365: - is a valid collation element, and don't catch yann@1365: - 'b' since 'b' is the only collation element yann@1365: - which starts from 'b'. */ yann@1365: const int32_t *table = (const int32_t *) yann@1365: _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); yann@1365: for (i = 0; i < SBC_MAX; ++i) yann@1365: if (table[i] < 0) yann@1365: re_set_fastmap (fastmap, icase, i); yann@1365: } yann@1365: -# else yann@1365: - if (dfa->mb_cur_max > 1) yann@1365: - for (i = 0; i < SBC_MAX; ++i) yann@1365: - if (__btowc (i) == WEOF) yann@1365: - re_set_fastmap (fastmap, icase, i); yann@1365: -# endif /* not _LIBC */ yann@1365: - } yann@1365: - for (i = 0; i < cset->nmbchars; ++i) yann@1365: +# endif /* _LIBC */ yann@1365: + yann@1365: + /* See if we have to start the match at all multibyte characters, yann@1365: + i.e. where we would not find an invalid sequence. This only yann@1365: + applies to multibyte character sets; for single byte character yann@1365: + sets, the SIMPLE_BRACKET again suffices. */ yann@1365: + if (dfa->mb_cur_max > 1 yann@1365: + && (cset->nchar_classes || cset->non_match yann@1365: +# ifdef _LIBC yann@1365: + || cset->nequiv_classes yann@1365: +# endif /* _LIBC */ yann@1365: + )) yann@1365: { yann@1365: - char buf[256]; yann@1365: - mbstate_t state; yann@1365: - memset (&state, '\0', sizeof (state)); yann@1365: - if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) yann@1365: - re_set_fastmap (fastmap, icase, *(unsigned char *) buf); yann@1365: - if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) yann@1365: + unsigned char c = 0; yann@1365: + do yann@1365: { yann@1365: - if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) yann@1365: - != (size_t) -1) yann@1365: - re_set_fastmap (fastmap, 0, *(unsigned char *) buf); yann@1365: + mbstate_t mbs; yann@1365: + memset (&mbs, 0, sizeof (mbs)); yann@1365: + if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2) yann@1365: + re_set_fastmap (fastmap, false, (int) c); yann@1365: } yann@1365: + while (++c != 0); yann@1365: } yann@1365: + yann@1365: + else yann@1365: + { yann@1365: + /* ... Else catch all bytes which can start the mbchars. */ yann@1365: + for (i = 0; i < cset->nmbchars; ++i) yann@1365: + { yann@1365: + char buf[256]; yann@1365: + mbstate_t state; yann@1365: + memset (&state, '\0', sizeof (state)); yann@1365: + if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) yann@1365: + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); yann@1365: + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) yann@1365: + { yann@1365: + if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) yann@1365: + != (size_t) -1) yann@1365: + re_set_fastmap (fastmap, false, *(unsigned char *) buf); yann@1365: + } yann@1365: + } yann@1365: + } yann@1365: } yann@1365: #endif /* RE_ENABLE_I18N */ yann@1365: else if (type == OP_PERIOD