patches/glibc/2.9/140-regex-BZ9697.patch
author Anthony Foiani <anthony.foiani@gmail.com>
Thu May 19 23:06:16 2011 +0200 (2011-05-19)
changeset 2461 ec30b191f0e3
parent 1201 c9967a6e3b25
permissions -rw-r--r--
complibs/ppl: build only C and C++ interfaces for PPL

By default, PPL wants to build interfaces for any of a variety of
languages it finds on the local host (python, java, possibly perl, also
more esoteric languages such as ocaml and prolog).

These extra interfaces can double the compile time for the library. For
single-process builds, I found a savings of more than 40%:

default / j1: 716s total, 143.2s avg, 0.52s stdev
just_c / j1: 406s total, 81.2s avg, 0.33s stdev
just_c_cpp / j1: 413s total, 82.6s avg, 0.22s stdev

And for multi-process builds, it approached 50%:

default / j4: 625s total, 125.0s avg, 0.57s stdev
just_c / j4: 338s total, 67.6s avg, 1.25s stdev
just_c_cpp / j4: 327s total, 65.4s avg, 0.36s stdev

Since the PPL we build within ct-ng is only used by GCC, we only need to
build the C and C++ interfaces.

Signed-Off-By: Anthony Foiani <anthony.foiani@gmail.com>
     1 Original patch from: gentoo/src/patchsets/glibc/2.9/0052_all_glibc-2.9-regex-BZ9697.patch
     2 
     3 -= BEGIN original header =-
     4 http://sourceware.org/ml/libc-alpha/2009-01/msg00005.html
     5 
     6 From ea8ca0dfcbf2721bcf2c08ce3c01d5764b827572 Mon Sep 17 00:00:00 2001
     7 From: Ulrich Drepper <drepper@redhat.com>
     8 Date: Thu, 8 Jan 2009 00:42:28 +0000
     9 Subject: [PATCH] (re_compile_fastmap_iter): Rewrite COMPLEX_BRACKET handling.
    10 
    11 -= END original header =-
    12 
    13 diff -durN glibc-2_9.orig/posix/regcomp.c glibc-2_9/posix/regcomp.c
    14 --- glibc-2_9.orig/posix/regcomp.c	2008-05-15 05:07:21.000000000 +0200
    15 +++ glibc-2_9/posix/regcomp.c	2009-02-02 22:00:41.000000000 +0100
    16 @@ -350,47 +350,67 @@
    17  #ifdef RE_ENABLE_I18N
    18        else if (type == COMPLEX_BRACKET)
    19  	{
    20 -	  int i;
    21  	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
    22 -	  if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
    23 -	      || cset->nranges || cset->nchar_classes)
    24 -	    {
    25 +	  int i;
    26 +
    27  # ifdef _LIBC
    28 -	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
    29 +	  /* See if we have to try all bytes which start multiple collation
    30 +	     elements.
    31 +	     e.g. In da_DK, we want to catch 'a' since "aa" is a valid
    32 +		  collation element, and don't catch 'b' since 'b' is
    33 +		  the only collation element which starts from 'b' (and
    34 +		  it is caught by SIMPLE_BRACKET).  */
    35 +	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
    36 +		  && (cset->ncoll_syms || cset->nranges))
    37  		{
    38 -		  /* In this case we want to catch the bytes which are
    39 -		     the first byte of any collation elements.
    40 -		     e.g. In da_DK, we want to catch 'a' since "aa"
    41 -			  is a valid collation element, and don't catch
    42 -			  'b' since 'b' is the only collation element
    43 -			  which starts from 'b'.  */
    44  		  const int32_t *table = (const int32_t *)
    45  		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
    46  		  for (i = 0; i < SBC_MAX; ++i)
    47  		    if (table[i] < 0)
    48  		      re_set_fastmap (fastmap, icase, i);
    49  		}
    50 -# else
    51 -	      if (dfa->mb_cur_max > 1)
    52 -		for (i = 0; i < SBC_MAX; ++i)
    53 -		  if (__btowc (i) == WEOF)
    54 -		    re_set_fastmap (fastmap, icase, i);
    55 -# endif /* not _LIBC */
    56 -	    }
    57 -	  for (i = 0; i < cset->nmbchars; ++i)
    58 +# endif /* _LIBC */
    59 +
    60 +	  /* See if we have to start the match at all multibyte characters,
    61 +	     i.e. where we would not find an invalid sequence.  This only
    62 +	     applies to multibyte character sets; for single byte character
    63 +	     sets, the SIMPLE_BRACKET again suffices.  */
    64 +	  if (dfa->mb_cur_max > 1
    65 +	      && (cset->nchar_classes || cset->non_match
    66 +# ifdef _LIBC
    67 +		  || cset->nequiv_classes
    68 +# endif /* _LIBC */
    69 +		 ))
    70  	    {
    71 -	      char buf[256];
    72 -	      mbstate_t state;
    73 -	      memset (&state, '\0', sizeof (state));
    74 -	      if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
    75 -		re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
    76 -	      if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
    77 +	      unsigned char c = 0;
    78 +	      do
    79  		{
    80 -		  if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
    81 -		      != (size_t) -1)
    82 -		    re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
    83 +		  mbstate_t mbs;
    84 +		  memset (&mbs, 0, sizeof (mbs));
    85 +		  if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
    86 +		    re_set_fastmap (fastmap, false, (int) c);
    87  		}
    88 +	      while (++c != 0);
    89  	    }
    90 +
    91 +	  else
    92 +	    {
    93 +	      /* ... Else catch all bytes which can start the mbchars.  */
    94 +	      for (i = 0; i < cset->nmbchars; ++i)
    95 +		{
    96 +		  char buf[256];
    97 +		  mbstate_t state;
    98 +		  memset (&state, '\0', sizeof (state));
    99 +		  if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
   100 +		    re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
   101 +		  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
   102 +		    {
   103 +		      if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
   104 +			  != (size_t) -1)
   105 +			re_set_fastmap (fastmap, false, *(unsigned char *) buf);
   106 +		    }
   107 + 		}
   108 + 	    }
   109  	}
   110  #endif /* RE_ENABLE_I18N */
   111        else if (type == OP_PERIOD