perl: add a patch to fix a crash with invalid utf-8 characters, from LFS.
This commit is contained in:
parent
0d462ad48d
commit
e7585f23bc
|
@ -0,0 +1,167 @@
|
|||
Submitted By: Robert Connolly <robert at linuxfromscratch dot org> (ashes)
|
||||
Date: 2010-02-24
|
||||
Initial Package Version: 5.10.1
|
||||
Upstream Status: From upstream
|
||||
Origin: Git
|
||||
http://perl5.git.perl.org/perl.git/patch/0abd0d78a73da1c4d13b1c700526b7e5d03b32d4
|
||||
Description: Bug fix for invalid utf-8 characters causing Perl to crash.
|
||||
|
||||
http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2009-3626
|
||||
|
||||
From 0abd0d78a73da1c4d13b1c700526b7e5d03b32d4 Mon Sep 17 00:00:00 2001
|
||||
From: Yves Orton <demerphq@gmail.com>
|
||||
Date: Sun, 25 Oct 2009 20:37:08 +0100
|
||||
Subject: [PATCH] disable non-unicode case insensitive trie matching
|
||||
|
||||
Also revert 8902bb05b18c9858efa90229ca1ee42b17277554 as it merely
|
||||
masked one symptom of the deeper problems.
|
||||
|
||||
Also fixes RT #69973, which was a segfault which was exposed by
|
||||
8902bb05, see the ticket for further details.
|
||||
|
||||
http://rt.perl.org/rt3//Public/Bug/Display.html?id=69973
|
||||
|
||||
At the code of this is the problem that in unicode matching a bunch
|
||||
of code points have case folding rules beyond just A-Z/a-z. Since
|
||||
the case folding rules are decided at runtime by the string, we cant
|
||||
use the same TRIE tables for both unicode/non-unicode matching.
|
||||
|
||||
Until this is reconciled or some other solution is found case insensitive
|
||||
matching only gets the TRIE optimisation when the pattern is uniocde.
|
||||
|
||||
From CaseFolding.txt:
|
||||
|
||||
00B5; C; 03BC; # MICRO SIGN
|
||||
00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
|
||||
00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
00C6; C; 00E6; # LATIN CAPITAL LETTER AE
|
||||
00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
|
||||
00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
|
||||
00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
|
||||
00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
|
||||
00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
|
||||
00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
|
||||
---
|
||||
ext/re/t/regop.t | 12 ++++++------
|
||||
regcomp.c | 17 +++++++++++------
|
||||
regexec.c | 9 ++-------
|
||||
3 files changed, 19 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t
|
||||
index 9118bf6..46e6ec0 100644
|
||||
--- ext/re/t/regop.t
|
||||
+++ ext/re/t/regop.t
|
||||
@@ -231,12 +231,12 @@ anchored "ABC" at 0
|
||||
#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......
|
||||
%MATCHED%
|
||||
floating ""$ at 3..4 (checking floating)
|
||||
-1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]
|
||||
-stclass EXACTF <.> minlen 3
|
||||
-Found floating substr ""$ at offset 30...
|
||||
-Does not contradict STCLASS...
|
||||
-Guessed: match at offset 26
|
||||
-Matching stclass EXACTF <.> against ".exe"
|
||||
+#1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]
|
||||
+#stclass EXACTF <.> minlen 3
|
||||
+#Found floating substr ""$ at offset 30...
|
||||
+#Does not contradict STCLASS...
|
||||
+#Guessed: match at offset 26
|
||||
+#Matching stclass EXACTF <.> against ".exe"
|
||||
---
|
||||
#Compiling REx "[q]"
|
||||
#size 12 nodes Got 100 bytes for offset annotations.
|
||||
diff --git a/regcomp.c b/regcomp.c
|
||||
index 6e9fa26..eb5f12f 100644
|
||||
--- regcomp.c
|
||||
+++ regcomp.c
|
||||
@@ -2833,13 +2833,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
- Currently we assume that the trie can handle unicode and ascii
|
||||
- matches fold cased matches. If this proves true then the following
|
||||
- define will prevent tries in this situation.
|
||||
-
|
||||
- #define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT)
|
||||
-*/
|
||||
+ Currently we do not believe that the trie logic can
|
||||
+ handle case insensitive matching properly when the
|
||||
+ pattern is not unicode (thus forcing unicode semantics).
|
||||
+
|
||||
+ If/when this is fixed the following define can be swapped
|
||||
+ in below to fully enable trie logic.
|
||||
+
|
||||
#define TRIE_TYPE_IS_SAFE 1
|
||||
+
|
||||
+*/
|
||||
+#define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT)
|
||||
+
|
||||
if ( last && TRIE_TYPE_IS_SAFE ) {
|
||||
make_trie( pRExC_state,
|
||||
startbranch, first, cur, tail, count,
|
||||
diff --git a/regexec.c b/regexec.c
|
||||
index 402ede3..ec09c28 100644
|
||||
--- regexec.c
|
||||
+++ regexec.c
|
||||
@@ -1105,16 +1105,15 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
|
||||
|
||||
#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
|
||||
uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
|
||||
- UV uvc_unfolded = 0; \
|
||||
switch (trie_type) { \
|
||||
case trie_utf8_fold: \
|
||||
if ( foldlen>0 ) { \
|
||||
- uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
|
||||
+ uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
|
||||
foldlen -= len; \
|
||||
uscan += len; \
|
||||
len=0; \
|
||||
} else { \
|
||||
- uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
|
||||
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
|
||||
uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
|
||||
foldlen -= UNISKIP( uvc ); \
|
||||
uscan = foldbuf + UNISKIP( uvc ); \
|
||||
@@ -1140,7 +1139,6 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
|
||||
uvc = (UV)*uc; \
|
||||
len = 1; \
|
||||
} \
|
||||
- \
|
||||
if (uvc < 256) { \
|
||||
charid = trie->charmap[ uvc ]; \
|
||||
} \
|
||||
@@ -1153,9 +1151,6 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
|
||||
charid = (U16)SvIV(*svpp); \
|
||||
} \
|
||||
} \
|
||||
- if (!charid && trie_type == trie_utf8_fold && !UTF) { \
|
||||
- charid = trie->charmap[uvc_unfolded]; \
|
||||
- } \
|
||||
} STMT_END
|
||||
|
||||
#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
|
||||
--
|
||||
1.6.5.2.74.g610f9.dirty
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
# Template build file for 'perl'.
|
||||
pkgname=perl
|
||||
version=5.10.1
|
||||
revision=1
|
||||
distfiles="http://www.cpan.org/src/$pkgname-$version.tar.gz"
|
||||
build_style=configure
|
||||
configure_script="./Configure"
|
||||
|
@ -45,6 +46,4 @@ pre_configure()
|
|||
-Dsitelib=${p5_base}/site_perl/${p5_apiver} \
|
||||
-Dvendorlib=${p5_base}/vendor_perl/${p5_apiver}"
|
||||
export configure_args="${args}"
|
||||
|
||||
sed -i -e "s|'/bin/pwd'|'/usr/local/bin/pwd'|" lib/Cwd.pm
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue