From 75fc2c4e684a81731f18147635862a73609233e1 Mon Sep 17 00:00:00 2001
From: "Christoph M. Becker" <cmbecker69@gmx.de>
Date: Wed, 23 Jun 2021 15:48:48 +0200
Subject: [PATCH 1/4] Fix #74264: grapheme_sttrpos() broken for negative
offsets
We must not assume that `usearch_last()` gives the proper result for
negative offsets. Instead we'd need to continue to search backwards
(`usearch_previous`) until we find a proper match. However, apparently
searching backwards is broken, so we work around by searching forward
from the start of the string until we pass the `offset_pos`, and then
use the previous result.
---
ext/intl/grapheme/grapheme_util.c | 24 ++++++++++++++++++------
ext/intl/tests/bug74264.phpt | 26 ++++++++++++++++++++++++++
2 files changed, 44 insertions(+), 6 deletions(-)
create mode 100644 ext/intl/tests/bug74264.phpt
diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c
index 5bc23be2093b..926614deb768 100644
--- a/ext/intl/grapheme/grapheme_util.c
+++ b/ext/intl/grapheme/grapheme_util.c
@@ -133,7 +133,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last)
{
UChar *uhaystack = NULL, *uneedle = NULL;
- int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
+ int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0, prev_pos = USEARCH_DONE;
unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
UBreakIterator* bi = NULL;
UErrorCode status;
@@ -179,16 +179,28 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
STRPOS_CHECK_STATUS(status, "Invalid search offset");
}
status = U_ZERO_ERROR;
- usearch_setOffset(src, offset_pos, &status);
+ usearch_setOffset(src, last ? 0 : offset_pos, &status);
STRPOS_CHECK_STATUS(status, "Invalid search offset");
}
if(last) {
- char_pos = usearch_last(src, &status);
- if(char_pos < offset_pos) {
- /* last one is beyound our start offset */
- char_pos = USEARCH_DONE;
+ if (offset >= 0) {
+ char_pos = usearch_last(src, &status);
+ if(char_pos < offset_pos) {
+ /* last one is beyond our start offset */
+ char_pos = USEARCH_DONE;
+ }
+ } else {
+ /* searching backwards is broken, so we search forwards, albeit it's less efficient */
+ do {
+ char_pos = usearch_next(src, &status);
+ if (char_pos == USEARCH_DONE || char_pos > offset_pos) {
+ char_pos = prev_pos;
+ break;
+ }
+ prev_pos = char_pos;
+ } while(1);
}
} else {
char_pos = usearch_next(src, &status);
diff --git a/ext/intl/tests/bug74264.phpt b/ext/intl/tests/bug74264.phpt
new file mode 100644
index 000000000000..13826cb4b094
--- /dev/null
+++ b/ext/intl/tests/bug74264.phpt
@@ -0,0 +1,26 @@
+--TEST--
+Bug #74264 (grapheme_sttrpos() broken for negative offsets)
+--SKIPIF--
+<?php
+if (!extension_loaded('intl')) die("skip intl extension not available");
+?>
+--FILE--
+<?php
+foreach (range(-5, -1) as $offset) {
+ var_dump(
+ grapheme_strrpos('déjàààà', 'à', $offset),
+ grapheme_strripos('DÉJÀÀÀÀ', 'à', $offset)
+ );
+}
+?>
+--EXPECT--
+bool(false)
+bool(false)
+int(3)
+int(3)
+int(4)
+int(4)
+int(5)
+int(5)
+int(6)
+int(6)
From 7f75d59f5257cf52bba57396a4dc0e813bdfc1b0 Mon Sep 17 00:00:00 2001
From: "Christoph M. Becker" <cmbecker69@gmx.de>
Date: Mon, 5 Jul 2021 14:41:34 +0200
Subject: [PATCH 2/4] Search backwards using overlap flag
---
ext/intl/grapheme/grapheme_util.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c
index 926614deb768..25468bf74bea 100644
--- a/ext/intl/grapheme/grapheme_util.c
+++ b/ext/intl/grapheme/grapheme_util.c
@@ -179,7 +179,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
STRPOS_CHECK_STATUS(status, "Invalid search offset");
}
status = U_ZERO_ERROR;
- usearch_setOffset(src, last ? 0 : offset_pos, &status);
+ usearch_setOffset(src, offset_pos + (last && offset < 0), &status);
STRPOS_CHECK_STATUS(status, "Invalid search offset");
}
@@ -192,15 +192,9 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
char_pos = USEARCH_DONE;
}
} else {
- /* searching backwards is broken, so we search forwards, albeit it's less efficient */
- do {
- char_pos = usearch_next(src, &status);
- if (char_pos == USEARCH_DONE || char_pos > offset_pos) {
- char_pos = prev_pos;
- break;
- }
- prev_pos = char_pos;
- } while(1);
+ usearch_setAttribute(src, USEARCH_OVERLAP, USEARCH_ON, &status);
+ STRPOS_CHECK_STATUS(status, "Error setting overlap flag");
+ char_pos = usearch_previous(src, &status);
}
} else {
char_pos = usearch_next(src, &status);
From ec6ffc440dfb7f40c78d09f706aa5e8f601faa89 Mon Sep 17 00:00:00 2001
From: "Christoph M. Becker" <cmbecker69@gmx.de>
Date: Mon, 5 Jul 2021 15:52:05 +0200
Subject: [PATCH 3/4] Revert "Search backwards using overlap flag"
This reverts commit 7f75d59f5257cf52bba57396a4dc0e813bdfc1b0.
---
ext/intl/grapheme/grapheme_util.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c
index 25468bf74bea..926614deb768 100644
--- a/ext/intl/grapheme/grapheme_util.c
+++ b/ext/intl/grapheme/grapheme_util.c
@@ -179,7 +179,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
STRPOS_CHECK_STATUS(status, "Invalid search offset");
}
status = U_ZERO_ERROR;
- usearch_setOffset(src, offset_pos + (last && offset < 0), &status);
+ usearch_setOffset(src, last ? 0 : offset_pos, &status);
STRPOS_CHECK_STATUS(status, "Invalid search offset");
}
@@ -192,9 +192,15 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
char_pos = USEARCH_DONE;
}
} else {
- usearch_setAttribute(src, USEARCH_OVERLAP, USEARCH_ON, &status);
- STRPOS_CHECK_STATUS(status, "Error setting overlap flag");
- char_pos = usearch_previous(src, &status);
+ /* searching backwards is broken, so we search forwards, albeit it's less efficient */
+ do {
+ char_pos = usearch_next(src, &status);
+ if (char_pos == USEARCH_DONE || char_pos > offset_pos) {
+ char_pos = prev_pos;
+ break;
+ }
+ prev_pos = char_pos;
+ } while(1);
}
} else {
char_pos = usearch_next(src, &status);
From 56681033f39fe5e56f429bff3f4d74c5a3cf7cd5 Mon Sep 17 00:00:00 2001
From: "Christoph M. Becker" <cmbecker69@gmx.de>
Date: Mon, 5 Jul 2021 16:04:14 +0200
Subject: [PATCH 4/4] Move prev_pos declaration into narrower scope
---
ext/intl/grapheme/grapheme_util.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c
index 926614deb768..8a21fa51ee4a 100644
--- a/ext/intl/grapheme/grapheme_util.c
+++ b/ext/intl/grapheme/grapheme_util.c
@@ -133,7 +133,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last)
{
UChar *uhaystack = NULL, *uneedle = NULL;
- int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0, prev_pos = USEARCH_DONE;
+ int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
UBreakIterator* bi = NULL;
UErrorCode status;
@@ -193,6 +193,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
}
} else {
/* searching backwards is broken, so we search forwards, albeit it's less efficient */
+ int32_t prev_pos = USEARCH_DONE;
do {
char_pos = usearch_next(src, &status);
if (char_pos == USEARCH_DONE || char_pos > offset_pos) {