Skip to content

Commit d4b6464

Browse files
authored
Merge pull request #20456 from calixteman/issue20225
When searching for a group of punctuation signs, only add extra spaces around the group
2 parents b41959b + 039b9e4 commit d4b6464

File tree

4 files changed: 39 additions and 6 deletions

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -762,3 +762,4 @@
762762
!extract_link.pdf
763763
!two_paragraphs.pdf
764764
!paragraph_and_link.pdf
765+
!issue20225.pdf

test/pdfs/issue20225.pdf

9.07 KB
Binary file not shown.

test/unit/pdf_find_controller_spec.js

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -634,8 +634,8 @@ describe("pdf_find_controller", function () {
634634
pageIndex: 0,
635635
matchIndex: 0,
636636
},
637-
pageMatches: [[1497]],
638-
pageMatchesLength: [[25]],
637+
pageMatches: [[1498]],
638+
pageMatchesLength: [[24]],
639639
});
640640
});
641641

@@ -1138,6 +1138,26 @@ describe("pdf_find_controller", function () {
11381138
});
11391139
});
11401140

1141+
it("performs a search with a group of punctuation signs", async () => {
1142+
const { eventBus, pdfFindController } =
1143+
await initPdfFindController("issue20225.pdf");
1144+
1145+
await testSearch({
1146+
eventBus,
1147+
pdfFindController,
1148+
state: {
1149+
query: "....",
1150+
},
1151+
matchesPerPage: [1],
1152+
selectedMatch: {
1153+
pageIndex: 0,
1154+
matchIndex: 0,
1155+
},
1156+
pageMatches: [[8]],
1157+
pageMatchesLength: [[4]],
1158+
});
1159+
});
1160+
11411161
describe("custom matcher", () => {
11421162
it("calls to the matcher with the right arguments", async () => {
11431163
const QUERY = "Foo bar";

web/pdf_find_controller.js

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ let DIACRITICS_EXCEPTION_STR; // Lazily initialized, see below.
7878

7979
const DIACRITICS_REG_EXP = /\p{M}+/gu;
8080
const SPECIAL_CHARS_REG_EXP =
81-
/([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu;
81+
/([*+^${}()|[\]\\])|(\p{P}+)|(\s+)|(\p{M})|(\p{L})/gu;
8282
const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u;
8383
const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u;
8484

@@ -708,6 +708,18 @@ class PDFFindController {
708708
#convertToRegExpString(query, hasDiacritics) {
709709
const { matchDiacritics } = this.#state;
710710
let isUnicode = false;
711+
const addExtraWhitespaces = (original, fixed) => {
712+
if (original === query) {
713+
return fixed;
714+
}
715+
if (query.startsWith(original)) {
716+
return `${fixed}[ ]*`;
717+
}
718+
if (query.endsWith(original)) {
719+
return `[ ]*${fixed}`;
720+
}
721+
return `[ ]*${fixed}[ ]*`;
722+
};
711723
query = query.replaceAll(
712724
SPECIAL_CHARS_REG_EXP,
713725
(
@@ -723,11 +735,11 @@ class PDFFindController {
723735

724736
if (p1) {
725737
// Escape characters like *+?... to not interfere with regexp syntax.
726-
return `[ ]*\\${p1}[ ]*`;
738+
return addExtraWhitespaces(p1, `\\${p1}`);
727739
}
728740
if (p2) {
729-
// Allow whitespaces around punctuation signs.
730-
return `[ ]*${p2}[ ]*`;
741+
// Allow whitespaces around group of punctuation signs.
742+
return addExtraWhitespaces(p2, p2.replaceAll(/[.?]/g, "\\$&"));
731743
}
732744
if (p3) {
733745
// Replace spaces by \s+ to be sure to match any spaces.

0 commit comments

Comments
 (0)