Skip to content

Commit dd29900

Browse files
committed
Test to ensure that charset detection from a <meta> tag works when preceded by an irrelevant <meta> tag.
1 parent c18e2ac commit dd29900

File tree

2 files changed

+12
-0
lines changed

2 files changed

+12
-0
lines changed

src/test/java/org/jsoup/integration/ParseTest.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,17 @@ public void testBaidu() throws IOException {
108108
doc.select("title").outerHtml());
109109
}
110110

111+
@Test
112+
public void testBaiduVariant() throws IOException {
113+
// tests charset detection from <meta http-equiv="Content-Type"> when preceded by another, unrelated <meta>
114+
File in = getFile("/htmltests/baidu-variant.html");
115+
Document doc = Jsoup.parse(in, null,
116+
"http://www.baidu.com/"); // http charset is gb2312, but NOT specifying it, to test http-equiv parse
117+
// check auto-detect from meta
118+
assertEquals("GB2312", doc.outputSettings().charset().displayName());
119+
assertEquals("<title>百度一下,你就知道</title>", doc.select("title").outerHtml());
120+
}
121+
111122
@Test
112123
public void testHtml5Charset() throws IOException {
113124
// test that <meta charset="gb2312"> works
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<!doctype html><html><head><meta name="test" content="Test"><meta http-equiv="Content-Type" content="text/html;charset=gb2312"><title>°Ù¶Èһϣ¬Äã¾ÍÖªµÀ</title>

0 commit comments

Comments
 (0)