Skip to content

Commit 1387872

Browse files
committed
Mark BOM sniff as cold
1 parent 6bb2e0a commit 1387872

File tree

1 file changed

+33
-26
lines changed

1 file changed

+33
-26
lines changed

src/util.rs

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -149,32 +149,8 @@ impl CharReader {
149149
Encoding::Unknown | Encoding::Utf16 => {
150150
buf[pos] = next;
151151
pos += 1;
152-
153-
// sniff BOM
154-
if pos <= 3 && buf[..pos] == [0xEF, 0xBB, 0xBF][..pos] {
155-
if pos == 3 && self.encoding != Encoding::Utf16 {
156-
pos = 0;
157-
self.encoding = Encoding::Utf8;
158-
}
159-
} else if pos <= 2 && buf[..pos] == [0xFE, 0xFF][..pos] {
160-
if pos == 2 {
161-
pos = 0;
162-
self.encoding = Encoding::Utf16Be;
163-
}
164-
} else if pos <= 2 && buf[..pos] == [0xFF, 0xFE][..pos] {
165-
if pos == 2 {
166-
pos = 0;
167-
self.encoding = Encoding::Utf16Le;
168-
}
169-
} else if pos == 1 && self.encoding == Encoding::Utf16 {
170-
// sniff ASCII char in UTF-16
171-
self.encoding = if next == 0 { Encoding::Utf16Be } else { Encoding::Utf16Le };
172-
} else {
173-
// UTF-8 is the default, but XML decl can change it to other 8-bit encoding
174-
self.encoding = Encoding::Default;
175-
if pos == 1 && next.is_ascii() {
176-
return Ok(Some(next.into()));
177-
}
152+
if let Some(value) = self.sniff_bom(&buf[..pos], &mut pos) {
153+
return value;
178154
}
179155
},
180156
Encoding::Utf16Be => {
@@ -206,6 +182,37 @@ impl CharReader {
206182
}
207183
}
208184
}
185+
186+
/// Inspects the first bytes of the input to detect a byte-order mark (BOM)
/// and updates `self.encoding` accordingly.
///
/// `buf` holds the bytes accumulated so far for the current character and
/// `pos` is the caller's write index into its byte buffer. When a complete
/// BOM is recognised, `*pos` is reset to 0 so the BOM bytes are overwritten
/// by subsequent input instead of being decoded as a character.
///
/// Returns `Some(result)` when the caller should return `result` right away
/// (only the single-ASCII-byte case below), and `None` when the caller
/// should carry on with its normal processing.
///
/// Marked `#[cold]` as a hint that this path is rarely taken; the caller
/// only invokes it while the encoding is `Encoding::Unknown` or
/// `Encoding::Utf16`.
#[cold]
187+
fn sniff_bom(&mut self, buf: &[u8], pos: &mut usize) -> Option<Result<Option<char>, CharReadError>> {
188+
// Sniff the BOM. Each `starts_with(buf)` test is true while `buf` is still
// a prefix of that BOM, so a partial match falls into the branch, changes
// nothing, and returns `None` so that more bytes can be gathered.
189+
if buf.len() <= 3 && [0xEF, 0xBB, 0xBF].starts_with(buf) {
190+
// Complete UTF-8 BOM (EF BB BF). It is left untouched if the encoding
// has already been set to `Encoding::Utf16`.
191+
if buf.len() == 3 && self.encoding != Encoding::Utf16 {
192+
*pos = 0;
193+
self.encoding = Encoding::Utf8;
194+
}
195+
} else if buf.len() <= 2 && [0xFE, 0xFF].starts_with(buf) {
196+
// Complete UTF-16 big-endian BOM (FE FF).
197+
if buf.len() == 2 {
198+
*pos = 0;
199+
self.encoding = Encoding::Utf16Be;
200+
}
201+
} else if buf.len() <= 2 && [0xFF, 0xFE].starts_with(buf) {
202+
// Complete UTF-16 little-endian BOM (FF FE).
203+
if buf.len() == 2 {
204+
*pos = 0;
205+
self.encoding = Encoding::Utf16Le;
206+
}
207+
} else if buf.len() == 1 && self.encoding == Encoding::Utf16 {
208+
// No BOM, but the encoding is known to be UTF-16: sniff an ASCII char.
209+
// A zero first byte selects big-endian; any other value selects
210+
// little-endian.
211+
self.encoding = if buf[0] == 0 { Encoding::Utf16Be } else { Encoding::Utf16Le };
212+
} else {
213+
// UTF-8 is the default, but XML decl can change it to other 8-bit encoding
214+
self.encoding = Encoding::Default;
215+
// A lone ASCII byte is already a complete character, so hand it back
216+
// to the caller immediately.
217+
if buf.len() == 1 && buf[0].is_ascii() {
218+
return Some(Ok(Some(buf[0].into())));
219+
}
220+
}
221+
// Nothing to return early; the caller continues with its own handling.
222+
None
223+
}
209216
}
210217

211218
#[cfg(test)]

0 commit comments

Comments
 (0)