@@ -149,32 +149,8 @@ impl CharReader {
149149 Encoding :: Unknown | Encoding :: Utf16 => {
150150 buf[ pos] = next;
151151 pos += 1 ;
152-
153- // sniff BOM
154- if pos <= 3 && buf[ ..pos] == [ 0xEF , 0xBB , 0xBF ] [ ..pos] {
155- if pos == 3 && self . encoding != Encoding :: Utf16 {
156- pos = 0 ;
157- self . encoding = Encoding :: Utf8 ;
158- }
159- } else if pos <= 2 && buf[ ..pos] == [ 0xFE , 0xFF ] [ ..pos] {
160- if pos == 2 {
161- pos = 0 ;
162- self . encoding = Encoding :: Utf16Be ;
163- }
164- } else if pos <= 2 && buf[ ..pos] == [ 0xFF , 0xFE ] [ ..pos] {
165- if pos == 2 {
166- pos = 0 ;
167- self . encoding = Encoding :: Utf16Le ;
168- }
169- } else if pos == 1 && self . encoding == Encoding :: Utf16 {
170- // sniff ASCII char in UTF-16
171- self . encoding = if next == 0 { Encoding :: Utf16Be } else { Encoding :: Utf16Le } ;
172- } else {
173- // UTF-8 is the default, but XML decl can change it to other 8-bit encoding
174- self . encoding = Encoding :: Default ;
175- if pos == 1 && next. is_ascii ( ) {
176- return Ok ( Some ( next. into ( ) ) ) ;
177- }
152+ if let Some ( value) = self . sniff_bom ( & buf[ ..pos] , & mut pos) {
153+ return value;
178154 }
179155 } ,
180156 Encoding :: Utf16Be => {
@@ -206,6 +182,37 @@ impl CharReader {
206182 }
207183 }
208184 }
185+
186+ #[ cold]
187+ fn sniff_bom ( & mut self , buf : & [ u8 ] , pos : & mut usize ) -> Option < Result < Option < char > , CharReadError > > {
188+ // sniff BOM
189+ if buf. len ( ) <= 3 && [ 0xEF , 0xBB , 0xBF ] . starts_with ( buf) {
190+ if buf. len ( ) == 3 && self . encoding != Encoding :: Utf16 {
191+ * pos = 0 ;
192+ self . encoding = Encoding :: Utf8 ;
193+ }
194+ } else if buf. len ( ) <= 2 && [ 0xFE , 0xFF ] . starts_with ( buf) {
195+ if buf. len ( ) == 2 {
196+ * pos = 0 ;
197+ self . encoding = Encoding :: Utf16Be ;
198+ }
199+ } else if buf. len ( ) <= 2 && [ 0xFF , 0xFE ] . starts_with ( buf) {
200+ if buf. len ( ) == 2 {
201+ * pos = 0 ;
202+ self . encoding = Encoding :: Utf16Le ;
203+ }
204+ } else if buf. len ( ) == 1 && self . encoding == Encoding :: Utf16 {
205+ // sniff ASCII char in UTF-16
206+ self . encoding = if buf[ 0 ] == 0 { Encoding :: Utf16Be } else { Encoding :: Utf16Le } ;
207+ } else {
208+ // UTF-8 is the default, but XML decl can change it to other 8-bit encoding
209+ self . encoding = Encoding :: Default ;
210+ if buf. len ( ) == 1 && buf[ 0 ] . is_ascii ( ) {
211+ return Some ( Ok ( Some ( buf[ 0 ] . into ( ) ) ) ) ;
212+ }
213+ }
214+ None
215+ }
209216}
210217
211218#[ cfg( test) ]
0 commit comments