|
5 | 5 | import re |
6 | 6 | import collections |
7 | 7 | import numbers |
8 | | -import codecs |
9 | | -import csv |
10 | 8 | import types |
11 | 9 | from datetime import datetime, timedelta |
12 | 10 | from functools import partial |
|
19 | 17 | import pandas.lib as lib |
20 | 18 | import pandas.tslib as tslib |
21 | 19 | from pandas import compat |
22 | | -from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types, iteritems |
| 20 | +from pandas.compat import BytesIO, range, long, u, zip, map, string_types, iteritems |
23 | 21 | from pandas.core.dtypes import CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType |
24 | 22 | from pandas.core.config import get_option |
25 | 23 |
|
@@ -2808,154 +2806,6 @@ def _all_none(*args): |
2808 | 2806 | return True |
2809 | 2807 |
|
2810 | 2808 |
|
class UTF8Recoder(object):

    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8.

    Parameters
    ----------
    f : file-like object
        Byte stream whose content is encoded in ``encoding``.
    encoding : str
        Name of the codec used to decode ``f``.
    """

    def __init__(self, f, encoding):
        # codecs.getreader returns the StreamReader class for the codec;
        # instantiating it around ``f`` yields decoded text on read.
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def read(self, bytes=-1):
        """Read from the wrapped reader and return the data as UTF-8."""
        return self.reader.read(bytes).encode('utf-8')

    def readline(self):
        """Read a single line and return it encoded as UTF-8."""
        return self.reader.readline().encode('utf-8')

    def next(self):
        """Return the next line encoded as UTF-8."""
        return next(self.reader).encode("utf-8")

    # Python 3 iterator
    __next__ = next
2834 | | - |
2835 | | - |
def _get_handle(path, mode, encoding=None, compression=None):
    """
    Get a file handle for the given path and mode.

    Parameters
    ----------
    path : str
        Path of the file to open.
    mode : str
        Mode passed to the underlying opener (``open``, ``gzip.GzipFile``
        or ``bz2.BZ2File``).
    encoding : str or None, default None
        Text encoding. On Python 3 it is forwarded to ``open`` or to the
        ``TextIOWrapper`` placed around a compressed stream.
    compression : {None, 'gzip', 'bz2'}, default None
        Compression applied to the file.

    Returns
    -------
    f : file-like object

    Raises
    ------
    ValueError
        If ``compression`` is not recognized, or if both ``encoding`` and
        ``compression`` are given on Python 2.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' %
                             compression)
        if compat.PY3:
            # the compressed stream yields bytes; wrap it so callers get text
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f

    if not compat.PY3:
        return open(path, mode)

    if encoding:
        return open(path, mode, encoding=encoding)

    if 'b' in mode:
        # binary mode does not accept ``errors``; passing it raises
        # ValueError, so open the file without it
        return open(path, mode)

    return open(path, mode, errors='replace')
2870 | | - |
2871 | | - |
if compat.PY3:  # pragma: no cover
    def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
        """Return a ``csv.reader`` over ``f``; ``encoding`` is ignored."""
        # ignore encoding
        return csv.reader(f, dialect=dialect, **kwds)

    def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
        """Return a ``csv.writer`` over ``f``; ``encoding`` is ignored."""
        return csv.writer(f, dialect=dialect, **kwds)
else:
    class UnicodeReader(object):

        """
        A CSV reader which will iterate over lines in the CSV file "f",
        which is encoded in the given encoding.

        On Python 3, this is replaced (above) by csv.reader, which handles
        unicode.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # csv.reader is fed UTF-8 bytes regardless of the source encoding
            f = UTF8Recoder(f, encoding)
            self.reader = csv.reader(f, dialect=dialect, **kwds)

        def next(self):
            row = next(self.reader)
            return [compat.text_type(s, "utf-8") for s in row]

        # python 3 iterator
        __next__ = next

        def __iter__(self):  # pragma: no cover
            return self

    class UnicodeWriter(object):

        """
        A CSV writer which will write rows to CSV file "f",
        which is encoded in the given encoding.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # Redirect output to a queue
            self.queue = StringIO()
            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
            self.stream = f
            self.encoder = codecs.getincrementalencoder(encoding)()
            self.quoting = kwds.get("quoting", None)

        def _check_as_is(self, x):
            # values passed to csv.writer unchanged: numbers when quoting is
            # QUOTE_NONNUMERIC, and anything that is already a ``str``
            return (self.quoting == csv.QUOTE_NONNUMERIC and
                    is_number(x)) or isinstance(x, str)

        def _encode_row(self, row):
            # build a new list so the caller's row is never modified
            return [x if self._check_as_is(x)
                    else pprint_thing(x).encode('utf-8') for x in row]

        def _flush_queue(self):
            # Fetch UTF-8 output from the queue ...
            data = self.queue.getvalue()
            data = data.decode("utf-8")
            # ... and reencode it into the target encoding
            data = self.encoder.encode(data)
            # write to the target stream
            self.stream.write(data)
            # empty queue
            self.queue.truncate(0)

        def writerow(self, row):
            """Encode ``row`` and write it to the target stream."""
            self.writer.writerow(self._encode_row(row))
            self._flush_queue()

        def writerows(self, rows):
            """
            Encode every row in ``rows`` and write them to the target stream.

            ``rows`` may be any iterable of rows; it is not modified.
            """
            self.writer.writerows([self._encode_row(row) for row in rows])
            self._flush_queue()
2957 | | - |
2958 | | - |
2959 | 2809 | def get_dtype_kinds(l): |
2960 | 2810 | """ |
2961 | 2811 | Parameters |
|
0 commit comments