1
   #!/usr/bin/python

import csv
import re

# Exact cell values to look for; a row is kept when any of its cells is
# equal to one of them (no substring or case-insensitive matching).
string_1 = 'OneTouch AT'
string_2 = 'LinkRunner AT'
string_3 = 'AirCheck'
wanted = (string_1, string_2, string_3)

print("hello Pythong! ")

# Binary modes are what the Python 2 csv module expects. The with-statement
# closes both files even if reading or writing fails part-way through.
with open('data.csv', "rb") as inFile, open('data2.csv', "wb") as outFile:
    reader = csv.reader(inFile)
    writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

    for row in reader:
        # any() stops at the first matching cell, so each matching row is
        # written exactly once and in full.
        if any(col in wanted for col in row):
            writer.writerow(row)

I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.

Sample Data File

LinkRunner AT Video,10,20
Wireless Performance Video OneTouch AT,1,2
Wired OneTouch AT,200,300
LinkRunner AT,200,300
AirCheck,200,300
1
  • The only thing that is not working currently is that I would like to print out only the 1st and 3rd columns. Also, ideally it would print out the AirCheck lines first, then the LinkRunner AT lines, then the OneTouch AT lines. Commented Feb 13, 2014 at 21:33

3 Answers 3

3

I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.

Try this:

import csv

# Values to keep; a row is written when its first column equals one of these.
search_for = ['OneTouch AT','LinkRunner AT','AirCheck']

with open('in.csv') as inf, open('out.csv','w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        # csv.reader yields an empty list for a blank line; test `row` first
        # so that row[0] cannot raise IndexError.
        if row and row[0] in search_for:
            print('Found: {}'.format(row))
            writer.writerow(row)
Sign up to request clarification or add additional context in comments.

Comments

0
#!/usr/bin/python

import csv
import numpy as np

class search_csv(object):
    """Filter the rows of a tab-delimited file and write selected columns out.

    The input file is read completely when the object is created. The output
    file is opened for writing at the same time and stays open afterwards.

    Attributes:
        non_numpy_data: the parsed rows, as a list of lists of strings.
        data: the same rows converted to a NumPy array (used by ``searcher``).
        outfile: the open output file object.
        writer_: tab-delimited ``csv.writer`` bound to ``outfile``.
    """

    def __init__(self, infile, outfile):
        """Read ``infile`` into memory and open ``outfile`` for writing.

        Args:
            infile: path of the tab-delimited file to search.
            outfile: path of the file the matching rows are written to.
        """
        # The input is only needed while parsing, so close it right away.
        with open(infile, 'rb') as handle:
            read_infile = list(csv.reader(handle, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL))
        self.non_numpy_data = read_infile
        self.data = np.array(read_infile, dtype=None)
        self.outfile = open(outfile, 'wb')
        self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    def write_to(self, matched_values):
        """Write ``matched_values`` (an iterable of rows) to the output file.

        Returns:
            True, always.
        """
        self.writer_.writerows(matched_values)
        # non_numpy_search never closes the output file, so flush here to make
        # sure the rows do not stay in the write buffer.
        self.outfile.flush()
        print(' Matched Values Written ')
        return True

    def searcher(self, items, return_cols=None):
        """NumPy-based search: write matching rows, then close the output.

        A row matches when any of its cells is exactly equal to one of
        ``items``. The output file is closed afterwards, so this can be
        called only once per object.

        Args:
            items: list of strings to look for, e.g.
                ['OneTouch AT', 'LinkRunner AT', 'AirCheck'].
            return_cols: column indexes to write for each matching row;
                defaults to [0, 2] (first and third column).

        Returns:
            True, always.
        """
        if return_cols is None:
            return_cols = [0, 2]
        find_these = np.array(items, dtype=None)
        # in1d works on the flattened array; reshape to get a per-cell mask.
        hits = np.in1d(self.data, find_these).reshape(self.data.shape)
        # Collapse to one flag per row so that a row with several matching
        # cells is written once instead of once per matching cell.
        matching_y = hits.any(axis=1).nonzero()[0]
        matching_data = self.data[matching_y][:, return_cols]
        self.write_to(matching_data)
        self.outfile.close()
        return True

    def non_numpy_search(self, items, return_cols=None):
        """Pure-Python search: write the selected columns of matching rows.

        A row matches when any of its cells is exactly equal to one of
        ``items``. Each matching row is written once. The output file is
        left open.

        Args:
            items: list of strings to look for.
            return_cols: column indexes to write for each matching row;
                defaults to [0, 2] (first and third column).

        Returns:
            True, always.

        Raises:
            IndexError: if a matching row has no column at one of the
                ``return_cols`` indexes.
        """
        if return_cols is None:
            return_cols = [0, 2]
        lst = []
        for row in self.non_numpy_data:
            # Every item is tried, not just the first one; any() stops at the
            # first hit so a row is collected at most once.
            if any(item in row for item in items):
                lst.append([row[idx] for idx in return_cols])
        self.write_to(lst)
        return True


### now use the class ###

# Values to look for; each must equal a whole cell exactly.
SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'

# non_numpy_search is a method of search_csv, so build the object first and
# then call the method on it.
search_csv(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)

By the phrasing of your question I'm assuming you just want to complete the task at hand and don't really care how. So copy and paste this in, use your data file as the 'IN_FILE' value and the file name you want to write to as the 'OUT_FILE' value. Place the values you want to search for in the 'SEARCHING_FOR' list and you're done.

Things to note.... SEARCHING_FOR should be a list.

The values in SEARCHING_FOR are matched EXACTLY, so 'A' will not match 'a'. If you want to use a regex or something more complex, let me know.

In function 'non_numpy_search' there is a 'return_cols' parameter. It defaults to the first and 3rd column.

If you don't have numpy let me know.

7 Comments

I got the following message: Traceback (most recent call last): File "csvparserV2.0.py", line 34, in <module> search_csv(IN_FILE, OUT_FILE).searcher(SEARCHING_FOR) File "csvparserV2.0.py", line 21, in searcher matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0] File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/numpy/lib/arraysetops.py", line 335, in in1d order = ar.argsort(kind='mergesort') TypeError: requested sort not available for type
do you know what version of numpy you're running? print np.__version__
if the file isn't ginormous the edit I just posted should work fine.
also with the 'non_numpy_search' if you try to match a row based on more than one value from 'SEARCHING_FOR' you are going to have to modify the code.
>>> import numpy >>> print numpy.__version__ 1.6.2 >>>
|
0
#!/usr/bin/python

import csv
import re
import sys
import gdata.docs.service


#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')

# Lower-case search terms; a term's position in this list is its group index.
searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the group index of a row.

    Columns are scanned left to right; each one is lower-cased and tested
    against every term in ``searched`` as a substring. The index of the first
    term found is returned:

        0 if the match is searched[0]
        1 if the match is searched[1]
        etc
        -1 if no column contains any term (including an empty row)
    """
    for col in row:
        col = col.lower()
        for j, s in enumerate(searched):
            if s in col:
                return j
    # Give up only after every column has been checked.
    return -1

def does_match(string):
    """Return True if any term in ``searched`` occurs in lower-cased ``string``."""
    lowered = string.lower()
    for term in searched:
        if term in lowered:
            return True
    return False

# Open the input file for reading and the output file for writing. Binary
# modes are what the Python 2 csv module expects; the with-statement closes
# both files even if processing fails part-way through.
with open('data.csv', "rb") as inFile, open('data2.csv', "wb") as outFile:
    reader = csv.reader(inFile)
    writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

    # Build a list of items to sort. If row 12 contains 'LinkRunner AT'
    # (group 1), a triple (1, 12, row) is stored. When the triples are sorted,
    # all rows in group 0 come first, then all rows in group 1, etc. The row
    # number is unique, so rows within a group keep their input order and the
    # row contents themselves are never compared.
    stored = []
    for i, row in enumerate(reader):
        g = find_group(row)
        if g >= 0:
            stored.append((g, i, row))
    stored.sort()

    for g, i, row in stored:
        # Output the 1st and 3rd columns (indexes 0 and 2).
        writer.writerow(tuple(row[k] for k in (0, 2)))

1 Comment

After some Google searches and a bit more reading, I figured out another way to do this. :-) Goal has been accomplished. The next goal would be to update a spreadsheet on Google Docs.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.