Search for string in CSV Files using python and write the results

Question

   #!/usr/bin/python

import csv
import re

string_1 = ('OneTouch AT')
string_2 = ('LinkRunner AT')
string_3 = ('AirCheck')

#searched = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
print "hello Pythong! "

#def does_match(string):
#    stringl = string.lower()
#    return any(s in stringl for s in searched)

inFile  = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile  = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

for row in reader:
    found = False
    for col in row:
        if col in [string_1, string_2, string_3] and not found:
            writer.writerow(row)
            found = True


#for row in reader:
 #   if any(does_match(col) for col in row):
  #      writer.writerow(row[:2]) # write only 2 first columns

inFile.close()
outFile.close()

I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.

Sample Data File

LinkRunner AT Video,10,20
Wireless Performance Video OneTouch AT,1,2
Wired OneTouch AT,200,300
LinkRunner AT,200,300
AirCheck,200,300

The only thing that is not working currently is I would like to print out only the 1st and 3rd column. Also ideally it would printer out the aircheck lines first, then Linkrunner lines then onetouch at lines. — Kris
– Kris, Commented Feb 13, 2014 at 21:33

Burhan Khalid · Accepted Answer · 2014-02-14 13:37:54Z

3

I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.

Try this:

import csv

search_for = ['OneTouch AT','LinkRunner AT','AirCheck']

with open('in.csv') as inf, open('out.csv','w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        if row[0] in search_for:
            print('Found: {}'.format(row))
            writer.writerow(row)

answered Feb 14, 2014 at 13:37

Burhan Khalid

175k20 gold badges254 silver badges291 bronze badges

Sign up to request clarification or add additional context in comments.

Comments

Ryder Brooks · Accepted Answer · 2014-02-13 22:49:10Z

0

#!/usr/bin/python

import csv
import numpy as np

class search_csv(object):
    def __init__(self, infile, outfile):
        infile = open(infile, 'rb')
        read_infile = [i for i in csv.reader(infile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)]
        self.non_numpy_data = read_infile
        self.data = np.array(read_infile, dtype=None)
        self.outfile = open(outfile, 'wb')
        self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    def write_to(self, matched_values):
        self.writer_.writerows(matched_values)
        print ' Matched Values Written '
        return True

    def searcher(self, items, return_cols=[0,2]): ##// items should be passed as list -> ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
        find_these = np.array(items, dtype=None)
        matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0]
        matching_data = self.data[matching_y][:,return_cols]
        self.write_to(matching_data)
        self.outfile.close()
        return True

    def non_numpy_search(self, items, return_cols=[0,2]):
        lst = []
        for i in self.non_numpy_data:
            for ii in items:
                if ii in i:
                    z = []
                    for idx in return_cols:
                        z.append(i[idx])
                    lst.append(z)
                break
        self.write_to(lst)
        return True


### now use the class ###

SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'

non_numpy_search(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)

By the phrasing of your question I'm assuming you just want to complete the task at hand and don't really care how. So copy and paste this in and use your data file as the 'IN_FILE' value and the file name you want to write to as the 'OUT_FILE' value. Place the values you want to search for in the 'SEARCHING_FOR' list as you're done.

Things to note.... SEARCHING_FOR should be a list.

the values in SEARCHING_FOR are matched EXACTLY so 'A' will not match 'a'. If you want a to use a regex or something more complex let me know.

In function 'non_numpy_search' there is a 'return_cols' parameter. It defaults to the first and 3rd column.

If you don't have numpy let me know.

edited Feb 13, 2014 at 22:49

answered Feb 13, 2014 at 22:07

Ryder Brooks

2,1392 gold badges22 silver badges32 bronze badges

7 Comments

Kris Over a year ago

I got the following message: Traceback (most recent call last): File "csvparserV2.0.py", line 34, in <module> search_csv(IN_FILE, OUT_FILE).searcher(SEARCHING_FOR) File "csvparserV2.0.py", line 21, in searcher matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0] File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/numpy/lib/arraysetops.py", line 335, in in1d order = ar.argsort(kind='mergesort') TypeError: requested sort not available for type

Ryder Brooks Over a year ago

do you know what version of numpy you're running? print np.__version__

Ryder Brooks Over a year ago

if the file isn't ginormous the edit I just posted should work fine.

Ryder Brooks Over a year ago

also with the 'non_numpy_search' if you try to match a row based on more than one value from 'SEARCHING_FOR' you are going to have to modify the code.

Kris Over a year ago

>>> import numpy >>> print numpy.__version__ 1.6.2 >>>

|

Kris · Accepted Answer · 2014-02-14 13:30:51Z

#!/usr/bin/python

import csv
import re
import sys
import gdata.docs.service


#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')

searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the group index of a row
        0 if the row contains searched[0]
        1 if the row contains searched[1]
        etc
        -1 if not found
    """
    for col in row:
        col = col.lower()
        for j, s in enumerate(searched):
            if s in col:
                return j
        return -1

def does_match(string):
    stringl = string.lower()
    return any(s in stringl for s in searched)

#Opens Input file for read and output file to write.
inFile  = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile  = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)


#for row in reader:
#   found = False
#   for col in row:
#       if col in [string_1, string_2, string_3] and not found:
#           writer.writerow(row)
#           found = True



"""Built a list of items to sort. If row 12 contains 'LinkRunner AT' (group 1),
    one stores a triple (1, 12, row)
    When the triples are sorted later, all rows in group 0 will come first, then
    all rows in group 1, etc.
"""
stored = []
for i, row in enumerate(reader):
    g = find_group(row)
    if g >= 0:
        stored.append((g, i, row))
stored.sort()

for g, i, row in stored:
    writer.writerow(tuple(row[k] for k in (0,2))) # output col 1 & 5

#for row in reader:
 #   if any(does_match(col) for col in row):
  #      writer.writerow(row[:2]) # write only 2 first columns

# Closing Input and Output files.
inFile.close()
outFile.close()

After some google searches a bit more reading I figured out another way to do this. :-) Goal has been accomplished. Next goal would be to update a spreadsheet on google docs.

Collectives™ on Stack Overflow

Search for string in CSV Files using python and write the results

3 Answers 3

Comments

7 Comments

1 Comment

Your Answer

Linked

Hot Network Questions

Collectives™ on Stack Overflow

3 Answers 3

Comments

7 Comments

1 Comment

Your Answer

Sign up or log in

Post as a guest

Linked

Related