Read the second row from CSV file to Python

Question

I have a csv file:

Index,X1,X2,X3,X4,X5,Y
1,-1.608052,-0.377992,1.204209,1.313808,1.218265,1
2,0.393766,0.630685,-1.222062,0.090558,0.015893,0
3,-0.466243,0.276972,2.519047,0.673745,0.16729,1
4,1.47121,-0.046791,-0.303291,-0.365437,1.989287,0
5,-1.672906,1.25588,-0.355706,0.123143,-2.241941,1

I want to create a classification program, and the data starts in the second row (the first row is a header). I'm trying to read the data starting from the second row. I tried using next(list) like this:

def load_DataTrain(filename):
    """Read every row of the CSV file ``filename`` into a list of string lists.

    No row is skipped, so a header line (if the file has one) comes back as
    the first element of the result.

    Raises:
        FileNotFoundError: re-raised unchanged if ``open`` raises it.
    """
    try:
        with open(filename, newline='') as iFile:
            # NOTE(review): `reader` is presumably csv.reader, imported elsewhere.
            return list(reader(iFile, delimiter=','))
            # Unreachable: the `return` above has already left the function.
            # Even if it ran, `list` here is the builtin type, not the rows
            # that were read, so this call could not skip the header.
            next(list)
    except FileNotFoundError as e:
        # Catch-and-re-raise: the caller sees the same exception.
        raise e

But it doesn't work, and I get an error because the program still reads from the first row. I didn't use pandas or csv.reader to read my CSV. This is the code that I got from Divyesh's GitHub:

from csv import reader
from sys import exit
from math import sqrt
from operator import itemgetter

def load_DataTrain(filename):
    """Read every row of the CSV file ``filename`` into a list of string lists.

    No row is skipped, so a header line (if the file has one) comes back as
    the first element of the result.

    Raises:
        FileNotFoundError: re-raised unchanged if ``open`` raises it.
    """
    try:
        with open(filename) as iFile:
            return list(reader(iFile, delimiter=','))
            # Unreachable: the `return` above has already left the function.
            # Even if it ran, `list` here is the builtin type, not the rows
            # that were read, so this call could not skip the header.
            next(list)
    except FileNotFoundError as e:
        # Catch-and-re-raise: the caller sees the same exception.
        raise e

def convert_to_float(DataTrain, mode):
    """Convert the string cells of a loaded data set to floats.

    In ``'training'`` mode every cell except the last is converted and the
    last cell is kept unchanged; in ``'test'`` mode every cell is converted.

    Any other mode, or a cell that ``float`` rejects, prints a message and
    ends the program through ``exit()``.

    Returns:
        A new list of rows; ``DataTrain`` itself is not modified.
    """
    try:
        if mode == 'training':
            # Last column stays a string, the rest become numbers.
            return [
                [float(cell) for cell in row[:-1]] + [row[-1]]
                for row in DataTrain
            ]

        if mode == 'test':
            return [[float(cell) for cell in row] for row in DataTrain]

        print('Invalid mode, program will exit.')
        exit()

    except ValueError as v:
        print(v)
        print('Invalid data set format, program will exit.')
        exit()


def get_classes(training_set):
    """Return the distinct last-column values of ``training_set`` as a list.

    The order of the returned labels is whatever the intermediate set yields.
    """
    labels = set()
    for row in training_set:
        labels.add(row[-1])
    return list(labels)


def find_neighbors(distances, k):
    """Return the first ``k`` entries of ``distances``.

    The caller is expected to have sorted ``distances`` already; this
    function only slices.
    """
    nearest = distances[:k]
    return nearest


def find_response(neighbors, classes):
    """Count the neighbours' votes per class and return the winner.

    Each neighbour's label is read from its second-to-last element.

    Returns:
        A ``(index, votes)`` tuple: the position in ``classes`` of the class
        with the most votes and that vote count. On a tie the earliest
        class in ``classes`` wins.

    Raises:
        ValueError: if ``classes`` is empty.
    """
    tally = [0] * len(classes)

    for neighbor in neighbors:
        for slot in range(len(classes)):
            if neighbor[-2] == classes[slot]:
                tally[slot] += 1

    # max() keeps the first maximal index, so ties go to the earliest class.
    winner = max(range(len(tally)), key=tally.__getitem__)
    return winner, tally[winner]


def knn(training_set, test_set, k):
    """Classify every row of ``test_set`` by majority vote of its ``k`` nearest
    training rows and print the prediction.

    The distance is the Euclidean distance over the feature columns; the
    number of feature columns is taken from the first training row (all
    columns but its last). Nothing is returned. Any exception raised while
    classifying is printed and ends the run.
    """
    feature_count = len(training_set[0]) - 1

    # candidate labels come from the training data
    classes = get_classes(training_set)

    try:
        for test_instance in test_set:
            # each entry is a training row with its distance appended
            distances = []
            for row in training_set:
                squared = 0
                for a, b in zip(row[:feature_count], test_instance):
                    squared += (a - b) * (a - b)
                distances.append(row + [sqrt(squared)])

            # order by the appended distance column
            distance_column = len(distances[0]) - 1
            distances.sort(key=itemgetter(distance_column))

            neighbors = find_neighbors(distances, k)
            index, value = find_response(neighbors, classes)

            print('The predicted class for sample ' + str(test_instance) + ' is : ' + classes[index])
            print('Number of votes : ' + str(value) + ' out of ' + str(k))

    except Exception as e:
        print(e)


def main():
    """Prompt for ``k`` and the two data file names, load both sets, and run
    the classifier.

    Prints a message instead of classifying when either set is empty, when
    ``k`` exceeds the number of training rows, when a ``ValueError`` is raised
    (for example a non-integer ``k``), or when a file is not found.
    """
    try:
        k = int(input('Enter the value of k : '))

        training_file = input('Enter name of training data file : ')
        test_file = input('Enter name of test data file : ')
        training_set = convert_to_float(load_DataTrain(training_file), 'training')
        test_set = convert_to_float(load_DataTrain(test_file), 'test')

        # Guard clauses: report the first problem found and stop.
        if not training_set:
            print('Empty training set')
            return

        if not test_set:
            print('Empty test set')
            return

        if k > len(training_set):
            print('Expected number of neighbors is higher than number of training data instances')
            return

        knn(training_set, test_set, k)

    except ValueError as v:
        print(v)

    except FileNotFoundError:
        print('File not found')


if __name__ == '__main__':
    main()

And the result is:

could not convert string to float: 'Index'

What am I supposed to do to read from the second row of the CSV file?

You can also use the pandas library. It will be a lot easier to work with CSV files that way — Hayat
– Hayat, Commented Dec 1, 2018 at 14:53

Srce Cde · Accepted Answer · 2018-12-01 15:21:11Z

Minor change in your function.

If you want to return only the 2nd row, replace [1:] with [1] in the code below.

from csv import reader
def load_DataTrain(filename):
    """Return all rows of the CSV file ``filename`` except the first.

    Each row is a list of strings. The first row (the header in the
    question's file) is dropped by the ``[1:]`` slice.

    Raises:
        FileNotFoundError: propagated from ``open`` when the file is missing.
    """
    with open(filename, newline='') as csv_file:
        rows = list(reader(csv_file, delimiter=','))
    # everything from the 2nd row onward
    return rows[1:]
load_DataTrain("file.csv")

Output:

[['1', '-1.608052', '-0.377992', '1.204209', '1.313808', '1.218265', '1'],
 ['2', '0.393766', '0.630685', '-1.222062', '0.090558', '0.015893', '0'],
 ['3', '-0.466243', '0.276972', '2.519047', '0.673745', '0.16729', '1'],
 ['4', '1.47121', '-0.046791', '-0.303291', '-0.365437', '1.989287', '0'],
 ['5', '-1.672906', '1.25588', '-0.355706', '0.123143', '-2.241941', '1']]

An alternative using pandas

Replace df.values.tolist() with df.iloc[0].values.tolist() to return only the 2nd row.

# pprint is used below, so it has to be imported for the snippet to run.
from pprint import pprint

import pandas as pd

# read_csv takes the first line of the file as the column names by default,
# so the header is not part of the rows listed below.
df = pd.read_csv("dummy.csv")
# One inner list per data row; values come back as floats.
pprint(df.values.tolist())

Output:

[[1.0, -1.608052, -0.377992, 1.204209, 1.313808, 1.218265, 1.0],
 [2.0, 0.393766, 0.630685, -1.222062, 0.090558, 0.015893, 0.0],
 [3.0,
  -0.466243,
  0.276972,
  2.519047,
  0.6737449999999999,
  0.16729000000000002,
  1.0],
 [4.0,
  1.4712100000000001,
  -0.046791,
  -0.303291,
  -0.365437,
  1.9892869999999998,
  0.0],
 [5.0,
  -1.6729060000000002,
  1.2558799999999999,
  -0.355706,
  0.123143,
  -2.241941,
  1.0]]

samuq · Accepted Answer · 2018-12-01 14:50:28Z

3

def returnSecondRow(delimit):
    """Return the second row of the CSV file named by the module-level ``filename``.

    Args:
        delimit: the field delimiter passed to ``csv.reader``.

    Returns:
        The second row as a list of strings, or ``None`` when the file has
        fewer than two rows.
    """
    import csv  # local import so the snippet does not depend on an unseen one

    # newline='' is what the csv module asks for, so that line endings inside
    # quoted fields are handled by the reader rather than by open().
    with open(filename, newline='') as fh:
        rows = csv.reader(fh, delimiter=delimit)
        next(rows, None)  # discard the first row
        return next(rows, None)  # reading stops here; later rows are never parsed

answered Dec 1, 2018 at 14:50

samuq

3461 gold badge6 silver badges19 bronze badges

1 Comment

P.hunter Over a year ago

Moreover, you can just break out of the loop after the 2nd row, because the data is in the second row.

Lukas Grotz · Accepted Answer · 2018-12-01 15:12:56Z

3

Since you're using the iris dataset in your example, I guess you're venturing into machine learning? If that's the case, I think it would be wiser to use pandas to read and process your .csv file.

import pandas as pd

# `filename` must already hold the path of the CSV file.
# read_csv uses the first line of the file as column names by default,
# so the header never shows up among the DataFrame's rows.
df = pd.read_csv(filename)
# iloc is zero-based over the data rows: iloc[1] is the second data row,
# i.e. the third line of the file. df.iloc[0] is the line right after the header.
row_two = df.iloc[1]

answered Dec 1, 2018 at 15:12

Lukas Grotz

864 bronze badges

Comments

Arnold · Accepted Answer · 2018-12-01 15:35:18Z

1

Using base Python (no imports)

# Open the file in a `with` block so it is closed even if a row fails to parse.
with open("Data.csv", "r") as f:
    # Reading one line up front consumes the header, so the loop below
    # starts at the first data row.
    head = f.readline()
    print(head)
    for line in f:
        line = line.strip('\n')
        if not line:
            # A blank line (e.g. at the end of the file) has no values to convert.
            continue
        line = line.split(",")
        line = [float(i) for i in line]
        # First and last columns (Index and Y in the question's file) are
        # whole numbers, so turn them back into ints.
        line[0] = int(line[0])
        line[-1] = int(line[-1])
        print(line, "and do something")

answered Dec 1, 2018 at 15:35

Arnold

1633 silver badges9 bronze badges

Collectives™ on Stack Overflow

Read the second row from CSV file to Python

4 Answers 4

Comments

1 Comment

Comments

Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

4 Answers 4

Comments

1 Comment

Comments

Comments

Your Answer

Sign up or log in

Post as a guest

Related