Skip to main content
Question Protected by CommunityBot
removed unnecessary words, improved formatting.
Source Link

Any help anyone can give me would be really appreciated! I've already scoured similar error messages, but none of the ones I found really answer my problem. I'm pretty stuck!

Here is what my arrays look like:

>>> X = np.array([[df.tran_cityname, df.tran_signupos, df.tran_signupchannel, df.tran_vmake, df.tran_vmodel, df.tran_vyear]])

>>> Y = np.array(df['completed_trip_status'].values.tolist())

>>> X
array([[[   1,    1,    2,    3,    1,    1,    1,    1,    1,    3,    1,
            3,    1,    1,    1,    1,    2,    1,    3,    1,    3,    3,
            2,    3,    3,    1,    1,    1,    1],
        [   0,    5,    5,    1,    1,    1,    2,    2,    0,    2,    2,
            3,    1,    2,    5,    5,    2,    1,    2,    2,    2,    2,
            2,    4,    3,    5,    1,    0,    1],
        [   2,    2,    1,    3,    3,    3,    2,    3,    3,    2,    3,
            2,    3,    2,    2,    3,    2,    2,    1,    1,    2,    1,
            2,    2,    1,    2,    3,    1,    1],
        [   0,    0,    0,   42,   17,    8,   42,    0,    0,    0,   22,
            0,   22,    0,    0,   42,    0,    0,    0,    0,   11,    0,
            0,    0,    0,    0,   28,   17,   18],
        [   0,    0,    0,   70,  291,   88,  234,    0,    0,    0,  222,
            0,  222,    0,    0,  234,    0,    0,    0,    0,   89,    0,
            0,    0,    0,    0,   40,  291,  131],
        [   0,    0,    0, 2016, 2016, 2006, 2014,    0,    0,    0, 2015,
            0, 2015,    0,    0, 2015,    0,    0,    0,    0, 2015,    0,
            0,    0,    0,    0, 2016, 2016, 2010]]])

>>> Y
array(['NO', 'NO', 'NO', 'YES', 'NO', 'NO', 'YES', 'NO', 'NO', 'NO', 'NO',
       'NO', 'YES', 'NO', 'NO', 'YES', 'NO', 'NO', 'NO', 'NO', 'NO', 'NO',
       'NO', 'NO', 'NO', 'NO', 'NO', 'NO', 'NO'], 
      dtype='|S3')

>>> X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

Traceback (most recent call last):

  File "<stdin>", line 1, in <module>
  File "/Library/Python/2.7/site-packages/sklearn/cross_validation.py", line 2039, in train_test_split
    arrays = indexable(*arrays)
  File "/Library/Python/2.7/site-packages/sklearn/utils/validation.py", line 206, in indexable
    check_consistent_length(*result)
  File "/Library/Python/2.7/site-packages/sklearn/utils/validation.py", line 181, in check_consistent_length
    " samples: %r" % [int(l) for l in lengths])

ValueError: Found input variables with inconsistent numbers of samples: [1, 29]

Traceback (most recent call last):

  File "<stdin>", line 1, in <module>
  File "/Library/Python/2.7/site-packages/sklearn/cross_validation.py", line

2039, in train_test_split arrays = indexable(*arrays) File "/Library/Python/2.7/site-packages/sklearn/utils/validation.py", line 206, in indexable check_consistent_length(*result) File "/Library/Python/2.7/site-packages/sklearn/utils/validation.py", line 181, in check_consistent_length " samples: %r" % [int(l) for l in lengths])

ValueError: Found input variables with inconsistent numbers of samples: [1, 29]

Any help anyone can give me would be really appreciated! I've already scoured similar error messages, but none of the ones I found really answer my problem. I'm pretty stuck!

Here is what my arrays look like:

>>> X = np.array([[df.tran_cityname, df.tran_signupos, df.tran_signupchannel, df.tran_vmake, df.tran_vmodel, df.tran_vyear]])

>>> Y = np.array(df['completed_trip_status'].values.tolist())

>>> X
array([[[   1,    1,    2,    3,    1,    1,    1,    1,    1,    3,    1,
            3,    1,    1,    1,    1,    2,    1,    3,    1,    3,    3,
            2,    3,    3,    1,    1,    1,    1],
        [   0,    5,    5,    1,    1,    1,    2,    2,    0,    2,    2,
            3,    1,    2,    5,    5,    2,    1,    2,    2,    2,    2,
            2,    4,    3,    5,    1,    0,    1],
        [   2,    2,    1,    3,    3,    3,    2,    3,    3,    2,    3,
            2,    3,    2,    2,    3,    2,    2,    1,    1,    2,    1,
            2,    2,    1,    2,    3,    1,    1],
        [   0,    0,    0,   42,   17,    8,   42,    0,    0,    0,   22,
            0,   22,    0,    0,   42,    0,    0,    0,    0,   11,    0,
            0,    0,    0,    0,   28,   17,   18],
        [   0,    0,    0,   70,  291,   88,  234,    0,    0,    0,  222,
            0,  222,    0,    0,  234,    0,    0,    0,    0,   89,    0,
            0,    0,    0,    0,   40,  291,  131],
        [   0,    0,    0, 2016, 2016, 2006, 2014,    0,    0,    0, 2015,
            0, 2015,    0,    0, 2015,    0,    0,    0,    0, 2015,    0,
            0,    0,    0,    0, 2016, 2016, 2010]]])

>>> Y
array(['NO', 'NO', 'NO', 'YES', 'NO', 'NO', 'YES', 'NO', 'NO', 'NO', 'NO',
       'NO', 'YES', 'NO', 'NO', 'YES', 'NO', 'NO', 'NO', 'NO', 'NO', 'NO',
       'NO', 'NO', 'NO', 'NO', 'NO', 'NO', 'NO'], 
      dtype='|S3')

>>> X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

Traceback (most recent call last):

  File "<stdin>", line 1, in <module>
  File "/Library/Python/2.7/site-packages/sklearn/cross_validation.py", line 2039, in train_test_split
    arrays = indexable(*arrays)
  File "/Library/Python/2.7/site-packages/sklearn/utils/validation.py", line 206, in indexable
    check_consistent_length(*result)
  File "/Library/Python/2.7/site-packages/sklearn/utils/validation.py", line 181, in check_consistent_length
    " samples: %r" % [int(l) for l in lengths])

ValueError: Found input variables with inconsistent numbers of samples: [1, 29]

Here is what my arrays look like:

>>> X = np.array([[df.tran_cityname, df.tran_signupos, df.tran_signupchannel, df.tran_vmake, df.tran_vmodel, df.tran_vyear]])

>>> Y = np.array(df['completed_trip_status'].values.tolist())

>>> X
array([[[   1,    1,    2,    3,    1,    1,    1,    1,    1,    3,    1,
            3,    1,    1,    1,    1,    2,    1,    3,    1,    3,    3,
            2,    3,    3,    1,    1,    1,    1],
        [   0,    5,    5,    1,    1,    1,    2,    2,    0,    2,    2,
            3,    1,    2,    5,    5,    2,    1,    2,    2,    2,    2,
            2,    4,    3,    5,    1,    0,    1],
        [   2,    2,    1,    3,    3,    3,    2,    3,    3,    2,    3,
            2,    3,    2,    2,    3,    2,    2,    1,    1,    2,    1,
            2,    2,    1,    2,    3,    1,    1],
        [   0,    0,    0,   42,   17,    8,   42,    0,    0,    0,   22,
            0,   22,    0,    0,   42,    0,    0,    0,    0,   11,    0,
            0,    0,    0,    0,   28,   17,   18],
        [   0,    0,    0,   70,  291,   88,  234,    0,    0,    0,  222,
            0,  222,    0,    0,  234,    0,    0,    0,    0,   89,    0,
            0,    0,    0,    0,   40,  291,  131],
        [   0,    0,    0, 2016, 2016, 2006, 2014,    0,    0,    0, 2015,
            0, 2015,    0,    0, 2015,    0,    0,    0,    0, 2015,    0,
            0,    0,    0,    0, 2016, 2016, 2010]]])

>>> Y
array(['NO', 'NO', 'NO', 'YES', 'NO', 'NO', 'YES', 'NO', 'NO', 'NO', 'NO',
       'NO', 'YES', 'NO', 'NO', 'YES', 'NO', 'NO', 'NO', 'NO', 'NO', 'NO',
       'NO', 'NO', 'NO', 'NO', 'NO', 'NO', 'NO'], 
      dtype='|S3')

>>> X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

Traceback (most recent call last):

  File "<stdin>", line 1, in <module>
  File "/Library/Python/2.7/site-packages/sklearn/cross_validation.py", line

2039, in train_test_split arrays = indexable(*arrays) File "/Library/Python/2.7/site-packages/sklearn/utils/validation.py", line 206, in indexable check_consistent_length(*result) File "/Library/Python/2.7/site-packages/sklearn/utils/validation.py", line 181, in check_consistent_length " samples: %r" % [int(l) for l in lengths])

ValueError: Found input variables with inconsistent numbers of samples: [1, 29]

Need help with Sci-Kit-Learn - train_test_split() error: Found input variables with inconsistent numbers of samples

Source Link
josh_gray
  • 663
  • 1
  • 5
  • 4

Need help with Sci-Kit-Learn - Found input variables with inconsistent numbers of samples

Fairly new to Python but building out my first RF model based on some classification data. I've converted all of the labels into int64 numerical data and loaded into X and Y as a numpy array, but I am hitting an error when I am trying to train the models.

Any help anyone can give me would be really appreciated! I've already scoured similar error messages, but none of the ones I found really answer my problem. I'm pretty stuck!

Here is what my arrays look like:

>>> X = np.array([[df.tran_cityname, df.tran_signupos, df.tran_signupchannel, df.tran_vmake, df.tran_vmodel, df.tran_vyear]])

>>> Y = np.array(df['completed_trip_status'].values.tolist())

>>> X
array([[[   1,    1,    2,    3,    1,    1,    1,    1,    1,    3,    1,
            3,    1,    1,    1,    1,    2,    1,    3,    1,    3,    3,
            2,    3,    3,    1,    1,    1,    1],
        [   0,    5,    5,    1,    1,    1,    2,    2,    0,    2,    2,
            3,    1,    2,    5,    5,    2,    1,    2,    2,    2,    2,
            2,    4,    3,    5,    1,    0,    1],
        [   2,    2,    1,    3,    3,    3,    2,    3,    3,    2,    3,
            2,    3,    2,    2,    3,    2,    2,    1,    1,    2,    1,
            2,    2,    1,    2,    3,    1,    1],
        [   0,    0,    0,   42,   17,    8,   42,    0,    0,    0,   22,
            0,   22,    0,    0,   42,    0,    0,    0,    0,   11,    0,
            0,    0,    0,    0,   28,   17,   18],
        [   0,    0,    0,   70,  291,   88,  234,    0,    0,    0,  222,
            0,  222,    0,    0,  234,    0,    0,    0,    0,   89,    0,
            0,    0,    0,    0,   40,  291,  131],
        [   0,    0,    0, 2016, 2016, 2006, 2014,    0,    0,    0, 2015,
            0, 2015,    0,    0, 2015,    0,    0,    0,    0, 2015,    0,
            0,    0,    0,    0, 2016, 2016, 2010]]])

>>> Y
array(['NO', 'NO', 'NO', 'YES', 'NO', 'NO', 'YES', 'NO', 'NO', 'NO', 'NO',
       'NO', 'YES', 'NO', 'NO', 'YES', 'NO', 'NO', 'NO', 'NO', 'NO', 'NO',
       'NO', 'NO', 'NO', 'NO', 'NO', 'NO', 'NO'], 
      dtype='|S3')

>>> X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

Traceback (most recent call last):

  File "<stdin>", line 1, in <module>
  File "/Library/Python/2.7/site-packages/sklearn/cross_validation.py", line 2039, in train_test_split
    arrays = indexable(*arrays)
  File "/Library/Python/2.7/site-packages/sklearn/utils/validation.py", line 206, in indexable
    check_consistent_length(*result)
  File "/Library/Python/2.7/site-packages/sklearn/utils/validation.py", line 181, in check_consistent_length
    " samples: %r" % [int(l) for l in lengths])

ValueError: Found input variables with inconsistent numbers of samples: [1, 29]