I'm trying to fit a (223129, 108) dataset with scikit-learn's linear models (Ridge(), Lasso(), LinearRegression()) and get the following error. I'm not sure what to do: the data doesn't seem large enough to run out of memory (I have 16 GB of RAM). Any ideas?
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-34-8ea705d45c5d> in <module>()
----> 1 cv_loop(T,yn, model=reg, per_test=0.2,cv_random=False,tresh=450)
<ipython-input-1-ea163943e461> in cv_loop(X, y, model, per_test, cv_random, tresh)
48 preds_all=np.zeros((y_cv.shape))
49 for i in range(y_n):
---> 50 model.fit(X_train, y_train[:,i])
51
52 preds = model.predict(X_cv)
C:\Users\m&g\AppData\Local\Enthought\Canopy32\User\lib\site-packages\scikit_learn-0.14.1-py2.7-win32.egg\sklearn\linear_model\coordinate_descent.pyc in fit(self, X, y, Xy, coef_init)
608 "estimator", stacklevel=2)
609 X = atleast2d_or_csc(X, dtype=np.float64, order='F',
--> 610 copy=self.copy_X and self.fit_intercept)
611 # From now on X can be touched inplace
612 y = np.asarray(y, dtype=np.float64)
C:\Users\m&g\AppData\Local\Enthought\Canopy32\User\lib\site-packages\scikit_learn-0.14.1-py2.7-win32.egg\sklearn\utils\validation.pyc in atleast2d_or_csc(X, dtype, order, copy, force_all_finite)
122 """
123 return _atleast2d_or_sparse(X, dtype, order, copy, sparse.csc_matrix,
--> 124 "tocsc", force_all_finite)
125
126
C:\Users\m&g\AppData\Local\Enthought\Canopy32\User\lib\site-packages\scikit_learn-0.14.1-py2.7-win32.egg\sklearn\utils\validation.pyc in _atleast2d_or_sparse(X, dtype, order, copy, sparse_class, convmethod, force_all_finite)
109 else:
110 X = array2d(X, dtype=dtype, order=order, copy=copy,
--> 111 force_all_finite=force_all_finite)
112 if force_all_finite:
113 _assert_all_finite(X)
C:\Users\m&g\AppData\Local\Enthought\Canopy32\User\lib\site-packages\scikit_learn-0.14.1-py2.7-win32.egg\sklearn\utils\validation.pyc in array2d(X, dtype, order, copy, force_all_finite)
89 raise TypeError('A sparse matrix was passed, but dense data '
90 'is required. Use X.toarray() to convert to dense.')
---> 91 X_2d = np.asarray(np.atleast_2d(X), dtype=dtype, order=order)
92 if force_all_finite:
93 _assert_all_finite(X_2d)
C:\Users\m&g\AppData\Local\Enthought\Canopy32\App\appdata\canopy-1.0.3.1262.win-x86\lib\site-packages\numpy\core\numeric.pyc in asarray(a, dtype, order)
318
319 """
--> 320 return array(a, dtype, copy=False, order=order)
321
322 def asanyarray(a, dtype=None, order=None):
MemoryError:
np.ones((223129, 108)).astype(np.float64) gives me an array of about 183 megabytes.