Untitled
unknown
plain_text
3 years ago
1.3 kB
6
Indexable
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
Input In [18], in <cell line: 11>()
9 cv = CountVectorizer(stop_words='english')
10 X_cv = cv.fit_transform(X_text)
---> 11 X_cv_df = pd.DataFrame(X_cv.toarray(), columns=cv.get_feature_names_out())
12 X_cv_df = X_cv_df.values.reshape(X.shape[0], -1)
14 # Split data into training and test sets
File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/scipy/sparse/compressed.py:1039, in _cs_matrix.toarray(self, order, out)
1037 if out is None and order is None:
1038 order = self._swap('cf')[0]
-> 1039 out = self._process_toarray_args(order, out)
1040 if not (out.flags.c_contiguous or out.flags.f_contiguous):
1041 raise ValueError('Output array must be C or F contiguous')
File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/scipy/sparse/base.py:1202, in spmatrix._process_toarray_args(self, order, out)
1200 return out
1201 else:
-> 1202 return np.zeros(self.shape, dtype=self.dtype, order=order)
MemoryError: Unable to allocate 18.5 GiB for an array with shape (41880, 59141) and data type int64
Editor is loading...