ValueError: y should be a 1d array, got an array of shape () instead
I'm working with a reviews dataset and trying to fit a classifier model and get predictions. Here is the code I'm trying:
dataset = pd.read_csv('Scraping reviews.csv')
import numpy as np
X = np.linspace(0, 2*np.pi, 8)
y = np.sin(X) + np.random.normal(0, 0.4, 8)
X = X.reshape(-1, 1)
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(dataset)
#X_train_counts=X_train_counts.reshape(4,1)
X_train_counts.shape
[out] (2,2)
from sklearn.feature_extraction.text import TfidfTransformer
tf_transformer = TfidfTransformer(use_idf=False).fit(X_train_counts)
X_train_tf = tf_transformer.transform(X_train_counts)
#X_train_tf=X_train_tf.reshape(4,1)
X_train_tf.shape
[out] (2,2)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
#X_train_tfidf=X_train_tfidf.reshape(4,1)
X_train_tfidf.shape
[out] (2,2)
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(X_train_tfidf, X_train_counts)
[out] ---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-494-7734b71b758f> in <module>
1 from sklearn.naive_bayes import MultinomialNB
2
---- 3 clf = MultinomialNB().fit(X_train_tfidf, X_train_counts)
~\anaconda3\lib\site-packages\sklearn\naive_bayes.py in fit(self, X, y, sample_weight)
613 self : object
614
-- 615 X, y = self._check_X_y(X, y)
616 _, n_features = X.shape
617 self.n_features_ = n_features
~\anaconda3\lib\site-packages\sklearn\naive_bayes.py in _check_X_y(self, X, y)
478
479 def _check_X_y(self, X, y):
-- 480 return self._validate_data(X, y, accept_sparse='csr')
481
482 def _update_class_log_prior(self, class_prior=None):
~\anaconda3\lib\site-packages\sklearn\base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
430 y = check_array(y, **check_y_params)
431 else:
-- 432 X, y = check_X_y(X, y, **check_params)
433 out = X, y
434
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
--- 72 return f(**kwargs)
73 return inner_f
74
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
805 ensure_2d=False, dtype=None)
806 else:
-- 807 y = column_or_1d(y, warn=True)
808 _assert_all_finite(y)
809 if y_numeric and y.dtype.kind == 'O':
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
--- 72 return f(**kwargs)
73 return inner_f
74
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in column_or_1d(y, warn)
843 return np.ravel(y)
844
-- 845 raise ValueError(
846 "y should be a 1d array, "
847 "got an array of shape {} instead.".format(shape))
ValueError: y should be a 1d array, got an array of shape () instead.
I tried reshaping X_train_counts, X_train_tf, and X_train_tfidf, but nothing works. Please help me with this. Thanks.
Topic sentiment-analysis classification nlp machine-learning
Category Data Science