unable to pass X_train and y_train in my regressor variable. i got a ValueError
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = pd.read_csv('housing.csv')
data.drop('ocean_proximity', axis=1, inplace = True)
longitude latitude housing_median_age total_rooms total_bedrooms population households median_income median_house_value
0 -122.23 37.88 41.0 880.0 129.0 322.0 126.0 8.3252 452600.0
1 -122.22 37.86 21.0 7099.0 1106.0 2401.0 1138.0 8.3014 358500.0
2 -122.24 37.85 52.0 1467.0 190.0 496.0 177.0 7.2574 352100.0
3 -122.25 37.85 52.0 1274.0 235.0 558.0 219.0 5.6431 341300.0
4 -122.25 37.85 52.0 1627.0 280.0 565.0 259.0 3.8462 342200.0
X = data.iloc[:, 6:-1].values
y= data.iloc[:, -1].values
from sklearn.model_selection import train_test_split
X_train, y_train, X_test, y_test = train_test_split (X,y, test_size = 0.2, random_state = 0)
[[ 65. 4.2386]
[447. 4.3898]
[368. 3.9333]
[393. 3.1977]
[468. 5.6315]
[298. 1.3882]]
[[371. 4.1518]
[429. 5.7796]
[534. 4.3487]
[326. 3.2027]
[374. 6.1436]
[406. 3.3326]]
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(np.array(X_train).reshape(-1, 1), y_train)
ValueError Traceback (most recent call last)
ipython-input-345-0edbf6e4cc5c in module
---- 1 regressor.fit(np.array(X_train).reshape(-1, 1), y_train)
~\anaconda3\lib\site-packages\sklearn\linear_model\_base.py in fit(self, X, y, sample_weight)
516 accept_sparse = False if self.positive else ['csr', 'csc', 'coo']
-- 518 X, y = self._validate_data(X, y, accept_sparse=accept_sparse,
519 y_numeric=True, multi_output=True)
~\anaconda3\lib\site-packages\sklearn\base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
431 y = check_array(y, **check_y_params)
432 else:
-- 433 X, y = check_X_y(X, y, **check_params)
434 out = X, y
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args = 0:
--- 63 return f(*args, **kwargs)
65 # extra_args 0
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
829 y = y.astype(np.float64)
-- 831 check_consistent_length(X, y)
833 return X, y
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
260 uniques = np.unique(lengths)
261 if len(uniques) 1:
-- 262 raise ValueError(Found input variables with inconsistent numbers of
263 samples: %r % [int(l) for l in lengths])
ValueError: Found input variables with inconsistent numbers of samples: [33024, 4128]
Topic numpy linear-regression pandas python
Category Data Science