please help, i got an error while trying to my data, i got an error like x and y must be thesame size
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = pd.read_csv('housing.csv')
data.drop('ocean_proximity', axis=1, inplace = True)
data.head()
longitude latitude housing_median_age total_rooms total_bedrooms population households median_income median_house_value
0 -122.23 37.88 41.0 880.0 129.0 322.0 126.0 8.3252 452600.0
1 -122.22 37.86 21.0 7099.0 1106.0 2401.0 1138.0 8.3014 358500.0
2 -122.24 37.85 52.0 1467.0 190.0 496.0 177.0 7.2574 352100.0
3 -122.25 37.85 52.0 1274.0 235.0 558.0 219.0 5.6431 341300.0
4 -122.25 37.85 52.0 1627.0 280.0 565.0 259.0 3.8462 342200.0
X = data.iloc[:, 6:-1].values
y= data.iloc[:, -1].values
from sklearn.model_selection import train_test_split
X_train, y_train, X_test, y_test = train_test_split (X,y, test_size = 0.33, random_state = 42)
print(X_train)
[[ 65. 4.2386]
[447. 4.3898]
[368. 3.9333]
...
[393. 3.1977]
[468. 5.6315]
[298. 1.3882]]
print(y_train)
[[371. 4.1518]
[429. 5.7796]
[534. 4.3487]
...
[326. 3.2027]
[374. 6.1436]
[406. 3.3326]]
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)
LinearRegression()
y_pred =regressor.predict(X_test)
plt.scatter(X_train, y_train, color='blue')
plt.plot(X_train, regressor.predict(X_train), color='red')
plt.title('visualizing trainning set')
plt.xlabel('median_income')
plt.ylabel('median_house_value')
plt.show()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
ipython-input-412-a47ebba88fd9 in module
---- 1 plt.scatter(X_train, y_train, color='blue')
2 plt.plot(X_train, regressor.predict(X_train), color='red')
3 plt.title('visualizing trainning set')
4 plt.xlabel('median_income')
5 plt.ylabel('median_house_value')
~\anaconda3\lib\site-packages\matplotlib\pyplot.py in scatter(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, plotnonfinite, data, **kwargs)
2888 verts=cbook.deprecation._deprecated_parameter,
2889 edgecolors=None, *, plotnonfinite=False, data=None, **kwargs):
- 2890 __ret = gca().scatter(
2891 x, y, s=s, c=c, marker=marker, cmap=cmap, norm=norm,
2892 vmin=vmin, vmax=vmax, alpha=alpha, linewidths=linewidths,
~\anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, data, *args, **kwargs)
1445 def inner(ax, *args, data=None, **kwargs):
1446 if data is None:
- 1447 return func(ax, *map(sanitize_sequence, args), **kwargs)
1448
1449 bound = new_sig.bind(ax, *args, **kwargs)
~\anaconda3\lib\site-packages\matplotlib\cbook\deprecation.py in wrapper(*inner_args, **inner_kwargs)
409 else deprecation_addendum,
410 **kwargs)
-- 411 return func(*inner_args, **inner_kwargs)
412
413 return wrapper
~\anaconda3\lib\site-packages\matplotlib\axes\_axes.py in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, plotnonfinite, **kwargs)
4439 y = np.ma.ravel(y)
4440 if x.size != y.size:
- 4441 raise ValueError(x and y must be the same size)
4442
4443 if s is None:
ValueError: x and y must be the same size
Topic matplotlib linear-regression pandas
Category Data Science