please help, i got an error while trying to my data, i got an error like x and y must be thesame size

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
data = pd.read_csv('housing.csv')


data.drop('ocean_proximity', axis=1, inplace = True)
data.head()
longitude   latitude    housing_median_age  total_rooms total_bedrooms  population  households  median_income   median_house_value
0   -122.23 37.88   41.0    880.0   129.0   322.0   126.0   8.3252  452600.0
1   -122.22 37.86   21.0    7099.0  1106.0  2401.0  1138.0  8.3014  358500.0
2   -122.24 37.85   52.0    1467.0  190.0   496.0   177.0   7.2574  352100.0
3   -122.25 37.85   52.0    1274.0  235.0   558.0   219.0   5.6431  341300.0
4   -122.25 37.85   52.0    1627.0  280.0   565.0   259.0   3.8462  342200.0

X = data.iloc[:, 6:-1].values
y= data.iloc[:, -1].values

from sklearn.model_selection import train_test_split
X_train, y_train, X_test, y_test = train_test_split (X,y, test_size = 0.33, random_state = 42)

print(X_train)
[[ 65.       4.2386]
 [447.       4.3898]
 [368.       3.9333]
 ...
 [393.       3.1977]
 [468.       5.6315]
 [298.       1.3882]]

print(y_train)
[[371.       4.1518]
 [429.       5.7796]
 [534.       4.3487]
 ...
 [326.       3.2027]
 [374.       6.1436]
 [406.       3.3326]]

from sklearn.linear_model import LinearRegression
regressor = LinearRegression()

regressor.fit(X_train, y_train)
LinearRegression()

y_pred =regressor.predict(X_test)

plt.scatter(X_train, y_train, color='blue')
plt.plot(X_train, regressor.predict(X_train), color='red')
plt.title('visualizing trainning set')
plt.xlabel('median_income')
plt.ylabel('median_house_value')
plt.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
ipython-input-412-a47ebba88fd9 in module
---- 1 plt.scatter(X_train, y_train, color='blue')
      2 plt.plot(X_train, regressor.predict(X_train), color='red')
      3 plt.title('visualizing trainning set')
      4 plt.xlabel('median_income')
      5 plt.ylabel('median_house_value')

~\anaconda3\lib\site-packages\matplotlib\pyplot.py in scatter(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, plotnonfinite, data, **kwargs)
   2888         verts=cbook.deprecation._deprecated_parameter,
   2889         edgecolors=None, *, plotnonfinite=False, data=None, **kwargs):
- 2890     __ret = gca().scatter(
   2891         x, y, s=s, c=c, marker=marker, cmap=cmap, norm=norm,
   2892         vmin=vmin, vmax=vmax, alpha=alpha, linewidths=linewidths,

~\anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, data, *args, **kwargs)
   1445     def inner(ax, *args, data=None, **kwargs):
   1446         if data is None:
- 1447             return func(ax, *map(sanitize_sequence, args), **kwargs)
   1448 
   1449         bound = new_sig.bind(ax, *args, **kwargs)

~\anaconda3\lib\site-packages\matplotlib\cbook\deprecation.py in wrapper(*inner_args, **inner_kwargs)
    409                          else deprecation_addendum,
    410                 **kwargs)
-- 411         return func(*inner_args, **inner_kwargs)
    412 
    413     return wrapper

~\anaconda3\lib\site-packages\matplotlib\axes\_axes.py in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, plotnonfinite, **kwargs)
   4439         y = np.ma.ravel(y)
   4440         if x.size != y.size:
- 4441             raise ValueError(x and y must be the same size)
   4442 
   4443         if s is None:

ValueError: x and y must be the same size

Topic matplotlib linear-regression pandas

Category Data Science

About

Geeks Mental is a community that publishes articles and tutorials about Web, Android, Data Science, new techniques and Linux security.