import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns


# Load the house-sales CSV (the path is relative to the working directory).
house_sales = pd.read_csv('../static/home_data.csv')


# Preview the first rows of the table.
house_sales.head()


# Number of rows in the dataset.
house_sales.shape[0]

21613


# Show the dtype pandas inferred for each column.
house_sales.dtypes

id                 int64
date              object
price              int64
bedrooms           int64
bathrooms        float64
sqft_living        int64
sqft_lot           int64
floors           float64
waterfront         int64
view               int64
condition          int64
grade              int64
sqft_above         int64
sqft_basement      int64
yr_built           int64
yr_renovated       int64
zipcode            int64
lat              float64
long             float64
sqft_living15      int64
sqft_lot15         int64
dtype: object


# Count the missing values in each column.
house_sales.isna().sum()

id               0
date             0
price            0
bedrooms         0
bathrooms        0
sqft_living      0
sqft_lot         0
floors           0
waterfront       0
view             0
condition        0
grade            0
sqft_above       0
sqft_basement    0
yr_built         0
yr_renovated     0
zipcode          0
lat              0
long             0
sqft_living15    0
sqft_lot15       0
dtype: int64


# Print a summary of the frame: index range, columns, non-null counts,
# dtypes and memory usage.
house_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21613 entries, 0 to 21612
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             21613 non-null  int64  
 1   date           21613 non-null  object 
 2   price          21613 non-null  int64  
 3   bedrooms       21613 non-null  int64  
 4   bathrooms      21613 non-null  float64
 5   sqft_living    21613 non-null  int64  
 6   sqft_lot       21613 non-null  int64  
 7   floors         21613 non-null  float64
 8   waterfront     21613 non-null  int64  
 9   view           21613 non-null  int64  
 10  condition      21613 non-null  int64  
 11  grade          21613 non-null  int64  
 12  sqft_above     21613 non-null  int64  
 13  sqft_basement  21613 non-null  int64  
 14  yr_built       21613 non-null  int64  
 15  yr_renovated   21613 non-null  int64  
 16  zipcode        21613 non-null  int64  
 17  lat            21613 non-null  float64
 18  long           21613 non-null  float64
 19  sqft_living15  21613 non-null  int64  
 20  sqft_lot15     21613 non-null  int64  
dtypes: float64(4), int64(16), object(1)
memory usage: 3.5+ MB


# Parse the sale date. The raw values look like '20141013T000000', so the
# format is given explicitly instead of relying on pandas' format inference.
house_sales['date'] = pd.to_datetime(house_sales['date'], format='%Y%m%dT%H%M%S')


# Which columns are concerned? Those whose name contains 'sqft'.
col_sqft = list(house_sales.filter(like='sqft').columns)
col_sqft

['sqft_living',
 'sqft_lot',
 'sqft_above',
 'sqft_basement',
 'sqft_living15',
 'sqft_lot15']


# For each square-foot column, add a converted column named with 'sqm'
# in place of 'sqft'.
for col_name in col_sqft:
    sqm_name = col_name.replace('sqft', 'sqm')
    house_sales[sqm_name] = house_sales[col_name].div(10.764)


# Remove the original square-foot columns in place (no copy of the DataFrame).
house_sales.drop(columns=col_sqft, inplace=True)


# Boolean flag: True where yr_renovated is greater than 0. The comparison
# already yields a boolean Series, so no np.where(..., True, False) is needed.
house_sales['renovated'] = house_sales['yr_renovated'] > 0
house_sales.head()


# Check the dtypes again after the date parsing, unit conversion and new flag.
house_sales.dtypes

id                       int64
date            datetime64[ns]
price                    int64
bedrooms                 int64
bathrooms              float64
floors                 float64
waterfront               int64
view                     int64
condition                int64
grade                    int64
yr_built                 int64
yr_renovated             int64
zipcode                  int64
lat                    float64
long                   float64
sqm_living             float64
sqm_lot                float64
sqm_above              float64
sqm_basement           float64
sqm_living15           float64
sqm_lot15              float64
renovated                 bool
dtype: object


# Histogram of sale prices using the pandas plotting accessor.
house_sales['price'].plot.hist(figsize=(10, 4), title="Price of Houses in King's County")

<Axes: title={'center': "Price of Houses in King's County"}, ylabel='Frequency'>


# Histogram of sale prices (200 bins) with a kernel-density overlay.
price_dist = sns.histplot(data=house_sales, x="price", bins=200, kde=True)
price_dist.figure.set_size_inches(10, 4)
price_dist.set_xlabel("Price in Millions")
price_dist.set_title("Price Density of Houses in King's County")
price_dist

<Axes: title={'center': "Price Density of Houses in King's County"}, xlabel='Price in Millions', ylabel='Count'>


# Same histogram, on the natural logarithm of the price.
logged_price_dist = sns.histplot(np.log(house_sales["price"]), kde=True)
logged_price_dist.figure.set_size_inches(10, 6)
# The plotted values are log(price), not prices in millions, so the x label
# says so.
logged_price_dist.set(
    xlabel="Log Price (natural log of price)",
    title="Log Price Density of Houses in King's County",
)
logged_price_dist

<Axes: title={'center': "Log Price Density of Houses in King's County"}, xlabel='Log Price in Millions', ylabel='Count'>


# Number of houses for each value of `floors` (ordered by decreasing count).
house_sales['floors'].value_counts()

floors
1.0    10680
2.0     8241
1.5     1910
3.0      613
2.5      161
3.5        8
Name: count, dtype: int64


# Same counts, ordered by number of floors rather than by frequency.
# sort_index() does this directly; no need to re-index the counts with the
# sorted unique values.
count_by_floor = house_sales['floors'].value_counts().sort_index()
count_by_floor

floors
1.0    10680
1.5     1910
2.0     8241
2.5      161
3.0      613
3.5        8
Name: count, dtype: int64


# Bar chart of the number of houses per floor count.
count_by_floor.plot.bar(title="House count by number of floors")

<Axes: title={'center': 'House count by number of floors'}, xlabel='floors'>


# Price distribution for each value of the `waterfront` column.
sns.boxplot(data=house_sales, x='waterfront', y='price')

<Axes: xlabel='waterfront', ylabel='price'>


# Number of rows where `waterfront` equals 1.
int((house_sales['waterfront'] == 1).sum())

163


# Price distribution for each value of `grade`.
ax = sns.boxplot(data=house_sales, x='grade', y='price')
ax.figure.set_size_inches(10, 6)
ax.set(title='Price distribution across grade levels')
ax

<Axes: title={'center': 'Price distribution across grade levels'}, xlabel='grade', ylabel='price'>


# Pairwise correlations between all columns except id, date, lat and long.
# (Equivalent to selecting with `~house_sales.columns.isin([...])`.)
corr = house_sales.drop(columns=['id', 'date', 'lat', 'long']).corr()
corr


# Correlation of every column with `price`, in ascending order.
corr['price'].sort_values()

zipcode        -0.053203
condition       0.036362
yr_built        0.054012
sqm_lot15       0.082447
sqm_lot         0.089661
renovated       0.126092
yr_renovated    0.126434
floors          0.256794
waterfront      0.266369
bedrooms        0.308350
sqm_basement    0.323816
view            0.397293
bathrooms       0.525138
sqm_living15    0.585379
sqm_above       0.605567
grade           0.667434
sqm_living      0.702035
price           1.000000
Name: price, dtype: float64


# Correlation heatmap with the default colour palette,
# colour scale fixed to [-1, 1] and centred on 0.
ax = sns.heatmap(corr, vmin=-1, vmax=1, center=0)


# Same correlation matrix (id, date, lat and long are not part of `corr`),
# this time with a chosen colour palette, square cells and the coefficient
# written in each cell.
heatmap_options = dict(
    vmin=-1, vmax=1, center=0,
    cmap="coolwarm",
    square=True,
    annot=True,
)
ax = sns.heatmap(corr, **heatmap_options)
# Rotate the x-axis tick labels and right-align them.
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
# Change the figure size.
ax.figure.set_size_inches(15, 15)
ax

<Axes: >


# Predictor kept as a one-column DataFrame, target as a Series.
x = house_sales.loc[:, ['sqm_living']]
y = house_sales.loc[:, 'price']


# Scatter plot of price against living area with a fitted regression line.
sns.regplot(x=x, y=y)

<Axes: xlabel='sqm_living', ylabel='price'>


# Print the regplot docstring to see the available options.
help(sns.regplot)

Help on function regplot in module seaborn.regression:

regplot(data=None, *, x=None, y=None, x_estimator=None, x_bins=None, x_ci='ci', scatter=True, fit_reg=True, ci=95, n_boot=1000, units=None, seed=None, order=1, logistic=False, lowess=False, robust=False, logx=False, x_partial=None, y_partial=None, truncate=True, dropna=True, x_jitter=None, y_jitter=None, label=None, color=None, marker='o', scatter_kws=None, line_kws=None, ax=None)
    Plot data and a linear regression model fit.
    
    There are a number of mutually exclusive options for estimating the
    regression model. See the :ref:`tutorial <regression_tutorial>` for more
    information.    
    
    Parameters
    ----------
    x, y: string, series, or vector array
        Input variables. If strings, these should correspond with column names
        in ``data``. When pandas objects are used, axes will be labeled with
        the series name.
    data : DataFrame
        Tidy ("long-form") dataframe where each column is a variable and each
        row is an observation.    
    x_estimator : callable that maps vector -> scalar, optional
        Apply this function to each unique value of ``x`` and plot the
        resulting estimate. This is useful when ``x`` is a discrete variable.
        If ``x_ci`` is given, this estimate will be bootstrapped and a
        confidence interval will be drawn.    
    x_bins : int or vector, optional
        Bin the ``x`` variable into discrete bins and then estimate the central
        tendency and a confidence interval. This binning only influences how
        the scatterplot is drawn; the regression is still fit to the original
        data.  This parameter is interpreted either as the number of
        evenly-sized (not necessary spaced) bins or the positions of the bin
        centers. When this parameter is used, it implies that the default of
        ``x_estimator`` is ``numpy.mean``.    
    x_ci : "ci", "sd", int in [0, 100] or None, optional
        Size of the confidence interval used when plotting a central tendency
        for discrete values of ``x``. If ``"ci"``, defer to the value of the
        ``ci`` parameter. If ``"sd"``, skip bootstrapping and show the
        standard deviation of the observations in each bin.    
    scatter : bool, optional
        If ``True``, draw a scatterplot with the underlying observations (or
        the ``x_estimator`` values).    
    fit_reg : bool, optional
        If ``True``, estimate and plot a regression model relating the ``x``
        and ``y`` variables.    
    ci : int in [0, 100] or None, optional
        Size of the confidence interval for the regression estimate. This will
        be drawn using translucent bands around the regression line. The
        confidence interval is estimated using a bootstrap; for large
        datasets, it may be advisable to avoid that computation by setting
        this parameter to None.    
    n_boot : int, optional
        Number of bootstrap resamples used to estimate the ``ci``. The default
        value attempts to balance time and stability; you may want to increase
        this value for "final" versions of plots.    
    units : variable name in ``data``, optional
        If the ``x`` and ``y`` observations are nested within sampling units,
        those can be specified here. This will be taken into account when
        computing the confidence intervals by performing a multilevel bootstrap
        that resamples both units and observations (within unit). This does not
        otherwise influence how the regression is estimated or drawn.    
    seed : int, numpy.random.Generator, or numpy.random.RandomState, optional
        Seed or random number generator for reproducible bootstrapping.    
    order : int, optional
        If ``order`` is greater than 1, use ``numpy.polyfit`` to estimate a
        polynomial regression.    
    logistic : bool, optional
        If ``True``, assume that ``y`` is a binary variable and use
        ``statsmodels`` to estimate a logistic regression model. Note that this
        is substantially more computationally intensive than linear regression,
        so you may wish to decrease the number of bootstrap resamples
        (``n_boot``) or set ``ci`` to None.    
    lowess : bool, optional
        If ``True``, use ``statsmodels`` to estimate a nonparametric lowess
        model (locally weighted linear regression). Note that confidence
        intervals cannot currently be drawn for this kind of model.    
    robust : bool, optional
        If ``True``, use ``statsmodels`` to estimate a robust regression. This
        will de-weight outliers. Note that this is substantially more
        computationally intensive than standard linear regression, so you may
        wish to decrease the number of bootstrap resamples (``n_boot``) or set
        ``ci`` to None.    
    logx : bool, optional
        If ``True``, estimate a linear regression of the form y ~ log(x), but
        plot the scatterplot and regression model in the input space. Note that
        ``x`` must be positive for this to work.    
    {x,y}_partial : strings in ``data`` or matrices
        Confounding variables to regress out of the ``x`` or ``y`` variables
        before plotting.    
    truncate : bool, optional
        If ``True``, the regression line is bounded by the data limits. If
        ``False``, it extends to the ``x`` axis limits.
    
    {x,y}_jitter : floats, optional
        Add uniform random noise of this size to either the ``x`` or ``y``
        variables. The noise is added to a copy of the data after fitting the
        regression, and only influences the look of the scatterplot. This can
        be helpful when plotting variables that take discrete values.    
    label : string
        Label to apply to either the scatterplot or regression line (if
        ``scatter`` is ``False``) for use in a legend.
    color : matplotlib color
        Color to apply to all plot elements; will be superseded by colors
        passed in ``scatter_kws`` or ``line_kws``.
    marker : matplotlib marker code
        Marker to use for the scatterplot glyphs.
    {scatter,line}_kws : dictionaries
        Additional keyword arguments to pass to ``plt.scatter`` and
        ``plt.plot``.    
    ax : matplotlib Axes, optional
        Axes object to draw the plot onto, otherwise uses the current Axes.
    
    Returns
    -------
    ax : matplotlib Axes
        The Axes object containing the plot.
    
    See Also
    --------
    lmplot : Combine :func:`regplot` and :class:`FacetGrid` to plot multiple
             linear relationships in a dataset.
    jointplot : Combine :func:`regplot` and :class:`JointGrid` (when used with
                ``kind="reg"``).
    pairplot : Combine :func:`regplot` and :class:`PairGrid` (when used with
               ``kind="reg"``).
    residplot : Plot the residuals of a linear regression model.
    
    Notes
    -----
    
    The :func:`regplot` and :func:`lmplot` functions are closely related, but
    the former is an axes-level function while the latter is a figure-level
    function that combines :func:`regplot` and :class:`FacetGrid`.    
    
    
    It's also easy to combine :func:`regplot` and :class:`JointGrid` or
    :class:`PairGrid` through the :func:`jointplot` and :func:`pairplot`
    functions, although these do not directly accept all of :func:`regplot`'s
    parameters.
    
    Examples
    --------
    
    .. include: ../docstrings/regplot.rst


# Same regression plot with a red fit line and semi-transparent points.
sns.regplot(x=x, y=y, scatter_kws={'alpha': 0.3}, line_kws={'color': 'red'})

<Axes: xlabel='sqm_living', ylabel='price'>


import statsmodels.api as sm

# Ordinary least squares of price on living area with statsmodels;
# add_constant() adds the column used for the intercept term.
design_matrix = sm.add_constant(x)
model = sm.OLS(y, design_matrix)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  price   R-squared:                       0.493
Model:                            OLS   Adj. R-squared:                  0.493
Method:                 Least Squares   F-statistic:                 2.100e+04
Date:                Tue, 16 May 2023   Prob (F-statistic):               0.00
Time:                        09:33:02   Log-Likelihood:            -3.0027e+05
No. Observations:               21613   AIC:                         6.005e+05
Df Residuals:                   21611   BIC:                         6.006e+05
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -4.358e+04   4402.690     -9.899      0.000   -5.22e+04    -3.5e+04
sqm_living  3020.6321     20.843    144.920      0.000    2979.777    3061.487
==============================================================================
Omnibus:                    14832.490   Durbin-Watson:                   1.983
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           546444.709
Skew:                           2.824   Prob(JB):                         0.00
Kurtosis:                      26.977   Cond. No.                         523.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


from sklearn import linear_model

# Same regression with scikit-learn.
model = linear_model.LinearRegression()
model.fit(x, y)
# fit() returns the estimator itself, so `results` is the fitted model.
results = model

print(model.intercept_, model.coef_)

-43580.74032708525 [3020.63207124]


# Predict the price for a living area of 438 (same unit as `sqm_living`).
# The model was fitted on a DataFrame, so the input is passed with the same
# column name(s); a bare nested list triggers scikit-learn's
# "X does not have valid feature names" UserWarning.
model.predict(pd.DataFrame([[438]], columns=x.columns))

/home/mthh/code/presentation-python-r-shs/env/lib/python3.10/site-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
  warnings.warn(

array([1279456.10687803])


# Observations as a scatter plot, with the fitted regression line on top.
fitted_prices = model.predict(x)
plt.scatter(x, y, color='g', alpha=0.3)
plt.plot(x, fitted_prices, color='r')
plt.xlabel('Living space (sq m)')
plt.ylabel('Price (million dollars)')
plt.title('House price vs. sqm living')
plt.show()


import rpy2.rinterface

# Load rpy2's IPython extension, which provides the %%R cell magic.
%load_ext rpy2.ipython


%%R -i house_sales -o my_coef
#   ^^ -i: name(s) of the Python variable(s) to use in R
#                  ^^ -o: R output to send back to Python
# Fit a linear model of price on sqm_living and print its summary.
model <- lm('price ~ sqm_living', house_sales)
print(summary(model))
# Store the fitted coefficients in the variable exported through -o.
my_coef <- coef(model)

/home/mthh/code/presentation-python-r-shs/env/lib/python3.10/site-packages/rpy2/robjects/pandas2ri.py:65: UserWarning: Error while trying to convert the column "id". Fall back to string conversion. The error is: integer 7129300520 does not fit '32-bit int'
  warnings.warn('Error while trying to convert '

Call:
lm(formula = "price ~ sqm_living", data = house_sales)

Residuals:
     Min       1Q   Median       3Q      Max 
-1476062  -147486   -24043   106182  4362067 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -43580.74    4402.69  -9.899   <2e-16 ***
sqm_living    3020.63      20.84 144.920   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 261500 on 21611 degrees of freedom
Multiple R-squared:  0.4929,	Adjusted R-squared:  0.4928 
F-statistic: 2.1e+04 on 1 and 21611 DF,  p-value: < 2.2e-16


# Back in Python: show the coefficients exported from the R cell.
print(my_coef)

[-43580.74032708   3020.63207124]


import geopandas as gpd
from shapely import Point

# Build point geometries from the long/lat columns (x = long, y = lat) and
# attach them to the table, declaring the coordinates as EPSG:4326.
# points_from_xy is vectorised, so no per-row Python loop creating Point
# objects is needed.
geometry = gpd.points_from_xy(house_sales['long'], house_sales['lat'])
gdf = gpd.GeoDataFrame(house_sales, crs="EPSG:4326", geometry=geometry)


# Quick plot of all points: small, semi-transparent orange markers.
gdf.plot(color="orange", markersize=0.4, alpha=0.3)

<Axes: >


import contextily as cx

# Reproject to EPSG:3857, plot the points, then add a contextily basemap
# underneath them.
gdf_wm = gdf.to_crs("EPSG:3857")
ax = gdf_wm.plot(color="red", markersize=0.4, alpha=0.5, figsize=(10, 10))
cx.add_basemap(ax)


# Price per square metre of living area. Both operands come from `gdf_wm`,
# so the computation does not depend on index alignment with a second frame.
gdf_wm['ppsqm'] = gdf_wm['price'] / gdf_wm['sqm_living']
# Choropleth of the points, classified into 6 quantile classes.
ax = gdf_wm.plot('ppsqm', scheme='Quantiles', k=6, figsize=(16, 16), legend=True)
cx.add_basemap(ax)


import folium


# This time only the waterfront houses (waterfront == 1) are displayed.
gdf_with_view = gdf.loc[gdf['waterfront'] == 1]


# Bounding box of that subset: (minx, miny, maxx, maxy).
bounds = gdf_with_view.total_bounds


# Coordinates of the centre of the bounding box.
min_x, min_y, max_x, max_y = bounds
center = [
    (max_x + min_x) / 2,
    (max_y + min_y) / 2,
]
center  # Longitude, Latitude, whereas Folium expects Latitude, Longitude

[-122.28649999999999, 47.55025]


# Interactive map centred on the waterfront houses. `center` is
# [longitude, latitude], so it is reversed for Folium.
# "Stamen Terrain" is no longer a built-in Folium tileset (recent versions
# reject it without a tile URL and attribution), so OpenStreetMap is used.
# The variable is not called `map`, to avoid shadowing the builtin.
waterfront_map = folium.Map(location=center[::-1], tiles="OpenStreetMap", zoom_start=10)

# One blue marker per house, with price and living area in the popup.
for _, row in gdf_with_view.iterrows():
    folium.Marker(
        location=[row['lat'], row['long']],
        popup=f'''
                Prix: {row['price']}$
                <br>
                Superficie habitable: {round(row['sqm_living'], 1)}m2
            ''',
        icon=folium.Icon(color="blue"),
    ).add_to(waterfront_map)

waterfront_map

	id	date	price	bedrooms	bathrooms	sqft_living	sqft_lot	floors	...	grade	sqft_above	sqft_basement	yr_built	yr_renovated	zipcode	lat	long	sqft_living15	sqft_lot15
0	7129300520	20141013T000000	221900	3	1.00	1180	5650	1.0	...	7	1180	0	1955	0	98178	47.5112	-122.257	1340	5650
1	6414100192	20141209T000000	538000	3	2.25	2570	7242	2.0	...	7	2170	400	1951	1991	98125	47.7210	-122.319	1690	7639
2	5631500400	20150225T000000	180000	2	1.00	770	10000	1.0	...	6	770	0	1933	0	98028	47.7379	-122.233	2720	8062
3	2487200875	20141209T000000	604000	4	3.00	1960	5000	1.0	...	7	1050	910	1965	0	98136	47.5208	-122.393	1360	5000
4	1954400510	20150218T000000	510000	3	2.00	1680	8080	1.0	...	8	1680	0	1987	0	98074	47.6168	-122.045	1800	7503

	id	date	price	bedrooms	bathrooms	floors	condition	grade	...	zipcode	lat	long	sqm_living	sqm_lot	sqm_above	sqm_basement	sqm_living15	sqm_lot15	renovated
0	7129300520	2014-10-13	221900	3	1.00	1.0	3	7	...	98178	47.5112	-122.257	109.624675	524.897808	109.624675	0.000000	124.489038	524.897808	False
1	6414100192	2014-12-09	538000	3	2.25	2.0	3	7	...	98125	47.7210	-122.319	238.758826	672.798216	201.597919	37.160907	157.004831	709.680416	True
2	5631500400	2015-02-25	180000	2	1.00	1.0	3	6	...	98028	47.7379	-122.233	71.534745	929.022668	71.534745	0.000000	252.694166	748.978075	False
3	2487200875	2014-12-09	604000	4	3.00	1.0	5	7	...	98136	47.5208	-122.393	182.088443	464.511334	97.547380	84.541063	126.347083	464.511334	False
4	1954400510	2015-02-18	510000	3	2.00	1.0	3	8	...	98074	47.6168	-122.045	156.075808	750.650316	156.075808	0.000000	167.224080	697.045708	False

	price	bedrooms	bathrooms	floors	waterfront	view	condition	grade	yr_built	yr_renovated	zipcode	sqm_living	sqm_lot	sqm_above	sqm_basement	sqm_living15	sqm_lot15	renovated
price	1.000000	0.308350	0.525138	0.256794	0.266369	0.397293	0.036362	0.667434	0.054012	0.126434	-0.053203	0.702035	0.089661	0.605567	0.323816	0.585379	0.082447	0.126092
bedrooms	0.308350	1.000000	0.515884	0.175429	-0.006582	0.079532	0.028472	0.356967	0.154178	0.018841	-0.152668	0.576671	0.031703	0.477600	0.303093	0.391638	0.029244	0.018553
bathrooms	0.525138	0.515884	1.000000	0.500653	0.063744	0.187737	-0.124982	0.664983	0.506019	0.050739	-0.203866	0.754665	0.087740	0.685342	0.283770	0.568634	0.087175	0.050260
floors	0.256794	0.175429	0.500653	1.000000	0.023698	0.029444	-0.263768	0.458183	0.489319	0.006338	-0.059121	0.353949	-0.005201	0.523885	-0.245705	0.279885	-0.011269	0.006260
waterfront	0.266369	-0.006582	0.063744	0.023698	1.000000	0.401857	0.016653	0.082775	-0.026161	0.092885	0.030285	0.103818	0.021604	0.072075	0.080588	0.086463	0.030703	0.093294
view	0.397293	0.079532	0.187737	0.029444	0.401857	1.000000	0.045990	0.251321	-0.053440	0.103917	0.084827	0.284611	0.074710	0.167649	0.276947	0.280439	0.072575	0.104062
condition	0.036362	0.028472	-0.124982	-0.263768	0.016653	0.045990	1.000000	-0.144674	-0.361417	-0.060618	0.003026	-0.058753	-0.008958	-0.158214	0.174105	-0.092824	-0.003406	-0.060139
grade	0.667434	0.356967	0.664983	0.458183	0.082775	0.251321	-0.144674	1.000000	0.446963	0.014414	-0.184862	0.762704	0.113621	0.755923	0.168392	0.713202	0.119248	0.014008
yr_built	0.054012	0.154178	0.506019	0.489319	-0.026161	-0.053440	-0.361417	0.446963	1.000000	-0.224874	-0.346869	0.318049	0.053080	0.423898	-0.133124	0.326229	0.070958	-0.225195
yr_renovated	0.126434	0.018841	0.050739	0.006338	0.092885	0.103917	-0.060618	0.014414	-0.224874	1.000000	0.064357	0.055363	0.007644	0.023285	0.071323	-0.002673	0.007854	0.999968
zipcode	-0.053203	-0.152668	-0.203866	-0.059121	0.030285	0.084827	0.003026	-0.184862	-0.346869	0.064357	1.000000	-0.199430	-0.129574	-0.261190	0.074845	-0.279033	-0.147221	0.064335
sqm_living	0.702035	0.576671	0.754665	0.353949	0.103818	0.284611	-0.058753	0.762704	0.318049	0.055363	-0.199430	1.000000	0.172826	0.876597	0.435043	0.756420	0.183286	0.055094
sqm_lot	0.089661	0.031703	0.087740	-0.005201	0.021604	0.074710	-0.008958	0.113621	0.053080	0.007644	-0.129574	0.172826	1.000000	0.183512	0.015286	0.144608	0.718557	0.007745
sqm_above	0.605567	0.477600	0.685342	0.523885	0.072075	0.167649	-0.158214	0.755923	0.423898	0.023285	-0.261190	0.876597	0.183512	1.000000	-0.051943	0.731870	0.194050	0.023178
sqm_basement	0.323816	0.303093	0.283770	-0.245705	0.080588	0.276947	0.174105	0.168392	-0.133124	0.071323	0.074845	0.435043	0.015286	-0.051943	1.000000	0.200355	0.017276	0.070963
sqm_living15	0.585379	0.391638	0.568634	0.279885	0.086463	0.280439	-0.092824	0.713202	0.326229	-0.002673	-0.279033	0.756420	0.144608	0.731870	0.200355	1.000000	0.183192	-0.002755
sqm_lot15	0.082447	0.029244	0.087175	-0.011269	0.030703	0.072575	-0.003406	0.119248	0.070958	0.007854	-0.147221	0.183286	0.718557	0.194050	0.017276	0.183192	1.000000	0.007920
renovated	0.126092	0.018553	0.050260	0.006260	0.093294	0.104062	-0.060139	0.014008	-0.225195	0.999968	0.064335	0.055094	0.007745	0.023178	0.070963	-0.002755	0.007920	1.000000

House Sales in King County

0. Import des données

1. Explorer les données

2. Régression linéaire

3. Convertir le jeu de données en un jeu de données spatiales