# Importing Required Libraries
```python
#data reading and manipulation packages
import pandas as pd
import numpy as np
import openpyxl as oxl
#data visualization packages
import matplotlib.pyplot as plt
import matplotlib as mlt
import seaborn as sns
#machine learning packages
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.linear_model import LogisticRegression
```
# Reading .csv file & performing EDA
## Exploratory Data Analysis
```python
# Load the comma-delimited Boston housing file into a pandas DataFrame.
df = pd.read_csv("boston.csv", sep=",")
# Keep the frame as the final expression so the notebook renders it.
df
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
<th>medv</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>0.00632</td>
<td>18.0</td>
<td>2.31</td>
<td>0</td>
<td>0.538</td>
<td>6.575</td>
<td>65.2</td>
<td>4.0900</td>
<td>1</td>
<td>296</td>
<td>15.3</td>
<td>396.90</td>
<td>4.98</td>
<td>24.0</td>
</tr>
<tr>
<th>1</th>
<td>0.02731</td>
<td>0.0</td>
<td>7.07</td>
<td>0</td>
<td>0.469</td>
<td>6.421</td>
<td>78.9</td>
<td>4.9671</td>
<td>2</td>
<td>242</td>
<td>17.8</td>
<td>396.90</td>
<td>9.14</td>
<td>21.6</td>
</tr>
<tr>
<th>2</th>
<td>0.02729</td>
<td>0.0</td>
<td>7.07</td>
<td>0</td>
<td>0.469</td>
<td>7.185</td>
<td>61.1</td>
<td>4.9671</td>
<td>2</td>
<td>242</td>
<td>17.8</td>
<td>392.83</td>
<td>4.03</td>
<td>34.7</td>
</tr>
<tr>
<th>3</th>
<td>0.03237</td>
<td>0.0</td>
<td>2.18</td>
<td>0</td>
<td>0.458</td>
<td>6.998</td>
<td>45.8</td>
<td>6.0622</td>
<td>3</td>
<td>222</td>
<td>18.7</td>
<td>394.63</td>
<td>2.94</td>
<td>33.4</td>
</tr>
<tr>
<th>4</th>
<td>0.06905</td>
<td>0.0</td>
<td>2.18</td>
<td>0</td>
<td>0.458</td>
<td>7.147</td>
<td>54.2</td>
<td>6.0622</td>
<td>3</td>
<td>222</td>
<td>18.7</td>
<td>396.90</td>
<td>5.33</td>
<td>36.2</td>
</tr>
<tr>
<th>...</th>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>501</th>
<td>0.06263</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.593</td>
<td>69.1</td>
<td>2.4786</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>391.99</td>
<td>9.67</td>
<td>22.4</td>
</tr>
<tr>
<th>502</th>
<td>0.04527</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.120</td>
<td>76.7</td>
<td>2.2875</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>396.90</td>
<td>9.08</td>
<td>20.6</td>
</tr>
<tr>
<th>503</th>
<td>0.06076</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.976</td>
<td>91.0</td>
<td>2.1675</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>396.90</td>
<td>5.64</td>
<td>23.9</td>
</tr>
<tr>
<th>504</th>
<td>0.10959</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.794</td>
<td>89.3</td>
<td>2.3889</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>393.45</td>
<td>6.48</td>
<td>22.0</td>
</tr>
<tr>
<th>505</th>
<td>0.04741</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.030</td>
<td>80.8</td>
<td>2.5050</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>396.90</td>
<td>7.88</td>
<td>11.9</td>
</tr>
</tbody>
</table>
<p>506 rows × 14 columns</p>
</div>
### General Information about Dataset
- **CRIM**: per capita crime rate by town
- **ZN**: proportion of residential land zoned for lots over 25,000 sq. ft.
- **INDUS**: proportion of non-retail business acres per town
- **CHAS**: Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
- **NOX**: nitric oxides concentration (parts per 10 million)
- **RM**: average number of rooms per dwelling
- **AGE**: proportion of owner-occupied units built prior to 1940
- **DIS**: weighted distances to five Boston employment centres
- **RAD**: index of accessibility to radial highways
- **TAX**: full-value property-tax rate per 10,000 USD
- **PTRATIO**: pupil-teacher ratio by town
- **B** (column `black` in this dataset): `1000(Bk - 0.63)^2`, where Bk is the proportion of Black residents by town
- **LSTAT**: percentage of the population of lower status
- **MEDV** (target column): median value of owner-occupied homes, in thousands of USD
```python
# Display the first 5 rows of the dataset.
df.head()
# To display the first n rows instead, pass n explicitly:
#df.head(n)
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
<th>medv</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>0.00632</td>
<td>18.0</td>
<td>2.31</td>
<td>0</td>
<td>0.538</td>
<td>6.575</td>
<td>65.2</td>
<td>4.0900</td>
<td>1</td>
<td>296</td>
<td>15.3</td>
<td>396.90</td>
<td>4.98</td>
<td>24.0</td>
</tr>
<tr>
<th>1</th>
<td>0.02731</td>
<td>0.0</td>
<td>7.07</td>
<td>0</td>
<td>0.469</td>
<td>6.421</td>
<td>78.9</td>
<td>4.9671</td>
<td>2</td>
<td>242</td>
<td>17.8</td>
<td>396.90</td>
<td>9.14</td>
<td>21.6</td>
</tr>
<tr>
<th>2</th>
<td>0.02729</td>
<td>0.0</td>
<td>7.07</td>
<td>0</td>
<td>0.469</td>
<td>7.185</td>
<td>61.1</td>
<td>4.9671</td>
<td>2</td>
<td>242</td>
<td>17.8</td>
<td>392.83</td>
<td>4.03</td>
<td>34.7</td>
</tr>
<tr>
<th>3</th>
<td>0.03237</td>
<td>0.0</td>
<td>2.18</td>
<td>0</td>
<td>0.458</td>
<td>6.998</td>
<td>45.8</td>
<td>6.0622</td>
<td>3</td>
<td>222</td>
<td>18.7</td>
<td>394.63</td>
<td>2.94</td>
<td>33.4</td>
</tr>
<tr>
<th>4</th>
<td>0.06905</td>
<td>0.0</td>
<td>2.18</td>
<td>0</td>
<td>0.458</td>
<td>7.147</td>
<td>54.2</td>
<td>6.0622</td>
<td>3</td>
<td>222</td>
<td>18.7</td>
<td>396.90</td>
<td>5.33</td>
<td>36.2</td>
</tr>
</tbody>
</table>
</div>
```python
# Display the last 5 rows of the dataset.
df.tail()
# To display the last n rows instead, pass n explicitly:
#df.tail(n)
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
<th>medv</th>
</tr>
</thead>
<tbody>
<tr>
<th>501</th>
<td>0.06263</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.593</td>
<td>69.1</td>
<td>2.4786</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>391.99</td>
<td>9.67</td>
<td>22.4</td>
</tr>
<tr>
<th>502</th>
<td>0.04527</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.120</td>
<td>76.7</td>
<td>2.2875</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>396.90</td>
<td>9.08</td>
<td>20.6</td>
</tr>
<tr>
<th>503</th>
<td>0.06076</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.976</td>
<td>91.0</td>
<td>2.1675</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>396.90</td>
<td>5.64</td>
<td>23.9</td>
</tr>
<tr>
<th>504</th>
<td>0.10959</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.794</td>
<td>89.3</td>
<td>2.3889</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>393.45</td>
<td>6.48</td>
<td>22.0</td>
</tr>
<tr>
<th>505</th>
<td>0.04741</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.030</td>
<td>80.8</td>
<td>2.5050</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>396.90</td>
<td>7.88</td>
<td>11.9</td>
</tr>
</tbody>
</table>
</div>
```python
# Display a single randomly chosen row; the row picked varies between runs.
df.sample()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
<th>medv</th>
</tr>
</thead>
<tbody>
<tr>
<th>285</th>
<td>0.01096</td>
<td>55.0</td>
<td>2.25</td>
<td>0</td>
<td>0.389</td>
<td>6.453</td>
<td>31.9</td>
<td>7.3073</td>
<td>1</td>
<td>300</td>
<td>15.3</td>
<td>394.72</td>
<td>8.23</td>
<td>22.0</td>
</tr>
</tbody>
</table>
</div>
```python
# Display 3 randomly chosen rows from the dataset.
df.sample(3)
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
<th>medv</th>
</tr>
</thead>
<tbody>
<tr>
<th>355</th>
<td>0.10659</td>
<td>80.0</td>
<td>1.91</td>
<td>0</td>
<td>0.413</td>
<td>5.936</td>
<td>19.5</td>
<td>10.5857</td>
<td>4</td>
<td>334</td>
<td>22.0</td>
<td>376.04</td>
<td>5.57</td>
<td>20.6</td>
</tr>
<tr>
<th>445</th>
<td>10.67180</td>
<td>0.0</td>
<td>18.10</td>
<td>0</td>
<td>0.740</td>
<td>6.459</td>
<td>94.8</td>
<td>1.9879</td>
<td>24</td>
<td>666</td>
<td>20.2</td>
<td>43.06</td>
<td>23.98</td>
<td>11.8</td>
</tr>
<tr>
<th>365</th>
<td>4.55587</td>
<td>0.0</td>
<td>18.10</td>
<td>0</td>
<td>0.718</td>
<td>3.561</td>
<td>87.9</td>
<td>1.6132</td>
<td>24</td>
<td>666</td>
<td>20.2</td>
<td>354.70</td>
<td>7.12</td>
<td>27.5</td>
</tr>
</tbody>
</table>
</div>
```python
# Display the shape of the dataset,
# i.e. a (number of rows, number of columns) tuple.
df.shape
```
(506, 14)
```python
# Display the names of all columns in the dataset.
df.columns
```
Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
'ptratio', 'black', 'lstat', 'medv'],
dtype='object')
```python
# Print a summary of the dataset: index range, per-column non-null counts
# and dtypes, and memory usage.
df.info()
```
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 crim 506 non-null float64
1 zn 506 non-null float64
2 indus 506 non-null float64
3 chas 506 non-null int64
4 nox 506 non-null float64
5 rm 506 non-null float64
6 age 506 non-null float64
7 dis 506 non-null float64
8 rad 506 non-null int64
9 tax 506 non-null int64
10 ptratio 506 non-null float64
11 black 506 non-null float64
12 lstat 506 non-null float64
13 medv 506 non-null float64
dtypes: float64(11), int64(3)
memory usage: 55.5 KB
```python
# Display the data type of each column.
df.dtypes
```
crim float64
zn float64
indus float64
chas int64
nox float64
rm float64
age float64
dis float64
rad int64
tax int64
ptratio float64
black float64
lstat float64
medv float64
dtype: object
```python
# Summary statistics for every column: count, mean, standard deviation,
# minimum, quartiles, and maximum.
df.describe()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
<th>medv</th>
</tr>
</thead>
<tbody>
<tr>
<th>count</th>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
<td>506.000000</td>
</tr>
<tr>
<th>mean</th>
<td>3.613524</td>
<td>11.363636</td>
<td>11.136779</td>
<td>0.069170</td>
<td>0.554695</td>
<td>6.284634</td>
<td>68.574901</td>
<td>3.795043</td>
<td>9.549407</td>
<td>408.237154</td>
<td>18.455534</td>
<td>356.674032</td>
<td>12.653063</td>
<td>22.532806</td>
</tr>
<tr>
<th>std</th>
<td>8.601545</td>
<td>23.322453</td>
<td>6.860353</td>
<td>0.253994</td>
<td>0.115878</td>
<td>0.702617</td>
<td>28.148861</td>
<td>2.105710</td>
<td>8.707259</td>
<td>168.537116</td>
<td>2.164946</td>
<td>91.294864</td>
<td>7.141062</td>
<td>9.197104</td>
</tr>
<tr>
<th>min</th>
<td>0.006320</td>
<td>0.000000</td>
<td>0.460000</td>
<td>0.000000</td>
<td>0.385000</td>
<td>3.561000</td>
<td>2.900000</td>
<td>1.129600</td>
<td>1.000000</td>
<td>187.000000</td>
<td>12.600000</td>
<td>0.320000</td>
<td>1.730000</td>
<td>5.000000</td>
</tr>
<tr>
<th>25%</th>
<td>0.082045</td>
<td>0.000000</td>
<td>5.190000</td>
<td>0.000000</td>
<td>0.449000</td>
<td>5.885500</td>
<td>45.025000</td>
<td>2.100175</td>
<td>4.000000</td>
<td>279.000000</td>
<td>17.400000</td>
<td>375.377500</td>
<td>6.950000</td>
<td>17.025000</td>
</tr>
<tr>
<th>50%</th>
<td>0.256510</td>
<td>0.000000</td>
<td>9.690000</td>
<td>0.000000</td>
<td>0.538000</td>
<td>6.208500</td>
<td>77.500000</td>
<td>3.207450</td>
<td>5.000000</td>
<td>330.000000</td>
<td>19.050000</td>
<td>391.440000</td>
<td>11.360000</td>
<td>21.200000</td>
</tr>
<tr>
<th>75%</th>
<td>3.677083</td>
<td>12.500000</td>
<td>18.100000</td>
<td>0.000000</td>
<td>0.624000</td>
<td>6.623500</td>
<td>94.075000</td>
<td>5.188425</td>
<td>24.000000</td>
<td>666.000000</td>
<td>20.200000</td>
<td>396.225000</td>
<td>16.955000</td>
<td>25.000000</td>
</tr>
<tr>
<th>max</th>
<td>88.976200</td>
<td>100.000000</td>
<td>27.740000</td>
<td>1.000000</td>
<td>0.871000</td>
<td>8.780000</td>
<td>100.000000</td>
<td>12.126500</td>
<td>24.000000</td>
<td>711.000000</td>
<td>22.000000</td>
<td>396.900000</td>
<td>37.970000</td>
<td>50.000000</td>
</tr>
</tbody>
</table>
</div>
### Data Cleaning
- Check for Null Values
- Check for Duplicate Values/Rows
- Check for outliers
```python
# Check the dataset for null (missing) values.
# The result is a boolean frame in which True marks a missing entry.
df.isnull()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
<th>medv</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
</tr>
<tr>
<th>1</th>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
</tr>
<tr>
<th>2</th>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
</tr>
<tr>
<th>3</th>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
</tr>
<tr>
<th>4</th>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
</tr>
<tr>
<th>...</th>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>501</th>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
</tr>
<tr>
<th>502</th>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
</tr>
<tr>
<th>503</th>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
</tr>
<tr>
<th>504</th>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
</tr>
<tr>
<th>505</th>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
</tr>
</tbody>
</table>
<p>506 rows × 14 columns</p>
</div>
```python
# Number of null values in each column.
df.isnull().sum()
```
crim 0
zn 0
indus 0
chas 0
nox 0
rm 0
age 0
dis 0
rad 0
tax 0
ptratio 0
black 0
lstat 0
medv 0
dtype: int64
```python
# Total number of null values across the whole dataset.
df.isnull().sum().sum()
```
np.int64(0)
```python
# Check for duplicate rows: True marks a row that repeats an earlier row.
df.duplicated()
```
0 False
1 False
2 False
3 False
4 False
...
501 False
502 False
503 False
504 False
505 False
Length: 506, dtype: bool
```python
# Total number of duplicated rows in the dataset.
df.duplicated().sum()
```
np.int64(0)
# Data Visualization
```python
# Histograms: one panel per column, showing how each feature is distributed.
df.hist(figsize=(18, 14), color="y")
plt.show()
```

```python
# Relationship between the number of rooms (rm) and the house price (medv),
# drawn as a scatter plot.
x = df["rm"]
y = df["medv"]
plt.scatter(x, y, color="r", marker="*", label="Data Points")
# Font sizes are passed as numbers (points), the documented form for fontsize.
plt.xlabel("No. of Rooms", fontsize=13)
plt.ylabel("House Price", fontsize=13)
plt.title("Relationship Plot", fontsize=13)
plt.legend()
plt.show()
```

```python
# Check for outliers using a separate box plot for each non-object column
for i in df.columns:
    if df[i].dtype != "object":
        plt.boxplot(df[i])
        plt.xlabel(i)
        plt.show()
```
```python
# Outlier detection: draw one box per column on a single shared axis.
plt.figure(figsize=(12, 6))
sns.boxplot(data=df)
plt.xticks(rotation=0)  # column names stay horizontal
plt.show()
```

```python
# Annotated heatmap of the pairwise correlations between all columns.
plt.figure(figsize=(12, 6))
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm")
plt.title("Correlation Heatmap")
plt.show()
```

### Feature Selection
```python
# Separate the features (X) from the target (y).
# y: dependent variable / target column -- 'medv'.
y = df["medv"]
# X: independent variables / input columns -- every column except 'medv'.
X = df.drop(columns="medv")
```
```python
# Names of the input columns (independent variables).
X.columns
```
Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
'ptratio', 'black', 'lstat'],
dtype='object')
```python
# First 5 rows of the input columns.
X.head()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>0.00632</td>
<td>18.0</td>
<td>2.31</td>
<td>0</td>
<td>0.538</td>
<td>6.575</td>
<td>65.2</td>
<td>4.0900</td>
<td>1</td>
<td>296</td>
<td>15.3</td>
<td>396.90</td>
<td>4.98</td>
</tr>
<tr>
<th>1</th>
<td>0.02731</td>
<td>0.0</td>
<td>7.07</td>
<td>0</td>
<td>0.469</td>
<td>6.421</td>
<td>78.9</td>
<td>4.9671</td>
<td>2</td>
<td>242</td>
<td>17.8</td>
<td>396.90</td>
<td>9.14</td>
</tr>
<tr>
<th>2</th>
<td>0.02729</td>
<td>0.0</td>
<td>7.07</td>
<td>0</td>
<td>0.469</td>
<td>7.185</td>
<td>61.1</td>
<td>4.9671</td>
<td>2</td>
<td>242</td>
<td>17.8</td>
<td>392.83</td>
<td>4.03</td>
</tr>
<tr>
<th>3</th>
<td>0.03237</td>
<td>0.0</td>
<td>2.18</td>
<td>0</td>
<td>0.458</td>
<td>6.998</td>
<td>45.8</td>
<td>6.0622</td>
<td>3</td>
<td>222</td>
<td>18.7</td>
<td>394.63</td>
<td>2.94</td>
</tr>
<tr>
<th>4</th>
<td>0.06905</td>
<td>0.0</td>
<td>2.18</td>
<td>0</td>
<td>0.458</td>
<td>7.147</td>
<td>54.2</td>
<td>6.0622</td>
<td>3</td>
<td>222</td>
<td>18.7</td>
<td>396.90</td>
<td>5.33</td>
</tr>
</tbody>
</table>
</div>
```python
# Last 5 rows of the input columns.
X.tail()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
</tr>
</thead>
<tbody>
<tr>
<th>501</th>
<td>0.06263</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.593</td>
<td>69.1</td>
<td>2.4786</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>391.99</td>
<td>9.67</td>
</tr>
<tr>
<th>502</th>
<td>0.04527</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.120</td>
<td>76.7</td>
<td>2.2875</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>396.90</td>
<td>9.08</td>
</tr>
<tr>
<th>503</th>
<td>0.06076</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.976</td>
<td>91.0</td>
<td>2.1675</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>396.90</td>
<td>5.64</td>
</tr>
<tr>
<th>504</th>
<td>0.10959</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.794</td>
<td>89.3</td>
<td>2.3889</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>393.45</td>
<td>6.48</td>
</tr>
<tr>
<th>505</th>
<td>0.04741</td>
<td>0.0</td>
<td>11.93</td>
<td>0</td>
<td>0.573</td>
<td>6.030</td>
<td>80.8</td>
<td>2.5050</td>
<td>1</td>
<td>273</td>
<td>21.0</td>
<td>396.90</td>
<td>7.88</td>
</tr>
</tbody>
</table>
</div>
```python
# First 5 values of the target column (medv).
y.head()
```
0 24.0
1 21.6
2 34.7
3 33.4
4 36.2
Name: medv, dtype: float64
```python
# Last 5 values of the target column (medv).
y.tail()
```
501 22.4
502 20.6
503 23.9
504 22.0
505 11.9
Name: medv, dtype: float64
# Train-Test-Split
```python
# Split the data into training and testing sets: 20% of the rows are held
# out for testing, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
```
```python
# First 5 rows of the training inputs.
X_train.head()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
</tr>
</thead>
<tbody>
<tr>
<th>477</th>
<td>15.02340</td>
<td>0.0</td>
<td>18.10</td>
<td>0</td>
<td>0.6140</td>
<td>5.304</td>
<td>97.3</td>
<td>2.1007</td>
<td>24</td>
<td>666</td>
<td>20.2</td>
<td>349.48</td>
<td>24.91</td>
</tr>
<tr>
<th>15</th>
<td>0.62739</td>
<td>0.0</td>
<td>8.14</td>
<td>0</td>
<td>0.5380</td>
<td>5.834</td>
<td>56.5</td>
<td>4.4986</td>
<td>4</td>
<td>307</td>
<td>21.0</td>
<td>395.62</td>
<td>8.47</td>
</tr>
<tr>
<th>332</th>
<td>0.03466</td>
<td>35.0</td>
<td>6.06</td>
<td>0</td>
<td>0.4379</td>
<td>6.031</td>
<td>23.3</td>
<td>6.6407</td>
<td>1</td>
<td>304</td>
<td>16.9</td>
<td>362.25</td>
<td>7.83</td>
</tr>
<tr>
<th>423</th>
<td>7.05042</td>
<td>0.0</td>
<td>18.10</td>
<td>0</td>
<td>0.6140</td>
<td>6.103</td>
<td>85.1</td>
<td>2.0218</td>
<td>24</td>
<td>666</td>
<td>20.2</td>
<td>2.52</td>
<td>23.29</td>
</tr>
<tr>
<th>19</th>
<td>0.72580</td>
<td>0.0</td>
<td>8.14</td>
<td>0</td>
<td>0.5380</td>
<td>5.727</td>
<td>69.5</td>
<td>3.7965</td>
<td>4</td>
<td>307</td>
<td>21.0</td>
<td>390.95</td>
<td>11.28</td>
</tr>
</tbody>
</table>
</div>
```python
# First 5 rows of the testing inputs.
X_test.head()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>crim</th>
<th>zn</th>
<th>indus</th>
<th>chas</th>
<th>nox</th>
<th>rm</th>
<th>age</th>
<th>dis</th>
<th>rad</th>
<th>tax</th>
<th>ptratio</th>
<th>black</th>
<th>lstat</th>
</tr>
</thead>
<tbody>
<tr>
<th>173</th>
<td>0.09178</td>
<td>0.0</td>
<td>4.05</td>
<td>0</td>
<td>0.510</td>
<td>6.416</td>
<td>84.1</td>
<td>2.6463</td>
<td>5</td>
<td>296</td>
<td>16.6</td>
<td>395.50</td>
<td>9.04</td>
</tr>
<tr>
<th>274</th>
<td>0.05644</td>
<td>40.0</td>
<td>6.41</td>
<td>1</td>
<td>0.447</td>
<td>6.758</td>
<td>32.9</td>
<td>4.0776</td>
<td>4</td>
<td>254</td>
<td>17.6</td>
<td>396.90</td>
<td>3.53</td>
</tr>
<tr>
<th>491</th>
<td>0.10574</td>
<td>0.0</td>
<td>27.74</td>
<td>0</td>
<td>0.609</td>
<td>5.983</td>
<td>98.8</td>
<td>1.8681</td>
<td>4</td>
<td>711</td>
<td>20.1</td>
<td>390.11</td>
<td>18.07</td>
</tr>
<tr>
<th>72</th>
<td>0.09164</td>
<td>0.0</td>
<td>10.81</td>
<td>0</td>
<td>0.413</td>
<td>6.065</td>
<td>7.8</td>
<td>5.2873</td>
<td>4</td>
<td>305</td>
<td>19.2</td>
<td>390.91</td>
<td>5.52</td>
</tr>
<tr>
<th>452</th>
<td>5.09017</td>
<td>0.0</td>
<td>18.10</td>
<td>0</td>
<td>0.713</td>
<td>6.297</td>
<td>91.8</td>
<td>2.3682</td>
<td>24</td>
<td>666</td>
<td>20.2</td>
<td>385.09</td>
<td>17.27</td>
</tr>
</tbody>
</table>
</div>
```python
# First 5 values of the training target.
y_train.head()
```
477 12.0
15 19.9
332 19.4
423 13.4
19 18.2
Name: medv, dtype: float64
```python
# First 5 values of the testing target.
y_test.head()
```
173 23.6
274 32.4
491 13.6
72 22.8
452 16.1
Name: medv, dtype: float64
# Model Training
```python
# Train a linear regression model on the training data (X_train, y_train).
model = LinearRegression()
model.fit(X_train,y_train)
# The model is now trained. Because X_train has several input columns, the
# fitted relationship is a hyperplane rather than a single best-fit line.
```
<style>#sk-container-id-1 {
/* Definition of color scheme common for light and dark mode */
--sklearn-color-text: #000;
--sklearn-color-text-muted: #666;
--sklearn-color-line: gray;
/* Definition of color scheme for unfitted estimators */
--sklearn-color-unfitted-level-0: #fff5e6;
--sklearn-color-unfitted-level-1: #f6e4d2;
--sklearn-color-unfitted-level-2: #ffe0b3;
--sklearn-color-unfitted-level-3: chocolate;
/* Definition of color scheme for fitted estimators */
--sklearn-color-fitted-level-0: #f0f8ff;
--sklearn-color-fitted-level-1: #d4ebff;
--sklearn-color-fitted-level-2: #b3dbfd;
--sklearn-color-fitted-level-3: cornflowerblue;
}
#sk-container-id-1.light {
/* Specific color for light theme */
--sklearn-color-text-on-default-background: black;
--sklearn-color-background: white;
--sklearn-color-border-box: black;
--sklearn-color-icon: #696969;
}
#sk-container-id-1.dark {
--sklearn-color-text-on-default-background: white;
--sklearn-color-background: #111;
--sklearn-color-border-box: white;
--sklearn-color-icon: #878787;
}
#sk-container-id-1 {
color: var(--sklearn-color-text);
}
#sk-container-id-1 pre {
padding: 0;
}
#sk-container-id-1 input.sk-hidden--visually {
border: 0;
clip: rect(1px 1px 1px 1px);
clip: rect(1px, 1px, 1px, 1px);
height: 1px;
margin: -1px;
overflow: hidden;
padding: 0;
position: absolute;
width: 1px;
}
#sk-container-id-1 div.sk-dashed-wrapped {
border: 1px dashed var(--sklearn-color-line);
margin: 0 0.4em 0.5em 0.4em;
box-sizing: border-box;
padding-bottom: 0.4em;
background-color: var(--sklearn-color-background);
}
#sk-container-id-1 div.sk-container {
/* jupyter's `normalize.less` sets `[hidden] { display: none; }`
but bootstrap.min.css set `[hidden] { display: none !important; }`
so we also need the `!important` here to be able to override the
default hidden behavior on the sphinx rendered scikit-learn.org.
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */
display: inline-block !important;
position: relative;
}
#sk-container-id-1 div.sk-text-repr-fallback {
display: none;
}
div.sk-parallel-item,
div.sk-serial,
div.sk-item {
/* draw centered vertical line to link estimators */
background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));
background-size: 2px 100%;
background-repeat: no-repeat;
background-position: center center;
}
/* Parallel-specific style estimator block */
#sk-container-id-1 div.sk-parallel-item::after {
content: "";
width: 100%;
border-bottom: 2px solid var(--sklearn-color-text-on-default-background);
flex-grow: 1;
}
#sk-container-id-1 div.sk-parallel {
display: flex;
align-items: stretch;
justify-content: center;
background-color: var(--sklearn-color-background);
position: relative;
}
#sk-container-id-1 div.sk-parallel-item {
display: flex;
flex-direction: column;
}
#sk-container-id-1 div.sk-parallel-item:first-child::after {
align-self: flex-end;
width: 50%;
}
#sk-container-id-1 div.sk-parallel-item:last-child::after {
align-self: flex-start;
width: 50%;
}
#sk-container-id-1 div.sk-parallel-item:only-child::after {
width: 0;
}
/* Serial-specific style estimator block */
#sk-container-id-1 div.sk-serial {
display: flex;
flex-direction: column;
align-items: center;
background-color: var(--sklearn-color-background);
padding-right: 1em;
padding-left: 1em;
}
/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is
clickable and can be expanded/collapsed.
- Pipeline and ColumnTransformer use this feature and define the default style
- Estimators will overwrite some part of the style using the `sk-estimator` class
*/
/* Pipeline and ColumnTransformer style (default) */
#sk-container-id-1 div.sk-toggleable {
/* Default theme specific background. It is overwritten whether we have a
specific estimator or a Pipeline/ColumnTransformer */
background-color: var(--sklearn-color-background);
}
/* Toggleable label */
#sk-container-id-1 label.sk-toggleable__label {
cursor: pointer;
display: flex;
width: 100%;
margin-bottom: 0;
padding: 0.5em;
box-sizing: border-box;
text-align: center;
align-items: center;
justify-content: center;
gap: 0.5em;
}
#sk-container-id-1 label.sk-toggleable__label .caption {
font-size: 0.6rem;
font-weight: lighter;
color: var(--sklearn-color-text-muted);
}
#sk-container-id-1 label.sk-toggleable__label-arrow:before {
/* Arrow on the left of the label */
content: "▸";
float: left;
margin-right: 0.25em;
color: var(--sklearn-color-icon);
}
#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {
color: var(--sklearn-color-text);
}
/* Toggleable content - dropdown */
#sk-container-id-1 div.sk-toggleable__content {
display: none;
text-align: left;
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-0);
}
#sk-container-id-1 div.sk-toggleable__content.fitted {
/* fitted */
background-color: var(--sklearn-color-fitted-level-0);
}
#sk-container-id-1 div.sk-toggleable__content pre {
margin: 0.2em;
border-radius: 0.25em;
color: var(--sklearn-color-text);
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-0);
}
#sk-container-id-1 div.sk-toggleable__content.fitted pre {
/* unfitted */
background-color: var(--sklearn-color-fitted-level-0);
}
#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {
/* Expand drop-down */
display: block;
width: 100%;
overflow: visible;
}
#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {
content: "▾";
}
/* Pipeline/ColumnTransformer-specific style */
#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {
color: var(--sklearn-color-text);
background-color: var(--sklearn-color-unfitted-level-2);
}
#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {
background-color: var(--sklearn-color-fitted-level-2);
}
/* Estimator-specific style */
/* Colorize estimator box */
#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-2);
}
#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {
/* fitted */
background-color: var(--sklearn-color-fitted-level-2);
}
#sk-container-id-1 div.sk-label label.sk-toggleable__label,
#sk-container-id-1 div.sk-label label {
/* The background is the default theme color */
color: var(--sklearn-color-text-on-default-background);
}
/* On hover, darken the color of the background */
#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {
color: var(--sklearn-color-text);
background-color: var(--sklearn-color-unfitted-level-2);
}
/* Label box, darken color on hover, fitted */
#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {
color: var(--sklearn-color-text);
background-color: var(--sklearn-color-fitted-level-2);
}
/* Estimator label */
#sk-container-id-1 div.sk-label label {
font-family: monospace;
font-weight: bold;
line-height: 1.2em;
}
#sk-container-id-1 div.sk-label-container {
text-align: center;
}
/* Estimator-specific */
#sk-container-id-1 div.sk-estimator {
font-family: monospace;
border: 1px dotted var(--sklearn-color-border-box);
border-radius: 0.25em;
box-sizing: border-box;
margin-bottom: 0.5em;
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-0);
}
#sk-container-id-1 div.sk-estimator.fitted {
/* fitted */
background-color: var(--sklearn-color-fitted-level-0);
}
/* on hover */
#sk-container-id-1 div.sk-estimator:hover {
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-2);
}
#sk-container-id-1 div.sk-estimator.fitted:hover {
/* fitted */
background-color: var(--sklearn-color-fitted-level-2);
}
/* Specification for estimator info (e.g. "i" and "?") */
/* Common style for "i" and "?" */
.sk-estimator-doc-link,
a:link.sk-estimator-doc-link,
a:visited.sk-estimator-doc-link {
float: right;
font-size: smaller;
line-height: 1em;
font-family: monospace;
background-color: var(--sklearn-color-unfitted-level-0);
border-radius: 1em;
height: 1em;
width: 1em;
text-decoration: none !important;
margin-left: 0.5em;
text-align: center;
/* unfitted */
border: var(--sklearn-color-unfitted-level-3) 1pt solid;
color: var(--sklearn-color-unfitted-level-3);
}
.sk-estimator-doc-link.fitted,
a:link.sk-estimator-doc-link.fitted,
a:visited.sk-estimator-doc-link.fitted {
/* fitted */
background-color: var(--sklearn-color-fitted-level-0);
border: var(--sklearn-color-fitted-level-3) 1pt solid;
color: var(--sklearn-color-fitted-level-3);
}
/* On hover */
div.sk-estimator:hover .sk-estimator-doc-link:hover,
.sk-estimator-doc-link:hover,
div.sk-label-container:hover .sk-estimator-doc-link:hover,
.sk-estimator-doc-link:hover {
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-3);
border: var(--sklearn-color-fitted-level-0) 1pt solid;
color: var(--sklearn-color-unfitted-level-0);
text-decoration: none;
}
div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,
.sk-estimator-doc-link.fitted:hover,
div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,
.sk-estimator-doc-link.fitted:hover {
/* fitted */
background-color: var(--sklearn-color-fitted-level-3);
border: var(--sklearn-color-fitted-level-0) 1pt solid;
color: var(--sklearn-color-fitted-level-0);
text-decoration: none;
}
/* Span, style for the box shown on hovering the info icon */
.sk-estimator-doc-link span {
display: none;
z-index: 9999;
position: relative;
font-weight: normal;
right: .2ex;
padding: .5ex;
margin: .5ex;
width: min-content;
min-width: 20ex;
max-width: 50ex;
color: var(--sklearn-color-text);
box-shadow: 2pt 2pt 4pt #999;
/* unfitted */
background: var(--sklearn-color-unfitted-level-0);
border: .5pt solid var(--sklearn-color-unfitted-level-3);
}
.sk-estimator-doc-link.fitted span {
/* fitted: only recolour the border. A `border` shorthand carrying just a
colour would reset border-style to none and hide the border that the
unfitted base rule declares. */
background: var(--sklearn-color-fitted-level-0);
border-color: var(--sklearn-color-fitted-level-3);
}
.sk-estimator-doc-link:hover span {
display: block;
}
/* "?"-specific style due to the `<a>` HTML tag */
#sk-container-id-1 a.estimator_doc_link {
float: right;
font-size: 1rem;
line-height: 1em;
font-family: monospace;
background-color: var(--sklearn-color-unfitted-level-0);
border-radius: 1rem;
height: 1rem;
width: 1rem;
text-decoration: none;
/* unfitted */
color: var(--sklearn-color-unfitted-level-1);
border: var(--sklearn-color-unfitted-level-1) 1pt solid;
}
#sk-container-id-1 a.estimator_doc_link.fitted {
/* fitted */
background-color: var(--sklearn-color-fitted-level-0);
border: var(--sklearn-color-fitted-level-1) 1pt solid;
color: var(--sklearn-color-fitted-level-1);
}
/* On hover */
#sk-container-id-1 a.estimator_doc_link:hover {
/* unfitted */
background-color: var(--sklearn-color-unfitted-level-3);
color: var(--sklearn-color-background);
text-decoration: none;
}
#sk-container-id-1 a.estimator_doc_link.fitted:hover {
/* fitted */
background-color: var(--sklearn-color-fitted-level-3);
}
.estimator-table {
font-family: monospace;
}
.estimator-table summary {
padding: .5rem;
cursor: pointer;
}
.estimator-table summary::marker {
font-size: 0.7rem;
}
.estimator-table details[open] {
padding-left: 0.1rem;
padding-right: 0.1rem;
padding-bottom: 0.3rem;
}
.estimator-table .parameters-table {
margin-left: auto !important;
margin-right: auto !important;
margin-top: 0;
}
.estimator-table .parameters-table tr:nth-child(odd) {
background-color: #fff;
}
.estimator-table .parameters-table tr:nth-child(even) {
background-color: #f6f6f6;
}
.estimator-table .parameters-table tr:hover {
background-color: #e0e0e0;
}
.estimator-table table td {
border: 1px solid rgba(106, 105, 104, 0.232);
}
/*
`table td`is set in notebook with right text-align.
We need to overwrite it.
*/
.estimator-table table td.param {
text-align: left;
position: relative;
padding: 0;
}
.user-set td {
color:rgb(255, 94, 0);
text-align: left !important;
}
.user-set td.value {
color:rgb(255, 94, 0);
background-color: transparent;
}
.default td {
color: black;
text-align: left !important;
}
.user-set td i,
.default td i {
color: black;
}
/*
Styles for parameter documentation links
We need styling for visited so jupyter doesn't overwrite it
*/
a.param-doc-link,
a.param-doc-link:link,
a.param-doc-link:visited {
text-decoration: underline dashed;
text-underline-offset: .3em;
color: inherit;
display: block;
padding: .5em;
}
/* "hack" to make the entire area of the cell containing the link clickable */
a.param-doc-link::before {
position: absolute;
content: "";
inset: 0;
}
.param-doc-description {
display: none;
position: absolute;
z-index: 9999;
left: 0;
padding: .5ex;
margin-left: 1.5em;
color: var(--sklearn-color-text);
box-shadow: .3em .3em .4em #999;
width: max-content;
text-align: left;
max-height: 10em;
overflow-y: auto;
/* unfitted */
background: var(--sklearn-color-unfitted-level-0);
border: thin solid var(--sklearn-color-unfitted-level-3);
}
/* Fitted state for parameter tooltips */
.fitted .param-doc-description {
/* fitted */
background: var(--sklearn-color-fitted-level-0);
border: thin solid var(--sklearn-color-fitted-level-3);
}
.param-doc-link:hover .param-doc-description {
display: block;
}
.copy-paste-icon {
background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0NDggNTEyIj48IS0tIUZvbnQgQXdlc29tZSBGcmVlIDYuNy4yIGJ5IEBmb250YXdlc29tZSAtIGh0dHBzOi8vZm9udGF3ZXNvbWUuY29tIExpY2Vuc2UgLSBodHRwczovL2ZvbnRhd2Vzb21lLmNvbS9saWNlbnNlL2ZyZWUgQ29weXJpZ2h0IDIwMjUgRm9udGljb25zLCBJbmMuLS0+PHBhdGggZD0iTTIwOCAwTDMzMi4xIDBjMTIuNyAwIDI0LjkgNS4xIDMzLjkgMTQuMWw2Ny45IDY3LjljOSA5IDE0LjEgMjEuMiAxNC4xIDMzLjlMNDQ4IDMzNmMwIDI2LjUtMjEuNSA0OC00OCA0OGwtMTkyIDBjLTI2LjUgMC00OC0yMS41LTQ4LTQ4bDAtMjg4YzAtMjYuNSAyMS41LTQ4IDQ4LTQ4ek00OCAxMjhsODAgMCAwIDY0LTY0IDAgMCAyNTYgMTkyIDAgMC0zMiA2NCAwIDAgNDhjMCAyNi41LTIxLjUgNDgtNDggNDhMNDggNTEyYy0yNi41IDAtNDgtMjEuNS00OC00OEwwIDE3NmMwLTI2LjUgMjEuNS00OCA0OC00OHoiLz48L3N2Zz4=);
background-repeat: no-repeat;
background-size: 14px 14px;
background-position: 0;
display: inline-block;
width: 14px;
height: 14px;
cursor: pointer;
}
</style><body><div id="sk-container-id-1" class="sk-top-container"><div class="sk-text-repr-fallback"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class="sk-container" hidden><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-1" type="checkbox" checked><label for="sk-estimator-id-1" class="sk-toggleable__label fitted sk-toggleable__label-arrow"><div><div>LinearRegression</div></div><div><a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.LinearRegression.html">?<span>Documentation for LinearRegression</span></a><span class="sk-estimator-doc-link fitted">i<span>Fitted</span></span></div></label><div class="sk-toggleable__content fitted" data-param-prefix="">
<div class="estimator-table">
<details>
<summary>Parameters</summary>
<table class="parameters-table">
<tbody>
<tr class="default">
<td><i class="copy-paste-icon"
onclick="copyToClipboard('fit_intercept',
this.parentElement.nextElementSibling)"
></i></td>
<td class="param">
<a class="param-doc-link"
rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.LinearRegression.html#:~:text=fit_intercept,-bool%2C%20default%3DTrue">
fit_intercept
<span class="param-doc-description">fit_intercept: bool, default=True<br><br>Whether to calculate the intercept for this model. If set<br>to False, no intercept will be used in calculations<br>(i.e. data is expected to be centered).</span>
</a>
</td>
<td class="value">True</td>
</tr>
<tr class="default">
<td><i class="copy-paste-icon"
onclick="copyToClipboard('copy_X',
this.parentElement.nextElementSibling)"
></i></td>
<td class="param">
<a class="param-doc-link"
rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.LinearRegression.html#:~:text=copy_X,-bool%2C%20default%3DTrue">
copy_X
<span class="param-doc-description">copy_X: bool, default=True<br><br>If True, X will be copied; else, it may be overwritten.</span>
</a>
</td>
<td class="value">True</td>
</tr>
<tr class="default">
<td><i class="copy-paste-icon"
onclick="copyToClipboard('tol',
this.parentElement.nextElementSibling)"
></i></td>
<td class="param">
<a class="param-doc-link"
rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.LinearRegression.html#:~:text=tol,-float%2C%20default%3D1e-6">
tol
<span class="param-doc-description">tol: float, default=1e-6<br><br>The precision of the solution (`coef_`) is determined by `tol` which<br>specifies a different convergence criterion for the `lsqr` solver.<br>`tol` is set as `atol` and `btol` of :func:`scipy.sparse.linalg.lsqr` when<br>fitting on sparse training data. This parameter has no effect when fitting<br>on dense data.<br><br>.. versionadded:: 1.7</span>
</a>
</td>
<td class="value">1e-06</td>
</tr>
<tr class="default">
<td><i class="copy-paste-icon"
onclick="copyToClipboard('n_jobs',
this.parentElement.nextElementSibling)"
></i></td>
<td class="param">
<a class="param-doc-link"
rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.LinearRegression.html#:~:text=n_jobs,-int%2C%20default%3DNone">
n_jobs
<span class="param-doc-description">n_jobs: int, default=None<br><br>The number of jobs to use for the computation. This will only provide<br>speedup in case of sufficiently large problems, that is if firstly<br>`n_targets > 1` and secondly `X` is sparse or if `positive` is set<br>to `True`. ``None`` means 1 unless in a<br>:obj:`joblib.parallel_backend` context. ``-1`` means using all<br>processors. See :term:`Glossary <n_jobs>` for more details.</span>
</a>
</td>
<td class="value">None</td>
</tr>
<tr class="default">
<td><i class="copy-paste-icon"
onclick="copyToClipboard('positive',
this.parentElement.nextElementSibling)"
></i></td>
<td class="param">
<a class="param-doc-link"
rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.LinearRegression.html#:~:text=positive,-bool%2C%20default%3DFalse">
positive
<span class="param-doc-description">positive: bool, default=False<br><br>When set to ``True``, forces the coefficients to be positive. This<br>option is only supported for dense arrays.<br><br>For a comparison between a linear regression model with positive constraints<br>on the regression coefficients and a linear regression without such constraints,<br>see :ref:`sphx_glr_auto_examples_linear_model_plot_nnls.py`.<br><br>.. versionadded:: 0.24</span>
</a>
</td>
<td class="value">False</td>
</tr>
</tbody>
</table>
</details>
</div>
</div></div></div></div></div><script>function copyToClipboard(text, element) {
// Get the parameter prefix from the closest toggleable content
const toggleableContent = element.closest('.sk-toggleable__content');
const paramPrefix = toggleableContent ? toggleableContent.dataset.paramPrefix : '';
const fullParamName = paramPrefix ? `${paramPrefix}${text}` : text;
const originalStyle = element.style;
const computedStyle = window.getComputedStyle(element);
const originalWidth = computedStyle.width;
const originalHTML = element.innerHTML.replace('Copied!', '');
navigator.clipboard.writeText(fullParamName)
.then(() => {
element.style.width = originalWidth;
element.style.color = 'green';
element.innerHTML = "Copied!";
setTimeout(() => {
element.innerHTML = originalHTML;
element.style = originalStyle;
}, 2000);
})
.catch(err => {
console.error('Failed to copy:', err);
element.style.color = 'red';
element.innerHTML = "Failed!";
setTimeout(() => {
element.innerHTML = originalHTML;
element.style = originalStyle;
}, 2000);
});
return false;
}
// Give every copy icon a tooltip (title attribute) holding the parameter name,
// prefixed with the enclosing toggleable content's data-param-prefix if set.
document.querySelectorAll('.copy-paste-icon').forEach(function(element) {
    const toggleableContent = element.closest('.sk-toggleable__content');
    const paramPrefix = toggleableContent ? toggleableContent.dataset.paramPrefix : '';
    // The neighbouring cell's text is the parameter name followed by its
    // description. Split on any whitespace, not only spaces, so a newline
    // between the two cannot end up inside the name.
    const paramName = element.parentElement.nextElementSibling
        .textContent.trim().split(/\s+/)[0];
    const fullParamName = paramPrefix ? `${paramPrefix}${paramName}` : paramName;
    element.setAttribute('title', fullParamName);
});
/**
 * Adapted from Skrub
 * https://github.com/skrub-data/skrub/blob/403466d1d5d4dc76a7ef569b3f8228db59a31dc3/skrub/_reporting/_data/templates/report.js#L789
 *
 * Works out whether the page uses a light or a dark theme by trying, in
 * order: the VS Code attributes on <body>, the Jupyter attribute on <body>,
 * the brightness of the text colour of `element`'s parent, and finally the
 * `prefers-color-scheme` media query.
 *
 * @param {Element} element element whose parent's text colour is inspected
 * @returns "light" or "dark"
 */
function detectTheme(element) {
    const bodyEl = document.querySelector('body');

    // 1. VS Code exposes both the kind and the name of the active theme.
    const vscodeKind = bodyEl.getAttribute('data-vscode-theme-kind');
    const vscodeName = bodyEl.getAttribute('data-vscode-theme-name');
    if (vscodeKind && vscodeName) {
        const kind = vscodeKind.toLowerCase();
        const name = vscodeName.toLowerCase();
        if (kind.includes("dark") || name.includes("dark")) {
            return "dark";
        }
        if (kind.includes("light") || name.includes("light")) {
            return "light";
        }
    }

    // 2. Jupyter flags light themes with "true" and dark themes with "false".
    const jupyterLight = bodyEl.getAttribute('data-jp-theme-light');
    if (jupyterLight === 'false') {
        return 'dark';
    }
    if (jupyterLight === 'true') {
        return 'light';
    }

    // 3. Guess from the text colour of the parent element.
    const textColor = window.getComputedStyle(element.parentNode, null).getPropertyValue('color');
    const rgbParts = textColor.match(/^rgb\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)\s*$/i);
    if (rgbParts) {
        const red = parseFloat(rgbParts[1]);
        const green = parseFloat(rgbParts[2]);
        const blue = parseFloat(rgbParts[3]);
        // https://en.wikipedia.org/wiki/HSL_and_HSV#Lightness
        const luma = 0.299 * red + 0.587 * green + 0.114 * blue;
        if (luma > 180) {
            // Very bright text implies a dark background.
            return 'dark';
        }
        if (luma < 75) {
            // Very dark text implies a light background.
            return 'light';
        }
        // Mid-range brightness is inconclusive: fall through.
    }

    // 4. Last resort: the system-wide colour scheme preference.
    if (window.matchMedia('(prefers-color-scheme: dark)').matches) {
        return 'dark';
    }
    return 'light';
}
// Add the detected theme ("light" or "dark") as a CSS class on the element
// with the given id; log an error when no such element exists.
function forceTheme(elementId) {
    const container = document.querySelector(`#${elementId}`);
    if (container !== null) {
        container.classList.add(detectTheme(container));
        return;
    }
    console.error(`Element with id ${elementId} not found.`);
}
// Tag this estimator's container with the detected "light"/"dark" class.
forceTheme('sk-container-id-1');</script></body>
```python
# Best-fit line visualisation: scatter of rm against medv with a fitted regression line
xx = df["rm"]
yy = df["medv"]
# x and y are passed by keyword so their roles are explicit
sns.regplot(
    x=xx,
    y=yy,
    marker="*",
    color="r",
    line_kws=dict(color="green", label="Regression"),
    label="Data Points",
)
plt.title("Best Fit Line (Regression)", fontsize=13)
plt.xlabel("Rooms", fontsize=13)
plt.ylabel("Price", fontsize=13)
plt.legend()
plt.show()
```

```python
# Fitted weights w1..w13 of the linear model (shown as the cell output)
model.coef_
```
array([-1.13055924e-01, 3.01104641e-02, 4.03807204e-02, 2.78443820e+00,
-1.72026334e+01, 4.43883520e+00, -6.29636221e-03, -1.44786537e+00,
2.62429736e-01, -1.06467863e-02, -9.15456240e-01, 1.23513347e-02,
-5.08571424e-01])
```python
# Fitted intercept term c of the linear model (shown as the cell output)
model.intercept_
```
np.float64(30.24675099392349)
```python
# Hence the equation of the best-fit regression line becomes
# y = w1*x1 + w2*x2 + ... + w13*x13 + c
```
# Equation of Best Fit Regression line
```python
# Tabulate each feature name next to its fitted coefficient, then report the intercept
coeff_df = pd.DataFrame(
    {
        "Feature": X.columns,
        "Coefficient": model.coef_,
    }
)
# The extra newline in `end` reproduces the blank line before the intercept
print(coeff_df, end="\n\n")
print("Intercept:", model.intercept_)
```
Feature Coefficient
0 crim -0.113056
1 zn 0.030110
2 indus 0.040381
3 chas 2.784438
4 nox -17.202633
5 rm 4.438835
6 age -0.006296
7 dis -1.447865
8 rad 0.262430
9 tax -0.010647
10 ptratio -0.915456
11 black 0.012351
12 lstat -0.508571
Intercept: 30.24675099392349
```python
# Render the fitted model as a readable equation: the intercept first, then one
# " + (weight * feature)" term per column, every number rounded to two decimals.
terms = [f" + ({coef:.2f} * {col})" for coef, col in zip(model.coef_, X.columns)]
eq = f"y = {model.intercept_:.2f}" + "".join(terms)
print(eq)
```
y = 30.25 + (-0.11 * crim) + (0.03 * zn) + (0.04 * indus) + (2.78 * chas) + (-17.20 * nox) + (4.44 * rm) + (-0.01 * age) + (-1.45 * dis) + (0.26 * rad) + (-0.01 * tax) + (-0.92 * ptratio) + (0.01 * black) + (-0.51 * lstat)
# Prediction
### Comparison of Actual and Predicted
```python
# Predict the target for every row of X_test; the bare name displays the array
y_pred = model.predict(X_test)
y_pred
```
array([28.99672362, 36.02556534, 14.81694405, 25.03197915, 18.76987992,
23.25442929, 17.66253818, 14.34119 , 23.01320703, 20.63245597,
24.90850512, 18.63883645, -6.08842184, 21.75834668, 19.23922576,
26.19319733, 20.64773313, 5.79472718, 40.50033966, 17.61289074,
27.24909479, 30.06625441, 11.34179277, 24.16077616, 17.86058499,
15.83609765, 22.78148106, 14.57704449, 22.43626052, 19.19631835,
22.43383455, 25.21979081, 25.93909562, 17.70162434, 16.76911711,
16.95125411, 31.23340153, 20.13246729, 23.76579011, 24.6322925 ,
13.94204955, 32.25576301, 42.67251161, 17.32745046, 27.27618614,
16.99310991, 14.07009109, 25.90341861, 20.29485982, 29.95339638,
21.28860173, 34.34451856, 16.04739105, 26.22562412, 39.53939798,
22.57950697, 18.84531367, 32.72531661, 25.0673037 , 12.88628956,
22.68221908, 30.48287757, 31.52626806, 15.90148607, 20.22094826,
16.71089812, 20.52384893, 25.96356264, 30.61607978, 11.59783023,
20.51232627, 27.48111878, 11.01962332, 15.68096344, 23.79316251,
6.19929359, 21.6039073 , 41.41377225, 18.76548695, 8.87931901,
20.83076916, 13.25620627, 20.73963699, 9.36482222, 23.22444271,
31.9155003 , 19.10228271, 25.51579303, 29.04256769, 20.14358566,
25.5859787 , 5.70159447, 20.09474756, 14.95069156, 12.50395648,
20.72635294, 24.73957161, -0.164237 , 13.68486682, 16.18359697,
22.27621999, 24.47902364])
```python
# Shape of the prediction array (how many predictions were produced)
y_pred.shape
```
(102,)
```python
# How close are our predictions to the actual prices?
# Put both side by side and display only the first 5 rows.
results = pd.DataFrame({"Actual":y_test,"Predicted":y_pred})
results.head()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>Actual</th>
<th>Predicted</th>
</tr>
</thead>
<tbody>
<tr>
<th>173</th>
<td>23.6</td>
<td>28.996724</td>
</tr>
<tr>
<th>274</th>
<td>32.4</td>
<td>36.025565</td>
</tr>
<tr>
<th>491</th>
<td>13.6</td>
<td>14.816944</td>
</tr>
<tr>
<th>72</th>
<td>22.8</td>
<td>25.031979</td>
</tr>
<tr>
<th>452</th>
<td>16.1</td>
<td>18.769880</td>
</tr>
</tbody>
</table>
</div>
# Model Evaluation
```python
# R2 is the coefficient of determination (goodness of fit of the regression),
# not a classification-style "accuracy".
# NOTE(review): the "R2_Scre" label looks like a typo for "R2_Score"; it is left
# unchanged here so the code still matches the recorded output.
print("R2_Scre :-",r2_score(y_test,y_pred))
print("Mean Squared Error :- ",mean_squared_error(y_test,y_pred))
```
R2_Scre :- 0.6687594935356317
Mean Squared Error :- 24.291119474973538
```python
# Root Mean Squared Error: the square root of the Mean Squared Error (mse ** 0.5)
rmse = np.sqrt(mean_squared_error(y_test,y_pred))
print("Root Mean Squared Error :- ",rmse)
```
Root Mean Squared Error :- 4.928602182665339
```python
```
```python
# Example custom input: a single row with one value per feature.
# Order must match X.columns
custom_input = [[0.1, 18, 2.3, 0, 0.5, 6, 65, 4, 1, 300, 15, 390, 5]]
# Predicting on the bare list makes scikit-learn warn "X does not have valid
# feature names", because the model was fitted with feature names. Wrapping the
# row in a DataFrame that carries those names avoids the warning.
# Assumes the model was fitted on the columns of X, as the coefficient table does.
custom_input_df = pd.DataFrame(custom_input, columns=X.columns)
predicted_price = model.predict(custom_input_df)
print("Predicted Price:", predicted_price[0])
```
Predicted Price: 28.31199256486372
F:\Python_Files2026\Lib\site-packages\sklearn\utils\validation.py:2691: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
warnings.warn(
```python
```
```python
```
```python
# Quick look: actual values on the x-axis against predicted values on the y-axis
a = y_test
b = y_pred
plt.scatter(a,b)
plt.show()
```

```python
plt.figure(figsize=(10, 5))
# Draw both series against their position in the test set so they can be
# compared point by point: actual first, then predicted.
for series, colour, name in (
    (y_test.values, "orange", "Actual"),
    (y_pred, "green", "Predicted"),
):
    plt.plot(series, color=colour, label=name)
plt.title("Actual vs Predicted Values")
plt.xlabel("Data Points")
plt.ylabel("Price")
plt.legend()
plt.show()
```

```python
plt.figure(figsize=(10, 5))
# One scatter layer per series, each plotted against its own positional index:
# actual values first, predicted values second.
for values, colour, name in (
    (y_test.values, "orange", "Actual"),
    (y_pred, "green", "Predicted"),
):
    plt.scatter(range(len(values)), values, color=colour, label=name)
plt.title("Actual vs Predicted Values (Scatter Plot)")
plt.xlabel("Data Points")
plt.ylabel("Price")
plt.legend()
plt.show()
```

```python
import matplotlib.pyplot as plt

a = y_test
b = y_pred

plt.figure(figsize=(7, 5))
# Each point is one test sample: actual value on x, predicted value on y
plt.scatter(a, b, color="blue")

# Dashed red y = x reference line spanning the range of the actual values;
# a perfect prediction would lie exactly on it.
lowest, highest = a.min(), a.max()
plt.plot([lowest, highest], [lowest, highest], color="red", linestyle="--")

plt.title("Actual vs Predicted Values")
plt.xlabel("Actual Values (y_test)")
plt.ylabel("Predicted Values (y_pred)")
plt.show()
```

```python
```
Comments
Post a Comment