In [None]:
import numpy as np
import pandas as pd

In [None]:
# https://www.statsmodels.org/stable/index.html
import statsmodels.api as sm

In [None]:
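# Optional: to work from a local copy, download the dataset from https://www.dropbox.com/scl/fi/bkcdp9tpqqh6dfr6phtt8/AnnArbor.xlsx?rlkey=0agfqwc7f0kt7oqb3e2h6q3qs&dl=1
# and upload it to Colab. Otherwise the next cell reads it directly from this URL.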

In [None]:
# Load the Ann Arbor workbook straight from the shared Dropbox link.
# Offline alternative, once the file sits next to the notebook:
#   annArborDf = pd.read_excel("./AnnArbor.xlsx")
ANN_ARBOR_XLSX_URL = "https://www.dropbox.com/scl/fi/bkcdp9tpqqh6dfr6phtt8/AnnArbor.xlsx?rlkey=0agfqwc7f0kt7oqb3e2h6q3qs&dl=1"
annArborDf = pd.read_excel(ANN_ARBOR_XLSX_URL)

In [None]:
# Display the loaded frame for a first visual check.
annArborDf

In [None]:
# Total number of elements in the frame (rows x columns).
annArborDf.size

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) for the frame's columns.
annArborDf.describe()

In [None]:
# (number of rows, number of columns) of the frame.
annArborDf.shape

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plotting: open an 8x8 inch figure.
# NOTE(review): with Jupyter's inline backend a figure is normally rendered and
# closed when its cell finishes, so plots drawn in later cells may not inherit
# this size -- confirm.
fig1 = plt.figure(figsize=(8, 8))

In [None]:
# Rent plotted against the Beds column.
plt.scatter(annArborDf["Beds"], annArborDf["Rent"],
            color='blue', alpha=0.9, label='Data Points - scatter')
plt.grid(True)
plt.xlabel('Beds')
plt.ylabel('Rent')
plt.legend()
plt.show()

In [None]:
# Rent plotted against the Baths column.
plt.scatter(annArborDf["Baths"], annArborDf["Rent"],
            color='blue', alpha=0.9, label='Data Points - scatter')
plt.grid(True)
plt.xlabel('Baths')
plt.ylabel('Rent')
plt.legend()
plt.show()

In [None]:
# Rent plotted against the Sqft column.
plt.scatter(annArborDf["Sqft"], annArborDf["Rent"],
            color='blue', alpha=0.9, label='Data Points - scatter')
plt.grid(True)
plt.xlabel('Sqft')
plt.ylabel('Rent')
plt.legend()
plt.show()

In [None]:
# Simple linear regression: Rent ~ const + Sqft.
# add_constant supplies the intercept column ('const'); OLS does not add one
# on its own.
rentSqftModel1 = sm.OLS(
    endog=annArborDf["Rent"],
    exog=sm.add_constant(annArborDf[["Sqft"]]),
)
rentSqftModel1Fit = rentSqftModel1.fit()
print(rentSqftModel1Fit.summary())

In [None]:
from functions.exportModel import exportModel

# Hand the fitted Rent ~ Sqft model and its descriptive metadata to the
# project's exportModel helper. The "inputs" entries mirror the columns of the
# design matrix the model was fitted on ('const' and 'Sqft').
exportModel({
    "modelName": "rentSqftModel1Fit",
    "model": rentSqftModel1Fit,
    "description": "Predict Rent based on Sqft for annArborDf",
    "modelType": "sm.OLS",
    "baseRelativePath": "..",
    "inputs": [
        {"name": "const", "type": "int"},
        {"name": "Sqft", "type": "float"},
    ],
    "output": {"name": "Rent", "type": "float"},
})

In [None]:
# In-sample predictions from the Rent ~ Sqft fit, stored alongside the
# observations as a new 'predictedRent1' column (mutates annArborDf).
sqftWithConst = sm.add_constant(annArborDf["Sqft"])
predictedRent1 = rentSqftModel1Fit.predict(sqftWithConst)
annArborDf['predictedRent1'] = predictedRent1
annArborDf

In [None]:
# Observed rents with the fitted Rent ~ Sqft regression line overlaid.
# Sqft is on the x-axis and Rent on the y-axis so that the points share the
# coordinate system of the fitted line and of the axis labels below
# (the axes of the scatter were previously swapped).
plt.scatter(
    annArborDf["Sqft"],
    annArborDf["Rent"],
    color='blue',
    alpha=0.5,
    label='Data Points - scatter',
)

# Fitted line: Rent = const + slope * Sqft, evaluated on an evenly spaced grid.
intercept = rentSqftModel1Fit.params['const']
sqFtSlope = rentSqftModel1Fit.params['Sqft']
# NOTE(review): 500-4500 presumably brackets the Sqft values in the data -- confirm.
x_values = np.linspace(500, 4500, 200)
y_values = intercept + sqFtSlope * x_values

plt.plot(
    x_values,
    y_values,
    color='red',
    label='rentSqftModel1Fit - predictedRent1'
)
plt.xlabel('Sqft')
plt.ylabel('Rent')
plt.legend()
plt.grid(True)


plt.show()

In [None]:
# Multiple linear regression: Rent ~ const + Beds + Baths + Sqft.
rentBedsBathsSqftModel = sm.OLS(
    endog=annArborDf["Rent"],
    exog=sm.add_constant(annArborDf[["Beds", "Baths", "Sqft"]]),
)
rentBedsBathsSqftModelFit = rentBedsBathsSqftModel.fit()
print(rentBedsBathsSqftModelFit.summary())

In [None]:
from functions.exportModel import exportModel

# Hand the fitted Rent ~ Beds + Baths + Sqft model and its descriptive metadata
# to the project's exportModel helper. The "inputs" entries mirror the columns
# of the design matrix the model was fitted on.
# NOTE(review): Baths is declared as "int" -- confirm the data holds no
# fractional bath counts.
exportModel({
    "modelName": "rentBedsBathsSqftModelFit",
    "model": rentBedsBathsSqftModelFit,
    "description": "Predict Rent based on Beds,Baths,Sqft for annArborDf",
    "modelType": "sm.OLS",
    "baseRelativePath": "..",
    "inputs": [
        {"name": "const", "type": "int"},
        {"name": "Beds", "type": "int"},
        {"name": "Baths", "type": "int"},
        {"name": "Sqft", "type": "float"},
    ],
    "output": {"name": "Rent", "type": "float"},
})

In [None]:
import math

In [None]:
from functions.transformers import transformersDict

# Derive the 'log(Sqft)' column by applying the project's 'Sqft_log'
# transformer to every row (axis=1), so the notebook uses the same transformer
# key that the exported models reference.
# Inline equivalent explored earlier:
#   annArborDf.apply(lambda row: math.log(row['Sqft']), axis=1)
sqftLogTransformer = transformersDict.get('Sqft_log')
annArborDf['log(Sqft)'] = annArborDf.apply(sqftLogTransformer, axis=1)
annArborDf

In [None]:
# Linear-log specification: Rent ~ const + Beds + Baths + log(Sqft).
rentBedsBathsLogSqftModel = sm.OLS(
    endog=annArborDf["Rent"],
    exog=sm.add_constant(annArborDf[["Beds", "Baths", "log(Sqft)"]]),
)
rentBedsBathsLogSqftModelFit = rentBedsBathsLogSqftModel.fit()
print(rentBedsBathsLogSqftModelFit.summary())

In [None]:
from functions.exportModel import exportModel

# Hand the fitted Rent ~ Beds + Baths + log(Sqft) model and its metadata to the
# project's exportModel helper. Raw 'Sqft' is listed as the input, and
# 'log(Sqft)' is declared under "transformers" with the 'Sqft_log' key.
# NOTE(review): presumably the consumer applies that transformer to Sqft
# before predicting -- confirm against exportModel.
exportModel({
    "modelName": "rentBedsBathsLogSqftModelFit",
    "model": rentBedsBathsLogSqftModelFit,
    "description": "Predict Rent based on Beds,Baths,log(Sqft) for annArborDf",
    "modelType": "sm.OLS",
    "baseRelativePath": "..",
    "inputs": [
        {"name": "const", "type": "int"},
        {"name": "Beds", "type": "int"},
        {"name": "Baths", "type": "int"},
        {"name": "Sqft", "type": "float"},
    ],
    "transformers": [
        {"name": "log(Sqft)", "transformer": "Sqft_log"},
    ],
    "output": {"name": "Rent", "type": "float"},
})

In [None]:
# Natural log of Rent, computed in one vectorised call instead of a Python-level
# row-by-row apply. The explicit guard keeps the ValueError that math.log raised
# for non-positive values; np.log alone would only warn and yield -inf/NaN.
if (annArborDf['Rent'] <= 0).any():
    raise ValueError("Rent must be strictly positive to take its logarithm")
annArborDf['log(Rent)'] = np.log(annArborDf['Rent'])

In [None]:
# Log-linear specification: log(Rent) ~ const + Beds + Baths + Sqft.
rentSqftModel4 = sm.OLS(
    endog=annArborDf["log(Rent)"],
    exog=sm.add_constant(annArborDf[["Beds", "Baths", "Sqft"]]),
)
rentSqftModel4Fit = rentSqftModel4.fit()
print(rentSqftModel4Fit.summary())

In [None]:
# Log-log specification: log(Rent) ~ const + Beds + Baths + log(Sqft).
logRentBedsBathsLogSqftModel = sm.OLS(
    endog=annArborDf["log(Rent)"],
    exog=sm.add_constant(annArborDf[["Beds", "Baths", "log(Sqft)"]]),
)
logRentBedsBathsLogSqftModelFit = logRentBedsBathsLogSqftModel.fit()
print(logRentBedsBathsLogSqftModelFit.summary())

In [None]:
from functions.exportModel import exportModel

# Hand the fitted log(Rent) ~ Beds + Baths + log(Sqft) model and its metadata to
# the project's exportModel helper. Raw 'Sqft' is listed as the input, and
# 'log(Sqft)' is declared under "transformers" with the 'Sqft_log' key.
# NOTE(review): the declared output is 'log(Rent)' and no inverse transform is
# listed, so consumers presumably receive the log of rent -- confirm.
exportModel({
    "modelName": "logRentBedsBathsLogSqftModelFit",
    "model": logRentBedsBathsLogSqftModelFit,
    "description": "Predict log(Rent) based on Beds,Baths,log(Sqft) for annArborDf",
    "modelType": "sm.OLS",
    "baseRelativePath": "..",
    "inputs": [
        {"name": "const", "type": "int"},
        {"name": "Beds", "type": "int"},
        {"name": "Baths", "type": "int"},
        {"name": "Sqft", "type": "float"},
    ],
    "transformers": [
        {"name": "log(Sqft)", "transformer": "Sqft_log"},
    ],
    "output": {"name": "log(Rent)", "type": "float"},
})

In [None]:
# Log-log specification without Baths: log(Rent) ~ const + Beds + log(Sqft).
rentSqftModel6 = sm.OLS(
    endog=annArborDf["log(Rent)"],
    exog=sm.add_constant(annArborDf[["Beds", "log(Sqft)"]]),
)
rentSqftModel6Fit = rentSqftModel6.fit()
print(rentSqftModel6Fit.summary())

In [None]:
# Rent plotted against log(Sqft).
# Other pairings explored while developing (previously kept as commented-out
# code): Sqft vs Rent, log(Sqft) vs log(Rent), and Sqft vs log(Rent).
plt.scatter(
    annArborDf["log(Sqft)"],
    annArborDf["Rent"],
    color='red',
    alpha=0.9,
    label='Data Points - scatter',
)

# Label the x-axis with the column actually plotted; it was previously left
# unlabeled because the stale 'Sqft' label had been commented out.
plt.xlabel('log(Sqft)')
plt.ylabel('Rent')
plt.legend()
plt.grid(True)

plt.show()