chore: bump version to 1.0.13
This commit is contained in:
parent
eb4ad2a8c2
commit
945aa1e2b9
1
.gitignore
vendored
1
.gitignore
vendored
@ -10,3 +10,4 @@ build/
|
|||||||
node_modules
|
node_modules
|
||||||
|
|
||||||
~~
|
~~
|
||||||
|
~~*
|
||||||
@ -35,4 +35,12 @@ python setup.py sdist bdist_wheel
|
|||||||
## Push project
|
## Push project
|
||||||
```
|
```
|
||||||
twine upload dist/*
|
twine upload dist/*
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run Jupyter notebooks
|
||||||
|
```
|
||||||
|
export PYTHONPATH="${PYTHONPATH}:$(pwd)"
|
||||||
|
jupyter notebook \
|
||||||
|
--notebook-dir="./notebooks" \
|
||||||
|
--ip=0.0.0.0 --port=3225
|
||||||
```
|
```
|
||||||
17
helpers/__init__.py
Normal file
17
helpers/__init__.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
def add_constant_column(df):
    """
    Return a copy of ``df`` with a leading column named 'const' filled with 1.

    Parameters:
    df (pd.DataFrame): Input DataFrame. It is copied, not modified.

    Returns:
    pd.DataFrame: The copy, with 'const' inserted at column position 0.
    """
    # Copy first so the insert below never touches the caller's frame.
    result = df.copy()
    result.insert(loc=0, column='const', value=1)
    return result
|
||||||
@ -1,2 +1,104 @@
|
|||||||
from mlModelSaver.mlModelSaver import MlModelSaver
|
import pickle
|
||||||
|
import json
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
def ensure_directory_exists(directory_path):
    """
    Create ``directory_path`` (including missing parents) unless it is already there.

    Parameters:
    directory_path (str): The path of the directory that must exist afterwards.
    """
    # exist_ok=True makes the call a no-op for an existing directory.
    os.makedirs(name=directory_path, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def check_file_exists(file_path):
    """
    Report whether ``file_path`` is an existing regular file.

    A one-line status message is printed in both cases.

    Parameters:
    file_path (str): The path of the file to check.

    Returns:
    bool: True if the file exists, False otherwise.
    """
    # Guard clause: handle the missing case first, then fall through to success.
    if not os.path.isfile(file_path):
        print(f"File '{file_path}' does not exist.")
        return False
    print(f"File '{file_path}' exists.")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
# Registry of model type identifiers; an entry counts as usable only when its
# "supported" flag is truthy.
supportedModels = {"sm.OLS": {"supported": True}}
|
||||||
|
|
||||||
|
# Registry of data type names, each with its own {"supported": True} entry.
supportedDataType = {
    typeName: {"supported": True}
    for typeName in ("int", "float", "binary")
}
|
||||||
|
|
||||||
|
def default_transformer(x):
    """Identity transformer: hand the input back unchanged."""
    return x
|
||||||
|
|
||||||
|
|
||||||
|
def mlModelSavePredict(self, df, typeOfPredict = 'normal'):
    """
    Transform ``df``, run the model's ``predict`` and label each prediction.

    Parameters:
    self: Model object carrying ``mlModelSaverTransformer`` (callable applied
        to ``df``), ``mlModelSaverConfig`` (dict) and a ``predict`` method.
    df: Input data handed to the transformer.
    typeOfPredict (str): Only 'normal' produces predictions.

    Returns:
    list: One single-key dict per predicted value. The key is the "name" of
        the first entry in the config's "outputs" list, or "result" when
        "outputs" is missing or empty. An empty list is returned for any
        ``typeOfPredict`` other than 'normal'.
    """
    # The transformer runs unconditionally, as it did before the type check
    # was made explicit.
    dfAfterTransformation = self.mlModelSaverTransformer(df)

    # `or` also covers an empty "outputs" list, which previously raised
    # IndexError when the first name was looked up.
    outputs = self.mlModelSaverConfig.get("outputs") or [{"name": "result"}]
    outputName = outputs[0]["name"]

    if typeOfPredict != 'normal':
        # No other prediction type is implemented here.
        return []

    results = self.predict(dfAfterTransformation)
    return [{outputName: value} for value in results]
|
||||||
|
|
||||||
|
class MlModelSaver:
    """
    Export models to pickle files inside a models folder and cache the
    reloaded copies by model name.
    """

    # Class-level dict, so it is shared by every MlModelSaver instance.
    # Keys are the "modelName" values passed to exportModel.
    cachedModels = {}

    def __init__(self, config):
        """
        Resolve the models folder and make sure it exists on disk.

        Parameters:
        config (dict): Optional keys 'baseRelativePath' (default '.') and
            'modelsFolder' (default '~~modelsFolder').
        """
        self.baseRelativePath = config.get('baseRelativePath', '.')
        # Looked up outside the f-string: reusing the enclosing quote type
        # inside an f-string is a SyntaxError before Python 3.12.
        modelsFolderName = config.get('modelsFolder', '~~modelsFolder')
        self.modelsFolder = f'{self.baseRelativePath}/{modelsFolderName}'
        ensure_directory_exists(self.modelsFolder)

    def showSupportedModels(self):
        """Return the keys of supportedModels whose 'supported' flag is truthy."""
        return [key for key, value in supportedModels.items() if value.get('supported')]

    def exportModel(self, model, config):
        """
        Tag ``model`` with its config, transformer and predict helper, pickle
        it to ``<modelsFolder>/<modelName>.pkl`` and return the reloaded copy.

        Parameters:
        model: Object to export. It receives the attributes
            mlModelSaverTransformer, mlModelSaverConfig and mlModelSavePredict.
        config (dict): Needs "modelType" (a supported type) and "modelName".
            An optional "transformer" callable is attached to the model and
            left out of the stored config. The caller's dict is not modified.

        Returns:
        The model as loaded back from the pickle file. It is also stored in
        cachedModels under its model name.

        Raises:
        ValueError: If "modelType" is missing or not a supported type.
        KeyError: If "modelName" is missing.
        """
        # Validate before touching the model so a rejected model stays untagged.
        modelType = config.get("modelType", '')
        isModelSupporter = supportedModels.get(modelType, {}).get("supported", False)
        if not isModelSupporter:
            raise ValueError(
                f'only {self.showSupportedModels()} are supported and {modelType} is not supported'
            )

        # Shallow copy: popping "transformer" must not alter the caller's dict.
        modelConfig = dict(config)
        transformer = modelConfig.pop("transformer", default_transformer)
        modelName = modelConfig['modelName']

        model.mlModelSaverTransformer = transformer
        model.mlModelSaverConfig = modelConfig
        model.mlModelSavePredict = partial(mlModelSavePredict, model)

        filename = f'{self.modelsFolder}/{modelName}.pkl'
        # Context managers close (and flush) the file before it is read back.
        with open(filename, 'wb') as modelFile:
            pickle.dump(model, modelFile)
        # Only the file written just above is unpickled here.
        with open(filename, 'rb') as modelFile:
            loaded_model = pickle.load(modelFile)

        self.cachedModels[modelName] = loaded_model
        return loaded_model
|
||||||
|
|||||||
@ -1,67 +0,0 @@
|
|||||||
import pickle
|
|
||||||
import json
|
|
||||||
|
|
||||||
# Model type identifiers mapped to their support flag.
supportedModels = dict([("sm.OLS", {"supported": True})])
|
|
||||||
|
|
||||||
# Data type names mapped to their support flag (one separate dict per name).
supportedDataType = {
    "int": dict(supported=True),
    "float": dict(supported=True),
    "binary": dict(supported=True),
}
|
|
||||||
|
|
||||||
class MlModelSaver:
    """
    Validate a model's export config and attach it to the model.

    Nothing is written to disk by this class.
    """

    def __init__(self, config):
        """
        Parameters:
        config (dict): Optional keys 'baseRelativePath' (default '.') and
            'modelsFolder' (default '~~modelsFolder').
        """
        self.baseRelativePath = config.get('baseRelativePath', '.')
        self.modelsFolder = config.get('modelsFolder', '~~modelsFolder')

    def showSupportedModels(self):
        """Return the keys of supportedModels whose 'supported' flag is truthy."""
        return [key for key, value in supportedModels.items() if value.get('supported')]

    def exportModel(self, model, config):
        """
        Attach ``config`` to ``model`` as ``mlModelSaverConfig`` and print it.

        Parameters:
        model: Object that receives the mlModelSaverConfig attribute.
        config (dict): Needs a "modelType" that is a supported type.

        Raises:
        ValueError: If "modelType" is missing or not a supported type.
        """
        # Validate first so a rejected model is left untouched.
        # modelType is read once, outside the f-string: reusing the enclosing
        # quote type inside an f-string is a SyntaxError before Python 3.12.
        modelType = config.get("modelType", '')
        isModelSupporter = supportedModels.get(modelType, {}).get("supported", False)
        if not isModelSupporter:
            raise ValueError(
                f'only {self.showSupportedModels()} are supported and {modelType} is not supported'
            )
        model.mlModelSaverConfig = config
        print(model.mlModelSaverConfig)
        # TODO: persistence is not implemented. The earlier commented-out draft
        # pickled the model under <baseRelativePath>/models/<modelName>, wrote
        # the collected configs to <baseRelativePath>/models/configs.json, and
        # returned the model loaded back from the pickle file.
|
|
||||||
284
notebooks/.ipynb_checkpoints/001-checkpoint.ipynb
Normal file
284
notebooks/.ipynb_checkpoints/001-checkpoint.ipynb
Normal file
@ -0,0 +1,284 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 206
|
||||||
|
},
|
||||||
|
"id": "ZbwpTMgRjUMS",
|
||||||
|
"outputId": "7fca63af-b277-4dad-bc59-44ad128cb10a"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Sales</th>\n",
|
||||||
|
" <th>Temperature</th>\n",
|
||||||
|
" <th>Advertising</th>\n",
|
||||||
|
" <th>Discount</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>17235</td>\n",
|
||||||
|
" <td>33</td>\n",
|
||||||
|
" <td>15</td>\n",
|
||||||
|
" <td>5.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>19854</td>\n",
|
||||||
|
" <td>42</td>\n",
|
||||||
|
" <td>25</td>\n",
|
||||||
|
" <td>5.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>45786</td>\n",
|
||||||
|
" <td>58</td>\n",
|
||||||
|
" <td>40</td>\n",
|
||||||
|
" <td>10.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>49745</td>\n",
|
||||||
|
" <td>67</td>\n",
|
||||||
|
" <td>70</td>\n",
|
||||||
|
" <td>20.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>65894</td>\n",
|
||||||
|
" <td>73</td>\n",
|
||||||
|
" <td>75</td>\n",
|
||||||
|
" <td>20.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Sales Temperature Advertising Discount\n",
|
||||||
|
"0 17235 33 15 5.0\n",
|
||||||
|
"1 19854 42 25 5.0\n",
|
||||||
|
"2 45786 58 40 10.0\n",
|
||||||
|
"3 49745 67 70 20.0\n",
|
||||||
|
"4 65894 73 75 20.0"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"from mlModelSaver import MlModelSaver\n",
|
||||||
|
"from helpers import add_constant_column\n",
|
||||||
|
"\n",
|
||||||
|
"mowersDf = pd.read_excel('https://www.dropbox.com/scl/fi/y2rktyoqb8rrshrnlpvw1/Mowers.xlsx?rlkey=e5bi1d8sx5hml4ylfkjv7cryh&dl=1')\n",
|
||||||
|
"mowersDf.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {
|
||||||
|
"id": "SPmxr6rde0Od"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# https://www.statsmodels.org/stable/index.html\n",
|
||||||
|
"import statsmodels.api as sm\n",
|
||||||
|
"# Your answer"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "4jjcJF3SfX-h",
|
||||||
|
"outputId": "dac6566b-1320-48ca-db70-712d4a7ff82b"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"modelPredictSaleByTemperatureAdvertisingDiscount = sm.OLS(\n",
|
||||||
|
" mowersDf[\"Sales\"],\n",
|
||||||
|
" add_constant_column(mowersDf[[\"Temperature\", \"Advertising\", \"Discount\"]])\n",
|
||||||
|
")\n",
|
||||||
|
"modelPredictSaleByTemperatureAdvertisingDiscountFit = modelPredictSaleByTemperatureAdvertisingDiscount.fit()\n",
|
||||||
|
"# print(modelPredictSaleByTemperatureAdvertisingDiscountFit.summary())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x12e499550>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from mlModelSaver import MlModelSaver\n",
|
||||||
|
"mlModelSaverInstance = MlModelSaver({\n",
|
||||||
|
" \"baseRelativePath\": \"..\",\n",
|
||||||
|
" \"modelsFolder\": \"~~tmp/testModels\"\n",
|
||||||
|
"})\n",
|
||||||
|
"\n",
|
||||||
|
"loadedModel = mlModelSaverInstance.exportModel(\n",
|
||||||
|
" modelPredictSaleByTemperatureAdvertisingDiscountFit,\n",
|
||||||
|
" {\n",
|
||||||
|
" \"modelName\": \"modelPredictSaleByTemperatureAdvertisingDiscountFit\",\n",
|
||||||
|
" \"description\": \"modelPredictSaleByTemperatureAdvertisingDiscountFit\",\n",
|
||||||
|
" \"modelType\": \"sm.OLS\",\n",
|
||||||
|
" \"inputs\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Temperature\",\n",
|
||||||
|
" \"type\": \"float\",\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Advertising\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Discount\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" \"transformer\": add_constant_column,\n",
|
||||||
|
" \"outputs\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Sales\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ]\n",
|
||||||
|
" }\n",
|
||||||
|
")\n",
|
||||||
|
"loadedModel"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"testData = [{\n",
|
||||||
|
" 'Temperature': 42,\n",
|
||||||
|
" 'Advertising': 15,\n",
|
||||||
|
" 'Discount': 5\n",
|
||||||
|
"}]\n",
|
||||||
|
"\n",
|
||||||
|
"# Create a DataFrame from the dictionary\n",
|
||||||
|
"testDf = pd.DataFrame(testData)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"0 19590.46727\n",
|
||||||
|
"dtype: float64"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"modelPredictSaleByTemperatureAdvertisingDiscountFit.predict( add_constant_column(testDf))\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[{'Sales': 19590.467270313893}]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loadedModel.mlModelSavePredict(testDf)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "4YoK17TkeGCw"
|
||||||
|
},
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"provenance": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
1906
notebooks/.ipynb_checkpoints/Advance_regression-checkpoint.ipynb
Normal file
1906
notebooks/.ipynb_checkpoints/Advance_regression-checkpoint.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
6
notebooks/.ipynb_checkpoints/Untitled-checkpoint.ipynb
Normal file
6
notebooks/.ipynb_checkpoints/Untitled-checkpoint.ipynb
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"cells": [],
|
||||||
|
"metadata": {},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
284
notebooks/001.ipynb
Normal file
284
notebooks/001.ipynb
Normal file
@ -0,0 +1,284 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 206
|
||||||
|
},
|
||||||
|
"id": "ZbwpTMgRjUMS",
|
||||||
|
"outputId": "7fca63af-b277-4dad-bc59-44ad128cb10a"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Sales</th>\n",
|
||||||
|
" <th>Temperature</th>\n",
|
||||||
|
" <th>Advertising</th>\n",
|
||||||
|
" <th>Discount</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>17235</td>\n",
|
||||||
|
" <td>33</td>\n",
|
||||||
|
" <td>15</td>\n",
|
||||||
|
" <td>5.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>19854</td>\n",
|
||||||
|
" <td>42</td>\n",
|
||||||
|
" <td>25</td>\n",
|
||||||
|
" <td>5.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>45786</td>\n",
|
||||||
|
" <td>58</td>\n",
|
||||||
|
" <td>40</td>\n",
|
||||||
|
" <td>10.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>49745</td>\n",
|
||||||
|
" <td>67</td>\n",
|
||||||
|
" <td>70</td>\n",
|
||||||
|
" <td>20.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>65894</td>\n",
|
||||||
|
" <td>73</td>\n",
|
||||||
|
" <td>75</td>\n",
|
||||||
|
" <td>20.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Sales Temperature Advertising Discount\n",
|
||||||
|
"0 17235 33 15 5.0\n",
|
||||||
|
"1 19854 42 25 5.0\n",
|
||||||
|
"2 45786 58 40 10.0\n",
|
||||||
|
"3 49745 67 70 20.0\n",
|
||||||
|
"4 65894 73 75 20.0"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"from mlModelSaver import MlModelSaver\n",
|
||||||
|
"from helpers import add_constant_column\n",
|
||||||
|
"\n",
|
||||||
|
"mowersDf = pd.read_excel('https://www.dropbox.com/scl/fi/y2rktyoqb8rrshrnlpvw1/Mowers.xlsx?rlkey=e5bi1d8sx5hml4ylfkjv7cryh&dl=1')\n",
|
||||||
|
"mowersDf.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {
|
||||||
|
"id": "SPmxr6rde0Od"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# https://www.statsmodels.org/stable/index.html\n",
|
||||||
|
"import statsmodels.api as sm\n",
|
||||||
|
"# Your answer"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "4jjcJF3SfX-h",
|
||||||
|
"outputId": "dac6566b-1320-48ca-db70-712d4a7ff82b"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"modelPredictSaleByTemperatureAdvertisingDiscount = sm.OLS(\n",
|
||||||
|
" mowersDf[\"Sales\"],\n",
|
||||||
|
" add_constant_column(mowersDf[[\"Temperature\", \"Advertising\", \"Discount\"]])\n",
|
||||||
|
")\n",
|
||||||
|
"modelPredictSaleByTemperatureAdvertisingDiscountFit = modelPredictSaleByTemperatureAdvertisingDiscount.fit()\n",
|
||||||
|
"# print(modelPredictSaleByTemperatureAdvertisingDiscountFit.summary())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x12e499550>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from mlModelSaver import MlModelSaver\n",
|
||||||
|
"mlModelSaverInstance = MlModelSaver({\n",
|
||||||
|
" \"baseRelativePath\": \"..\",\n",
|
||||||
|
" \"modelsFolder\": \"~~tmp/testModels\"\n",
|
||||||
|
"})\n",
|
||||||
|
"\n",
|
||||||
|
"loadedModel = mlModelSaverInstance.exportModel(\n",
|
||||||
|
" modelPredictSaleByTemperatureAdvertisingDiscountFit,\n",
|
||||||
|
" {\n",
|
||||||
|
" \"modelName\": \"modelPredictSaleByTemperatureAdvertisingDiscountFit\",\n",
|
||||||
|
" \"description\": \"modelPredictSaleByTemperatureAdvertisingDiscountFit\",\n",
|
||||||
|
" \"modelType\": \"sm.OLS\",\n",
|
||||||
|
" \"inputs\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Temperature\",\n",
|
||||||
|
" \"type\": \"float\",\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Advertising\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Discount\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" \"transformer\": add_constant_column,\n",
|
||||||
|
" \"outputs\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Sales\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ]\n",
|
||||||
|
" }\n",
|
||||||
|
")\n",
|
||||||
|
"loadedModel"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"testData = [{\n",
|
||||||
|
" 'Temperature': 42,\n",
|
||||||
|
" 'Advertising': 15,\n",
|
||||||
|
" 'Discount': 5\n",
|
||||||
|
"}]\n",
|
||||||
|
"\n",
|
||||||
|
"# Create a DataFrame from the dictionary\n",
|
||||||
|
"testDf = pd.DataFrame(testData)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"0 19590.46727\n",
|
||||||
|
"dtype: float64"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"modelPredictSaleByTemperatureAdvertisingDiscountFit.predict( add_constant_column(testDf))\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[{'Sales': 19590.467270313893}]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loadedModel.mlModelSavePredict(testDf)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "4YoK17TkeGCw"
|
||||||
|
},
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"provenance": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
4
notebooks/Readme.md
Normal file
4
notebooks/Readme.md
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
```
|
||||||
|
pip install jupyterlab
|
||||||
|
pip install notebook
|
||||||
|
```
|
||||||
170
notebooks/Untitled.ipynb
Normal file
170
notebooks/Untitled.ipynb
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "1a33d2cd-5d9f-40a6-bc28-5b2e8026226c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>A</th>\n",
|
||||||
|
" <th>B</th>\n",
|
||||||
|
" <th>C</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>7</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" A B C\n",
|
||||||
|
"0 1 4 7"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"# Example DataFrame\n",
|
||||||
|
"data = {\n",
|
||||||
|
" 'A': [1, 2, 3],\n",
|
||||||
|
" 'B': [4, 5, 6],\n",
|
||||||
|
" 'C': [7, 8, 9]\n",
|
||||||
|
"}\n",
|
||||||
|
"df = pd.DataFrame(data)\n",
|
||||||
|
"\n",
|
||||||
|
"# Create a new DataFrame with only the first row using .iloc[0:1]\n",
|
||||||
|
"first_row_df = df.iloc[0:1]\n",
|
||||||
|
"first_row_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "51c38582-bccf-4e03-918f-9d65bbec1dda",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"New DataFrame with First Row:\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>A</th>\n",
|
||||||
|
" <th>B</th>\n",
|
||||||
|
" <th>C</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>7</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" A B C\n",
|
||||||
|
"0 1 4 7"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Create a new DataFrame with only the first row using .head(1)\n",
|
||||||
|
"first_row_df = df.head(1)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"New DataFrame with First Row:\")\n",
|
||||||
|
"first_row_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "664e8423-f560-4c23-8a94-242f8ec283cd",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
1906
notebooks/wip/Advance_regression.ipynb
Normal file
1906
notebooks/wip/Advance_regression.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
2742
notebooks/wip/Advance_regression2.ipynb
Normal file
2742
notebooks/wip/Advance_regression2.ipynb
Normal file
File diff suppressed because one or more lines are too long
701
notebooks/wip/Advance_regression3.ipynb
Normal file
701
notebooks/wip/Advance_regression3.ipynb
Normal file
@ -0,0 +1,701 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "xwFyEsosINqT"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "pKewSQysItJ-"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# https://www.statsmodels.org/stable/index.html\n",
|
||||||
|
"import statsmodels.api as sm"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "Lz-DyAtNWsJR"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Download Dataset from https://www.dropbox.com/scl/fi/bkcdp9tpqqh6dfr6phtt8/AnnArbor.xlsx?rlkey=0agfqwc7f0kt7oqb3e2h6q3qs&dl=1\n",
|
||||||
|
"# and add it to colab"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "0zM8FGMJXJ70"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# annArborDf = pd.read_excel(\"./AnnArbor.xlsx\")\n",
|
||||||
|
"annArborDf = pd.read_excel(\"https://www.dropbox.com/scl/fi/bkcdp9tpqqh6dfr6phtt8/AnnArbor.xlsx?rlkey=0agfqwc7f0kt7oqb3e2h6q3qs&dl=1\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 1000
|
||||||
|
},
|
||||||
|
"id": "t0LUca0Myqw5",
|
||||||
|
"outputId": "249ab087-895f-4fa6-993e-e8dd50ef87c1"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"annArborDf"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "GQRNPIeyy6ub",
|
||||||
|
"outputId": "00211933-f2b1-40c6-d9cf-187560ffa305"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"annArborDf.size"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "yumMybniy85d"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"annArborDf.describe()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "aspq6hoPy_xZ",
|
||||||
|
"outputId": "96892272-a1d5-400e-a177-6c96746619d8"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"annArborDf.shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "z_hVTvPrzYJr"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import matplotlib.pyplot as plt"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 34
|
||||||
|
},
|
||||||
|
"id": "pIniVuaIzaaZ",
|
||||||
|
"outputId": "6a061f6a-8bff-42c0-d705-0c2bd06eb5ff"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Plotting\n",
|
||||||
|
"fig1 = plt.figure(\n",
|
||||||
|
" figsize=(8, 8)\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 449
|
||||||
|
},
|
||||||
|
"id": "VHdpDE7o42Pf",
|
||||||
|
"outputId": "ac876802-b6d1-4926-d069-0532ee9e7a0b"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"plt.scatter(\n",
|
||||||
|
" annArborDf[\"Beds\"],\n",
|
||||||
|
" annArborDf[\"Rent\"],\n",
|
||||||
|
" color='blue',\n",
|
||||||
|
" alpha=0.9,\n",
|
||||||
|
" label='Data Points - scatter',\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"plt.xlabel('Beds')\n",
|
||||||
|
"plt.ylabel('Rent')\n",
|
||||||
|
"plt.legend()\n",
|
||||||
|
"plt.grid(True)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 449
|
||||||
|
},
|
||||||
|
"id": "knAa4W9R47rZ",
|
||||||
|
"outputId": "81359d91-03b7-4f70-c381-c88172f800a9"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"plt.scatter(\n",
|
||||||
|
" annArborDf[\"Baths\"],\n",
|
||||||
|
" annArborDf[\"Rent\"],\n",
|
||||||
|
" color='blue',\n",
|
||||||
|
" alpha=0.9,\n",
|
||||||
|
" label='Data Points - scatter',\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"plt.xlabel('Baths')\n",
|
||||||
|
"plt.ylabel('Rent')\n",
|
||||||
|
"plt.legend()\n",
|
||||||
|
"plt.grid(True)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 449
|
||||||
|
},
|
||||||
|
"id": "dOnWJbFOzczV",
|
||||||
|
"outputId": "c6d6b86b-dd85-45d1-b543-928441c11dc4"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"plt.scatter(\n",
|
||||||
|
" annArborDf[\"Sqft\"],\n",
|
||||||
|
" annArborDf[\"Rent\"],\n",
|
||||||
|
" color='blue',\n",
|
||||||
|
" alpha=0.9,\n",
|
||||||
|
" label='Data Points - scatter',\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"plt.xlabel('Sqft')\n",
|
||||||
|
"plt.ylabel('Rent')\n",
|
||||||
|
"plt.legend()\n",
|
||||||
|
"plt.grid(True)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "alIhUPPUzvli",
|
||||||
|
"outputId": "8ed14c4b-a596-49ac-912a-0dcb4145df89"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"rentSqftModel1 = sm.OLS(\n",
|
||||||
|
" annArborDf[\"Rent\"],\n",
|
||||||
|
" sm.add_constant(annArborDf[[\"Sqft\"]])\n",
|
||||||
|
")\n",
|
||||||
|
"rentSqftModel1Fit = rentSqftModel1.fit()\n",
|
||||||
|
"print(rentSqftModel1Fit.summary())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from functions.exportModel import exportModel\n",
|
||||||
|
"exportModel({\n",
|
||||||
|
" \"modelName\": \"rentSqftModel1Fit\",\n",
|
||||||
|
" \"model\": rentSqftModel1Fit,\n",
|
||||||
|
" \"description\": \"Predict Rent based on Sqft for annArborDf\",\n",
|
||||||
|
" \"modelType\": \"sm.OLS\",\n",
|
||||||
|
" \"baseRelativePath\": \"..\",\n",
|
||||||
|
" \"inputs\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"const\",\n",
|
||||||
|
" \"type\": \"int\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Sqft\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" \"output\": {\n",
|
||||||
|
" \"name\": \"Rent\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
"})"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 1000
|
||||||
|
},
|
||||||
|
"id": "S-AyfiLN0Due",
|
||||||
|
"outputId": "aacd248d-5a72-4ce0-ab0a-048f30d398ca"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"predictedRent1 = rentSqftModel1Fit.predict(sm.add_constant(annArborDf[\"Sqft\"]))\n",
|
||||||
|
"annArborDf['predictedRent1'] = predictedRent1\n",
|
||||||
|
"annArborDf"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "9ouX-mzz4sl-"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 454
|
||||||
|
},
|
||||||
|
"id": "L55GN8hZ4wXi",
|
||||||
|
"outputId": "712ace2c-5a04-48e0-acf0-cc42430f2aa9"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"plt.scatter(\n",
|
||||||
|
" annArborDf[\"Rent\"],\n",
|
||||||
|
" annArborDf[\"Sqft\"],\n",
|
||||||
|
" color='blue',\n",
|
||||||
|
" alpha=0.5,\n",
|
||||||
|
" label='Data Points - scatter',\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"intercept = rentSqftModel1Fit.params['const']\n",
|
||||||
|
"sqFtSlope = rentSqftModel1Fit.params['Sqft']\n",
|
||||||
|
"x_values = np.linspace(500, 4500, 200)\n",
|
||||||
|
"y_values = intercept + sqFtSlope * x_values\n",
|
||||||
|
"\n",
|
||||||
|
"plt.plot(\n",
|
||||||
|
" x_values,\n",
|
||||||
|
" y_values,\n",
|
||||||
|
" color='red',\n",
|
||||||
|
" label='rentSqftModel1Fit - predictedRent1'\n",
|
||||||
|
")\n",
|
||||||
|
"plt.xlabel('Sqft')\n",
|
||||||
|
"plt.ylabel('Rent')\n",
|
||||||
|
"plt.legend()\n",
|
||||||
|
"plt.grid(True)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "swSVnmy44Ddg",
|
||||||
|
"outputId": "251afab3-0563-4eb7-e23a-b526238c7584"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"rentBedsBathsSqftModel = sm.OLS(\n",
|
||||||
|
" annArborDf[\"Rent\"],\n",
|
||||||
|
" sm.add_constant(annArborDf[[\"Beds\", \"Baths\", \"Sqft\"]])\n",
|
||||||
|
")\n",
|
||||||
|
"rentBedsBathsSqftModelFit = rentBedsBathsSqftModel.fit()\n",
|
||||||
|
"print(rentBedsBathsSqftModelFit.summary())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from functions.exportModel import exportModel\n",
|
||||||
|
"exportModel({\n",
|
||||||
|
" \"modelName\": \"rentBedsBathsSqftModelFit\",\n",
|
||||||
|
" \"model\": rentBedsBathsSqftModelFit,\n",
|
||||||
|
" \"description\": \"Predict Rent based on Beds,Baths,Sqft for annArborDf\",\n",
|
||||||
|
" \"modelType\": \"sm.OLS\",\n",
|
||||||
|
" \"baseRelativePath\": \"..\",\n",
|
||||||
|
" \"inputs\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"const\",\n",
|
||||||
|
" \"type\": \"int\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Beds\",\n",
|
||||||
|
" \"type\": \"int\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Baths\",\n",
|
||||||
|
" \"type\": \"int\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Sqft\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
" \n",
|
||||||
|
" ],\n",
|
||||||
|
" \"output\": {\n",
|
||||||
|
" \"name\": \"Rent\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
"})"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "6lKEw7Wt57Px"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import math"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 1000
|
||||||
|
},
|
||||||
|
"id": "da3o51IG5u7r",
|
||||||
|
"outputId": "abe849ba-7689-468c-f327-b183c4d3f70a"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from functions.transformers import transformersDict\n",
|
||||||
|
"# annArborDf['log(Sqft)'] = annArborDf.apply(lambda row: math.log(row['Sqft']), axis=1)\n",
|
||||||
|
"annArborDf['log(Sqft)'] = annArborDf.apply(transformersDict.get('Sqft_log'), axis=1)\n",
|
||||||
|
"annArborDf"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "lYYrtI0O5lSG",
|
||||||
|
"outputId": "6a980e88-5630-4e5e-f887-875ab5f1d748"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"rentBedsBathsLogSqftModel= sm.OLS(\n",
|
||||||
|
" annArborDf[\"Rent\"],\n",
|
||||||
|
" sm.add_constant(annArborDf[[\"Beds\", \"Baths\", \"log(Sqft)\"]])\n",
|
||||||
|
")\n",
|
||||||
|
"rentBedsBathsLogSqftModelFit = rentBedsBathsLogSqftModel.fit()\n",
|
||||||
|
"print(rentBedsBathsLogSqftModelFit.summary())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from functions.exportModel import exportModel\n",
|
||||||
|
"exportModel({\n",
|
||||||
|
" \"modelName\": \"rentBedsBathsLogSqftModelFit\",\n",
|
||||||
|
" \"model\": rentBedsBathsLogSqftModelFit,\n",
|
||||||
|
" \"description\": \"Predict Rent based on Beds,Baths,log(Sqft) for annArborDf\",\n",
|
||||||
|
" \"modelType\": \"sm.OLS\",\n",
|
||||||
|
" \"baseRelativePath\": \"..\",\n",
|
||||||
|
" \"inputs\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"const\",\n",
|
||||||
|
" \"type\": \"int\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Beds\",\n",
|
||||||
|
" \"type\": \"int\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Baths\",\n",
|
||||||
|
" \"type\": \"int\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Sqft\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
" \n",
|
||||||
|
" ],\n",
|
||||||
|
" \"transformers\":[\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"log(Sqft)\",\n",
|
||||||
|
" \"transformer\": \"Sqft_log\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" \"output\": {\n",
|
||||||
|
" \"name\": \"Rent\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
"})"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "amUWG6386dyn"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"annArborDf['log(Rent)'] = annArborDf.apply(lambda row: math.log(row['Rent']), axis=1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "LxcjPBLn6iAq",
|
||||||
|
"outputId": "f827bc12-0083-4fb9-ea95-53a58cc0999b"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"rentSqftModel4 = sm.OLS(\n",
|
||||||
|
" annArborDf[\"log(Rent)\"],\n",
|
||||||
|
" sm.add_constant(annArborDf[[\"Beds\", \"Baths\", \"Sqft\"]])\n",
|
||||||
|
")\n",
|
||||||
|
"rentSqftModel4Fit = rentSqftModel4.fit()\n",
|
||||||
|
"print(rentSqftModel4Fit.summary())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "WM5h3QnN60IY",
|
||||||
|
"outputId": "56dd02c1-b8a8-4fcc-951f-676d574e6a62"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"logRentBedsBathsLogSqftModel = sm.OLS(\n",
|
||||||
|
" annArborDf[\"log(Rent)\"],\n",
|
||||||
|
" sm.add_constant(annArborDf[[\"Beds\", \"Baths\", \"log(Sqft)\"]])\n",
|
||||||
|
")\n",
|
||||||
|
"logRentBedsBathsLogSqftModelFit = logRentBedsBathsLogSqftModel.fit()\n",
|
||||||
|
"print(logRentBedsBathsLogSqftModelFit.summary())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from functions.exportModel import exportModel\n",
|
||||||
|
"exportModel({\n",
|
||||||
|
" \"modelName\": \"logRentBedsBathsLogSqftModelFit\",\n",
|
||||||
|
" \"model\": logRentBedsBathsLogSqftModelFit,\n",
|
||||||
|
" \"description\": \"Predict log(Rent) based on Beds,Baths,log(Sqft) for annArborDf\",\n",
|
||||||
|
" \"modelType\": \"sm.OLS\",\n",
|
||||||
|
" \"baseRelativePath\": \"..\",\n",
|
||||||
|
" \"inputs\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"const\",\n",
|
||||||
|
" \"type\": \"int\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Beds\",\n",
|
||||||
|
" \"type\": \"int\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Baths\",\n",
|
||||||
|
" \"type\": \"int\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"Sqft\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
" \n",
|
||||||
|
" ],\n",
|
||||||
|
" \"transformers\":[\n",
|
||||||
|
" {\n",
|
||||||
|
" \"name\": \"log(Sqft)\",\n",
|
||||||
|
" \"transformer\": \"Sqft_log\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" \"output\": {\n",
|
||||||
|
" \"name\": \"log(Rent)\",\n",
|
||||||
|
" \"type\": \"float\"\n",
|
||||||
|
" }\n",
|
||||||
|
"})"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "1PHrUcM6694a",
|
||||||
|
"outputId": "7b463d70-25d1-4073-bf7e-4e93f31c5fb2"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"rentSqftModel6 = sm.OLS(\n",
|
||||||
|
" annArborDf[\"log(Rent)\"],\n",
|
||||||
|
" sm.add_constant(annArborDf[[\"Beds\", \"log(Sqft)\"]])\n",
|
||||||
|
")\n",
|
||||||
|
"rentSqftModel6Fit = rentSqftModel6.fit()\n",
|
||||||
|
"print(rentSqftModel6Fit.summary())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 430
|
||||||
|
},
|
||||||
|
"id": "BybWTp_k7hzc",
|
||||||
|
"outputId": "335b1499-534c-47d2-bdb6-7c0f3b456160"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# plt.scatter(\n",
|
||||||
|
"# annArborDf[\"Sqft\"],\n",
|
||||||
|
"# annArborDf[\"Rent\"],\n",
|
||||||
|
"# color='blue',\n",
|
||||||
|
"# alpha=0.9,\n",
|
||||||
|
"# label='Data Points - scatter',\n",
|
||||||
|
"# )\n",
|
||||||
|
"\n",
|
||||||
|
"plt.scatter(\n",
|
||||||
|
" annArborDf[\"log(Sqft)\"],\n",
|
||||||
|
" annArborDf[\"Rent\"],\n",
|
||||||
|
" color='red',\n",
|
||||||
|
" alpha=0.9,\n",
|
||||||
|
" label='Data Points - scatter',\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# plt.scatter(\n",
|
||||||
|
"# annArborDf[\"log(Sqft)\"],\n",
|
||||||
|
"# annArborDf[\"log(Rent)\"],\n",
|
||||||
|
"# color='Green',\n",
|
||||||
|
"# alpha=0.9,\n",
|
||||||
|
"# label='Data Points - scatter',\n",
|
||||||
|
"# )\n",
|
||||||
|
"\n",
|
||||||
|
"# plt.scatter(\n",
|
||||||
|
"# annArborDf[\"Sqft\"],\n",
|
||||||
|
"# annArborDf[\"log(Rent)\"],\n",
|
||||||
|
"# color='Yellow',\n",
|
||||||
|
"# alpha=0.9,\n",
|
||||||
|
"# label='Data Points - scatter',\n",
|
||||||
|
"# )\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# plt.xlabel('Sqft')\n",
|
||||||
|
"plt.ylabel('Rent')\n",
|
||||||
|
"plt.legend()\n",
|
||||||
|
"plt.grid(True)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"provenance": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
1361
notebooks/wip/Evaluating_Binary_Classification.ipynb
Normal file
1361
notebooks/wip/Evaluating_Binary_Classification.ipynb
Normal file
File diff suppressed because one or more lines are too long
BIN
notebooks/wip/Gym.xlsx
Normal file
BIN
notebooks/wip/Gym.xlsx
Normal file
Binary file not shown.
1708
notebooks/wip/KNN.ipynb
Normal file
1708
notebooks/wip/KNN.ipynb
Normal file
File diff suppressed because one or more lines are too long
2308
notebooks/wip/KNN_adjusted.ipynb
Normal file
2308
notebooks/wip/KNN_adjusted.ipynb
Normal file
File diff suppressed because one or more lines are too long
1785
notebooks/wip/Linear_Probability_and_logistic_Regression.ipynb
Normal file
1785
notebooks/wip/Linear_Probability_and_logistic_Regression.ipynb
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
3837
notebooks/wip/Linear_regression_example.ipynb
Normal file
3837
notebooks/wip/Linear_regression_example.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "mlModelSaver",
|
"name": "mlModelSaver",
|
||||||
"version": "1.0.12",
|
"version": "1.0.13",
|
||||||
"description": "Make life easier for save and serving ml models",
|
"description": "Make life easier for save and serving ml models",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"repository": "git@github.com:smartdev-ca/mlModelSaver.git",
|
"repository": "git@github.com:smartdev-ca/mlModelSaver.git",
|
||||||
|
|||||||
@ -8,7 +8,7 @@ sys.path.insert(
|
|||||||
os.path.abspath(
|
os.path.abspath(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
os.path.dirname(__file__),
|
os.path.dirname(__file__),
|
||||||
'../mlModelSaver'
|
'..'
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -21,7 +21,7 @@ def test_ensureCLassInstance():
|
|||||||
"modelsFolder": "test_modelsFolder"
|
"modelsFolder": "test_modelsFolder"
|
||||||
})
|
})
|
||||||
assert mlModelSaverInstance1.baseRelativePath == "test_baseRelativePath"
|
assert mlModelSaverInstance1.baseRelativePath == "test_baseRelativePath"
|
||||||
assert mlModelSaverInstance1.modelsFolder == "test_modelsFolder"
|
assert mlModelSaverInstance1.modelsFolder == "test_baseRelativePath/test_modelsFolder"
|
||||||
tesSupportedModels = mlModelSaverInstance1.showSupportedModels()
|
tesSupportedModels = mlModelSaverInstance1.showSupportedModels()
|
||||||
assert tesSupportedModels == ['sm.OLS']
|
assert tesSupportedModels == ['sm.OLS']
|
||||||
|
|
||||||
@ -31,22 +31,25 @@ def test_OLS_LinearRegression():
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import statsmodels.api as sm
|
import statsmodels.api as sm
|
||||||
|
from helpers import add_constant_column
|
||||||
salaryMisDf = pd.read_excel("./datasets/Salary_MIS.xlsx")
|
salaryMisDf = pd.read_excel("./datasets/Salary_MIS.xlsx")
|
||||||
salaryBasedOnGpaMisStatistics = sm.OLS(
|
salaryBasedOnGpaMisStatistics = sm.OLS(
|
||||||
salaryMisDf["Salary"],
|
salaryMisDf["Salary"],
|
||||||
sm.add_constant(salaryMisDf[["GPA", "MIS", "Statistics"]])
|
add_constant_column(salaryMisDf[["GPA", "MIS", "Statistics"]])
|
||||||
)
|
)
|
||||||
salaryBasedOnGpaMisStatisticsFit = salaryBasedOnGpaMisStatistics.fit()
|
salaryBasedOnGpaMisStatisticsFit = salaryBasedOnGpaMisStatistics.fit()
|
||||||
mlModelSaverInstance2 = MlModelSaver({
|
mlModelSaverInstance2 = MlModelSaver({
|
||||||
"baseRelativePath": ".",
|
"baseRelativePath": ".",
|
||||||
"modelsFolder": "~~tmp/models"
|
"modelsFolder": "~~tmp/testModels"
|
||||||
})
|
})
|
||||||
|
|
||||||
mlModelSaverInstance2.exportModel(
|
|
||||||
|
|
||||||
|
loadedModel = mlModelSaverInstance2.exportModel(
|
||||||
salaryBasedOnGpaMisStatisticsFit,
|
salaryBasedOnGpaMisStatisticsFit,
|
||||||
{
|
{
|
||||||
"modelName": "salaryBasedOnGpaMisStatisticsFit",
|
"modelName": "salaryBasedOnGpaMisStatistics",
|
||||||
"description": "Predict Salary based on GPA MIS Statistics for sallaryMisDf",
|
"description": "Predict Salary based on GPA MIS Statistics for salaryMisDf",
|
||||||
"modelType": "sm.OLS",
|
"modelType": "sm.OLS",
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{
|
{
|
||||||
@ -62,7 +65,8 @@ def test_OLS_LinearRegression():
|
|||||||
"type": "binary"
|
"type": "binary"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"output": [
|
"transformer": add_constant_column,
|
||||||
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "Salary",
|
"name": "Salary",
|
||||||
"type": "int"
|
"type": "int"
|
||||||
@ -70,4 +74,9 @@ def test_OLS_LinearRegression():
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
assert 2 == 2
|
from mlModelSaver import check_file_exists
|
||||||
|
assert check_file_exists("./~~tmp/testModels/salaryBasedOnGpaMisStatistics.pkl") == True
|
||||||
|
testData = salaryMisDf[["GPA", "MIS", "Statistics"]].iloc[0:2]
|
||||||
|
predictedValueWithLoadedModel = loadedModel.mlModelSavePredict(testData, 'normal')
|
||||||
|
assert predictedValueWithLoadedModel == [{'Salary': 73.9924679451542}, {'Salary': 69.55525482441558}]
|
||||||
|
assert list(mlModelSaverInstance2.cachedModels.keys()) == ['salaryBasedOnGpaMisStatistics']
|
||||||
|
|||||||
@ -1,42 +1,138 @@
|
|||||||
|
anyio==4.4.0
|
||||||
|
appnope==0.1.4
|
||||||
|
argon2-cffi==23.1.0
|
||||||
|
argon2-cffi-bindings==21.2.0
|
||||||
|
arrow==1.3.0
|
||||||
|
asttokens==2.4.1
|
||||||
|
async-lru==2.0.4
|
||||||
|
attrs==23.2.0
|
||||||
autopep8==2.2.0
|
autopep8==2.2.0
|
||||||
|
Babel==2.15.0
|
||||||
|
beautifulsoup4==4.12.3
|
||||||
|
bleach==6.1.0
|
||||||
certifi==2024.6.2
|
certifi==2024.6.2
|
||||||
|
cffi==1.16.0
|
||||||
charset-normalizer==3.3.2
|
charset-normalizer==3.3.2
|
||||||
|
comm==0.2.2
|
||||||
|
contourpy==1.2.1
|
||||||
|
cycler==0.12.1
|
||||||
|
debugpy==1.8.1
|
||||||
|
decorator==5.1.1
|
||||||
|
defusedxml==0.7.1
|
||||||
docutils==0.21.2
|
docutils==0.21.2
|
||||||
et-xmlfile==1.1.0
|
et-xmlfile==1.1.0
|
||||||
|
executing==2.0.1
|
||||||
|
fastjsonschema==2.20.0
|
||||||
|
fonttools==4.53.0
|
||||||
|
fqdn==1.5.1
|
||||||
|
h11==0.14.0
|
||||||
|
httpcore==1.0.5
|
||||||
|
httpx==0.27.0
|
||||||
idna==3.7
|
idna==3.7
|
||||||
importlib_metadata==7.1.0
|
importlib_metadata==7.1.0
|
||||||
iniconfig==2.0.0
|
iniconfig==2.0.0
|
||||||
|
ipykernel==6.29.4
|
||||||
|
ipython==8.25.0
|
||||||
|
ipywidgets==8.1.3
|
||||||
|
isoduration==20.11.0
|
||||||
jaraco.classes==3.4.0
|
jaraco.classes==3.4.0
|
||||||
jaraco.context==5.3.0
|
jaraco.context==5.3.0
|
||||||
jaraco.functools==4.0.1
|
jaraco.functools==4.0.1
|
||||||
|
jedi==0.19.1
|
||||||
|
Jinja2==3.1.4
|
||||||
|
json5==0.9.25
|
||||||
|
jsonpointer==3.0.0
|
||||||
|
jsonschema==4.22.0
|
||||||
|
jsonschema-specifications==2023.12.1
|
||||||
|
jupyter==1.0.0
|
||||||
|
jupyter-console==6.6.3
|
||||||
|
jupyter-events==0.10.0
|
||||||
|
jupyter-lsp==2.2.5
|
||||||
|
jupyter_client==8.6.2
|
||||||
|
jupyter_core==5.7.2
|
||||||
|
jupyter_server==2.14.1
|
||||||
|
jupyter_server_terminals==0.5.3
|
||||||
|
jupyterlab==4.2.2
|
||||||
|
jupyterlab_pygments==0.3.0
|
||||||
|
jupyterlab_server==2.27.2
|
||||||
|
jupyterlab_widgets==3.0.11
|
||||||
keyring==25.2.1
|
keyring==25.2.1
|
||||||
|
kiwisolver==1.4.5
|
||||||
markdown-it-py==3.0.0
|
markdown-it-py==3.0.0
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
matplotlib==3.9.0
|
||||||
|
matplotlib-inline==0.1.7
|
||||||
mdurl==0.1.2
|
mdurl==0.1.2
|
||||||
|
mistune==3.0.2
|
||||||
|
mlModelSaver==1.0.12
|
||||||
more-itertools==10.3.0
|
more-itertools==10.3.0
|
||||||
|
nbclient==0.10.0
|
||||||
|
nbconvert==7.16.4
|
||||||
|
nbformat==5.10.4
|
||||||
|
nest-asyncio==1.6.0
|
||||||
nh3==0.2.17
|
nh3==0.2.17
|
||||||
|
notebook==7.2.1
|
||||||
|
notebook_shim==0.2.4
|
||||||
numpy==1.26.4
|
numpy==1.26.4
|
||||||
openpyxl==3.1.4
|
openpyxl==3.1.4
|
||||||
|
overrides==7.7.0
|
||||||
packaging==24.1
|
packaging==24.1
|
||||||
pandas==2.2.2
|
pandas==2.2.2
|
||||||
|
pandocfilters==1.5.1
|
||||||
|
parso==0.8.4
|
||||||
patsy==0.5.6
|
patsy==0.5.6
|
||||||
|
pexpect==4.9.0
|
||||||
|
pillow==10.3.0
|
||||||
pkginfo==1.11.1
|
pkginfo==1.11.1
|
||||||
|
platformdirs==4.2.2
|
||||||
pluggy==1.5.0
|
pluggy==1.5.0
|
||||||
|
prometheus_client==0.20.0
|
||||||
|
prompt_toolkit==3.0.47
|
||||||
|
psutil==5.9.8
|
||||||
|
ptyprocess==0.7.0
|
||||||
|
pure-eval==0.2.2
|
||||||
pycodestyle==2.11.1
|
pycodestyle==2.11.1
|
||||||
|
pycparser==2.22
|
||||||
Pygments==2.18.0
|
Pygments==2.18.0
|
||||||
|
pyparsing==3.1.2
|
||||||
pytest==8.2.2
|
pytest==8.2.2
|
||||||
python-dateutil==2.9.0.post0
|
python-dateutil==2.9.0.post0
|
||||||
|
python-json-logger==2.0.7
|
||||||
pytz==2024.1
|
pytz==2024.1
|
||||||
|
PyYAML==6.0.1
|
||||||
|
pyzmq==26.0.3
|
||||||
|
qtconsole==5.5.2
|
||||||
|
QtPy==2.4.1
|
||||||
readme_renderer==43.0
|
readme_renderer==43.0
|
||||||
|
referencing==0.35.1
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
requests-toolbelt==1.0.0
|
requests-toolbelt==1.0.0
|
||||||
|
rfc3339-validator==0.1.4
|
||||||
rfc3986==2.0.0
|
rfc3986==2.0.0
|
||||||
|
rfc3986-validator==0.1.1
|
||||||
rich==13.7.1
|
rich==13.7.1
|
||||||
|
rpds-py==0.18.1
|
||||||
scipy==1.13.1
|
scipy==1.13.1
|
||||||
|
Send2Trash==1.8.3
|
||||||
setuptools==70.0.0
|
setuptools==70.0.0
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
|
sniffio==1.3.1
|
||||||
|
soupsieve==2.5
|
||||||
|
stack-data==0.6.3
|
||||||
statsmodels==0.14.2
|
statsmodels==0.14.2
|
||||||
|
terminado==0.18.1
|
||||||
|
tinycss2==1.3.0
|
||||||
|
tornado==6.4.1
|
||||||
|
traitlets==5.14.3
|
||||||
twine==5.1.0
|
twine==5.1.0
|
||||||
|
types-python-dateutil==2.9.0.20240316
|
||||||
tzdata==2024.1
|
tzdata==2024.1
|
||||||
|
uri-template==1.3.0
|
||||||
urllib3==2.2.1
|
urllib3==2.2.1
|
||||||
|
wcwidth==0.2.13
|
||||||
|
webcolors==24.6.0
|
||||||
|
webencodings==0.5.1
|
||||||
|
websocket-client==1.8.0
|
||||||
wheel==0.43.0
|
wheel==0.43.0
|
||||||
|
widgetsnbextension==4.0.11
|
||||||
zipp==3.19.2
|
zipp==3.19.2
|
||||||
|
|||||||
2
setup.py
2
setup.py
@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='mlModelSaver',
|
name='mlModelSaver',
|
||||||
version='1.0.12',
|
version='1.0.13',
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
description='Make life easier for saving and serving ML models',
|
description='Make life easier for saving and serving ML models',
|
||||||
long_description=open('DOCS.md').read(), # Assumes you have a README.md file
|
long_description=open('DOCS.md').read(), # Assumes you have a README.md file
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user