model-registry/notebooks/wip/Advance_regression.ipynb

1907 lines
60 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "xwFyEsosINqT"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "pKewSQysItJ-"
},
"outputs": [],
"source": [
"# https://www.statsmodels.org/stable/index.html\n",
"import statsmodels.api as sm"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "Lz-DyAtNWsJR"
},
"outputs": [],
"source": [
"# Dataset source: https://www.dropbox.com/scl/fi/zt2vtwhpz8ndblsxqdqx1/Salary_MIS.xlsx?rlkey=2uk6m7m9w90isv6zsynhhhpyv&st=gxumjns5&dl=1\n",
"# To work from a local copy instead, download the file and upload it to Colab."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "6rRHygNBIpgA"
},
"outputs": [],
"source": [
"# Read the salary workbook directly from the shared Dropbox URL.\n",
"sallaryMisDf = pd.read_excel(\n",
"    \"https://www.dropbox.com/scl/fi/zt2vtwhpz8ndblsxqdqx1/Salary_MIS.xlsx?rlkey=2uk6m7m9w90isv6zsynhhhpyv&st=gxumjns5&dl=1\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "0zM8FGMJXJ70"
},
"outputs": [],
"source": [
"# sallaryMisDf = pd.read_excel(\"./Salary_MIS.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "wsIgDGYcXT_z"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Salary</th>\n",
" <th>GPA</th>\n",
" <th>MIS</th>\n",
" <th>Statistics</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>72</td>\n",
" <td>3.53</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>66</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>72</td>\n",
" <td>3.69</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>63</td>\n",
" <td>3.24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>65</td>\n",
" <td>3.21</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>66</td>\n",
" <td>3.27</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>63</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>78</td>\n",
" <td>3.04</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>64</td>\n",
" <td>2.99</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119</th>\n",
" <td>66</td>\n",
" <td>3.65</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>120 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" Salary GPA MIS Statistics\n",
"0 72 3.53 1 0\n",
"1 66 2.86 1 0\n",
"2 72 3.69 0 0\n",
"3 63 3.24 0 0\n",
"4 65 3.21 0 0\n",
".. ... ... ... ...\n",
"115 66 3.27 0 0\n",
"116 63 2.86 1 0\n",
"117 78 3.04 1 1\n",
"118 64 2.99 0 0\n",
"119 66 3.65 0 0\n",
"\n",
"[120 rows x 4 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sallaryMisDf"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "nw2BHv7PmpVU"
},
"outputs": [
{
"data": {
"text/plain": [
"(120, 4)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sallaryMisDf.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"id": "mWaKOoGvmrE8"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Salary</th>\n",
" <th>GPA</th>\n",
" <th>MIS</th>\n",
" <th>Statistics</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>120.000000</td>\n",
" <td>120.000000</td>\n",
" <td>120.000000</td>\n",
" <td>120.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>69.875000</td>\n",
" <td>3.242750</td>\n",
" <td>0.316667</td>\n",
" <td>0.341667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>6.594577</td>\n",
" <td>0.493834</td>\n",
" <td>0.467127</td>\n",
" <td>0.476257</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>53.000000</td>\n",
" <td>2.410000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>65.750000</td>\n",
" <td>2.805000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>70.000000</td>\n",
" <td>3.280000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>73.250000</td>\n",
" <td>3.692500</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>88.000000</td>\n",
" <td>3.980000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Salary GPA MIS Statistics\n",
"count 120.000000 120.000000 120.000000 120.000000\n",
"mean 69.875000 3.242750 0.316667 0.341667\n",
"std 6.594577 0.493834 0.467127 0.476257\n",
"min 53.000000 2.410000 0.000000 0.000000\n",
"25% 65.750000 2.805000 0.000000 0.000000\n",
"50% 70.000000 3.280000 0.000000 0.000000\n",
"75% 73.250000 3.692500 1.000000 1.000000\n",
"max 88.000000 3.980000 1.000000 1.000000"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sallaryMisDf.describe()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"id": "w-fAHOgMmyH5"
},
"outputs": [
{
"data": {
"text/plain": [
"(120, 4)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sallaryMisDf.shape"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "MDlD1b-aY4Yc"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>const</th>\n",
" <th>GPA</th>\n",
" <th>MIS</th>\n",
" <th>Statistics</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>3.53</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>3.69</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>3.24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>3.21</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>1.0</td>\n",
" <td>3.27</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>1.0</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>1.0</td>\n",
" <td>3.04</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>1.0</td>\n",
" <td>2.99</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119</th>\n",
" <td>1.0</td>\n",
" <td>3.65</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>120 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" const GPA MIS Statistics\n",
"0 1.0 3.53 1 0\n",
"1 1.0 2.86 1 0\n",
"2 1.0 3.69 0 0\n",
"3 1.0 3.24 0 0\n",
"4 1.0 3.21 0 0\n",
".. ... ... ... ...\n",
"115 1.0 3.27 0 0\n",
"116 1.0 2.86 1 0\n",
"117 1.0 3.04 1 1\n",
"118 1.0 2.99 0 0\n",
"119 1.0 3.65 0 0\n",
"\n",
"[120 rows x 4 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sm.add_constant(sallaryMisDf[[\"GPA\", \"MIS\", \"Statistics\"]])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"id": "MjFUWOq2m6P3"
},
"outputs": [],
"source": [
"# Baseline OLS specification: Salary regressed on GPA and the MIS / Statistics columns.\n",
"# sm.add_constant supplies the intercept column (\"const\").\n",
"salaryBasedOnGpaMisStatistics = sm.OLS(\n",
"    endog=sallaryMisDf[\"Salary\"],\n",
"    exog=sm.add_constant(sallaryMisDf[[\"GPA\", \"MIS\", \"Statistics\"]]),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"id": "3yteijRmnabA"
},
"outputs": [],
"source": [
"salaryBasedOnGpaMisStatisticsFit = salaryBasedOnGpaMisStatistics.fit()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x3473b7020>"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from functions.exportModel import exportModel\n",
"\n",
"# Hand the fitted baseline model to exportModel along with its declared input/output schema.\n",
"exportModel({\n",
"    \"modelName\": \"salaryBasedOnGpaMisStatisticsFit\",\n",
"    \"model\": salaryBasedOnGpaMisStatisticsFit,\n",
"    \"description\": \"Predict Salary based on GPA MIS Statistics for sallaryMisDf\",\n",
"    \"modelType\": \"sm.OLS\",\n",
"    \"baseRelativePath\": \"..\",\n",
"    \"inputs\": [\n",
"        {\"name\": \"const\", \"type\": \"int\"},\n",
"        {\"name\": \"GPA\", \"type\": \"float\"},\n",
"        {\"name\": \"MIS\", \"type\": \"binary\"},\n",
"        {\"name\": \"Statistics\", \"type\": \"binary\"},\n",
"    ],\n",
"    \"output\": {\"name\": \"Salary\", \"type\": \"int\"},\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"id": "adXMPcPPndd1"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Salary R-squared: 0.795\n",
"Model: OLS Adj. R-squared: 0.790\n",
"Method: Least Squares F-statistic: 150.3\n",
"Date: Sun, 09 Jun 2024 Prob (F-statistic): 8.35e-40\n",
"Time: 01:24:53 Log-Likelihood: -300.92\n",
"No. Observations: 120 AIC: 609.8\n",
"Df Residuals: 116 BIC: 621.0\n",
"Df Model: 3 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 44.0072 1.860 23.662 0.000 40.324 47.691\n",
"GPA 6.6227 0.569 11.649 0.000 5.497 7.749\n",
"MIS 6.6071 0.595 11.098 0.000 5.428 7.786\n",
"Statistics 6.7309 0.591 11.391 0.000 5.561 7.901\n",
"==============================================================================\n",
"Omnibus: 1.144 Durbin-Watson: 2.164\n",
"Prob(Omnibus): 0.564 Jarque-Bera (JB): 0.758\n",
"Skew: -0.172 Prob(JB): 0.685\n",
"Kurtosis: 3.182 Cond. No. 24.4\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"print(salaryBasedOnGpaMisStatisticsFit.summary())"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"id": "H5PP4w6epEwm"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Salary</th>\n",
" <th>GPA</th>\n",
" <th>MIS</th>\n",
" <th>Statistics</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>72</td>\n",
" <td>3.53</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>66</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>72</td>\n",
" <td>3.69</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>63</td>\n",
" <td>3.24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>65</td>\n",
" <td>3.21</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>66</td>\n",
" <td>3.27</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>63</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>78</td>\n",
" <td>3.04</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>64</td>\n",
" <td>2.99</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119</th>\n",
" <td>66</td>\n",
" <td>3.65</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>120 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" Salary GPA MIS Statistics\n",
"0 72 3.53 1 0\n",
"1 66 2.86 1 0\n",
"2 72 3.69 0 0\n",
"3 63 3.24 0 0\n",
"4 65 3.21 0 0\n",
".. ... ... ... ...\n",
"115 66 3.27 0 0\n",
"116 63 2.86 1 0\n",
"117 78 3.04 1 1\n",
"118 64 2.99 0 0\n",
"119 66 3.65 0 0\n",
"\n",
"[120 rows x 4 columns]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sallaryMisDf"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"id": "jgXOZuY4ocyq"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Salary</th>\n",
" <th>GPA</th>\n",
" <th>MIS</th>\n",
" <th>Statistics</th>\n",
" <th>misXStatistics</th>\n",
" <th>misXStatistics1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>72</td>\n",
" <td>3.53</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>66</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>72</td>\n",
" <td>3.69</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>63</td>\n",
" <td>3.24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>65</td>\n",
" <td>3.21</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>66</td>\n",
" <td>3.27</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>63</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>78</td>\n",
" <td>3.04</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>64</td>\n",
" <td>2.99</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119</th>\n",
" <td>66</td>\n",
" <td>3.65</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>120 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Salary GPA MIS Statistics misXStatistics misXStatistics1\n",
"0 72 3.53 1 0 0 0.0\n",
"1 66 2.86 1 0 0 0.0\n",
"2 72 3.69 0 0 0 0.0\n",
"3 63 3.24 0 0 0 0.0\n",
"4 65 3.21 0 0 0 0.0\n",
".. ... ... ... ... ... ...\n",
"115 66 3.27 0 0 0 0.0\n",
"116 63 2.86 1 0 0 0.0\n",
"117 78 3.04 1 1 1 1.0\n",
"118 64 2.99 0 0 0 0.0\n",
"119 66 3.65 0 0 0 0.0\n",
"\n",
"[120 rows x 6 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from functions.transformers import transformersDict\n",
"\n",
"# MIS x Statistics interaction computed directly as a column product.\n",
"sallaryMisDf[\"misXStatistics\"] = sallaryMisDf[\"MIS\"] * sallaryMisDf[\"Statistics\"]\n",
"\n",
"# The same interaction produced row by row through the shared transformer\n",
"# (presumably equivalent to: lambda row: row['MIS'] * row['Statistics'] -- transformer source not shown here).\n",
"# Index with [] rather than .get(): a missing key then raises KeyError at the lookup\n",
"# instead of handing None to DataFrame.apply.\n",
"sallaryMisDf['misXStatistics1'] = sallaryMisDf.apply(transformersDict['MIS_X_Statistics'], axis=1)\n",
"\n",
"sallaryMisDf"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"id": "FwXG9Q54pbne"
},
"outputs": [],
"source": [
"# Baseline regressors plus the transformer-built MIS x Statistics interaction column.\n",
"salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics = sm.OLS(\n",
"    endog=sallaryMisDf[\"Salary\"],\n",
"    exog=sm.add_constant(\n",
"        sallaryMisDf[[\"GPA\", \"MIS\", \"Statistics\", \"misXStatistics1\"]]\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"id": "w7hob-54phqv"
},
"outputs": [],
"source": [
"salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit = salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics.fit()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x3473d1e20>"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from functions.exportModel import exportModel\n",
"\n",
"# Hand the fitted MIS x Statistics interaction model to exportModel with its declared schema.\n",
"exportModel({\n",
"    \"modelName\": \"salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit\",\n",
"    \"model\": salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit,\n",
"    \"description\": \"Predict Salary based on GPA MIS Statistics and interaction MIS * Statistics for sallaryMisDf\",\n",
"    \"modelType\": \"sm.OLS\",\n",
"    \"baseRelativePath\": \"..\",\n",
"    \"inputs\": [\n",
"        {\"name\": \"const\", \"type\": \"int\"},\n",
"        {\"name\": \"GPA\", \"type\": \"float\"},\n",
"        {\"name\": \"MIS\", \"type\": \"binary\"},\n",
"        {\"name\": \"Statistics\", \"type\": \"binary\"},\n",
"    ],\n",
"    \"transformers\": [\n",
"        {\n",
"            # The model was fit on the column \"misXStatistics1\", so the derived feature is\n",
"            # declared under that same name (it was \"misXStatistics\", which is not a fitted regressor).\n",
"            # NOTE(review): confirm how exportModel consumers use this name.\n",
"            \"name\": \"misXStatistics1\",\n",
"            \"transformer\": \"MIS_X_Statistics\",\n",
"        }\n",
"    ],\n",
"    \"output\": {\"name\": \"Salary\", \"type\": \"int\"},\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"id": "NMNYYAespkAn"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Salary R-squared: 0.810\n",
"Model: OLS Adj. R-squared: 0.803\n",
"Method: Least Squares F-statistic: 122.2\n",
"Date: Sun, 09 Jun 2024 Prob (F-statistic): 1.87e-40\n",
"Time: 01:24:53 Log-Likelihood: -296.63\n",
"No. Observations: 120 AIC: 603.3\n",
"Df Residuals: 115 BIC: 617.2\n",
"Df Model: 4 \n",
"Covariance Type: nonrobust \n",
"===================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"-----------------------------------------------------------------------------------\n",
"const 44.0993 1.803 24.464 0.000 40.529 47.670\n",
"GPA 6.7109 0.552 12.162 0.000 5.618 7.804\n",
"MIS 5.3250 0.725 7.343 0.000 3.889 6.761\n",
"Statistics 5.5350 0.704 7.861 0.000 4.140 6.930\n",
"misXStatistics1 3.4915 1.196 2.918 0.004 1.122 5.861\n",
"==============================================================================\n",
"Omnibus: 0.396 Durbin-Watson: 2.073\n",
"Prob(Omnibus): 0.820 Jarque-Bera (JB): 0.109\n",
"Skew: -0.013 Prob(JB): 0.947\n",
"Kurtosis: 3.146 Cond. No. 24.4\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"print(salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit.summary())"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"id": "ZnQnXfdRv7dP"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Salary</th>\n",
" <th>GPA</th>\n",
" <th>MIS</th>\n",
" <th>Statistics</th>\n",
" <th>misXStatistics</th>\n",
" <th>misXStatistics1</th>\n",
" <th>misXGpa</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>72</td>\n",
" <td>3.53</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>3.53</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>66</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>2.86</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>72</td>\n",
" <td>3.69</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>63</td>\n",
" <td>3.24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>65</td>\n",
" <td>3.21</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>66</td>\n",
" <td>3.27</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>63</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>2.86</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>78</td>\n",
" <td>3.04</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>3.04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>64</td>\n",
" <td>2.99</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119</th>\n",
" <td>66</td>\n",
" <td>3.65</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>120 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" Salary GPA MIS Statistics misXStatistics misXStatistics1 misXGpa\n",
"0 72 3.53 1 0 0 0.0 3.53\n",
"1 66 2.86 1 0 0 0.0 2.86\n",
"2 72 3.69 0 0 0 0.0 0.00\n",
"3 63 3.24 0 0 0 0.0 0.00\n",
"4 65 3.21 0 0 0 0.0 0.00\n",
".. ... ... ... ... ... ... ...\n",
"115 66 3.27 0 0 0 0.0 0.00\n",
"116 63 2.86 1 0 0 0.0 2.86\n",
"117 78 3.04 1 1 1 1.0 3.04\n",
"118 64 2.99 0 0 0 0.0 0.00\n",
"119 66 3.65 0 0 0 0.0 0.00\n",
"\n",
"[120 rows x 7 columns]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# MIS x GPA interaction built row by row through the shared transformer\n",
"# (presumably equivalent to: lambda row: row['MIS'] * row['GPA'] -- transformer source not shown here).\n",
"# Index with [] rather than .get(): a missing key then raises KeyError at the lookup\n",
"# instead of handing None to DataFrame.apply.\n",
"sallaryMisDf['misXGpa'] = sallaryMisDf.apply(transformersDict['MIS_X_GPA'], axis=1)\n",
"\n",
"sallaryMisDf"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"id": "6CjgMmDAwEPw"
},
"outputs": [],
"source": [
"# Baseline regressors plus the MIS x GPA interaction column.\n",
"salaryBasedOnGpaMisStatistics_Transfoms_misXGpa = sm.OLS(\n",
"    endog=sallaryMisDf[\"Salary\"],\n",
"    exog=sm.add_constant(\n",
"        sallaryMisDf[[\"GPA\", \"MIS\", \"Statistics\", \"misXGpa\"]]\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"id": "VmYH7tHmwMzm"
},
"outputs": [],
"source": [
"salaryBasedOnGpaMisStatistics_Transfoms_misXGpaFit = salaryBasedOnGpaMisStatistics_Transfoms_misXGpa.fit()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x3473f2a20>"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from functions.exportModel import exportModel\n",
"\n",
"# Hand the fitted MIS x GPA interaction model to exportModel with its declared schema.\n",
"exportModel({\n",
"    \"modelName\": \"salaryBasedOnGpaMisStatistics_Transfoms_misXGpaFit\",\n",
"    \"model\": salaryBasedOnGpaMisStatistics_Transfoms_misXGpaFit,\n",
"    \"description\": \"Predict Salary based on GPA MIS Statistics and interaction misXGpa for sallaryMisDf\",\n",
"    \"modelType\": \"sm.OLS\",\n",
"    \"baseRelativePath\": \"..\",\n",
"    \"inputs\": [\n",
"        {\"name\": \"const\", \"type\": \"int\"},\n",
"        {\"name\": \"GPA\", \"type\": \"float\"},\n",
"        {\"name\": \"MIS\", \"type\": \"binary\"},\n",
"        {\"name\": \"Statistics\", \"type\": \"binary\"},\n",
"    ],\n",
"    \"transformers\": [\n",
"        {\"name\": \"misXGpa\", \"transformer\": \"MIS_X_GPA\"},\n",
"    ],\n",
"    \"output\": {\"name\": \"Salary\", \"type\": \"int\"},\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "rL8pX5dTwP8H"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Salary R-squared: 0.795\n",
"Model: OLS Adj. R-squared: 0.788\n",
"Method: Least Squares F-statistic: 111.8\n",
"Date: Sun, 09 Jun 2024 Prob (F-statistic): 1.11e-38\n",
"Time: 01:24:53 Log-Likelihood: -300.91\n",
"No. Observations: 120 AIC: 611.8\n",
"Df Residuals: 115 BIC: 625.8\n",
"Df Model: 4 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 44.1653 2.307 19.142 0.000 39.595 48.736\n",
"GPA 6.5737 0.709 9.278 0.000 5.170 7.977\n",
"MIS 6.1605 3.873 1.591 0.114 -1.511 13.832\n",
"Statistics 6.7350 0.594 11.330 0.000 5.558 7.912\n",
"misXGpa 0.1381 1.184 0.117 0.907 -2.206 2.483\n",
"==============================================================================\n",
"Omnibus: 1.114 Durbin-Watson: 2.167\n",
"Prob(Omnibus): 0.573 Jarque-Bera (JB): 0.727\n",
"Skew: -0.167 Prob(JB): 0.695\n",
"Kurtosis: 3.185 Cond. No. 57.3\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"print(salaryBasedOnGpaMisStatistics_Transfoms_misXGpaFit.summary())"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"id": "z-idrSTJwi90"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Salary</th>\n",
" <th>GPA</th>\n",
" <th>MIS</th>\n",
" <th>Statistics</th>\n",
" <th>misXStatistics</th>\n",
" <th>misXStatistics1</th>\n",
" <th>misXGpa</th>\n",
" <th>statisticsXGpa</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>72</td>\n",
" <td>3.53</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>3.53</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>66</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>2.86</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>72</td>\n",
" <td>3.69</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>63</td>\n",
" <td>3.24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>65</td>\n",
" <td>3.21</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>66</td>\n",
" <td>3.27</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>63</td>\n",
" <td>2.86</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>2.86</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>78</td>\n",
" <td>3.04</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>3.04</td>\n",
" <td>3.04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>64</td>\n",
" <td>2.99</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119</th>\n",
" <td>66</td>\n",
" <td>3.65</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>120 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" Salary GPA MIS Statistics misXStatistics misXStatistics1 misXGpa \\\n",
"0 72 3.53 1 0 0 0.0 3.53 \n",
"1 66 2.86 1 0 0 0.0 2.86 \n",
"2 72 3.69 0 0 0 0.0 0.00 \n",
"3 63 3.24 0 0 0 0.0 0.00 \n",
"4 65 3.21 0 0 0 0.0 0.00 \n",
".. ... ... ... ... ... ... ... \n",
"115 66 3.27 0 0 0 0.0 0.00 \n",
"116 63 2.86 1 0 0 0.0 2.86 \n",
"117 78 3.04 1 1 1 1.0 3.04 \n",
"118 64 2.99 0 0 0 0.0 0.00 \n",
"119 66 3.65 0 0 0 0.0 0.00 \n",
"\n",
" statisticsXGpa \n",
"0 0.00 \n",
"1 0.00 \n",
"2 0.00 \n",
"3 0.00 \n",
"4 0.00 \n",
".. ... \n",
"115 0.00 \n",
"116 0.00 \n",
"117 3.04 \n",
"118 0.00 \n",
"119 0.00 \n",
"\n",
"[120 rows x 8 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Statistics x GPA interaction built row by row through the shared transformer\n",
"# (presumably equivalent to: lambda row: row['Statistics'] * row['GPA'] -- transformer source not shown here).\n",
"# Index with [] rather than .get(): a missing key then raises KeyError at the lookup\n",
"# instead of handing None to DataFrame.apply.\n",
"sallaryMisDf['statisticsXGpa'] = sallaryMisDf.apply(transformersDict['GPA_X_Statistics'], axis=1)\n",
"\n",
"sallaryMisDf"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"id": "im61d1RUwpQJ"
},
"outputs": [],
"source": [
"# Unfitted OLS model: Salary regressed on GPA, MIS, Statistics and the\n",
"# Statistics x GPA interaction term. sm.add_constant supplies the intercept ('const') column.\n",
"salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpa = sm.OLS(\n",
"    endog=sallaryMisDf[\"Salary\"],\n",
"    exog=sm.add_constant(\n",
"        sallaryMisDf[[\"GPA\", \"MIS\", \"Statistics\", \"statisticsXGpa\"]]\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"id": "WZ9eNcnMwvB3"
},
"outputs": [],
"source": [
"# Fit the Statistics x GPA interaction model; the resulting results object is the one\n",
"# passed to exportModel and summarised in the following cells.\n",
"salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpaFit = salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpa.fit()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x3473f1040>"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Export the fitted Statistics x GPA interaction model together with its metadata.\n",
"# NOTE(review): this import is repeated in the later export cell as well - consider\n",
"# hoisting it into the notebook's top imports cell.\n",
"from functions.exportModel import exportModel\n",
"exportModel({\n",
"    \"modelName\": \"salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpaFit\",\n",
"    \"model\": salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpaFit,\n",
"    # The only interaction term in this model is statisticsXGpa; the description previously\n",
"    # also named misXGpa, which is not one of this model's regressors.\n",
"    \"description\": \"Predict Salary based on GPA MIS Statistics and interaction statisticsXGpa\",\n",
"    \"modelType\": \"sm.OLS\",\n",
"    \"baseRelativePath\": \"..\",\n",
"    # Columns of the design matrix that are not derived by a transformer\n",
"    # ('const' is the intercept column added by sm.add_constant).\n",
"    \"inputs\": [\n",
"        {\n",
"            \"name\": \"const\",\n",
"            \"type\": \"int\"\n",
"        },\n",
"        {\n",
"            \"name\": \"GPA\",\n",
"            \"type\": \"float\"\n",
"        },\n",
"        {\n",
"            \"name\": \"MIS\",\n",
"            \"type\": \"binary\"\n",
"        },\n",
"        {\n",
"            \"name\": \"Statistics\",\n",
"            \"type\": \"binary\"\n",
"        }\n",
"    ],\n",
"    # Derived column -> transformer name (the same key used with transformersDict\n",
"    # when the column was built in this notebook).\n",
"    \"transformers\":[\n",
"        {\n",
"            \"name\": \"statisticsXGpa\",\n",
"            \"transformer\": \"GPA_X_Statistics\"\n",
"        }\n",
"    ],\n",
"    \"output\": {\n",
"        \"name\": \"Salary\",\n",
"        \"type\": \"int\"\n",
"    }\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"id": "P5MFMA4NwzcE"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Salary R-squared: 0.803\n",
"Model: OLS Adj. R-squared: 0.796\n",
"Method: Least Squares F-statistic: 116.9\n",
"Date: Sun, 09 Jun 2024 Prob (F-statistic): 1.44e-39\n",
"Time: 01:24:53 Log-Likelihood: -298.78\n",
"No. Observations: 120 AIC: 607.6\n",
"Df Residuals: 115 BIC: 621.5\n",
"Df Model: 4 \n",
"Covariance Type: nonrobust \n",
"==================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"----------------------------------------------------------------------------------\n",
"const 41.2856 2.267 18.215 0.000 36.796 45.775\n",
"GPA 7.4828 0.701 10.674 0.000 6.094 8.871\n",
"MIS 6.5400 0.588 11.118 0.000 5.375 7.705\n",
"Statistics 14.5988 3.891 3.752 0.000 6.892 22.306\n",
"statisticsXGpa -2.3890 1.168 -2.045 0.043 -4.703 -0.075\n",
"==============================================================================\n",
"Omnibus: 0.348 Durbin-Watson: 2.118\n",
"Prob(Omnibus): 0.840 Jarque-Bera (JB): 0.149\n",
"Skew: -0.079 Prob(JB): 0.928\n",
"Kurtosis: 3.068 Cond. No. 59.1\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"# Print the OLS summary table for the Statistics x GPA interaction model.\n",
"print(salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpaFit.summary())"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"id": "gJGNzwfdw-mg"
},
"outputs": [],
"source": [
"# Unfitted OLS model with the three main effects plus all three pairwise\n",
"# interaction columns; sm.add_constant supplies the intercept ('const') column.\n",
"salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpa = sm.OLS(\n",
"    endog=sallaryMisDf[\"Salary\"],\n",
"    exog=sm.add_constant(\n",
"        sallaryMisDf[[\n",
"            \"GPA\", \"MIS\", \"Statistics\",\n",
"            \"misXStatistics\", \"misXGpa\", \"statisticsXGpa\",\n",
"        ]]\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"id": "NPGVE5cFxW-q"
},
"outputs": [],
"source": [
"# Fit the model containing all three interaction terms; the resulting results object is\n",
"# the one passed to exportModel and summarised in the following cells.\n",
"salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpaFit = salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpa.fit()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x34741ec30>"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Export the fitted model that includes all three interaction terms, with its metadata.\n",
"# NOTE(review): this import also appears in the earlier export cell - consider hoisting\n",
"# it into the notebook's top imports cell.\n",
"from functions.exportModel import exportModel\n",
"\n",
"exportModel({\n",
"    \"modelName\": \"salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpaFit\",\n",
"    \"model\": salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpaFit,\n",
"    \"description\": \"Predict Salary based on GPA MIS Statistics and interaction misXStatistics, misXGpa, statisticsXGpa\",\n",
"    \"modelType\": \"sm.OLS\",\n",
"    \"baseRelativePath\": \"..\",\n",
"    # Design-matrix columns that are not derived by a transformer\n",
"    # ('const' is the intercept column added by sm.add_constant).\n",
"    \"inputs\": [\n",
"        {\"name\": \"const\", \"type\": \"int\"},\n",
"        {\"name\": \"GPA\", \"type\": \"float\"},\n",
"        {\"name\": \"MIS\", \"type\": \"binary\"},\n",
"        {\"name\": \"Statistics\", \"type\": \"binary\"},\n",
"    ],\n",
"    # Derived interaction columns, each paired with a transformer name\n",
"    # (presumably a transformersDict key - verify against exportModel).\n",
"    \"transformers\": [\n",
"        {\"name\": \"misXStatistics\", \"transformer\": \"MIS_X_Statistics\"},\n",
"        {\"name\": \"misXGpa\", \"transformer\": \"MIS_X_GPA\"},\n",
"        {\"name\": \"statisticsXGpa\", \"transformer\": \"GPA_X_Statistics\"},\n",
"    ],\n",
"    \"output\": {\"name\": \"Salary\", \"type\": \"int\"},\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"id": "qRpqQP9LxaO-"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Salary R-squared: 0.815\n",
"Model: OLS Adj. R-squared: 0.805\n",
"Method: Least Squares F-statistic: 83.09\n",
"Date: Sun, 09 Jun 2024 Prob (F-statistic): 4.15e-39\n",
"Time: 01:24:53 Log-Likelihood: -294.81\n",
"No. Observations: 120 AIC: 603.6\n",
"Df Residuals: 113 BIC: 623.1\n",
"Df Model: 6 \n",
"Covariance Type: nonrobust \n",
"==================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"----------------------------------------------------------------------------------\n",
"const 41.7092 2.481 16.809 0.000 36.793 46.625\n",
"GPA 7.4604 0.769 9.708 0.000 5.938 8.983\n",
"MIS 5.1669 3.757 1.375 0.172 -2.276 12.610\n",
"Statistics 12.6641 3.923 3.229 0.002 4.893 20.435\n",
"misXStatistics 3.3076 1.204 2.747 0.007 0.922 5.693\n",
"misXGpa 0.0512 1.158 0.044 0.965 -2.243 2.345\n",
"statisticsXGpa -2.1451 1.158 -1.853 0.066 -4.439 0.148\n",
"==============================================================================\n",
"Omnibus: 0.398 Durbin-Watson: 2.028\n",
"Prob(Omnibus): 0.820 Jarque-Bera (JB): 0.148\n",
"Skew: 0.067 Prob(JB): 0.928\n",
"Kurtosis: 3.108 Cond. No. 63.5\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"# Print the OLS summary table for the model with all three interaction terms.\n",
"print(salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpaFit.summary())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}