{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "xwFyEsosINqT" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "pKewSQysItJ-" }, "outputs": [], "source": [ "# https://www.statsmodels.org/stable/index.html\n", "import statsmodels.api as sm" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "Lz-DyAtNWsJR" }, "outputs": [], "source": [ "# Dataset: https://www.dropbox.com/scl/fi/zt2vtwhpz8ndblsxqdqx1/Salary_MIS.xlsx?rlkey=2uk6m7m9w90isv6zsynhhhpyv&st=gxumjns5&dl=1\n", "# The next cell reads it straight from this URL. To work offline, download the file,\n", "# add it to Colab, and use the commented-out local read_excel cell below instead." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "6rRHygNBIpgA" }, "outputs": [], "source": [ "sallaryMisDf = pd.read_excel(\"https://www.dropbox.com/scl/fi/zt2vtwhpz8ndblsxqdqx1/Salary_MIS.xlsx?rlkey=2uk6m7m9w90isv6zsynhhhpyv&st=gxumjns5&dl=1\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "0zM8FGMJXJ70" }, "outputs": [], "source": [ "# sallaryMisDf = pd.read_excel(\"./Salary_MIS.xlsx\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "wsIgDGYcXT_z" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SalaryGPAMISStatistics
0723.5310
1662.8610
2723.6900
3633.2400
4653.2100
...............
115663.2700
116632.8610
117783.0411
118642.9900
119663.6500
\n", "

120 rows × 4 columns

\n", "
" ], "text/plain": [ " Salary GPA MIS Statistics\n", "0 72 3.53 1 0\n", "1 66 2.86 1 0\n", "2 72 3.69 0 0\n", "3 63 3.24 0 0\n", "4 65 3.21 0 0\n", ".. ... ... ... ...\n", "115 66 3.27 0 0\n", "116 63 2.86 1 0\n", "117 78 3.04 1 1\n", "118 64 2.99 0 0\n", "119 66 3.65 0 0\n", "\n", "[120 rows x 4 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sallaryMisDf" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "nw2BHv7PmpVU" }, "outputs": [ { "data": { "text/plain": [ "(120, 4)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sallaryMisDf.shape" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "mWaKOoGvmrE8" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SalaryGPAMISStatistics
count120.000000120.000000120.000000120.000000
mean69.8750003.2427500.3166670.341667
std6.5945770.4938340.4671270.476257
min53.0000002.4100000.0000000.000000
25%65.7500002.8050000.0000000.000000
50%70.0000003.2800000.0000000.000000
75%73.2500003.6925001.0000001.000000
max88.0000003.9800001.0000001.000000
\n", "
" ], "text/plain": [ " Salary GPA MIS Statistics\n", "count 120.000000 120.000000 120.000000 120.000000\n", "mean 69.875000 3.242750 0.316667 0.341667\n", "std 6.594577 0.493834 0.467127 0.476257\n", "min 53.000000 2.410000 0.000000 0.000000\n", "25% 65.750000 2.805000 0.000000 0.000000\n", "50% 70.000000 3.280000 0.000000 0.000000\n", "75% 73.250000 3.692500 1.000000 1.000000\n", "max 88.000000 3.980000 1.000000 1.000000" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sallaryMisDf.describe()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "id": "w-fAHOgMmyH5" }, "outputs": [ { "data": { "text/plain": [ "(120, 4)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sallaryMisDf.shape" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "MDlD1b-aY4Yc" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
constGPAMISStatistics
01.03.5310
11.02.8610
21.03.6900
31.03.2400
41.03.2100
...............
1151.03.2700
1161.02.8610
1171.03.0411
1181.02.9900
1191.03.6500
\n", "

120 rows × 4 columns

\n", "
" ], "text/plain": [ " const GPA MIS Statistics\n", "0 1.0 3.53 1 0\n", "1 1.0 2.86 1 0\n", "2 1.0 3.69 0 0\n", "3 1.0 3.24 0 0\n", "4 1.0 3.21 0 0\n", ".. ... ... ... ...\n", "115 1.0 3.27 0 0\n", "116 1.0 2.86 1 0\n", "117 1.0 3.04 1 1\n", "118 1.0 2.99 0 0\n", "119 1.0 3.65 0 0\n", "\n", "[120 rows x 4 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sm.add_constant(sallaryMisDf[[\"GPA\", \"MIS\", \"Statistics\"]])" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "id": "MjFUWOq2m6P3" }, "outputs": [], "source": [ "salaryBasedOnGpaMisStatistics = sm.OLS(\n", " sallaryMisDf[\"Salary\"],\n", " sm.add_constant(sallaryMisDf[[\"GPA\", \"MIS\", \"Statistics\"]])\n", ")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "id": "3yteijRmnabA" }, "outputs": [], "source": [ "salaryBasedOnGpaMisStatisticsFit = salaryBasedOnGpaMisStatistics.fit()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from functions.exportModel import exportModel\n", "exportModel({\n", " \"modelName\": \"salaryBasedOnGpaMisStatisticsFit\",\n", " \"model\": salaryBasedOnGpaMisStatisticsFit,\n", " \"description\": \"Predict Salary based on GPA MIS Statistics for sallaryMisDf\",\n", " \"modelType\": \"sm.OLS\",\n", " \"baseRelativePath\": \"..\",\n", " \"inputs\": [\n", " {\n", " \"name\": \"const\",\n", " \"type\": \"int\"\n", " },\n", " {\n", " \"name\": \"GPA\",\n", " \"type\": \"float\"\n", " },\n", " {\n", " \"name\": \"MIS\",\n", " \"type\": \"binary\"\n", " },\n", " {\n", " \"name\": \"Statistics\",\n", " \"type\": \"binary\"\n", " }\n", " ],\n", " \"output\": {\n", " \"name\": \"Salary\",\n", " \"type\": \"int\"\n", " }\n", "})" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "adXMPcPPndd1" }, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Salary R-squared: 0.795\n", "Model: OLS Adj. R-squared: 0.790\n", "Method: Least Squares F-statistic: 150.3\n", "Date: Sun, 09 Jun 2024 Prob (F-statistic): 8.35e-40\n", "Time: 01:24:53 Log-Likelihood: -300.92\n", "No. Observations: 120 AIC: 609.8\n", "Df Residuals: 116 BIC: 621.0\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const 44.0072 1.860 23.662 0.000 40.324 47.691\n", "GPA 6.6227 0.569 11.649 0.000 5.497 7.749\n", "MIS 6.6071 0.595 11.098 0.000 5.428 7.786\n", "Statistics 6.7309 0.591 11.391 0.000 5.561 7.901\n", "==============================================================================\n", "Omnibus: 1.144 Durbin-Watson: 2.164\n", "Prob(Omnibus): 0.564 Jarque-Bera (JB): 0.758\n", "Skew: -0.172 Prob(JB): 0.685\n", "Kurtosis: 3.182 Cond. No. 24.4\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "print(salaryBasedOnGpaMisStatisticsFit.summary())" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "id": "H5PP4w6epEwm" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SalaryGPAMISStatistics
0723.5310
1662.8610
2723.6900
3633.2400
4653.2100
...............
115663.2700
116632.8610
117783.0411
118642.9900
119663.6500
\n", "

120 rows × 4 columns

\n", "
" ], "text/plain": [ " Salary GPA MIS Statistics\n", "0 72 3.53 1 0\n", "1 66 2.86 1 0\n", "2 72 3.69 0 0\n", "3 63 3.24 0 0\n", "4 65 3.21 0 0\n", ".. ... ... ... ...\n", "115 66 3.27 0 0\n", "116 63 2.86 1 0\n", "117 78 3.04 1 1\n", "118 64 2.99 0 0\n", "119 66 3.65 0 0\n", "\n", "[120 rows x 4 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sallaryMisDf" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "id": "jgXOZuY4ocyq" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SalaryGPAMISStatisticsmisXStatisticsmisXStatistics1
0723.531000.0
1662.861000.0
2723.690000.0
3633.240000.0
4653.210000.0
.....................
115663.270000.0
116632.861000.0
117783.041111.0
118642.990000.0
119663.650000.0
\n", "

120 rows × 6 columns

\n", "
" ], "text/plain": [ " Salary GPA MIS Statistics misXStatistics misXStatistics1\n", "0 72 3.53 1 0 0 0.0\n", "1 66 2.86 1 0 0 0.0\n", "2 72 3.69 0 0 0 0.0\n", "3 63 3.24 0 0 0 0.0\n", "4 65 3.21 0 0 0 0.0\n", ".. ... ... ... ... ... ...\n", "115 66 3.27 0 0 0 0.0\n", "116 63 2.86 1 0 0 0.0\n", "117 78 3.04 1 1 1 1.0\n", "118 64 2.99 0 0 0 0.0\n", "119 66 3.65 0 0 0 0.0\n", "\n", "[120 rows x 6 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from functions.transformers import transformersDict\n", "sallaryMisDf[\"misXStatistics\"] = sallaryMisDf[\"MIS\"] * sallaryMisDf[\"Statistics\"]\n", "# sallaryMisDf['misXStatistics1'] = sallaryMisDf.apply(lambda row: row['MIS'] * row['Statistics'], axis=1)\n", "sallaryMisDf['misXStatistics1'] = sallaryMisDf.apply(transformersDict.get('MIS_X_Statistics'), axis=1)\n", "\n", "sallaryMisDf" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "id": "FwXG9Q54pbne" }, "outputs": [], "source": [ "salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics = sm.OLS(\n", " sallaryMisDf[\"Salary\"],\n", " sm.add_constant(\n", " sallaryMisDf[[\n", " \"GPA\",\n", " \"MIS\",\n", " \"Statistics\",\n", " \"misXStatistics1\"\n", " ]]\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "id": "w7hob-54phqv" }, "outputs": [], "source": [ "salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit = salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics.fit()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from functions.exportModel import exportModel\n", "exportModel({\n", " \"modelName\": \"salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit\",\n", " \"model\": salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit,\n", " \"description\": \"Predict Salary based on GPA MIS Statistics and 
interaction MIS * Statistics for sallaryMisDf\",\n", " \"modelType\": \"sm.OLS\",\n", " \"baseRelativePath\": \"..\",\n", " \"inputs\": [\n", " {\n", " \"name\": \"const\",\n", " \"type\": \"int\"\n", " },\n", " {\n", " \"name\": \"GPA\",\n", " \"type\": \"float\"\n", " },\n", " {\n", " \"name\": \"MIS\",\n", " \"type\": \"binary\"\n", " },\n", " {\n", " \"name\": \"Statistics\",\n", " \"type\": \"binary\"\n", " }\n", " ],\n", " \"transformers\":[\n", " {\n", " \"name\": \"misXStatistics1\",\n", " \"transformer\": \"MIS_X_Statistics\"\n", " }\n", " ],\n", " \"output\": {\n", " \"name\": \"Salary\",\n", " \"type\": \"int\"\n", " }\n", "})" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "id": "NMNYYAespkAn" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Salary R-squared: 0.810\n", "Model: OLS Adj. R-squared: 0.803\n", "Method: Least Squares F-statistic: 122.2\n", "Date: Sun, 09 Jun 2024 Prob (F-statistic): 1.87e-40\n", "Time: 01:24:53 Log-Likelihood: -296.63\n", "No. Observations: 120 AIC: 603.3\n", "Df Residuals: 115 BIC: 617.2\n", "Df Model: 4 \n", "Covariance Type: nonrobust \n", "===================================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "-----------------------------------------------------------------------------------\n", "const 44.0993 1.803 24.464 0.000 40.529 47.670\n", "GPA 6.7109 0.552 12.162 0.000 5.618 7.804\n", "MIS 5.3250 0.725 7.343 0.000 3.889 6.761\n", "Statistics 5.5350 0.704 7.861 0.000 4.140 6.930\n", "misXStatistics1 3.4915 1.196 2.918 0.004 1.122 5.861\n", "==============================================================================\n", "Omnibus: 0.396 Durbin-Watson: 2.073\n", "Prob(Omnibus): 0.820 Jarque-Bera (JB): 0.109\n", "Skew: -0.013 Prob(JB): 0.947\n", "Kurtosis: 3.146 Cond. No. 
24.4\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "print(salaryBasedOnGpaMisStatistics_Transfoms_misXStatisticsFit.summary())" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "id": "ZnQnXfdRv7dP" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SalaryGPAMISStatisticsmisXStatisticsmisXStatistics1misXGpa
0723.531000.03.53
1662.861000.02.86
2723.690000.00.00
3633.240000.00.00
4653.210000.00.00
........................
115663.270000.00.00
116632.861000.02.86
117783.041111.03.04
118642.990000.00.00
119663.650000.00.00
\n", "

120 rows × 7 columns

\n", "
" ], "text/plain": [ " Salary GPA MIS Statistics misXStatistics misXStatistics1 misXGpa\n", "0 72 3.53 1 0 0 0.0 3.53\n", "1 66 2.86 1 0 0 0.0 2.86\n", "2 72 3.69 0 0 0 0.0 0.00\n", "3 63 3.24 0 0 0 0.0 0.00\n", "4 65 3.21 0 0 0 0.0 0.00\n", ".. ... ... ... ... ... ... ...\n", "115 66 3.27 0 0 0 0.0 0.00\n", "116 63 2.86 1 0 0 0.0 2.86\n", "117 78 3.04 1 1 1 1.0 3.04\n", "118 64 2.99 0 0 0 0.0 0.00\n", "119 66 3.65 0 0 0 0.0 0.00\n", "\n", "[120 rows x 7 columns]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sallaryMisDf['misXGpa'] = sallaryMisDf.apply(lambda row: row['MIS'] * row['GPA'], axis=1)\n", "sallaryMisDf['misXGpa'] = sallaryMisDf.apply(transformersDict.get('MIS_X_GPA'), axis=1)\n", "\n", "sallaryMisDf" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "id": "6CjgMmDAwEPw" }, "outputs": [], "source": [ "salaryBasedOnGpaMisStatistics_Transfoms_misXGpa = sm.OLS(\n", " sallaryMisDf[\"Salary\"],\n", " sm.add_constant(\n", " sallaryMisDf[[\n", " \"GPA\",\n", " \"MIS\",\n", " \"Statistics\",\n", " \"misXGpa\"\n", " ]]\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "id": "VmYH7tHmwMzm" }, "outputs": [], "source": [ "salaryBasedOnGpaMisStatistics_Transfoms_misXGpaFit = salaryBasedOnGpaMisStatistics_Transfoms_misXGpa.fit()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from functions.exportModel import exportModel\n", "exportModel({\n", " \"modelName\": \"salaryBasedOnGpaMisStatistics_Transfoms_misXGpaFit\",\n", " \"model\": salaryBasedOnGpaMisStatistics_Transfoms_misXGpaFit,\n", " \"description\": \"Predict Salary based on GPA MIS Statistics and interaction misXGpa for sallaryMisDf\",\n", " \"modelType\": \"sm.OLS\",\n", " \"baseRelativePath\": \"..\",\n", " \"inputs\": [\n", " {\n", " \"name\": \"const\",\n", 
" \"type\": \"int\"\n", " },\n", " {\n", " \"name\": \"GPA\",\n", " \"type\": \"float\"\n", " },\n", " {\n", " \"name\": \"MIS\",\n", " \"type\": \"binary\"\n", " },\n", " {\n", " \"name\": \"Statistics\",\n", " \"type\": \"binary\"\n", " }\n", " ],\n", " \"transformers\":[\n", " {\n", " \"name\": \"misXGpa\",\n", " \"transformer\": \"MIS_X_GPA\"\n", " }\n", " ],\n", " \"output\": {\n", " \"name\": \"Salary\",\n", " \"type\": \"int\"\n", " }\n", "})" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "id": "rL8pX5dTwP8H" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Salary R-squared: 0.795\n", "Model: OLS Adj. R-squared: 0.788\n", "Method: Least Squares F-statistic: 111.8\n", "Date: Sun, 09 Jun 2024 Prob (F-statistic): 1.11e-38\n", "Time: 01:24:53 Log-Likelihood: -300.91\n", "No. Observations: 120 AIC: 611.8\n", "Df Residuals: 115 BIC: 625.8\n", "Df Model: 4 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const 44.1653 2.307 19.142 0.000 39.595 48.736\n", "GPA 6.5737 0.709 9.278 0.000 5.170 7.977\n", "MIS 6.1605 3.873 1.591 0.114 -1.511 13.832\n", "Statistics 6.7350 0.594 11.330 0.000 5.558 7.912\n", "misXGpa 0.1381 1.184 0.117 0.907 -2.206 2.483\n", "==============================================================================\n", "Omnibus: 1.114 Durbin-Watson: 2.167\n", "Prob(Omnibus): 0.573 Jarque-Bera (JB): 0.727\n", "Skew: -0.167 Prob(JB): 0.695\n", "Kurtosis: 3.185 Cond. No. 
57.3\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "print(salaryBasedOnGpaMisStatistics_Transfoms_misXGpaFit.summary())" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "id": "z-idrSTJwi90" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SalaryGPAMISStatisticsmisXStatisticsmisXStatistics1misXGpastatisticsXGpa
0723.531000.03.530.00
1662.861000.02.860.00
2723.690000.00.000.00
3633.240000.00.000.00
4653.210000.00.000.00
...........................
115663.270000.00.000.00
116632.861000.02.860.00
117783.041111.03.043.04
118642.990000.00.000.00
119663.650000.00.000.00
\n", "

120 rows × 8 columns

\n", "
" ], "text/plain": [ " Salary GPA MIS Statistics misXStatistics misXStatistics1 misXGpa \\\n", "0 72 3.53 1 0 0 0.0 3.53 \n", "1 66 2.86 1 0 0 0.0 2.86 \n", "2 72 3.69 0 0 0 0.0 0.00 \n", "3 63 3.24 0 0 0 0.0 0.00 \n", "4 65 3.21 0 0 0 0.0 0.00 \n", ".. ... ... ... ... ... ... ... \n", "115 66 3.27 0 0 0 0.0 0.00 \n", "116 63 2.86 1 0 0 0.0 2.86 \n", "117 78 3.04 1 1 1 1.0 3.04 \n", "118 64 2.99 0 0 0 0.0 0.00 \n", "119 66 3.65 0 0 0 0.0 0.00 \n", "\n", " statisticsXGpa \n", "0 0.00 \n", "1 0.00 \n", "2 0.00 \n", "3 0.00 \n", "4 0.00 \n", ".. ... \n", "115 0.00 \n", "116 0.00 \n", "117 3.04 \n", "118 0.00 \n", "119 0.00 \n", "\n", "[120 rows x 8 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sallaryMisDf['statisticsXGpa'] = sallaryMisDf.apply(lambda row: row['Statistics'] * row['GPA'], axis=1)\n", "sallaryMisDf['statisticsXGpa'] = sallaryMisDf.apply(transformersDict.get('GPA_X_Statistics'), axis=1)\n", "\n", "sallaryMisDf" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "id": "im61d1RUwpQJ" }, "outputs": [], "source": [ "salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpa = sm.OLS(\n", " sallaryMisDf[\"Salary\"],\n", " sm.add_constant(\n", " sallaryMisDf[[\n", " \"GPA\",\n", " \"MIS\",\n", " \"Statistics\",\n", " \"statisticsXGpa\"\n", " ]]\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "id": "WZ9eNcnMwvB3" }, "outputs": [], "source": [ "salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpaFit = salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpa.fit()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from functions.exportModel import exportModel\n", "exportModel({\n", " \"modelName\": \"salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpaFit\",\n", " \"model\": 
salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpaFit,\n", " \"description\": \"Predict Salary based on GPA MIS Statistics and interaction statisticsXGpa for sallaryMisDf\",\n", " \"modelType\": \"sm.OLS\",\n", " \"baseRelativePath\": \"..\",\n", " \"inputs\": [\n", " {\n", " \"name\": \"const\",\n", " \"type\": \"int\"\n", " },\n", " {\n", " \"name\": \"GPA\",\n", " \"type\": \"float\"\n", " },\n", " {\n", " \"name\": \"MIS\",\n", " \"type\": \"binary\"\n", " },\n", " {\n", " \"name\": \"Statistics\",\n", " \"type\": \"binary\"\n", " }\n", " ],\n", " \"transformers\":[\n", " {\n", " \"name\": \"statisticsXGpa\",\n", " \"transformer\": \"GPA_X_Statistics\"\n", " }\n", " ],\n", " \"output\": {\n", " \"name\": \"Salary\",\n", " \"type\": \"int\"\n", " }\n", "})" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "id": "P5MFMA4NwzcE" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Salary R-squared: 0.803\n", "Model: OLS Adj. R-squared: 0.796\n", "Method: Least Squares F-statistic: 116.9\n", "Date: Sun, 09 Jun 2024 Prob (F-statistic): 1.44e-39\n", "Time: 01:24:53 Log-Likelihood: -298.78\n", "No. 
Observations: 120 AIC: 607.6\n", "Df Residuals: 115 BIC: 621.5\n", "Df Model: 4 \n", "Covariance Type: nonrobust \n", "==================================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "----------------------------------------------------------------------------------\n", "const 41.2856 2.267 18.215 0.000 36.796 45.775\n", "GPA 7.4828 0.701 10.674 0.000 6.094 8.871\n", "MIS 6.5400 0.588 11.118 0.000 5.375 7.705\n", "Statistics 14.5988 3.891 3.752 0.000 6.892 22.306\n", "statisticsXGpa -2.3890 1.168 -2.045 0.043 -4.703 -0.075\n", "==============================================================================\n", "Omnibus: 0.348 Durbin-Watson: 2.118\n", "Prob(Omnibus): 0.840 Jarque-Bera (JB): 0.149\n", "Skew: -0.079 Prob(JB): 0.928\n", "Kurtosis: 3.068 Cond. No. 59.1\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "print(salaryBasedOnGpaMisStatistics_Transfoms_statisticsXGpaFit.summary())" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "id": "gJGNzwfdw-mg" }, "outputs": [], "source": [ "salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpa = sm.OLS(\n", " sallaryMisDf[\"Salary\"],\n", " sm.add_constant(\n", " sallaryMisDf[[\n", " \"GPA\",\n", " \"MIS\",\n", " \"Statistics\",\n", " \"misXStatistics\",\n", " \"misXGpa\",\n", " \"statisticsXGpa\"\n", " ]]\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "id": "NPGVE5cFxW-q" }, "outputs": [], "source": [ "salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpaFit = salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpa.fit()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 33, "metadata": 
{}, "output_type": "execute_result" } ], "source": [ "from functions.exportModel import exportModel\n", "exportModel({\n", " \"modelName\": \"salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpaFit\",\n", " \"model\": salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpaFit,\n", " \"description\": \"Predict Salary based on GPA MIS Statistics and interaction misXStatistics, misXGpa, statisticsXGpa\",\n", " \"modelType\": \"sm.OLS\",\n", " \"baseRelativePath\": \"..\",\n", " \"inputs\": [\n", " {\n", " \"name\": \"const\",\n", " \"type\": \"int\"\n", " },\n", " {\n", " \"name\": \"GPA\",\n", " \"type\": \"float\"\n", " },\n", " {\n", " \"name\": \"MIS\",\n", " \"type\": \"binary\"\n", " },\n", " {\n", " \"name\": \"Statistics\",\n", " \"type\": \"binary\"\n", " }\n", " ],\n", " \"transformers\":[\n", " {\n", " \"name\": \"misXStatistics\",\n", " \"transformer\": \"MIS_X_Statistics\"\n", " },\n", " {\n", " \"name\": \"misXGpa\",\n", " \"transformer\": \"MIS_X_GPA\"\n", " },\n", " {\n", " \"name\": \"statisticsXGpa\",\n", " \"transformer\": \"GPA_X_Statistics\"\n", " }\n", " ],\n", " \"output\": {\n", " \"name\": \"Salary\",\n", " \"type\": \"int\"\n", " }\n", "})" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "id": "qRpqQP9LxaO-" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Salary R-squared: 0.815\n", "Model: OLS Adj. R-squared: 0.805\n", "Method: Least Squares F-statistic: 83.09\n", "Date: Sun, 09 Jun 2024 Prob (F-statistic): 4.15e-39\n", "Time: 01:24:53 Log-Likelihood: -294.81\n", "No. 
Observations: 120 AIC: 603.6\n", "Df Residuals: 113 BIC: 623.1\n", "Df Model: 6 \n", "Covariance Type: nonrobust \n", "==================================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "----------------------------------------------------------------------------------\n", "const 41.7092 2.481 16.809 0.000 36.793 46.625\n", "GPA 7.4604 0.769 9.708 0.000 5.938 8.983\n", "MIS 5.1669 3.757 1.375 0.172 -2.276 12.610\n", "Statistics 12.6641 3.923 3.229 0.002 4.893 20.435\n", "misXStatistics 3.3076 1.204 2.747 0.007 0.922 5.693\n", "misXGpa 0.0512 1.158 0.044 0.965 -2.243 2.345\n", "statisticsXGpa -2.1451 1.158 -1.853 0.066 -4.439 0.148\n", "==============================================================================\n", "Omnibus: 0.398 Durbin-Watson: 2.028\n", "Prob(Omnibus): 0.820 Jarque-Bera (JB): 0.148\n", "Skew: 0.067 Prob(JB): 0.928\n", "Kurtosis: 3.108 Cond. No. 63.5\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "print(salaryBasedOnGpaMisStatistics_Transfoms_misXStatistics_misXGpa_statisticsXGpaFit.summary())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 4 }