import pickle
import json

# Registry of model families that can be exported. A family is accepted only
# when its entry carries a truthy "supported" flag.
supportedModels = {
    "sm.OLS": {
        "supported": True
    }
}

# Registry of input/output data type names. It is declared here but not yet
# consulted by MlModelSaver.
supportedDataType = {
    "int": {
        "supported": True
    },
    "float": {
        "supported": True
    },
    "binary": {
        "supported": True
    }
}


class MlModelSaver:
    """Attach export metadata to fitted models of a supported type.

    The constructor takes a plain dict with two optional keys:

    * ``baseRelativePath`` -- stored as-is, defaults to ``'.'``.
    * ``modelsFolder`` -- stored as-is, defaults to ``'~~modelsFolder'``.
    """

    def __init__(self, config):
        """Store the path settings found in *config*, falling back to defaults."""
        self.baseRelativePath = config.get('baseRelativePath', '.')
        self.modelsFolder = config.get('modelsFolder', '~~modelsFolder')

    def showSupportedModels(self):
        """Return the names of every model type flagged as supported."""
        return [
            name
            for name, details in supportedModels.items()
            if details.get('supported')
        ]

    def exportModel(self, model, config):
        """Validate the model type in *config* and attach *config* to *model*.

        Args:
            model: The fitted model object; it receives a
                ``mlModelSaverConfig`` attribute holding *config*.
            config: Dict describing the export. Its ``"modelType"`` value
                must be one of the names from ``showSupportedModels()``.

        Raises:
            ValueError: If ``config["modelType"]`` is missing or is not a
                supported model type. The model is left untouched in that
                case.
        """
        # Read the type once; this also keeps quotes out of the f-string
        # below, which is a syntax error before Python 3.12 when the inner
        # and outer quote characters match.
        modelType = config.get("modelType", '')
        isModelSupported = supportedModels.get(modelType, {}).get("supported", False)
        if not isModelSupported:
            raise ValueError(
                f'only {self.showSupportedModels()} are supported '
                f'and {modelType} is not supported'
            )

        # Only tag the model once it is known to be exportable.
        model.mlModelSaverConfig = config
        print(model.mlModelSaverConfig)

        # TODO: persist the export. The drafted plan is to collect name,
        # description, inputs, optional transformers, output, modelType and
        # any model.customMetrics into a JSON config file, pickle the model
        # next to it, and return the model reloaded from disk.
"baseRelativePath": "test_baseRelativePath", + "modelsFolder": "test_modelsFolder" + }) + assert mlModelSaverInstance1.baseRelativePath == "test_baseRelativePath" + assert mlModelSaverInstance1.modelsFolder == "test_modelsFolder" + tesSupportedModels = mlModelSaverInstance1.showSupportedModels() + assert tesSupportedModels == ['sm.OLS'] + + +def test_OLS_LinearRegression(): + from mlModelSaver import MlModelSaver + import numpy as np + import pandas as pd + import statsmodels.api as sm + salaryMisDf = pd.read_excel("./datasets/Salary_MIS.xlsx") + salaryBasedOnGpaMisStatistics = sm.OLS( + salaryMisDf["Salary"], + sm.add_constant(salaryMisDf[["GPA", "MIS", "Statistics"]]) + ) + salaryBasedOnGpaMisStatisticsFit = salaryBasedOnGpaMisStatistics.fit() + mlModelSaverInstance2 = MlModelSaver({ + "baseRelativePath": ".", + "modelsFolder": "~~tmp/models" + }) + + mlModelSaverInstance2.exportModel( + salaryBasedOnGpaMisStatisticsFit, + { + "modelName": "salaryBasedOnGpaMisStatisticsFit", + "description": "Predict Salary based on GPA MIS Statistics for sallaryMisDf", + "modelType": "sm.OLS", + "inputs": [ + { + "name": "GPA", + "type": "float", + }, + { + "name": "MIS", + "type": "binary" + }, + { + "name": "Statistics", + "type": "binary" + } + ], + "output": [ + { + "name": "Salary", + "type": "int" + } + ] + } + ) + assert 2 == 2 diff --git a/releaseNewVersionAndAddTag.sh b/releaseNewVersionAndAddTag.sh index 525d66c..cdf3b9b 100755 --- a/releaseNewVersionAndAddTag.sh +++ b/releaseNewVersionAndAddTag.sh @@ -8,6 +8,8 @@ new_version=$(node -p "require('./package.json').version") rm -rf build rm -rf dist +pytest -s -v + python setup.py sdist bdist_wheel twine upload dist/* diff --git a/requirements.txt b/requirements.txt index 1a876e6..43ec90f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,11 @@ +autopep8==2.2.0 certifi==2024.6.2 charset-normalizer==3.3.2 docutils==0.21.2 +et-xmlfile==1.1.0 idna==3.7 importlib_metadata==7.1.0 +iniconfig==2.0.0 
jaraco.classes==3.4.0 jaraco.context==5.3.0 jaraco.functools==4.0.1 @@ -11,15 +14,29 @@ markdown-it-py==3.0.0 mdurl==0.1.2 more-itertools==10.3.0 nh3==0.2.17 +numpy==1.26.4 +openpyxl==3.1.4 +packaging==24.1 +pandas==2.2.2 +patsy==0.5.6 pkginfo==1.11.1 +pluggy==1.5.0 +pycodestyle==2.11.1 Pygments==2.18.0 +pytest==8.2.2 +python-dateutil==2.9.0.post0 +pytz==2024.1 readme_renderer==43.0 requests==2.32.3 requests-toolbelt==1.0.0 rfc3986==2.0.0 rich==13.7.1 +scipy==1.13.1 setuptools==70.0.0 +six==1.16.0 +statsmodels==0.14.2 twine==5.1.0 +tzdata==2024.1 urllib3==2.2.1 wheel==0.43.0 zipp==3.19.2 diff --git a/setup.py b/setup.py index 830d633..8f0396d 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name='mlModelSaver', - version='1.0.10', + version='1.0.11', packages=find_packages(), description='Make life easier for saving and serving ML models', long_description=open('DOCS.md').read(), # Assumes you have a README.md file