From 88cd2d9fd133b941fa7f9577b2e72d1e8a7dfa0e Mon Sep 17 00:00:00 2001 From: Jason Jafari Date: Sat, 15 Jun 2024 20:18:58 -0400 Subject: [PATCH] chore: bump version to 1.0.16 --- mlModelSaver/__init__.py | 28 ++++-- package.json | 2 +- pytests/test_mlModelSaver.py | 190 +++++++++++++---------------------- setup.py | 2 +- 4 files changed, 93 insertions(+), 129 deletions(-) diff --git a/mlModelSaver/__init__.py b/mlModelSaver/__init__.py index b680f14..2cb0aa6 100644 --- a/mlModelSaver/__init__.py +++ b/mlModelSaver/__init__.py @@ -77,16 +77,16 @@ class MlModelSaver: self.modelsFolder = f'{self.baseRelativePath}/{config.get('modelsFolder', '~~modelsFolder')}' ensure_directory_exists(self.modelsFolder) - def listOfPickels(self): + def listOfPickles(self): files = os.listdir(self.modelsFolder) - pickelsList = [file for file in files if file.endswith('.pkl')] - return pickelsList + picklesList = [file for file in files if file.endswith('.pkl')] + return picklesList def listOfModels(self): - pickelsList = self.listOfPickels() + picklesList = self.listOfPickles() modelsList = [] - for pickekFileName in pickelsList: - modelsList.append(pickekFileName.split(".pkl")[0]) + for pickleFileName in picklesList: + modelsList.append(pickleFileName.split(".pkl")[0]) return modelsList @@ -95,6 +95,12 @@ class MlModelSaver: supported_keys = [key for key, value in supportedModels.items() if value.get('supported')] return supported_keys + def loadModelByName(self, modelName): + filename = f'{self.modelsFolder}/{modelName}.pkl' + loaded_model = pickle.load(open(filename, 'rb')) + self.cachedModels[loaded_model.mlModelSaverConfig.get("modelName")] = loaded_model + return loaded_model + def exportModel(self, model, config): transformer = config.get("transformer", default_transformer) model.mlModelSaverTransformer = transformer @@ -111,8 +117,12 @@ class MlModelSaver: model.mlModelSavePredict = partial(mlModelSavePredict, model) filename = f'{self.modelsFolder}/{modelName}.pkl' pickle.dump(model, open(filename, 'wb')) - loaded_model = pickle.load(open(filename, 'rb')) - self.cachedModels[loaded_model.mlModelSaverConfig.get("modelName")] = loaded_model - return loaded_model + return self.loadModelByName(modelName) + + def getModel(self, modelName): + model = self.cachedModels.get(modelName, None) + if model != None: + return model + return self.loadModelByName(modelName) diff --git a/package.json b/package.json index 0017058..8d9316f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "mlModelSaver", - "version": "1.0.15", + "version": "1.0.16", "description": "Make life easier for save and serving ml models", "main": "index.js", "repository": "git@github.com:smartdev-ca/mlModelSaver.git", diff --git a/pytests/test_mlModelSaver.py b/pytests/test_mlModelSaver.py index 2cb0aa6..13ac61c 100644 --- a/pytests/test_mlModelSaver.py +++ b/pytests/test_mlModelSaver.py @@ -1,128 +1,82 @@ -import pickle -import json +# test_mlModelSaver.py +import sys import os -from functools import partial - -def ensure_directory_exists(directory_path): - """ - Ensure that the specified directory exists. If it doesn't, create it. - - Parameters: - directory_path (str): The path of the directory to ensure exists. - """ - os.makedirs(directory_path, exist_ok=True) +sys.path.insert( + 0, + os.path.abspath( + os.path.join( + os.path.dirname(__file__), + '..' + ) + ) +) -def check_file_exists(file_path): - """ - Check if the specified file exists. - - Parameters: - file_path (str): The path of the file to check. - - Returns: - bool: True if the file exists, False otherwise. - """ - if os.path.isfile(file_path): - print(f"File '{file_path}' exists.") - return True - else: - print(f"File '{file_path}' does not exist.") - return False +def test_ensureCLassInstance(): + from mlModelSaver import MlModelSaver + mlModelSaverInstance1 = MlModelSaver({ + "baseRelativePath": "test_baseRelativePath", + "modelsFolder": "test_modelsFolder" + }) + assert mlModelSaverInstance1.baseRelativePath == "test_baseRelativePath" + assert mlModelSaverInstance1.modelsFolder == "test_baseRelativePath/test_modelsFolder" + tesSupportedModels = mlModelSaverInstance1.showSupportedModels() + assert tesSupportedModels == ['sm.OLS'] -supportedModels = { - "sm.OLS": { - "supported": True - } -} - -supportedDataType = { - "int": { - "supported": True - }, - "float": { - "supported": True - }, - "binary":{ - "supported": True - } -} - -def default_transformer(x): - return x - - -def mlModelSavePredict(self, df, typeOfPredict = 'normal'): - dfAfterTransformation = self.mlModelSaverTransformer(df) - output = [] - outputsName = self.mlModelSaverConfig.get("outputs", [{"name": "result"}]) - outputsName = [item["name"] for item in outputsName] - if typeOfPredict == 'normal': - results = self.predict(dfAfterTransformation) - for value in results: - output.append({ - outputsName[0]: value, - }) - return output - -class MlModelSaver: - - cachedModels = {} - - def __init__(self, config): - self.baseRelativePath = config.get('baseRelativePath', '.') - self.modelsFolder = f'{self.baseRelativePath}/{config.get('modelsFolder', '~~modelsFolder')}' - ensure_directory_exists(self.modelsFolder) - - def listOfPickles(self): - files = os.listdir(self.modelsFolder) - picklesList = [file for file in files if file.endswith('.pkl')] - return picklesList - - def listOfModels(self): - picklesList = self.listOfPickles() - modelsList = [] - for pickleFileName in picklesList: - modelsList.append(pickleFileName.split(".pkl")[0]) - return modelsList +def test_OLS_LinearRegression(): + from mlModelSaver import MlModelSaver + import numpy as np + import pandas as pd + import statsmodels.api as sm + from helpers import add_constant_column + salaryMisDf = pd.read_excel("./datasets/Salary_MIS.xlsx") + salaryBasedOnGpaMisStatistics = sm.OLS( + salaryMisDf["Salary"], + add_constant_column(salaryMisDf[["GPA", "MIS", "Statistics"]]) + ) + salaryBasedOnGpaMisStatisticsFit = salaryBasedOnGpaMisStatistics.fit() + mlModelSaverInstance2 = MlModelSaver({ + "baseRelativePath": ".", + "modelsFolder": "~~tmp/testModels" + }) - def showSupportedModels(self): - supported_keys = [key for key, value in supportedModels.items() if value.get('supported')] - return supported_keys - - def loadModelByName(self, modelName): - filename = f'{self.modelsFolder}/{modelName}.pkl' - loaded_model = pickle.load(open(filename, 'rb')) - self.cachedModels[loaded_model.mlModelSaverConfig.get("modelName")] = loaded_model - return loaded_model - - def exportModel(self, model, config): - transformer = config.get("transformer", default_transformer) - model.mlModelSaverTransformer = transformer - if "transformer" in config: - del config["transformer"] - model.mlModelSaverConfig = config - isModelSupporter = supportedModels.get( - config.get("modelType", ''), - {} - ).get("supported", False) - if not isModelSupporter: - raise ValueError(f'only {self.showSupportedModels()} are supported and {config.get("modelType", '')} is not supported') - modelName = model.mlModelSaverConfig['modelName'] - model.mlModelSavePredict = partial(mlModelSavePredict, model) - filename = f'{self.modelsFolder}/{modelName}.pkl' - pickle.dump(model, open(filename, 'wb')) - return self.loadModelByName(modelName) - - def getModel(self, modelName): - model = self.cachedModels.get(modelName, None) - if model != None: - return model - return self.loadModelByName(modelName) - - + loadedModel = mlModelSaverInstance2.exportModel( + salaryBasedOnGpaMisStatisticsFit, + { + "modelName": "salaryBasedOnGpaMisStatistics", + "description": "Predict Salary based on GPA MIS Statistics for salaryMisDf", + "modelType": "sm.OLS", + "inputs": [ + { + "name": "GPA", + "type": "float", + }, + { + "name": "MIS", + "type": "binary" + }, + { + "name": "Statistics", + "type": "binary" + } + ], + "transformer": add_constant_column, + "outputs": [ + { + "name": "Salary", + "type": "int" + } + ] + } + ) + from mlModelSaver import check_file_exists + assert check_file_exists("./~~tmp/testModels/salaryBasedOnGpaMisStatistics.pkl") == True + testData = salaryMisDf[["GPA", "MIS", "Statistics"]].iloc[0:2] + predictedValueWithLoadedModel = loadedModel.mlModelSavePredict(testData, 'normal') + assert predictedValueWithLoadedModel == [{'Salary': 73.9924679451542}, {'Salary': 69.55525482441558}] + assert list(mlModelSaverInstance2.cachedModels.keys()) == ['salaryBasedOnGpaMisStatistics'] diff --git a/setup.py b/setup.py index 23f3730..e1cfee7 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name='mlModelSaver', - version='1.0.15', + version='1.0.16', packages=find_packages(), description='Make life easier for saving and serving ML models', long_description=open('DOCS.md').read(), # Assumes you have a README.md file