From f0a5c3dc13ee06aef935f06229812493c3ae82dd Mon Sep 17 00:00:00 2001 From: Jason Jafari Date: Sat, 15 Jun 2024 20:17:21 -0400 Subject: [PATCH] chore: bump version to 1.0.15 --- mlModelSaver/__init__.py | 14 +++ package.json | 2 +- pytests/test_mlModelSaver.py | 190 ++++++++++++++++++++++------------- setup.py | 2 +- 4 files changed, 134 insertions(+), 74 deletions(-) diff --git a/mlModelSaver/__init__.py b/mlModelSaver/__init__.py index eb49465..b680f14 100644 --- a/mlModelSaver/__init__.py +++ b/mlModelSaver/__init__.py @@ -77,6 +77,18 @@ class MlModelSaver: self.modelsFolder = f'{self.baseRelativePath}/{config.get('modelsFolder', '~~modelsFolder')}' ensure_directory_exists(self.modelsFolder) + def listOfPickels(self): + files = os.listdir(self.modelsFolder) + pickelsList = [file for file in files if file.endswith('.pkl')] + return pickelsList + + def listOfModels(self): + pickelsList = self.listOfPickels() + modelsList = [] + for pickekFileName in pickelsList: + modelsList.append(pickekFileName.split(".pkl")[0]) + return modelsList + def showSupportedModels(self): @@ -102,3 +114,5 @@ class MlModelSaver: loaded_model = pickle.load(open(filename, 'rb')) self.cachedModels[loaded_model.mlModelSaverConfig.get("modelName")] = loaded_model return loaded_model + + diff --git a/package.json b/package.json index cfd3450..0017058 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "mlModelSaver", - "version": "1.0.14", + "version": "1.0.15", "description": "Make life easier for save and serving ml models", "main": "index.js", "repository": "git@github.com:smartdev-ca/mlModelSaver.git", diff --git a/pytests/test_mlModelSaver.py b/pytests/test_mlModelSaver.py index 13ac61c..2cb0aa6 100644 --- a/pytests/test_mlModelSaver.py +++ b/pytests/test_mlModelSaver.py @@ -1,82 +1,128 @@ -# test_mlModelSaver.py +import pickle +import json -import sys import os -sys.path.insert( - 0, - os.path.abspath( - os.path.join( - os.path.dirname(__file__), - '..' - ) - ) -) +from functools import partial + +def ensure_directory_exists(directory_path): + """ + Ensure that the specified directory exists. If it doesn't, create it. + + Parameters: + directory_path (str): The path of the directory to ensure exists. + """ + os.makedirs(directory_path, exist_ok=True) -def test_ensureCLassInstance(): - from mlModelSaver import MlModelSaver - mlModelSaverInstance1 = MlModelSaver({ - "baseRelativePath": "test_baseRelativePath", - "modelsFolder": "test_modelsFolder" - }) - assert mlModelSaverInstance1.baseRelativePath == "test_baseRelativePath" - assert mlModelSaverInstance1.modelsFolder == "test_baseRelativePath/test_modelsFolder" - tesSupportedModels = mlModelSaverInstance1.showSupportedModels() - assert tesSupportedModels == ['sm.OLS'] +def check_file_exists(file_path): + """ + Check if the specified file exists. + + Parameters: + file_path (str): The path of the file to check. + + Returns: + bool: True if the file exists, False otherwise. + """ + if os.path.isfile(file_path): + print(f"File '{file_path}' exists.") + return True + else: + print(f"File '{file_path}' does not exist.") + return False -def test_OLS_LinearRegression(): - from mlModelSaver import MlModelSaver - import numpy as np - import pandas as pd - import statsmodels.api as sm - from helpers import add_constant_column - salaryMisDf = pd.read_excel("./datasets/Salary_MIS.xlsx") - salaryBasedOnGpaMisStatistics = sm.OLS( - salaryMisDf["Salary"], - add_constant_column(salaryMisDf[["GPA", "MIS", "Statistics"]]) - ) - salaryBasedOnGpaMisStatisticsFit = salaryBasedOnGpaMisStatistics.fit() - mlModelSaverInstance2 = MlModelSaver({ - "baseRelativePath": ".", - "modelsFolder": "~~tmp/testModels" - }) +supportedModels = { + "sm.OLS": { + "supported": True + } +} + +supportedDataType = { + "int": { + "supported": True + }, + "float": { + "supported": True + }, + "binary":{ + "supported": True + } +} + +def default_transformer(x): + return x + + +def mlModelSavePredict(self, df, typeOfPredict = 'normal'): + dfAfterTransformation = self.mlModelSaverTransformer(df) + output = [] + outputsName = self.mlModelSaverConfig.get("outputs", [{"name": "result"}]) + outputsName = [item["name"] for item in outputsName] + if typeOfPredict == 'normal': + results = self.predict(dfAfterTransformation) + for value in results: + output.append({ + outputsName[0]: value, + }) + return output + +class MlModelSaver: + + cachedModels = {} + + def __init__(self, config): + self.baseRelativePath = config.get('baseRelativePath', '.') + self.modelsFolder = f'{self.baseRelativePath}/{config.get('modelsFolder', '~~modelsFolder')}' + ensure_directory_exists(self.modelsFolder) + + def listOfPickles(self): + files = os.listdir(self.modelsFolder) + picklesList = [file for file in files if file.endswith('.pkl')] + return picklesList + + def listOfModels(self): + picklesList = self.listOfPickles() + modelsList = [] + for pickleFileName in picklesList: + modelsList.append(pickleFileName.split(".pkl")[0]) + return modelsList - loadedModel = mlModelSaverInstance2.exportModel( - salaryBasedOnGpaMisStatisticsFit, - { - "modelName": "salaryBasedOnGpaMisStatistics", - "description": "Predict Salary based on GPA MIS Statistics for salaryMisDf", - "modelType": "sm.OLS", - "inputs": [ - { - "name": "GPA", - "type": "float", - }, - { - "name": "MIS", - "type": "binary" - }, - { - "name": "Statistics", - "type": "binary" - } - ], - "transformer": add_constant_column, - "outputs": [ - { - "name": "Salary", - "type": "int" - } - ] - } - ) - from mlModelSaver import check_file_exists - assert check_file_exists("./~~tmp/testModels/salaryBasedOnGpaMisStatistics.pkl") == True - testData = salaryMisDf[["GPA", "MIS", "Statistics"]].iloc[0:2] - predictedValueWithLoadedModel = loadedModel.mlModelSavePredict(testData, 'normal') - assert predictedValueWithLoadedModel == [{'Salary': 73.9924679451542}, {'Salary': 69.55525482441558}] - assert list(mlModelSaverInstance2.cachedModels.keys()) == ['salaryBasedOnGpaMisStatistics'] + def showSupportedModels(self): + supported_keys = [key for key, value in supportedModels.items() if value.get('supported')] + return supported_keys + + def loadModelByName(self, modelName): + filename = f'{self.modelsFolder}/{modelName}.pkl' + loaded_model = pickle.load(open(filename, 'rb')) + self.cachedModels[loaded_model.mlModelSaverConfig.get("modelName")] = loaded_model + return loaded_model + + def exportModel(self, model, config): + transformer = config.get("transformer", default_transformer) + model.mlModelSaverTransformer = transformer + if "transformer" in config: + del config["transformer"] + model.mlModelSaverConfig = config + isModelSupporter = supportedModels.get( + config.get("modelType", ''), + {} + ).get("supported", False) + if not isModelSupporter: + raise ValueError(f'only {self.showSupportedModels()} are supported and {config.get("modelType", '')} is not supported') + modelName = model.mlModelSaverConfig['modelName'] + model.mlModelSavePredict = partial(mlModelSavePredict, model) + filename = f'{self.modelsFolder}/{modelName}.pkl' + pickle.dump(model, open(filename, 'wb')) + return self.loadModelByName(modelName) + + def getModel(self, modelName): + model = self.cachedModels.get(modelName, None) + if model != None: + return model + return self.loadModelByName(modelName) + + diff --git a/setup.py b/setup.py index 5367ef6..23f3730 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name='mlModelSaver', - version='1.0.14', + version='1.0.15', packages=find_packages(), description='Make life easier for saving and serving ML models', long_description=open('DOCS.md').read(), # Assumes you have a README.md file