diff --git a/README.md b/README.md index 585d6b3d..05579841 100644 --- a/README.md +++ b/README.md @@ -1 +1,63 @@ # Example MLflow project +This is a simple example ML project that demonstrates how to use [MLflow](https://mlflow.org/) to track machine learning experiments using the Wine Quality dataset. + +## 📁 Project Structure +. +├── conda.yaml # Conda environment dependencies +├── LICENSE.txt # License file +├── MLproject # MLflow project configuration +├── README.md # Project description (this file) +├── train.py # Training script +└── wine-quality.csv # Dataset + +## 📦 Requirements + +- Python 3.7+ +- MLflow +- scikit-learn +- pandas +- numpy + +You can install dependencies using: + +```bash +pip install -r requirements.txt +Or using the conda.yaml file: + +bash +Copy +Edit +conda env create -f conda.yaml +conda activate mlflow-env +🧪 Running the Training Script +To run the training script with MLflow: + +bash +Copy +Edit +python train.py 0.5 0.5 +Or with MLflow CLI: + +bash +Copy +Edit +mlflow run . -P alpha=0.5 -P l1_ratio=0.5 +📈 Logged Metrics +This project logs the following metrics: + +RMSE (Root Mean Squared Error) + +MAE (Mean Absolute Error) + +R² (R-squared) + +The trained model is also saved and logged with MLflow +📚 Dataset Info +The dataset used is the Wine Quality dataset from UCI Machine Learning Repository. + +css +Copy +Edit +P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. +"Modeling wine preferences by data mining from physicochemical properties". +Decision Support Systems, Elsevier, 47(4):547-553, 2009. 
\ No newline at end of file diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/meta.yaml b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/meta.yaml new file mode 100644 index 00000000..caa1996b --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/meta.yaml @@ -0,0 +1,14 @@ +artifact_uri: file:///C:/Users/hp/mlflow-example/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/artifacts +end_time: 1753625116637 +entry_point_name: '' +experiment_id: '0' +lifecycle_stage: active +run_id: 79442e6bb3ff42c5a7ca58e4858e5f62 +run_name: carefree-jay-416 +source_name: '' +source_type: 4 +source_version: '' +start_time: 1753625111329 +status: 3 +tags: [] +user_id: hp diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/metrics/mae b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/metrics/mae new file mode 100644 index 00000000..fe16dc4f --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/metrics/mae @@ -0,0 +1,2 @@ +1753625111412 0.6278761410160693 0 +1753625111412 0.6278761410160693 0 diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/metrics/r2 b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/metrics/r2 new file mode 100644 index 00000000..2ddac491 --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/metrics/r2 @@ -0,0 +1,2 @@ +1753625111400 0.12678721972772689 0 +1753625111400 0.12678721972772689 0 diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/metrics/rmse b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/metrics/rmse new file mode 100644 index 00000000..d4972ac1 --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/metrics/rmse @@ -0,0 +1,2 @@ +1753625111380 0.82224284975954 0 +1753625111380 0.82224284975954 0 diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/outputs/m-716f80ebde1c4e5bb22cf127c5367bca/meta.yaml b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/outputs/m-716f80ebde1c4e5bb22cf127c5367bca/meta.yaml new file mode 100644 index 00000000..520392b4 --- /dev/null +++ 
b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/outputs/m-716f80ebde1c4e5bb22cf127c5367bca/meta.yaml @@ -0,0 +1,6 @@ +destination_id: m-716f80ebde1c4e5bb22cf127c5367bca +destination_type: MODEL_OUTPUT +source_id: m-716f80ebde1c4e5bb22cf127c5367bca +source_type: RUN_OUTPUT +step: 0 +tags: {} diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/params/alpha b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/params/alpha new file mode 100644 index 00000000..ea2303bc --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/params/alpha @@ -0,0 +1 @@ +0.5 \ No newline at end of file diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/params/l1_ratio b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/params/l1_ratio new file mode 100644 index 00000000..ea2303bc --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/params/l1_ratio @@ -0,0 +1 @@ +0.5 \ No newline at end of file diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.runName b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.runName new file mode 100644 index 00000000..999e293b --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.runName @@ -0,0 +1 @@ +carefree-jay-416 \ No newline at end of file diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.source.git.commit b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.source.git.commit new file mode 100644 index 00000000..60d5b264 --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.source.git.commit @@ -0,0 +1 @@ +0651d1c962aa35e4dd02608c51a7b0efc2412407 \ No newline at end of file diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.source.name b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.source.name new file mode 100644 index 00000000..eadf20fe --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.source.name @@ -0,0 +1 @@ +train.py \ No newline at end of file diff --git 
a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.source.type b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.source.type new file mode 100644 index 00000000..0c2c1fe9 --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.source.type @@ -0,0 +1 @@ +LOCAL \ No newline at end of file diff --git a/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.user b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.user new file mode 100644 index 00000000..402376f9 --- /dev/null +++ b/mlruns/0/79442e6bb3ff42c5a7ca58e4858e5f62/tags/mlflow.user @@ -0,0 +1 @@ +hp \ No newline at end of file diff --git a/mlruns/0/meta.yaml b/mlruns/0/meta.yaml new file mode 100644 index 00000000..8ec245da --- /dev/null +++ b/mlruns/0/meta.yaml @@ -0,0 +1,6 @@ +artifact_location: file:///C:/Users/hp/mlflow-example/mlruns/0 +creation_time: 1753625110390 +experiment_id: '0' +last_update_time: 1753625110390 +lifecycle_stage: active +name: Default diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/MLmodel b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/MLmodel new file mode 100644 index 00000000..a9a72b66 --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/MLmodel @@ -0,0 +1,22 @@ +artifact_path: file:///C:/Users/hp/mlflow-example/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts +flavors: + python_function: + env: + conda: conda.yaml + virtualenv: python_env.yaml + loader_module: mlflow.sklearn + model_path: model.pkl + predict_fn: predict + python_version: 3.10.0 + sklearn: + code: null + pickled_model: model.pkl + serialization_format: cloudpickle + sklearn_version: 1.7.0 +mlflow_version: 3.1.4 +model_id: m-716f80ebde1c4e5bb22cf127c5367bca +model_size_bytes: 879 +model_uuid: m-716f80ebde1c4e5bb22cf127c5367bca +prompts: null +run_id: 79442e6bb3ff42c5a7ca58e4858e5f62 +utc_time_created: '2025-07-27 14:05:11.490711' diff --git 
a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/conda.yaml b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/conda.yaml new file mode 100644 index 00000000..3a133858 --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/conda.yaml @@ -0,0 +1,14 @@ +channels: +- conda-forge +dependencies: +- python=3.10.0 +- pip<=21.2.3 +- pip: + - mlflow==3.1.4 + - cloudpickle==3.1.1 + - numpy==2.2.6 + - pandas==2.3.0 + - psutil==7.0.0 + - scikit-learn==1.7.0 + - scipy==1.15.3 +name: mlflow-env diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/model.pkl b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/model.pkl new file mode 100644 index 00000000..4545c210 Binary files /dev/null and b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/model.pkl differ diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/python_env.yaml b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/python_env.yaml new file mode 100644 index 00000000..000597a8 --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/python_env.yaml @@ -0,0 +1,7 @@ +python: 3.10.0 +build_dependencies: +- pip==21.2.3 +- setuptools==57.4.0 +- wheel +dependencies: +- -r requirements.txt diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/requirements.txt b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/requirements.txt new file mode 100644 index 00000000..94156299 --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts/requirements.txt @@ -0,0 +1,7 @@ +mlflow==3.1.4 +cloudpickle==3.1.1 +numpy==2.2.6 +pandas==2.3.0 +psutil==7.0.0 +scikit-learn==1.7.0 +scipy==1.15.3 \ No newline at end of file diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/meta.yaml b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/meta.yaml new file mode 100644 index 00000000..5e938f3a --- /dev/null +++ 
b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/meta.yaml @@ -0,0 +1,10 @@ +artifact_location: file:///C:/Users/hp/mlflow-example/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/artifacts +creation_timestamp: 1753625111456 +experiment_id: '0' +last_updated_timestamp: 1753625116628 +model_id: m-716f80ebde1c4e5bb22cf127c5367bca +model_type: null +name: model +source_run_id: 79442e6bb3ff42c5a7ca58e4858e5f62 +status: 2 +status_message: null diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/metrics/mae b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/metrics/mae new file mode 100644 index 00000000..66e9ad03 --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/metrics/mae @@ -0,0 +1 @@ +1753625111412 0.6278761410160693 0 79442e6bb3ff42c5a7ca58e4858e5f62 diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/metrics/r2 b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/metrics/r2 new file mode 100644 index 00000000..a6264d40 --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/metrics/r2 @@ -0,0 +1 @@ +1753625111400 0.12678721972772689 0 79442e6bb3ff42c5a7ca58e4858e5f62 diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/metrics/rmse b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/metrics/rmse new file mode 100644 index 00000000..74a1f48d --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/metrics/rmse @@ -0,0 +1 @@ +1753625111380 0.82224284975954 0 79442e6bb3ff42c5a7ca58e4858e5f62 diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/params/alpha b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/params/alpha new file mode 100644 index 00000000..ea2303bc --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/params/alpha @@ -0,0 +1 @@ +0.5 \ No newline at end of file diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/params/l1_ratio b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/params/l1_ratio new file mode 100644 
index 00000000..ea2303bc --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/params/l1_ratio @@ -0,0 +1 @@ +0.5 \ No newline at end of file diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.source.git.commit b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.source.git.commit new file mode 100644 index 00000000..60d5b264 --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.source.git.commit @@ -0,0 +1 @@ +0651d1c962aa35e4dd02608c51a7b0efc2412407 \ No newline at end of file diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.source.name b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.source.name new file mode 100644 index 00000000..eadf20fe --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.source.name @@ -0,0 +1 @@ +train.py \ No newline at end of file diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.source.type b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.source.type new file mode 100644 index 00000000..0c2c1fe9 --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.source.type @@ -0,0 +1 @@ +LOCAL \ No newline at end of file diff --git a/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.user b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.user new file mode 100644 index 00000000..402376f9 --- /dev/null +++ b/mlruns/0/models/m-716f80ebde1c4e5bb22cf127c5367bca/tags/mlflow.user @@ -0,0 +1 @@ +hp \ No newline at end of file diff --git a/train.py b/train.py index 26765faf..062cffb4 100644 --- a/train.py +++ b/train.py @@ -6,6 +6,7 @@ import warnings import sys +#importing the necessary libraries import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score @@ -15,16 +16,15 @@ import mlflow import mlflow.sklearn - +#define a function to evaluate the 
metrics def eval_metrics(actual, pred): - rmse = np.sqrt(mean_squared_error(actual, pred)) - mae = mean_absolute_error(actual, pred) - r2 = r2_score(actual, pred) + rmse = np.sqrt(mean_squared_error(actual, pred)) #root mean squared error + mae = mean_absolute_error(actual, pred) #mean absolute error + r2 = r2_score(actual, pred) #R-squared score return rmse, mae, r2 - - if __name__ == "__main__": + #ignore warnings for clean output warnings.filterwarnings("ignore") np.random.seed(40) @@ -47,7 +47,7 @@ def eval_metrics(actual, pred): with mlflow.start_run(): lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42) lr.fit(train_x, train_y) - + # Predicting the quality of wine using the trained model predicted_qualities = lr.predict(test_x) (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities) @@ -56,11 +56,11 @@ def eval_metrics(actual, pred): print(" RMSE: %s" % rmse) print(" MAE: %s" % mae) print(" R2: %s" % r2) - + # Log parameters and metrics to MLflow mlflow.log_param("alpha", alpha) mlflow.log_param("l1_ratio", l1_ratio) mlflow.log_metric("rmse", rmse) mlflow.log_metric("r2", r2) mlflow.log_metric("mae", mae) - + # save the trained model in MLflow mlflow.sklearn.log_model(lr, "model")