Skip to content

Commit cd17c28

Browse files
Project import generated by Copybara. (#27)
1 parent 753c72f commit cd17c28

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+3180
-1503
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,28 @@
11
# Release History
22

3+
## 1.0.2 (2023-06-22)
4+
5+
### Behavior Changes
6+
- Model Registry: Prohibit non-snowflake-native models from being logged.
7+
- Model Registry: `_use_local_snowml` parameter in options of `deploy()` has been removed.
8+
- Model Registry: An `embed_local_ml_library` parameter, defaulting to `False`, has been added to the options of `log_model()`. When it is `False` (the default), the version of the local snowflake-ml-python library is recorded and used when deploying the model. When it is `True`, the local snowflake-ml-python library is embedded into the logged model and is used when you load or deploy the model.
9+
10+
### New Features
11+
- Model Registry: A new optional argument named `code_paths` has been added to the arguments of `log_model()` for users to specify additional code paths to be imported when loading and deploying the model.
12+
- Model Registry: A new optional argument named `options` has been added to the arguments of `log_model()` to specify any additional options when saving the model.
13+
- Model Development: Added metrics:
14+
- d2_absolute_error_score
15+
- d2_pinball_score
16+
- explained_variance_score
17+
- mean_absolute_error
18+
- mean_absolute_percentage_error
19+
- mean_squared_error
20+
21+
### Bug Fixes
22+
23+
- Model Development: `accuracy_score()` now works when given label column names are lists of a single value.
24+
25+
326
## 1.0.1 (2023-06-16)
427
### Behavior Changes
528

ci/build_and_run_tests.sh

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#!/bin/bash
2+
3+
# Usage
4+
# build_and_run_tests.sh <workspace> [--env pip|conda] [--with-snowpark]
5+
#
6+
# Args
7+
# workspace: path to the workspace, SnowML code should be in snowml directory.
8+
#
9+
# Optional Args
10+
# env: Set the environment, choose from pip and conda
11+
# with-snowpark: Build and test with snowpark in snowpark-python directory in the workspace.
12+
#
13+
# Action
14+
# - Copy the integration tests from workspace folder and execute them in testing Python env using pytest.
15+
# - This is to mimic the behavior of using snowml wheel package in user land.
16+
17+
# Strict mode: abort on errors, on unset variables and on failures anywhere
# inside a pipeline.
set -euo pipefail

PROG=$0

#######################################
# Print the usage text and terminate the script.
# Globals:   PROG (read)
# Arguments: $1 - exit status to terminate with (defaults to 1)
# Outputs:   status 0     -> usage line on stdout
#            status non-0 -> "Invalid usage" line plus usage line on stderr
# Returns:   never returns; exits with the given status
#######################################
help()
{
  local exit_code=${1:-1}
  local usage="Usage: ${PROG} <workspace> [--env pip|conda] [--with-snowpark]"

  if [[ "${exit_code}" == 0 ]]; then
    # Explicit -h/--help request: this is not an error, so do not print the
    # "Invalid usage" banner and keep the text on stdout.
    echo "${usage}"
  else
    echo "Invalid usage, must provide argument for workspace" >&2
    echo "${usage}" >&2
  fi
  exit "${exit_code}"
}
29+
30+
# --- Command line parsing ---------------------------------------------------
# The workspace argument is mandatory. Check the argument count explicitly:
# with `set -u` active, expanding an unset "$1" aborts the script with an
# "unbound variable" error before the usage text could ever be printed.
if (( $# == 0 )); then
  help 1
fi

# Allow `-h` / `--help` as the only argument instead of treating it as a
# workspace path.
case $1 in
  -h|--help)
    help 0
    ;;
esac

WORKSPACE=$1
shift

# Defaults for the optional arguments and the fixed sub-directory names that
# are looked up inside the workspace.
ENV="pip"
WITH_SNOWPARK=false
SNOWML_DIR="snowml"
SNOWPARK_DIR="snowpark-python"

while (( $# )); do
  case $1 in
    -e|--env)
      # The flag requires a value; without this check a trailing `--env`
      # would trip `set -u` on the "$1" below.
      if (( $# < 2 )); then
        help 1
      fi
      shift
      if [[ $1 == "pip" || $1 == "conda" ]]; then
        ENV=$1
      else
        help 1
      fi
      ;;
    --with-snowpark)
      WITH_SNOWPARK=true
      ;;
    -h|--help)
      help 0
      ;;
    *)
      help 1
      ;;
  esac
  shift
done
58+
59+
# Check that Python 3.8 is available by enabling the rh-python38 software
# collection.
# TODO(SNOW-845592): ideally we should download py3.8 from conda if not exist. Currently we just fail.
# Strict mode is relaxed only around `source`, so that a missing or failing
# enable script can be reported here instead of aborting the shell.
set +eu
source /opt/rh/rh-python38/enable
PYTHON38_EXIST=$?
set -eu

if [[ ${PYTHON38_EXIST} -ne 0 ]]; then
  # No temp directory has been created at this point, so there is nothing
  # to clean up; just report the problem and stop.
  echo "Failed to execute tests: Python3.8 is not installed." >&2
  exit "${PYTHON38_EXIST}"
fi
70+
71+
cd "${WORKSPACE}"
# Pin the workspace to an absolute path: all later references have the form
# "${WORKSPACE}/..." and are evaluated from other directories after pushd, so
# a relative workspace argument would otherwise resolve to the wrong place.
WORKSPACE=$(pwd)

# Create temp release folder and make sure it is removed on every exit path,
# including failures in the build or setup steps below.
TEMP_TEST_DIR=$(mktemp -d "${WORKSPACE}/tmp_XXXXX")
trap 'rm -rf -- "${TEMP_TEST_DIR}"' EXIT

pushd "${SNOWML_DIR}"
# Get the version from snowflake/ml/version.bzl
VERSION=$(grep -oE "VERSION = \"[0-9]+\\.[0-9]+\\.[0-9]+.*\"" snowflake/ml/version.bzl | cut -d'"' -f2)
echo "Extracted Package Version from code: ${VERSION}"

# Get optional requirements from snowflake/ml/requirements.bzl.
# The .bzl file is executed as Python and EXTRA_REQUIREMENTS['all'] is printed
# as a space separated list of double-quoted requirement strings.
OPTIONAL_REQUIREMENTS=$(python3 -c "import sys; exec(sys.stdin.read()); print(' '.join(map(lambda x: '\"'+x+'\"', EXTRA_REQUIREMENTS['all'])))" < snowflake/ml/requirements.bzl)

# Compare test required dependencies with wheel pkg dependencies and exclude tests if necessary
EXCLUDE_TESTS=$(mktemp "${TEMP_TEST_DIR}/exclude_tests_XXXXX")
./ci/get_excluded_tests.sh -f "${EXCLUDE_TESTS}"

# Copy tests into temp directory
pushd "${TEMP_TEST_DIR}"
rsync -av --exclude-from "${EXCLUDE_TESTS}" "${WORKSPACE}/${SNOWML_DIR}/tests" .
ls tests/integ/snowflake/ml
popd
popd
93+
94+
# Build snowml package (and optionally snowpark) for the selected environment.
# All path expansions are quoted so a workspace path containing spaces or glob
# characters does not get word-split.
if [[ "${ENV}" == "pip" ]]; then
  # Clean build workspace
  rm -f "${WORKSPACE}"/*.whl

  # Build Snowpark
  if [[ "${WITH_SNOWPARK}" == true ]]; then
    pushd "${SNOWPARK_DIR}"
    rm -rf venv
    python3.8 -m venv venv
    source venv/bin/activate
    python3.8 -m pip install -U pip setuptools wheel
    echo "Building snowpark wheel from main:$(git rev-parse HEAD)."
    pip wheel . --no-deps
    cp snowflake_snowpark_python-*.whl "${WORKSPACE}"
    deactivate
    popd
  fi

  # Build SnowML
  pushd "${SNOWML_DIR}"
  bazel build //snowflake/ml:wheel
  cp bazel-bin/snowflake/ml/snowflake_ml_python-*.whl "${WORKSPACE}"
  popd
else
  # Fail early, with an explicit message, when conda is not available.
  # On success this prints how `conda` resolves, for the build log.
  if ! command -v conda; then
    echo "Failed to build: conda is not available on PATH." >&2
    exit 1
  fi

  # Clean conda build workspace
  rm -rf "${WORKSPACE}/conda-bld"

  # Build Snowpark
  if [[ "${WITH_SNOWPARK}" == true ]]; then
    pushd "${SNOWPARK_DIR}"
    conda build recipe/ --python=3.8 --numpy=1.16 --croot "${WORKSPACE}/conda-bld"
    popd
  fi

  # Build SnowML
  pushd "${SNOWML_DIR}"
  # Build conda package
  conda build --channel=conda-forge --prefix-length 50 --croot "${WORKSPACE}/conda-bld" ci/conda_recipe
  conda build purge
  popd
fi
138+
139+
# Start testing
140+
pushd "${TEMP_TEST_DIR}"
141+
142+
# Set up common pytest flag
143+
# pytest flags shared by the pip and the conda test runs.
COMMON_PYTEST_FLAG=(
  --strict-markers      # Reject unregistered markers so a typo in a marker name is caught
  --import-mode=append
  -n 10
)
147+
148+
149+
# Install the freshly built package into a clean environment and run the
# copied tests against it. The pytest exit status is stored in TEST_RETCODE
# rather than aborting the script, so that cleanup still runs afterwards.
TEST_RETCODE=0
if [[ "${ENV}" == "pip" ]]; then
  # Copy wheel package
  cp "${WORKSPACE}/snowflake_ml_python-${VERSION}-py3-none-any.whl" "${TEMP_TEST_DIR}"

  # Create testing env
  python3.8 -m venv testenv
  source testenv/bin/activate
  # Install all of the packages in single line,
  # otherwise it will fail in dependency resolution.
  python3.8 -m pip install --upgrade pip
  python3.8 -m pip list
  python3.8 -m pip install "snowflake_ml_python-${VERSION}-py3-none-any.whl[all]" pytest-xdist inflection --no-cache-dir --force-reinstall
  if [[ "${WITH_SNOWPARK}" == true ]]; then
    cp "${WORKSPACE}"/snowflake_snowpark_python-*.whl "${TEMP_TEST_DIR}"
    # Collect the copied wheel(s) with a glob into an array instead of
    # word-splitting the output of `find`.
    snowpark_wheels=(./snowflake_snowpark_python-*.whl)
    python3.8 -m pip install "${snowpark_wheels[@]}" --force-reinstall
  fi
  python3.8 -m pip list

  # Set up pip specific pytest flags
  PIP_PYTEST_FLAG=()
  PIP_PYTEST_FLAG+=(-m "not pip_incompatible") # Filter out those pip incompatible tests.

  # Run the tests; `|| TEST_RETCODE=$?` records a failure without tripping `set -e`.
  TEST_SRCDIR="${TEMP_TEST_DIR}" python3.8 -m pytest "${COMMON_PYTEST_FLAG[@]}" "${PIP_PYTEST_FLAG[@]}" tests/ || TEST_RETCODE=$?
else
  # Create local conda channel
  conda index "${WORKSPACE}/conda-bld"

  # Clean conda cache
  conda clean --all --force-pkgs-dirs -y

  # Create testing env
  # OPTIONAL_REQUIREMENTS is a space separated list and is left unquoted on
  # purpose so that every requirement becomes its own argument.
  # shellcheck disable=SC2086
  conda create -y -p testenv -c "file://${WORKSPACE}/conda-bld" -c "https://repo.anaconda.com/pkgs/snowflake/" --override-channels "python=3.8" snowflake-ml-python pytest-xdist inflection ${OPTIONAL_REQUIREMENTS}
  conda list -p testenv

  # Run the tests; `|| TEST_RETCODE=$?` records a failure without tripping `set -e`.
  TEST_SRCDIR="${TEMP_TEST_DIR}" conda run -p testenv --no-capture-output python3.8 -m pytest "${COMMON_PYTEST_FLAG[@]}" tests/ || TEST_RETCODE=$?

  # Clean the conda environment
  conda env remove -p testenv
fi
196+
197+
# Leave the temp test directory before deleting it.
popd

# Clean up temp dir. `:?` aborts instead of running `rm -rf` on an empty path
# should the variable ever be unset or blank.
rm -rf -- "${TEMP_TEST_DIR:?}"

echo "Done running ${PROG}"
# Propagate the pytest result as the exit status of the whole script.
exit "${TEST_RETCODE}"

ci/conda_recipe/meta.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ build:
1717
noarch: python
1818
package:
1919
name: snowflake-ml-python
20-
version: 1.0.1
20+
version: 1.0.2
2121
requirements:
2222
build:
2323
- python

ci/copy_and_run_tests.sh

Lines changed: 0 additions & 85 deletions
This file was deleted.

codegen/sklearn_wrapper_template.py_template

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -548,26 +548,37 @@ class {transform.original_class_name}(BaseTransformer):
548548
# input cols need to match unquoted / quoted
549549
input_cols = self.input_cols
550550
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
551+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
551552

552553
estimator = self._sklearn_object
553554

554-
input_df = dataset[input_cols] # Select input columns with quoted column names.
555-
if hasattr(estimator, "feature_names_in_"):
556-
missing_features = []
557-
for i, f in enumerate(getattr(estimator, "feature_names_in_")):
558-
if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
559-
missing_features.append(f)
560-
561-
if len(missing_features) > 0:
562-
raise ValueError(
563-
"The feature names should match with those that were passed during fit.\n"
564-
f"Features seen during fit call but not present in the input: {{missing_features}}\n"
565-
f"Features in the input dataframe : {{input_cols}}\n"
566-
)
567-
input_df.columns = getattr(estimator, "feature_names_in_")
568-
else:
569-
# Just rename the column names to unquoted identifiers.
570-
input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
555+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
556+
missing_features = []
557+
features_in_dataset = set(dataset.columns)
558+
columns_to_select = []
559+
for i, f in enumerate(features_required_by_estimator):
560+
if (
561+
i >= len(input_cols)
562+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
563+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
564+
and quoted_input_cols[i] not in features_in_dataset)
565+
):
566+
missing_features.append(f)
567+
elif input_cols[i] in features_in_dataset:
568+
columns_to_select.append(input_cols[i])
569+
elif unquoted_input_cols[i] in features_in_dataset:
570+
columns_to_select.append(unquoted_input_cols[i])
571+
else:
572+
columns_to_select.append(quoted_input_cols[i])
573+
574+
if len(missing_features) > 0:
575+
raise ValueError(
576+
"The feature names should match with those that were passed during fit.\n"
577+
f"Features seen during fit call but not present in the input: {{missing_features}}\n"
578+
f"Features in the input dataframe : {{input_cols}}\n"
579+
)
580+
input_df = dataset[columns_to_select]
581+
input_df.columns = features_required_by_estimator
571582

572583
transformed_numpy_array = getattr(estimator, inference_method)(
573584
input_df

codegen/transformer_autogen_test_template.py_template

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,16 @@ import numpy as np
77
import pandas as pd
88
import json
99
import random
10+
import pytest
1011

1112
from typing import Optional, Any
1213
from absl.testing.absltest import TestCase, main
1314
{transform.test_estimator_imports}
1415
from snowflake.ml.utils.connection_params import SnowflakeLoginOptions
1516
from snowflake.snowpark import Session, DataFrame
1617

18+
19+
@pytest.mark.pip_incompatible
1720
class {transform.test_class_name}(TestCase):
1821
def setUp(self):
1922
"""Creates Snowpark and Snowflake environments for testing."""

0 commit comments

Comments
 (0)