From 20c63eda028e10b4c977f3e5b884d25654177a2e Mon Sep 17 00:00:00 2001 From: "TF.Text Team" Date: Tue, 23 Sep 2025 11:53:43 -0700 Subject: [PATCH] Fix the build for the tensorflow text PiperOrigin-RevId: 810525790 --- WORKSPACE | 63 +++++++++++-------- oss_scripts/pip_package/requirements.in | 4 +- tensorflow_text/__init__.py | 2 +- .../whitespace_tokenizer_config_builder.cc | 30 ++++----- 4 files changed, 55 insertions(+), 44 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index b5b90f39b..7a78a482d 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -4,34 +4,30 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( name = "icu", - strip_prefix = "icu-release-64-2", - sha256 = "dfc62618aa4bd3ca14a3df548cd65fe393155edd213e49c39f3a30ccd618fc27", + build_file = "//third_party/icu:BUILD.bzl", + strip_prefix = "icu-release-75-1", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/unicode-org/icu/archive/release-64-2.zip", - "https://github.com/unicode-org/icu/archive/release-64-2.zip", + "https://github.com/unicode-org/icu/archive/refs/tags/release-75-1.zip", ], - build_file = "//third_party/icu:BUILD.bzl", - patches = ["//third_party/icu:udata.patch"], - patch_args = ["-p1"], ) http_archive( name = "com_google_sentencepiece", - strip_prefix = "sentencepiece-0.1.96", + build_file = "//third_party/sentencepiece:BUILD", + patch_args = ["-p1"], + patches = ["//third_party/sentencepiece:sp.patch"], sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754", + strip_prefix = "sentencepiece-0.1.96", urls = [ - "https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip" + "https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip", ], - build_file = "//third_party/sentencepiece:BUILD", - patches = ["//third_party/sentencepiece:sp.patch"], - patch_args = ["-p1"], ) http_archive( name = "cppitertools", - urls = ["https://github.com/ryanhaining/cppitertools/archive/refs/tags/v2.0.zip"], sha256 
= "e56741b108d6baced98c4ccd83fd0d5a545937f2845978799c28d0312c0dee3d", strip_prefix = "cppitertools-2.0", + urls = ["https://github.com/ryanhaining/cppitertools/archive/refs/tags/v2.0.zip"], ) http_archive( @@ -56,10 +52,9 @@ http_archive( http_archive( name = "org_tensorflow", - strip_prefix = "tensorflow-40998f44c0c500ce0f6e3b1658dfbc54f838a82a", - sha256 = "5a5bc4599964c71277dcac0d687435291e5810d2ac2f6283cc96736febf73aaf", + strip_prefix = "tensorflow-2.20.0", urls = [ - "https://github.com/tensorflow/tensorflow/archive/40998f44c0c500ce0f6e3b1658dfbc54f838a82a.zip" + "https://github.com/tensorflow/tensorflow/archive/v2.20.0.zip", ], ) @@ -74,13 +69,13 @@ http_archive( http_archive( name = "pybind11", + build_file = "//third_party/pybind11:BUILD.bzl", + sha256 = "efc901aa0aab439a3fea6efeaf930b5a349fb06394bf845c64ce15a9cf8f0240", + strip_prefix = "pybind11-2.13.4", urls = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/pybind/pybind11/archive/v2.13.4.tar.gz", "https://github.com/pybind/pybind11/archive/v2.13.4.tar.gz", ], - sha256 = "efc901aa0aab439a3fea6efeaf930b5a349fb06394bf845c64ce15a9cf8f0240", - strip_prefix = "pybind11-2.13.4", - build_file = "//third_party/pybind11:BUILD.bzl", ) http_archive( @@ -99,27 +94,31 @@ load("//tensorflow_text:tftext.bzl", "py_deps_profile") py_deps_profile( name = "release_or_nightly", - requirements_in = "//oss_scripts/pip_package:requirements.in", - pip_repo_name = "pypi", deps_map = { - "tensorflow": ["tf-nightly", "tf_header_lib", "libtensorflow_framework"], - "tf-keras": ["tf-keras-nightly"] + "tensorflow": [ + "tf-nightly", + "tf_header_lib", + "libtensorflow_framework", + ], + "tf-keras": ["tf-keras-nightly"], }, + pip_repo_name = "pypi", + requirements_in = "//oss_scripts/pip_package:requirements.in", switch = { - "IS_NIGHTLY": "nightly" - } + "IS_NIGHTLY": "false", + }, ) load("@org_tensorflow//third_party/py:python_init_repositories.bzl", "python_init_repositories") python_init_repositories( + 
default_python_version = "system", requirements = { "3.9": "//oss_scripts/pip_package:requirements_lock_3_9.txt", "3.10": "//oss_scripts/pip_package:requirements_lock_3_10.txt", "3.11": "//oss_scripts/pip_package:requirements_lock_3_11.txt", "3.12": "//oss_scripts/pip_package:requirements_lock_3_12.txt", }, - default_python_version = "system", ) load("@org_tensorflow//third_party/py:python_init_toolchains.bzl", "python_init_toolchains") @@ -136,18 +135,28 @@ install_deps() # Initialize TensorFlow dependencies. load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3") + tf_workspace3() + load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2") + tf_workspace2() + load("@org_tensorflow//tensorflow:workspace1.bzl", "tf_workspace1") + tf_workspace1() + load("@org_tensorflow//tensorflow:workspace0.bzl", "tf_workspace0") + tf_workspace0() # Set up Android. load("@org_tensorflow//third_party/android:android_configure.bzl", "android_configure") -android_configure(name="local_config_android") + +android_configure(name = "local_config_android") + load("@local_config_android//:android.bzl", "android_workspace") + android_workspace() load( diff --git a/oss_scripts/pip_package/requirements.in b/oss_scripts/pip_package/requirements.in index 9cc9f75d0..179bb7c9f 100644 --- a/oss_scripts/pip_package/requirements.in +++ b/oss_scripts/pip_package/requirements.in @@ -2,7 +2,7 @@ setuptools==70.0.0 dm-tree==0.1.8 # Limit for macos support. numpy protobuf==4.25.3 # b/397977335 - Fix crash on python 3.9, 3.10. 
-tensorflow -tf-keras +tensorflow==2.20.0 +tf-keras<=3.11.3 tensorflow-datasets tensorflow-metadata diff --git a/tensorflow_text/__init__.py b/tensorflow_text/__init__.py index d40d3b6cc..a6b4e8bb4 100644 --- a/tensorflow_text/__init__.py +++ b/tensorflow_text/__init__.py @@ -110,4 +110,4 @@ ] remove_undocumented(__name__, _allowed_symbols) -__version__ = "2.13.0" +__version__ = "2.20.0" diff --git a/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.cc b/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.cc index e3a4d026b..83f11e80f 100644 --- a/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.cc +++ b/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.cc @@ -15,12 +15,11 @@ #include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h" #include <string> +#include <cassert> -#include "icu4c/source/common/unicode/uchar.h" #include "icu4c/source/common/unicode/umachine.h" #include "icu4c/source/common/unicode/uniset.h" -#include "icu4c/source/common/unicode/uset.h" -#include "icu4c/source/common/unicode/utf8.h" +#include "icu4c/source/common/unicode/unistr.h" #include "icu4c/source/common/unicode/utypes.h" namespace tensorflow { @@ -29,24 +28,27 @@ namespace text { namespace { const icu::UnicodeSet& WhiteSpaceSet() { - // Will not fail because the data is hardcoded in the ICU library. - UErrorCode error_code = U_ZERO_ERROR; - const USet* c_set = u_getBinaryPropertySet(UCHAR_WHITE_SPACE, &error_code); - // assert(U_SUCCESS(error_code)); - const icu::UnicodeSet* set = icu::UnicodeSet::fromUSet(c_set); - return *set; + // Use a C++11 static lambda to safely initialize the UnicodeSet. + static const icu::UnicodeSet white_space_set = []() { + UErrorCode status = U_ZERO_ERROR; + // The pattern "[:White_Space:]" selects all whitespace characters. + icu::UnicodeSet set(u"[:White_Space:]", status); + // This should never fail as the pattern is hardcoded and valid. 
+ assert(U_SUCCESS(status)); + return set; + }(); + return white_space_set; } } // namespace std::string BuildWhitespaceString() { - std::string str; - char buf[U8_MAX_LENGTH]; + icu::UnicodeString ustr; for (auto cp : WhiteSpaceSet().codePoints()) { - int len = 0; - U8_APPEND_UNSAFE(buf, len, cp); - str.append(buf, len); + ustr.append(cp); } + std::string str; + ustr.toUTF8String(str); return str; }