Skip to content

Commit 20c63ed

Browse files
Fix the build for TensorFlow Text
PiperOrigin-RevId: 810525790
1 parent aa839b1 commit 20c63ed

File tree

4 files changed

+55
-44
lines changed

4 files changed

+55
-44
lines changed

WORKSPACE

Lines changed: 36 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,34 +4,30 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
44

55
http_archive(
66
name = "icu",
7-
strip_prefix = "icu-release-64-2",
8-
sha256 = "dfc62618aa4bd3ca14a3df548cd65fe393155edd213e49c39f3a30ccd618fc27",
7+
build_file = "//third_party/icu:BUILD.bzl",
8+
strip_prefix = "icu-release-75-1",
99
urls = [
10-
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/unicode-org/icu/archive/release-64-2.zip",
11-
"https://github.com/unicode-org/icu/archive/release-64-2.zip",
10+
"https://github.com/unicode-org/icu/archive/refs/tags/release-75-1.zip",
1211
],
13-
build_file = "//third_party/icu:BUILD.bzl",
14-
patches = ["//third_party/icu:udata.patch"],
15-
patch_args = ["-p1"],
1612
)
1713

1814
http_archive(
1915
name = "com_google_sentencepiece",
20-
strip_prefix = "sentencepiece-0.1.96",
16+
build_file = "//third_party/sentencepiece:BUILD",
17+
patch_args = ["-p1"],
18+
patches = ["//third_party/sentencepiece:sp.patch"],
2119
sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754",
20+
strip_prefix = "sentencepiece-0.1.96",
2221
urls = [
23-
"https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip"
22+
"https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip",
2423
],
25-
build_file = "//third_party/sentencepiece:BUILD",
26-
patches = ["//third_party/sentencepiece:sp.patch"],
27-
patch_args = ["-p1"],
2824
)
2925

3026
http_archive(
3127
name = "cppitertools",
32-
urls = ["https://github.com/ryanhaining/cppitertools/archive/refs/tags/v2.0.zip"],
3328
sha256 = "e56741b108d6baced98c4ccd83fd0d5a545937f2845978799c28d0312c0dee3d",
3429
strip_prefix = "cppitertools-2.0",
30+
urls = ["https://github.com/ryanhaining/cppitertools/archive/refs/tags/v2.0.zip"],
3531
)
3632

3733
http_archive(
@@ -56,10 +52,9 @@ http_archive(
5652

5753
http_archive(
5854
name = "org_tensorflow",
59-
strip_prefix = "tensorflow-40998f44c0c500ce0f6e3b1658dfbc54f838a82a",
60-
sha256 = "5a5bc4599964c71277dcac0d687435291e5810d2ac2f6283cc96736febf73aaf",
55+
strip_prefix = "tensorflow-2.20.0",
6156
urls = [
62-
"https://github.com/tensorflow/tensorflow/archive/40998f44c0c500ce0f6e3b1658dfbc54f838a82a.zip"
57+
"https://github.com/tensorflow/tensorflow/archive/v2.20.0.zip",
6358
],
6459
)
6560

@@ -74,13 +69,13 @@ http_archive(
7469

7570
http_archive(
7671
name = "pybind11",
72+
build_file = "//third_party/pybind11:BUILD.bzl",
73+
sha256 = "efc901aa0aab439a3fea6efeaf930b5a349fb06394bf845c64ce15a9cf8f0240",
74+
strip_prefix = "pybind11-2.13.4",
7775
urls = [
7876
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/pybind/pybind11/archive/v2.13.4.tar.gz",
7977
"https://github.com/pybind/pybind11/archive/v2.13.4.tar.gz",
8078
],
81-
sha256 = "efc901aa0aab439a3fea6efeaf930b5a349fb06394bf845c64ce15a9cf8f0240",
82-
strip_prefix = "pybind11-2.13.4",
83-
build_file = "//third_party/pybind11:BUILD.bzl",
8479
)
8580

8681
http_archive(
@@ -99,27 +94,31 @@ load("//tensorflow_text:tftext.bzl", "py_deps_profile")
9994

10095
py_deps_profile(
10196
name = "release_or_nightly",
102-
requirements_in = "//oss_scripts/pip_package:requirements.in",
103-
pip_repo_name = "pypi",
10497
deps_map = {
105-
"tensorflow": ["tf-nightly", "tf_header_lib", "libtensorflow_framework"],
106-
"tf-keras": ["tf-keras-nightly"]
98+
"tensorflow": [
99+
"tf-nightly",
100+
"tf_header_lib",
101+
"libtensorflow_framework",
102+
],
103+
"tf-keras": ["tf-keras-nightly"],
107104
},
105+
pip_repo_name = "pypi",
106+
requirements_in = "//oss_scripts/pip_package:requirements.in",
108107
switch = {
109-
"IS_NIGHTLY": "nightly"
110-
}
108+
"IS_NIGHTLY": "false",
109+
},
111110
)
112111

113112
load("@org_tensorflow//third_party/py:python_init_repositories.bzl", "python_init_repositories")
114113

115114
python_init_repositories(
115+
default_python_version = "system",
116116
requirements = {
117117
"3.9": "//oss_scripts/pip_package:requirements_lock_3_9.txt",
118118
"3.10": "//oss_scripts/pip_package:requirements_lock_3_10.txt",
119119
"3.11": "//oss_scripts/pip_package:requirements_lock_3_11.txt",
120120
"3.12": "//oss_scripts/pip_package:requirements_lock_3_12.txt",
121121
},
122-
default_python_version = "system",
123122
)
124123

125124
load("@org_tensorflow//third_party/py:python_init_toolchains.bzl", "python_init_toolchains")
@@ -136,18 +135,28 @@ install_deps()
136135

137136
# Initialize TensorFlow dependencies.
138137
load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3")
138+
139139
tf_workspace3()
140+
140141
load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2")
142+
141143
tf_workspace2()
144+
142145
load("@org_tensorflow//tensorflow:workspace1.bzl", "tf_workspace1")
146+
143147
tf_workspace1()
148+
144149
load("@org_tensorflow//tensorflow:workspace0.bzl", "tf_workspace0")
150+
145151
tf_workspace0()
146152

147153
# Set up Android.
148154
load("@org_tensorflow//third_party/android:android_configure.bzl", "android_configure")
149-
android_configure(name="local_config_android")
155+
156+
android_configure(name = "local_config_android")
157+
150158
load("@local_config_android//:android.bzl", "android_workspace")
159+
151160
android_workspace()
152161

153162
load(

oss_scripts/pip_package/requirements.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ setuptools==70.0.0
22
dm-tree==0.1.8 # Limit for macos support.
33
numpy
44
protobuf==4.25.3 # b/397977335 - Fix crash on python 3.9, 3.10.
5-
tensorflow
6-
tf-keras
5+
tensorflow==2.20.0
6+
tf-keras<=3.11.3
77
tensorflow-datasets
88
tensorflow-metadata

tensorflow_text/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,4 +110,4 @@
110110
]
111111

112112
remove_undocumented(__name__, _allowed_symbols)
113-
__version__ = "2.13.0"
113+
__version__ = "2.20.0"

tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.cc

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,11 @@
1515
#include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h"
1616

1717
#include <string>
18+
#include <cassert>
1819

19-
#include "icu4c/source/common/unicode/uchar.h"
2020
#include "icu4c/source/common/unicode/umachine.h"
2121
#include "icu4c/source/common/unicode/uniset.h"
22-
#include "icu4c/source/common/unicode/uset.h"
23-
#include "icu4c/source/common/unicode/utf8.h"
22+
#include "icu4c/source/common/unicode/unistr.h"
2423
#include "icu4c/source/common/unicode/utypes.h"
2524

2625
namespace tensorflow {
@@ -29,24 +28,27 @@ namespace text {
2928
namespace {
3029

3130
const icu::UnicodeSet& WhiteSpaceSet() {
32-
// Will not fail because the data is hardcoded in the ICU library.
33-
UErrorCode error_code = U_ZERO_ERROR;
34-
const USet* c_set = u_getBinaryPropertySet(UCHAR_WHITE_SPACE, &error_code);
35-
// assert(U_SUCCESS(error_code));
36-
const icu::UnicodeSet* set = icu::UnicodeSet::fromUSet(c_set);
37-
return *set;
31+
// Use a C++11 static lambda to safely initialize the UnicodeSet.
32+
static const icu::UnicodeSet white_space_set = []() {
33+
UErrorCode status = U_ZERO_ERROR;
34+
// The pattern "[:White_Space:]" selects all whitespace characters.
35+
icu::UnicodeSet set(u"[:White_Space:]", status);
36+
// This should never fail as the pattern is hardcoded and valid.
37+
assert(U_SUCCESS(status));
38+
return set;
39+
}();
40+
return white_space_set;
3841
}
3942

4043
} // namespace
4144

4245
std::string BuildWhitespaceString() {
43-
std::string str;
44-
char buf[U8_MAX_LENGTH];
46+
icu::UnicodeString ustr;
4547
for (auto cp : WhiteSpaceSet().codePoints()) {
46-
int len = 0;
47-
U8_APPEND_UNSAFE(buf, len, cp);
48-
str.append(buf, len);
48+
ustr.append(cp);
4949
}
50+
std::string str;
51+
ustr.toUTF8String(str);
5052
return str;
5153
}
5254

0 commit comments

Comments
 (0)