This repository was archived by the owner on Jul 31, 2023. It is now read-only.

Commit 424c025

Merge pull request #24 from google/hotfix18 (Hotfix18)

2 parents 1516009 + 6817a5f

4 files changed: +28 -22 lines changed


samples/Basic-TFRecorder-Usage.ipynb

Lines changed: 2 additions & 2 deletions

@@ -17,8 +17,8 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "import pandas as pd\n",
-    "import tfrutil"
+    "import pandas as pd \n",
+    "import tfrecorder"
     ]
   },
   {
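For orientation, here is a minimal sketch (not part of the commit) of the renamed import in use, following the pandas accessor pattern the Dataflow sample below relies on; the CSV name and output directory are placeholders.

    # Local run with placeholder paths; the CSV is assumed to have the
    # image_uri/label/split columns the TFRecorder samples use.
    import pandas as pd
    import tfrecorder

    df = pd.read_csv("data.csv")
    df.tensorflow.to_tfr(output_dir="/tmp/tfrecords")  # runs locally when no runner is given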

samples/Using-TFRecorder-with-Google-Cloud-Dataflow.ipynb

Lines changed: 25 additions & 18 deletions

@@ -4,15 +4,15 @@
     "cell_type": "markdown",
     "metadata": {},
     "source": [
-    "# Using Google Cloud DataFlow with TFRUtil\n",
+    "# Using Google Cloud DataFlow with TFRecorder\n",
     "\n",
-    "This notebook demonstrates how to use TFRUtil with Google Cloud DataFlow to scale up to processing any size of dataset.\n",
+    "This notebook demonstrates how to use TFRecorder with Google Cloud DataFlow to scale up to processing any size of dataset.\n",
     " \n",
     "## Notebook Setup\n",
     "\n",
-    "1. Please install TFUtil with the command `python setup.py` from the repository root.\n",
+    "1. Please install TFRecorder with the command `python setup.py` from the repository root.\n",
     "\n",
-    "2. Create a new GCS bucket the command with `gsutil mb gs://your/bucket/name` and set the BUCKET= constant to that name.\n",
+    "2. Create a new GCS bucket the command with `gsutil mb gs://your/bucket/name/` and set the BUCKET= constant to that name.\n",
     "\n",
     "3. Copy the test images from the TFRutil repo to the new gcs bucket with the command `gsutil cp -r ./tfrutil/test_data/images gs://<BUCKET_NAME/images`\n"
     ]
@@ -24,7 +24,8 @@
     "outputs": [],
     "source": [
     "import pandas as pd\n",
-    "import tfrutil"
+    "import tfrecorder\n",
+    "import os"
     ]
   },
   {
@@ -33,11 +34,8 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "BUCKET=\"\" # ADD YOUR BUCKET HERE\n",
-    "PROJECT=\"\" # ADD YOUR PROJECT NAME HERE\n",
-    "REGION=\"\" # ADD A COMPUTE REGION HERE\n",
-    "TFRUTIL_PATH = \"\" # ADD THE LOCAL PATH TO YOUR CLONE OF THE TFRUTIL REPO HERE\n",
-    "OUTPUT_PATH = \"/results/"
+    "!pip download tfrecorder --no-deps\n",
+    "!cp tfrecorder* /tmp"
     ]
   },
   {
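The replaced setup cell stages the TFRecorder wheel locally so it can later be shipped to the Dataflow workers. A rough Python equivalent of those two shell lines, as an illustration only (the current-directory download and /tmp destination mirror the notebook):

    # Fetch the tfrecorder wheel without its dependencies, then copy it
    # to /tmp so its path can be assigned to TFRECORDER_WHEEL below.
    import glob
    import shutil
    import subprocess

    subprocess.run(["pip", "download", "tfrecorder", "--no-deps", "-d", "."], check=True)
    for pkg in glob.glob("tfrecorder*"):
        shutil.copy(pkg, "/tmp")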
@@ -46,7 +44,11 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "df = pd.read_csv(\"data.csv\")"
+    "BUCKET=\"\" # ADD YOUR BUCKET HERE, E.G. \"GS://MYBUCKET/\"\n",
+    "PROJECT=\"\" # ADD YOUR PROJECT NAME HERE\n",
+    "REGION=\"\" # ADD A COMPUTE REGION HERE\n",
+    "OUTPUT_PATH = \"results/\"\n",
+    "TFRECORDER_WHEEL = \"/tmp/tfrecorder-0.1.1-py3-none-any.whl\" #UPDATE VERSION AS NEEDED"
     ]
   },
   {
@@ -55,7 +57,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "df"
+    "df = pd.read_csv(\"data.csv\")"
     ]
   },
   {
@@ -73,7 +75,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "df['image_uri'] = BUCKET + df.image_uri.str.slice(start=20)"
+    "df['image_uri'] = df.image_uri.str.replace(\"../tfrecorder/\", BUCKET)"
     ]
   },
   {
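The rewritten cell maps the CSV's repo-relative image paths onto the GCS bucket, which Dataflow workers can read, instead of slicing off a fixed prefix length. A self-contained illustration (not from the commit; the bucket and file name are placeholders):

    import pandas as pd

    BUCKET = "gs://mybucket/"  # placeholder
    df = pd.DataFrame({"image_uri": ["../tfrecorder/test_data/images/cat-0.jpg"]})
    # regex=False spells out the literal-prefix replacement the cell intends
    df["image_uri"] = df.image_uri.str.replace("../tfrecorder/", BUCKET, regex=False)
    print(df.image_uri[0])  # gs://mybucket/test_data/images/cat-0.jpg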
@@ -93,11 +95,11 @@
     },
     "outputs": [],
     "source": [
-    "df.tensorflow.to_tfr(output_dir=BUCKET + OUTPUT_PATH\n",
-    " runner=\"DataFlowRunner\",\n",
+    "df.tensorflow.to_tfr(output_dir=BUCKET + OUTPUT_PATH,\n",
+    " runner=\"DataflowRunner\",\n",
     " project=PROJECT,\n",
     " region=REGION,\n",
-    " tfrutil_path=TFRUTIL_PATH)"
+    " tfrecorder_wheel=TFRECORDER_WHEEL)"
     ]
   },
   {
@@ -106,11 +108,16 @@
     "source": [
     "# That's it!\n",
     "\n",
-    "As you can see, TFRUtil has taken the supplied CSV and transformed it into TFRecords, ready for consumption, along with the transform function"
+    "As you can see, TFRecorder has taken the supplied CSV and transformed it into TFRecords, ready for consumption, along with the transform function"
     ]
   }
  ],
  "metadata": {
+  "environment": {
+   "name": "tf2-2-3-gpu.2-3.m55",
+   "type": "gcloud",
+   "uri": "gcr.io/deeplearning-platform-release/tf2-2-3-gpu.2-3:m55"
+  },
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",
@@ -126,7 +133,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.7.8"
   }
  },
 "nbformat": 4,

setup.py

Lines changed: 0 additions & 1 deletion

@@ -21,7 +21,6 @@
 
 
 REQUIRED_PACKAGES = [
-    "absl-py < 0.9, >= 0.7",
     "apache-beam[gcp] >= 2.22.0",
     "avro >= 1.10.0",
     "coverage >= 5.1",

tfrecorder/beam_pipeline.py

Lines changed: 1 addition & 1 deletion

@@ -86,7 +86,7 @@ def _get_pipeline_options(
   if region:
     options_dict['region'] = region
   if runner == 'DataflowRunner':
-    options_dict['extra_packages'] = tfrecorder_wheel
+    options_dict['extra_packages'] = [tfrecorder_wheel]
   if dataflow_options:
     options_dict.update(dataflow_options)
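This one-line fix matters because Beam's extra_packages option expects a list of local package paths to stage on the workers; a bare string is itself iterable, so anything that loops over the option would see one bogus "path" per character. A quick illustration (not from the commit; the wheel path is a placeholder):

    wheel = "/tmp/tfrecorder-0.1.1-py3-none-any.whl"

    broken = {"extra_packages": wheel}    # string: iteration yields characters
    fixed = {"extra_packages": [wheel]}   # list: iteration yields the path

    print(list(broken["extra_packages"])[:3])  # ['/', 't', 'm']
    print(list(fixed["extra_packages"]))       # ['/tmp/tfrecorder-0.1.1-py3-none-any.whl']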
