|
4 | 4 | "cell_type": "markdown",
|
5 | 5 | "metadata": {},
|
6 | 6 | "source": [
|
7 |
| - "# Using Google Cloud DataFlow with TFRUtil\n", |
| 7 | + "# Using Google Cloud Dataflow with TFRecorder\n", |
8 | 8 | "\n",
|
9 |
| - "This notebook demonstrates how to use TFRUtil with Google Cloud DataFlow to scale up to processing any size of dataset.\n", |
| 9 | + "This notebook demonstrates how to use TFRecorder with Google Cloud Dataflow to scale up to processing datasets of any size.\n", |
10 | 10 | " \n",
|
11 | 11 | "## Notebook Setup\n",
|
12 | 12 | "\n",
|
13 |
| - "1. Please install TFUtil with the command `python setup.py` from the repository root.\n", |
| 13 | + "1. Please install TFRecorder with the command `python setup.py install` from the repository root.\n", |
14 | 14 | "\n",
|
15 |
| - "2. Create a new GCS bucket the command with `gsutil mb gs://your/bucket/name` and set the BUCKET= constant to that name.\n", |
| 15 | + "2. Create a new GCS bucket with the command `gsutil mb gs://your/bucket/name/` and set the BUCKET= constant to that name.\n", |
16 | 16 | "\n",
|
17 | 17 | "3. Copy the test images from the TFRecorder repo to the new GCS bucket with the command `gsutil cp -r ./tfrecorder/test_data/images gs://<BUCKET_NAME>/images`\n"
|
18 | 18 | ]
|
|
24 | 24 | "outputs": [],
|
25 | 25 | "source": [
|
26 | 26 | "import pandas as pd\n",
|
27 |
| - "import tfrutil" |
| 27 | + "import tfrecorder\n", |
| 28 | + "import os" |
28 | 29 | ]
|
29 | 30 | },
|
30 | 31 | {
|
|
33 | 34 | "metadata": {},
|
34 | 35 | "outputs": [],
|
35 | 36 | "source": [
|
36 |
| - "BUCKET=\"\" # ADD YOUR BUCKET HERE\n", |
37 |
| - "PROJECT=\"\" # ADD YOUR PROJECT NAME HERE\n", |
38 |
| - "REGION=\"\" # ADD A COMPUTE REGION HERE\n", |
39 |
| - "TFRUTIL_PATH = \"\" # ADD THE LOCAL PATH TO YOUR CLONE OF THE TFRUTIL REPO HERE\n", |
40 |
| - "OUTPUT_PATH = \"/results/" |
| 37 | + "!pip download tfrecorder --no-deps\n", |
| 38 | + "!cp tfrecorder* /tmp" |
41 | 39 | ]
|
42 | 40 | },
|
43 | 41 | {
|
|
46 | 44 | "metadata": {},
|
47 | 45 | "outputs": [],
|
48 | 46 | "source": [
|
49 |
| - "df = pd.read_csv(\"data.csv\")" |
| 47 | + "BUCKET=\"\" # ADD YOUR BUCKET HERE, e.g. \"gs://mybucket/\"\n", |
| 48 | + "PROJECT=\"\" # ADD YOUR PROJECT NAME HERE\n", |
| 49 | + "REGION=\"\" # ADD A COMPUTE REGION HERE\n", |
| 50 | + "OUTPUT_PATH = \"results/\"\n", |
| 51 | + "TFRECORDER_WHEEL = \"/tmp/tfrecorder-0.1.1-py3-none-any.whl\" #UPDATE VERSION AS NEEDED" |
50 | 52 | ]
|
51 | 53 | },
|
52 | 54 | {
|
|
55 | 57 | "metadata": {},
|
56 | 58 | "outputs": [],
|
57 | 59 | "source": [
|
58 |
| - "df" |
| 60 | + "df = pd.read_csv(\"data.csv\")" |
59 | 61 | ]
|
60 | 62 | },
|
61 | 63 | {
|
|
73 | 75 | "metadata": {},
|
74 | 76 | "outputs": [],
|
75 | 77 | "source": [
|
76 |
| - "df['image_uri'] = BUCKET + df.image_uri.str.slice(start=20)" |
| 78 | + "df['image_uri'] = df.image_uri.str.replace(\"../tfrecorder/\", BUCKET)" |
77 | 79 | ]
|
78 | 80 | },
|
79 | 81 | {
|
|
93 | 95 | },
|
94 | 96 | "outputs": [],
|
95 | 97 | "source": [
|
96 |
| - "df.tensorflow.to_tfr(output_dir=BUCKET + OUTPUT_PATH\n", |
97 |
| - " runner=\"DataFlowRunner\",\n", |
| 98 | + "df.tensorflow.to_tfr(output_dir=BUCKET + OUTPUT_PATH,\n", |
| 99 | + " runner=\"DataflowRunner\",\n", |
98 | 100 | " project=PROJECT,\n",
|
99 | 101 | " region=REGION,\n",
|
100 |
| - " tfrutil_path=TFRUTIL_PATH)" |
| 102 | + " tfrecorder_wheel=TFRECORDER_WHEEL)" |
101 | 103 | ]
|
102 | 104 | },
|
103 | 105 | {
|
|
106 | 108 | "source": [
|
107 | 109 | "# That's it!\n",
|
108 | 110 | "\n",
|
109 |
| - "As you can see, TFRUtil has taken the supplied CSV and transformed it into TFRecords, ready for consumption, along with the transform function" |
| 111 | + "As you can see, TFRecorder has taken the supplied CSV and transformed it into TFRecords, ready for consumption, along with the transform function." |
110 | 112 | ]
|
111 | 113 | }
|
112 | 114 | ],
|
113 | 115 | "metadata": {
|
| 116 | + "environment": { |
| 117 | + "name": "tf2-2-3-gpu.2-3.m55", |
| 118 | + "type": "gcloud", |
| 119 | + "uri": "gcr.io/deeplearning-platform-release/tf2-2-3-gpu.2-3:m55" |
| 120 | + }, |
114 | 121 | "kernelspec": {
|
115 | 122 | "display_name": "Python 3",
|
116 | 123 | "language": "python",
|
|
126 | 133 | "name": "python",
|
127 | 134 | "nbconvert_exporter": "python",
|
128 | 135 | "pygments_lexer": "ipython3",
|
129 |
| - "version": "3.7.7" |
| 136 | + "version": "3.7.8" |
130 | 137 | }
|
131 | 138 | },
|
132 | 139 | "nbformat": 4,
|
|
0 commit comments