This repository was archived by the owner on Jul 31, 2023. It is now read-only.

Commit 424c025

Merge pull request #24 from google/hotfix18 (Hotfix18)

2 parents 1516009 + 6817a5f

4 files changed: +28 -22 lines changed


samples/Basic-TFRecorder-Usage.ipynb

Lines changed: 2 additions & 2 deletions

@@ -17,8 +17,8 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "import pandas as pd\n",
-    "import tfrutil"
+    "import pandas as pd \n",
+    "import tfrecorder"
     ]
   },
   {
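For orientation, here is a minimal sketch (not part of the commit) of the renamed import in use, following the pandas accessor pattern the Dataflow sample below relies on; the CSV name and output directory are placeholders.

    # Local run with placeholder paths; the CSV is assumed to have the
    # image_uri/label/split columns the TFRecorder samples use.
    import pandas as pd
    import tfrecorder

    df = pd.read_csv("data.csv")
    df.tensorflow.to_tfr(output_dir="/tmp/tfrecords")  # runs locally when no runner is given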

samples/Using-TFRecorder-with-Google-Cloud-Dataflow.ipynb

Lines changed: 25 additions & 18 deletions

@@ -4,15 +4,15 @@
     "cell_type": "markdown",
     "metadata": {},
     "source": [
-    "# Using Google Cloud DataFlow with TFRUtil\n",
+    "# Using Google Cloud DataFlow with TFRecorder\n",
     "\n",
-    "This notebook demonstrates how to use TFRUtil with Google Cloud DataFlow to scale up to processing any size of dataset.\n",
+    "This notebook demonstrates how to use TFRecorder with Google Cloud DataFlow to scale up to processing any size of dataset.\n",
     " \n",
     "## Notebook Setup\n",
     "\n",
-    "1. Please install TFUtil with the command `python setup.py` from the repository root.\n",
+    "1. Please install TFRecorder with the command `python setup.py` from the repository root.\n",
     "\n",
-    "2. Create a new GCS bucket the command with `gsutil mb gs://your/bucket/name` and set the BUCKET= constant to that name.\n",
+    "2. Create a new GCS bucket the command with `gsutil mb gs://your/bucket/name/` and set the BUCKET= constant to that name.\n",
     "\n",
     "3. Copy the test images from the TFRutil repo to the new gcs bucket with the command `gsutil cp -r ./tfrutil/test_data/images gs://<BUCKET_NAME/images`\n"
     ]
@@ -24,7 +24,8 @@
     "outputs": [],
     "source": [
     "import pandas as pd\n",
-    "import tfrutil"
+    "import tfrecorder\n",
+    "import os"
     ]
   },
   {
@@ -33,11 +34,8 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "BUCKET=\"\" # ADD YOUR BUCKET HERE\n",
-    "PROJECT=\"\" # ADD YOUR PROJECT NAME HERE\n",
-    "REGION=\"\" # ADD A COMPUTE REGION HERE\n",
-    "TFRUTIL_PATH = \"\" # ADD THE LOCAL PATH TO YOUR CLONE OF THE TFRUTIL REPO HERE\n",
-    "OUTPUT_PATH = \"/results/"
+    "!pip download tfrecorder --no-deps\n",
+    "!cp tfrecorder* /tmp"
     ]
   },
   {
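The replaced setup cell stages the TFRecorder wheel locally so it can later be shipped to the Dataflow workers. A rough Python equivalent of those two shell lines, as an illustration only (the current-directory download and /tmp destination mirror the notebook):

    # Fetch the tfrecorder wheel without its dependencies, then copy it
    # to /tmp so its path can be assigned to TFRECORDER_WHEEL below.
    import glob
    import shutil
    import subprocess

    subprocess.run(["pip", "download", "tfrecorder", "--no-deps", "-d", "."], check=True)
    for pkg in glob.glob("tfrecorder*"):
        shutil.copy(pkg, "/tmp")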
@@ -46,7 +44,11 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "df = pd.read_csv(\"data.csv\")"
+    "BUCKET=\"\" # ADD YOUR BUCKET HERE, E.G. \"GS://MYBUCKET/\"\n",
+    "PROJECT=\"\" # ADD YOUR PROJECT NAME HERE\n",
+    "REGION=\"\" # ADD A COMPUTE REGION HERE\n",
+    "OUTPUT_PATH = \"results/\"\n",
+    "TFRECORDER_WHEEL = \"/tmp/tfrecorder-0.1.1-py3-none-any.whl\" #UPDATE VERSION AS NEEDED"
     ]
   },
   {
@@ -55,7 +57,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "df"
+    "df = pd.read_csv(\"data.csv\")"
     ]
   },
   {
@@ -73,7 +75,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "df['image_uri'] = BUCKET + df.image_uri.str.slice(start=20)"
+    "df['image_uri'] = df.image_uri.str.replace(\"../tfrecorder/\", BUCKET)"
     ]
   },
   {
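The rewritten cell maps the CSV's repo-relative image paths onto the GCS bucket, which Dataflow workers can read, instead of slicing off a fixed prefix length. A self-contained illustration (not from the commit; the bucket and file name are placeholders):

    import pandas as pd

    BUCKET = "gs://mybucket/"  # placeholder
    df = pd.DataFrame({"image_uri": ["../tfrecorder/test_data/images/cat-0.jpg"]})
    # regex=False spells out the literal-prefix replacement the cell intends
    df["image_uri"] = df.image_uri.str.replace("../tfrecorder/", BUCKET, regex=False)
    print(df.image_uri[0])  # gs://mybucket/test_data/images/cat-0.jpg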
@@ -93,11 +95,11 @@
     },
     "outputs": [],
     "source": [
-    "df.tensorflow.to_tfr(output_dir=BUCKET + OUTPUT_PATH\n",
-    " runner=\"DataFlowRunner\",\n",
+    "df.tensorflow.to_tfr(output_dir=BUCKET + OUTPUT_PATH,\n",
+    " runner=\"DataflowRunner\",\n",
     " project=PROJECT,\n",
     " region=REGION,\n",
-    " tfrutil_path=TFRUTIL_PATH)"
+    " tfrecorder_wheel=TFRECORDER_WHEEL)"
     ]
   },
   {
@@ -106,11 +108,16 @@
     "source": [
     "# That's it!\n",
     "\n",
-    "As you can see, TFRUtil has taken the supplied CSV and transformed it into TFRecords, ready for consumption, along with the transform function"
+    "As you can see, TFRecorder has taken the supplied CSV and transformed it into TFRecords, ready for consumption, along with the transform function"
     ]
   }
  ],
  "metadata": {
+  "environment": {
+   "name": "tf2-2-3-gpu.2-3.m55",
+   "type": "gcloud",
+   "uri": "gcr.io/deeplearning-platform-release/tf2-2-3-gpu.2-3:m55"
+  },
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",
@@ -126,7 +133,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.7.8"
   }
  },
 "nbformat": 4,

setup.py

Lines changed: 0 additions & 1 deletion

@@ -21,7 +21,6 @@
 
 
 REQUIRED_PACKAGES = [
-    "absl-py < 0.9, >= 0.7",
     "apache-beam[gcp] >= 2.22.0",
     "avro >= 1.10.0",
     "coverage >= 5.1",

tfrecorder/beam_pipeline.py

Lines changed: 1 addition & 1 deletion

@@ -86,7 +86,7 @@ def _get_pipeline_options(
   if region:
     options_dict['region'] = region
   if runner == 'DataflowRunner':
-    options_dict['extra_packages'] = tfrecorder_wheel
+    options_dict['extra_packages'] = [tfrecorder_wheel]
   if dataflow_options:
     options_dict.update(dataflow_options)
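This one-line fix matters because Beam's extra_packages option expects a list of local package paths to stage on the workers; a bare string is itself iterable, so anything that loops over the option would see one bogus "path" per character. A quick illustration (not from the commit; the wheel path is a placeholder):

    wheel = "/tmp/tfrecorder-0.1.1-py3-none-any.whl"

    broken = {"extra_packages": wheel}    # string: iteration yields characters
    fixed = {"extra_packages": [wheel]}   # list: iteration yields the path

    print(list(broken["extra_packages"])[:3])  # ['/', 't', 'm']
    print(list(fixed["extra_packages"]))       # ['/tmp/tfrecorder-0.1.1-py3-none-any.whl']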
