aws-solutions-library-samples · lorchda · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025
diff --git a/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml b/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml
@@ -96,7 +96,7 @@ classes:
               "cc": null,
               "reference_number": "TNJB 0008497"
         imagePath: >-
-          config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter1.jpg
+          config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter1.jpg
       - classPrompt: This is an example of the class 'letter'
         name: Letter2
         attributesPrompt: |-
@@ -112,7 +112,7 @@ classes:
               "cc": null,
               "reference_number": null
         imagePath: >-
-          config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter2.png
+          config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter2.png
   - $schema: https://json-schema.org/draft/2020-12/schema
     $id: form
     x-aws-idp-document-type: form
@@ -587,7 +587,7 @@ classes:
               "thread_id": null,
               "message_id": null
         imagePath: >-
-          config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/email1.jpg
+          config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/email1.jpg
   - $schema: https://json-schema.org/draft/2020-12/schema
     $id: questionnaire
     x-aws-idp-document-type: questionnaire
@@ -805,7 +805,7 @@ classes:
               "account_number": ["003525801543","352580154336"],
               "transactions": [{"Date": "2/6/2020", "Description": "Food Purchase - AnyCompany Restaurant - 1194989245", "Amount": "-171"}]
         imagePath: >-
-          config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/bank-statement-pages/
+          config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/bank-statement-pages/
 
 classification:
   maxPagesForClassification: "ALL"
@@ -822,9 +822,9 @@ classification:
     You are a multimodal document classification expert that analyzes business documents using both visual layout and textual content. Your task is to classify single-page documents into predefined categories based on their structural patterns, visual features, and text content. Your output must be valid JSON according to the requested format.
 
     <variables>
-    DOCUMENT_TEXT: OCR-extracted text content from the document page that provides textual information for classification
-    DOCUMENT_IMAGE: Visual representation of the document page that provides layout, formatting, and visual structure information
-    CLASS_NAMES_AND_DESCRIPTIONS: List of valid document types with their descriptions that the document must be classified into
+    <document-ocr-data>: OCR-extracted text content from the document page that provides textual information for classification
+    <document-image>: Visual representation of the document page that provides layout, formatting, and visual structure information
+    <document-types>: List of valid document types with their descriptions that the document must be classified into
     </variables>
   task_prompt: >-
     <reasoning-guidelines>
@@ -836,6 +836,10 @@ classification:
     - Provide specific evidence from both visual and textual analysis
     </reasoning-guidelines>
 
+    <document-types>
+    {CLASS_NAMES_AND_DESCRIPTIONS}
+    </document-types>
+
     <output-format>
     Return your classification as valid JSON following this exact structure:
     {
@@ -892,6 +896,11 @@ extraction:
 
     </task>
 
+    <few-shot-examples>
+
+    {FEW_SHOT_EXAMPLES}
+
+    </few-shot-examples>
 
     <extraction-guidelines>
 

diff --git a/lib/idp_common_pkg/idp_common/extraction/service.py b/lib/idp_common_pkg/idp_common/extraction/service.py
@@ -819,10 +819,12 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
                 # Create empty result structure without invoking LLM
                 extracted_fields = {}
                 metering = {
-                    "input_tokens": 0,
-                    "output_tokens": 0,
-                    "invocation_count": 0,
-                    "total_cost": 0.0,
+                    f"Extraction/{self.config.extraction.model}": {
+                        "input_tokens": 0,
+                        "output_tokens": 0,
+                        "invocation_count": 0,
+                        "total_cost": 0.0,
+                    }
                 }
                 total_duration = 0.0
                 parsing_succeeded = True

diff --git a/notebooks/misc/e2e-example-with-multimodal-page-classification-few-shot-prompting.ipynb b/notebooks/misc/e2e-example-with-multimodal-page-classification-few-shot-prompting.ipynb
@@ -22,7 +22,32 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 1. Install Dependencies\n",
+    "## 1. Setup AWS Access"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%reload_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "import boto3\n",
+    "import os\n",
+    "\n",
+    "os.environ['AWS_PROFILE'] = 'default' # change to desired AWS profile\n",
+    "\n",
+    "sts_client = boto3.client('sts')\n",
+    "sts_client.get_caller_identity()['Arn']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Install Dependencies\n",
     "\n",
     "The IDP common package supports granular installation through extras. You can install:\n",
     "- `[core]` - Just core functionality \n",
@@ -68,7 +93,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 2. Import Libraries and Set Up Environment"
+    "## 3. Import Libraries and Set Up Environment"
    ]
   },
   {
@@ -93,8 +118,8 @@
     "logging.getLogger('idp_common.ocr.service').setLevel(logging.INFO)  # Focus on service logs\n",
     "logging.getLogger('idp_common.classification.service').setLevel(logging.DEBUG)  # Enable classification logs\n",
     "logging.getLogger('idp_common.bedrock.client').setLevel(logging.DEBUG)  # show prompts\n",
-    "\n",
-    "logging.getLogger('idp_common.evaluation.service').setLevel(logging.DEBUG)  # Enable evaluation logs\n",
+    "logging.getLogger('idp_common.extraction.service').setLevel(logging.INFO)  # Enable extraction logs\n",
+    "logging.getLogger('idp_common.utils.few_shot_example_builder').setLevel(logging.INFO)  # Enable few shot logs\n",
     "\n",
     "logging.getLogger('textractor').setLevel(logging.WARNING)  # Suppress textractor logs\n",
     "logging.getLogger('idp_common.evaluation.service').setLevel(logging.DEBUG)  # Enable evaluation logs\n",
@@ -145,7 +170,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 3. Set Up S3 Buckets and Upload Sample File"
+    "## 4. Set Up S3 Buckets and Upload Sample File"
    ]
   },
   {
@@ -196,7 +221,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 4. Set Up Configuration"
+    "## 5. Set Up Configuration"
    ]
   },
   {
@@ -217,7 +242,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 5. Process Document with OCR"
+    "## 6. Process Document with OCR"
    ]
   },
   {
@@ -271,7 +296,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 6. Classify the Document"
+    "## 7. Classify the Document"
    ]
   },
   {
@@ -326,7 +351,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 7. Extract Information from Document Sections"
+    "## 8. Extract Information from Document Sections"
    ]
   },
   {
@@ -364,18 +389,28 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from idp_common.s3 import get_json_content\n",
+    "\n",
     "print(\"\\nShow extraction results...\\n\")\n",
     "\n",
     "document_dict = document.to_dict()\n",
     "sections_json = json.dumps(document_dict[\"sections\"][:n], indent=2)\n",
-    "print(f\"{sections_json}...\")"
+    "print(sections_json)\n",
+    "\n",
+    "for section in document.sections[:n]:\n",
+    "    print(f\"\\nSection {section.section_id} extraction result:\")\n",
+    "    extraction_result_uri = section.extraction_result_uri\n",
+    "    if extraction_result_uri:\n",
+    "        result = get_json_content(extraction_result_uri)\n",
+    "        result_json = json.dumps(result[\"inference_result\"], indent=2)\n",
+    "        print(result_json)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 8. Final Document Status Summary"
+    "## 9. Final Document Status Summary"
    ]
   },
   {
@@ -405,7 +440,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 9. Evaluate Results\n",
+    "## 10. Evaluate Results\n",
     "\n",
     "In this section, we'll demonstrate how to evaluate extraction results by comparing them with expected (ground truth) values. The evaluation process involves:\n",
     "\n",
@@ -612,7 +647,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 10. Clean Up (Optional)"
+    "# 11. Clean Up (Optional)"
    ]
   },
   {

diff --git a/notebooks/misc/test_few_shot_extraction.ipynb b/notebooks/misc/test_few_shot_extraction.ipynb
@@ -9,6 +9,31 @@
     "This notebook tests the new `{FEW_SHOT_EXAMPLES}` placeholder functionality in the Extraction service."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup AWS Access and Python Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%reload_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "import boto3\n",
+    "import os\n",
+    "\n",
+    "os.environ['AWS_PROFILE'] = 'default' # change to desired AWS profile\n",
+    "\n",
+    "sts_client = boto3.client('sts')\n",
+    "sts_client.get_caller_identity()['Arn']"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 6,
@@ -27,7 +52,7 @@
     "os.environ['ROOT_DIR'] = f\"{ROOTDIR}/\"\n",
     "\n",
     "# Add the idp_common package to the path\n",
-    "sys.path.insert(0, '{ROOTDIR}/lib/idp_common_pkg')\n",
+    "sys.path.insert(0, f'{ROOTDIR}/lib/idp_common_pkg')\n",
     "\n",
     "from idp_common.extraction.service import ExtractionService"
    ]
@@ -56,7 +81,7 @@
    ],
    "source": [
     "# Load the few-shot configuration\n",
-    "config_path = f'{ROOTDIR}/config_library/pattern-2/few_shot_example_with_multimodal_page_classification/config.yaml'\n",
+    "config_path = f'{ROOTDIR}/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml'\n",
     "with open(config_path, 'r') as f:\n",
     "    config = yaml.safe_load(f)\n",
     "\n",
@@ -215,16 +240,16 @@
       "    Attributes Prompt: expected attributes are:\n",
       "    \"sender_name\": \"Will E. Clark\",\n",
       "    \"sender_address\": \"206 Maple Street...\n",
-      "    Image Path: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter1.jpg\n",
-      "    S3 URI: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter1.jpg\n",
+      "    Image Path: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter1.jpg\n",
+      "    S3 URI: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter1.jpg\n",
       "  Example 2:\n",
       "    Name: Letter2\n",
       "    Class Prompt: This is an example of the class 'letter'\n",
       "    Attributes Prompt: expected attributes are:\n",
       "    \"sender_name\": \"William H. W. Anderson\",\n",
       "    \"sender_address\": \"P O. BO...\n",
-      "    Image Path: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter2.png\n",
-      "    S3 URI: config_library/pattern-2/few_shot_example_with_multimodal_page_classification/example-images/letter2.png\n",
+      "    Image Path: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter2.png\n",
+      "    S3 URI: config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/example-images/letter2.png\n",
       "\n",
       "Class: form\n",
       "Number of examples: 0\n",
@@ -278,7 +303,7 @@
       "\n",
       "Environment variables:\n",
       "  CONFIGURATION_BUCKET: Not set - using ROOT_DIR to resolve paths locally\n",
-      "  ROOT_DIR: ../../\n"
+      "  ROOT_DIR: ../..\n"
      ]
     }
    ],