Commit a09f070

fix(genai): extract and include metadata (#941)

### **PR Description**

This pull request fixes an issue where grounding metadata from Google search tool responses was not preserved. It ensures that fields such as `grounding_supports`, `grounding_chunks`, and `web_search_queries` are extracted and included in `response_metadata`, enabling citation tracking and improved traceability in search-augmented responses.

---

### **Relevant issues**

Fixes #907

---

### **Type**

🐛 Bug Fix

---

### **Changes (optional)**

* Added `_extract_grounding_metadata()` helper function to safely extract grounding data.
* Updated `_response_to_result()` to include `grounding_supports`, `grounding_chunks`, and `web_search_queries` in `response_metadata`.
* Preserves backward compatibility with the existing response structure.

---

### **Testing (optional)**

* Manually verified that metadata is preserved across multiple Google search tool responses.
* Confirmed the presence of citation information in augmented responses.

---

### **Note (optional)**

This fix improves the reliability of citations and source tracking in responses that use Google search, supporting downstream systems that rely on structured grounding metadata.

---------

Co-authored-by: Mason Daugherty <[email protected]>
1 parent c7cec1c commit a09f070
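To show what this change enables end to end, here is a minimal usage sketch (not part of the commit). It assumes a build that includes this fix and a configured `GOOGLE_API_KEY`; the model name and the Google Search tool-binding call are illustrative assumptions that may differ by langchain-google-genai version, while the `grounding_metadata` keys match those produced by the new helper in the diff below.

```python
# Minimal sketch, assuming this fix is installed and GOOGLE_API_KEY is set.
# The model name and the tool-binding syntax are assumptions for illustration;
# check the langchain-google-genai docs for the exact form in your version.
from google.ai.generativelanguage_v1beta.types import Tool as GenAITool
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

response = llm.invoke(
    "When is the next total solar eclipse visible from the US?",
    tools=[GenAITool(google_search={})],  # enable built-in Google Search grounding
)

# With this fix, grounding data is mirrored into response_metadata.
grounding = response.response_metadata.get("grounding_metadata", {})

print(grounding.get("web_search_queries", []))        # queries the tool issued
for chunk in grounding.get("grounding_chunks", []):   # cited web sources
    web = chunk.get("web", {})
    print(web.get("title", ""), web.get("uri", ""))
for support in grounding.get("grounding_supports", []):  # answer spans tied to sources
    segment = support.get("segment", {})
    print(segment.get("text", ""), "->", support.get("grounding_chunk_indices", []))
```

The same dictionary also remains available under `generation_info["grounding_metadata"]`, so existing consumers of that field are unaffected.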

File tree

1 file changed (+67 additions, -7 deletions)

libs/genai/langchain_google_genai/chat_models.py

Lines changed: 67 additions & 7 deletions
@@ -738,6 +738,66 @@ def _parse_response_candidate(
     )
 
 
+def _extract_grounding_metadata(candidate: Any) -> Dict[str, Any]:
+    """Extract grounding metadata from candidate.
+
+    Uses `proto.Message.to_dict()` for complete unfiltered extraction first,
+    falls back to custom field extraction in cases of failure for robustness.
+    """
+    if not hasattr(candidate, "grounding_metadata") or not candidate.grounding_metadata:
+        return {}
+
+    grounding_metadata = candidate.grounding_metadata
+
+    try:
+        return proto.Message.to_dict(grounding_metadata)
+    except (AttributeError, TypeError):
+        # Fallback: field extraction
+        result: Dict[str, Any] = {}
+
+        # Extract grounding chunks
+        if hasattr(grounding_metadata, "grounding_chunks"):
+            grounding_chunks = []
+            for chunk in grounding_metadata.grounding_chunks:
+                chunk_data: Dict[str, Any] = {}
+                if hasattr(chunk, "web") and chunk.web:
+                    chunk_data["web"] = {
+                        "uri": chunk.web.uri if hasattr(chunk.web, "uri") else "",
+                        "title": chunk.web.title if hasattr(chunk.web, "title") else "",
+                    }
+                grounding_chunks.append(chunk_data)
+            result["grounding_chunks"] = grounding_chunks
+
+        # Extract grounding supports
+        if hasattr(grounding_metadata, "grounding_supports"):
+            grounding_supports = []
+            for support in grounding_metadata.grounding_supports:
+                support_data: Dict[str, Any] = {}
+                if hasattr(support, "segment") and support.segment:
+                    support_data["segment"] = {
+                        "start_index": getattr(support.segment, "start_index", 0),
+                        "end_index": getattr(support.segment, "end_index", 0),
+                        "text": getattr(support.segment, "text", ""),
+                        "part_index": getattr(support.segment, "part_index", 0),
+                    }
+                if hasattr(support, "grounding_chunk_indices"):
+                    support_data["grounding_chunk_indices"] = list(
+                        support.grounding_chunk_indices
+                    )
+                if hasattr(support, "confidence_scores"):
+                    support_data["confidence_scores"] = [
+                        round(score, 6) for score in support.confidence_scores
+                    ]
+                grounding_supports.append(support_data)
+            result["grounding_supports"] = grounding_supports
+
+        # Extract web search queries
+        if hasattr(grounding_metadata, "web_search_queries"):
+            result["web_search_queries"] = list(grounding_metadata.web_search_queries)
+
+        return result
+
+
 def _response_to_result(
     response: GenerateContentResponse,
     stream: bool = False,
@@ -800,15 +860,15 @@ def _response_to_result(
             proto.Message.to_dict(safety_rating, use_integers_for_enums=False)
             for safety_rating in candidate.safety_ratings
         ]
-        try:
-            if candidate.grounding_metadata:
-                generation_info["grounding_metadata"] = proto.Message.to_dict(
-                    candidate.grounding_metadata
-                )
-        except AttributeError:
-            pass
+        grounding_metadata = _extract_grounding_metadata(candidate)
+        generation_info["grounding_metadata"] = grounding_metadata
         message = _parse_response_candidate(candidate, streaming=stream)
         message.usage_metadata = lc_usage
+
+        if not hasattr(message, "response_metadata"):
+            message.response_metadata = {}
+        message.response_metadata["grounding_metadata"] = grounding_metadata
+
         if stream:
             generations.append(
                 ChatGenerationChunk(
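
For downstream systems, here is a short sketch of the citation tracking this metadata supports. The helper and the sample values are illustrative only (not part of this commit); the dict shape follows the keys produced by `_extract_grounding_metadata()` above.

```python
# Illustrative sketch: build numbered citations from the grounding metadata
# surfaced by this fix. The sample values below are made up for demonstration;
# the key names mirror those emitted by _extract_grounding_metadata().
from typing import Any, Dict


def add_citations(text: str, grounding: Dict[str, Any]) -> str:
    """Append [n] markers to `text` and list the cited sources."""
    chunks = grounding.get("grounding_chunks", [])
    # Splice markers from the largest end_index down so earlier offsets stay valid.
    supports = sorted(
        grounding.get("grounding_supports", []),
        key=lambda s: s.get("segment", {}).get("end_index", 0),
        reverse=True,
    )
    for support in supports:
        end = support.get("segment", {}).get("end_index", 0)
        refs = "".join(f"[{i + 1}]" for i in support.get("grounding_chunk_indices", []))
        text = text[:end] + refs + text[end:]
    sources = "\n".join(
        f"[{i + 1}] {c.get('web', {}).get('title', '')}: {c.get('web', {}).get('uri', '')}"
        for i, c in enumerate(chunks)
    )
    return f"{text}\n\nSources:\n{sources}"


sample_grounding = {  # made-up example data
    "web_search_queries": ["2026 FIFA World Cup hosts"],
    "grounding_chunks": [{"web": {"uri": "https://example.com", "title": "Example source"}}],
    "grounding_supports": [
        {"segment": {"start_index": 0, "end_index": 35}, "grounding_chunk_indices": [0]}
    ],
}
print(add_citations("The 2026 World Cup has three hosts.", sample_grounding))
```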

0 commit comments
