Skip to content

Commit 4fc4c56

Browse files
committed
feat: Add health check response schema and update conversion result schemas
- Introduce HealthCheckResponse schema with status and services fields
- Modify ConversionResult schema to make images and page_content optional
- Update BatchConversionResult and BatchConversionJobResult to have optional conversion results
- Adjust Chunk metadata to be more flexible with optional types
- Add response_model to health check route in route.py
1 parent a6a91a8 commit 4fc4c56

File tree

2 files changed

+19
-23
lines changed

2 files changed

+19
-23
lines changed

document_converter/route.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@
99
ConversionJobResult,
1010
ConversionResult,
1111
ChunkingResult,
12-
TextChunkingRequest
12+
TextChunkingRequest,
13+
HealthCheckResponse
1314
)
1415
from document_converter.service import DocumentConverterService, DoclingDocumentConversion
1516
from document_converter.utils import is_file_format_supported
@@ -352,6 +353,7 @@ async def get_batch_conversion_job_status(
352353

353354
@router.get(
354355
"/health",
356+
response_model=HealthCheckResponse,
355357
responses={
356358
200: {"description": "All services are healthy"},
357359
500: {"description": "One or more services are unhealthy"}

document_converter/schema.py

Lines changed: 16 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -12,14 +12,14 @@ class ImageData(BaseModel):
1212
class ConversionResult(BaseModel):
1313
filename: str = Field(..., description="Original filename of the document")
1414
markdown: Optional[str] = Field(None, description="Converted markdown content")
15-
images: List[ImageData] = Field(default_factory=list, description="Images extracted from the document")
15+
images: Optional[List[ImageData]] = Field(None, description="Images extracted from the document")
1616
error: Optional[str] = Field(None, description="Error message if conversion failed")
17-
page_content: Optional[Dict[int, str]] = Field(None, description="Markdown content organized by page number")
17+
page_content: Optional[Dict[str, Optional[str]]] = Field(None, description="Markdown content organized by page number")
1818

1919

2020
class BatchConversionResult(BaseModel):
21-
conversion_results: List[ConversionResult] = Field(
22-
default_factory=list, description="The results of the conversions"
21+
conversion_results: Optional[List[ConversionResult]] = Field(
22+
None, description="The results of the conversions"
2323
)
2424

2525

@@ -33,13 +33,13 @@ class ConversionJobResult(BaseModel):
3333
class BatchConversionJobResult(BaseModel):
3434
job_id: str = Field(..., description="The id of the batch conversion job")
3535
status: str = Field(..., description="Current status of the batch job")
36-
conversion_results: List[ConversionJobResult] = Field(default_factory=list, description="Individual conversion job results")
36+
conversion_results: Optional[List[ConversionJobResult]] = Field(None, description="Individual conversion job results")
3737
error: Optional[str] = Field(None, description="Error message if batch job failed")
3838

3939

4040
class Chunk(BaseModel):
4141
text: str = Field(..., description="The plain text content of the chunk")
42-
metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata associated with the chunk")
42+
metadata: Optional[Dict[str, Optional[str]]] = Field(None, description="Additional metadata associated with the chunk")
4343
page_numbers: Optional[List[int]] = Field(None, description="List of page numbers this chunk spans across")
4444
start_page: Optional[int] = Field(None, description="The page number where this chunk starts")
4545
end_page: Optional[int] = Field(None, description="The page number where this chunk ends")
@@ -48,24 +48,18 @@ class Chunk(BaseModel):
4848
class ChunkingResult(BaseModel):
4949
job_id: str = Field(..., description="The id of the original conversion job")
5050
filename: str = Field(..., description="The filename of the document")
51-
chunks: List[Chunk] = Field(default_factory=list, description="The chunks extracted from the document")
51+
chunks: Optional[List[Chunk]] = Field(None, description="The chunks extracted from the document")
5252
error: Optional[str] = Field(None, description="The error that occurred during chunking")
5353

5454

5555
class TextChunkingRequest(BaseModel):
5656
text: str = Field(..., description="The text content to chunk")
57-
filename: Optional[str] = Field("input.txt", description="A name to identify the source (for reporting purposes)")
58-
max_tokens: int = Field(512, ge=64, le=2048, description="Maximum number of tokens per chunk")
59-
merge_peers: bool = Field(True, description="Whether to merge undersized peer chunks")
60-
include_page_numbers: bool = Field(True, description="Whether to include page number references in chunk metadata")
61-
62-
class Config:
63-
json_schema_extra = {
64-
"example": {
65-
"text": "This is the text content that needs to be chunked. It can be as long as needed.",
66-
"filename": "example.txt",
67-
"max_tokens": 512,
68-
"merge_peers": True,
69-
"include_page_numbers": True
70-
}
71-
}
57+
filename: str = Field(default="input.txt", description="A name to identify the source (for reporting purposes)")
58+
max_tokens: int = Field(default=512, description="Maximum number of tokens per chunk")
59+
merge_peers: bool = Field(default=True, description="Whether to merge undersized peer chunks")
60+
include_page_numbers: bool = Field(default=True, description="Whether to include page number references in chunk metadata")
61+
62+
63+
class HealthCheckResponse(BaseModel):
64+
status: str = Field(..., description="Overall health status")
65+
services: Optional[Dict[str, str]] = Field(None, description="Status of individual services")

0 commit comments

Comments
 (0)