@@ -12,14 +12,14 @@ class ImageData(BaseModel):
 class ConversionResult(BaseModel):
     filename: str = Field(..., description="Original filename of the document")
     markdown: Optional[str] = Field(None, description="Converted markdown content")
-    images: List[ImageData] = Field(default_factory=list, description="Images extracted from the document")
+    images: Optional[List[ImageData]] = Field(None, description="Images extracted from the document")
     error: Optional[str] = Field(None, description="Error message if conversion failed")
-    page_content: Optional[Dict[int, str]] = Field(None, description="Markdown content organized by page number")
+    page_content: Optional[Dict[str, Optional[str]]] = Field(None, description="Markdown content organized by page number")


 class BatchConversionResult(BaseModel):
-    conversion_results: List[ConversionResult] = Field(
-        default_factory=list, description="The results of the conversions"
+    conversion_results: Optional[List[ConversionResult]] = Field(
+        None, description="The results of the conversions"
     )

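Note on the pattern above: replacing `default_factory=list` with `Optional[...] = None` changes what callers see when a field is omitted. A minimal sketch of the difference, using a simplified `images: List[str]` stand-in instead of the real `ImageData` model (Pydantic v2 assumed):

```python
from typing import List, Optional

from pydantic import BaseModel, Field


class Before(BaseModel):
    # old style: a missing field materializes as an empty list
    images: List[str] = Field(default_factory=list)


class After(BaseModel):
    # new style: a missing field stays None, mirroring the diff above
    images: Optional[List[str]] = Field(None)


print(Before().images)  # []   -- safe to iterate directly
print(After().images)   # None -- callers should write `result.images or []`
```

The same caveat applies to `conversion_results`, `chunks`, and `metadata` further down in this diff.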
@@ -33,13 +33,13 @@ class ConversionJobResult(BaseModel):
 class BatchConversionJobResult(BaseModel):
     job_id: str = Field(..., description="The id of the batch conversion job")
     status: str = Field(..., description="Current status of the batch job")
-    conversion_results: List[ConversionJobResult] = Field(default_factory=list, description="Individual conversion job results")
+    conversion_results: Optional[List[ConversionJobResult]] = Field(None, description="Individual conversion job results")
     error: Optional[str] = Field(None, description="Error message if batch job failed")


 class Chunk(BaseModel):
     text: str = Field(..., description="The plain text content of the chunk")
-    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata associated with the chunk")
+    metadata: Optional[Dict[str, Optional[str]]] = Field(None, description="Additional metadata associated with the chunk")
     page_numbers: Optional[List[int]] = Field(None, description="List of page numbers this chunk spans across")
     start_page: Optional[int] = Field(None, description="The page number where this chunk starts")
     end_page: Optional[int] = Field(None, description="The page number where this chunk ends")
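A short sketch of the narrowed `metadata` field above: with `Dict[str, Optional[str]]`, values are expected to be strings or None rather than arbitrary objects, and the whole dict may now be absent. The model below is trimmed to the two relevant fields, copied from the diff (Pydantic v2 assumed):

```python
from typing import Dict, Optional

from pydantic import BaseModel, Field


class Chunk(BaseModel):
    text: str = Field(..., description="The plain text content of the chunk")
    metadata: Optional[Dict[str, Optional[str]]] = Field(None, description="Additional metadata associated with the chunk")


# String (or None) values validate cleanly; the dict itself defaults to None.
chunk = Chunk(text="First section ...", metadata={"headings": "Introduction", "origin": None})
print(chunk.metadata)                      # {'headings': 'Introduction', 'origin': None}
print(Chunk(text="no metadata").metadata)  # None
```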
@@ -48,24 +48,18 @@ class Chunk(BaseModel):
 class ChunkingResult(BaseModel):
     job_id: str = Field(..., description="The id of the original conversion job")
     filename: str = Field(..., description="The filename of the document")
-    chunks: List[Chunk] = Field(default_factory=list, description="The chunks extracted from the document")
+    chunks: Optional[List[Chunk]] = Field(None, description="The chunks extracted from the document")
     error: Optional[str] = Field(None, description="The error that occurred during chunking")


 class TextChunkingRequest(BaseModel):
     text: str = Field(..., description="The text content to chunk")
-    filename: Optional[str] = Field("input.txt", description="A name to identify the source (for reporting purposes)")
-    max_tokens: int = Field(512, ge=64, le=2048, description="Maximum number of tokens per chunk")
-    merge_peers: bool = Field(True, description="Whether to merge undersized peer chunks")
-    include_page_numbers: bool = Field(True, description="Whether to include page number references in chunk metadata")
-
-    class Config:
-        json_schema_extra = {
-            "example": {
-                "text": "This is the text content that needs to be chunked. It can be as long as needed.",
-                "filename": "example.txt",
-                "max_tokens": 512,
-                "merge_peers": True,
-                "include_page_numbers": True
-            }
-        }
+    filename: str = Field(default="input.txt", description="A name to identify the source (for reporting purposes)")
+    max_tokens: int = Field(default=512, description="Maximum number of tokens per chunk")
+    merge_peers: bool = Field(default=True, description="Whether to merge undersized peer chunks")
+    include_page_numbers: bool = Field(default=True, description="Whether to include page number references in chunk metadata")
+
+
+class HealthCheckResponse(BaseModel):
+    status: str = Field(..., description="Overall health status")
+    services: Optional[Dict[str, str]] = Field(None, description="Status of individual services")
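For completeness, a usage sketch of `TextChunkingRequest` and the new `HealthCheckResponse` as they read after this change (definitions copied from the diff; Pydantic v2 assumed, hence `model_dump`). Note that dropping the `ge=64, le=2048` bounds means `max_tokens` is no longer range-validated by the model itself:

```python
from typing import Dict, Optional

from pydantic import BaseModel, Field


class TextChunkingRequest(BaseModel):
    text: str = Field(..., description="The text content to chunk")
    filename: str = Field(default="input.txt", description="A name to identify the source (for reporting purposes)")
    max_tokens: int = Field(default=512, description="Maximum number of tokens per chunk")
    merge_peers: bool = Field(default=True, description="Whether to merge undersized peer chunks")
    include_page_numbers: bool = Field(default=True, description="Whether to include page number references in chunk metadata")


class HealthCheckResponse(BaseModel):
    status: str = Field(..., description="Overall health status")
    services: Optional[Dict[str, str]] = Field(None, description="Status of individual services")


# Only `text` is required; everything else falls back to the declared defaults.
req = TextChunkingRequest(text="Some long document text ...", max_tokens=4096)  # 4096 is no longer rejected
health = HealthCheckResponse(status="ok", services={"converter": "up", "chunker": "up"})
print(req.model_dump())
print(health.model_dump_json())
```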