Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
c4a0219
feat(ocr): add unit tests, resolve double sent headers, and fix the w…
perfectra1n Jun 10, 2025
33a5492
fix(package): referenced wrong tesseract.js lol
perfectra1n Jun 10, 2025
864543e
feat(ocr): drop confidence down a little bit
perfectra1n Jun 10, 2025
a4adc51
fix(unit): resolve typecheck errors
perfectra1n Jun 10, 2025
f135622
feat(unit): ocr unit tests almost pass
perfectra1n Jun 10, 2025
d20b3d8
feat(unit): ocr tests almost pass...
perfectra1n Jun 10, 2025
80a9182
feat(unit): ocr tests almost pass...
perfectra1n Jun 10, 2025
7868ebe
fix(unit): also fix broken llm test
perfectra1n Jun 10, 2025
09196c0
fix(ocr): obviously don't need this migration file anymore
perfectra1n Jun 10, 2025
4b5e8d3
Update playwright.yml
perfectra1n Jun 10, 2025
9029f59
feat(ocr): swap from custom table to using the blobs table, with a ne…
perfectra1n Jul 14, 2025
893be24
merge main into feature branch
perfectra1n Jul 14, 2025
2a8c887
fix(dev): resolve issues with pnpm-lock.yaml
perfectra1n Jul 14, 2025
0298083
Merge branch 'main' into feat/add-ocr-capabilities
perfectra1n Jul 15, 2025
a7878dd
Merge branch 'main' into feat/add-ocr-capabilities
perfectra1n Jul 16, 2025
e040865
feat(ocr): add officeparser, pdf-parse, and sharp dependencies for ocr
perfectra1n Jul 16, 2025
508cbea
feat(ocr): update this new migration to also add a `ocr_last_processe…
perfectra1n Jul 16, 2025
6722d2d
feat(ocr): implement new language selection form
perfectra1n Jul 16, 2025
ca8cbf8
feat(ocr): add additional processors for OCR feature
perfectra1n Jul 16, 2025
99fa5d8
Merge remote-tracking branch 'origin/main' into feat/add-ocr-capabili…
eliandoran Jul 26, 2025
2adfc1d
chore(ci): remove unnecessary change
eliandoran Jul 26, 2025
11e9b09
feat(ocr): basic processing of new files
eliandoran Jul 26, 2025
090b175
refactor(ocr): deduplicate mime types partially
eliandoran Jul 26, 2025
c55aa6e
refactor(ocr): unnecessary initialization logic
eliandoran Jul 26, 2025
422d318
feat(ocr): add an option to display OCR text
eliandoran Jul 26, 2025
69b0973
feat(ocr): add a button to trigger an OCR manually
eliandoran Jul 26, 2025
f295592
fix(ocr): search error due to scoring
eliandoran Jul 26, 2025
6212ea0
feat(ocr): display OCR text in search results
eliandoran Jul 26, 2025
925c9c1
feat(ocr): display OCR text only in search results
eliandoran Jul 26, 2025
08ca86c
chore(deps): move workspace dependencies to server
eliandoran Jul 26, 2025
72cea24
feat(ocr): automatically process images
eliandoran Jul 26, 2025
2cb4e5e
feat(ocr): run the image operation in the background
eliandoran Jul 26, 2025
65b58c3
feat(ocr): auto-process images only if enabled in settings
eliandoran Jul 26, 2025
55ac1e0
chore(ocr): improve ocr search result style
eliandoran Jul 26, 2025
5ec6141
feat(ocr): filter out text based on confidence
eliandoran Jul 26, 2025
b9cef15
Merge remote-tracking branch 'origin/main' into feat/add-ocr-capabili…
eliandoran Jul 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/instructions/nx.instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ applyTo: '**'

// This file is automatically generated by Nx Console

You are in an nx workspace using Nx 21.3.5 and pnpm as the package manager.
You are in an nx workspace using Nx 21.3.7 and pnpm as the package manager.

You have access to the Nx MCP server and the tools it provides. Use them. Follow these guidelines in order to best help the user:

Expand Down
1 change: 0 additions & 1 deletion .github/workflows/playwright.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ jobs:
run: pnpm install --frozen-lockfile
- run: pnpm exec playwright install --with-deps
- uses: nrwl/nx-set-shas@v4

# Prepend any command with "nx-cloud record --" to record its logs to Nx Cloud
# - run: npx nx-cloud record -- echo Hello World
# Nx Affected runs only tasks affected by the changes in this PR/commit. Learn more: https://nx.dev/ci/features/affected
Expand Down
13 changes: 13 additions & 0 deletions apps/client/src/components/root_command_executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,19 @@ export default class RootCommandExecutor extends Component {
}
}

/**
 * Command handler that asks the tab manager to open (and activate) a tab for the
 * active context's note path using the `"ocr"` view mode.
 *
 * Does nothing when there is no active note path.
 */
async showNoteOCRTextCommand() {
    const activeNotePath = appContext.tabManager.getActiveContextNotePath();
    if (!activeNotePath) {
        return;
    }

    const ocrViewScope = { viewMode: "ocr" };
    await appContext.tabManager.openTabWithNoteWithHoisting(activeNotePath, {
        activate: true,
        viewScope: ocrViewScope
    });
}

async showAttachmentsCommand() {
const notePath = appContext.tabManager.getActiveContextNotePath();

Expand Down
44 changes: 40 additions & 4 deletions apps/client/src/services/content_renderer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ interface Options {
tooltip?: boolean;
trim?: boolean;
imageHasZoom?: boolean;
showOcrText?: boolean;
}

const CODE_MIME_TYPES = new Set(["application/json"]);
Expand All @@ -46,9 +47,9 @@ async function getRenderedContent(this: {} | { ctx: string }, entity: FNote | FA
} else if (type === "code") {
await renderCode(entity, $renderedContent);
} else if (["image", "canvas", "mindMap"].includes(type)) {
renderImage(entity, $renderedContent, options);
await renderImage(entity, $renderedContent, options);
} else if (!options.tooltip && ["file", "pdf", "audio", "video"].includes(type)) {
renderFile(entity, type, $renderedContent);
await renderFile(entity, type, $renderedContent, options);
} else if (type === "mermaid") {
await renderMermaid(entity, $renderedContent);
} else if (type === "render" && entity instanceof FNote) {
Expand Down Expand Up @@ -161,7 +162,7 @@ async function renderCode(note: FNote | FAttachment, $renderedContent: JQuery<HT
await applySingleBlockSyntaxHighlight($codeBlock, normalizeMimeTypeForCKEditor(note.mime));
}

function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLElement>, options: Options = {}) {
async function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLElement>, options: Options = {}) {
const encodedTitle = encodeURIComponent(entity.title);

let url;
Expand Down Expand Up @@ -201,9 +202,39 @@ function renderImage(entity: FNote | FAttachment, $renderedContent: JQuery<HTMLE
}

imageContextMenuService.setupContextMenu($img);

// Add OCR text display for image notes
if (entity instanceof FNote && options.showOcrText) {
await addOCRTextIfAvailable(entity, $renderedContent);
}
}

/**
 * Requests the OCR text of a note from `api/ocr/notes/<noteId>/text` and, if the
 * response carries usable text, appends an "extracted text" section to `$content`.
 *
 * The section is only added when the HTTP response is OK and the JSON payload has
 * truthy `success`, `hasOcr` and `text` fields. The text is inserted through
 * jQuery's `.text()`, i.e. as plain text rather than markup.
 *
 * Any error raised while fetching or parsing is logged with `console.debug` and
 * otherwise ignored, so a missing OCR endpoint never breaks rendering.
 *
 * @param note - the note whose OCR text should be looked up.
 * @param $content - the element the OCR section is appended to.
 */
async function addOCRTextIfAvailable(note: FNote, $content: JQuery<HTMLElement>) {
    try {
        const ocrResponse = await fetch(`api/ocr/notes/${note.noteId}/text`);
        if (!ocrResponse.ok) {
            return;
        }

        const payload = await ocrResponse.json();
        const hasUsableText = payload.success && payload.hasOcr && payload.text;
        if (!hasUsableText) {
            return;
        }

        // Build the wrapper first; the body is filled in separately so the OCR text is never parsed as HTML.
        const $section = $(`
<div class="ocr-text-section">
<div class="ocr-header">
<span class="bx bx-text"></span> ${t("ocr.extracted_text")}
</div>
<div class="ocr-content"></div>
</div>
`);
        const $body = $section.find('.ocr-content');
        $body.text(payload.text);

        $content.append($section);
    } catch (error) {
        // Silently fail if OCR API is not available
        console.debug('Failed to fetch OCR text:', error);
    }
}

function renderFile(entity: FNote | FAttachment, type: string, $renderedContent: JQuery<HTMLElement>) {
async function renderFile(entity: FNote | FAttachment, type: string, $renderedContent: JQuery<HTMLElement>, options: Options = {}) {
let entityType, entityId;

if (entity instanceof FNote) {
Expand Down Expand Up @@ -239,6 +270,11 @@ function renderFile(entity: FNote | FAttachment, type: string, $renderedContent:
$content.append($videoPreview);
}

// Add OCR text display for file notes
if (entity instanceof FNote && options.showOcrText) {
await addOCRTextIfAvailable(entity, $content);
}

if (entityType === "notes" && "noteId" in entity) {
// TODO: we should make this available also for attachments, but there's a problem with "Open externally" support
// in attachment list
Expand Down
23 changes: 23 additions & 0 deletions apps/client/src/stylesheets/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -2251,3 +2251,26 @@ footer.webview-footer button {
content: "\ec24";
transform: rotate(180deg);
}

/* Wrapper of the OCR text block (the markup with this class is built in content_renderer.ts):
   an accented, left-bordered box whose text is left-aligned. */
.ocr-text-section {
margin: 10px 0;
padding: 10px;
background: var(--accented-background-color);
border-left: 3px solid var(--main-border-color);
text-align: left;
}

/* Heading line of the OCR block: bold, slightly smaller and in the muted text color. */
.ocr-header {
font-weight: bold;
margin-bottom: 8px;
font-size: 0.9em;
color: var(--muted-text-color);
}

/* The extracted text itself: height is capped at 150px with a vertical scrollbar when needed;
   pre-wrap keeps the line breaks of the text while still wrapping long lines. */
.ocr-content {
max-height: 150px;
overflow-y: auto;
font-size: 0.9em;
line-height: 1.4;
white-space: pre-wrap;
}
32 changes: 31 additions & 1 deletion apps/client/src/translations/en/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,7 @@
"search_in_note": "Search in note",
"note_source": "Note source",
"note_attachments": "Note attachments",
"view_ocr_text": "View OCR text",
"open_note_externally": "Open note externally",
"open_note_externally_title": "File will be open in an external application and watched for changes. You'll then be able to upload the modified version back to Trilium.",
"open_note_custom": "Open note custom",
Expand Down Expand Up @@ -1303,7 +1304,22 @@
"enable_image_compression": "Enable image compression",
"max_image_dimensions": "Max width / height of an image (image will be resized if it exceeds this setting).",
"max_image_dimensions_unit": "pixels",
"jpeg_quality_description": "JPEG quality (10 - worst quality, 100 - best quality, 50 - 85 is recommended)"
"jpeg_quality_description": "JPEG quality (10 - worst quality, 100 - best quality, 50 - 85 is recommended)",
"ocr_section_title": "Optical Character Recognition (OCR)",
"enable_ocr": "Enable OCR for images",
"ocr_description": "Automatically extract text from images using OCR technology. This makes image content searchable within your notes.",
"ocr_auto_process": "Automatically process new images with OCR",
"ocr_language": "OCR Language",
"ocr_min_confidence": "Minimum confidence threshold",
"ocr_confidence_unit": "(0.0-1.0)",
"ocr_confidence_description": "Only extract text with confidence above this threshold. Lower values include more text but may be less accurate.",
"batch_ocr_title": "Process Existing Images",
"batch_ocr_description": "Process all existing images in your notes with OCR. This may take some time depending on the number of images.",
"batch_ocr_start": "Start Batch OCR Processing",
"batch_ocr_starting": "Starting batch OCR processing...",
"batch_ocr_progress": "Processing {{processed}} of {{total}} images...",
"batch_ocr_completed": "Batch OCR completed! Processed {{processed}} images.",
"batch_ocr_error": "Error during batch OCR: {{error}}"
},
"attachment_erasure_timeout": {
"attachment_erasure_timeout": "Attachment Erasure Timeout",
Expand Down Expand Up @@ -1988,6 +2004,20 @@
"new-item": "New item",
"add-column": "Add Column"
},
"ocr": {
"extracted_text": "Extracted Text (OCR)",
"extracted_text_title": "Extracted Text (OCR)",
"loading_text": "Loading OCR text...",
"no_text_available": "No OCR text available",
"no_text_explanation": "This note has not been processed for OCR text extraction or no text was found.",
"failed_to_load": "Failed to load OCR text",
"extracted_on": "Extracted on: {{date}}",
"unknown_date": "Unknown",
"process_now": "Process OCR",
"processing": "Processing...",
"processing_started": "OCR processing has been started. Please wait a moment and refresh.",
"processing_failed": "Failed to start OCR processing"
},
"command_palette": {
"tree-action-name": "Tree: {{name}}",
"export_note_title": "Export Note",
Expand Down
9 changes: 9 additions & 0 deletions apps/client/src/widgets/buttons/note_actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ const TPL = /*html*/`
<span class="bx bx-code"></span> ${t("note_actions.note_source")}<kbd data-command="showNoteSource"></kbd>
</li>

<li data-trigger-command="showNoteOCRText" class="dropdown-item show-ocr-text-button">
<span class="bx bx-text"></span> ${t("note_actions.view_ocr_text")}<kbd data-command="showNoteOCRText"></kbd>
</li>


<div class="dropdown-divider"></div>

Expand Down Expand Up @@ -117,6 +121,7 @@ export default class NoteActionsWidget extends NoteContextAwareWidget {
private $printActiveNoteButton!: JQuery<HTMLElement>;
private $exportAsPdfButton!: JQuery<HTMLElement>;
private $showSourceButton!: JQuery<HTMLElement>;
private $showOCRTextButton!: JQuery<HTMLElement>;
private $showAttachmentsButton!: JQuery<HTMLElement>;
private $renderNoteButton!: JQuery<HTMLElement>;
private $saveRevisionButton!: JQuery<HTMLElement>;
Expand All @@ -143,6 +148,7 @@ export default class NoteActionsWidget extends NoteContextAwareWidget {
this.$printActiveNoteButton = this.$widget.find(".print-active-note-button");
this.$exportAsPdfButton = this.$widget.find(".export-as-pdf-button");
this.$showSourceButton = this.$widget.find(".show-source-button");
this.$showOCRTextButton = this.$widget.find(".show-ocr-text-button");
this.$showAttachmentsButton = this.$widget.find(".show-attachments-button");
this.$renderNoteButton = this.$widget.find(".render-note-button");
this.$saveRevisionButton = this.$widget.find(".save-revision-button");
Expand Down Expand Up @@ -190,6 +196,9 @@ export default class NoteActionsWidget extends NoteContextAwareWidget {

this.toggleDisabled(this.$showAttachmentsButton, !isInOptions);
this.toggleDisabled(this.$showSourceButton, ["text", "code", "relationMap", "mermaid", "canvas", "mindMap"].includes(note.type));

// Show OCR text button for notes that could have OCR data (images and files)
this.toggleDisabled(this.$showOCRTextButton, ["image", "file"].includes(note.type));

const canPrint = ["text", "code"].includes(note.type);
this.toggleDisabled(this.$printActiveNoteButton, canPrint);
Expand Down
5 changes: 5 additions & 0 deletions apps/client/src/widgets/note_detail.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import ContentWidgetTypeWidget from "./type_widgets/content_widget.js";
import AttachmentListTypeWidget from "./type_widgets/attachment_list.js";
import AttachmentDetailTypeWidget from "./type_widgets/attachment_detail.js";
import MindMapWidget from "./type_widgets/mind_map.js";
import ReadOnlyOCRTextWidget from "./type_widgets/read_only_ocr_text.js";
import utils from "../services/utils.js";
import type { NoteType } from "../entities/fnote.js";
import type TypeWidget from "./type_widgets/type_widget.js";
Expand Down Expand Up @@ -55,6 +56,7 @@ const typeWidgetClasses = {
readOnlyText: ReadOnlyTextTypeWidget,
editableCode: EditableCodeTypeWidget,
readOnlyCode: ReadOnlyCodeTypeWidget,
readOnlyOCRText: ReadOnlyOCRTextWidget,
file: FileTypeWidget,
image: ImageTypeWidget,
search: NoneTypeWidget,
Expand Down Expand Up @@ -85,6 +87,7 @@ type ExtendedNoteType =
| "empty"
| "readOnlyCode"
| "readOnlyText"
| "readOnlyOCRText"
| "editableText"
| "editableCode"
| "attachmentDetail"
Expand Down Expand Up @@ -223,6 +226,8 @@ export default class NoteDetailWidget extends NoteContextAwareWidget {

if (viewScope?.viewMode === "source") {
resultingType = "readOnlyCode";
} else if (viewScope?.viewMode === "ocr") {
resultingType = "readOnlyOCRText";
} else if (viewScope && viewScope.viewMode === "attachments") {
resultingType = viewScope.attachmentId ? "attachmentDetail" : "attachmentList";
} else if (type === "text" && (await this.noteContext?.isReadOnly())) {
Expand Down
Loading