-
Notifications
You must be signed in to change notification settings - Fork 22
Create annotations from highlights #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,141 @@ | ||
| # Copyright 2021 Ben Rush | ||
| # | ||
| # This program is free software: you can redistribute it and/or modify | ||
| # it under the terms of the GNU General Public License as published by | ||
| # the Free Software Foundation, either version 3 of the License, or | ||
| # (at your option) any later version. | ||
| # | ||
| # This program is distributed in the hope that it will be useful, | ||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| # GNU General Public License for more details. | ||
| # | ||
| # You should have received a copy of the GNU General Public License | ||
| # along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
class Point:
    """A 2-D coordinate pair with ``x`` and ``y`` attributes."""

    def __init__(self, x: float, y: float):
        self.x = x
        self.y = y

    def __iter__(self):
        """Yield ``x`` then ``y`` so that ``list(point)`` and unpacking work."""
        yield self.x
        yield self.y

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.x!r}, {self.y!r})"

    def to_list(self) -> list:
        """Return the coordinates as ``[x, y]``."""
        return [self.x, self.y]

    # Backward-compatible alias: existing callers use the camelCase name.
    toList = to_list
|
|
||
class Rect:
    """
    Axis-aligned rectangle described by two diagonally opposite corners.

    From PDF spec:
    a specific array object used to describe locations on a page and
    bounding boxes for a variety of objects and written as an array
    of four numbers giving the coordinates of a pair of diagonally
    opposite corners, typically in the form [ll.x, ll.y, ur.x, ur.y]

    The comparisons below assume ``ll`` holds the smaller and ``ur`` the
    larger coordinate on each axis; corners are not normalised here.
    """

    def __init__(self, ll: Point, ur: Point):
        self.ll = ll
        self.ur = ur

    def intersects(self, rectB: Rect) -> bool:
        """Return True when the two rectangles overlap with positive area.

        Rectangles that merely touch along an edge, and rectangles that
        are degenerate (zero width or zero height), never intersect.
        """
        # A rectangle collapsed to a line (or point) cannot have positive
        # overlap, e.g. l1={-1,0} r1={1,1} l2={0,-1} r2={0,1}.
        for r in (self, rectB):
            if r.ll.x == r.ur.x or r.ll.y == r.ur.y:
                return False

        # One rectangle lies entirely to the left of the other.  Both
        # sides of each comparison must be x coordinates.
        if self.ll.x >= rectB.ur.x or rectB.ll.x >= self.ur.x:
            return False

        # One rectangle lies entirely above the other.
        if self.ur.y <= rectB.ll.y or rectB.ur.y <= self.ll.y:
            return False

        return True

    def union(self, rectB: Rect) -> Rect:
        """Return the smallest Rect that contains both rectangles."""
        ll = Point(min(self.ll.x, rectB.ll.x),
                   min(self.ll.y, rectB.ll.y))
        ur = Point(max(self.ur.x, rectB.ur.x),
                   max(self.ur.y, rectB.ur.y))
        return Rect(ll, ur)

    def to_list(self) -> list:
        """Return ``[ll.x, ll.y, ur.x, ur.y]``."""
        return [self.ll.x, self.ll.y, self.ur.x, self.ur.y]

    # Backward-compatible alias: existing callers use the camelCase name.
    toList = to_list
|
|
||
class QuadPoints:
    """
    Sequence of points describing one or more quadrilaterals.

    From PDF spec:
    An array of 8 x n numbers specifying the coordinates of n quadrilaterals
    in default user space. Each quadrilateral shall encompass a word or group
    of contiguous words in the text underlying the annotation. The coordinates
    for each quadrilateral shall be given in the order x1, y1, x2, y2, x3, y3, x4, y4
    specifying the quadrilateral's four vertices in counterclockwise order
    starting with the lower left. The text shall be oriented with respect to the
    edge connecting points (x1, y1) with (x2, y2).
    """

    points: list[Point]

    def __init__(self, points: list[Point]):
        self.points = points

    def append(self, quadpoints: QuadPoints) -> QuadPoints:
        """Return a new QuadPoints holding this object's points followed by
        those of ``quadpoints``; neither operand is modified."""
        return QuadPoints(self.points + quadpoints.points)

    def to_list(self) -> list:
        """Return the flat ``[x1, y1, x2, y2, ...]`` coordinate list."""
        # Must read the instance attribute; a bare ``points`` is undefined
        # in this scope and raises NameError.
        return [c for p in self.points for c in p.toList()]

    # Backward-compatible alias: existing callers use the camelCase name.
    toList = to_list

    @staticmethod
    def fromRect(rect: Rect) -> QuadPoints:
        """
        Build the four corner points of ``rect``.

        Assumes that the rect is aligned with the text. Will return incorrect
        results otherwise
        """
        # Needs to be in this order to account for rotations applied later?
        # ll.x, ur.y, ur.x, ur.y, ll.x, ll.y, ur.x, ll.y
        quadpoints = [Point(rect.ll.x, rect.ur.y),
                      Point(rect.ur.x, rect.ur.y),
                      Point(rect.ll.x, rect.ll.y),
                      Point(rect.ur.x, rect.ll.y)]
        return QuadPoints(quadpoints)
|
|
||
class Annotation:
    """A PDF annotation: a type name, a bounding rect, quad points and text."""

    annotype: str
    rect: Rect
    quadpoints: QuadPoints
    contents: str

    def __init__(self, annotype: str, rect: Rect,
                 quadpoints: QuadPoints | None = None, contents: str = ""):
        """
        annotype   -- annotation type name (callers pass e.g. "Highlight")
        rect       -- bounding rectangle of the annotation
        quadpoints -- optional QuadPoints; derived from ``rect`` when omitted
        contents   -- text carried by the annotation
        """
        self.annotype = annotype
        self.rect = rect
        if quadpoints:
            # NOTE(review): an explicitly supplied quadpoints is stored as-is
            # and is not checked against rect, so the two can disagree.
            self.quadpoints = quadpoints
        else:
            self.quadpoints = QuadPoints.fromRect(rect)
        self.contents = contents

    def united(self, annot: Annotation) -> Annotation:
        """Return a new Annotation merging this one with ``annot``.

        The rects are unioned, the quad points appended and the contents
        concatenated (self first).

        Raises TypeError when the two annotation types differ.  TypeError
        is a subclass of Exception, so callers catching Exception still work.
        """
        if self.annotype != annot.annotype:
            raise TypeError("Cannot merge annotations with different types")

        return Annotation(self.annotype,
                          self.rect.union(annot.rect),
                          self.quadpoints.append(annot.quadpoints),
                          self.contents + annot.contents)

    @staticmethod
    def union(annotA: Annotation | None,
              annotB: Annotation | None) -> Annotation | None:
        """Merge two annotations, treating None as "no annotation".

        Returns the other argument unchanged when one side is None
        (None when both are), otherwise ``annotA.united(annotB)``.
        """
        if annotA is None:
            return annotB
        if annotB is None:
            return annotA
        return annotA.united(annotB)

    def intersects(self, annot: Annotation) -> bool:
        """Return whether the two annotations' rects intersect."""
        return self.rect.intersects(annot.rect)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,3 +23,4 @@ | |
| TEMPLATE_PATH = xdg_data_home() / 'rmrl' / 'templates' | ||
|
|
||
| VERSION = pkg_resources.get_distribution('rmrl').version | ||
| HIGHLIGHTCOLOR = [1, 0.941177, 0.4] | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's go with |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,9 @@ | |
|
|
||
| from . import lines, pens | ||
| from .constants import DISPLAY, PDFHEIGHT, PDFWIDTH, PTPERPX, TEMPLATE_PATH | ||
| from .annotation import Annotation, Rect, Point | ||
|
|
||
| from typing import List, Tuple | ||
|
|
||
| log = logging.getLogger(__name__) | ||
|
|
||
|
|
@@ -34,6 +36,12 @@ def __init__(self, source, pid, pagenum): | |
| self.source = source | ||
| self.num = pagenum | ||
|
|
||
| self.highlights = None | ||
| highlightspath = f'{{ID}}.highlights/{pid}.json' | ||
| if source.exists(highlightspath): | ||
| with source.open(highlightspath, 'r') as f: | ||
| self.highlights = json.load(f)["highlights"] | ||
|
|
||
| # On disk, these files are named by a UUID | ||
| self.rmpath = f'{{ID}}/{pid}.rm' | ||
| if not source.exists(self.rmpath): | ||
|
|
@@ -71,15 +79,41 @@ def __init__(self, source, pid, pagenum): | |
| self.load_layers() | ||
|
|
||
| def get_grouped_annotations(self): | ||
| # Return the annotations grouped by proximity. If they are | ||
| # within a distance of each other, count them as a single | ||
| # annotation. | ||
|
|
||
| # Annotations should be delivered in an array, where each | ||
| # index is a tuple (LayerName, | ||
| annotations = [] | ||
| if self.highlights is not None: | ||
| annotations.append(("Highlights",[])) | ||
|
|
||
| for h in self.highlights: | ||
| note = None | ||
| cursor = -1 | ||
| for stroke in h: | ||
| log.debug(stroke) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add a brief description to the logging output of what this thing is. |
||
| rect = None | ||
| for r in stroke["rects"]: # I guess in theory there could be more than one? | ||
| ll = Point(r["x"], r["y"]) | ||
| ur = Point(r["x"]+r["width"], r["y"]+r["height"]) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Spaces around |
||
| if rect: rect = rect.union(Rect(ll,ur)) | ||
| else: rect = Rect(ll, ur) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: If and else blocks on separate lines. Alternatively, |
||
|
|
||
|
|
||
| contents = stroke["text"] + " " | ||
| newnote = Annotation("Highlight", rect, contents=contents) | ||
|
|
||
| if cursor > 0 and (stroke["start"] - cursor > 10): # sometimes there are small gaps due to whitespace? | ||
| # For now, treat non-continuous highlights as separate notes | ||
| annotations[0][1].append(note) | ||
| note = newnote | ||
| else: | ||
| note = Annotation.union(note, newnote) | ||
|
|
||
| cursor = stroke["start"]+stroke["length"] | ||
|
|
||
| if note: | ||
| annotations[0][1].append(note) | ||
|
|
||
| for layer in self.layers: | ||
| annotations.append(layer.get_grouped_annotations()) | ||
|
|
||
| return annotations | ||
|
|
||
| def load_layers(self): | ||
|
|
@@ -172,27 +206,27 @@ def __init__(self, page, name=None): | |
| # PDF layers are ever implemented. | ||
| self.annot_paths = [] | ||
|
|
||
| def get_grouped_annotations(self): | ||
| # return: (LayerName, [(AnnotType, minX, minY, maxX, maxY)]) | ||
| def get_grouped_annotations(self) -> Tuple[str, list]: | ||
| # return: (LayerName, [Annotations]) | ||
|
|
||
| # Compare all the annot_paths to each other. If any overlap, | ||
| # they will be grouped together. This is done recursively. | ||
| def grouping_func(pathset): | ||
| newset = [] | ||
|
|
||
| for p in pathset: | ||
| annotype = p[0] | ||
| path = p[1] | ||
| annotype = p.annotype | ||
| #path = p[1] #returns (xmin, ymin, xmax, ymax) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Remove commented code. |
||
| did_fit = False | ||
| for i, g in enumerate(newset): | ||
| gannotype = g[0] | ||
| group = g[1] | ||
| gannotype = g.annotype | ||
| #group = g[1] | ||
| # Only compare annotations of the same type | ||
| if gannotype != annotype: | ||
| continue | ||
| if path.intersects(group): | ||
| if p.intersects(g): | ||
| did_fit = True | ||
| newset[i] = (annotype, group.united(path)) | ||
| newset[i] = g.united(p) #left off here, need to build united and quadpoints | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this comment valid? If so, should we get this finished up? |
||
| break | ||
| if did_fit: | ||
| continue | ||
|
|
@@ -207,22 +241,7 @@ def grouping_func(pathset): | |
| return newset | ||
|
|
||
| grouped = grouping_func(self.annot_paths) | ||
|
|
||
| # Get the bounding rect of each group, which sets the PDF | ||
| # annotation geometry. | ||
| annot_rects = [] | ||
| for p in grouped: | ||
| annotype = p[0] | ||
| path = p[1] | ||
| rect = path.boundingRect() | ||
| annot = (annotype, | ||
| float(rect.x()), | ||
| float(rect.y()), | ||
| float(rect.x() + rect.width()), | ||
| float(rect.y() + rect.height())) | ||
| annot_rects.append(annot) | ||
|
|
||
| return (self.name, annot_rects) | ||
| return (self.name, grouped) | ||
|
|
||
| def paint_strokes(self, canvas, vector): | ||
| for stroke in self.strokes: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,38 +15,66 @@ | |
| # along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
|
|
||
| from .generic import GenericPen | ||
| from reportlab.graphics.shapes import Rect | ||
| from reportlab.pdfgen.pathobject import PDFPathObject | ||
| from ..annotation import Annotation, Point, Rect, QuadPoints | ||
|
|
||
| class HighlighterPen(GenericPen): | ||
| def __init__(self, *args, **kwargs): | ||
| super().__init__(*args, **kwargs) | ||
| self.layer = kwargs.get('layer') | ||
| self.annotate = False #TODO bool(int(QSettings().value( | ||
| self.annotate = True#False #TODO bool(int(QSettings().value( | ||
| # 'pane/notebooks/export_pdf_annotate'))) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was an ugly hack as I was translating the code. But now, we should either make this configurable or remove the flag entirely. (Is there a reason not to include annotations every time? I can't think of one.) |
||
|
|
||
| def paint_stroke(self, canvas, stroke): | ||
| canvas.saveState() | ||
| canvas.setLineCap(2) # Square | ||
| canvas.setLineJoin(1) # Round | ||
| #canvas.setDash ?? for solid line | ||
| canvas.setStrokeColor((1.000, 0.914, 0.290), alpha=0.392) | ||
| white = (1, 1, 1) #color handled by annotation object in PDF | ||
| canvas.setStrokeColor(white, alpha=0.0) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here I'll display my ignorance about the PDF format: If we're relying on the annotation object to be the visible element, do we need these invisible strokes at all? |
||
| canvas.setLineWidth(stroke.width) | ||
|
|
||
| path = canvas.beginPath() | ||
| path.moveTo(stroke.segments[0].x, stroke.segments[0].y) | ||
|
|
||
| x0 = stroke.segments[0].x | ||
| y0 = stroke.segments[0].y | ||
|
|
||
| ll = Point(x0, y0) | ||
| ur = Point(x0, y0) | ||
|
|
||
| for segment in stroke.segments[1:]: | ||
| path.lineTo(segment.x, segment.y) | ||
| canvas.drawPath(path, stroke=1, fill=0) | ||
| canvas.restoreState() | ||
|
|
||
| # Do some basic vector math to rotate the line width | ||
| # perpendicular to this segment | ||
|
|
||
| x1 = segment.x | ||
| y1 = segment.y | ||
| width = segment.width | ||
|
|
||
| l = [x1-x0, y1-y0] | ||
| if l[0] == 0: | ||
| orthogonal = [1, 0] | ||
| else: | ||
| v0 = -l[1]/l[0] | ||
| scale = (1+v0**2)**0.5 | ||
| orthogonal = [v0/scale, 1/scale] | ||
|
|
||
| xmin = x0-width/2*orthogonal[0] | ||
| ymin = y0-width/2*orthogonal[1] | ||
| xmax = x1+width/2*orthogonal[0] | ||
| ymax = y1+width/2*orthogonal[1] | ||
|
|
||
| ll = Point(min(ll.x, xmin), min(ll.y, ymin)) | ||
| ur = Point(max(ur.x, xmax), max(ur.y, ymax)) | ||
|
|
||
| x0 = x1 | ||
| y0 = y1 | ||
|
|
||
| if self.annotate: | ||
| assert False | ||
| # Create outline of the path. Annotations that are close to | ||
| # each other get groups. This is determined by overlapping | ||
| # paths. In order to fuzz this, we'll double the normal | ||
| # width and extend the end caps. | ||
| self.setWidthF(self.widthF() * 2) | ||
| self.setCapStyle(Qt.SquareCap) | ||
| opath = QPainterPathStroker(self).createStroke(path) | ||
| # The annotation type is carried all the way through. This | ||
| # is the type specified in the PDF spec. | ||
| self.layer.annot_paths.append(('Highlight', opath)) | ||
| self.layer.annot_paths.append(Annotation("Highlight", Rect(ll, ur))) | ||
|
|
||
| canvas.drawPath(path, stroke=1, fill=0) | ||
| canvas.restoreState() | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Trailing new line. |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: `to_list`. (The existing code base is not terribly consistent here, since it derived from some Qt code. But I'm mostly following PEP 8 with new code, I think.)
Alternatively, we could implement `__iter__` and then just call `list(point)`. And depending on what it's used for, we might just iterate through the point directly. But I'm happy with `to_list`; only go this way if it seems to provide other benefits.