Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 141 additions & 0 deletions rmrl/annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Copyright 2021 Ben Rush
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

class Point:
    """A 2-D coordinate pair with ``x`` and ``y`` attributes."""

    def __init__(self, x: float, y: float):
        self.x = x
        self.y = y

    def __iter__(self):
        """Yield ``x`` then ``y`` so ``list(point)`` and unpacking work."""
        yield self.x
        yield self.y

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.x!r}, {self.y!r})"

    def to_list(self) -> list:
        """Return the coordinates as the two-element list ``[x, y]``."""
        return [self.x, self.y]

    def toList(self) -> list:
        """Backward-compatible camelCase spelling of :meth:`to_list`."""
        return self.to_list()

class Rect:
    """
    Axis-aligned rectangle stored as two diagonally opposite corners,
    ``ll`` (minimum x/y) and ``ur`` (maximum x/y).

    From PDF spec:
    a specific array object used to describe locations on a page and
    bounding boxes for a variety of objects and written as an array
    of four numbers giving the coordinates of a pair of diagonally
    opposite corners, typically in the form [ll.x, ll.y, ur.x, ur.y]
    """

    def __init__(self, ll: Point, ur: Point):
        self.ll = ll
        self.ur = ur

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.ll!r}, {self.ur!r})"

    def intersects(self, rectB: Rect) -> bool:
        """Return True when this rectangle and ``rectB`` overlap with
        positive area.

        Rectangles that merely touch along an edge, and rectangles that
        have zero width or zero height, are reported as not intersecting.
        """
        # A rectangle with zero width or height is really a line (or a
        # point) and cannot have positive overlap with anything.
        # For example: l1={-1,0} r1={1,1} l2={0,-1} r2={0,1}
        if (self.ll.x == self.ur.x or self.ll.y == self.ur.y
                or rectB.ll.x == rectB.ur.x or rectB.ll.y == rectB.ur.y):
            return False

        # One rectangle lies entirely to the left of the other. Both
        # sides of each comparison must be x coordinates.
        if self.ll.x >= rectB.ur.x or rectB.ll.x >= self.ur.x:
            return False

        # One rectangle lies entirely above the other.
        if self.ur.y <= rectB.ll.y or rectB.ur.y <= self.ll.y:
            return False

        return True

    def union(self, rectB: Rect) -> Rect:
        """Return a new Rect that is the bounding box of this rectangle
        and ``rectB``. Neither operand is modified."""
        ll = Point(min(self.ll.x, rectB.ll.x),
                   min(self.ll.y, rectB.ll.y))
        ur = Point(max(self.ur.x, rectB.ur.x),
                   max(self.ur.y, rectB.ur.y))
        return Rect(ll, ur)

    def to_list(self) -> list:
        """Return ``[ll.x, ll.y, ur.x, ur.y]``."""
        return [self.ll.x, self.ll.y, self.ur.x, self.ur.y]

    def toList(self) -> list:
        """Backward-compatible camelCase spelling of :meth:`to_list`."""
        return self.to_list()

class QuadPoints:
    """
    Ordered collection of points describing one or more quadrilaterals.

    From PDF spec:
    An array of 8 x n numbers specifying the coordinates of n quadrilaterals
    in default user space. Each quadrilateral shall encompass a word or group
    of contiguous words in the text underlying the annotation. The coordinates
    for each quadrilateral shall be given in the order x1, y1, x2, y2, x3, y3, x4, y4
    specifying the quadrilateral's four vertices in counterclockwise order
    starting with the lower left. The text shall be oriented with respect to the
    edge connecting points (x1, y1) with (x2, y2).
    """

    points: list[Point]

    def __init__(self, points: list[Point]):
        self.points = points

    def append(self, quadpoints: QuadPoints) -> QuadPoints:
        """Return a new QuadPoints holding this object's points followed
        by those of ``quadpoints``. Neither operand is modified."""
        return QuadPoints(self.points + quadpoints.points)

    def to_list(self) -> list:
        """Return every point flattened into ``[x1, y1, x2, y2, ...]``."""
        # Must read the instance attribute; a bare ``points`` is not
        # defined in this scope and raises NameError.
        return [c for p in self.points for c in p.toList()]

    def toList(self) -> list:
        """Backward-compatible camelCase spelling of :meth:`to_list`."""
        return self.to_list()

    @staticmethod
    def from_rect(rect: Rect) -> QuadPoints:
        """
        Build the four-point quadrilateral covering ``rect``.

        Assumes that the rect is aligned with the text. Will return incorrect
        results otherwise
        """
        # Emitted as upper-left, upper-right, lower-left, lower-right:
        # ll.x, ur.y, ur.x, ur.y, ll.x, ll.y, ur.x, ll.y
        # Needs to be in this order to account for rotations applied later?
        # NOTE(review): this differs from the counterclockwise order quoted
        # in the class docstring -- confirm against target PDF viewers.
        quadpoints = [Point(rect.ll.x, rect.ur.y),
                      Point(rect.ur.x, rect.ur.y),
                      Point(rect.ll.x, rect.ll.y),
                      Point(rect.ur.x, rect.ll.y)]
        return QuadPoints(quadpoints)

    @staticmethod
    def fromRect(rect: Rect) -> QuadPoints:
        """Backward-compatible camelCase spelling of :meth:`from_rect`."""
        return QuadPoints.from_rect(rect)

class Annotation:
    """A single annotation: its type name, bounding rectangle, quad
    points and text contents."""

    annotype: str
    rect: Rect
    quadpoints: QuadPoints
    contents: str

    def __init__(self, annotype: str, rect: Rect,
                 quadpoints: QuadPoints | None = None, contents: str = ""):
        """
        annotype:   annotation type name (e.g. "Highlight").
        rect:       bounding rectangle of the annotation.
        quadpoints: optional QuadPoints; when omitted (or falsy) they are
                    derived from ``rect``.
        contents:   text attached to the annotation.
        """
        self.annotype = annotype
        self.rect = rect
        if quadpoints:
            # NOTE(review): nothing checks that caller-supplied quadpoints
            # agree with ``rect``; the two can be inconsistent.
            self.quadpoints = quadpoints
        else:
            self.quadpoints = QuadPoints.fromRect(rect)
        self.contents = contents

    def united(self, annot: Annotation) -> Annotation:
        """Return a new Annotation merging this one with ``annot``.

        The result has the union of both rects, this annotation's quad
        points followed by ``annot``'s, and the concatenated contents.

        Raises ValueError if the two annotations have different types.
        """
        if self.annotype != annot.annotype:
            # ValueError is a subclass of Exception, so callers that
            # caught the previous bare Exception keep working.
            raise ValueError("Cannot merge annotations with different types")

        return Annotation(self.annotype,
                          self.rect.union(annot.rect),
                          self.quadpoints.append(annot.quadpoints),
                          self.contents + annot.contents)

    @staticmethod
    def union(annotA: Annotation, annotB: Annotation) -> Annotation:
        """Merge two annotations, either of which may be None.

        If one argument is None the other is returned unchanged;
        otherwise the result is ``annotA.united(annotB)``.
        """
        if annotA is None:
            return annotB
        if annotB is None:
            return annotA
        return annotA.united(annotB)

    def intersects(self, annot: Annotation) -> bool:
        """Return whether this annotation's rect intersects ``annot``'s."""
        return self.rect.intersects(annot.rect)
1 change: 1 addition & 0 deletions rmrl/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@
TEMPLATE_PATH = xdg_data_home() / 'rmrl' / 'templates'

VERSION = pkg_resources.get_distribution('rmrl').version
# Highlight colour as three numeric components -- presumably RGB in the
# 0-1 range (TODO confirm against the code that consumes it).
HIGHLIGHTCOLOR = [1, 0.941177, 0.4]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's go with HIGHLIGHT_COLOR for now. (But this should be moved into a config, probably.)

79 changes: 49 additions & 30 deletions rmrl/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@

from . import lines, pens
from .constants import DISPLAY, PDFHEIGHT, PDFWIDTH, PTPERPX, TEMPLATE_PATH
from .annotation import Annotation, Rect, Point

from typing import List, Tuple

log = logging.getLogger(__name__)

Expand All @@ -34,6 +36,12 @@ def __init__(self, source, pid, pagenum):
self.source = source
self.num = pagenum

self.highlights = None
highlightspath = f'{{ID}}.highlights/{pid}.json'
if source.exists(highlightspath):
with source.open(highlightspath, 'r') as f:
self.highlights = json.load(f)["highlights"]

# On disk, these files are named by a UUID
self.rmpath = f'{{ID}}/{pid}.rm'
if not source.exists(self.rmpath):
Expand Down Expand Up @@ -71,15 +79,41 @@ def __init__(self, source, pid, pagenum):
self.load_layers()

def get_grouped_annotations(self):
    """Return this page's annotations grouped by proximity.

    Annotations that are within a distance of each other count as a
    single annotation. The result is a list in which each entry is a
    tuple ``(LayerName, [Annotation, ...])``. When the page has
    highlight data, the first entry is ``("Highlights", [...])``; it is
    followed by one entry per layer.
    """
    annotations = []
    if self.highlights is not None:
        highlight_notes = []
        annotations.append(("Highlights", highlight_notes))

        for h in self.highlights:
            note = None
            cursor = -1  # end offset of the previous stroke; -1 = none yet
            for stroke in h:
                log.debug("Highlight stroke: %s", stroke)

                # Bounding box of all rects in this stroke (in theory
                # there could be more than one).
                rect = None
                for r in stroke["rects"]:
                    ll = Point(r["x"], r["y"])
                    ur = Point(r["x"] + r["width"], r["y"] + r["height"])
                    box = Rect(ll, ur)
                    rect = box if rect is None else rect.union(box)

                if rect is None:
                    # No geometry for this stroke: an Annotation cannot
                    # be built without a rect, so skip it.
                    continue

                contents = stroke["text"] + " "
                newnote = Annotation("Highlight", rect, contents=contents)

                # Sometimes there are small gaps due to whitespace?
                # Anything further than 10 past the previous stroke's
                # end is treated as a break.
                if cursor > 0 and (stroke["start"] - cursor > 10):
                    # For now, treat non-continuous highlights as
                    # separate notes.
                    highlight_notes.append(note)
                    note = newnote
                else:
                    note = Annotation.union(note, newnote)

                cursor = stroke["start"] + stroke["length"]

            if note:
                highlight_notes.append(note)

    for layer in self.layers:
        annotations.append(layer.get_grouped_annotations())

    return annotations

def load_layers(self):
Expand Down Expand Up @@ -172,27 +206,27 @@ def __init__(self, page, name=None):
# PDF layers are ever implemented.
self.annot_paths = []

def get_grouped_annotations(self):
# return: (LayerName, [(AnnotType, minX, minY, maxX, maxY)])
def get_grouped_annotations(self) -> Tuple[str, list]:
# return: (LayerName, [Annotations])

# Compare all the annot_paths to each other. If any overlap,
# they will be grouped together. This is done recursively.
def grouping_func(pathset):
newset = []

for p in pathset:
annotype = p[0]
path = p[1]
annotype = p.annotype
#path = p[1] #returns (xmin, ymin, xmax, ymax)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Remove commented code.

did_fit = False
for i, g in enumerate(newset):
gannotype = g[0]
group = g[1]
gannotype = g.annotype
#group = g[1]
# Only compare annotations of the same type
if gannotype != annotype:
continue
if path.intersects(group):
if p.intersects(g):
did_fit = True
newset[i] = (annotype, group.united(path))
newset[i] = g.united(p) #left off here, need to build united and quadpoints
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this comment valid? If so, should we get this finished up?

break
if did_fit:
continue
Expand All @@ -207,22 +241,7 @@ def grouping_func(pathset):
return newset

grouped = grouping_func(self.annot_paths)

# Get the bounding rect of each group, which sets the PDF
# annotation geometry.
annot_rects = []
for p in grouped:
annotype = p[0]
path = p[1]
rect = path.boundingRect()
annot = (annotype,
float(rect.x()),
float(rect.y()),
float(rect.x() + rect.width()),
float(rect.y() + rect.height()))
annot_rects.append(annot)

return (self.name, annot_rects)
return (self.name, grouped)

def paint_strokes(self, canvas, vector):
for stroke in self.strokes:
Expand Down
58 changes: 43 additions & 15 deletions rmrl/pens/highlighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,38 +15,66 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from .generic import GenericPen
from reportlab.graphics.shapes import Rect
from reportlab.pdfgen.pathobject import PDFPathObject
from ..annotation import Annotation, Point, Rect, QuadPoints

class HighlighterPen(GenericPen):
    """Pen for highlighter strokes.

    Each stroke is painted fully transparent on the canvas and, when
    ``self.annotate`` is set, recorded on the owning layer as a
    "Highlight" Annotation covering the stroke's bounding box.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.layer = kwargs.get('layer')
        # TODO: make this configurable or drop the flag entirely. It was
        # originally bool(int(QSettings().value(
        #     'pane/notebooks/export_pdf_annotate'))).
        self.annotate = True

    def paint_stroke(self, canvas, stroke):
        """Draw ``stroke`` on ``canvas`` and register its annotation.

        The annotation rect is the axis-aligned bounding box of every
        segment, widened by half the segment width perpendicular to the
        segment's direction.
        """
        if not stroke.segments:
            # Nothing to draw and no geometry to annotate.
            return

        canvas.saveState()
        canvas.setLineCap(2)  # Square
        canvas.setLineJoin(1)  # Round
        # Color is handled by the annotation object in the PDF, so the
        # stroke itself is painted with zero alpha.
        white = (1, 1, 1)
        canvas.setStrokeColor(white, alpha=0.0)
        canvas.setLineWidth(stroke.width)

        first = stroke.segments[0]
        path = canvas.beginPath()
        path.moveTo(first.x, first.y)

        x0, y0 = first.x, first.y
        min_x = max_x = x0
        min_y = max_y = y0

        for segment in stroke.segments[1:]:
            path.lineTo(segment.x, segment.y)

            x1, y1 = segment.x, segment.y
            half = segment.width / 2
            dx, dy = x1 - x0, y1 - y0

            # Magnitudes of the unit normal to this segment. A vertical
            # (or zero-length) segment widens along x only.
            if dx == 0:
                nx, ny = 1, 0
            else:
                length = (dx * dx + dy * dy) ** 0.5
                nx, ny = abs(dy) / length, abs(dx) / length

            # Widen in both directions at BOTH endpoints, so the box is
            # right regardless of which way the segment runs.
            off_x, off_y = half * nx, half * ny
            min_x = min(min_x, x0 - off_x, x1 - off_x)
            min_y = min(min_y, y0 - off_y, y1 - off_y)
            max_x = max(max_x, x0 + off_x, x1 + off_x)
            max_y = max(max_y, y0 + off_y, y1 + off_y)

            x0, y0 = x1, y1

        if self.annotate:
            # The annotation type is carried all the way through. This
            # is the type specified in the PDF spec.
            bounds = Rect(Point(min_x, min_y), Point(max_x, max_y))
            self.layer.annot_paths.append(Annotation("Highlight", bounds))

        canvas.drawPath(path, stroke=1, fill=0)
        canvas.restoreState()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Trailing new line.

Loading