Skip to content

Commit 5e6de45

Browse files
committed
Refactor UReadability methods to use pointer receiver
Needed for OpenAI support later on
1 parent 287949e commit 5e6de45

File tree

3 files changed

+10
-10
lines changed

3 files changed

+10
-10
lines changed

backend/extractor/pics.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111
log "github.com/go-pkgz/lgr"
1212
)
1313

14-
func (f UReadability) extractPics(iselect *goquery.Selection, url string) (mainImage string, allImages []string, ok bool) {
14+
func (f *UReadability) extractPics(iselect *goquery.Selection, url string) (mainImage string, allImages []string, ok bool) {
1515
images := make(map[int]string)
1616

1717
type imgInfo struct {
@@ -58,7 +58,7 @@ func (f UReadability) extractPics(iselect *goquery.Selection, url string) (mainI
5858
}
5959

6060
// getImageSize loads image to get size
61-
func (f UReadability) getImageSize(url string) (size int) {
61+
func (f *UReadability) getImageSize(url string) (size int) {
6262
httpClient := &http.Client{Timeout: time.Second * 30}
6363
req, err := http.NewRequest("GET", url, nil)
6464
if err != nil {

backend/extractor/readability.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,17 +59,17 @@ var (
5959
const userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15"
6060

6161
// Extract fetches page and retrieves article
62-
func (f UReadability) Extract(ctx context.Context, reqURL string) (*Response, error) {
62+
func (f *UReadability) Extract(ctx context.Context, reqURL string) (*Response, error) {
6363
return f.extractWithRules(ctx, reqURL, nil)
6464
}
6565

6666
// ExtractByRule fetches page and retrieves article using a specific rule
67-
func (f UReadability) ExtractByRule(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
67+
func (f *UReadability) ExtractByRule(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
6868
return f.extractWithRules(ctx, reqURL, rule)
6969
}
7070

7171
// extractWithRules is the core function that handles extraction with or without a specific rule
72-
func (f UReadability) extractWithRules(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
72+
func (f *UReadability) extractWithRules(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
7373
log.Printf("[INFO] extract %s", reqURL)
7474
rb := &Response{}
7575

@@ -140,7 +140,7 @@ func (f UReadability) extractWithRules(ctx context.Context, reqURL string, rule
140140
// getContent retrieves content from raw body string, both content (text only) and rich (with html tags)
141141
// if rule is provided, it uses custom rule, otherwise tries to retrieve one from the storage,
142142
// and at last tries to use general readability parser
143-
func (f UReadability) getContent(ctx context.Context, body, reqURL string, rule *datastore.Rule) (content, rich string, err error) {
143+
func (f *UReadability) getContent(ctx context.Context, body, reqURL string, rule *datastore.Rule) (content, rich string, err error) {
144144
// general parser
145145
genParser := func(body, _ string) (content, rich string, err error) {
146146
doc, err := readability.NewDocument(body)
@@ -192,7 +192,7 @@ func (f UReadability) getContent(ctx context.Context, body, reqURL string, rule
192192
}
193193

194194
// normalizeLinks makes all links absolute and returns all found links
195-
func (f UReadability) normalizeLinks(data string, reqContext *http.Request) (result string, links []string) {
195+
func (f *UReadability) normalizeLinks(data string, reqContext *http.Request) (result string, links []string) {
196196
absoluteLink := func(link string) (absLink string, changed bool) {
197197
if r, err := reqContext.URL.Parse(link); err == nil {
198198
return r.String(), r.String() != link

backend/extractor/text.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212
)
1313

1414
// get clean text from html content
15-
func (f UReadability) getText(content, title string) string {
15+
func (f *UReadability) getText(content, title string) string {
1616
cleanText := sanitize.HTML(content)
1717
cleanText = strings.Replace(cleanText, title, "", 1) // get rid of title in snippet
1818
cleanText = strings.ReplaceAll(cleanText, "\t", " ")
@@ -32,7 +32,7 @@ func (f UReadability) getText(content, title string) string {
3232
}
3333

3434
// get snippet from clean text content
35-
func (f UReadability) getSnippet(cleanText string) string {
35+
func (f *UReadability) getSnippet(cleanText string) string {
3636
cleanText = strings.ReplaceAll(cleanText, "\n", " ")
3737
size := len([]rune(cleanText))
3838
if size > f.SnippetSize {
@@ -50,7 +50,7 @@ func (f UReadability) getSnippet(cleanText string) string {
5050
}
5151

5252
// detect encoding, content type and convert content to utf8
53-
func (f UReadability) toUtf8(content []byte, header http.Header) (contentType, origEncoding, result string) {
53+
func (f *UReadability) toUtf8(content []byte, header http.Header) (contentType, origEncoding, result string) {
5454
getContentTypeAndEncoding := func(str string) (contentType, encoding string) { // from "text/html; charset=windows-1251"
5555
elems := strings.Split(str, ";")
5656
contentType = strings.TrimSpace(elems[0])

0 commit comments

Comments
 (0)