Skip to content

Commit 5e6de45

Browse files
committed
Refactor UReadability methods to use pointer receiver
Needed for OpenAI support later on
1 parent 287949e commit 5e6de45

File tree

3 files changed

+10
-10
lines changed

3 files changed

+10
-10
lines changed

backend/extractor/pics.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111
log "github.com/go-pkgz/lgr"
1212
)
1313

14-
func (f UReadability) extractPics(iselect *goquery.Selection, url string) (mainImage string, allImages []string, ok bool) {
14+
func (f *UReadability) extractPics(iselect *goquery.Selection, url string) (mainImage string, allImages []string, ok bool) {
1515
images := make(map[int]string)
1616

1717
type imgInfo struct {
@@ -58,7 +58,7 @@ func (f UReadability) extractPics(iselect *goquery.Selection, url string) (mainI
5858
}
5959

6060
// getImageSize loads image to get size
61-
func (f UReadability) getImageSize(url string) (size int) {
61+
func (f *UReadability) getImageSize(url string) (size int) {
6262
httpClient := &http.Client{Timeout: time.Second * 30}
6363
req, err := http.NewRequest("GET", url, nil)
6464
if err != nil {

backend/extractor/readability.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,17 +59,17 @@ var (
5959
const userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15"
6060

6161
// Extract fetches page and retrieves article
62-
func (f UReadability) Extract(ctx context.Context, reqURL string) (*Response, error) {
62+
func (f *UReadability) Extract(ctx context.Context, reqURL string) (*Response, error) {
6363
return f.extractWithRules(ctx, reqURL, nil)
6464
}
6565

6666
// ExtractByRule fetches page and retrieves article using a specific rule
67-
func (f UReadability) ExtractByRule(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
67+
func (f *UReadability) ExtractByRule(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
6868
return f.extractWithRules(ctx, reqURL, rule)
6969
}
7070

7171
// extractWithRules is the core function that handles extraction with or without a specific rule
72-
func (f UReadability) extractWithRules(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
72+
func (f *UReadability) extractWithRules(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
7373
log.Printf("[INFO] extract %s", reqURL)
7474
rb := &Response{}
7575

@@ -140,7 +140,7 @@ func (f UReadability) extractWithRules(ctx context.Context, reqURL string, rule
140140
// getContent retrieves content from raw body string, both content (text only) and rich (with html tags)
141141
// if rule is provided, it uses custom rule, otherwise tries to retrieve one from the storage,
142142
// and at last tries to use general readability parser
143-
func (f UReadability) getContent(ctx context.Context, body, reqURL string, rule *datastore.Rule) (content, rich string, err error) {
143+
func (f *UReadability) getContent(ctx context.Context, body, reqURL string, rule *datastore.Rule) (content, rich string, err error) {
144144
// general parser
145145
genParser := func(body, _ string) (content, rich string, err error) {
146146
doc, err := readability.NewDocument(body)
@@ -192,7 +192,7 @@ func (f UReadability) getContent(ctx context.Context, body, reqURL string, rule
192192
}
193193

194194
// normalizeLinks makes all links absolute and returns all found links
195-
func (f UReadability) normalizeLinks(data string, reqContext *http.Request) (result string, links []string) {
195+
func (f *UReadability) normalizeLinks(data string, reqContext *http.Request) (result string, links []string) {
196196
absoluteLink := func(link string) (absLink string, changed bool) {
197197
if r, err := reqContext.URL.Parse(link); err == nil {
198198
return r.String(), r.String() != link

backend/extractor/text.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212
)
1313

1414
// get clean text from html content
15-
func (f UReadability) getText(content, title string) string {
15+
func (f *UReadability) getText(content, title string) string {
1616
cleanText := sanitize.HTML(content)
1717
cleanText = strings.Replace(cleanText, title, "", 1) // get rid of title in snippet
1818
cleanText = strings.ReplaceAll(cleanText, "\t", " ")
@@ -32,7 +32,7 @@ func (f UReadability) getText(content, title string) string {
3232
}
3333

3434
// get snippet from clean text content
35-
func (f UReadability) getSnippet(cleanText string) string {
35+
func (f *UReadability) getSnippet(cleanText string) string {
3636
cleanText = strings.ReplaceAll(cleanText, "\n", " ")
3737
size := len([]rune(cleanText))
3838
if size > f.SnippetSize {
@@ -50,7 +50,7 @@ func (f UReadability) getSnippet(cleanText string) string {
5050
}
5151

5252
// detect encoding, content type and convert content to utf8
53-
func (f UReadability) toUtf8(content []byte, header http.Header) (contentType, origEncoding, result string) {
53+
func (f *UReadability) toUtf8(content []byte, header http.Header) (contentType, origEncoding, result string) {
5454
getContentTypeAndEncoding := func(str string) (contentType, encoding string) { // from "text/html; charset=windows-1251"
5555
elems := strings.Split(str, ";")
5656
contentType = strings.TrimSpace(elems[0])

0 commit comments

Comments
 (0)