Skip to content

Commit 8c9d615

Browse files
committed
Add article summary feature with OpenAI integration
- Introduce 'summary' query parameter in /api/content/v1/parser endpoint
- Integrate OpenAI API for generating article summaries
- Add OpenAIKey field to Server struct and corresponding command-line flag
- Update extractArticleEmulateReadability to handle summary requests
- Add generateSummary method using OpenAI's GPT-4o model (turns out to be faster than even 4o mini)
- Add OpenAIClient interface and mock for testing
- Update README.md with new configuration options and API details

This feature allows users to request a summary of extracted articles using OpenAI's GPT-4o model. To ensure secure usage, summary generation requires a valid server token. The changes include comprehensive error handling and test coverage for various scenarios, including token validation and server misconfiguration.
1 parent 5e6de45 commit 8c9d615

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+6997
-3
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
| address | UKEEPER_ADDRESS | all interfaces | web server listening address |
1212
| port | UKEEPER_PORT | `8080` | web server port |
1313
| mongo_uri | MONGO_URI | none | MongoDB connection string, _required_ |
14+
| openai_key | OPENAI_KEY | none | OpenAI API key for summary generation |
1415
| frontend_dir | FRONTEND_DIR | `/srv/web` | directory with frontend files |
1516
| token | TOKEN | none | token for /content/v1/parser endpoint auth |
1617
| mongo-delay | MONGO_DELAY | `0` | mongo initial delay |
@@ -20,7 +21,7 @@
2021

2122
### API
2223

23-
GET /api/content/v1/parser?token=secret&url=http://aa.com/blah - extract content (emulate Readability API parse call)
24+
GET /api/content/v1/parser?token=secret&summary=true&url=http://aa.com/blah - extract content (emulate Readability API parse call); the `summary` parameter is optional and requires both the OpenAI key and the server token to be configured
2425
POST /api/v1/extract {url: http://aa.com/blah} - extract content
2526

2627
## Development

backend/extractor/openai_mock.go

Lines changed: 82 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/extractor/readability.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,17 @@ import (
1414
"github.com/PuerkitoBio/goquery"
1515
log "github.com/go-pkgz/lgr"
1616
"github.com/mauidude/go-readability"
17+
"github.com/sashabaranov/go-openai"
1718
"go.mongodb.org/mongo-driver/bson/primitive"
1819

1920
"github.com/ukeeper/ukeeper-redabilty/backend/datastore"
2021
)
2122

23+
//go:generate moq -out openai_mock.go . OpenAIClient
24+
// OpenAIClient is the single chat-completion call that summary generation needs
// from an OpenAI client. Keeping it as an interface lets the real client be
// swapped for the moq-generated mock in tests.
type OpenAIClient interface {
25+
CreateChatCompletion(ctx context.Context, request openai.ChatCompletionRequest) (openai.ChatCompletionResponse, error)
26+
}
27+
2228
// Rules interface with all methods to access datastore
2329
type Rules interface {
2430
Get(ctx context.Context, rURL string) (datastore.Rule, bool)
@@ -33,10 +39,14 @@ type UReadability struct {
3339
TimeOut time.Duration
3440
SnippetSize int
3541
Rules Rules
42+
OpenAIKey string
43+
44+
openAIClient OpenAIClient
3645
}
3746

3847
// Response from api calls
3948
type Response struct {
49+
Summary string `json:"summary,omitempty"`
4050
Content string `json:"content"`
4151
Rich string `json:"rich_content"`
4252
Domain string `json:"domain"`
@@ -68,6 +78,37 @@ func (f *UReadability) ExtractByRule(ctx context.Context, reqURL string, rule *d
6878
return f.extractWithRules(ctx, reqURL, rule)
6979
}
7080

81+
func (f *UReadability) GenerateSummary(ctx context.Context, content string) (string, error) {
82+
if f.OpenAIKey == "" {
83+
return "", fmt.Errorf("OpenAI key is not set")
84+
}
85+
if f.openAIClient == nil {
86+
f.openAIClient = openai.NewClient(f.OpenAIKey)
87+
}
88+
resp, err := f.openAIClient.CreateChatCompletion(
89+
ctx,
90+
openai.ChatCompletionRequest{
91+
Model: openai.GPT4o,
92+
Messages: []openai.ChatCompletionMessage{
93+
{
94+
Role: openai.ChatMessageRoleSystem,
95+
Content: "You are a helpful assistant that summarizes articles. Please summarize the main points in a few sentences as TLDR style (don't add a TLDR label). Then, list up to five detailed bullet points. Provide the response in plain text. Do not add any additional information. Do not add a Summary at the beginning of the response. If detailed bullet points are too similar to the summary, don't include them at all:",
96+
},
97+
{
98+
Role: openai.ChatMessageRoleUser,
99+
Content: content,
100+
},
101+
},
102+
},
103+
)
104+
105+
if err != nil {
106+
return "", err
107+
}
108+
109+
return resp.Choices[0].Message.Content, nil
110+
}
111+
71112
// ExtractWithRules is the core function that handles extraction with or without a specific rule
72113
func (f *UReadability) extractWithRules(ctx context.Context, reqURL string, rule *datastore.Rule) (*Response, error) {
73114
log.Printf("[INFO] extract %s", reqURL)

backend/extractor/readability_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ import (
1111
"testing"
1212
"time"
1313

14+
"github.com/sashabaranov/go-openai"
1415
"github.com/stretchr/testify/assert"
16+
"github.com/stretchr/testify/require"
1517
"go.mongodb.org/mongo-driver/bson/primitive"
1618

1719
"github.com/ukeeper/ukeeper-redabilty/backend/datastore"
@@ -176,3 +178,61 @@ func TestGetContentCustom(t *testing.T) {
176178
assert.Equal(t, 6988, len(content))
177179
assert.Equal(t, 7169, len(rich))
178180
}
181+
182+
func TestUReadability_GenerateSummary(t *testing.T) {
183+
mockOpenAI := &OpenAIClientMock{
184+
CreateChatCompletionFunc: func(ctx context.Context, request openai.ChatCompletionRequest) (openai.ChatCompletionResponse, error) {
185+
return openai.ChatCompletionResponse{
186+
Choices: []openai.ChatCompletionChoice{
187+
{
188+
Message: openai.ChatCompletionMessage{
189+
Content: "This is a summary of the article.",
190+
},
191+
},
192+
},
193+
}, nil
194+
},
195+
}
196+
197+
tests := []struct {
198+
name string
199+
content string
200+
openAIKey string
201+
expectedResult string
202+
expectedError string
203+
}{
204+
{
205+
name: "Valid OpenAI Key and content",
206+
content: "This is a test article content.",
207+
openAIKey: "test-key",
208+
expectedResult: "This is a summary of the article.",
209+
expectedError: "",
210+
},
211+
{
212+
name: "No OpenAI Key",
213+
content: "This is a test article content.",
214+
openAIKey: "",
215+
expectedResult: "",
216+
expectedError: "OpenAI key is not set",
217+
},
218+
}
219+
220+
for _, tt := range tests {
221+
t.Run(tt.name, func(t *testing.T) {
222+
readability := UReadability{
223+
OpenAIKey: tt.openAIKey,
224+
openAIClient: mockOpenAI,
225+
}
226+
227+
result, err := readability.GenerateSummary(context.Background(), tt.content)
228+
229+
if tt.expectedError != "" {
230+
require.Error(t, err)
231+
assert.Contains(t, err.Error(), tt.expectedError)
232+
} else {
233+
require.NoError(t, err)
234+
assert.Equal(t, tt.expectedResult, result)
235+
}
236+
})
237+
}
238+
}

backend/go.mod

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
module github.com/ukeeper/ukeeper-redabilty/backend
22

3-
go 1.22
3+
go 1.23.0
4+
45
toolchain go1.24.1
56

67
require (
@@ -14,6 +15,7 @@ require (
1415
github.com/jessevdk/go-flags v1.6.1
1516
github.com/kennygrant/sanitize v1.2.4
1617
github.com/mauidude/go-readability v0.0.0-20220221173116-a9b3620098b7
18+
github.com/sashabaranov/go-openai v1.38.1
1719
github.com/stretchr/testify v1.10.0
1820
go.mongodb.org/mongo-driver v1.17.3
1921
golang.org/x/net v0.36.0

backend/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6So
183183
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
184184
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
185185
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
186+
github.com/sashabaranov/go-openai v1.38.1 h1:TtZabbFQZa1nEni/IhVtDF/WQjVqDgd+cWR5OeddzF8=
187+
github.com/sashabaranov/go-openai v1.38.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
186188
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
187189
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
188190
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=

backend/main.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ var opts struct {
2626
MongoURI string `short:"m" long:"mongo_uri" env:"MONGO_URI" required:"true" description:"MongoDB connection string"`
2727
MongoDelay time.Duration `long:"mongo-delay" env:"MONGO_DELAY" default:"0" description:"mongo initial delay"`
2828
MongoDB string `long:"mongo-db" env:"MONGO_DB" default:"ureadability" description:"mongo database name"`
29+
OpenAIKey string `long:"openai_key" env:"OPENAI_KEY" description:"OpenAI API key for summary generation"`
2930
Debug bool `long:"dbg" env:"DEBUG" description:"debug mode"`
3031
}
3132

@@ -41,7 +42,7 @@ func main() {
4142
log.Fatalf("[ERROR] can't connect to mongo %v", err)
4243
}
4344
srv := rest.Server{
44-
Readability: extractor.UReadability{TimeOut: 30, SnippetSize: 300, Rules: db.GetStores()},
45+
Readability: extractor.UReadability{TimeOut: 30, SnippetSize: 300, Rules: db.GetStores(), OpenAIKey: opts.OpenAIKey},
4546
Token: opts.Token,
4647
Credentials: opts.Credentials,
4748
Version: revision,

backend/rest/server.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"net/http"
1010
"os"
1111
"path/filepath"
12+
"strconv"
1213
"strings"
1314
"time"
1415

@@ -185,12 +186,28 @@ func (s *Server) extractArticle(w http.ResponseWriter, r *http.Request) {
185186
// if token is not set for application, it won't be checked
186187
func (s *Server) extractArticleEmulateReadability(w http.ResponseWriter, r *http.Request) {
187188
token := r.URL.Query().Get("token")
189+
summary, _ := strconv.ParseBool(r.URL.Query().Get("summary"))
190+
188191
if s.Token != "" && token == "" {
189192
render.Status(r, http.StatusExpectationFailed)
190193
render.JSON(w, r, JSON{"error": "no token passed"})
191194
return
192195
}
193196

197+
// Check if summary is requested but token is not provided, or OpenAI key is not set
198+
if summary {
199+
if s.Readability.OpenAIKey == "" {
200+
render.Status(r, http.StatusBadRequest)
201+
render.JSON(w, r, JSON{"error": "OpenAI key is not set"})
202+
return
203+
}
204+
if s.Token == "" {
205+
render.Status(r, http.StatusBadRequest)
206+
render.JSON(w, r, JSON{"error": "summary generation requires token, but token is not set for the server"})
207+
return
208+
}
209+
}
210+
194211
if s.Token != "" && s.Token != token {
195212
render.Status(r, http.StatusUnauthorized)
196213
render.JSON(w, r, JSON{"error": "wrong token passed"})
@@ -211,6 +228,16 @@ func (s *Server) extractArticleEmulateReadability(w http.ResponseWriter, r *http
211228
return
212229
}
213230

231+
if summary {
232+
summaryText, err := s.Readability.GenerateSummary(r.Context(), res.Content)
233+
if err != nil {
234+
render.Status(r, http.StatusInternalServerError)
235+
render.JSON(w, r, JSON{"error": fmt.Sprintf("failed to generate summary: %v", err)})
236+
return
237+
}
238+
res.Summary = summaryText
239+
}
240+
214241
render.JSON(w, r, &res)
215242
}
216243

@@ -250,6 +277,13 @@ func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
250277
continue
251278
}
252279

280+
if s.Readability.OpenAIKey != "" {
281+
result.Summary, e = s.Readability.GenerateSummary(r.Context(), result.Content)
282+
if e != nil {
283+
log.Printf("[WARN] failed to generate summary for preview of %s: %v", url, e)
284+
}
285+
}
286+
253287
responses = append(responses, *result)
254288
}
255289

@@ -260,6 +294,7 @@ func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
260294
Excerpt string
261295
Rich template.HTML
262296
Content string
297+
Summary template.HTML
263298
}
264299

265300
var results []result
@@ -270,6 +305,8 @@ func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
270305
//nolint: gosec // this content is escaped by Extractor, so it's safe to use it as is
271306
Rich: template.HTML(r.Rich),
272307
Content: r.Content,
308+
//nolint: gosec // we do not expect CSS from OpenAI response
309+
Summary: template.HTML(strings.ReplaceAll(r.Summary, "\n", "<br>")),
273310
})
274311
}
275312

0 commit comments

Comments
 (0)