Skip to content

Commit 3d48955

Browse files
committed
Add article summary feature with OpenAI integration
- Introduce 'summary' query parameter in /api/content/v1/parser endpoint
- Integrate OpenAI API for generating article summaries
- Add OpenAIKey field to Server struct and corresponding command-line flag
- Update extractArticleEmulateReadability to handle summary requests
- Add generateSummary method using OpenAI's GPT-4o model (turns out to be faster than even 4o mini)
- Add OpenAIClient interface and mock for testing
- Update README.md with new configuration options and API details

This feature allows users to request a summary of extracted articles using OpenAI's GPT-4o model. To ensure secure usage, summary generation requires a valid server token. The changes include comprehensive error handling and test coverage for various scenarios, including token validation and server misconfiguration.
1 parent 95c8ea6 commit 3d48955

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+6940
-2
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
| address | UKEEPER_ADDRESS | all interfaces | web server listening address |
1212
| port | UKEEPER_PORT | `8080` | web server port |
1313
| mongo_uri | MONGO_URI | none | MongoDB connection string, _required_ |
14+
| openai_key | OPENAI_KEY | none | OpenAI API key for summary generation |
1415
| frontend_dir | FRONTEND_DIR | `/srv/web` | directory with frontend files |
1516
| token | TOKEN | none | token for /content/v1/parser endpoint auth |
1617
| mongo-delay | MONGO_DELAY | `0` | mongo initial delay |
@@ -20,7 +21,7 @@
2021

2122
### API
2223

23-
GET /api/content/v1/parser?token=secret&url=http://aa.com/blah - extract content (emulate Readability API parse call)
24+
GET /api/content/v1/parser?token=secret&summary=true&url=http://aa.com/blah - extract content (emulate Readability API parse call), summary is optional and requires OpenAI key and token to be enabled
2425
POST /api/v1/extract {url: http://aa.com/blah} - extract content
2526

2627
POST /api/v1/rule {"domain": "aa.com", "content": "#content p"} - add/update custom rule

backend/extractor/readability.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ type UReadability struct {
3737

3838
// Response from api calls
3939
type Response struct {
40+
Summary string `json:"summary,omitempty"`
4041
Content string `json:"content"`
4142
Rich string `json:"rich_content"`
4243
Domain string `json:"domain"`

backend/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ require (
1515
github.com/jessevdk/go-flags v1.6.1
1616
github.com/kennygrant/sanitize v1.2.4
1717
github.com/mauidude/go-readability v0.0.0-20220221173116-a9b3620098b7
18+
github.com/sashabaranov/go-openai v1.28.1
1819
github.com/stretchr/testify v1.9.0
1920
go.mongodb.org/mongo-driver v1.16.0
2021
golang.org/x/net v0.27.0

backend/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6So
183183
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
184184
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
185185
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
186+
github.com/sashabaranov/go-openai v1.28.1 h1:aREx6faUTeOZNMDTNGAY8B9vNmmN7qoGvDV0Ke2J1Mc=
187+
github.com/sashabaranov/go-openai v1.28.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
186188
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
187189
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
188190
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=

backend/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ var opts struct {
2626
MongoURI string `short:"m" long:"mongo_uri" env:"MONGO_URI" required:"true" description:"MongoDB connection string"`
2727
MongoDelay time.Duration `long:"mongo-delay" env:"MONGO_DELAY" default:"0" description:"mongo initial delay"`
2828
MongoDB string `long:"mongo-db" env:"MONGO_DB" default:"ureadability" description:"mongo database name"`
29+
OpenAIKey string `long:"openai_key" env:"OPENAI_KEY" description:"OpenAI API key for summary generation"`
2930
Debug bool `long:"dbg" env:"DEBUG" description:"debug mode"`
3031
}
3132

@@ -45,6 +46,7 @@ func main() {
4546
Token: opts.Token,
4647
Credentials: opts.Credentials,
4748
Version: revision,
49+
OpenAIKey: opts.OpenAIKey,
4850
}
4951

5052
ctx, cancel := context.WithCancel(context.Background())

backend/rest/openai_mock.go

Lines changed: 82 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/rest/server.go

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"context"
66
"fmt"
77
"net/http"
8+
"strconv"
89
"time"
910

1011
"github.com/didip/tollbooth/v7"
@@ -15,18 +16,27 @@ import (
1516
log "github.com/go-pkgz/lgr"
1617
UM "github.com/go-pkgz/rest"
1718
"github.com/go-pkgz/rest/logger"
19+
"github.com/sashabaranov/go-openai"
1820
"go.mongodb.org/mongo-driver/bson/primitive"
1921

2022
"github.com/ukeeper/ukeeper-redabilty/backend/datastore"
2123
"github.com/ukeeper/ukeeper-redabilty/backend/extractor"
2224
)
2325

26+
// OpenAIClient is the one OpenAI call the server depends on. Server.generateSummary
// assigns the value returned by openai.NewClient to a field of this type, so the
// real client satisfies it; the moq-generated mock in openai_mock.go stands in for
// it in tests.
//go:generate moq -out openai_mock.go . OpenAIClient
27+
type OpenAIClient interface {
28+
// CreateChatCompletion submits a chat completion request; generateSummary reads
// the summary text from the first entry of the response's Choices.
CreateChatCompletion(ctx context.Context, request openai.ChatCompletionRequest) (openai.ChatCompletionResponse, error)
29+
}
30+
2431
// Server is a basic rest server providing access to store and invoking parser
2532
type Server struct {
2633
Readability extractor.UReadability
2734
// Version is set by main from the build revision.
Version string
2835
// Token is compared with the "token" query parameter of the readability-emulation
// endpoint; when empty the token is not checked, and summary requests are rejected.
Token string
2936
Credentials map[string]string
37+
// OpenAIKey is the API key passed to openai.NewClient when no client has been
// injected; when empty, summary requests are rejected with 400 Bad Request.
OpenAIKey string
38+
39+
// openAIClient, when non-nil, is used by generateSummary instead of a client built
// from OpenAIKey. It is unexported, so only code in this package (e.g. tests) can set it.
openAIClient OpenAIClient
3040
}
3141

3242
// JSON is a map alias, just for convenience
@@ -115,16 +125,32 @@ func (s *Server) extractArticle(w http.ResponseWriter, r *http.Request) {
115125
render.JSON(w, r, &res)
116126
}
117127

118-
// emulate readability API parse - https://www.readability.com/api/content/v1/parser?token=%s&url=%s
128+
// extractArticleEmulateReadability emulates readability API parse - https://www.readability.com/api/content/v1/parser?token=%s&url=%s
119129
// if token is not set for application, it won't be checked
120130
func (s *Server) extractArticleEmulateReadability(w http.ResponseWriter, r *http.Request) {
121131
token := r.URL.Query().Get("token")
132+
summary, _ := strconv.ParseBool(r.URL.Query().Get("summary"))
133+
122134
if s.Token != "" && token == "" {
123135
render.Status(r, http.StatusExpectationFailed)
124136
render.JSON(w, r, JSON{"error": "no token passed"})
125137
return
126138
}
127139

140+
// Check if summary is requested but token is not provided, or OpenAI key is not set
141+
if summary {
142+
if s.OpenAIKey == "" {
143+
render.Status(r, http.StatusBadRequest)
144+
render.JSON(w, r, JSON{"error": "OpenAI key is not set"})
145+
return
146+
}
147+
if s.Token == "" {
148+
render.Status(r, http.StatusBadRequest)
149+
render.JSON(w, r, JSON{"error": "summary generation requires token, but token is not set for the server"})
150+
return
151+
}
152+
}
153+
128154
if s.Token != "" && s.Token != token {
129155
render.Status(r, http.StatusUnauthorized)
130156
render.JSON(w, r, JSON{"error": "wrong token passed"})
@@ -145,6 +171,16 @@ func (s *Server) extractArticleEmulateReadability(w http.ResponseWriter, r *http
145171
return
146172
}
147173

174+
if summary {
175+
summaryText, err := s.generateSummary(r.Context(), res.Content)
176+
if err != nil {
177+
render.Status(r, http.StatusInternalServerError)
178+
render.JSON(w, r, JSON{"error": fmt.Sprintf("failed to generate summary: %v", err)})
179+
return
180+
}
181+
res.Summary = summaryText
182+
}
183+
148184
render.JSON(w, r, &res)
149185
}
150186

@@ -224,6 +260,34 @@ func (s *Server) authFake(w http.ResponseWriter, r *http.Request) {
224260
render.JSON(w, r, JSON{"pong": t.Format("20060102150405")})
225261
}
226262

263+
func (s *Server) generateSummary(ctx context.Context, content string) (string, error) {
264+
if s.openAIClient == nil {
265+
s.openAIClient = openai.NewClient(s.OpenAIKey)
266+
}
267+
resp, err := s.openAIClient.CreateChatCompletion(
268+
ctx,
269+
openai.ChatCompletionRequest{
270+
Model: openai.GPT4o,
271+
Messages: []openai.ChatCompletionMessage{
272+
{
273+
Role: openai.ChatMessageRoleSystem,
274+
Content: "You are a helpful assistant that summarizes articles. Please summarize the main points in a few sentences as TLDR style (don't add a TLDR label). Then, list up to five detailed bullet points. Provide the response in plain text. Do not add any additional information. Do not add a Summary at the beginning of the response. If detailed bullet points are too similar to the summary, don't include them at all:",
275+
},
276+
{
277+
Role: openai.ChatMessageRoleUser,
278+
Content: content,
279+
},
280+
},
281+
},
282+
)
283+
284+
if err != nil {
285+
return "", err
286+
}
287+
288+
return resp.Choices[0].Message.Content, nil
289+
}
290+
227291
func getBid(id string) primitive.ObjectID {
228292
bid, err := primitive.ObjectIDFromHex(id)
229293
if err != nil {

0 commit comments

Comments
 (0)