Skip to content

Commit e26bdfc

Browse files
paskalclaude
andcommitted
Add article summary feature with OpenAI integration
Implement MongoDB cache for summaries to reduce API costs and improve performance. Rename parameters from OpenAI-specific to more generic API names. Support configurable model selection through ModelType enum or direct model names. Add comprehensive tests with mocks for summary generation and caching. Update documentation in README with summary feature details. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]>
1 parent 8137796 commit e26bdfc

File tree

13 files changed

+656
-42
lines changed

13 files changed

+656
-42
lines changed

README.md

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
|--------------|-----------------|----------------|-------------------------------------------------------|
1111
| address | UKEEPER_ADDRESS | all interfaces | web server listening address |
1212
| port | UKEEPER_PORT | `8080` | web server port |
13-
| mongo_uri | MONGO_URI | none | MongoDB connection string, _required_ |
14-
| openai_key | OPENAI_KEY | none | OpenAI API key for summary generation |
15-
| frontend_dir | FRONTEND_DIR | `/srv/web` | directory with frontend files |
13+
| mongo-uri | MONGO_URI | none | MongoDB connection string, _required_ |
14+
| api-key | API_KEY | none | OpenAI API key for summary generation |
15+
| model-type | MODEL_TYPE | `gpt-4o-mini` | OpenAI model name for summary generation (e.g., gpt-4o, gpt-4o-mini) |
16+
| frontend-dir | FRONTEND_DIR | `/srv/web` | directory with frontend files |
1617
| token | TOKEN | none | token for /content/v1/parser endpoint auth |
1718
| mongo-delay | MONGO_DELAY | `0` | mongo initial delay |
1819
| mongo-db | MONGO_DB | `ureadability` | mongo database name |
@@ -24,6 +25,30 @@
2425
GET /api/content/v1/parser?token=secret&summary=true&url=http://aa.com/blah - extract content (emulates the Readability API parse call); the `summary` parameter is optional and works only when both the OpenAI API key and the server token are configured
2526
POST /api/v1/extract {url: http://aa.com/blah} - extract content
2627

28+
### Article Summary Feature
29+
30+
The application can generate concise summaries of article content using OpenAI's GPT models:
31+
32+
1. **Configuration**:
33+
- Set `api-key` to your OpenAI API key
34+
- Optionally set `model-type` to specify which model to use (e.g., `gpt-4o`, `gpt-4o-mini`)
35+
- Default is `gpt-4o-mini` if not specified
36+
- A server token must be configured for security reasons
37+
38+
2. **Usage**:
39+
- Add `summary=true` parameter to the `/api/content/v1/parser` endpoint
40+
- Example: `/api/content/v1/parser?token=secret&summary=true&url=http://example.com/article`
41+
42+
3. **Features**:
43+
- Summaries are cached in MongoDB to reduce API costs and improve performance
44+
- The cache stores:
45+
- Content hash (to identify articles)
46+
- Summary text
47+
- Model used for generation
48+
- Creation and update timestamps
49+
- If the same content is requested again, the cached summary is returned
50+
- The preview page automatically shows summaries when available
51+
2752
## Development
2853

2954
### Running tests

backend/datastore/mongo.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,29 @@ func New(connectionURI, dbName string, delay time.Duration) (*MongoServer, error
3737
return &MongoServer{client: client, dbName: dbName}, nil
3838
}
3939

40+
// Stores contains all DAO instances
41+
type Stores struct {
42+
Rules RulesDAO
43+
Summaries SummariesDAO
44+
}
45+
4046
// GetStores initialize collections and make indexes
41-
func (m *MongoServer) GetStores() (rules RulesDAO) {
47+
func (m *MongoServer) GetStores() Stores {
4248
rIndexes := []mongo.IndexModel{
4349
{Keys: bson.D{{Key: "enabled", Value: 1}, {Key: "domain", Value: 1}}},
4450
{Keys: bson.D{{Key: "user", Value: 1}, {Key: "domain", Value: 1}, {Key: "enabled", Value: 1}}},
4551
{Keys: bson.D{{Key: "domain", Value: 1}, {Key: "match_urls", Value: 1}}},
4652
}
47-
rules = RulesDAO{Collection: m.collection("rules", rIndexes)}
48-
return rules
53+
54+
sIndexes := []mongo.IndexModel{
55+
{Keys: bson.D{{Key: "created_at", Value: 1}}},
56+
{Keys: bson.D{{Key: "model", Value: 1}}},
57+
}
58+
59+
return Stores{
60+
Rules: RulesDAO{Collection: m.collection("rules", rIndexes)},
61+
Summaries: SummariesDAO{Collection: m.collection("summaries", sIndexes)},
62+
}
4963
}
5064

5165
// collection makes collection with indexes

backend/datastore/rules_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ func TestRules(t *testing.T) {
1919
server, err := New("mongodb://localhost:27017/", "test_ureadability", 0)
2020
require.NoError(t, err)
2121
assert.NotNil(t, server.client)
22-
rules := server.GetStores()
22+
stores := server.GetStores()
23+
assert.NotNil(t, stores)
24+
rules := stores.Rules
2325
assert.NotNil(t, rules)
2426
rule := Rule{
2527
Domain: randStringBytesRmndr(42) + ".com",
@@ -74,7 +76,9 @@ func TestRulesCanceledContext(t *testing.T) {
7476
server, err := New("mongodb://wrong", "", 0)
7577
require.NoError(t, err)
7678
assert.NotNil(t, server.client)
77-
rules := server.GetStores()
79+
stores := server.GetStores()
80+
assert.NotNil(t, stores)
81+
rules := stores.Rules
7882
assert.NotNil(t, rules)
7983

8084
ctx, cancel := context.WithCancel(context.Background())

backend/datastore/summaries.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Package datastore provides a MongoDB implementation of the store used to keep and access summaries
2+
package datastore
3+
4+
import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"time"

	log "github.com/go-pkgz/lgr"
	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/mongo"
	"go.mongodb.org/mongo-driver/mongo/options"
)
16+
17+
// Summary is a cached article summary as stored in the summaries collection.
type Summary struct {
	// ID is the document key: the hex-encoded SHA-256 hash of the content.
	ID string `bson:"_id"`
	// Content is the original text that was summarized.
	Content string `bson:"content"`
	// Summary is the generated summary text.
	Summary string `bson:"summary"`
	// Model is the name of the model used for summarization.
	Model string `bson:"model"`
	// CreatedAt and UpdatedAt are filled in by SummariesDAO.Save.
	CreatedAt time.Time `bson:"created_at"`
	UpdatedAt time.Time `bson:"updated_at"`
}
26+
27+
// SummariesDAO handles database operations for article summaries
28+
type SummariesDAO struct {
29+
Collection *mongo.Collection
30+
}
31+
32+
// Get returns summary by content hash
33+
func (s SummariesDAO) Get(ctx context.Context, content string) (Summary, bool) {
34+
contentHash := generateContentHash(content)
35+
res := s.Collection.FindOne(ctx, bson.M{"_id": contentHash})
36+
if res.Err() != nil {
37+
if res.Err() == mongo.ErrNoDocuments {
38+
return Summary{}, false
39+
}
40+
log.Printf("[WARN] can't get summary for hash %s: %v", contentHash, res.Err())
41+
return Summary{}, false
42+
}
43+
44+
summary := Summary{}
45+
if err := res.Decode(&summary); err != nil {
46+
log.Printf("[WARN] can't decode summary document for hash %s: %v", contentHash, err)
47+
return Summary{}, false
48+
}
49+
50+
return summary, true
51+
}
52+
53+
// Save creates or updates summary in the database
54+
func (s SummariesDAO) Save(ctx context.Context, summary Summary) error {
55+
if summary.ID == "" {
56+
summary.ID = generateContentHash(summary.Content)
57+
}
58+
59+
if summary.CreatedAt.IsZero() {
60+
summary.CreatedAt = time.Now()
61+
}
62+
summary.UpdatedAt = time.Now()
63+
64+
opts := options.Update().SetUpsert(true)
65+
_, err := s.Collection.UpdateOne(
66+
ctx,
67+
bson.M{"_id": summary.ID},
68+
bson.M{"$set": summary},
69+
opts,
70+
)
71+
if err != nil {
72+
return fmt.Errorf("failed to save summary: %w", err)
73+
}
74+
return nil
75+
}
76+
77+
// Delete removes summary from the database
78+
func (s SummariesDAO) Delete(ctx context.Context, contentHash string) error {
79+
_, err := s.Collection.DeleteOne(ctx, bson.M{"_id": contentHash})
80+
if err != nil {
81+
return fmt.Errorf("failed to delete summary: %w", err)
82+
}
83+
return nil
84+
}
85+
86+
// generateContentHash returns the hex encoding of the SHA-256 digest of
// content; the result is used as the summary document ID.
func generateContentHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}

backend/datastore/summaries_test.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
package datastore
2+
3+
import (
4+
"context"
5+
"os"
6+
"testing"
7+
"time"
8+
9+
"github.com/stretchr/testify/assert"
10+
"github.com/stretchr/testify/require"
11+
)
12+
13+
func TestSummariesDAO_SaveAndGet(t *testing.T) {
14+
if _, ok := os.LookupEnv("ENABLE_MONGO_TESTS"); !ok {
15+
t.Skip("ENABLE_MONGO_TESTS env variable is not set")
16+
}
17+
18+
mdb, err := New("mongodb://localhost:27017", "test_ureadability", 0)
19+
require.NoError(t, err)
20+
21+
// create a unique collection for this test to avoid conflicts
22+
collection := mdb.client.Database(mdb.dbName).Collection("summaries_test")
23+
defer func() {
24+
_ = collection.Drop(context.Background())
25+
}()
26+
27+
dao := SummariesDAO{Collection: collection}
28+
29+
content := "This is a test article content. It should generate a unique hash."
30+
summary := Summary{
31+
Content: content,
32+
Summary: "This is a test summary of the article.",
33+
Model: "gpt-4o-mini",
34+
CreatedAt: time.Now(),
35+
}
36+
37+
// test saving a summary
38+
err = dao.Save(context.Background(), summary)
39+
require.NoError(t, err)
40+
41+
// test getting the summary
42+
foundSummary, found := dao.Get(context.Background(), content)
43+
assert.True(t, found)
44+
assert.Equal(t, summary.Summary, foundSummary.Summary)
45+
assert.Equal(t, summary.Model, foundSummary.Model)
46+
assert.NotEmpty(t, foundSummary.ID)
47+
48+
// test getting a non-existent summary
49+
_, found = dao.Get(context.Background(), "non-existent content")
50+
assert.False(t, found)
51+
52+
// test updating an existing summary
53+
updatedSummary := Summary{
54+
ID: foundSummary.ID,
55+
Content: content,
56+
Summary: "This is an updated summary.",
57+
Model: "gpt-4o-mini",
58+
CreatedAt: foundSummary.CreatedAt,
59+
}
60+
61+
err = dao.Save(context.Background(), updatedSummary)
62+
require.NoError(t, err)
63+
64+
foundSummary, found = dao.Get(context.Background(), content)
65+
assert.True(t, found)
66+
assert.Equal(t, "This is an updated summary.", foundSummary.Summary)
67+
assert.Equal(t, updatedSummary.CreatedAt, foundSummary.CreatedAt)
68+
assert.NotEqual(t, updatedSummary.UpdatedAt, foundSummary.UpdatedAt) // UpdatedAt should be set by the DAO
69+
70+
// test deleting a summary
71+
err = dao.Delete(context.Background(), foundSummary.ID)
72+
require.NoError(t, err)
73+
74+
_, found = dao.Get(context.Background(), content)
75+
assert.False(t, found)
76+
}
77+
78+
func TestGenerateContentHash(t *testing.T) {
79+
content1 := "This is a test content."
80+
content2 := "This is a different test content."
81+
82+
hash1 := generateContentHash(content1)
83+
hash2 := generateContentHash(content2)
84+
85+
assert.NotEqual(t, hash1, hash2)
86+
assert.Equal(t, hash1, generateContentHash(content1)) // same content should produce same hash
87+
assert.Equal(t, 64, len(hash1)) // SHA-256 produces 64 character hex string
88+
}

backend/extractor/readability.go

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package extractor
33

44
import (
55
"context"
6+
"errors"
67
"fmt"
78
"io"
89
"net/http"
@@ -21,6 +22,8 @@ import (
2122
)
2223

2324
//go:generate moq -out openai_mock.go . OpenAIClient
25+
26+
// OpenAIClient defines interface for OpenAI API client
2427
type OpenAIClient interface {
2528
CreateChatCompletion(ctx context.Context, request openai.ChatCompletionRequest) (openai.ChatCompletionResponse, error)
2629
}
@@ -34,14 +37,30 @@ type Rules interface {
3437
All(ctx context.Context) []datastore.Rule
3538
}
3639

40+
// Summaries interface with all methods to access summary cache
41+
//
42+
//go:generate moq -out summaries_mock.go . Summaries
43+
type Summaries interface {
44+
Get(ctx context.Context, content string) (datastore.Summary, bool)
45+
Save(ctx context.Context, summary datastore.Summary) error
46+
Delete(ctx context.Context, contentHash string) error
47+
}
48+
3749
// UReadability implements fetcher & extractor for local readability-like functionality
3850
type UReadability struct {
3951
TimeOut time.Duration
4052
SnippetSize int
4153
Rules Rules
54+
Summaries Summaries
4255
OpenAIKey string
56+
ModelType string
57+
58+
apiClient OpenAIClient
59+
}
4360

44-
openAIClient OpenAIClient
61+
// SetAPIClient sets the API client for testing purposes
62+
func (f *UReadability) SetAPIClient(client OpenAIClient) {
63+
f.apiClient = client
4564
}
4665

4766
// Response from api calls
@@ -78,17 +97,31 @@ func (f *UReadability) ExtractByRule(ctx context.Context, reqURL string, rule *d
7897
return f.extractWithRules(ctx, reqURL, rule)
7998
}
8099

100+
// GenerateSummary creates a summary of the content using OpenAI
81101
func (f *UReadability) GenerateSummary(ctx context.Context, content string) (string, error) {
82102
if f.OpenAIKey == "" {
83-
return "", fmt.Errorf("OpenAI key is not set")
103+
return "", errors.New("API key for summarization is not set")
104+
}
105+
106+
if f.Summaries != nil {
107+
if cachedSummary, found := f.Summaries.Get(ctx, content); found {
108+
log.Printf("[DEBUG] using cached summary for content")
109+
return cachedSummary.Summary, nil
110+
}
111+
}
112+
113+
if f.apiClient == nil {
114+
f.apiClient = openai.NewClient(f.OpenAIKey)
84115
}
85-
if f.openAIClient == nil {
86-
f.openAIClient = openai.NewClient(f.OpenAIKey)
116+
117+
model := openai.GPT4oMini
118+
if f.ModelType != "" {
119+
model = f.ModelType
87120
}
88-
resp, err := f.openAIClient.CreateChatCompletion(
121+
resp, err := f.apiClient.CreateChatCompletion(
89122
ctx,
90123
openai.ChatCompletionRequest{
91-
Model: openai.GPT4o,
124+
Model: model,
92125
Messages: []openai.ChatCompletionMessage{
93126
{
94127
Role: openai.ChatMessageRoleSystem,
@@ -103,10 +136,28 @@ func (f *UReadability) GenerateSummary(ctx context.Context, content string) (str
103136
)
104137

105138
if err != nil {
106-
return "", err
139+
log.Printf("[WARN] AI summarization failed: %v", err)
140+
return "", fmt.Errorf("failed to generate summary: %w", err)
141+
}
142+
143+
summary := resp.Choices[0].Message.Content
144+
145+
if f.Summaries != nil {
146+
err = f.Summaries.Save(ctx, datastore.Summary{
147+
Content: content,
148+
Summary: summary,
149+
Model: model,
150+
CreatedAt: time.Now(),
151+
})
152+
153+
if err != nil {
154+
log.Printf("[WARN] failed to cache summary: %v", err)
155+
} else {
156+
log.Printf("[DEBUG] summary cached successfully")
157+
}
107158
}
108159

109-
return resp.Choices[0].Message.Content, nil
160+
return summary, nil
110161
}
111162

112163
// ExtractWithRules is the core function that handles extraction with or without a specific rule

0 commit comments

Comments
 (0)