Skip to content

Commit 04b9ae4

Browse files
paskal and claude committed
Add article summary feature with OpenAI integration
Implement MongoDB cache for summaries to reduce API costs and improve performance. Rename parameters from OpenAI-specific to more generic API names. Support configurable model selection through ModelType enum or direct model names. Add comprehensive tests with mocks for summary generation and caching. Update documentation in README with summary feature details. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 8727a3c commit 04b9ae4

File tree

11 files changed

+651
-32
lines changed

11 files changed

+651
-32
lines changed

README.md

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
|--------------|-----------------|----------------|-------------------------------------------------------|
1111
| address | UKEEPER_ADDRESS | all interfaces | web server listening address |
1212
| port | UKEEPER_PORT | `8080` | web server port |
13-
| mongo_uri | MONGO_URI | none | MongoDB connection string, _required_ |
14-
| openai_key | OPENAI_KEY | none | OpenAI API key for summary generation |
15-
| frontend_dir | FRONTEND_DIR | `/srv/web` | directory with frontend files |
13+
| mongo-uri | MONGO_URI | none | MongoDB connection string, _required_ |
14+
| api-key | API_KEY | none | OpenAI API key for summary generation |
15+
| model-type | MODEL_TYPE | `gpt-4o-mini` | OpenAI model name for summary generation (e.g., gpt-4o, gpt-4o-mini) |
16+
| frontend-dir | FRONTEND_DIR | `/srv/web` | directory with frontend files |
1617
| token | TOKEN | none | token for /content/v1/parser endpoint auth |
1718
| mongo-delay | MONGO_DELAY | `0` | mongo initial delay |
1819
| mongo-db | MONGO_DB | `ureadability` | mongo database name |
@@ -24,6 +25,30 @@
2425
GET /api/content/v1/parser?token=secret&summary=true&url=http://aa.com/blah - extract content (emulates the Readability API parse call); the `summary` parameter is optional and requires both an OpenAI API key and a server token to be configured
2526
POST /api/v1/extract {url: http://aa.com/blah} - extract content
2627

28+
### Article Summary Feature
29+
30+
The application can generate concise summaries of article content using OpenAI's GPT models:
31+
32+
1. **Configuration**:
33+
- Set `api-key` to your OpenAI API key
34+
- Optionally set `model-type` to specify which model to use (e.g., `gpt-4o`, `gpt-4o-mini`)
35+
- Default is `gpt-4o-mini` if not specified
36+
- A server token must be configured for security reasons
37+
38+
2. **Usage**:
39+
- Add `summary=true` parameter to the `/api/content/v1/parser` endpoint
40+
- Example: `/api/content/v1/parser?token=secret&summary=true&url=http://example.com/article`
41+
42+
3. **Features**:
43+
- Summaries are cached in MongoDB to reduce API costs and improve performance
44+
- The cache stores:
45+
- Content hash (to identify articles)
46+
- Summary text
47+
- Model used for generation
48+
- Creation and update timestamps
49+
- If the same content is requested again, the cached summary is returned
50+
- The preview page automatically shows summaries when available
51+
2752
## Development
2853

2954
### Running tests

backend/datastore/mongo.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,29 @@ func New(connectionURI, dbName string, delay time.Duration) (*MongoServer, error
3737
return &MongoServer{client: client, dbName: dbName}, nil
3838
}
3939

40+
// Stores contains all DAO instances
41+
type Stores struct {
42+
Rules RulesDAO
43+
Summaries SummariesDAO
44+
}
45+
4046
// GetStores initialize collections and make indexes
41-
func (m *MongoServer) GetStores() (rules RulesDAO) {
47+
func (m *MongoServer) GetStores() Stores {
4248
rIndexes := []mongo.IndexModel{
4349
{Keys: bson.D{{Key: "enabled", Value: 1}, {Key: "domain", Value: 1}}},
4450
{Keys: bson.D{{Key: "user", Value: 1}, {Key: "domain", Value: 1}, {Key: "enabled", Value: 1}}},
4551
{Keys: bson.D{{Key: "domain", Value: 1}, {Key: "match_urls", Value: 1}}},
4652
}
47-
rules = RulesDAO{Collection: m.collection("rules", rIndexes)}
48-
return rules
53+
54+
sIndexes := []mongo.IndexModel{
55+
{Keys: bson.D{{Key: "created_at", Value: 1}}},
56+
{Keys: bson.D{{Key: "model", Value: 1}}},
57+
}
58+
59+
return Stores{
60+
Rules: RulesDAO{Collection: m.collection("rules", rIndexes)},
61+
Summaries: SummariesDAO{Collection: m.collection("summaries", sIndexes)},
62+
}
4963
}
5064

5165
// collection makes collection with indexes

backend/datastore/rules_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ func TestRules(t *testing.T) {
1919
server, err := New("mongodb://localhost:27017/", "test_ureadability", 0)
2020
require.NoError(t, err)
2121
assert.NotNil(t, server.client)
22-
rules := server.GetStores()
22+
stores := server.GetStores()
23+
assert.NotNil(t, stores)
24+
rules := stores.Rules
2325
assert.NotNil(t, rules)
2426
rule := Rule{
2527
Domain: randStringBytesRmndr(42) + ".com",
@@ -74,7 +76,9 @@ func TestRulesCanceledContext(t *testing.T) {
7476
server, err := New("mongodb://wrong", "", 0)
7577
require.NoError(t, err)
7678
assert.NotNil(t, server.client)
77-
rules := server.GetStores()
79+
stores := server.GetStores()
80+
assert.NotNil(t, stores)
81+
rules := stores.Rules
7882
assert.NotNil(t, rules)
7983

8084
ctx, cancel := context.WithCancel(context.Background())

backend/datastore/summaries.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Package datastore provides the MongoDB implementation of the stores used to keep and access rules and summaries.
2+
package datastore
3+
4+
import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"time"

	log "github.com/go-pkgz/lgr"
	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/mongo"
	"go.mongodb.org/mongo-driver/mongo/options"
)
16+
17+
// Summary is a cached article summary as persisted by SummariesDAO.
type Summary struct {
	ID        string    `bson:"_id"`        // hex-encoded SHA-256 of Content; Save fills it in when empty
	Content   string    `bson:"content"`    // text that was summarized
	Summary   string    `bson:"summary"`    // generated summary text
	Model     string    `bson:"model"`      // name of the model that produced the summary
	CreatedAt time.Time `bson:"created_at"` // set by Save when left zero
	UpdatedAt time.Time `bson:"updated_at"` // overwritten by Save on every call
}
26+
27+
// SummariesDAO handles database operations for article summaries
28+
type SummariesDAO struct {
29+
Collection *mongo.Collection
30+
}
31+
32+
// Get returns summary by content hash
33+
func (s SummariesDAO) Get(ctx context.Context, content string) (Summary, bool) {
34+
contentHash := generateContentHash(content)
35+
res := s.Collection.FindOne(ctx, bson.M{"_id": contentHash})
36+
if res.Err() != nil {
37+
if res.Err() == mongo.ErrNoDocuments {
38+
return Summary{}, false
39+
}
40+
log.Printf("[WARN] can't get summary for hash %s: %v", contentHash, res.Err())
41+
return Summary{}, false
42+
}
43+
44+
summary := Summary{}
45+
if err := res.Decode(&summary); err != nil {
46+
log.Printf("[WARN] can't decode summary document for hash %s: %v", contentHash, err)
47+
return Summary{}, false
48+
}
49+
50+
return summary, true
51+
}
52+
53+
// Save creates or updates summary in the database
54+
func (s SummariesDAO) Save(ctx context.Context, summary Summary) error {
55+
if summary.ID == "" {
56+
summary.ID = generateContentHash(summary.Content)
57+
}
58+
59+
if summary.CreatedAt.IsZero() {
60+
summary.CreatedAt = time.Now()
61+
}
62+
summary.UpdatedAt = time.Now()
63+
64+
opts := options.Update().SetUpsert(true)
65+
_, err := s.Collection.UpdateOne(
66+
ctx,
67+
bson.M{"_id": summary.ID},
68+
bson.M{"$set": summary},
69+
opts,
70+
)
71+
if err != nil {
72+
return fmt.Errorf("failed to save summary: %w", err)
73+
}
74+
return nil
75+
}
76+
77+
// Delete removes summary from the database
78+
func (s SummariesDAO) Delete(ctx context.Context, contentHash string) error {
79+
_, err := s.Collection.DeleteOne(ctx, bson.M{"_id": contentHash})
80+
if err != nil {
81+
return fmt.Errorf("failed to delete summary: %w", err)
82+
}
83+
return nil
84+
}
85+
86+
// generateContentHash returns the hex-encoded SHA-256 digest of content,
// which is used as the document ID for cached summaries.
func generateContentHash(content string) string {
	hasher := sha256.New()
	hasher.Write([]byte(content)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}

backend/datastore/summaries_test.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
package datastore
2+
3+
import (
4+
"context"
5+
"os"
6+
"testing"
7+
"time"
8+
9+
"github.com/stretchr/testify/assert"
10+
"github.com/stretchr/testify/require"
11+
)
12+
13+
func TestSummariesDAO_SaveAndGet(t *testing.T) {
14+
if _, ok := os.LookupEnv("ENABLE_MONGO_TESTS"); !ok {
15+
t.Skip("ENABLE_MONGO_TESTS env variable is not set")
16+
}
17+
18+
mdb, err := New("mongodb://localhost:27017", "test_ureadability", 0)
19+
require.NoError(t, err)
20+
21+
// Create a unique collection for this test to avoid conflicts
22+
collection := mdb.client.Database(mdb.dbName).Collection("summaries_test")
23+
defer func() {
24+
_ = collection.Drop(context.Background())
25+
}()
26+
27+
dao := SummariesDAO{Collection: collection}
28+
29+
content := "This is a test article content. It should generate a unique hash."
30+
summary := Summary{
31+
Content: content,
32+
Summary: "This is a test summary of the article.",
33+
Model: "gpt-4o-mini",
34+
CreatedAt: time.Now(),
35+
}
36+
37+
// Test saving a summary
38+
err = dao.Save(context.Background(), summary)
39+
require.NoError(t, err)
40+
41+
// Test getting the summary
42+
foundSummary, found := dao.Get(context.Background(), content)
43+
assert.True(t, found)
44+
assert.Equal(t, summary.Summary, foundSummary.Summary)
45+
assert.Equal(t, summary.Model, foundSummary.Model)
46+
assert.NotEmpty(t, foundSummary.ID)
47+
48+
// Test getting a non-existent summary
49+
_, found = dao.Get(context.Background(), "non-existent content")
50+
assert.False(t, found)
51+
52+
// Test updating an existing summary
53+
updatedSummary := Summary{
54+
ID: foundSummary.ID,
55+
Content: content,
56+
Summary: "This is an updated summary.",
57+
Model: "gpt-4o-mini",
58+
CreatedAt: foundSummary.CreatedAt,
59+
}
60+
61+
err = dao.Save(context.Background(), updatedSummary)
62+
require.NoError(t, err)
63+
64+
foundSummary, found = dao.Get(context.Background(), content)
65+
assert.True(t, found)
66+
assert.Equal(t, "This is an updated summary.", foundSummary.Summary)
67+
assert.Equal(t, updatedSummary.CreatedAt, foundSummary.CreatedAt)
68+
assert.NotEqual(t, updatedSummary.UpdatedAt, foundSummary.UpdatedAt) // UpdatedAt should be set by the DAO
69+
70+
// Test deleting a summary
71+
err = dao.Delete(context.Background(), foundSummary.ID)
72+
require.NoError(t, err)
73+
74+
_, found = dao.Get(context.Background(), content)
75+
assert.False(t, found)
76+
}
77+
78+
func TestGenerateContentHash(t *testing.T) {
79+
content1 := "This is a test content."
80+
content2 := "This is a different test content."
81+
82+
hash1 := generateContentHash(content1)
83+
hash2 := generateContentHash(content2)
84+
85+
assert.NotEqual(t, hash1, hash2)
86+
assert.Equal(t, hash1, generateContentHash(content1)) // Same content should produce same hash
87+
assert.Equal(t, 64, len(hash1)) // SHA-256 produces 64 character hex string
88+
}

backend/extractor/readability.go

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,30 @@ type Rules interface {
3434
All(ctx context.Context) []datastore.Rule
3535
}
3636

37+
// Summaries interface with all methods to access summary cache
38+
//
39+
//go:generate moq -out summaries_mock.go . Summaries
40+
type Summaries interface {
41+
Get(ctx context.Context, content string) (datastore.Summary, bool)
42+
Save(ctx context.Context, summary datastore.Summary) error
43+
Delete(ctx context.Context, contentHash string) error
44+
}
45+
3746
// UReadability implements fetcher & extractor for local readability-like functionality
3847
type UReadability struct {
3948
TimeOut time.Duration
4049
SnippetSize int
4150
Rules Rules
51+
Summaries Summaries
4252
OpenAIKey string
53+
ModelType string
4354

44-
openAIClient OpenAIClient
55+
apiClient OpenAIClient
56+
}
57+
58+
// SetAPIClient sets the API client for testing purposes
59+
func (f *UReadability) SetAPIClient(client OpenAIClient) {
60+
f.apiClient = client
4561
}
4662

4763
// Response from api calls
@@ -79,16 +95,34 @@ func (f *UReadability) ExtractByRule(ctx context.Context, reqURL string, rule *d
7995
}
8096

8197
func (f *UReadability) GenerateSummary(ctx context.Context, content string) (string, error) {
98+
// Check for API key
8299
if f.OpenAIKey == "" {
83-
return "", fmt.Errorf("OpenAI key is not set")
100+
return "", fmt.Errorf("API key for summarization is not set")
84101
}
85-
if f.openAIClient == nil {
86-
f.openAIClient = openai.NewClient(f.OpenAIKey)
102+
103+
// Check for cached summary
104+
if f.Summaries != nil {
105+
if cachedSummary, found := f.Summaries.Get(ctx, content); found {
106+
log.Printf("[DEBUG] using cached summary for content")
107+
return cachedSummary.Summary, nil
108+
}
109+
}
110+
111+
// Initialize client if needed
112+
if f.apiClient == nil {
113+
f.apiClient = openai.NewClient(f.OpenAIKey)
87114
}
88-
resp, err := f.openAIClient.CreateChatCompletion(
115+
116+
// Use the model name or default to GPT-4o Mini if not specified
117+
model := openai.GPT4oMini
118+
if f.ModelType != "" {
119+
model = f.ModelType
120+
}
121+
// Generate summary
122+
resp, err := f.apiClient.CreateChatCompletion(
89123
ctx,
90124
openai.ChatCompletionRequest{
91-
Model: openai.GPT4o,
125+
Model: model,
92126
Messages: []openai.ChatCompletionMessage{
93127
{
94128
Role: openai.ChatMessageRoleSystem,
@@ -103,10 +137,29 @@ func (f *UReadability) GenerateSummary(ctx context.Context, content string) (str
103137
)
104138

105139
if err != nil {
106-
return "", err
140+
log.Printf("[WARN] AI summarization failed: %v", err)
141+
return "", fmt.Errorf("failed to generate summary: %w", err)
142+
}
143+
144+
summary := resp.Choices[0].Message.Content
145+
146+
// Cache the summary if summaries cache is available
147+
if f.Summaries != nil {
148+
err = f.Summaries.Save(ctx, datastore.Summary{
149+
Content: content,
150+
Summary: summary,
151+
Model: model,
152+
CreatedAt: time.Now(),
153+
})
154+
155+
if err != nil {
156+
log.Printf("[WARN] failed to cache summary: %v", err)
157+
} else {
158+
log.Printf("[DEBUG] summary cached successfully")
159+
}
107160
}
108161

109-
return resp.Choices[0].Message.Content, nil
162+
return summary, nil
110163
}
111164

112165
// ExtractWithRules is the core function that handles extraction with or without a specific rule

0 commit comments

Comments
 (0)