Skip to content
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
1dd21ae
initial draft of using a materialized view to query user report data
RuthTurk Aug 28, 2025
e48582b
clean up materialized view migration and add SKIP_HEAVY_MIGRATION, add…
RuthTurk Sep 1, 2025
a163c57
Draft implementation of optimized usage data calculation for user report
rajpatel24 Sep 3, 2025
f9a90b3
Optimization for the cross database values
rajpatel24 Sep 9, 2025
4f6f0f8
add documentation for /user-reports endpoint
RuthTurk Sep 10, 2025
c688f58
Refactor billing and usage snapshot table
rajpatel24 Sep 11, 2025
9780930
Improvements
rajpatel24 Sep 11, 2025
9649670
Merge refactored endpoint logic into original implementation
rajpatel24 Sep 11, 2025
9304d87
add 'account_restricted' field
RuthTurk Sep 12, 2025
20896e5
Refactor snapshot and celery task logic
rajpatel24 Sep 12, 2025
061f595
Add comments
rajpatel24 Sep 12, 2025
dbd88e6
Merge branch 'main' of github.com:kobotoolbox/kpi into dev-899-optimi…
RuthTurk Sep 13, 2025
1227002
fix migration dependency errors
RuthTurk Sep 13, 2025
33f3bde
only run 0070 if Stripe is enabled and add error messages to the endp…
RuthTurk Sep 15, 2025
4d87545
Cleanup for filters
rajpatel24 Sep 15, 2025
38e1965
add tests
RuthTurk Sep 15, 2025
75e5a66
fix spelling
RuthTurk Sep 15, 2025
757b22f
skip tests if stripe is not enabled
RuthTurk Sep 15, 2025
3f421d1
fix tests and add openapi schema generated by management command
RuthTurk Sep 16, 2025
a54b789
remove newline at end of schema_v2.json
RuthTurk Sep 16, 2025
bf3bc31
Merge branch 'main' of github.com:kobotoolbox/kpi into dev-899-optimi…
RuthTurk Sep 19, 2025
817905d
clean up and organize code
RuthTurk Sep 19, 2025
9be8ae7
fix darker issues
RuthTurk Sep 19, 2025
8d7441b
move everything for this endpoint into a new django app called user_r…
RuthTurk Sep 22, 2025
e33b454
Merge branch 'main' of github.com:kobotoolbox/kpi into dev-899-optimi…
RuthTurk Sep 22, 2025
859243e
remove newline
RuthTurk Sep 22, 2025
bc57655
Update logic to retry failed Celery job from where it left off
rajpatel24 Oct 2, 2025
881712b
Fix linter issues
rajpatel24 Oct 2, 2025
71334ff
Minor fixes
rajpatel24 Oct 3, 2025
e698dde
Refactor user report snapshot task with resumable runs and add migrat…
rajpatel24 Oct 6, 2025
be0906b
Fix redis locks and simplify codes
noliveleger Oct 6, 2025
ccb0664
Refactor models and migrations to follow conventions and remove redun…
rajpatel24 Oct 7, 2025
0d3418b
Merge branch 'main' of github.com:kobotoolbox/kpi into dev-899-optimi…
RuthTurk Oct 8, 2025
c58e1b8
update audit logs tag to server logs and fix error message with corre…
RuthTurk Oct 8, 2025
a556695
Remove filtering logic from the branch
rajpatel24 Oct 8, 2025
e867928
fix darker and orval issues
RuthTurk Oct 8, 2025
7f87d61
rename materialized view 'user_reports_mv' to 'user_reports_userrepor…
RuthTurk Oct 8, 2025
fa632e4
don't include anonymous users in reports, create utility function to …
RuthTurk Oct 9, 2025
ffb30d7
Merge branch 'main' of github.com:kobotoolbox/kpi into dev-899-optimi…
RuthTurk Oct 9, 2025
6485dc3
fix tests
RuthTurk Oct 9, 2025
1232d5e
Update materialized view field names for consistency
rajpatel24 Oct 10, 2025
6e4fd44
Minor fixes
rajpatel24 Oct 10, 2025
aa84ad4
Fix linter issues
rajpatel24 Oct 10, 2025
b8bd62d
Add organization balance calculations to celery task and materialized…
rajpatel24 Oct 10, 2025
57212ef
Make QueryParse work with integer,float in JSONBfield
noliveleger Oct 10, 2025
76a38a1
Merge branch 'dev-899-optimize-queries-for-subscriptions' of github.c…
noliveleger Oct 10, 2025
f7cdc9a
Add unit test for process_value of QueryParser
noliveleger Oct 11, 2025
214b7ad
Rename 'organizations' to 'organization' and simplified field names
rajpatel24 Oct 12, 2025
a851aac
Fix failing tests and refactoring
rajpatel24 Oct 13, 2025
5713681
Merge branch 'main' of github.com:kobotoolbox/kpi into dev-899-optimi…
RuthTurk Oct 13, 2025
a8342bc
fix tests and add require_stripe to refresh_user_report_snapshots
RuthTurk Oct 14, 2025
10ba98b
Merge branch 'main' of github.com:kobotoolbox/kpi into dev-899-optimi…
RuthTurk Oct 14, 2025
1034ed2
use f string instead of replace for filtering out anonymous users
RuthTurk Oct 14, 2025
eed9c41
Remove redundant fields from the materialized view
rajpatel24 Oct 15, 2025
67d4d96
Refactor user reports logic to ensure compatibility with stripe-disab…
rajpatel24 Oct 15, 2025
68540ee
Update materialized view
rajpatel24 Oct 15, 2025
db2df41
update materialized view to run in non-stripe environment
RuthTurk Oct 15, 2025
a9498f1
Use composite index (user, org) for materialized view, keep id for Django
noliveleger Oct 15, 2025
f55589c
simplify how the materialized view is created to reduce redundancy
RuthTurk Oct 15, 2025
85a7c46
Merge branch 'main' of github.com:kobotoolbox/kpi into dev-899-optimi…
RuthTurk Oct 15, 2025
840d2d0
fix darker
RuthTurk Oct 15, 2025
fa29e90
fix tests so that they don't require stripe
RuthTurk Oct 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
6 changes: 6 additions & 0 deletions kobo/apps/user_reports/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class UserReportsConfig(AppConfig):
    """Application configuration for the ``kobo.apps.user_reports`` app."""

    # Dotted Python path identifying this application.
    name = 'kobo.apps.user_reports'
    # Field class used for implicit primary keys of this app's models.
    default_auto_field = 'django.db.models.BigAutoField'
454 changes: 454 additions & 0 deletions kobo/apps/user_reports/migrations/0001_initial.py

Large diffs are not rendered by default.

Empty file.
110 changes: 110 additions & 0 deletions kobo/apps/user_reports/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import uuid

from django.db import models
from django.utils import timezone


class BillingAndUsageSnapshotStatus(models.TextChoices):
    """
    Allowed values for the ``status`` column of `BillingAndUsageSnapshotRun`.
    """

    RUNNING = 'running'
    COMPLETED = 'completed'
    ABORTED = 'aborted'


class BillingAndUsageSnapshot(models.Model):
    """
    Precomputed billing and usage figures, one row per organization.

    Rationale for keeping this data in its own table:

    1. Computing billing periods inside the materialized view itself would
       make the view too complex and hard to manage.
    2. Usage data (total submissions, current period submissions, storage)
       resides in the `kobocat` db while the materialized view lives in the
       `kpi` db. Joining across databases for 1.7M+ users would be
       inefficient.
    3. A periodic Celery task precomputes the values and writes them here, so
       the materialized view can join against this table efficiently.

    The model is unmanaged (`managed = False`): Django's migration autodetector
    does not create or alter the `billing_and_usage_snapshot` table from this
    definition.
    """

    # `unique=True`: at most one snapshot row per organization.
    organization_id = models.CharField(max_length=64, unique=True)
    effective_user_id = models.IntegerField(
        null=True, blank=True, db_index=True
    )

    # Usage counters; all default to zero.
    storage_bytes_total = models.BigIntegerField(default=0)
    submission_counts_all_time = models.BigIntegerField(default=0)
    current_period_submissions = models.BigIntegerField(default=0)

    # Billing period boundaries; both are nullable.
    billing_period_start = models.DateTimeField(null=True, blank=True)
    billing_period_end = models.DateTimeField(null=True, blank=True)

    # Bookkeeping about the run that last wrote this row.
    snapshot_created_at = models.DateTimeField(default=timezone.now)
    last_snapshot_run_id = models.UUIDField(
        null=True, blank=True, db_index=True
    )

    class Meta:
        db_table = 'billing_and_usage_snapshot'
        managed = False
        # NOTE(review): `effective_user_id` and `last_snapshot_run_id` are
        # also declared with `db_index=True` above, so two of these entries
        # look redundant — confirm against the migration before removing.
        indexes = [
            models.Index(fields=['effective_user_id']),
            models.Index(fields=['snapshot_created_at']),
            models.Index(fields=['last_snapshot_run_id']),
        ]

    def __str__(self):
        org_id = self.organization_id
        return f'BillingAndUsageSnapshot(org={org_id})'


class BillingAndUsageSnapshotRun(models.Model):
    """
    Status and progress record of a billing and usage snapshot run.

    Unmanaged model mapped to the `billing_and_usage_snapshot_run` table;
    rows are returned most recently started first by default.
    """

    id = models.BigAutoField(primary_key=True)
    # Generated with `uuid.uuid4` when not supplied; unique and not editable.
    run_id = models.UUIDField(
        default=uuid.uuid4,
        unique=True,
        editable=False,
    )
    # One of `BillingAndUsageSnapshotStatus`; a new row starts as RUNNING.
    status = models.CharField(
        max_length=32,
        choices=BillingAndUsageSnapshotStatus.choices,
        default=BillingAndUsageSnapshotStatus.RUNNING,
    )

    # Both timestamps default to the moment the row object is created.
    started_at = models.DateTimeField(default=timezone.now)
    last_heartbeat_at = models.DateTimeField(default=timezone.now)

    # Presumably the resume point for an interrupted run — verify against
    # the task that writes it.
    last_processed_org_id = models.CharField(null=True, blank=True)
    expires_at = models.DateTimeField(null=True, blank=True)
    # Free-form JSON payload; its schema is not defined by this model.
    details = models.JSONField(null=True, blank=True)

    class Meta:
        managed = False
        db_table = 'billing_and_usage_snapshot_run'
        ordering = ['-started_at']


class UserReports(models.Model):
    """
    Read model for one row of the `user_reports_mv` database relation.

    The model is unmanaged (`managed = False`), so Django's migration
    autodetector does not create or alter the relation from this definition.
    Presumably `user_reports_mv` is the materialized view described in
    `BillingAndUsageSnapshot` — confirm against the migration.
    """

    # --- Account identity and flags ---
    extra_details_uid = models.CharField(null=True, blank=True)
    username = models.CharField()
    first_name = models.CharField()
    last_name = models.CharField()
    email = models.EmailField()
    is_superuser = models.BooleanField()
    is_staff = models.BooleanField()
    is_active = models.BooleanField()
    # Both dates are declared as text columns, not DateTimeField.
    date_joined = models.CharField()
    last_login = models.CharField(null=True, blank=True)
    validated_email = models.BooleanField()
    validated_password = models.BooleanField()
    mfa_is_active = models.BooleanField()
    sso_is_active = models.BooleanField()
    accepted_tos = models.BooleanField()

    # --- JSON columns ---
    social_accounts = models.JSONField(default=list)
    organizations = models.JSONField(null=True, blank=True)
    metadata = models.JSONField(null=True, blank=True)
    subscriptions = models.JSONField(default=list)

    # --- All-time usage and asset counters ---
    storage_bytes_total = models.BigIntegerField(default=0)
    submission_counts_all_time = models.BigIntegerField(default=0)
    nlp_usage_asr_seconds_total = models.BigIntegerField(default=0)
    nlp_usage_mt_characters_total = models.BigIntegerField(default=0)
    asset_count = models.IntegerField(default=0)
    deployed_asset_count = models.IntegerField(default=0)

    # --- Current billing period ---
    current_period_start = models.DateTimeField(null=True, blank=True)
    current_period_end = models.DateTimeField(null=True, blank=True)
    current_period_submissions = models.BigIntegerField(default=0)
    current_period_asr = models.BigIntegerField(default=0)
    current_period_mt = models.BigIntegerField(default=0)

    # NOTE(review): declared as an integer here, whereas
    # `BillingAndUsageSnapshot.organization_id` is a CharField — confirm which
    # type the relation actually exposes.
    organization_id = models.IntegerField(null=True, blank=True)

    class Meta:
        db_table = 'user_reports_mv'
        managed = False
122 changes: 122 additions & 0 deletions kobo/apps/user_reports/seralizers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from typing import Any

from django.utils import timezone
from rest_framework import serializers

from kobo.apps.organizations.constants import UsageType
from kobo.apps.organizations.models import Organization
from kobo.apps.stripe.utils.subscription_limits import (
get_organizations_effective_limits,
)
from kobo.apps.user_reports.models import UserReports
from kpi.utils.usage_calculator import (
calculate_usage_balance,
)


class UserReportsSerializer(serializers.ModelSerializer):
    """
    Serialize a `UserReports` row, adding two computed fields.

    - `current_service_usage`: usage counters read from the row, plus usage
      balances computed against the organization's effective limits.
    - `account_restricted`: `True` when any of those balances is reported as
      exceeded.

    Both computed fields need the balances, so they are computed at most once
    per row object and memoized on that object.
    """

    # Name of the attribute used to memoize the balances on a row object.
    _BALANCES_CACHE_ATTR = '_user_reports_usage_balances'

    extra_details__uid = serializers.CharField(
        source='extra_details_uid', read_only=True
    )
    current_service_usage = serializers.SerializerMethodField()
    account_restricted = serializers.SerializerMethodField()

    class Meta:
        model = UserReports
        fields = [
            'extra_details__uid',
            'username',
            'first_name',
            'last_name',
            'email',
            'is_superuser',
            'is_staff',
            'is_active',
            'date_joined',
            'last_login',
            'validated_email',
            'validated_password',
            'mfa_is_active',
            'sso_is_active',
            'accepted_tos',
            'social_accounts',
            'organizations',
            'metadata',
            'subscriptions',
            'current_service_usage',
            'account_restricted',
            'asset_count',
            'deployed_asset_count',
        ]

    def get_account_restricted(self, obj) -> bool:
        """
        Return `True` if at least one usage balance of `obj` is exceeded.

        Only the balances are needed here, so the full service usage payload
        is not rebuilt; the memoized balances are reused instead.
        """
        balances = self._get_usage_balances(obj)
        return any(
            balance and balance.get('exceeded')
            for balance in balances.values()
        )

    def get_current_service_usage(self, obj) -> dict[str, Any]:
        """
        Build the service usage payload for `obj`.

        Counters and billing period dates are read straight from the row;
        `last_updated` is the time at which this method is called.
        """
        total_nlp_usage = {
            'asr_seconds_current_period': obj.current_period_asr,
            'mt_characters_current_period': obj.current_period_mt,
            'asr_seconds_all_time': obj.nlp_usage_asr_seconds_total,
            'mt_characters_all_time': obj.nlp_usage_mt_characters_total,
        }

        total_submission_count = {
            'current_period': obj.current_period_submissions,
            'all_time': obj.submission_counts_all_time,
        }

        # Billing period dates are nullable; emit ISO 8601 strings or `None`.
        period_start = obj.current_period_start
        period_end = obj.current_period_end

        return {
            'total_nlp_usage': total_nlp_usage,
            'total_storage_bytes': obj.storage_bytes_total,
            'total_submission_count': total_submission_count,
            'balances': self._get_usage_balances(obj),
            'current_period_start': (
                period_start.isoformat() if period_start else None
            ),
            'current_period_end': (
                period_end.isoformat() if period_end else None
            ),
            'last_updated': timezone.now().isoformat(),
        }

    def _get_usage_balances(self, obj) -> dict[str, Any]:
        """
        Return the usage balances of `obj`, computing them at most once.

        The result is stored on `obj` itself so that every serializer method
        needing the balances for the same row shares one computation (and one
        organization lookup).
        """
        balances = getattr(obj, self._BALANCES_CACHE_ATTR, None)
        if balances is None:
            balances = self._calculate_usage_balances(obj)
            setattr(obj, self._BALANCES_CACHE_ATTR, balances)
        return balances

    def _calculate_usage_balances(self, obj) -> dict[str, Any]:
        """
        Calculate usage balances against organization limits.

        This is the only remaining runtime calculation; the usage figures
        themselves are read precomputed from `obj`.

        Returns an empty dict when the row has no organization, or when the
        referenced organization cannot be found, so that one inconsistent row
        does not fail the whole response.
        """
        if not obj.organization_id:
            return {}

        try:
            organization = Organization.objects.get(id=obj.organization_id)
        except Organization.DoesNotExist:
            # Treated like a row without an organization.
            return {}

        limits = get_organizations_effective_limits([organization], True, True)
        org_limits = limits.get(organization.id, {})

        # Current usage for each balance key; a missing limit is treated as
        # unlimited (`float('inf')`).
        usage_by_key = {
            'submission': (UsageType.SUBMISSION, obj.current_period_submissions),
            'storage_bytes': (UsageType.STORAGE_BYTES, obj.storage_bytes_total),
            'asr_seconds': (UsageType.ASR_SECONDS, obj.current_period_asr),
            'mt_characters': (UsageType.MT_CHARACTERS, obj.current_period_mt),
        }

        return {
            key: calculate_usage_balance(
                limit=org_limits.get(f'{usage_type}_limit', float('inf')),
                usage=usage,
            )
            for key, (usage_type, usage) in usage_by_key.items()
        }
Loading