kobotoolbox · RuthTurk · Oct 16, 2025 · Aug 28, 2025 · Sep 1, 2025 · Sep 3, 2025
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class UserReportsConfig(AppConfig):
+    default_auto_field = 'django.db.models.BigAutoField'
+    name = 'kobo.apps.user_reports'
@@ -0,0 +1,110 @@
+import uuid
+
+from django.db import models
+from django.utils import timezone
+
+
+class BillingAndUsageSnapshotStatus(models.TextChoices):
+    RUNNING = 'running'
+    COMPLETED = 'completed'
+    ABORTED = 'aborted'
+
+
+class BillingAndUsageSnapshot(models.Model):
+    """
+    A snapshot table for storing precomputed organization billing and usage data.
+
+    Why this table exists:
+    1. Maintaining billing period calculations directly inside the materialized view
+       would make it too complex and hard to manage.
+    2. Usage data such as total submissions, current period submissions, and storage
+       resides in the `kobocat` db, while the materialized view lives in the `kpi`
+       db. Joining across databases for 1.7M+ users would be inefficient.
+    3. A periodic Celery task precomputes these values and writes them here.
+       The materialized view then joins against this table efficiently.
+    """
+
+    organization_id = models.CharField(max_length=64, unique=True)
+    effective_user_id = models.IntegerField(null=True, blank=True, db_index=True)
+    storage_bytes_total = models.BigIntegerField(default=0)
+    submission_counts_all_time = models.BigIntegerField(default=0)
+    current_period_submissions = models.BigIntegerField(default=0)
+    billing_period_start = models.DateTimeField(null=True, blank=True)
+    billing_period_end = models.DateTimeField(null=True, blank=True)
+    snapshot_created_at = models.DateTimeField(default=timezone.now)
+    last_snapshot_run_id = models.UUIDField(null=True, blank=True, db_index=True)
+
+    class Meta:
+        managed = False
+        db_table = 'billing_and_usage_snapshot'
+        indexes = [
+            models.Index(fields=['effective_user_id']),
+            models.Index(fields=['snapshot_created_at']),
+            models.Index(fields=['last_snapshot_run_id']),
+        ]
+
+    def __str__(self):
+        return f'BillingAndUsageSnapshot(org={self.organization_id})'
+
+
+class BillingAndUsageSnapshotRun(models.Model):
+    """
+    Tracks the status and progress of billing and usage snapshot runs
+    """
+    id = models.BigAutoField(primary_key=True)
+    run_id = models.UUIDField(default=uuid.uuid4, unique=True, editable=False)
+    status = models.CharField(
+        max_length=32,
+        choices=BillingAndUsageSnapshotStatus.choices,
+        default=BillingAndUsageSnapshotStatus.RUNNING
+    )
+    started_at = models.DateTimeField(default=timezone.now)
+    last_heartbeat_at = models.DateTimeField(default=timezone.now)
+    last_processed_org_id = models.CharField(null=True, blank=True)
+    expires_at = models.DateTimeField(null=True, blank=True)
+    details = models.JSONField(null=True, blank=True)
+
+    class Meta:
+        db_table = 'billing_and_usage_snapshot_run'
+        managed = False
+        ordering = ['-started_at']
+
+
+class UserReports(models.Model):
+    """
+    Unmanaged model mapped onto the `user_reports_mv` database relation.
+
+    Each row carries a user's account fields together with precomputed usage
+    figures. Django's migration framework does not create or alter the
+    relation (`managed = False`).
+
+    NOTE(review): judging by the `_mv` suffix this is presumably a
+    materialized view - confirm where it is defined and refreshed.
+    """
+
+    # Account fields
+    extra_details_uid = models.CharField(null=True, blank=True)
+    username = models.CharField()
+    first_name = models.CharField()
+    last_name = models.CharField()
+    email = models.EmailField()
+    is_superuser = models.BooleanField()
+    is_staff = models.BooleanField()
+    is_active = models.BooleanField()
+    # NOTE(review): both dates are declared as text rather than
+    # DateTimeField - presumably they arrive pre-formatted; confirm.
+    date_joined = models.CharField()
+    last_login = models.CharField(null=True, blank=True)
+    validated_email = models.BooleanField()
+    validated_password = models.BooleanField()
+    mfa_is_active = models.BooleanField()
+    sso_is_active = models.BooleanField()
+    accepted_tos = models.BooleanField()
+    social_accounts = models.JSONField(default=list)
+    organizations = models.JSONField(null=True, blank=True)
+    metadata = models.JSONField(null=True, blank=True)
+    subscriptions = models.JSONField(default=list)
+
+    # Usage totals and asset counts
+    storage_bytes_total = models.BigIntegerField(default=0)
+    submission_counts_all_time = models.BigIntegerField(default=0)
+    nlp_usage_asr_seconds_total = models.BigIntegerField(default=0)
+    nlp_usage_mt_characters_total = models.BigIntegerField(default=0)
+    asset_count = models.IntegerField(default=0)
+    deployed_asset_count = models.IntegerField(default=0)
+
+    # Current period boundaries and usage within that period
+    current_period_start = models.DateTimeField(null=True, blank=True)
+    current_period_end = models.DateTimeField(null=True, blank=True)
+    current_period_submissions = models.BigIntegerField(default=0)
+    current_period_asr = models.BigIntegerField(default=0)
+    current_period_mt = models.BigIntegerField(default=0)
+    # NOTE(review): declared as an integer here, whereas
+    # `BillingAndUsageSnapshot.organization_id` is a CharField(max_length=64).
+    # Verify which type matches the actual organization primary key.
+    organization_id = models.IntegerField(null=True, blank=True)
+
+    class Meta:
+        managed = False
+        db_table = 'user_reports_mv'
@@ -0,0 +1,122 @@
+from typing import Any
+
+from django.utils import timezone
+from rest_framework import serializers
+
+from kobo.apps.organizations.constants import UsageType
+from kobo.apps.organizations.models import Organization
+from kobo.apps.stripe.utils.subscription_limits import (
+    get_organizations_effective_limits,
+)
+from kobo.apps.user_reports.models import UserReports
+from kpi.utils.usage_calculator import (
+    calculate_usage_balance,
+)
+
+
+class UserReportsSerializer(serializers.ModelSerializer):
+    extra_details__uid = serializers.CharField(
+        source='extra_details_uid', read_only=True
+    )
+    current_service_usage = serializers.SerializerMethodField()
+    account_restricted = serializers.SerializerMethodField()
+
+    class Meta:
+        model = UserReports
+        fields = [
+            'extra_details__uid',
+            'username',
+            'first_name',
+            'last_name',
+            'email',
+            'is_superuser',
+            'is_staff',
+            'is_active',
+            'date_joined',
+            'last_login',
+            'validated_email',
+            'validated_password',
+            'mfa_is_active',
+            'sso_is_active',
+            'accepted_tos',
+            'social_accounts',
+            'organizations',
+            'metadata',
+            'subscriptions',
+            'current_service_usage',
+            'account_restricted',
+            'asset_count',
+            'deployed_asset_count',
+        ]
+
+    def get_account_restricted(self, obj) -> bool:
+        service_usage = self.get_current_service_usage(obj)
+        balances = service_usage.get('balances', {})
+        return any(balance and balance.get('exceeded') for balance in balances.values())
+
+    def get_current_service_usage(self, obj) -> dict[str, Any]:
+        total_nlp_usage = {
+            'asr_seconds_current_period': obj.current_period_asr,
+            'mt_characters_current_period': obj.current_period_mt,
+            'asr_seconds_all_time': obj.nlp_usage_asr_seconds_total,
+            'mt_characters_all_time': obj.nlp_usage_mt_characters_total,
+        }
+
+        total_submission_count = {
+            'current_period': obj.current_period_submissions,
+            'all_time': obj.submission_counts_all_time,
+        }
+
+        # Calculate usage balances (this is the only runtime calculation needed)
+        balances = self._calculate_usage_balances(obj)
+
+        # Format billing period dates
+        current_period_start = None
+        current_period_end = None
+        if obj.current_period_start:
+            current_period_start = obj.current_period_start.isoformat()
+        if obj.current_period_end:
+            current_period_end = obj.current_period_end.isoformat()
+
+        return {
+            'total_nlp_usage': total_nlp_usage,
+            'total_storage_bytes': obj.storage_bytes_total,
+            'total_submission_count': total_submission_count,
+            'balances': balances,
+            'current_period_start': current_period_start,
+            'current_period_end': current_period_end,
+            'last_updated': timezone.now().isoformat(),
+        }
+
+    def _calculate_usage_balances(self, obj) -> dict[str, Any]:
+        """
+        Calculate usage balances against organization limits.
+
+        This is the only remaining runtime calculation, but it's much more
+        efficient since all usage data is pre-computed.
+        """
+        if not obj.organization_id:
+            return {}
+
+        organization = Organization.objects.get(id=obj.organization_id)
+        limits = get_organizations_effective_limits([organization], True, True)
+        org_limits = limits.get(organization.id, {})
+
+        return {
+            'submission': calculate_usage_balance(
+                limit=org_limits.get(f'{UsageType.SUBMISSION}_limit', float('inf')),
+                usage=obj.current_period_submissions,
+            ),
+            'storage_bytes': calculate_usage_balance(
+                limit=org_limits.get(f'{UsageType.STORAGE_BYTES}_limit', float('inf')),
+                usage=obj.storage_bytes_total,
+            ),
+            'asr_seconds': calculate_usage_balance(
+                limit=org_limits.get(f'{UsageType.ASR_SECONDS}_limit', float('inf')),
+                usage=obj.current_period_asr,
+            ),
+            'mt_characters': calculate_usage_balance(
+                limit=org_limits.get(f'{UsageType.MT_CHARACTERS}_limit', float('inf')),
+                usage=obj.current_period_mt,
+            ),
+        }