diff --git a/kpi/management/commands/build_user_report_mv.py b/kpi/management/commands/build_user_report_mv.py new file mode 100644 index 0000000000..655e8b13a0 --- /dev/null +++ b/kpi/management/commands/build_user_report_mv.py @@ -0,0 +1,258 @@ +from django.core.management.base import BaseCommand +from django.apps import apps +from django.db import connection +from django.conf import settings +from django.utils import timezone + + +class Command(BaseCommand): + help = 'Create optimized user_report materialized view without billing period dependencies.' + + MATVIEW_NAME = 'user_report_mv' + + def handle(self, *args, **options): + # Get models + User = apps.get_model('kobo_auth', 'User') + XForm = apps.get_model('logger', 'XForm') + DailyCounter = apps.get_model('logger', 'DailyXFormSubmissionCounter') + NLP = apps.get_model('trackers', 'NLPUsageCounter') + ExtraDetails = apps.get_model('hub', 'ExtraUserDetail') + Asset = apps.get_model('kpi', 'Asset') + OrgUser = apps.get_model('organizations', 'OrganizationUser') + Organization = apps.get_model('organizations', 'Organization') + EmailAddress = apps.get_model('account', 'EmailAddress') + SocialAccount = apps.get_model('socialaccount', 'SocialAccount') + + # Optional djstripe models + DjCustomer = None + DjSubscription = None + try: + DjCustomer = apps.get_model('djstripe', 'Customer') + DjSubscription = apps.get_model('djstripe', 'Subscription') + except LookupError: + self.stdout.write('djstripe not installed; subscriptions will be empty.') + + # Optional MFA model + MFAMethod = None + try: + MFAMethod = apps.get_model('mfa', 'MFAMethod') + except LookupError: + self.stdout.write('MFA models not found; mfa_is_active will be false.') + + # Get table names + user_table = User._meta.db_table + xform_table = XForm._meta.db_table + daily_counter_table = DailyCounter._meta.db_table + nlp_table = NLP._meta.db_table + ed_table = ExtraDetails._meta.db_table + asset_table = Asset._meta.db_table + org_user_table = OrgUser._meta.db_table + org_table = Organization._meta.db_table + ea_table = EmailAddress._meta.db_table + sa_table = SocialAccount._meta.db_table + + self.stdout.write('Creating materialized view...') + + sql = f""" + CREATE MATERIALIZED VIEW IF NOT EXISTS {self.MATVIEW_NAME} AS + SELECT + -- Basic user fields + u.id AS user_id, + u.username, + u.first_name, + u.last_name, + u.email, + u.is_superuser, + u.is_staff, + u.is_active, + u.date_joined, + u.last_login, + + -- Extra details + ed.uid AS extra_details_uid, + ed.data AS metadata, + + -- Boolean flags + (ed.private_data ? 'last_tos_accept_time') AS accepted_tos, + (ea.verified AND ea.primary) AS validated_email, + COALESCE(ed.validated_password, false) AS validated_password, + (EXISTS(SELECT 1 FROM {sa_table} sa WHERE sa.user_id = u.id)) AS sso_is_active, + {f''' + (EXISTS ( + SELECT 1 + FROM {MFAMethod._meta.db_table} mfa + JOIN trench_mfamethod trench ON mfa.mfamethod_ptr_id = trench.id + WHERE trench.user_id = u.id AND trench.is_active = true + )) AS mfa_is_active, + ''' if MFAMethod else 'false AS mfa_is_active,'} + + -- Social accounts as JSON array + COALESCE( + (SELECT jsonb_agg( + jsonb_build_object( + 'provider', sa.provider, + 'uid', sa.uid, + 'last_login', sa.last_login, + 'date_joined', sa.date_joined, + 'extra_data', sa.extra_data + ) + ) FROM {sa_table} sa WHERE sa.user_id = u.id), + '[]'::jsonb + ) AS social_accounts, + + -- Asset counts + COALESCE(ac.total_assets, 0) AS asset_count, + COALESCE(ac.deployed_assets, 0) AS deployed_asset_count, + + -- Storage usage (organization-agnostic) + COALESCE(xb.total_storage_bytes, 0) AS total_storage_bytes, + + -- ALL-TIME totals (no billing period dependency) + COALESCE(subs.all_time_submissions, 0) AS submission_counts_all_time, + COALESCE(nlp.asr_seconds_all_time, 0) AS nlp_usage_asr_seconds_all_time, + COALESCE(nlp.mt_characters_all_time, 0) AS nlp_usage_mt_characters_all_time, + + -- Organization data + ou.organization_id, + org.name AS organization_name, + org.id::text AS organization_uid, + COALESCE(ou.is_admin, false) AS is_org_admin, + ed.data->>'organization_type' AS metadata_organization_type, + + -- Subscriptions as JSON + COALESCE(subs_json.subscriptions_json, '[]'::jsonb) AS subscriptions, + + -- Refresh timestamp + now() AS last_refresh + + FROM {user_table} u + + -- Extra details + LEFT JOIN {ed_table} ed ON ed.user_id = u.id + + -- Email validation + LEFT JOIN {ea_table} ea ON ea.user_id = u.id AND ea.primary = true + + -- Storage calculation (organization-agnostic) + LEFT JOIN ( + SELECT + x.user_id, + SUM(COALESCE(x.attachment_storage_bytes, 0)) AS total_storage_bytes + FROM {xform_table} x + WHERE COALESCE(x.pending_delete, false) = false + GROUP BY x.user_id + ) xb ON xb.user_id = u.id + + -- ALL-TIME submission counts (no period filtering) + LEFT JOIN ( + SELECT + user_id, + SUM(counter) AS all_time_submissions + FROM {daily_counter_table} + GROUP BY user_id + ) subs ON subs.user_id = u.id + + -- ALL-TIME NLP usage (no period filtering) + LEFT JOIN ( + SELECT + user_id, + SUM(total_asr_seconds) AS asr_seconds_all_time, + SUM(total_mt_characters) AS mt_characters_all_time + FROM {nlp_table} + GROUP BY user_id + ) nlp ON nlp.user_id = u.id + + -- Asset counts + LEFT JOIN ( + SELECT + owner_id AS user_id, + COUNT(*) AS total_assets, + SUM(CASE WHEN _deployment_status = 'deployed' THEN 1 ELSE 0 END)::int AS deployed_assets + FROM {asset_table} + WHERE asset_type IS NOT NULL + GROUP BY owner_id + ) ac ON ac.user_id = u.id + + -- Organization membership + LEFT JOIN {org_user_table} ou ON ou.user_id = u.id + LEFT JOIN {org_table} org ON org.id = ou.organization_id + """ + + # Add djstripe subscriptions if available + if DjCustomer and DjSubscription: + sql += f""" + -- Subscriptions from djstripe + LEFT JOIN ( + SELECT + c.subscriber_id::int AS user_id, + jsonb_agg( + jsonb_build_object( + 'id', s.id, + 'status', s.status, + 'current_period_start', s.current_period_start, + 'current_period_end', s.current_period_end, + 'created', s.created, + 'plan', jsonb_build_object( + 'id', p.id, + 'nickname', p.nickname, + 'amount', p.amount, + 'currency', p.currency, + 'interval', p.interval + ) + ) ORDER BY s.current_period_end DESC + ) AS subscriptions_json + FROM {DjCustomer._meta.db_table} c + JOIN {DjSubscription._meta.db_table} s ON s.customer_id = c.id + LEFT JOIN djstripe_plan p ON p.id = s.plan_id::varchar + WHERE s.status IN ('active', 'trialing', 'past_due') + GROUP BY c.subscriber_id + ) subs_json ON subs_json.user_id = u.id + """ + else: + sql += """ + -- No djstripe, empty subscriptions + LEFT JOIN ( + SELECT NULL::int as user_id, '[]'::jsonb as subscriptions_json + WHERE false + ) subs_json ON subs_json.user_id = u.id + """ + + # Execute the materialized view creation + with connection.cursor() as cur: + self.stdout.write('Dropping existing materialized view if exists...') + cur.execute(f'DROP MATERIALIZED VIEW IF EXISTS {self.MATVIEW_NAME} CASCADE;') + + self.stdout.write('Creating materialized view...') + cur.execute(sql) + + # Create indexes for performance + self.stdout.write('Creating indexes...') + + # Primary index + cur.execute(f'CREATE UNIQUE INDEX {self.MATVIEW_NAME}_user_id_idx ON {self.MATVIEW_NAME} (user_id);') + + # Filtering indexes based on requirements + cur.execute( + f'CREATE INDEX {self.MATVIEW_NAME}_storage_bytes_idx ON {self.MATVIEW_NAME} (total_storage_bytes);' + ) + cur.execute(f'CREATE INDEX {self.MATVIEW_NAME}_date_joined_idx ON {self.MATVIEW_NAME} (date_joined);') + cur.execute(f'CREATE INDEX {self.MATVIEW_NAME}_last_login_idx ON {self.MATVIEW_NAME} (last_login);') + cur.execute(f'CREATE INDEX {self.MATVIEW_NAME}_email_idx ON {self.MATVIEW_NAME} (email);') + cur.execute(f'CREATE INDEX {self.MATVIEW_NAME}_org_id_idx ON {self.MATVIEW_NAME} (organization_id);') + + # JSONB indexes for complex filtering + cur.execute( + f'CREATE INDEX {self.MATVIEW_NAME}_subscriptions_gin ON {self.MATVIEW_NAME} USING gin (subscriptions);') + cur.execute( + f'CREATE INDEX {self.MATVIEW_NAME}_org_type_idx ON {self.MATVIEW_NAME} (metadata_organization_type);') + + # Composite indexes for common patterns + cur.execute( + f'CREATE INDEX {self.MATVIEW_NAME}_org_storage_idx ON {self.MATVIEW_NAME} (organization_id, total_storage_bytes);') + cur.execute( + f'CREATE INDEX {self.MATVIEW_NAME}_active_users_idx ON {self.MATVIEW_NAME} (is_active, date_joined);') + + self.stdout.write(self.style.SUCCESS('Materialized view created successfully!')) + self.stdout.write('This view contains organization-agnostic data.') + self.stdout.write('Current period calculations will be done at query time.') + self.stdout.write('To refresh: python manage.py refresh_user_report_mv') diff --git a/kpi/models/user_report.py b/kpi/models/user_report.py new file mode 100644 index 0000000000..fbc22a55cb --- /dev/null +++ b/kpi/models/user_report.py @@ -0,0 +1,61 @@ +from django.db import models + + +class UserReportMaterialized(models.Model): + """ + Django model representing the user_report_mv materialized view. + """ + user_id = models.BigIntegerField(primary_key=True) + + # Basic user fields + username = models.CharField(max_length=255) + first_name = models.CharField(max_length=255, null=True, blank=True) + last_name = models.CharField(max_length=255, null=True, blank=True) + email = models.EmailField(null=True, blank=True) + is_superuser = models.BooleanField(default=False) + is_staff = models.BooleanField(default=False) + is_active = models.BooleanField(default=True) + date_joined = models.DateTimeField(null=True) + last_login = models.DateTimeField(null=True) + + # Extra details + extra_details_uid = models.CharField(max_length=255, null=True, blank=True) + metadata = models.JSONField(null=True, default=dict) + + # Boolean flags + accepted_tos = models.BooleanField(null=True, default=False) + validated_email = models.BooleanField(null=True, default=False) + validated_password = models.BooleanField(null=True, default=False) + sso_is_active = models.BooleanField(null=True, default=False) + mfa_is_active = models.BooleanField(null=True, default=False) + + # Social accounts as JSON array + social_accounts = models.JSONField(null=True, default=list) + + # Asset counts + asset_count = models.IntegerField(default=0) + deployed_asset_count = models.IntegerField(default=0) + + total_storage_bytes = models.BigIntegerField(default=0) + + submission_counts_all_time = models.BigIntegerField(default=0) + nlp_usage_asr_seconds_all_time = models.BigIntegerField(default=0) + nlp_usage_mt_characters_all_time = models.BigIntegerField(default=0) + + # Organization data + organization_id = models.IntegerField(null=True, blank=True) + organization_name = models.CharField(max_length=255, null=True, blank=True) + organization_uid = models.CharField(max_length=255, null=True, blank=True) + is_org_admin = models.BooleanField(default=False) + metadata_organization_type = models.CharField(max_length=255, null=True, blank=True) + + subscriptions = models.JSONField(null=True, default=list) + + last_refresh = models.DateTimeField(null=True) + + class Meta: + managed = False + db_table = 'user_report_mv' + + def __str__(self): + return f'UserReportMaterialized({self.username})' diff --git a/kpi/serializers/v2/user_report_serializer.py b/kpi/serializers/v2/user_report_serializer.py new file mode 100644 index 0000000000..ead71d19c1 --- /dev/null +++ b/kpi/serializers/v2/user_report_serializer.py @@ -0,0 +1,145 @@ +from rest_framework import serializers +from django.conf import settings +from django.apps import apps +from math import inf +from typing import Dict, List, Tuple + +from kpi.models.user_report import UserReportMaterialized +from kobo.apps.organizations.constants import UsageType +from kobo.apps.stripe.utils.import_management import requires_stripe +from kobo.apps.stripe.utils.subscription_limits import get_organizations_effective_limits +from kobo.apps.organizations.models import Organization +from kpi.utils.optimized_usage_calculator import OptimizedUsageCalculator + + +class UserReportSerializer(serializers.ModelSerializer): + """ + Serializer for the hybrid materialized view approach. + Pre-computed data from materialized view + on-demand current period calculations. + """ + + extra_details__uid = serializers.CharField(source='extra_details_uid') + organizations = serializers.SerializerMethodField() + current_service_usage = serializers.SerializerMethodField() + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.usage_calculator = OptimizedUsageCalculator() + self._current_period_cache = {} + + class Meta: + model = UserReportMaterialized + fields = ( + 'extra_details__uid', + 'username', + 'first_name', + 'last_name', + 'email', + 'is_superuser', + 'is_staff', + 'is_active', + 'date_joined', + 'last_login', + 'validated_email', + 'validated_password', + 'mfa_is_active', + 'sso_is_active', + 'accepted_tos', + 'social_accounts', + 'organizations', + 'metadata', + 'subscriptions', + 'current_service_usage', + 'asset_count', + 'deployed_asset_count', + ) + + def to_representation(self, instance): + current_period_data = self.usage_calculator.calculate_single_user_current_period( + instance.user_id, instance.organization_id + ) + self._current_period_cache[instance.user_id] = current_period_data + return super().to_representation(instance) + + def get_organizations(self, obj): + if obj.organization_id is None: + return None + + role = 'owner' if obj.is_org_admin else 'member' + + return { + 'organization_name': obj.organization_name, + 'organization_uid': obj.organization_uid, + 'role': role, + } + + def get_current_service_usage(self, obj): + current_period_data = self._current_period_cache.get(obj.user_id, {}) + + total_nlp_usage = { + 'asr_seconds_current_period': current_period_data.get('nlp_usage_asr_seconds_total', 0), + 'mt_characters_current_period': current_period_data.get('nlp_usage_mt_characters_total', 0), + 'asr_seconds_all_time': obj.nlp_usage_asr_seconds_all_time, + 'mt_characters_all_time': obj.nlp_usage_mt_characters_all_time, + } + + total_submission_count = { + 'all_time': obj.submission_counts_all_time, + 'current_period': current_period_data.get('submission_counts_current_month', 0), + } + + # balances = self._calculate_balances(obj, current_period_data) + + return { + 'total_nlp_usage': total_nlp_usage, + 'total_storage_bytes': obj.total_storage_bytes, + 'total_submission_count': total_submission_count, + 'balances': [], + 'current_period_start': current_period_data.get( + 'current_period_start').isoformat() if current_period_data.get('current_period_start') else None, + 'current_period_end': current_period_data.get('current_period_end').isoformat() if current_period_data.get( + 'current_period_end') else None, + 'last_updated': obj.last_refresh.isoformat() if obj.last_refresh else None, + } + + def _calculate_balances(self, obj, current_period_data: Dict): + # balances = { + # "submission": calculate_usage_balance( + # limit=org_limits.get(f"{UsageType.SUBMISSION}_limit", float("inf")), + # usage=submissions_cur, + # ), + # "storage_bytes": calculate_usage_balance( + # limit=org_limits.get(f"{UsageType.STORAGE_BYTES}_limit", float("inf")), + # usage=storage_total, + # ), + # "asr_seconds": calculate_usage_balance( + # limit=org_limits.get(f"{UsageType.ASR_SECONDS}_limit", float("inf")), + # usage=asr_cur, + # ), + # "mt_characters": calculate_usage_balance( + # limit=org_limits.get(f"{UsageType.MT_CHARACTERS}_limit", float("inf")), + # usage=mt_cur, + # ), + # } + + return {} + + +class UserReportFilterSerializer(serializers.Serializer): + subscriptions = serializers.CharField(required=False, help_text="Filter by subscription status/type") + storage_bytes__total = serializers.IntegerField(required=False, help_text="Filter by total storage bytes") + nlp_usage__asr_seconds_total = serializers.IntegerField(required=False, help_text="Filter by ASR seconds total") + nlp_usage__mt_characters_total = serializers.IntegerField(required=False, help_text="Filter by MT characters total") + date_joined = serializers.DateTimeField(required=False, help_text="Filter by date joined") + last_login = serializers.DateTimeField(required=False, help_text="Filter by last login") + metadata__organization_type = serializers.CharField(required=False, help_text="Filter by organization type") + subscription_id = serializers.CharField(required=False, help_text="Filter by specific subscription ID") + email = serializers.CharField(required=False, help_text="Filter by email address") + + storage_bytes__total__gte = serializers.IntegerField(required=False, + help_text="Storage bytes greater than or equal") + storage_bytes__total__lte = serializers.IntegerField(required=False, help_text="Storage bytes less than or equal") + date_joined__gte = serializers.DateTimeField(required=False, help_text="Date joined after") + date_joined__lte = serializers.DateTimeField(required=False, help_text="Date joined before") + last_login__gte = serializers.DateTimeField(required=False, help_text="Last login after") + last_login__lte = serializers.DateTimeField(required=False, help_text="Last login before") diff --git a/kpi/tasks.py b/kpi/tasks.py index cc1406636c..ec88073f29 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -6,6 +6,7 @@ from django.core import mail from django.core.exceptions import ObjectDoesNotExist from django.core.management import call_command +from django.db import connection from kobo.apps.kobo_auth.shortcuts import User from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files @@ -125,3 +126,9 @@ def perform_maintenance(): remove_unused_markdown_files() remove_old_import_tasks() remove_old_asset_snapshots() + + +# @celery_app.task +def refresh_optimized_user_report_view(): + with connection.cursor() as cursor: + cursor.execute('REFRESH MATERIALIZED VIEW user_report_mv;') diff --git a/kpi/urls/router_api_v2.py b/kpi/urls/router_api_v2.py index 9830fc5048..9687dcf402 100644 --- a/kpi/urls/router_api_v2.py +++ b/kpi/urls/router_api_v2.py @@ -34,6 +34,7 @@ from kpi.views.v2.tos import TermsOfServiceViewSet from kpi.views.v2.user import UserViewSet from kpi.views.v2.user_asset_subscription import UserAssetSubscriptionViewSet +from kpi.views.v2.user_report_viewset import UserReportViewSet class ExtendedDefaultRouterWithPathAliases(ExtendedDefaultRouter): @@ -179,6 +180,9 @@ def get_urls(self, *args, **kwargs): router_api_v2.register(r'service_usage', ServiceUsageViewSet, basename='service-usage') router_api_v2.register(r'users', UserViewSet, basename='user-kpi') +router_api_v2.register( + r'users-report', UserReportViewSet, basename='users-report-kpi' +) router_api_v2.register(r'tags', TagViewSet, basename='tags') router_api_v2.register( diff --git a/kpi/utils/optimized_usage_calculator.py b/kpi/utils/optimized_usage_calculator.py new file mode 100644 index 0000000000..c3ca9c29b2 --- /dev/null +++ b/kpi/utils/optimized_usage_calculator.py @@ -0,0 +1,200 @@ +from django.apps import apps +from django.db.models import Q, Sum +from django.db.models.functions import Coalesce +from django.utils import timezone +from typing import Dict, List, Tuple +from collections import defaultdict + +from kobo.apps.organizations.models import Organization +from kobo.apps.organizations.utils import get_billing_dates +from kobo.apps.organizations.constants import UsageType + + +class OptimizedUsageCalculator: + """ + Efficiently calculates current period usage for multiple users at once. + Minimizes database queries by batching operations per organization. + """ + + def __init__(self): + self.DailyCounter = apps.get_model('logger', 'DailyXFormSubmissionCounter') + self.NLPUsageCounter = apps.get_model('trackers', 'NLPUsageCounter') + + def calculate_current_period_usage_batch( + self, + user_org_pairs: List[Tuple[int, int]] + ) -> Dict[int, Dict]: + """ + Calculate current period usage for multiple users efficiently. + + Args: + user_org_pairs: List of (user_id, organization_id) tuples + + Returns: + Dict mapping user_id to their current period usage data + """ + if not user_org_pairs: + return {} + + # Group users by organization for efficient billing period lookup + users_by_org = defaultdict(list) + for user_id, org_id in user_org_pairs: + if org_id: # Only group users that have organizations + users_by_org[org_id].append(user_id) + + # Get billing periods for all organizations at once + org_billing_periods = self._get_billing_periods_batch( + list(users_by_org.keys()) + ) + + # Calculate current period usage efficiently + submissions_data = self._get_current_period_submissions_batch( + users_by_org, org_billing_periods + ) + nlp_data = self._get_current_period_nlp_batch(users_by_org, org_billing_periods) + + # Combine results + result = {} + for user_id, org_id in user_org_pairs: + result[user_id] = { + 'submission_counts_current_month': submissions_data.get(user_id, 0), + 'nlp_usage_asr_seconds_total': nlp_data.get(user_id, {}).get('asr_seconds', 0), + 'nlp_usage_mt_characters_total': nlp_data.get(user_id, {}).get('mt_characters', 0), + 'current_period_start': org_billing_periods.get(org_id, {}).get('start'), + 'current_period_end': org_billing_periods.get(org_id, {}).get('end'), + } + + return result + + def _get_billing_periods_batch(self, org_ids: List[int]) -> Dict[int, Dict]: + """ + Get billing periods for multiple organizations efficiently. + """ + if not org_ids: + return {} + + # Fetch organizations in a single query + orgs = Organization.objects.filter(id__in=org_ids).select_related() + + billing_periods = {} + for org in orgs: + try: + start, end = get_billing_dates(org) + billing_periods[org.id] = { + 'start': start, + 'end': end + } + except Exception: + # Fallback to current month if billing dates fail + now = timezone.now() + billing_periods[org.id] = { + 'start': now.replace( + day=1, hour=0, minute=0, second=0, microsecond=0 + ), + 'end': now + } + + return billing_periods + + def _get_current_period_submissions_batch( + self, + users_by_org: Dict[int, List[int]], + org_billing_periods: Dict[int, Dict] + ) -> Dict[int, int]: + """ + Get current period submissions for multiple users efficiently. + """ + if not users_by_org or not org_billing_periods: + return {} + + # Build efficient query with OR conditions for each org's billing period + filters = Q() + for org_id, user_ids in users_by_org.items(): + billing_period = org_billing_periods.get(org_id) + if billing_period and user_ids: + filters |= Q( + user_id__in=user_ids, + date__range=[billing_period['start'], billing_period['end']] + ) + + if not filters: + return {} + + # Execute single query for all users + submission_data = ( + self.DailyCounter.objects + .filter(filters) + .values('user_id') + .annotate(current_period_total=Sum('counter')) + ) + + return { + row['user_id']: row['current_period_total'] or 0 + for row in submission_data + } + + def _get_current_period_nlp_batch( + self, + users_by_org: Dict[int, List[int]], + org_billing_periods: Dict[int, Dict] + ) -> Dict[int, Dict]: + """ + Get current period NLP usage for multiple users efficiently. + """ + if not users_by_org or not org_billing_periods: + return {} + + # Build efficient query with OR conditions for each org's billing period + filters = Q() + for org_id, user_ids in users_by_org.items(): + billing_period = org_billing_periods.get(org_id) + if billing_period and user_ids: + filters |= Q( + user_id__in=user_ids, + date__range=[billing_period['start'], billing_period['end']] + ) + + if not filters: + return {} + + # Execute single query for all users + nlp_data = ( + self.NLPUsageCounter.objects + .filter(filters) + .values('user_id') + .annotate( + asr_seconds_total=Coalesce( + Sum(f'total_{UsageType.ASR_SECONDS}'), 0 + ), + mt_characters_total=Coalesce( + Sum(f'total_{UsageType.MT_CHARACTERS}'), 0 + ) + ) + ) + + return { + row['user_id']: { + 'asr_seconds': row['asr_seconds_total'], + 'mt_characters': row['mt_characters_total'] + } + for row in nlp_data + } + + def calculate_single_user_current_period( + self, user_id: int, organization_id: int + ) -> Dict: + """ + Calculate current period usage for a single user (fallback method). + """ + if not organization_id: + return { + 'submission_counts_current_month': 0, + 'nlp_usage_asr_seconds_total': 0, + 'nlp_usage_mt_characters_total': 0, + 'current_period_start': None, + 'current_period_end': None, + } + + return self.calculate_current_period_usage_batch( + [(user_id, organization_id)] + ).get(user_id, {}) diff --git a/kpi/views/v2/user_report_viewset.py b/kpi/views/v2/user_report_viewset.py new file mode 100644 index 0000000000..339a3f69f2 --- /dev/null +++ b/kpi/views/v2/user_report_viewset.py @@ -0,0 +1,165 @@ +from django.db.models import Q +from rest_framework import viewsets, status, exceptions +from rest_framework.response import Response +from rest_framework.decorators import action +from rest_framework.permissions import IsAuthenticated +from django.db import connection +from django.utils import timezone + +from kpi.models.user_report import UserReportMaterialized +from kpi.serializers.v2.user_report_serializer import ( + UserReportSerializer, + UserReportFilterSerializer +) +from kpi.paginators import LimitStartPagination + + +class UserReportFilter: + """ + Filter class for UserReport materialized view. + Handles filtering on pre-computed fields efficiently. + """ + + def filter_queryset(self, request, queryset): + # Get filter parameters + filter_serializer = UserReportFilterSerializer(data=request.query_params) + filter_serializer.is_valid(raise_exception=True) + filters = filter_serializer.validated_data + + # Apply basic field filters (all from materialized view) + if 'storage_bytes__total' in filters: + queryset = queryset.filter( + total_storage_bytes=filters['storage_bytes__total'] + ) + + if 'storage_bytes__total__gte' in filters: + queryset = queryset.filter( + total_storage_bytes__gte=filters['storage_bytes__total__gte'] + ) + + if 'storage_bytes__total__lte' in filters: + queryset = queryset.filter( + total_storage_bytes__lte=filters['storage_bytes__total__lte'] + ) + + if 'date_joined' in filters: + queryset = queryset.filter(date_joined=filters['date_joined']) + + if 'date_joined__gte' in filters: + queryset = queryset.filter(date_joined__gte=filters['date_joined__gte']) + + if 'date_joined__lte' in filters: + queryset = queryset.filter(date_joined__lte=filters['date_joined__lte']) + + if 'last_login' in filters: + queryset = queryset.filter(last_login=filters['last_login']) + + if 'last_login__gte' in filters: + queryset = queryset.filter(last_login__gte=filters['last_login__gte']) + + if 'last_login__lte' in filters: + queryset = queryset.filter(last_login__lte=filters['last_login__lte']) + + if 'metadata__organization_type' in filters: + queryset = queryset.filter( + metadata_organization_type=filters['metadata__organization_type'] + ) + + if 'email' in filters: + queryset = queryset.filter(email__icontains=filters['email']) + + # Handle JSONB subscription filtering + if 'subscriptions' in filters: + subscription_filter = filters['subscriptions'] + queryset = queryset.extra( + where=["subscriptions::jsonb @> %s"], + params=[f'[{{"status": "{subscription_filter}"}}]'] + ) + + if 'subscription_id' in filters: + subscription_id = filters['subscription_id'] + queryset = queryset.extra( + where=["subscriptions::jsonb @> %s"], + params=[f'[{{"id": "{subscription_id}"}}]'] + ) + + # Handle search across multiple fields + if 'search' in filters: + search_term = filters['search'] + queryset = queryset.filter( + Q(username__icontains=search_term) | + Q(email__icontains=search_term) | + Q(first_name__icontains=search_term) | + Q(last_name__icontains=search_term) + ) + + return queryset + + +class UserReportViewSet(viewsets.ReadOnlyModelViewSet): + """ + ## Filtering Options: + - `storage_bytes__total__gte/lte`: Filter by storage usage range + - `date_joined__gte/lte`: Filter by registration date range + - `last_login__gte/lte`: Filter by last login date range + - `metadata__organization_type`: Filter by organization type + - `subscriptions`: Filter by subscription status + - `subscription_id`: Filter by specific subscription ID + - `email`: Search by email address + - `search`: Search across username, email, first_name, last_name + - `ordering`: Order results by field (use - prefix for descending) + + ## Examples: + ``` + # Get users with high storage usage + GET /api/v2/user-reports-optimized/?storage_bytes__total__gte=10000000 + + # Get recently active users + GET /api/v2/user-reports-optimized/?last_login__gte=2025-08-01&ordering=-last_login + + # Complex filtering + GET /api/v2/user-reports-optimized/?storage_bytes__total__lte=50000000&metadata__organization_type=enterprise + ``` + + Note: Current period submission/NLP filters are not available for performance reasons. + These values are calculated on-demand and cannot be efficiently filtered in SQL. + """ + + queryset = UserReportMaterialized.objects.all() + serializer_class = UserReportSerializer + pagination_class = LimitStartPagination + permission_classes = [IsAuthenticated] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.user_report_filter = UserReportFilter() + + def get_queryset(self): + """ + Get the base queryset and apply filters. + """ + queryset = super().get_queryset() + + # Apply custom filtering (only on materialized view fields) + queryset = self.user_report_filter.filter_queryset(self.request, queryset) + + # Apply ordering (only on materialized view fields) + ordering = self.request.query_params.get('ordering', 'user_id') + if ordering: + queryset = queryset.order_by(ordering) + + return queryset + + def list(self, request, *args, **kwargs): + # Only allow superusers to access this endpoint (same as your UserViewSet) + if not request.user.is_superuser: + raise exceptions.PermissionDenied() + + filtered_queryset = self.get_queryset() + + page = self.paginate_queryset(filtered_queryset) + if page is not None: + serializer = self.get_serializer(page, many=True) + response = self.get_paginated_response(serializer.data) + + return response