Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
name = "webcompat_topline_metric_daily"
etl = ["metric", "metric-backfill"]
etl = ["metric"]
1 change: 1 addition & 0 deletions jobs/webcompat-kb/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ test = [
[project.scripts]
webcompat-etl = "webcompat_kb.main:main"
webcompat-backfill-history = "webcompat_kb.utils:backfill_history"
webcompat-backfill-metric = "webcompat_kb.commands.backfill_metric:main"
webcompat-check-templates = "webcompat_kb.commands.checkdata:main"
webcompat-render = "webcompat_kb.commands.render:main"
webcompat-validate = "webcompat_kb.commands.validate:main"
Expand Down
2 changes: 1 addition & 1 deletion jobs/webcompat-kb/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ set -ex
uv sync --extra=test
uv run mypy webcompat_kb
uv run pytest --ruff --ruff-format .
uv run webcompat-check-templates --bq-project-id="moz-fx-dev-dschubert-wckb"
uv run webcompat-check-templates --bq-project="moz-fx-dev-dschubert-wckb"
78 changes: 77 additions & 1 deletion jobs/webcompat-kb/webcompat_kb/base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import argparse
import logging
import re
import pathlib
import os
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, MutableMapping
from typing import Any, MutableMapping, Optional

from .bqhelpers import BigQuery, SchemaId
from .config import Config
Expand Down Expand Up @@ -39,6 +41,80 @@ def dataset_arg(value: str) -> str:
return value


class Command(ABC):
    """Base class for console-script entry points.

    Provides the argument set shared by every webcompat-kb command
    (logging, BigQuery project, data path, staging/write toggles,
    GitHub token, --pdb) plus common startup: parse args, configure
    logging, run :meth:`main`, and convert its return value into a
    process exit status. Instances are callable, so a module-level
    ``main = MyCommand()`` works directly as a ``[project.scripts]``
    entry point.
    """

    def argument_parser(self) -> argparse.ArgumentParser:
        """Build the parser with options common to all commands.

        Subclasses extend this via ``super().argument_parser()`` and add
        their own arguments.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--log-level",
            choices=["debug", "info", "warn", "error"],
            default="info",
            help="Log level",
        )

        parser.add_argument(
            "--bq-project",
            dest="bq_project_id",
            type=project_arg,
            help="BigQuery project id",
        )

        parser.add_argument(
            "--data-path",
            action="store",
            type=pathlib.Path,
            default=DEFAULT_DATA_DIR,
            help="Path to directory containing sql to deploy",
        )

        parser.add_argument(
            "--stage",
            action="store_true",
            help="Write to staging location (currently same project with _test suffix on dataset names)",
        )

        parser.add_argument(
            "--no-write",
            dest="write",
            action="store_false",
            default=True,
            help="Don't write updates to BigQuery",
        )

        parser.add_argument(
            "--github-token",
            default=os.environ.get("GH_TOKEN"),
            help="GitHub token",
        )

        # Fix: help text typo "execption" -> "exception"
        parser.add_argument(
            "--pdb", action="store_true", help="Drop into debugger on exception"
        )
        return parser

    @abstractmethod
    def main(self, args: argparse.Namespace) -> Optional[int]:
        """Run the command; return an exit code, or None/0 for success."""
        ...

    def __call__(self) -> None:
        """Parse arguments, configure logging, and dispatch to ``main``."""
        parser = self.argument_parser()
        args = parser.parse_args()

        logging.basicConfig()
        # Defensive: fall back to INFO if a subclass parser drops --log-level.
        log_level = args.log_level.upper() if "log_level" in args else "INFO"
        logging.getLogger().setLevel(logging.getLevelNamesMapping()[log_level])

        try:
            rv = self.main(args)
        except Exception:
            if "pdb" in args and args.pdb:
                import pdb

                pdb.post_mortem()
                # Fix: previously execution fell through to `if rv:` with
                # `rv` unbound, raising UnboundLocalError after the debug
                # session. Exit with a nonzero status instead.
                sys.exit(1)
            raise
        if rv:
            sys.exit(rv)


@dataclass
class Context:
args: argparse.Namespace
Expand Down
104 changes: 104 additions & 0 deletions jobs/webcompat-kb/webcompat_kb/commands/backfill_metric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import argparse
import logging
import os
from typing import Optional

from .. import projectdata
from ..base import Command
from ..bqhelpers import BigQuery, DatasetId, get_client
from ..config import Config
from ..projectdata import Project


def backfill_metric_daily(
    project: Project,
    client: BigQuery,
    write: bool,
    metric_name: str,
) -> None:
    """Backfill NULL daily-metric columns for one named metric.

    For every metric type that applies in the "daily" context, builds a
    SELECT that recomputes ``<metric_type>_<metric>`` per date from the
    scored site reports, then an UPDATE that writes those values into
    ``webcompat_topline_metric_daily`` — but only for rows where every
    targeted column is currently NULL (see the IS NULL conditions).

    :param project: Loaded project data (metric definitions/types and
        table references).
    :param client: BigQuery wrapper used to run the queries.
    :param write: If True, run the UPDATE; if False, log the UPDATE and
        run only the SELECT, logging the rows that would be written.
    :param metric_name: Name of the metric definition to backfill.
    :raises ValueError: If no metric definition matches ``metric_name``.
    """
    metric_dfns, metric_types = project.data.metric_dfns, project.data.metric_types
    # Only metric types declared for the "daily" context contribute columns.
    daily_metric_types = [
        metric_type for metric_type in metric_types if "daily" in metric_type.contexts
    ]

    # Linear search for the definition; the for/else raises when not found.
    metric = None
    for metric in metric_dfns:
        if metric.name == metric_name:
            break
    else:
        raise ValueError(f"Metric named {metric_name} not found")

    select_fields = []
    field_names = []
    conditions = []
    for metric_type in daily_metric_types:
        # Column naming convention: <metric_type>_<metric>.
        field_name = f"{metric_type.name}_{metric.name}"
        field_names.append(field_name)
        # agg_function/condition presumably emit SQL fragments over the
        # "bugs" alias — behavior defined by the project data, not here.
        select_fields.append(
            f"{metric_type.agg_function('bugs', metric)} AS {field_name}"
        )
        # Restrict the backfill to rows that have never been populated.
        conditions.append(f"metric_daily.{field_name} IS NULL")

    # Joins each report to every date in [creation, resolution] (or to all
    # later dates while unresolved), then aggregates per date.
    select_query = f"""
SELECT
  date,
  {",\n ".join(select_fields)}
FROM
  `{project["webcompat_knowledge_base"]["scored_site_reports"]}` AS bugs
JOIN `{project["webcompat_knowledge_base"]["webcompat_topline_metric_daily"]}` as metric_daily
ON
  DATE(bugs.creation_time) <= metric_daily.date
  AND IF (bugs.resolved_time IS NOT NULL, DATE(bugs.resolved_time) >= date, TRUE)
WHERE
  {metric.condition("bugs")} AND {" AND ".join(conditions)}
GROUP BY
  date
ORDER BY date"""

    update_query = f"""
UPDATE `{project["webcompat_knowledge_base"]["webcompat_topline_metric_daily"]}` AS metric_daily
SET
  {",\n ".join(f"metric_daily.{field_name}=new_data.{field_name}" for field_name in field_names)}
FROM ({select_query}) AS new_data
WHERE new_data.date = metric_daily.date
"""

    if write:
        result = client.query(update_query)
    else:
        # Dry run: show the UPDATE that would execute, and preview the
        # computed values via the SELECT alone.
        logging.info(f"Would run query:\n{update_query}")
        result = client.query(select_query)
        logging.info(f"Would set {list(result)}")


class BackfillMetric(Command):
    """Command that backfills daily topline-metric values for one metric.

    Adds a positional ``metric`` argument on top of the shared
    :class:`Command` options, validates it against the project's metric
    definitions, and delegates to :func:`backfill_metric_daily`.
    """

    def argument_parser(self) -> argparse.ArgumentParser:
        """Extend the shared parser with the metric name to backfill."""
        parser = super().argument_parser()
        parser.add_argument("metric", action="store", help="Metric name to update")
        return parser

    def main(self, args: argparse.Namespace) -> Optional[int]:
        """Load project data, validate the metric, and run the backfill."""
        client = get_client(args.bq_project_id)
        config = Config(write=args.write, stage=args.stage)
        project = projectdata.load(
            client, args.bq_project_id, os.path.normpath(args.data_path), set(), config
        )
        # Fix: metric_dfns holds metric *definition objects*, so a bare
        # membership test against the string name always failed to match.
        # Compare against each definition's name instead.
        if args.metric not in {metric.name for metric in project.data.metric_dfns}:
            raise ValueError(f"Unknown metric {args.metric}")

        bq_client = BigQuery(
            client,
            DatasetId(args.bq_project_id, "webcompat_knowledge_base"),
            args.write,
            None,
        )

        backfill_metric_daily(
            project,
            bq_client,
            config.write,
            args.metric,
        )
        return None


main = BackfillMetric()
40 changes: 13 additions & 27 deletions jobs/webcompat-kb/webcompat_kb/commands/checkdata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import argparse
import logging
import os
import pathlib
import sys
from typing import Optional

from .. import projectdata
from ..base import ALL_JOBS, DEFAULT_DATA_DIR
from ..base import ALL_JOBS, Command
from ..bqhelpers import get_client
from ..config import Config
from ..projectdata import lint_templates
Expand All @@ -15,26 +14,15 @@
here = os.path.dirname(__file__)


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--bq-project-id", action="store", help="BigQuery project ID")
parser.add_argument("--pdb", action="store_true", help="Run debugger on failure")
parser.add_argument(
"--path",
action="store",
type=pathlib.Path,
default=DEFAULT_DATA_DIR,
help="Path to directory containing data",
)
try:
class CheckData(Command):
def main(self, args: argparse.Namespace) -> Optional[int]:
# This should be unused
client = get_client("test")
args = parser.parse_args()

project = projectdata.load(
client,
args.bq_project_id,
os.path.normpath(args.path),
os.path.normpath(args.data_path),
set(),
Config(write=False, stage=False),
)
Expand All @@ -43,17 +31,15 @@ def main() -> None:
project.data.templates_by_dataset.values(),
):
logging.error("Lint failed")
sys.exit(1)
return 1

try:
creator = SchemaCreator(project)
creator.create()
except Exception:
logging.error("Creating schemas failed")
raise
except Exception:
if args.pdb:
import pdb

pdb.post_mortem()
raise
except Exception as e:
logging.error(f"Creating schemas failed: {e}")
return 1
return None


main = CheckData()
59 changes: 25 additions & 34 deletions jobs/webcompat-kb/webcompat_kb/commands/validate.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,35 @@
import argparse
import os
import pathlib
import sys
from typing import Optional

from google.auth.exceptions import RefreshError


from .. import projectdata
from ..base import DEFAULT_DATA_DIR
from ..base import Command
from ..config import Config
from ..bqhelpers import BigQuery, DatasetId, SchemaId, SchemaType, get_client
from ..update_schema import render_schemas


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--bq-project-id", action="store", help="BigQuery project ID")
parser.add_argument(
"--default-dataset",
action="store",
default="webcompat_knowledge_base",
help="Default dataset name",
)
parser.add_argument("--pdb", action="store_true", help="Run debugger on failure")
parser.add_argument(
"--data-path",
action="store",
type=pathlib.Path,
default=DEFAULT_DATA_DIR,
help="Path to directory containing data",
)
parser.add_argument(
"schema_ids",
action="store",
nargs="*",
help="Schemas to render e.g. dataset.view_name",
)
try:
args = parser.parse_args()
class Validate(Command):
def argument_parser(self) -> argparse.ArgumentParser:
    """Extend the shared Command parser with validation-specific options."""
    parser = super().argument_parser()
    extra_arguments = (
        (
            "--default-dataset",
            {
                "action": "store",
                "default": "webcompat_knowledge_base",
                "help": "Default dataset name",
            },
        ),
        (
            "schema_ids",
            {
                "action": "store",
                "nargs": "*",
                "help": "Schemas to render e.g. dataset.view_name",
            },
        ),
    )
    for arg_name, arg_options in extra_arguments:
        parser.add_argument(arg_name, **arg_options)
    return parser

def main(self, args: argparse.Namespace) -> Optional[int]:
client = get_client(args.bq_project_id)
project = projectdata.load(
client,
Expand Down Expand Up @@ -91,10 +83,9 @@ def main() -> None:
else:
print(" Validation succeeded")
if not success:
sys.exit(1)
except Exception:
if args.pdb:
import pdb
return 1

return None


pdb.post_mortem()
raise
main = Validate()
Loading