Skip to content

Commit ee28a87

Browse files
authored
Build state tracker (#2074)
Define a SQLAlchemy DB class "Dataset" which defines persistent state for a Pbench dataset including most importantly the dataset's username (which is not otherwise recorded until we index) and the dataset's current state so that we can track the progress of a dataset through the Pbench server pipeline. We also support Metadata associated with each Dataset, describing additional information about datasets beyond the "state". For example, the backup component will mark a dataset as "ARCHIVED" and pbench-reindex marks the selected datasets to "REINDEX". A new dataset starts in UPLOADING state and will progress through the steps as we perform operations on it. We're defining both "in progress" -ing steps such as INDEXING as well as completion steps (which are ready for the next operation) such as INDEXED. There are also two "terminal" states, EXPIRED and QUARANTINED, from which a dataset cannot exit.
1 parent 6f41da4 commit ee28a87

File tree

116 files changed

+1801
-34
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+1801
-34
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ build
77
dist
88
*~
99
????-*.patch
10+
.env
1011
.npmrc
1112
.yarnrc
1213
*.egg-info

jenkins/development.Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ RUN \
9797
rsync \
9898
screen \
9999
sos \
100+
sqlite \
100101
tar \
101102
xz \
102103
&& \

lib/pbench/cli/server/shell.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
import sys
55

66
from configparser import NoSectionError, NoOptionError
7+
from sqlalchemy_utils import database_exists, create_database
78

89
from pbench.common.exceptions import BadConfig, ConfigFileNotSpecified
910
from pbench.server.api import create_app, get_server_config
11+
from pbench.server.database.database import Database
12+
from pbench.common.logger import get_pbench_logger
1013

1114

1215
def app():
@@ -24,12 +27,24 @@ def main():
2427
except (ConfigFileNotSpecified, BadConfig) as e:
2528
print(e)
2629
sys.exit(1)
30+
logger = get_pbench_logger(__name__, server_config)
2731
try:
2832
host = str(server_config.get("pbench-server", "bind_host"))
2933
port = str(server_config.get("pbench-server", "bind_port"))
34+
db = str(server_config.get("Postgres", "db_uri"))
3035
workers = str(server_config.get("pbench-server", "workers"))
31-
except (NoOptionError, NoSectionError) as e:
32-
print(f"{__name__}: ERROR: {e.__traceback__}")
36+
37+
# Multiple gunicorn workers will attempt to connect to the DB; rather
38+
# than attempt to synchronize them, detect a missing DB (from the
39+
# postgres URI) and create it here. It's safer to do this here,
40+
# where we're single-threaded.
41+
if not database_exists(db):
42+
logger.info("Postgres DB {} doesn't exist", db)
43+
create_database(db)
44+
logger.info("Created DB {}", db)
45+
Database.init_db(server_config, logger)
46+
except (NoOptionError, NoSectionError):
47+
logger.exception(f"{__name__}: ERROR")
3348
sys.exit(1)
3449

3550
subprocess.run(

lib/pbench/server/api/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from pbench.server.database.database import Database
2222
from pbench.server.api.resources.query_apis.query_month_indices import QueryMonthIndices
2323
from pbench.server.api.auth import Auth
24-
2524
from pbench.server.api.resources.users_api import (
2625
RegisterUser,
2726
Login,

lib/pbench/server/api/resources/__init__.py

Whitespace-only changes.

lib/pbench/server/api/resources/upload_api.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@
33
import tempfile
44
import hashlib
55
from pathlib import Path
6+
from http import HTTPStatus
7+
68
from flask_restful import Resource, abort
79
from flask import request, jsonify
810
from werkzeug.utils import secure_filename
911
from pbench.server.utils import filesize_bytes
12+
from pbench.server.database.models.tracker import Dataset, States
13+
1014

1115
ALLOWED_EXTENSIONS = {"xz"}
1216

@@ -109,6 +113,20 @@ def put(self, controller):
109113
md5_full_path = Path(path, f"{filename}.md5")
110114
bytes_received = 0
111115

116+
# TODO: Need real user from PUT!
117+
118+
# Create a tracking dataset object; it'll begin in UPLOADING state
119+
try:
120+
dataset = Dataset(controller=controller, path=tar_full_path, md5=md5sum)
121+
dataset.add()
122+
except Exception:
123+
self.logger.exception("unable to create dataset for {}", filename)
124+
abort(
125+
HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR",
126+
)
127+
128+
self.logger.info("Uploading file {} to {}", filename, dataset)
129+
112130
with tempfile.NamedTemporaryFile(mode="wb", dir=path) as ofp:
113131
chunk_size = 4096
114132
self.logger.debug("Writing chunks")
@@ -181,6 +199,11 @@ def put(self, controller):
181199
)
182200
raise
183201

202+
try:
203+
dataset.advance(States.UPLOADED)
204+
except Exception:
205+
self.logger.exception("Unable to finalize {}", dataset)
206+
abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")
184207
response = jsonify(dict(message="File successfully uploaded"))
185208
response.status_code = 201
186209
return response

lib/pbench/server/api/resources/users_api.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,13 @@ def post(self):
9292

9393
first_name = user_data.get("first_name")
9494
if not first_name:
95-
self.logger.warning("Missing firstName field")
96-
abort(400, message="Missing firstName field")
95+
self.logger.warning("Missing first_name field")
96+
abort(400, message="Missing first_name field")
9797

9898
last_name = user_data.get("last_name")
9999
if not last_name:
100-
self.logger.warning("Missing lastName field")
101-
abort(400, message="Missing lastName field")
100+
self.logger.warning("Missing last_name field")
101+
abort(400, message="Missing last_name field")
102102

103103
try:
104104
user = User(
@@ -325,8 +325,8 @@ def get(self, username):
325325
"message": "Success"/"failure message",
326326
"data": {
327327
"username": <username>,
328-
"firstName": <firstName>,
329-
"lastName": <lastName>,
328+
"first_name": <firstName>,
329+
"last_name": <lastName>,
330330
"registered_on": registered_on,
331331
}
332332
}

lib/pbench/server/database/database.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
class Database:
88
# Create declarative base model that our model can inherit from
99
Base = declarative_base()
10-
# Initialize the db scoped session
11-
db_session = scoped_session(sessionmaker(autocommit=False, autoflush=False))
1210

1311
@staticmethod
1412
def get_engine_uri(config, logger):
@@ -21,24 +19,20 @@ def get_engine_uri(config, logger):
2119
)
2220
return None
2321

24-
# return f"postgresql+{psql_driver}://{psql_username}:{psql_password}@{psql_host}:{psql_port}/{psql_db}"
25-
26-
@staticmethod
27-
def init_engine(server_config, logger):
28-
try:
29-
return create_engine(Database.get_engine_uri(server_config, logger))
30-
except Exception:
31-
logger.exception("Exception while creating a sqlalchemy engine")
32-
return None
33-
3422
@staticmethod
3523
def init_db(server_config, logger):
24+
# Attach the logger to the base class for models to find
25+
if not hasattr(Database.Base, "logger"):
26+
Database.Base.logger = logger
27+
28+
# WARNING:
3629
# Make sure all the models are imported before this function gets called
3730
# so that they will be registered properly on the metadata. Otherwise
3831
# metadata will not have any tables and create_all functionality will do nothing
3932

40-
Database.Base.query = Database.db_session.query_property()
41-
4233
engine = create_engine(Database.get_engine_uri(server_config, logger))
4334
Database.Base.metadata.create_all(bind=engine)
44-
Database.db_session.configure(bind=engine)
35+
Database.db_session = scoped_session(
36+
sessionmaker(bind=engine, autocommit=False, autoflush=False)
37+
)
38+
Database.Base.query = Database.db_session.query_property()

0 commit comments

Comments
 (0)