From 7bebf5cbc8a1b2d9660488e32f4873cabc515141 Mon Sep 17 00:00:00 2001
From: Simon Li
Date: Sun, 4 Oct 2020 21:23:36 +0000
Subject: [PATCH 1/4] Collect build logs in a temporary file

---
 binderhub/builder.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/binderhub/builder.py b/binderhub/builder.py
index bb727823d..173fac621 100644
--- a/binderhub/builder.py
+++ b/binderhub/builder.py
@@ -5,7 +5,9 @@
 import hashlib
 from http.client import responses
 import json
+import os
 import string
+from tempfile import NamedTemporaryFile
 import time
 
 import escapism
@@ -366,6 +368,7 @@ async def get(self, provider_prefix, _unescaped_spec):
             sticky_builds=self.settings['sticky_builds'],
         )
 
+        templogfile = NamedTemporaryFile(mode='wt', delete=False)
         with BUILDS_INPROGRESS.track_inprogress():
             build_starttime = time.perf_counter()
             pool = self.settings['build_pool']
@@ -419,9 +422,16 @@ async def get(self, provider_prefix, _unescaped_spec):
                     failed = True
                     BUILD_TIME.labels(status='failure').observe(time.perf_counter() - build_starttime)
                     BUILD_COUNT.labels(status='failure', **self.repo_metric_labels).inc()
-
+                templogfile.write(payload.get("message", ""))
+                app_log.debug(f'log: {payload.get("message")}')
                 await self.emit(event)
 
+        templogfile.close()
+        app_log.debug(templogfile.name)
+        with open(templogfile.name) as f:
+            app_log.debug(f.read())
+        os.remove(templogfile.name)
+
         # Launch after building an image
         if not failed:
             BUILD_TIME.labels(status='success').observe(time.perf_counter() - build_starttime)

From d984465ac85ef1d4e2bf52f33abed25d03bc92ad Mon Sep 17 00:00:00 2001
From: Simon Li
Date: Sun, 4 Oct 2020 23:38:16 +0000
Subject: [PATCH 2/4] Upload repo2docker log to S3 object store

---
 binderhub/app.py     | 11 +++++++++++
 binderhub/builder.py | 31 +++++++++++++++++++++++++++++--
 requirements.txt     |  1 +
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/binderhub/app.py b/binderhub/app.py
index c2404f62c..8c1bab134 100755
--- a/binderhub/app.py
+++ b/binderhub/app.py
@@ -457,6 +457,12 @@ def _add_slash(self, proposal):
         """
     )
 
+    s3_logs_endpoint = Unicode("", help="S3 endpoint", config=True)
+    s3_logs_access_key = Unicode("", help="S3 access key", config=True)
+    s3_logs_secret_key = Unicode("", help="S3 secret key", config=True)
+    s3_logs_bucket = Unicode("", help="S3 bucket", config=True)
+    s3_logs_region = Unicode("", help="S3 region", config=True)
+
     # FIXME: Come up with a better name for it?
     builder_required = Bool(
         True,
@@ -626,6 +632,11 @@ def initialize(self, *args, **kwargs):
                 "auth_enabled": self.auth_enabled,
                 "event_log": self.event_log,
                 "normalized_origin": self.normalized_origin,
+                "s3_logs_endpoint": self.s3_logs_endpoint,
+                "s3_logs_access_key": self.s3_logs_access_key,
+                "s3_logs_secret_key": self.s3_logs_secret_key,
+                "s3_logs_bucket": self.s3_logs_bucket,
+                "s3_logs_region": self.s3_logs_region,
             }
         )
         if self.auth_enabled:
diff --git a/binderhub/builder.py b/binderhub/builder.py
index 173fac621..f95d9cb5f 100644
--- a/binderhub/builder.py
+++ b/binderhub/builder.py
@@ -2,6 +2,7 @@
 Handlers for working with version control services (i.e. GitHub) for builds.
""" +import aiobotocore import hashlib from http.client import responses import json @@ -428,8 +429,7 @@ async def get(self, provider_prefix, _unescaped_spec): templogfile.close() app_log.debug(templogfile.name) - with open(templogfile.name) as f: - app_log.debug(f.read()) + await self.upload_log(templogfile.name, 'repo2docker.log') os.remove(templogfile.name) # Launch after building an image @@ -456,6 +456,33 @@ async def get(self, provider_prefix, _unescaped_spec): # well-behaved clients will close connections after they receive the launch event. await gen.sleep(60) + async def upload_log(self, logfile, destname): + if (self.settings["s3_logs_endpoint"] and + self.settings["s3_logs_access_key"] and + self.settings["s3_logs_secret_key"] and + self.settings["s3_logs_bucket"] + ): + dest = f"buildlogs/{self.image_name}/{destname}" + app_log.info(f'Uploading log to %s/%s/%s', + self.settings["s3_logs_endpoint"], + self.settings["s3_logs_bucket"], + dest + ) + async with aiobotocore.get_session().create_client( + "s3", + endpoint_url=self.settings["s3_logs_endpoint"], + aws_access_key_id=self.settings["s3_logs_access_key"], + aws_secret_access_key=self.settings["s3_logs_secret_key"], + region_name=self.settings["s3_logs_region"], + ) as client: + with open(logfile, 'rb') as fh: + r = await client.put_object( + Bucket=self.settings["s3_logs_bucket"], + Key=dest, + ContentType='text/plain', + Body=fh) + return r + async def launch(self, kube, provider): """Ask JupyterHub to launch the image.""" # Load the spec-specific configuration if it has been overridden diff --git a/requirements.txt b/requirements.txt index b6c18199e..0979a162c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ kubernetes==9.0.* escapism traitlets +aiobotocore docker jinja2 prometheus_client From f90282a94d773ca98e6d2c324b05ce26a80009c5 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Tue, 6 Oct 2020 20:39:01 +0000 Subject: [PATCH 3/4] Use boto3 wrapped with async instead of aioboto* aioboto* requires compiled dependencies which may not be available as wheels --- binderhub/builder.py | 67 +++++++++++++++++++++++++++++++++----------- requirements.txt | 2 +- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/binderhub/builder.py b/binderhub/builder.py index f95d9cb5f..6d047b531 100644 --- a/binderhub/builder.py +++ b/binderhub/builder.py @@ -2,7 +2,9 @@ Handlers for working with version control services (i.e. GitHub) for builds. """ -import aiobotocore +import boto3 +from botocore import UNSIGNED +from botocore.config import Config import hashlib from http.client import responses import json @@ -429,7 +431,12 @@ async def get(self, provider_prefix, _unescaped_spec): templogfile.close() app_log.debug(templogfile.name) - await self.upload_log(templogfile.name, 'repo2docker.log') + loglink = await self.upload_log(templogfile.name, 'repo2docker.log') + if loglink: + await self.emit({ + 'phase': 'built', + 'message': f'Build log: {loglink}\n', + }) os.remove(templogfile.name) # Launch after building an image @@ -456,6 +463,39 @@ async def get(self, provider_prefix, _unescaped_spec): # well-behaved clients will close connections after they receive the launch event. 
         await gen.sleep(60)
 
+    def _s3_upload(self, logfile, destkey):
+        s3 = boto3.resource(
+            "s3",
+            endpoint_url=self.settings["s3_logs_endpoint"],
+            aws_access_key_id=self.settings["s3_logs_access_key"],
+            aws_secret_access_key=self.settings["s3_logs_secret_key"],
+            region_name=self.settings["s3_logs_region"],
+            config=Config(signature_version="s3v4"),
+        )
+        s3.Bucket(self.settings["s3_logs_bucket"]).upload_file(
+            logfile,
+            destkey,
+            ExtraArgs={"ContentType": "text/plain"},
+        )
+        # For simple S3 servers you can easily build the canonical URL.
+        # For AWS S3 this can be more complicated depending on which region your bucket is in.
+        # boto3 doesn't have a way to just get the public URL, so instead we create a pre-signed
+        # URL but discard the query parameters since the object is public.
+        s3unsigned = boto3.client(
+            "s3",
+            endpoint_url=self.settings["s3_logs_endpoint"],
+            aws_access_key_id=self.settings["s3_logs_access_key"],
+            aws_secret_access_key=self.settings["s3_logs_secret_key"],
+            region_name=self.settings["s3_logs_region"],
+            config=Config(signature_version=UNSIGNED)
+        )
+        link = s3unsigned.generate_presigned_url(
+            'get_object', ExpiresIn=0, Params={
+                'Bucket': self.settings["s3_logs_bucket"],
+                'Key': destkey,
+            })
+        return link.split('?', 1)[0]
+
     async def upload_log(self, logfile, destname):
         if (self.settings["s3_logs_endpoint"] and
             self.settings["s3_logs_access_key"] and
             self.settings["s3_logs_secret_key"] and
             self.settings["s3_logs_bucket"]
         ):
             dest = f"buildlogs/{self.image_name}/{destname}"
-            app_log.info('Uploading log to %s/%s/%s',
+            app_log.debug('Uploading log to %s/%s/%s',
                 self.settings["s3_logs_endpoint"],
                 self.settings["s3_logs_bucket"],
                 dest
             )
-            async with aiobotocore.get_session().create_client(
-                "s3",
-                endpoint_url=self.settings["s3_logs_endpoint"],
-                aws_access_key_id=self.settings["s3_logs_access_key"],
-                aws_secret_access_key=self.settings["s3_logs_secret_key"],
-                region_name=self.settings["s3_logs_region"],
-            ) as client:
-                with open(logfile, 'rb') as fh:
-                    r = await client.put_object(
-                        Bucket=self.settings["s3_logs_bucket"],
-                        Key=dest,
-                        ContentType='text/plain',
-                        Body=fh)
-            return r
+            # Since we only need one method, wrap the sync boto3 library instead of using
+            # aioboto3, which depends on some compiled dependencies.
+            loop = IOLoop.current()
+            link = await loop.run_in_executor(
+                self.settings['executor'], self._s3_upload, logfile, dest)
+            app_log.info('Log is available at %s', link)
+            return link
 
     async def launch(self, kube, provider):
         """Ask JupyterHub to launch the image."""
diff --git a/requirements.txt b/requirements.txt
index 0979a162c..98fcf423b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@
 kubernetes==9.0.*
 escapism
 traitlets
-aiobotocore
+boto3
 docker
 jinja2
 prometheus_client
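Note on PATCH 3/4: boto3 is synchronous, so calling it directly inside the request coroutine would block Tornado's event loop for the whole upload. Wrapping the call in run_in_executor moves it to a worker thread and keeps the loop responsive. A minimal self-contained sketch of the same pattern, with illustrative names (slow_sync_upload stands in for the blocking boto3 call; it is not code from the patch):

    import asyncio
    import time
    from concurrent.futures import ThreadPoolExecutor

    def slow_sync_upload():
        # Stand-in for a blocking boto3 upload_file() call
        time.sleep(1)
        return "https://s3.example.org/bucket/key"

    async def main():
        loop = asyncio.get_running_loop()
        executor = ThreadPoolExecutor(max_workers=2)
        # The blocking call runs on a worker thread; the event loop stays free
        link = await loop.run_in_executor(executor, slow_sync_upload)
        print(link)

    asyncio.run(main())

Tornado's IOLoop.current().run_in_executor works the same way, which is what the patch uses together with the existing executor from self.settings.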
From 05165128a658af679e3e98b52300c1d7f44a6896 Mon Sep 17 00:00:00 2001
From: Simon Li
Date: Tue, 6 Oct 2020 20:41:18 +0000
Subject: [PATCH 4/4] builder: exit loop if failed

This ensures logs are uploaded for failed builds
---
 binderhub/builder.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/binderhub/builder.py b/binderhub/builder.py
index 6d047b531..1d53989b6 100644
--- a/binderhub/builder.py
+++ b/binderhub/builder.py
@@ -425,6 +425,7 @@ async def get(self, provider_prefix, _unescaped_spec):
                     failed = True
                     BUILD_TIME.labels(status='failure').observe(time.perf_counter() - build_starttime)
                     BUILD_COUNT.labels(status='failure', **self.repo_metric_labels).inc()
+                    done = True
                 templogfile.write(payload.get("message", ""))
                 app_log.debug(f'log: {payload.get("message")}')
                 await self.emit(event)
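Note on configuration: the upload only runs when s3_logs_endpoint, s3_logs_access_key, s3_logs_secret_key and s3_logs_bucket are all non-empty, so the feature is opt-in. A minimal sketch of a BinderHub config file enabling it (the endpoint, bucket and credential values below are placeholders, not values from the patches):

    # binderhub_config.py (illustrative values only)
    c.BinderHub.s3_logs_endpoint = "https://s3.example.org"
    c.BinderHub.s3_logs_access_key = "example-access-key"
    c.BinderHub.s3_logs_secret_key = "example-secret-key"
    c.BinderHub.s3_logs_bucket = "binder-build-logs"
    c.BinderHub.s3_logs_region = "us-east-1"

Each build log is then uploaded to buildlogs/<image_name>/repo2docker.log in that bucket, and the link emitted to clients is the unsigned object URL, so the bucket must allow public reads for the link to resolve.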