Skip to content

Commit fa3f299

Browse files
committed
fix: increase log limits (#2646)

## Changes
1 parent c0a9c3b commit fa3f299

File tree

5 files changed

+77
-31
lines changed

5 files changed

+77
-31
lines changed

Cargo.toml

Lines changed: 1 addition & 4 deletions
Large diffs are not rendered by default.

packages/edge/infra/client/container-runner/src/container.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ fn ship_logs(
151151
// spike of logs does not exhaust the long rate limit.
152152
//
153153
// 64 logs/s
154-
let mut throttle_short = throttle::Throttle::new(960, Duration::from_secs(15));
154+
let mut throttle_short = throttle::Throttle::new(960 * 1024, Duration::from_secs(15));
155155

156156
// Reduces logs from noisy games. Set reasonable caps on how
157157
// much can be logged per minute. This is here to prevent games
@@ -160,10 +160,10 @@ fn ship_logs(
160160
// amounts of logging. This happens very frequently.
161161
//
162162
// 4 logs/s * 1024 bytes/log = 4096 bytes/lobby/s = 14.7 MB/lobby/hr = 353.8 MB/lobby/day = 10.6 GB/lobby/month
163-
let mut throttle_long = throttle::Throttle::new(1200, Duration::from_secs(300));
163+
let mut throttle_long = throttle::Throttle::new(1200 * 1024, Duration::from_secs(300));
164164

165165
// Throttles error logs
166-
let mut throttle_error = throttle::Throttle::new(1, Duration::from_secs(60));
166+
let mut throttle_error = throttle::Throttle::new(1 * 1024, Duration::from_secs(60));
167167

168168
// How many lines have been logged as a preview, see `MAX_PREVIEW_LINES`
169169
let mut preview_iine_count = 0;

packages/edge/infra/client/manager/src/image_download_handler.rs

Lines changed: 70 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -99,28 +99,28 @@ impl ImageDownloadHandler {
9999
let mut conn = ctx.sql().await?;
100100
let mut tx = conn.begin().await?;
101101

102-
// Get total size of images directory. Note that it doesn't matter if this doesn't
103-
// match the actual fs size because it should either be exactly at or below actual fs
104-
// size. Also calculating fs size manually is expensive.
105-
let (cache_count, images_dir_size) = sqlx::query_as::<_, (i64, i64)>(indoc!(
106-
"
107-
SELECT COUNT(size), COALESCE(SUM(size), 0) FROM images_cache
108-
",
109-
))
110-
.fetch_one(&mut *tx)
111-
.await
112-
.map_err(Into::<anyhow::Error>::into)?;
102+
let ((cache_count, images_dir_size), image_download_size) = tokio::try_join!(
103+
async {
104+
// Get total size of images directory. Note that it doesn't matter if this doesn't
105+
// match the actual fs size because it should either be exactly at or below actual fs
106+
// size. Also calculating fs size manually is expensive.
107+
sqlx::query_as::<_, (i64, i64)>(indoc!(
108+
"
109+
SELECT COUNT(size), COALESCE(SUM(size), 0) FROM images_cache
110+
",
111+
))
112+
.fetch_one(&mut *tx)
113+
.await
114+
.map_err(Into::<anyhow::Error>::into)
115+
},
116+
// NOTE: The image size here is somewhat misleading because its only the size of the
117+
// downloaded archive and not the total disk usage after it is unpacked. However, this is
118+
// good enough
119+
self.fetch_image_download_size(ctx, image_config),
120+
)?;
113121

114122
// Prune images
115-
//
116-
// HACK: The artifact_size_bytes here is somewhat misleading because its only the size of the
117-
// downloaded archive and not the total disk usage after it is unpacked. However, this is size
118-
// is recalculated later once decompressed, so this will only ever exceed the cache
119-
// size limit in edge cases by `actual size - compressed size`. In this situation,
120-
// that extra difference is already reserved on the file system by the actor
121-
// itself.
122-
let (removed_count, removed_bytes) = if images_dir_size as u64
123-
+ image_config.artifact_size_bytes
123+
let (removed_count, removed_bytes) = if images_dir_size as u64 + image_download_size
124124
> ctx.config().images.max_cache_size()
125125
{
126126
// Fetch as many images as it takes to clear up enough space for this new image.
@@ -157,7 +157,7 @@ impl ImageDownloadHandler {
157157
.bind(image_config.id)
158158
.bind(
159159
(images_dir_size as u64)
160-
.saturating_add(image_config.artifact_size_bytes)
160+
.saturating_add(image_download_size)
161161
.saturating_sub(ctx.config().images.max_cache_size()) as i64,
162162
)
163163
.fetch_all(&mut *tx)
@@ -202,7 +202,7 @@ impl ImageDownloadHandler {
202202

203203
metrics::IMAGE_CACHE_COUNT.set(cache_count + 1 - removed_count);
204204
metrics::IMAGE_CACHE_SIZE
205-
.set(images_dir_size + image_config.artifact_size_bytes as i64 - removed_bytes);
205+
.set(images_dir_size + image_download_size as i64 - removed_bytes);
206206

207207
sqlx::query(indoc!(
208208
"
@@ -230,7 +230,7 @@ impl ImageDownloadHandler {
230230
metrics::IMAGE_CACHE_SIZE.set(images_dir_size + image_size as i64 - removed_bytes);
231231

232232
// Update state to signify download completed successfully
233-
sqlx::query(indoc!(
233+
let foo = sqlx::query(indoc!(
234234
"
235235
UPDATE images_cache
236236
SET
@@ -487,4 +487,51 @@ impl ImageDownloadHandler {
487487

488488
Ok(addresses)
489489
}
490+
491+
/// Attempts to fetch HEAD for the image download url and determine the image's download size.
492+
async fn fetch_image_download_size(
493+
&self,
494+
ctx: &Ctx,
495+
image_config: &protocol::Image,
496+
) -> Result<u64> {
497+
let addresses = self.get_image_addresses(ctx, image_config).await?;
498+
499+
let mut iter = addresses.into_iter();
500+
while let Some(artifact_url) = iter.next() {
501+
// Log the full URL we're attempting to download from
502+
tracing::info!(image_id=?image_config.id, %artifact_url, "attempting to download image");
503+
504+
match reqwest::Client::new()
505+
.head(&artifact_url)
506+
.send()
507+
.await
508+
.and_then(|res| res.error_for_status())
509+
{
510+
Ok(res) => {
511+
tracing::info!(image_id=?image_config.id, %artifact_url, "successfully fetched image HEAD");
512+
513+
// Read Content-Length header from response
514+
let image_size = res
515+
.headers()
516+
.get(reqwest::header::CONTENT_LENGTH)
517+
.context("no Content-Length header")?
518+
.to_str()?
519+
.parse::<u64>()
520+
.context("invalid Content-Length header")?;
521+
522+
return Ok(image_size);
523+
}
524+
Err(err) => {
525+
tracing::warn!(
526+
image_id=?image_config.id,
527+
%artifact_url,
528+
%err,
529+
"failed to fetch image HEAD"
530+
);
531+
}
532+
}
533+
}
534+
535+
bail!("artifact url could not be resolved");
536+
}
490537
}

packages/edge/infra/client/manager/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ async fn run(init: Init, first: bool) -> Result<()> {
193193
async { metrics_thread.await?.map_err(Into::into) },
194194
ctx.run(rx),
195195
)?;
196+
196197

197198
Ok(())
198199
}

packages/edge/services/pegboard/src/protocol.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ pub struct Image {
117117
pub artifact_url_stub: String,
118118
/// Direct S3 url to download the image from without ATS.
119119
pub fallback_artifact_url: Option<String>,
120-
/// Size in bytes of the artfiact.
120+
/// Size in bytes of the artifact.
121+
#[serde(default)]
121122
pub artifact_size_bytes: u64,
122123
pub kind: ImageKind,
123124
pub compression: ImageCompression,

0 commit comments

Comments (0)