Skip to content

Commit 748f488

Browse files
tlangs, Ghost-in-a-Jar, aednichols, jgainerdewar
authored
ID-1276 Introduce Bard Service for sending metrics (#7434)
Co-authored-by: Tristan Garwood <[email protected]> Co-authored-by: Adam Nichols <[email protected]> Co-authored-by: Janet Gainer-Dewar <[email protected]>
1 parent ea67a13 commit 748f488

File tree

30 files changed

+720
-32
lines changed

30 files changed

+720
-32
lines changed

backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import wom.graph.CommandCallNode
1414

1515
import scala.concurrent.ExecutionContext
1616

17-
trait BackendLifecycleActorFactory {
17+
trait BackendLifecycleActorFactory extends PlatformSpecific {
1818

1919
/**
2020
* Name of the backend.
@@ -166,7 +166,7 @@ trait BackendLifecycleActorFactory {
166166
/**
167167
* Allows Cromwell to self-identify which cloud it's running on for runtime attribute purposes
168168
*/
169-
def platform: Option[Platform] = None
169+
override def platform: Option[Platform] = None
170170
}
171171

172172
object BackendLifecycleActorFactory {

backend/src/main/scala/cromwell/backend/backend.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,3 +178,18 @@ object Azure extends Platform {
178178
object Aws extends Platform {
179179
override def runtimeKey: String = "aws"
180180
}
181+
/**
  * Mixed into components that can report which cloud platform they are associated with.
  */
trait PlatformSpecific {

  /** The associated platform, or `None` when the component is not tied to one. */
  def platform: Option[Platform]
}

/** A [[PlatformSpecific]] that always reports [[Gcp]]. */
trait GcpPlatform extends PlatformSpecific {
  override val platform: Option[Platform] = Some(Gcp)
}

/** A [[PlatformSpecific]] that always reports [[Azure]]. */
trait AzurePlatform extends PlatformSpecific {
  override val platform: Option[Platform] = Some(Azure)
}

/** A [[PlatformSpecific]] that always reports [[Aws]]. */
trait AwsPlatform extends PlatformSpecific {
  override val platform: Option[Platform] = Some(Aws)
}

backend/src/main/scala/cromwell/backend/dummy/DummyAsyncExecutionActor.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ class DummyAsyncExecutionActor(override val standardParams: StandardAsyncExecuti
3131
with StandardAsyncExecutionActor
3232
with CromwellInstrumentation {
3333

34+
/** This actor reports no platform: the result is always `None`. */
override def platform: Option[Nothing] = None
35+
3436
/** The type of the run info when a job is started. */
3537
override type StandardAsyncRunInfo = String
3638

backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,23 @@ import cromwell.core.path.Path
3131
import cromwell.services.keyvalue.KeyValueServiceActor._
3232
import cromwell.services.keyvalue.KvClient
3333
import cromwell.services.metadata.CallMetadataKeys
34+
import cromwell.services.metrics.bard.BardEventing.BardEventRequest
35+
import cromwell.services.metrics.bard.model.TaskSummaryEvent
3436
import eu.timepit.refined.refineV
3537
import mouse.all._
3638
import net.ceedubs.ficus.Ficus._
3739
import org.apache.commons.lang3.StringUtils
3840
import org.apache.commons.lang3.exception.ExceptionUtils
3941
import shapeless.Coproduct
42+
import wdl4s.parser.MemoryUnit
4043
import wom.callable.{AdHocValue, CommandTaskDefinition, ContainerizedInputExpression}
4144
import wom.expression.WomExpression
4245
import wom.graph.LocalName
4346
import wom.values._
4447
import wom.{CommandSetupSideEffectFile, InstantiatedCommand, WomFileMapper}
4548

49+
import java.time.OffsetDateTime
50+
import java.time.temporal.ChronoUnit
4651
import java.io.IOException
4752
import scala.concurrent._
4853
import scala.concurrent.duration._
@@ -70,6 +75,8 @@ case class DefaultStandardAsyncExecutionActorParams(
7075
// Override to `false` when we need the script to set an environment variable in the parent shell.
7176
case class ScriptPreambleData(bashString: String, executeInSubshell: Boolean = true)
7277

78+
/**
  * Timestamps describing one job run, as returned by `getStartAndEndTimes`.
  *
  * @param jobStart when the job started
  * @param cpuStart optional CPU start time; absent when the backend cannot provide it
  *                 (NOTE(review): presumably the moment compute became available -- confirm with backend implementations)
  * @param jobEnd   when the job ended
  */
case class StartAndEndTimes(jobStart: OffsetDateTime, cpuStart: Option[OffsetDateTime], jobEnd: OffsetDateTime)
79+
7380
/**
7481
* An extension of the generic AsyncBackendJobExecutionActor providing a standard abstract implementation of an
7582
* asynchronous polling backend.
@@ -85,7 +92,8 @@ trait StandardAsyncExecutionActor
8592
with StandardCachingActorHelper
8693
with AsyncIoActorClient
8794
with KvClient
88-
with SlowJobWarning {
95+
with SlowJobWarning
96+
with PlatformSpecific {
8997
this: Actor with ActorLogging with BackendJobLifecycleActor =>
9098

9199
override lazy val ioCommandBuilder: IoCommandBuilder = DefaultIoCommandBuilder
@@ -894,6 +902,14 @@ trait StandardAsyncExecutionActor
894902
*/
895903
def getTerminalEvents(runStatus: StandardAsyncRunState): Seq[ExecutionEvent] = Seq.empty
896904

905+
/**
  * Extracts job timing information from a terminal run status.
  *
  * This default implementation provides no timing information and always returns `None`.
  *
  * @param runStatus The terminal run status, as defined by isTerminal.
  * @return The job start, optional CPU start and job end times, if they are available.
  */
def getStartAndEndTimes(runStatus: StandardAsyncRunState): Option[StartAndEndTimes] = Option.empty
912+
897913
/**
898914
* Returns true if the status represents a completion.
899915
*
@@ -1316,6 +1332,7 @@ trait StandardAsyncExecutionActor
13161332
val metadata = getTerminalMetadata(state)
13171333
onTaskComplete(state, oldHandle)
13181334
tellMetadata(metadata)
1335+
tellBard(state)
13191336
handleExecutionResult(state, oldHandle)
13201337
case s =>
13211338
Future.successful(
@@ -1509,6 +1526,40 @@ trait StandardAsyncExecutionActor
15091526
serviceRegistryActor.putMetadata(jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), metadataKeyValues)
15101527
}
15111528

1529+
/**
  * Sends a task summary for the given run state to the service registry as a Bard event.
  *
  * Nothing is sent when `getStartAndEndTimes` yields no timing information for `state`.
  *
  * @param state the run state to summarize
  */
def tellBard(state: StandardAsyncRunState): Unit =
  getStartAndEndTimes(state) foreach { times =>
    val dockerImage =
      RuntimeAttributesValidation.extractOption(DockerValidation.instance, validatedRuntimeAttributes)
    val cpus = RuntimeAttributesValidation.extract(CpuValidation.instance, validatedRuntimeAttributes).value
    val memoryInBytes = RuntimeAttributesValidation
      .extract(MemoryValidation.instance(), validatedRuntimeAttributes)
      .to(MemoryUnit.Bytes)
      .amount

    // Both durations are whole seconds measured up to the job end time.
    val jobSeconds = times.jobStart.until(times.jobEnd, ChronoUnit.SECONDS)
    val cpuSeconds = times.cpuStart.map(_.until(times.jobEnd, ChronoUnit.SECONDS))

    val summary = TaskSummaryEvent(
      workflowDescriptor.id.id,
      workflowDescriptor.possibleParentWorkflowId.map(_.id),
      workflowDescriptor.rootWorkflowId.id,
      jobDescriptor.key.tag,
      jobDescriptor.key.call.fullyQualifiedName,
      jobDescriptor.key.index,
      jobDescriptor.key.attempt,
      state.getClass.getSimpleName,
      platform.map(_.runtimeKey),
      dockerImage,
      cpus,
      memoryInBytes,
      times.jobStart.toString,
      times.cpuStart.map(_.toString),
      times.jobEnd.toString,
      jobSeconds,
      cpuSeconds
    )

    serviceRegistryActor ! BardEventRequest(summary)
  }
1562+
15121563
implicit override protected lazy val ec: ExecutionContextExecutor = context.dispatcher
15131564
}
15141565

build.sbt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ lazy val languageFactoryCore = (project in languageFactoryRoot / "language-facto
339339
.dependsOn(common % "test->test")
340340

341341
lazy val wdlDraft2LanguageFactory = (project in languageFactoryRoot / "wdl-draft2")
342-
.withLibrarySettings("wdl-draft2", draft2LanguageFactoryDependencies)
342+
.withLibrarySettings("wdl-draft2", mockServerDependencies)
343343
.dependsOn(languageFactoryCore)
344344
.dependsOn(common % "test->test")
345345
.dependsOn(wdlModelDraft2)

core/src/main/resources/reference.conf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ akka {
3636
mailbox-type = "akka.dispatch.UnboundedControlAwareMailbox"
3737
}
3838

39+
bard-actor-mailbox {
40+
mailbox-type = "akka.dispatch.UnboundedControlAwareMailbox"
41+
}
42+
3943
dispatchers {
4044
# A dispatcher for actors performing blocking io operations
4145
# Prevents the whole system from being slowed down when waiting for responses from external resources for instance
@@ -582,6 +586,17 @@ services {
582586
# ecm.base-url = ""
583587
}
584588
}
589+
// Bard is used for metrics collection in the Terra SaaS offering and is not applicable outside of it.
// The key of this block has to equal the service name declared by the Bard messages
// (BardEventingMessage.serviceName = "BardEventing"), otherwise those messages cannot be matched to this service.
BardEventing {
  class = "cromwell.services.metrics.bard.impl.BardEventingActor"
  config {
    // Off by default; base-url and connection-pool-size below are placeholders until Bard is enabled.
    enabled = false
    bard {
      base-url = ""
      connection-pool-size = 0
    }
  }
}
585600
}
586601

587602
include required(classpath("reference_database.inc.conf"))

project/Dependencies.scala

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ object Dependencies {
66
private val akkaV = "2.5.32" // scala-steward:off (CROM-6637)
77
private val ammoniteOpsV = "2.4.1"
88
private val apacheHttpClientV = "4.5.13"
9+
private val apacheHttpClient5V = "5.3.1"
910
private val awsSdkV = "2.17.265"
1011
// We would like to use the BOM to manage Azure SDK versions, but SBT doesn't support it.
1112
// https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/boms/azure-sdk-bom
@@ -21,6 +22,7 @@ object Dependencies {
2122
private val azureAppInsightsLogbackV = "2.6.4"
2223
private val betterFilesV = "3.9.1"
2324
private val jsonSmartV = "2.4.10"
25+
private val bardClientV = "1.0.4"
2426
/*
2527
cats-effect, fs2, http4s, and sttp (also to v3) should all be upgraded at the same time to use cats-effect 3.x.
2628
*/
@@ -421,8 +423,8 @@ object Dependencies {
421423
exclude("org.apache.httpcomponents", "httpclient"),
422424
"org.broadinstitute.dsde.workbench" %% "workbench-google" % workbenchGoogleV
423425
exclude("com.google.apis", "google-api-services-genomics"),
424-
"org.apache.httpcomponents" % "httpclient" % apacheHttpClientV,
425-
"com.google.apis" % "google-api-services-cloudkms" % googleCloudKmsV
426+
"org.apache.httpcomponents.client5" % "httpclient5" % apacheHttpClient5V,
427+
"com.google.apis" % "google-api-services-cloudkms" % googleCloudKmsV
426428
exclude("com.google.guava", "guava-jdk5"),
427429
"org.glassfish.hk2.external" % "jakarta.inject" % jakartaInjectV,
428430
) ++ googleGenomicsV2Alpha1Dependency ++ googleLifeSciencesV2BetaDependency ++ googleBatchv1Dependency
@@ -529,7 +531,7 @@ object Dependencies {
529531
"jakarta.activation" % "jakarta.activation-api" % jakartaActivationV,
530532
)
531533

532-
val draft2LanguageFactoryDependencies = List(
534+
val mockServerDependencies = List(
533535
"org.mock-server" % "mockserver-netty" % mockserverNettyV % Test
534536
)
535537

@@ -593,7 +595,12 @@ object Dependencies {
593595
val servicesDependencies: List[ModuleID] = List(
594596
"com.google.api" % "gax-grpc" % googleGaxGrpcV,
595597
"org.apache.commons" % "commons-csv" % commonsCsvV,
596-
) ++ testDatabaseDependencies ++ akkaHttpDependencies
598+
"bio.terra" % "bard-client-resttemplate-javax" % bardClientV
599+
exclude("org.springframework", "spring-aop")
600+
exclude("org.springframework", "spring-jcl"),
601+
"org.apache.httpcomponents.client5" % "httpclient5" % apacheHttpClient5V // Needed for rest-template connection pooling
602+
603+
) ++ testDatabaseDependencies ++ akkaHttpDependencies ++ mockServerDependencies
597604

598605
val serverDependencies: List[ModuleID] = slf4jBindingDependencies
599606

@@ -664,7 +671,7 @@ object Dependencies {
664671
cromwellApiClientDependencies ++
665672
databaseMigrationDependencies ++
666673
databaseSqlDependencies ++
667-
draft2LanguageFactoryDependencies ++
674+
mockServerDependencies ++
668675
drsLocalizerDependencies ++
669676
engineDependencies ++
670677
gcsFileSystemDependencies ++
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package cromwell.services.metrics.bard
2+
3+
import com.typesafe.config.Config
4+
import net.ceedubs.ficus.Ficus._
5+
6+
/**
  * Settings for the Bard metrics service.
  *
  * @param enabled            value of the `enabled` setting
  * @param baseUrl            value of the `bard.base-url` setting
  * @param connectionPoolSize value of the `bard.connection-pool-size` setting
  */
final case class BardConfig(enabled: Boolean, baseUrl: String, connectionPoolSize: Int)

object BardConfig {

  /**
    * Reads a [[BardConfig]] from the service's configuration block.
    *
    * @param config configuration containing `enabled`, `bard.base-url` and `bard.connection-pool-size`
    * @return the parsed settings
    */
  def apply(config: Config): BardConfig = {
    val isEnabled = config.as[Boolean]("enabled")
    val url = config.as[String]("bard.base-url")
    val poolSize = config.as[Int]("bard.connection-pool-size")
    BardConfig(enabled = isEnabled, baseUrl = url, connectionPoolSize = poolSize)
  }
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package cromwell.services.metrics.bard
2+
3+
import cromwell.services.ServiceRegistryActor.ServiceRegistryMessage
4+
import cromwell.services.metrics.bard.model.BardEvent
5+
6+
/** Message protocol for the Bard eventing service. */
object BardEventing {

  /** Common parent of all Bard messages; every one of them declares the service name "BardEventing". */
  sealed trait BardEventingMessage extends ServiceRegistryMessage {
    override def serviceName: String = "BardEventing"
  }

  /**
    * Request carrying a single event for Bard.
    *
    * @param event the event payload
    */
  case class BardEventRequest(event: BardEvent) extends BardEventingMessage
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package cromwell.services.metrics.bard
2+
3+
import akka.actor.ActorRef
4+
import bio.terra.bard.api.DefaultApi
5+
import bio.terra.bard.client.ApiClient
6+
import bio.terra.bard.model.EventsEventLogRequest
7+
import cats.data.NonEmptyList
8+
import com.typesafe.scalalogging.LazyLogging
9+
import cromwell.services.instrumentation.CromwellInstrumentation
10+
import cromwell.services.metrics.bard.model.BardEvent
11+
import org.apache.http.impl.client.HttpClients
12+
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager
13+
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory
14+
import org.springframework.web.client.RestTemplate
15+
16+
/**
  * Client wrapper that posts events to Bard and counts successes and failures through instrumentation.
  *
  * NOTE(review): the reference configuration sets `connection-pool-size` to 0 by default; confirm the pooling
  * connection manager accepts that value before constructing this service with the defaults.
  *
  * @param bardUrl            base path configured on the Bard API client
  * @param connectionPoolSize used both as the pool's total connection limit and as its per-route limit
  * @param serviceRegistry    actor exposed as `serviceRegistryActor` for the instrumentation calls
  */
class BardService(bardUrl: String, connectionPoolSize: Int, serviceRegistry: ActorRef)
    extends LazyLogging
    with CromwellInstrumentation {

  import scala.util.control.NonFatal

  private val restTemplate = makeRestTemplateWithPooling
  private val client = getEventApi(restTemplate)
  private val appId = "cromwell"

  override lazy val serviceRegistryActor: ActorRef = serviceRegistry

  /**
    * Sends one event to Bard on a best-effort basis.
    *
    * Non-fatal failures are logged and counted under `send_event.failure`; they are never rethrown.
    * Fatal errors and thread interruption are deliberately left to propagate.
    *
    * @param event the event whose name and properties are posted
    */
  def sendEvent(event: BardEvent): Unit = {
    try {
      val eventLogRequest = new EventsEventLogRequest().properties(event.getProperties)
      client.eventsEventLog(event.eventName, appId, eventLogRequest)
      increment(NonEmptyList.of("send_event", "success"), Some("bard"))
    } catch {
      // NonFatal rather than Exception: an InterruptedException must not be swallowed here.
      case NonFatal(e) =>
        logger.error(s"Failed to send event to Bard: ${e.getMessage}", e)
        increment(NonEmptyList.of("send_event", "failure"), Some("bard"))
    }
    ()
  }

  /**
    * Builds the Bard API facade on top of the given RestTemplate, pointed at `bardUrl`.
    */
  private def getEventApi(restTemplate: RestTemplate): DefaultApi = {
    val bardClient = new ApiClient(restTemplate)
    bardClient.setBasePath(bardUrl)
    new DefaultApi(bardClient)
  }

  /**
    * @return a new RestTemplate backed by a pooling connection manager
    */
  private def makeRestTemplateWithPooling: RestTemplate = {
    val poolingConnManager = new PoolingHttpClientConnectionManager()
    poolingConnManager.setMaxTotal(connectionPoolSize)
    poolingConnManager.setDefaultMaxPerRoute(connectionPoolSize)
    val httpClient = HttpClients.custom.setConnectionManager(poolingConnManager).build
    val factory = new HttpComponentsClientHttpRequestFactory(httpClient)
    new RestTemplate(factory)
  }

}

0 commit comments

Comments
 (0)