Skip to content

Commit ea98b81

Browse files
authored
Merge pull request #5975 from IntersectMBO/baldurb/ekg-restart
cardano-tracer: Allow switching EKG service between different nodes.
2 parents c13177f + 7ba25c5 commit ea98b81

File tree

19 files changed

+431
-410
lines changed

19 files changed

+431
-410
lines changed

cabal.project

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ package cryptonite
4848
flags: -support_rdrand
4949

5050
package snap-server
51-
flags: +openssl
51+
flags: -openssl
5252

5353
package bitvec
5454
flags: -simd
@@ -62,8 +62,8 @@ constraints:
6262

6363
allow-newer:
6464
, katip:Win32
65+
, ekg-wai:time
6566

6667
-- IMPORTANT
6768
-- Do NOT add more source-repository-package stanzas here unless they are strictly
6869
-- temporary! Please read the section in CONTRIBUTING about updating dependencies.
69-

cardano-tracer/CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
# ChangeLog
22

3+
## 0.3 (September 20, 2024)
4+
5+
* Abandon `snap` webserver in favour of `wai`/`warp` for Prometheus and EKG Monitoring.
6+
* Add dynamic routing to EKG stores of all connected nodes.
7+
* Derive URL compliant routes from connected node names (instead of plain node names).
8+
* Remove the requirement of two distinct ports for the EKG backend (changing `hasEKG` config type).
9+
* For optional RTView component only: Disable SSL/https connections. Force `snap-server`
10+
dependency to build with `-flag -openssl`.
11+
* Add JSON responses when listing connected nodes for both Prometheus and EKG Monitoring.
12+
* Add consistency check for redundant port values in the config.
13+
314
## 0.2.4 (August 13, 2024)
415

516
* `systemd` is enabled by default. To disable it use the cabal

cardano-tracer/cardano-tracer.cabal

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cabal-version: 3.0
22

33
name: cardano-tracer
4-
version: 0.2.4
4+
version: 0.3
55
synopsis: A service for logging and monitoring over Cardano nodes
66
description: A service for logging and monitoring over Cardano nodes.
77
category: Cardano,
@@ -155,11 +155,12 @@ library
155155
cardano-git-rev ^>=0.2.2
156156
, cassava
157157
, threepenny-gui
158+
, utf8-string
158159
, vector
159160

160161
build-depends: aeson
161162
, async
162-
, async-extras
163+
, auto-update
163164
, bimap
164165
, blaze-html
165166
, bytestring
@@ -168,21 +169,20 @@ library
168169
, containers
169170
, contra-tracer
170171
, directory
171-
, ekg
172172
, ekg-core
173-
, ekg-forward ^>= 0.5
173+
, ekg-forward >= 0.5
174+
, ekg-wai
174175
, extra
175176
, filepath
177+
, http-types
176178
, mime-mail
177179
, optparse-applicative
178180
, ouroboros-network ^>= 0.17
179181
, ouroboros-network-api
180182
, ouroboros-network-framework
181183
, signal
184+
, slugify
182185
, smtp-mail ^>= 0.5
183-
, snap-blaze
184-
, snap-core
185-
, snap-server
186186
, stm
187187
, string-qq
188188
, text
@@ -191,6 +191,8 @@ library
191191
, trace-forward
192192
, trace-resources
193193
, unordered-containers
194+
, wai ^>= 3.2
195+
, warp ^>= 3.4
194196
, yaml
195197

196198
if flag(systemd) && os(linux)
@@ -281,8 +283,7 @@ library demo-acceptor-lib
281283

282284
exposed-modules: Cardano.Tracer.Test.Acceptor
283285

284-
build-depends: async-extras
285-
, bytestring
286+
build-depends: bytestring
286287
, cardano-tracer
287288
, containers
288289
, extra

cardano-tracer/configuration/complete-example.json

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,10 @@
66
},
77
"loRequestNum": 100,
88
"ekgRequestFreq": 2,
9-
"hasEKG": [
10-
{
11-
"epHost": "127.0.0.1",
12-
"epPort": 3100
13-
},
14-
{
15-
"epHost": "127.0.0.1",
16-
"epPort": 3101
17-
}
18-
],
9+
"hasEKG": {
10+
"epHost": "127.0.0.1",
11+
"epPort": 3100
12+
},
1913
"hasPrometheus": {
2014
"epHost": "127.0.0.1",
2115
"epPort": 3000

cardano-tracer/configuration/complete-example.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,8 @@ network:
77
loRequestNum: 100
88
ekgRequestFreq: 2
99
hasEKG:
10-
- epHost: 127.0.0.1
10+
epHost: 127.0.0.1
1111
epPort: 3100
12-
- epHost: 127.0.0.1
13-
epPort: 3101
1412
hasPrometheus:
1513
epHost: 127.0.0.1
1614
epPort: 3000

cardano-tracer/demo/multi/active-tracer-config.json

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,10 @@
88
"/run/user/1000/cardano-tracer-demo-3.sock"
99
]
1010
},
11-
"hasEKG": [
12-
{
11+
"hasEKG": {
1312
"epHost": "127.0.0.1",
1413
"epPort": 3100
15-
},
16-
{
17-
"epHost": "127.0.0.1",
18-
"epPort": 3101
19-
}
20-
],
14+
},
2115
"hasPrometheus": {
2216
"epHost": "127.0.0.1",
2317
"epPort": 3000

cardano-tracer/demo/multi/passive-tracer-config.json

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,10 @@
44
"tag": "AcceptAt",
55
"contents": "/run/user/1000/cardano-tracer-demo-1.sock"
66
},
7-
"hasEKG": [
8-
{
9-
"epHost": "127.0.0.1",
10-
"epPort": 3100
11-
},
12-
{
13-
"epHost": "127.0.0.1",
14-
"epPort": 3101
15-
}
16-
],
7+
"hasEKG": {
8+
"epHost": "127.0.0.1",
9+
"epPort": 3100
10+
},
1711
"hasPrometheus": {
1812
"epHost": "127.0.0.1",
1913
"epPort": 3000

cardano-tracer/docs/cardano-tracer.md

Lines changed: 100 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -337,72 +337,135 @@ The fields `rpMaxAgeMinutes`, `rpMaxAgeHours` specify the lifetime of the log fi
337337

338338
## Prometheus
339339

340-
The optional field `hasPrometheus` specifies the host and port of the web page with metrics. For example:
340+
At the top-level route `/`, Prometheus gives a list of connected nodes.
341+
342+
The responses are either human-readable names (HTML) with clickable
343+
links, or JSON mapping from connected node names to relative URLs,
344+
depending on desired content type (`Accept:` header of the request).
345+
346+
The routes dynamically depend on the connected nodes, the node names
347+
are [slugified](https://hackage.haskell.org/package/slugify).
348+
349+
The optional field `hasPrometheus` specifies the host and port of the
350+
web page with Prometheus metrics. For example:
341351

342352
```
343353
"hasPrometheus": {
344354
"epHost": "127.0.0.1",
345-
"epPort": 3000
355+
"epPort": 3200
346356
}
347357
```
348358

349-
Here the web page is available at `http://127.0.0.1:3000`. Please note that if you skip this field, the web page will not be available.
359+
With this example, the list of clickable identifiers of connected
360+
nodes will be available at `http://127.0.0.1:3200`, such as:
361+
362+
```
363+
* 127.0.0.1:30004
364+
* 127.0.0.1:30001
365+
* 127.0.0.1:30005
366+
* 127.0.0.1:30000
367+
* 127.0.0.1:30003
368+
* 127.0.0.1:30002
369+
* TxGenerator
370+
```
371+
372+
Clicking an identifier will take you to its monitoring page. For
373+
example clicking on `127.0.0.1:30004` displays the monitoring metrics
374+
at `http://localhost:3200/12700130004`.
350375

351-
After you open `http://127.0.0.1:3000` in your browser, you will see the list of identifiers of connected nodes (or the warning message, if there are no connected nodes yet), for example:
376+
Sending an HTTP GET request with a JSON Accept header gives the metrics
377+
of the top-level route, or identifier as JSON. `jq '.'` pretty-prints
378+
the JSON object.
352379

353380
```
354-
* tmp-forwarder.sock@0
355-
* tmp-forwarder.sock@1
356-
* tmp-forwarder.sock@2
381+
$ curl --silent -H "Accept: application/json" '127.0.0.1:3200' | jq '.'
382+
{
383+
"127.0.0.1:30000": "/12700130000",
384+
"127.0.0.1:30001": "/12700130001",
385+
"127.0.0.1:30002": "/12700130002",
386+
"127.0.0.1:30003": "/12700130003",
387+
"127.0.0.1:30004": "/12700130004",
388+
"127.0.0.1:30005": "/12700130005",
389+
"TxGenerator": "/txgenerator"
390+
}
357391
```
358392

359-
Each identifier is a hyperlink to the page where you will see the **current** list of metrics received from the corresponding node, in such a format:
393+
The Prometheus output is a map from Prometheus metric to value:
360394

361395
```
396+
$ curl '127.0.0.1:3200/12700130004'
397+
blockNum_int 35
398+
rts_gc_init_cpu_ms 5
362399
rts_gc_par_tot_bytes_copied 0
363-
rts_gc_num_gcs 2
364-
rts_gc_max_bytes_slop 15880
365-
rts_gc_num_bytes_usage_samples 1
366-
rts_gc_wall_ms 4005
367-
...
368-
rts_gc_par_max_bytes_copied 0
369-
rts_gc_mutator_cpu_ms 57
370-
rts_gc_mutator_wall_ms 4004
371-
rts_gc_gc_cpu_ms 1
372-
rts_gc_cumulative_bytes_used 184824
400+
served_block_counter 31
401+
submissions_accepted_counter 2771
402+
density_real 5.7692307692307696e-2
403+
blocksForged_int 6
404+
373405
```
374406

375407
## EKG Monitoring
376408

377-
The optional field `hasEKG` specifies the hosts and ports of two web pages:
409+
At top-level route `/` EKG gives a list of connected nodes.
410+
411+
The responses are either human-readable names (HTML) with clickable
412+
links, or JSON mapping from connected node names to relative URLs,
413+
depending on desired content type (`Accept:` header of the request).
378414

379-
1. the list of identifiers of connected nodes,
380-
2. EKG monitoring page.
415+
The routes dynamically depend on the connected nodes, the node names
416+
are [slugified](https://hackage.haskell.org/package/slugify).
381417

382-
For example, if you use JSON configuration file:
418+
The optional field `hasEKG` specifies the host and port of the web
419+
page with EKG metrics. For example:
383420

384421
```
385-
"hasEKG": [
386-
{
387-
"epHost": "127.0.0.1",
388-
"epPort": 3100
389-
},
390-
{
391-
"epHost": "127.0.0.1",
392-
"epPort": 3101
393-
}
394-
]
422+
"hasEKG": {
423+
"epHost": "127.0.0.1",
424+
"epPort": 3100
425+
}
395426
```
396427

397-
The page with the list of identifiers of connected nodes will be available at `http://127.0.0.1:3100`, for example:
428+
With this example, the list of clickable identifiers of connected
429+
nodes will be available at `http://127.0.0.1:3100`, such as:
398430

399431
```
400-
* tmp-forwarder.sock@0
401-
* tmp-forwarder.sock@1
402-
* tmp-forwarder.sock@2
432+
* 127.0.0.1:30004
433+
* 127.0.0.1:30001
434+
* 127.0.0.1:30005
435+
* 127.0.0.1:30000
436+
* 127.0.0.1:30003
437+
* 127.0.0.1:30002
438+
* TxGenerator
403439
```
404440

405-
Each identifier is a hyperlink, after clicking to it you will be redirected to `http://127.0.0.1:3101` where you will see EKG monitoring page for corresponding node.
441+
Clicking an identifier will take you to its monitoring page. For
442+
example clicking on `127.0.0.1:30004` displays the monitoring metrics
443+
at `http://localhost:3100/12700130004`.
444+
445+
Sending an HTTP GET request with a JSON Accept header gives the metrics
446+
of an identifier as JSON. `jq '.'` pretty-prints the JSON object.
447+
448+
```
449+
$ curl --silent -H 'Accept: application/json' '127.0.0.1:3100/12700130004' | jq '.'
450+
{
451+
"ChainSync": {
452+
"HeadersServed_counter": {
453+
"type": "c",
454+
"val": 24
455+
}
456+
},
457+
"Mem": {
458+
"resident_int": {
459+
"type": "g",
460+
"val": 91877376
461+
}
462+
},
463+
"RTS": {
464+
"alloc_int": {
465+
"type": "g",
466+
"val": 1014189896
467+
},
468+
```
406469

407470
## Verbosity
408471

cardano-tracer/src/Cardano/Tracer/Acceptors/Utils.hs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
{-# LANGUAGE NamedFieldPuns #-}
2-
#if RTVIEW
32
{-# LANGUAGE OverloadedStrings #-}
4-
#endif
3+
{-# LANGUAGE TupleSections #-}
54

65
module Cardano.Tracer.Acceptors.Utils
76
( prepareDataPointRequestor
@@ -26,6 +25,7 @@ import Control.Concurrent.STM.TVar (TVar, modifyTVar', newTVarIO)
2625
import qualified Data.Bimap as BM
2726
import qualified Data.Map.Strict as M
2827
import qualified Data.Set as S
28+
import Data.Time.Clock.POSIX (getPOSIXTime)
2929
#if RTVIEW
3030
import Data.Time.Clock.System (getSystemTime, systemToUTCTime)
3131
#endif
@@ -51,12 +51,26 @@ prepareMetricsStores
5151
-> IO (EKG.Store, TVar MetricsLocalStore)
5252
prepareMetricsStores TracerEnv{teConnectedNodes, teAcceptedMetrics} connId = do
5353
addConnectedNode teConnectedNodes connId
54-
storesForNewNode <- (,) <$> EKG.newStore
55-
<*> newTVarIO emptyMetricsLocalStore
56-
atomically $
57-
modifyTVar' teAcceptedMetrics $ M.insert (connIdToNodeId connId) storesForNewNode
54+
store <- EKG.newStore
55+
56+
EKG.registerCounter "ekg.server_timestamp_ms" getTimeMs store
57+
storesForNewNode <- (store ,) <$> newTVarIO emptyMetricsLocalStore
58+
59+
atomically do
60+
modifyTVar' teAcceptedMetrics do
61+
M.insert (connIdToNodeId connId) storesForNewNode
62+
5863
return storesForNewNode
5964

65+
where
66+
-- forkServer definition of `getTimeMs'. The ekg frontend relies
67+
-- on the "ekg.server_timestamp_ms" metric being in every
68+
-- store. While forkServer adds that automatically, we must
69+
-- manually add it.
70+
-- url
71+
-- + https://github.com/tvh/ekg-wai/blob/master/System/Remote/Monitoring/Wai.hs#L237-L238
72+
getTimeMs = (round . (* 1000)) `fmap` getPOSIXTime
73+
6074
addConnectedNode
6175
:: ConnectedNodes
6276
-> ConnectionId LocalAddress

0 commit comments

Comments
 (0)