From 86703facd48029c1d929278d7a78df7b837cba96 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 16 Jun 2023 00:20:13 +0200 Subject: [PATCH 01/15] docs: add basic info for network history restart --- .../restart-data-node-from-network-history.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 docs/node-operators/how-to/restart-data-node-from-network-history.md diff --git a/docs/node-operators/how-to/restart-data-node-from-network-history.md b/docs/node-operators/how-to/restart-data-node-from-network-history.md new file mode 100644 index 000000000..9e1f20dc7 --- /dev/null +++ b/docs/node-operators/how-to/restart-data-node-from-network-history.md @@ -0,0 +1,18 @@ +--- +sidebar_position: 11 +title: How to restart data-node from the network history +sidebar_label: Restart data-node from the network history +hide_title: false +--- + + +## Why would you start/restart your data node from network history? + +- Your data-node crashed +- You are starting a data node when the network has a lot of blocks, and replaying from block 0 is a long process (up to several days) +- Your data-node state got corrupted. + +## What is a network history? + +A network history is a mechanism in the data node that allows sharing of parts of information between other data nodes in the network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need full the network history. 
+ From 672f842e27c49f772e04cb15d98a5990074789e5 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 17 Jun 2023 01:16:39 +0200 Subject: [PATCH 02/15] feat: finish the network history restart docs --- .../restart-data-node-from-network-history.md | 162 ++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/docs/node-operators/how-to/restart-data-node-from-network-history.md b/docs/node-operators/how-to/restart-data-node-from-network-history.md index 9e1f20dc7..33fb5db45 100644 --- a/docs/node-operators/how-to/restart-data-node-from-network-history.md +++ b/docs/node-operators/how-to/restart-data-node-from-network-history.md @@ -16,3 +16,165 @@ hide_title: false A network history is a mechanism in the data node that allows sharing of parts of information between other data nodes in the network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need full the network history. +## What information do you need to restart the data node from the network history? + +- Tendermint RPC servers for the state sync +- Trust block hash and height from one node you are going to sync with +- BootstraPeers for data node + +We will learn, how to get the trust block hash and height later in the tutorial. + +## Steps to start/restart node with network history + +:::warning +You must have a data node already configured to work. If you do not have one, please see the [documentation, how to set up a data node](link to documentation) +::: + +### 1. Stop the data node if it is running + +You must stop the vegavisor if you use it to control your node otherwise, you must stop vega and data node processes. 
+ +Example: + +```shell +# visor: +systemctl stop vegavisor; + +# non-visor: +systemctl stop data-node; +systemctl stop vega; +``` + +### 2. Unsafe reset all the state + +```shell +# remove data node state +rm -rf /home/vega/vega_home/state/data-node/ + +# remove vega state +vega unsafe_reset_all --home + +# remove tendermint state +vega tm unsafe_reset_all --home +``` + +### 3. Update the config + +#### a. The data node config + +The config is located in the `/config/data-node/config.toml`. Update the following parameters in your `config.toml` file for the data node: + +```toml +AutoInitialiseFromNetworkHistory = true + +[SQLStore] + WipeOnStartup = true + +[NetworkHistory] + Enabled = true + [NetworkHistory.Store] + BootstrapPeers = BootstrapPeers = ["/dns/api1.vega.community/tcp/4001/ipfs/12D3KooWDZrusS1p2XyJDbCaWkVDCk2wJaKi6tNb4bjgSHo9yi5Q","/dns/api2.vega.community/tcp/4001/ipfs/12D3KooWEH9pQd6P7RgNEpwbRyavWcwrAdiy9etivXqQZzd7Jkrh","/dns/api0.vega.community/tcp/4001/ipfs/12D3KooWAHkKJfX7rt1pAuGebP9g2BGTT5w7peFGyWd2QbpyZwaw","/dns/api7.vega.community/tcp/4001/ipfs/12D3KooWBqVQPjJur5EvjrizCyKG2d6eyCX8hxkvVXeUQHMjbWj9"] + + + [NetworkHistory.Initialise] + TimeOut = "4h" +``` + +#### b. The vega core config + +The config is located in the `/config/node/config.toml`. Update the following parameters in your `config.toml` file for the vega core: + +```toml +[Snapshot] + StartHeight = -1 + +[Broker] + [Broker.Socket] + DialTimeout = "4h" +``` + +#### c. Tendermint config + +To update tendermint, you have to know the trust block and height. 
To collect the above information, please visit one of the following link: + +- https://api0.vega.community/api/v2/snapshots +- https://api1.vega.community/api/v2/snapshots +- https://api2.vega.community/api/v2/snapshots +- https://api3.vega.community/api/v2/snapshots + +Then select one of the latest pair for block height and hash + +Once you have thrusted block, you can update the following parameters in the `/config/config.toml` file: + +```toml +[statesync] +enable = true +rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657,api7.vega.community:26657" +trust_height = +trust_hash = "" +``` + +Example config: + +:::warning +Do not use below block. Please select newer block! +::: + +```toml +[statesync] +enable = true +rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657,api7.vega.community:26657" +trust_height = 3040600 +trust_hash = "b4b500d8fc84cce3a42b141193db7ba23ff03cc80b70cc817f6536582ebd5eda" +``` + +### 4. Start your node + +:::warning +When you are not using visor. You MUST start your data node before the vega core. +::: + +```shell +# with visor +systemctl start vegavisor; + +# non-visor +systemctl start data-node; +systemctl start vega; +``` + +Your node should start in a several minutes. + +:::info +If you use vegavisor, you may see the following messages in the logs; please ignore them. 
It is just visor checking if node has already started: + +```log +Jun 16 22:21:10 vega visor[1876]: 2023-06-16T22:21:10.125Z DEBUG visor visor/visor.go:171 failed to get upgrade status from API {"error": "failed to call protocolupgrade.UpgradeStatus method: failed to post data \"{\\\"method\\\":\\\"protocolupgrade.UpgradeStatus\\\",\\\"params\\\":[null],\\\"id\\\":8485730894528034258}\": Post \"http://unix/rpc\": dial unix /tmp/vega.sock: connect: no such file or directory"} +Jun 16 22:21:10 vega visor[1883]: 2023-06-16T22:21:10.242Z ERROR core.protocol.broker.socket-client broker/socket_client.go:182 failed to connect, retrying {"error": "dial tcp 127.0.0.1:3005: connect: connection refused", "peer": "tcp://127.0.0.1:3005"} +``` + +::: + +### 5. Revert required after your node has started + +:::warning +This step is critical, and you may end with the corrupted node after next restart or protocol upgrade!!! +::: + +#### a. Disable statesync in the tendermint config + +Open the `/config/config.toml` file and update the following parameter: + +```toml +[statesync] +enable = false +``` + +#### b. 
Disable wiping the data node database + +Open the `/config/data-node/config.toml` file and update the following parmater: + +```toml +[SQLStore] + WipeOnStartup = false +``` From 64828f55c059e6a6bcf643a8dde601ffb3f6dfb6 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 17 Jun 2023 01:21:59 +0200 Subject: [PATCH 03/15] feat: add small note --- .../how-to/restart-data-node-from-network-history.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/node-operators/how-to/restart-data-node-from-network-history.md b/docs/node-operators/how-to/restart-data-node-from-network-history.md index 33fb5db45..d88938c25 100644 --- a/docs/node-operators/how-to/restart-data-node-from-network-history.md +++ b/docs/node-operators/how-to/restart-data-node-from-network-history.md @@ -178,3 +178,7 @@ Open the `/config/data-node/config.toml` file and update the followin [SQLStore] WipeOnStartup = false ``` + +:::info +Do not restart your node. Just update config to avoid issues in the future restarts. +::: \ No newline at end of file From 50149945f10789a68b8db09b6dcf48217be29944 Mon Sep 17 00:00:00 2001 From: daniel1302 Date: Sun, 18 Jun 2023 14:32:50 +0200 Subject: [PATCH 04/15] feat: add fairground and validators testnet to restart data-node from network history --- .../restart-data-node-from-network-history.md | 89 ++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) diff --git a/docs/node-operators/how-to/restart-data-node-from-network-history.md b/docs/node-operators/how-to/restart-data-node-from-network-history.md index d88938c25..f75f4eac8 100644 --- a/docs/node-operators/how-to/restart-data-node-from-network-history.md +++ b/docs/node-operators/how-to/restart-data-node-from-network-history.md @@ -4,6 +4,8 @@ title: How to restart data-node from the network history sidebar_label: Restart data-node from the network history hide_title: false --- +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; ## Why would you start/restart your data node 
from network history? @@ -64,6 +66,11 @@ vega tm unsafe_reset_all --home The config is located in the `/config/data-node/config.toml`. Update the following parameters in your `config.toml` file for the data node: + + + + + ```toml AutoInitialiseFromNetworkHistory = true @@ -80,10 +87,31 @@ AutoInitialiseFromNetworkHistory = true TimeOut = "4h" ``` + + + + +TBD for fairground + + + + + +TBD for validators-testnet + + + + + #### b. The vega core config The config is located in the `/config/node/config.toml`. Update the following parameters in your `config.toml` file for the vega core: + + + + + ```toml [Snapshot] StartHeight = -1 @@ -93,6 +121,22 @@ The config is located in the `/config/node/config.toml`. Update the f DialTimeout = "4h" ``` + + + + +TBD for fairground + + + + + +TBD for validators-testnet + + + + + #### c. Tendermint config To update tendermint, you have to know the trust block and height. To collect the above information, please visit one of the following link: @@ -106,6 +150,11 @@ Then select one of the latest pair for block height and hash Once you have thrusted block, you can update the following parameters in the `/config/config.toml` file: + + + + + ```toml [statesync] enable = true @@ -128,6 +177,44 @@ trust_height = 3040600 trust_hash = "b4b500d8fc84cce3a42b141193db7ba23ff03cc80b70cc817f6536582ebd5eda" ``` + + + + +```toml +[statesync] +enable = true +rpc_servers = "n00.testnet.vega.rocks:26657,n06.testnet.vega.rocks:26657,n07.testnet.vega.rocks:26657" +trust_height = <height for collected block> +trust_hash = "<hash for collected block>" +``` + +Example config: + +:::warning +Do not use below block. Please select newer block! 
+::: + +```toml +[statesync] +enable = true +rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657,api7.vega.community:26657" +trust_height = 3040600 +trust_hash = "b4b500d8fc84cce3a42b141193db7ba23ff03cc80b70cc817f6536582ebd5eda" +``` + + + + + + + +TBD for validators-testnet + + + + + ### 4. Start your node :::warning @@ -181,4 +268,4 @@ Open the `/config/data-node/config.toml` file and update the followin :::info Do not restart your node. Just update config to avoid issues in the future restarts. -::: \ No newline at end of file +::: From c71beef901a74d345b1b5612f0bbb3fd112ba919 Mon Sep 17 00:00:00 2001 From: daniel1302 Date: Mon, 19 Jun 2023 11:42:07 +0200 Subject: [PATCH 05/15] feat: add examples for fairground and validators-testnet --- .../restart-data-node-from-network-history.md | 108 +++++++++++++++--- 1 file changed, 93 insertions(+), 15 deletions(-) diff --git a/docs/node-operators/how-to/restart-data-node-from-network-history.md b/docs/node-operators/how-to/restart-data-node-from-network-history.md index f75f4eac8..80c5a4f2c 100644 --- a/docs/node-operators/how-to/restart-data-node-from-network-history.md +++ b/docs/node-operators/how-to/restart-data-node-from-network-history.md @@ -70,7 +70,6 @@ The config is located in the `/config/data-node/config.toml`. 
Update - ```toml AutoInitialiseFromNetworkHistory = true @@ -80,7 +79,7 @@ AutoInitialiseFromNetworkHistory = true [NetworkHistory] Enabled = true [NetworkHistory.Store] - BootstrapPeers = BootstrapPeers = ["/dns/api1.vega.community/tcp/4001/ipfs/12D3KooWDZrusS1p2XyJDbCaWkVDCk2wJaKi6tNb4bjgSHo9yi5Q","/dns/api2.vega.community/tcp/4001/ipfs/12D3KooWEH9pQd6P7RgNEpwbRyavWcwrAdiy9etivXqQZzd7Jkrh","/dns/api0.vega.community/tcp/4001/ipfs/12D3KooWAHkKJfX7rt1pAuGebP9g2BGTT5w7peFGyWd2QbpyZwaw","/dns/api7.vega.community/tcp/4001/ipfs/12D3KooWBqVQPjJur5EvjrizCyKG2d6eyCX8hxkvVXeUQHMjbWj9"] + BootstrapPeers = ["/dns/api1.vega.community/tcp/4001/ipfs/12D3KooWDZrusS1p2XyJDbCaWkVDCk2wJaKi6tNb4bjgSHo9yi5Q","/dns/api2.vega.community/tcp/4001/ipfs/12D3KooWEH9pQd6P7RgNEpwbRyavWcwrAdiy9etivXqQZzd7Jkrh","/dns/api0.vega.community/tcp/4001/ipfs/12D3KooWAHkKJfX7rt1pAuGebP9g2BGTT5w7peFGyWd2QbpyZwaw","/dns/api7.vega.community/tcp/4001/ipfs/12D3KooWBqVQPjJur5EvjrizCyKG2d6eyCX8hxkvVXeUQHMjbWj9"] [NetworkHistory.Initialise] @@ -91,13 +90,39 @@ AutoInitialiseFromNetworkHistory = true -TBD for fairground +```toml +AutoInitialiseFromNetworkHistory = true + +[SQLStore] + WipeOnStartup = true + +[NetworkHistory] + Enabled = true + [NetworkHistory.Store] + BootstrapPeers = ["/dns/n00.testnet.vega.rocks/tcp/4001/ipfs/12D3KooWNiWcT93S3P3eiHqGq4a6feaD2cUfbWw9AxgdVt8RzTHJ","/dns/n06.testnet.vega.rocks/tcp/4001/ipfs/12D3KooWMSaQevxg1JcaFxWTpxMjKw1J13bLVLmoxbeSJ5gpXjRh","/dns/n07.testnet.vega.rocks/tcp/4001/ipfs/12D3KooWACJuzchZQH8Tz1zNmkGCatgcS2DUoiQnMFaALVMo7DpC"] + + [NetworkHistory.Initialise] + TimeOut = "4h" +``` -TBD for validators-testnet +```toml +AutoInitialiseFromNetworkHistory = true + +[SQLStore] + WipeOnStartup = true + +[NetworkHistory] + Enabled = true + [NetworkHistory.Store] + BootstrapPeers = 
["/dns/n00.validators-testnet.vega.rocks/tcp/4001/ipfs/12D3KooWQbCMy5echT1sMKwRQh8GJJk5zmHmg6VNg1qEbpysNACN","/dns/n02.validators-testnet.vega.rocks/tcp/4001/ipfs/12D3KooWHffX2tdw2phH7ai8GCo2K3ehJfnLRATve5otVr4D3ggK","/dns/metabase00.validators-testnet.vega.rocks/tcp/4001/ipfs/12D3KooWKPDZ1s5FM8YewZVeRb9XwaQ7PdaoyD84hFnKmVbn94gN"] + + [NetworkHistory.Initialise] + TimeOut = "4h" +``` @@ -111,7 +136,6 @@ The config is located in the `/config/node/config.toml`. Update the f - ```toml [Snapshot] StartHeight = -1 @@ -125,13 +149,27 @@ The config is located in the `/config/node/config.toml`. Update the f -TBD for fairground +```toml +[Snapshot] + StartHeight = -1 + +[Broker] + [Broker.Socket] + DialTimeout = "4h" +``` -TBD for validators-testnet +```toml +[Snapshot] + StartHeight = -1 + +[Broker] + [Broker.Socket] + DialTimeout = "4h" +``` @@ -141,11 +179,33 @@ TBD for validators-testnet To update tendermint, you have to know the trust block and height. To collect the above information, please visit one of the following link: + + + - https://api0.vega.community/api/v2/snapshots - https://api1.vega.community/api/v2/snapshots - https://api2.vega.community/api/v2/snapshots - https://api3.vega.community/api/v2/snapshots + + + + +- https://api.n00.testnet.vega.rocks/api/v2/snapshots +- https://api.n06.testnet.vega.rocks/api/v2/snapshots +- https://api.n07.testnet.vega.rocks/api/v2/snapshots + + + + + +- https://api.n00.validators-testnet.vega.rocks/api/v2/snapshots +- https://api.n02.validators-testnet.vega.rocks/api/v2/snapshots + + + + + Then select one of the latest pair for block height and hash Once you have thrusted block, you can update the following parameters in the `/config/config.toml` file: @@ -185,8 +245,8 @@ trust_hash = "b4b500d8fc84cce3a42b141193db7ba23ff03cc80b70cc817f6536582ebd5eda" [statesync] enable = true rpc_servers = "n00.testnet.vega.rocks:26657,n06.testnet.vega.rocks:26657,n07.testnet.vega.rocks:26657" -trust_height = <height for collected block> 
-trust_hash = "<hash for collected block>" +trust_height = +trust_hash = "" ``` Example config: @@ -198,18 +258,36 @@ Do not use below block. Please select newer block! ```toml [statesync] enable = true -rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657,api7.vega.community:26657" -trust_height = 3040600 -trust_hash = "b4b500d8fc84cce3a42b141193db7ba23ff03cc80b70cc817f6536582ebd5eda" +rpc_servers = "n00.testnet.vega.rocks:26657,n06.testnet.vega.rocks:26657,n07.testnet.vega.rocks:26657" +trust_height = 5103884 +trust_hash = "ac6670c5e37a3b8f90e1380d03fe83c3021bab77acf93c203d26152693877732" ``` - - -TBD for validators-testnet +```toml +[statesync] +enable = true +rpc_servers = "n00.validators-testnet.vega.rocks:26657,n02.validators-testnet.vega.rocks:26657" +trust_height = +trust_hash = "" +``` + +Example config: + +:::warning +Do not use below block. Please select newer block! +::: + +```toml +[statesync] +enable = true +rpc_servers = "n00.validators-testnet.vega.rocks:26657,n02.validators-testnet.vega.rocks:26657" +trust_height = 3896400 +trust_hash = "9edf8b5779aa79e96ac95256a7c671b855990be518a9c7cbb98eb8694918b004" +``` From a36a5c10ce0e99fab05f409bec2d4b5a5917a112 Mon Sep 17 00:00:00 2001 From: candida-d <62548908+candida-d@users.noreply.github.com> Date: Tue, 20 Jun 2023 16:25:56 +0100 Subject: [PATCH 06/15] tidy up and duplicate for mainnet --- .../restart-data-node-from-network-history.md | 148 ++++--------- .../restart-data-node-from-network-history.md | 203 ++++++++++++++++++ 2 files changed, 242 insertions(+), 109 deletions(-) create mode 100644 versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md diff --git a/docs/node-operators/how-to/restart-data-node-from-network-history.md b/docs/node-operators/how-to/restart-data-node-from-network-history.md index 80c5a4f2c..22a3c0b98 100644 --- a/docs/node-operators/how-to/restart-data-node-from-network-history.md +++ 
b/docs/node-operators/how-to/restart-data-node-from-network-history.md @@ -1,40 +1,41 @@ --- sidebar_position: 11 -title: How to restart data-node from the network history -sidebar_label: Restart data-node from the network history +title: How to restart data node from network history +sidebar_label: Restart data node from network history hide_title: false --- import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - ## Why would you start/restart your data node from network history? -- Your data-node crashed -- You are starting a data node when the network has a lot of blocks, and replaying from block 0 is a long process (up to several days) -- Your data-node state got corrupted. +- Your data node crashed +- You are starting a data node when the network has a lot of blocks, and replaying from block 0 would be a long process (up to several days) +- Your data node state got corrupted -## What is a network history? +## What is network history? -A network history is a mechanism in the data node that allows sharing of parts of information between other data nodes in the network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need full the network history. +Network history is a mechanism in the data node software that allows for sharing chunks of information between other data nodes connected to the network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need the full network history. 
## What information do you need to restart the data node from the network history? - Tendermint RPC servers for the state sync - Trust block hash and height from one node you are going to sync with -- BootstraPeers for data node +- Bootstrap peers for data node -We will learn, how to get the trust block hash and height later in the tutorial. +This guide will explain how to get the trust block hash and height further down. ## Steps to start/restart node with network history -:::warning -You must have a data node already configured to work. If you do not have one, please see the [documentation, how to set up a data node](link to documentation) +:::warning Data node must be configured +You must have a data node already configured and running. If you do not have one already, see the guide on [setting up a data node](../get-started/setup-datanode.md). ::: -### 1. Stop the data node if it is running +### 1. Stop data node if it is running + +If you use Visor to control your node, you must stop Visor. -You must stop the vegavisor if you use it to control your node otherwise, you must stop vega and data node processes. +Otherwise, you must stop the Vega and data node processes. Example: @@ -47,47 +48,29 @@ systemctl stop data-node; systemctl stop vega; ``` -### 2. Unsafe reset all the state +### 2. Use unsafe reset all to clear the state ```shell # remove data node state rm -rf /home/vega/vega_home/state/data-node/ -# remove vega state +# remove Vega state vega unsafe_reset_all --home -# remove tendermint state +# remove Tendermint state vega tm unsafe_reset_all --home ``` -### 3. Update the config +### 3. Update config -#### a. The data node config +#### a. Data node config -The config is located in the `/config/data-node/config.toml`. Update the following parameters in your `config.toml` file for the data node: +Data node config is located in the `/config/data-node/config.toml` file. 
+Update the following parameters in your `config.toml` file for the data node: - - - -```toml -AutoInitialiseFromNetworkHistory = true - -[SQLStore] - WipeOnStartup = true - -[NetworkHistory] - Enabled = true - [NetworkHistory.Store] - BootstrapPeers = ["/dns/api1.vega.community/tcp/4001/ipfs/12D3KooWDZrusS1p2XyJDbCaWkVDCk2wJaKi6tNb4bjgSHo9yi5Q","/dns/api2.vega.community/tcp/4001/ipfs/12D3KooWEH9pQd6P7RgNEpwbRyavWcwrAdiy9etivXqQZzd7Jkrh","/dns/api0.vega.community/tcp/4001/ipfs/12D3KooWAHkKJfX7rt1pAuGebP9g2BGTT5w7peFGyWd2QbpyZwaw","/dns/api7.vega.community/tcp/4001/ipfs/12D3KooWBqVQPjJur5EvjrizCyKG2d6eyCX8hxkvVXeUQHMjbWj9"] - - - [NetworkHistory.Initialise] - TimeOut = "4h" -``` - - + ```toml @@ -128,25 +111,11 @@ AutoInitialiseFromNetworkHistory = true -#### b. The vega core config +#### b. Vega core node config -The config is located in the `/config/node/config.toml`. Update the following parameters in your `config.toml` file for the vega core: +The config is located in the `/config/node/config.toml`. Update the following parameters in your `config.toml` file for the Vega core: - - - -```toml -[Snapshot] - StartHeight = -1 - -[Broker] - [Broker.Socket] - DialTimeout = "4h" -``` - - - ```toml @@ -177,18 +146,9 @@ The config is located in the `/config/node/config.toml`. Update the f #### c. Tendermint config -To update tendermint, you have to know the trust block and height. To collect the above information, please visit one of the following link: +To update Tendermint, you need to know the trust block and height. To collect the above information, please visit one of the following links: - - -- https://api0.vega.community/api/v2/snapshots -- https://api1.vega.community/api/v2/snapshots -- https://api2.vega.community/api/v2/snapshots -- https://api3.vega.community/api/v2/snapshots - - - - https://api.n00.testnet.vega.rocks/api/v2/snapshots @@ -206,39 +166,11 @@ To update tendermint, you have to know the trust block and height. 
To collect th -Then select one of the latest pair for block height and hash +Then select one of the latest pairs for block height and hash. -Once you have thrusted block, you can update the following parameters in the `/config/config.toml` file: +Once you have the trusted block, you can update the following parameters in the `/config/config.toml` file: - - - - -```toml -[statesync] -enable = true -rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657,api7.vega.community:26657" -trust_height = -trust_hash = "" -``` - -Example config: - -:::warning -Do not use below block. Please select newer block! -::: - -```toml -[statesync] -enable = true -rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657,api7.vega.community:26657" -trust_height = 3040600 -trust_hash = "b4b500d8fc84cce3a42b141193db7ba23ff03cc80b70cc817f6536582ebd5eda" -``` - - - ```toml @@ -251,8 +183,8 @@ trust_hash = "" Example config: -:::warning -Do not use below block. Please select newer block! +:::warning Sample data +Do not use the below block. Select a newer block! ::: ```toml @@ -278,7 +210,7 @@ trust_hash = "" Example config: :::warning -Do not use below block. Please select newer block! +Do not use the below block. Select a newer block! ::: ```toml @@ -296,14 +228,14 @@ trust_hash = "9edf8b5779aa79e96ac95256a7c671b855990be518a9c7cbb98eb8694918b004" ### 4. Start your node :::warning -When you are not using visor. You MUST start your data node before the vega core. +If you are not using Visor, you MUST start your data node before starting the Vega core. ::: ```shell -# with visor +# with Visor systemctl start vegavisor; -# non-visor +# without Visor systemctl start data-node; systemctl start vega; ``` @@ -311,7 +243,7 @@ systemctl start vega; Your node should start in a several minutes. :::info -If you use vegavisor, you may see the following messages in the logs; please ignore them. 
It is just visor checking if node has already started: +If you use Visor, you may see the following messages in the logs; please ignore them. It is just Visor checking whether the node has already started: ```log Jun 16 22:21:10 vega visor[1876]: 2023-06-16T22:21:10.125Z DEBUG visor visor/visor.go:171 failed to get upgrade status from API {"error": "failed to call protocolupgrade.UpgradeStatus method: failed to post data \"{\\\"method\\\":\\\"protocolupgrade.UpgradeStatus\\\",\\\"params\\\":[null],\\\"id\\\":8485730894528034258}\": Post \"http://unix/rpc\": dial unix /tmp/vega.sock: connect: no such file or directory"} @@ -322,11 +254,11 @@ Jun 16 22:21:10 vega visor[1883]: 2023-06-16T22:21:10.242Z ERROR c ### 5. Revert required after your node has started -:::warning -This step is critical, and you may end with the corrupted node after next restart or protocol upgrade!!! +:::warning Critical step to avoid corrupted node +This step is critical; otherwise, you may end up with a corrupted node after the next restart or protocol upgrade. ::: -#### a. Disable statesync in the tendermint config +#### a. Disable statesync in Tendermint config Open the `/config/config.toml` file and update the following parameter: @@ -344,6 +276,4 @@ Open the `/config/data-node/config.toml` file and update the followin WipeOnStartup = false ``` -:::info -Do not restart your node. Just update config to avoid issues in the future restarts. -::: +**Do not restart your node.** You only need to update the config to avoid having issues in future restarts. 
diff --git a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md new file mode 100644 index 000000000..5cc17d4c0 --- /dev/null +++ b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md @@ -0,0 +1,203 @@ +--- +sidebar_position: 11 +title: How to restart data node from network history +sidebar_label: Restart data node from network history +hide_title: false +--- +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## Why would you start/restart your data node from network history? + +- Your data node crashed +- You are starting a data node when the network has a lot of blocks, and replaying from block 0 would be a long process (up to several days) +- Your data node state got corrupted + +## What is network history? + +Network history is a mechanism in the data node software that allows for sharing chunks of information between other data nodes connected to the network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need the full network history. + +## What information do you need to restart the data node from the network history? + +- Tendermint RPC servers for the state sync +- Trust block hash and height from one node you are going to sync with +- Bootstrap peers for data node + +This guide will explain how to get the trust block hash and height further down. + +## Steps to start/restart node with network history + +:::warning Data node must be configured +You must have a data node already configured and running. 
If you do not have one already, see the guide on [setting up a data node](../get-started/setup-datanode.md). +::: + +### 1. Stop data node if it is running + +If you use Visor to control your node, you must stop Visor. + +Otherwise, you must stop the Vega and data node processes. + +Example: + +```shell +# visor: +systemctl stop vegavisor; + +# non-visor: +systemctl stop data-node; +systemctl stop vega; +``` + +### 2. Use unsafe reset all to clear the state + +```shell +# remove data node state +rm -rf /home/vega/vega_home/state/data-node/ + +# remove Vega state +vega unsafe_reset_all --home + +# remove Tendermint state +vega tm unsafe_reset_all --home +``` + +### 3. Update config + +#### a. Data node config + +Data node config is located in the `/config/data-node/config.toml` file. + +Update the following parameters in your `config.toml` file for the mainnet data node: + +```toml +AutoInitialiseFromNetworkHistory = true + +[SQLStore] + WipeOnStartup = true + +[NetworkHistory] + Enabled = true + [NetworkHistory.Store] + BootstrapPeers = ["/dns/api1.vega.community/tcp/4001/ipfs/12D3KooWDZrusS1p2XyJDbCaWkVDCk2wJaKi6tNb4bjgSHo9yi5Q","/dns/api2.vega.community/tcp/4001/ipfs/12D3KooWEH9pQd6P7RgNEpwbRyavWcwrAdiy9etivXqQZzd7Jkrh","/dns/api0.vega.community/tcp/4001/ipfs/12D3KooWAHkKJfX7rt1pAuGebP9g2BGTT5w7peFGyWd2QbpyZwaw","/dns/api7.vega.community/tcp/4001/ipfs/12D3KooWBqVQPjJur5EvjrizCyKG2d6eyCX8hxkvVXeUQHMjbWj9"] + + + [NetworkHistory.Initialise] + TimeOut = "4h" +``` + + +#### b. Vega core node config + +The config is located in the `/config/node/config.toml`. Update the following parameters in your `config.toml` file for the Vega core for mainnet: + +```toml +[Snapshot] + StartHeight = -1 + +[Broker] + [Broker.Socket] + DialTimeout = "4h" +``` + +#### c. Tendermint config + +To update Tendermint, you need to know the trust block and height. 
To collect the above information, please visit one of the following links: + +- https://api0.vega.community/api/v2/snapshots +- https://api1.vega.community/api/v2/snapshots +- https://api2.vega.community/api/v2/snapshots +- https://api3.vega.community/api/v2/snapshots + + +Then select one of the latest pairs for block height and hash. + +Once you have the trusted block, you can update the following parameters in the `/config/config.toml` file: + +```toml +[statesync] +enable = true +rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657,api7.vega.community:26657" +trust_height = +trust_hash = "" +``` + +Example config: + +:::warning Sample data +Do not use the below block. Select a newer block! +::: + +```toml +[statesync] +enable = true +rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657,api7.vega.community:26657" +trust_height = 3040600 +trust_hash = "b4b500d8fc84cce3a42b141193db7ba23ff03cc80b70cc817f6536582ebd5eda" +``` + +Example config: + +:::warning +Do not use the below block. Select a newer block! +::: + +```toml +[statesync] +enable = true +rpc_servers = "n00.validators-testnet.vega.rocks:26657,n02.validators-testnet.vega.rocks:26657" +trust_height = 3896400 +trust_hash = "9edf8b5779aa79e96ac95256a7c671b855990be518a9c7cbb98eb8694918b004" +``` + +### 4. Start your node + +:::warning +If you are not using Visor, you MUST start your data node before starting the Vega core. +::: + +```shell +# with Visor +systemctl start vegavisor; + +# without Visor +systemctl start data-node; +systemctl start vega; +``` + +Your node should start in several minutes. + +:::info +If you use Visor, you may see the following messages in the logs; please ignore them. 
It is just visor checking if node has already started: + +```log +Jun 16 22:21:10 vega visor[1876]: 2023-06-16T22:21:10.125Z DEBUG visor visor/visor.go:171 failed to get upgrade status from API {"error": "failed to call protocolupgrade.UpgradeStatus method: failed to post data \"{\\\"method\\\":\\\"protocolupgrade.UpgradeStatus\\\",\\\"params\\\":[null],\\\"id\\\":8485730894528034258}\": Post \"http://unix/rpc\": dial unix /tmp/vega.sock: connect: no such file or directory"} +Jun 16 22:21:10 vega visor[1883]: 2023-06-16T22:21:10.242Z ERROR core.protocol.broker.socket-client broker/socket_client.go:182 failed to connect, retrying {"error": "dial tcp 127.0.0.1:3005: connect: connection refused", "peer": "tcp://127.0.0.1:3005"} +``` + +::: + +### 5. Revert required after your node has started + +:::warning Critical step to avoid corrupted node +This step is critical, otherwise you may end with a corrupted node after next restart or protocol upgrade. +::: + +#### a. Disable statesync in Tendermint config + +Open the `/config/config.toml` file and update the following parameter: + +```toml +[statesync] +enable = false +``` + +#### b. Disable wiping the data node database + +Open the `/config/data-node/config.toml` file and update the following parmater: + +```toml +[SQLStore] + WipeOnStartup = false +``` + +**Do not restart your node.** You only need to update the config to avoid having issues in future restarts. 
From fcd495fab09c3b18cacc15de7aab879623aa337c Mon Sep 17 00:00:00 2001 From: candida-d <62548908+candida-d@users.noreply.github.com> Date: Tue, 20 Jun 2023 16:48:47 +0100 Subject: [PATCH 07/15] more minor changes --- .../restart-data-node-from-network-history.md | 13 ++++++++----- .../restart-data-node-from-network-history.md | 12 +++++++----- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/docs/node-operators/how-to/restart-data-node-from-network-history.md b/docs/node-operators/how-to/restart-data-node-from-network-history.md index 22a3c0b98..39bb738dd 100644 --- a/docs/node-operators/how-to/restart-data-node-from-network-history.md +++ b/docs/node-operators/how-to/restart-data-node-from-network-history.md @@ -17,15 +17,18 @@ import TabItem from '@theme/TabItem'; Network history is a mechanism in the data node software that allows for sharing chunks of information between other data nodes connected to the network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need the full network history. -## What information do you need to restart the data node from the network history? +## Requirements +Information you need to start or restart the data node from network history: - Tendermint RPC servers for the state sync - Trust block hash and height from one node you are going to sync with - Bootstrap peers for data node This guide will explain how to get the trust block hash and height further down. -## Steps to start/restart node with network history +## Start/restart node with network history + +Follow the steps below to start up, or restart, your data node using network history. :::warning Data node must be configured You must have a data node already configured and running. 
If you do not have one already, see the guide on [setting up a data node](../get-started/setup-datanode.md). @@ -37,7 +40,7 @@ If you're using Visor, you must stop it to control your node. Otherwise, you must stop the Vega and data node processes. -Example: +Example commands: ```shell # visor: @@ -227,7 +230,7 @@ trust_hash = "9edf8b5779aa79e96ac95256a7c671b855990be518a9c7cbb98eb8694918b004" ### 4. Start your node -:::warning +:::caution Start data node first If you are not using Visor, you MUST start your data node before starting the Vega core. ::: @@ -243,7 +246,7 @@ systemctl start vega; Your node should start in a several minutes. :::info -If you use Visor, you may see the following messages in the logs; please ignore them. It is just visor checking if node has already started: +If you use Visor, you may see the following messages in the logs; please ignore them. It is just Visor checking if node has already started: ```log Jun 16 22:21:10 vega visor[1876]: 2023-06-16T22:21:10.125Z DEBUG visor visor/visor.go:171 failed to get upgrade status from API {"error": "failed to call protocolupgrade.UpgradeStatus method: failed to post data \"{\\\"method\\\":\\\"protocolupgrade.UpgradeStatus\\\",\\\"params\\\":[null],\\\"id\\\":8485730894528034258}\": Post \"http://unix/rpc\": dial unix /tmp/vega.sock: connect: no such file or directory"} diff --git a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md index 5cc17d4c0..b77439622 100644 --- a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md +++ b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md @@ -17,15 +17,17 @@ import TabItem from '@theme/TabItem'; Network history is a mechanism in the data node software that allows for sharing chunks of information between other data nodes connected to the 
network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need the full network history. -## What information do you need to restart the data node from the network history? +## Requirements +Information you need to start or restart the data node from network history: - Tendermint RPC servers for the state sync - Trust block hash and height from one node you are going to sync with - Bootstrap peers for data node This guide will explain how to get the trust block hash and height further down. -## Steps to start/restart node with network history +## Start/restart node with network history +Follow the steps below to start up, or restart, your data node using network history. :::warning Data node must be configured You must have a data node already configured and running. If you do not have one already, see the guide on [setting up a data node](../get-started/setup-datanode.md). @@ -37,7 +39,7 @@ If you're using Visor, you must stop it to control your node. Otherwise, you must stop the Vega and data node processes. -Example: +Example commands: ```shell # visor: @@ -151,7 +153,7 @@ trust_hash = "9edf8b5779aa79e96ac95256a7c671b855990be518a9c7cbb98eb8694918b004" ### 4. Start your node -:::warning +:::caution Start data node If you are not using Visor, you MUST start your data node before starting the Vega core. ::: @@ -167,7 +169,7 @@ systemctl start vega; Your node should start in a several minutes. :::info -If you use Visor, you may see the following messages in the logs; please ignore them. It is just visor checking if node has already started: +If you use Visor, you may see the following messages in the logs; please ignore them. 
It is just Visor checking if node has already started: ```log Jun 16 22:21:10 vega visor[1876]: 2023-06-16T22:21:10.125Z DEBUG visor visor/visor.go:171 failed to get upgrade status from API {"error": "failed to call protocolupgrade.UpgradeStatus method: failed to post data \"{\\\"method\\\":\\\"protocolupgrade.UpgradeStatus\\\",\\\"params\\\":[null],\\\"id\\\":8485730894528034258}\": Post \"http://unix/rpc\": dial unix /tmp/vega.sock: connect: no such file or directory"} From f5760f13bb57961bebe7079b3ae001d65c4110fb Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 21 Jun 2023 11:31:56 +0200 Subject: [PATCH 08/15] feat: cleanup mainnet docs for network history restart --- .../restart-data-node-from-network-history.md | 21 +++---------------- 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md index b77439622..fc6392e09 100644 --- a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md +++ b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md @@ -80,7 +80,7 @@ AutoInitialiseFromNetworkHistory = true [NetworkHistory] Enabled = true [NetworkHistory.Store] - BootstrapPeers = ["/dns/api1.vega.community/tcp/4001/ipfs/12D3KooWDZrusS1p2XyJDbCaWkVDCk2wJaKi6tNb4bjgSHo9yi5Q","/dns/api2.vega.community/tcp/4001/ipfs/12D3KooWEH9pQd6P7RgNEpwbRyavWcwrAdiy9etivXqQZzd7Jkrh","/dns/api0.vega.community/tcp/4001/ipfs/12D3KooWAHkKJfX7rt1pAuGebP9g2BGTT5w7peFGyWd2QbpyZwaw","/dns/api7.vega.community/tcp/4001/ipfs/12D3KooWBqVQPjJur5EvjrizCyKG2d6eyCX8hxkvVXeUQHMjbWj9"] + BootstrapPeers = 
["/dns/api1.vega.community/tcp/4001/ipfs/12D3KooWDZrusS1p2XyJDbCaWkVDCk2wJaKi6tNb4bjgSHo9yi5Q","/dns/api2.vega.community/tcp/4001/ipfs/12D3KooWEH9pQd6P7RgNEpwbRyavWcwrAdiy9etivXqQZzd7Jkrh","/dns/api0.vega.community/tcp/4001/ipfs/12D3KooWAHkKJfX7rt1pAuGebP9g2BGTT5w7peFGyWd2QbpyZwaw"] [NetworkHistory.Initialise] @@ -108,7 +108,6 @@ To update Tendermint, you need to know the trust block and height. To collect th - https://api0.vega.community/api/v2/snapshots - https://api1.vega.community/api/v2/snapshots - https://api2.vega.community/api/v2/snapshots -- https://api3.vega.community/api/v2/snapshots Then select one of the latest pairs for block height and hash. @@ -118,7 +117,7 @@ Once you have the trusted block, you can update the following parameters in the ```toml [statesync] enable = true -rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657,api7.vega.community:26657" +rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657" trust_height = trust_hash = "" ``` @@ -132,25 +131,11 @@ Do not use the below block. Select a newer block! ```toml [statesync] enable = true -rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657,api7.vega.community:26657" +rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657" trust_height = 3040600 trust_hash = "b4b500d8fc84cce3a42b141193db7ba23ff03cc80b70cc817f6536582ebd5eda" ``` -Example config: - -:::warning -Do not use the below block. Select a newer block! -::: - -```toml -[statesync] -enable = true -rpc_servers = "n00.validators-testnet.vega.rocks:26657,n02.validators-testnet.vega.rocks:26657" -trust_height = 3896400 -trust_hash = "9edf8b5779aa79e96ac95256a7c671b855990be518a9c7cbb98eb8694918b004" -``` - ### 4. 
Start your node :::caution Start data node From a0283e616590f93fedd3b06c6d68b81915f063cd Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 21 Jun 2023 11:38:59 +0200 Subject: [PATCH 09/15] feat: add missing config revert --- .../how-to/restart-data-node-from-network-history.md | 2 ++ .../how-to/restart-data-node-from-network-history.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/node-operators/how-to/restart-data-node-from-network-history.md b/docs/node-operators/how-to/restart-data-node-from-network-history.md index 39bb738dd..3427bc7dc 100644 --- a/docs/node-operators/how-to/restart-data-node-from-network-history.md +++ b/docs/node-operators/how-to/restart-data-node-from-network-history.md @@ -275,6 +275,8 @@ enable = false Open the `/config/data-node/config.toml` file and update the following parmater: ```toml +AutoInitialiseFromNetworkHistory = false + [SQLStore] WipeOnStartup = false ``` diff --git a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md index fc6392e09..0737bdf5f 100644 --- a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md +++ b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md @@ -183,6 +183,8 @@ enable = false Open the `/config/data-node/config.toml` file and update the following parmater: ```toml +AutoInitialiseFromNetworkHistory = false + [SQLStore] WipeOnStartup = false ``` From 0831d166b0cf8f5b1875c3694b45911cac0f9a85 Mon Sep 17 00:00:00 2001 From: candida-d <62548908+candida-d@users.noreply.github.com> Date: Mon, 26 Jun 2023 10:52:56 +0100 Subject: [PATCH 10/15] remove duplicate --- .../get-started/setup-datanode.md | 53 ++ .../get-started/setup-datanode.md | 688 ++++++++++++++++++ 2 files changed, 741 insertions(+) create mode 100644 
versioned_docs/version-v0.71/node-operators/get-started/setup-datanode.md diff --git a/docs/node-operators/get-started/setup-datanode.md b/docs/node-operators/get-started/setup-datanode.md index e62c75b87..e1464cf2f 100644 --- a/docs/node-operators/get-started/setup-datanode.md +++ b/docs/node-operators/get-started/setup-datanode.md @@ -329,7 +329,60 @@ For example: ChunkInterval = "2 hours" ``` +<<<<<<< HEAD ## Resetting the data node +======= +## Generate config +To generate the configuration files you need for the data node, you can use the following command: + +```shell +vega datanode init --home="YOUR_DATA_NODE_HOME_PATH" "CHAIN_ID" +``` + +Find the `CHAIN_ID` by going to the relevant network genesis file in the relevant networks repo. + +Visit [networks ↗](https://github.com/vegaprotocol/networks/) for mainnet and other validator-run networks, or [networks-internal ↗](https://github.com/vegaprotocol/networks-internal) for Vega-run testnet networks. + +To update your data node configuration, such as to set up ports for the APIs or database credentials, edit the config file: + +```shell +"YOUR_DATA_NODE_HOME_PATH"/config/data-node/config.toml +``` + +## Configure nodes + +### Vega +To configure your Vega node to work with a data node you need to update the `[Broker.Socket]` section of the Vega configuration file `YOUR_VEGA_HOME_PATH/config/node/config.toml` from false to: + +```toml + [Broker.Socket] + ... + Enabled = true + ... +``` + +:::note +While it's possible to run the data node and Vega node on separate machines, it's not recommended given the volume of data that will be transferred between the two. 
+::: + +### Data node database +Data node database configuration is defined under the `[SQLStore.ConnectionConfig]` section of the data node configuration file `YOUR_DATA_NODE_HOME_PATH/config/data-node/config.toml`: + +```toml + [SQLStore.ConnectionConfig] + Host = "localhost" + Port = 5432 + Username = "USERNAME" + Password = "PASSWORD" + Database = "DATABASE_NAME" + MaxConnLifetime = "30m0s" + MaxConnLifetimeJitter = "5m0s" +``` + +You should ensure the database configuration matches those of the database you created in the pre-requisite steps. + +### Resetting the data node +>>>>>>> c7bbf785 (remove duplicate) :::warning Running the following command will remove all data from the data node and is not recoverable. ::: diff --git a/versioned_docs/version-v0.71/node-operators/get-started/setup-datanode.md b/versioned_docs/version-v0.71/node-operators/get-started/setup-datanode.md new file mode 100644 index 000000000..b606514f1 --- /dev/null +++ b/versioned_docs/version-v0.71/node-operators/get-started/setup-datanode.md @@ -0,0 +1,688 @@ +--- +sidebar_position: 4 +title: Set up a data node +hide_title: false +--- +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +To set up a data node, you must first have followed the guide to [install and set up a Vega node](setup-server.md). A data node must be run in conjunction with a **non-validator Vega node**. + +The non-validator Vega node will send the events it receives from the network and the events it creates to the data node, which will then store them in a database. An API is provided to query the data stored by the data node. + +Note: If you are running a Vega validator node it is recommended that you do not connect the data node to this node, but to a separate non-validator Vega node. + +The database used by the data node is a PostgreSQL database with the Timescale extension installed. 
The database can be a dedicated database server, a docker container, or an embedded version of PostgreSQL with Timescale installed that is provided by Vega. + +:::note Operating system +The following instructions assume you are installing on a Ubuntu Linux machine as explained in the [server setup guide](setup-server#os-and-software). +::: + +## Pre-requisites + +### Vega core +Please follow the instructions in the [server setup guide](setup-server.md) to install Vega. + +### PostgreSQL and TimescaleDB full installation +We have tested and recommend using version 2.8.0 of the TimescaleDB plugin with Postgres 14. + +Refer to the [PostgreSQL documentation ↗](https://www.postgresql.org/docs/14/index.html) for more detailed information on setting up a PostgreSQL database. + +:::note Linux and MacOS guides + + + + +### Linux users +To ensure you install the correct version of TimescaleDB, you can use the notes at the bottom of the [Timescales Documentation for Debian ↗](https://docs.timescale.com/install/latest/self-hosted/installation-debian/). + + + + +### MacOS users +To ensure you install the correct version of TimescaleDB, you can use the notes at the bottom of the [Timescales Documentation for Mac ↗](https://docs.timescale.com/install/latest/self-hosted/installation-macos/). + + +::: + +:::note Database user roles +Due to operations that are required for snapshotting, the database user for the data-node must have superuser privileges. This is a limitation that we currently have due to Postgresql 14 and TimescaleDB. +::: + +### PostgreSQL and TimescaleDB docker installation +If you prefer to run PostgreSQL and TimescaleDB in a docker container, you can use the following command to start a Postgresql docker container with TimescaleDB installed: + +This guide assumes you already have Docker installed on your system. For full installation guide consult Docker's [documentation ↗](https://docs.docker.com/engine/install/ubuntu/). 
+ +```Shell +docker volume create vega_pgdata + +docker run -d \ + --rm \ + --name MY_LOVELY_DB_CONTAINER \ + -e POSTGRES_USER=DATABASE_USER \ + -e POSTGRES_PASSWORD=DATABASE_PASSWORD \ + -e POSTGRES_DB=DATABASE_NAME \ + -p LOCALDB_PORT:5432 \ + -v vega_pgdata:/var/lib/postgresql/data \ + timescale/timescaledb:2.8.0-pg14 +``` + +Where: + +- `DATABASE_USER` is the user name you want to use to connect to the database. +- `DATABASE_PASSWORD` is the password you want to use to connect to the database. +- `DATABASE_NAME` is the name of the database you want to use for storing the data. +- `LOCALDB_PORT` is the port you want to use to connect to the database on your local machine. (5432 is the default port for Postgresql database server and may not be available if you already have a PostgreSQL database server running on your machine and want to use Docker for testing). + +You should also consider [PostgreSQL configuration tuning](#postgresql-configuration-tuning). + +Example command to start PostgreSQL: + +```shell +docker volume create vega_pgdata + +docker run -d \ + --rm \ + --name vega_postgresql \ + -e POSTGRES_USER=DATABASE_USER \ + -e POSTGRES_PASSWORD=DATABASE_PASSWORD \ + -e POSTGRES_DB=DATABASE_NAME \ + -p 5432:5432 \ + -v vega_pgdata:/var/lib/postgresql/data \ + timescale/timescaledb:2.8.0-pg14 \ + -c "max_connections=50" \ + -c "log_destination=stderr" \ + -c "work_mem=5MB" \ + -c "huge_pages=off" \ + -c "shared_memory_type=sysv" \ + -c "dynamic_shared_memory_type=sysv" \ + -c "shared_buffers=2GB" \ + -c "temp_buffers=5MB" +``` + +### PostgreSQL configuration tuning + +The default PostgreSQL configuration is not optimised for memory usage, and can be modified. + +Find the PostgreSQL parameters in the `postgresql.conf` file. The default file path for Linux and PostgreSQL 14 is: `/etc/postgresql/14/main/postgresql.conf`. + +:::note Memory usage +Total memory usage for PostgreSQL is predictable.
To determine the values of the parameters below, you must know how PostgreSQL uses the memory. +There is a shared memory area, sized by `shared_buffers`, that is used between all connections and background workers. + +Each background worker and connection has its own smaller chunk of memory: + +- `work_mem` - memory available for the query buffers in the connection session. +- `temp_buffers` - memory available for accessing temporary tables by the connection session. + +You can assume that `Max RAM` utilisation can be approximated as: `shared_buffers + (temp_buffers + work_mem) * max_connections`. +::: + +The suggested parameters are below. + +#### Max connections + +New value: + +```conf +max_connections = 50 +``` + +Limiting the maximum number of connections reduces the memory usage by PostgreSQL. + +#### Huge pages + +New value: + +```conf +huge_pages = off +``` + +The default value of the `huge_pages` config is `try`. Setting it to `off` usually reduces the RAM usage, however, it increases the CPU usage on the machine. + +#### Work mem + +New value: + +```conf +work_mem = 5MB +``` + +#### Temp buffers + +New value: + +```conf +temp_buffers = 5MB +``` + +#### Shared buffers + +New value: + +```conf +shared_buffers = 2GB +``` + +This value should be set to 25% of your server’s physical memory. The 2GB value would work for a server with 8GB physical memory. + +#### Dynamic shared memory type + +New value: + +```conf +dynamic_shared_memory_type = sysv +``` + +#### Shared memory type + +New value: + +```conf +shared_memory_type = sysv +``` + +The two above parameters determine how your operating system manages the shared memory. + +If your operating system supports the POSIX standard, you may want to use the `mmap` value both for the `dynamic_shared_memory_type` and `shared_memory_type`. But the `sysv` value is more portable than `mmap`. There is no significant difference in [performance ↗](https://lists.dragonflybsd.org/pipermail/kernel/attachments/20120913/317c1aab/attachment-0001.pdf).
+ +## Generate configuration files + +### Vega and Tendermint configuration +Before you can use Vega, you need to generate the default configuration files for Vega and Tendermint. You can then alter those to the specific requirements. + +The below command will create home paths (if they don't already exist) and generate the configuration in the paths you chose. + +```shell +vega init --home="YOUR_VEGA_HOME_PATH" --tendermint-home="YOUR_TENDERMINT_HOME_PATH" full +``` + +To update your node configuration, such as to set up ports for the APIs, edit the config file: + +```shell +"YOUR_VEGA_HOME_PATH"/config/node/config.toml +``` + +:::note Setting up validator node +For more information about setting up a validator node, see the [validator node setup guide](setup-validator.md). +::: + +### Data node configuration + +#### Data node retention profiles +When starting a data node, you can choose the data retention configuration for your data node, depending on the use case for the node. The retention policy details can all be fine-tuned manually, as well. + +There are 3 retention policy configurations: +* **Standard (default)**: The node retains data according to the default retention policies, which assume a data node retains some data over time, but not all data +* **Lite**: The node retains enough data to be able to provide the latest state to clients, and produce network history segments. 
This mode saves enough to provide the current state of accounts, assets, balances, delegations, liquidity provisions, live orders, margin levels, markets, network limits, network parameters, node details, parties, positions +* **Archive**: The node retains all data + +To run a node that doesn't use the standard default retention, use one of the following flags when running the `init` command: + +* For a standard node, no flag +* For an archive node, use `--archive` +* For a lite node, use `--lite` + +If you want to tweak the retention policy once the initial configuration has been generated, set it on per-table basis in the data node's `config.toml`. + +For example: + +```toml +[[SQLStore.RetentionPolicies]] + HypertableOrCaggName = "balances" + DataRetentionPeriod = "7 days" +``` + +Additionally, you can set the chunk interval for Timescale hypertables that are used to store historical data. Default values are chosen by Vega and are applied when the database migrations are run. The chunk interval determines how much data is stored in each chunk and affects the amount of RAM used by the database, as recent chunks are kept in memory in order to make querying faster. To change the chunk interval, set it on a per-table basis in the data node's `config.toml`. + +For example: + +```toml +[[SQLStore.ChunkIntervals]] + HypertableName = "orders" + ChunkInterval = "2 hours" +``` + +## Generate config +To generate the configuration files you need for the data node, you can use the following command: + +```shell +vega datanode init --home="YOUR_DATA_NODE_HOME_PATH" "CHAIN_ID" +``` + +Find the `CHAIN_ID` by going to the relevant network genesis file in the relevant networks repo. + +Visit [networks ↗](https://github.com/vegaprotocol/networks/) for mainnet and other validator-run networks, or [networks-internal ↗](https://github.com/vegaprotocol/networks-internal) for Vega-run testnet networks. 
+ +To update your data node configuration, such as to set up ports for the APIs or database credentials, edit the config file: + +```shell +"YOUR_DATA_NODE_HOME_PATH"/config/data-node/config.toml +``` + +## Configure nodes + +### Vega +To configure your Vega node to work with a data node you need to update the `[Broker.Socket]` section of the Vega configuration file `YOUR_VEGA_HOME_PATH/config/node/config.toml` from false to: + +```toml + [Broker.Socket] + ... + Enabled = true + ... +``` + +:::note +While it's possible to run the data node and Vega node on separate machines, it's not recommended given the volume of data that will be transferred between the two. +::: + +### Data node database +Data node database configuration is defined under the `[SQLStore.ConnectionConfig]` section of the data node configuration file `YOUR_DATA_NODE_HOME_PATH/config/data-node/config.toml`: + +```toml + [SQLStore.ConnectionConfig] + Host = "localhost" + Port = 5432 + Username = "USER_NAME" + Password = "PASSWORD" + Database = "DATABASE_NAME" + MaxConnLifetime = "30m0s" + MaxConnLifetimeJitter = "5m0s" +``` + +You should ensure the database configuration matches those of the database you created in the pre-requisite steps. + +### Resetting the data node +:::warning +Running the following command will remove all data from the data node and is not recoverable. +::: + +To reset the data node and remove all data, execute the command: + +```shell +vega datanode unsafe_reset_all +``` + +After this is done you can repopulate the data node by replaying the chain or by [initialising it from network history](../how-to/restart-data-node-from-network-history.md). + +### Embedded Postgres +:::warning +This is not recommended for use in production, but you can use it to test or learn about the system. +::: + +If you do not have access to, or do not want to use a PostgreSQL database server, or a Postgres Docker container, it is possible to run a data node with an embedded version of Postgres. 
You can enable this by setting the flag: + +```toml +[SQLStore] + ... + UseEmbedded = true + ... +``` + +This will cause data node to download a specially prepared Postgresql package which is extracted to your local machine if it doesn't exist. A separate Postgresql process will be spawned by data node using the credentials you specified in the database configuration section. Once data node is stopped, the child Postgresql process will be stopped automatically. + +You can launch PostgreSQL in its own separate process using the data node embedded PostgreSQL binaries by running the following command: + +```shell +vega datanode postgres run --home="YOUR_DATA_NODE_HOME_PATH" +``` + +In either case, the files for the database will be stored in the data node `state` folder located at `YOUR_DATA_NODE_HOME_PATH/state/data-node/storage/sqlstore`. + +### Buffered event source +When a data node is restarted from snapshots, it is possible for the event queue to become flooded, causing the Vega core to panic when the event queue is full and stop both the Vega core and data node. + +To prevent this, the buffered event source flag is set to true by default. You can confirm this by looking at the following config section: + +```toml +[Broker] + ... + UseBufferedEventSource = true + ... +``` + +## Start Vega and data node +It is recommended to start the data node before starting the Vega node. By default if the `Broker.Socket.Enabled` flag is set to true, the Vega node will attempt to connect to the data node on startup. It will continue to try and connect for one minute before giving up. 
+ +**If you're using [Vega Visor](setup-server#install-visor)**, start your data node by running the service manager of your choice and use the following command: + +```shell +visor run --home "VISOR_HOME_PATH" +``` + +If not using Vega Visor, to start the data node, run the following command: + +```shell +vega datanode start --home="YOUR_DATA_NODE_HOME_PATH" +``` + +## Fetch network history +After starting a data node, you can load in a segment of network history, if you want your node to have more data than provided by the current height. This is particularly useful if you're running an archive node. + +Before you can fetch network history, you will need to update your data node configuration file to add a list of bootstrap peers, as by default this is left empty and depends on which network you are connecting to: + +```toml +[NetworkHistory] +... + [NetworkHistory.Store] + ... + BootstrapPeers = ["/path/to/bootstrap-peer/1","/path/to/bootstrap-peer/2"] +``` + +To get a list of bootstrap peers available for your network, you can make a HTTP request to the API service for your chosen network: `/api/v2/networkhistory`. + +```json +{ + "ipfsAddress": "/ip4//tcp/4001/p2p/", + "swarmKey": "/key/swarm/psk/1.0.0/\n/base16/\n", + "swarmKeySeed": "", + "connectedPeers": [ + "/ip4//tcp/4001/p2p/", + ... + ] +} +``` + + +To see how much network history your data node has, run the following command: + +```shell +vega datanode network-history show --home="YOUR_DATA_NODE_HOME_PATH" +``` + +### Fetching network history for a new data node + +If you are building a new data node, you may have no history and you will see this message: + +```shell +No network history is available. 
Use the fetch command to fetch network history +``` + +In this case, it is possible to get the latest history segment IDs from your network history bootstrap peers: + +```shell +vega datanode network-history latest-history-segment-from-peers --home="YOUR_DATA_NODE_HOME_PATH" +``` + +This will return a list of peers you can fetch network history from: + +```text +Most Recent History Segments: + +Peer:, Swarm Key:, Segment{from_height:75001 to_height:75300 history_segment_id:"some-segment-id" previous_history_segment_id:"some-other-segment-id"} +... +``` + +:::note +It is possible that some data peers are behind and may not have the latest data. However, the `history_segment_id` should be the same across all nodes that are at the same height. +::: + + +To fetch a network history segment, run the command below. Use the ID of the segment you want (for example, the latest) followed by the number of blocks prior to the segment's height that you want to fetch. `2000` is used in the following example. This will result in all blocks from height N-2000 to N being retrieved. + +```shell +vega datanode network-history fetch <history-segment-id> 2000 --home="YOUR_DATA_NODE_HOME_PATH" +``` + +Once the network history segments have been downloaded, running: + +```shell +vega datanode network-history show --home="YOUR_DATA_NODE_HOME_PATH" +``` + +should display the network history you have: + +```text +Available contiguous history spans: + +Contiguous history from block height XXXXX to XXXXX, from segment id: to + + +Datanode contains no data +``` + +### Loading network history into data node +Now that you have downloaded historical data you can load the history into the data node using: + +```shell +vega datanode network-history load --home="YOUR_DATA_NODE_HOME_PATH" +``` + +:::note +Data node must not be running when you use this command.
If you attempt to use `network-history load` while data node is running, you will get an error: + +```text +datanode must be shutdown before data can be loaded +``` +::: + +You will be notified that the load command will force all existing connections to the data node database to be closed automatically, and you will be prompted to confirm that you want to continue. Enter `y` and data node will check how much history you have, tell you what it will load, and prompt you to confirm that you want to continue. Enter `y` again and the data node will load the history you have fetched. + +This process may take a very long time depending on how much history you have retrieved and are loading into the database. Some processes in the restoration can take a long time to complete and it may look like the process is not doing anything. Do not try to terminate the process early or you may end up with a corrupted data node database. + + +## Data node recovery from network history + +It is possible to start a data node and initialise it using network history automatically. By default, this process is disabled, and if enabled will only fetch - if you don't have the snapshot locally - the last segment before loading it into the data node. This is to allow users to quickly initialise a data node without fetching a lot of data and start participating while the data node is fetching more data in the background. + +To enable this feature, set the `AutoInitialiseFromNetworkHistory` setting in the data node configuration file to `true`. It is safe to leave this setting set to `true`. + +If the data node already contains data that should be removed before loading from network history this can be done using the following command: + + +`vega datanode unsafe_reset_all` + +If you want to fetch more than the last segment, you may also set the `MinimumBlockCount` configuration setting.
+ +If you want to initialise the data node automatically up to a specific segment and X blocks before it, you can set both the `ToSegment` and `MinimumBlockCount` configuration settings. + +```toml +AutoInitialiseFromNetworkHistory = true + +[NetworkHistory.Initialise] + ToSegment = "" + MinimumBlockCount = +``` + +To initialise the data node with the latest segment, you can leave `ToSegment` empty. By default, this will ensure you initialise the data node with the latest segment with the number of blocks that are specified in `MinimumBlockCount`. The default `MinimumBlockCount` is 1. + +If you are trying to initialise the data node with a large number of blocks, or have a slow internet connection for example, it is possible that the network history download will fail due to a timeout. You may see an error in your data node logs such as: + +```text +2023-03-29T14:45:44.516+0100 ERROR datanode.start.persistentPre backoff@v2.2.1+incompatible/retry.go:37 failed to fetch history blocks: failed to fetch history:could not write out the fetched history segment: context deadline exceeded +``` + +To extend the timeout, you may set the `Timeout` setting for `NetworkHistory.Initialise`: + +```toml +[NetworkHistory.Initialise] + Timeout = "15m0s" +``` + +Additionally you can also set the `FetchRetryMax` configuration setting: + +```toml +[NetworkHistory.Initialise] + FetchRetryMax = 5 +``` + +Retries will find segments that have already been downloaded and not try to download them again. By default the process will wait 1 second between retries, but you can change this by setting the `RetryTimeout` configuration setting: + +```toml +[NetworkHistory.Initialise] + RetryTimeout = "5s" +``` + + +## Network history troubleshooting + +### Network history cannot find any history + +When using the `latest-history-segment-from-peers` command, the peers may return no history. There can be a number of reasons why this is not working: + +1.
You have copied a configuration file from another machine and the PeerID has been duplicated. +2. You are using an out-of-date ChainID. +3. You have not configured any network peers. + +If you have copied a configuration file from another machine and the other machine already exists on the network, running `network-history` commands may fail due to duplicated `PeerID` you have copied to the new machine. You should use the `vega datanode init` command to create a new configuration file, and copy the `PeerID` from that into your configuration file, or update the newly generated configuration file with configuration settings you have obtained from the existing configuration file. + +If the network's `ChainID` has been updated and the `ChainID` your data node is configured with is out of date, the mismatched `ChainID` will prevent your request from being accepted. You will need to update the `ChainID` in your data node configuration file to the appropriate ChainID. + +If you have not configured any network peers, follow the instructions found under [Fetch Network History](#fetch-network-history) to add the peers to your data node configuration file. + +## Configure data node APIs +In order for clients to communicate with data nodes, we expose a set of APIs and methods for reading data. + +There are currently three protocols to communicate with the data node APIs: + +### gRPC +gRPC is an open source remote procedure call (RPC) system initially developed at Google. In data node the gRPC API features streaming of events in addition to standard procedure calls. + +The default port (configurable) for the gRPC API is `3007` and matches the [gRPC protobuf definition ↗](https://github.com/vegaprotocol/vega/tree/develop/protos). 
+ +gRPC configurations are defined under the `[Gateway.Node]` section of the data node configuration file `YOUR_DATA_NODE_HOME_PATH/config/data-node/config.toml`: + +```toml + [Gateway.Node] + Port = 3007 + IP = "0.0.0.0" +``` + +### GraphQL +[GraphQL ↗](https://graphql.org/) is an open-source data query and manipulation language for APIs, and a runtime for fulfilling queries with existing data, originally developed at Facebook. The [Console ↗](https://github.com/vegaprotocol/frontend-monorepo/tree/develop/apps/trading) uses the GraphQL API to retrieve data including streams of events. + +The GraphQL API is defined by a [schema ↗](https://github.com/vegaprotocol/vega/blob/master/datanode/gateway/graphql/schema.graphql). External clients will use this schema to communicate with Vega. + +Queries can be tested using the GraphQL playground app which is bundled with a node. The default port (configurable) for the playground app is `3008`. Accessing this port in a web browser will show a web app for testing custom queries, mutations and subscriptions. + +The GraphQL default port and other configuration options can be found in the data node configuration file `YOUR_DATA_NODE_HOME_PATH/config/data-node/config.toml` under the `Gateway.GraphQL` section: + +```toml + [Gateway.GraphQL] + Port = 3008 + IP = "0.0.0.0" + Enabled = true + ComplexityLimit = 0 + HTTPSEnabled = false + AutoCertDomain = "" + CertificateFile = "" + KeyFile = "" + Endpoint = "/graphql" +``` + +#### HTTPS +The REST and GraphQL API gateway can be configured to use secure HTTP (HTTPS) connections. + +**GraphQL subscriptions do not work properly unless HTTPS is enabled**. + +You will need your data node to be reachable over the internet with a proper fully qualified domain name, and a valid signed certificate.
You may either: + +* Provide data node with a path to an existing signed certificate and corresponding private key +* Configure data node to create a certificate for you, and automatically request a signature via `LetsEncrypt` + +In the former case, where you already have a certificate and corresponding private key file, you can specify them as follows: + +```toml + [Gateway] + HTTPSEnabled = true + CertificateFile = "/path/to/certificate/file" + KeyFile = "/path/to/key/file" +``` + +You can buy a certificate from a verified source and save the obtained file to your preferred location. It is advised that the certificate and key files have a permission mask of `0600` and that the directory where they are located has a permission mask of `0700`. + +Many administrators prefer to use a tool called `certbot` for generating and signing free certificates via `LetsEncrypt`. To obtain a signed certificate with this method: +* [Install certbot ↗](https://www.inmotionhosting.com/support/website/ssl/lets-encrypt-ssl-ubuntu-with-certbot/) +* Run `certbot certonly --standalone` to generate the certificate +* Place the generated `fullchain.pem` into the `Gateway.CertificateFile` location and the corresponding `privkey.pem` into the `Gateway.KeyFile` location. +* Read the [configuration considerations](https://serverfault.com/questions/790772/best-practices-for-setting-a-cron-job-for-lets-encrypt-certbot-renewal) for certbot in crontab. + +Data node can optionally perform a similar role to `certbot` and manage creation and signing of certificates automatically via LetsEncrypt. To enable this feature, specify an `AutoCertDomain` instead of `CertificateFile` and `KeyFile` paths in the `[Gateway]` section of the data node's configuration file.
For example: + +```toml + [Gateway] + HTTPSEnabled = true + AutoCertDomain = "my.lovely.domain.com" +``` + +**It is a hard requirement of the `LetsEncrypt` validation process that the server answering its challenge is running on the standard HTTPS port (443).** By default, the GraphQL API listens on port 3008, and the REST API runs on 3009, so the validation will not succeed. This means if you wish to make use of data node's automatic certificate management, you must do one of the following: + +* Forward port 443 on your machine to the GraphQL or REST API port using `iptables` or a similar network configuration CLI. Example using `iptables`: `iptables -A PREROUTING -t nat -p tcp --dport 443 -j DNAT --to-destination :3008` +* Proxy pass to port 3008 by using a reverse proxy server. Some example sources on how to set one up: + - [`caddy`](https://caddyserver.com/docs/quick-starts/reverse-proxy) + - [`nginx`](https://docs.nginx.com/nginx/admin-guide/web-server/reverse-proxy/) + - [`httpd`](https://httpd.apache.org/docs/2.4/howto/reverse_proxy.html) +* Directly listen on port 443 instead of the default with either the GraphQL or REST gateways in data node by specifying the following configuration: + +```toml + [Gateway.GraphQL] + Port = 443 +``` + +or + +```toml + [Gateway.REST] + Port = 443 +``` +Note that Linux systems generally require processes listening on ports under 1024 to either run as root, or be specifically granted permission, e.g. by launching with the following: + +```shell +setcap cap_net_bind_service=ep vega datanode run +``` + +#### GraphQL complexity +Currently the GraphQL complexity limit is globally set to 3750. + +This setting is theoretical at the moment and will be refined and have different levels for different queries/resolvers in the future. + +The intention behind this limit is to prevent the Vega system from being abused by heavy queries (denial of service, DoS). The complexity level is mostly affected by the number of objects a query contains.
So the heaviest ones we currently have in the system are: + +| Query | Items | Complexity | +| ----- | ----- | ---------- | +| SimpleMarkets (embedded candles) | 1 candle | 151 | +| SimpleMarkets (embedded candles) | 91 candles | 788 | +| MarketInfo (embedded candles) | 1 candle | 399 | +| MarketInfo (embedded candles) | 91 candles | 1036 | +| Orders (embedded orders) | 1 order | 163 | +| Orders (embedded orders) | 80 orders | 4003 | +| Trades (embedded trades) | 1 trade | 118 | +| Trades (embedded trades) | 75 trades | 1393 | +| Positions (embedded positions) | 1 position | 129 | +| Positions (embedded positions) | 40 positions | 2500 | + +The approximate number of positions queried by customers is 40. + +The GraphQL API will return an error for queries that have complexity above the set limit: "GraphQL error: Query is too complex to execute" and will not proceed with execution. + +### REST +REST provides a standard between computer systems on the web, making it easier for systems to communicate with each other. It is arguably simpler to work with than gRPC and GraphQL. In Vega the REST API is a reverse proxy to the gRPC API, however it does not support streaming. + +The default port (configurable) for the REST API is `3009` and we use a reverse proxy to the gRPC API to deliver the REST API implementation. + +## Further reading +For more information about data node and developing on data node please see the data node [README ↗](https://github.com/vegaprotocol/vega/blob/master/datanode/README.md). + +## Data node troubleshooting + +### Block height on begin block is too high + +If you start data node and you receive an error that looks like: + +```text +block height on begin block, XXXXXX, is too high, the height of the last processed block is XXXXXX +``` + +This indicates that the core node block height is ahead of the data node it is connected to. + +There are a number of ways this can happen: + +1.
Data node has been added and connected to a core node that has been running for a while and no history has been loaded into data node. +2. Core has been started from a snapshot, but data node has had no history loaded. +3. Data node was restarted with `WipeOnStartup` set to true. +4. Core has been started from snapshot, and data node has been started from an earlier snapshot than core. + +Data node requires that the first block of data it receives from core is no more than the last block height received by data node + 1. If the height of the block received from core is lower than the last block height received by data node, the events from core are ignored until events from the appropriate next block are received. + +To fix this problem, use the [network-history load](#loading-network-history-into-data-node) command, or use [AutoInitialiseFromNetworkHistory](#data-node-recovery-from-network-history) to ensure the data-node contains the necessary history before starting the core node. From 56874c616e77fbe6759b472a5b9e002b543f036d Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 24 Oct 2023 18:58:51 +0200 Subject: [PATCH 11/15] feat: changes for v0.73 --- .../how-to/restart-data-node-from-network-history.md | 4 ++-- .../how-to/restart-data-node-from-network-history.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/node-operators/how-to/restart-data-node-from-network-history.md b/docs/node-operators/how-to/restart-data-node-from-network-history.md index 3427bc7dc..9557e1e65 100644 --- a/docs/node-operators/how-to/restart-data-node-from-network-history.md +++ b/docs/node-operators/how-to/restart-data-node-from-network-history.md @@ -123,7 +123,7 @@ The config is located in the `/config/node/config.toml`.
Update the f ```toml [Snapshot] - StartHeight = -1 + StartHeight = 0 [Broker] [Broker.Socket] diff --git a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md index 0737bdf5f..2e0983aa2 100644 --- a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md +++ b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md @@ -94,7 +94,7 @@ The config is located in the `/config/node/config.toml`. Update the f ```toml [Snapshot] - StartHeight = -1 + StartHeight = 0 [Broker] [Broker.Socket] From 7883793fe9b55f6d1aad41c3bcfe69dac07706b5 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 10 Dec 2023 14:10:11 +0100 Subject: [PATCH 12/15] feat: update network history docs --- .../how-to/restart-data-node-from-network-history.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md index 2e0983aa2..2cb72bffe 100644 --- a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md +++ b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md @@ -85,6 +85,7 @@ AutoInitialiseFromNetworkHistory = true [NetworkHistory.Initialise] TimeOut = "4h" + MinimumBlockCount = 10000 ``` @@ -132,8 +133,8 @@ Do not use the below block. Select a newer block! [statesync] enable = true rpc_servers = "api0.vega.community:26657,api1.vega.community:26657,api2.vega.community:26657" -trust_height = 3040600 -trust_hash = "b4b500d8fc84cce3a42b141193db7ba23ff03cc80b70cc817f6536582ebd5eda" +trust_height = 26530300 +trust_hash = "19d3510c1bda5d05a88429bb5a1f182f9b037c5b0975800cc18fe3bf8c75061b" ``` ### 4. 
Start your node From 8b77e06710282ccda277291160c2d1cceeea5f4f Mon Sep 17 00:00:00 2001 From: daniel1302 Date: Mon, 1 Jan 2024 13:09:44 +0100 Subject: [PATCH 13/15] feat: cleanup after rebase --- .../get-started/setup-datanode.md | 688 ------------------ 1 file changed, 688 deletions(-) delete mode 100644 versioned_docs/version-v0.71/node-operators/get-started/setup-datanode.md diff --git a/versioned_docs/version-v0.71/node-operators/get-started/setup-datanode.md b/versioned_docs/version-v0.71/node-operators/get-started/setup-datanode.md deleted file mode 100644 index b606514f1..000000000 --- a/versioned_docs/version-v0.71/node-operators/get-started/setup-datanode.md +++ /dev/null @@ -1,688 +0,0 @@ ---- -sidebar_position: 4 -title: Set up a data node -hide_title: false ---- -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - -To set up a data node, you must first have followed the guide to [install and set up a Vega node](setup-server.md). A data node must be run in conjunction with a **non-validator Vega node**. - -The non-validator Vega node will send the events it receives from the network and the events it creates to the data node, which will then store them in a database. An API is provided to query the data stored by the data node. - -Note: If you are running a Vega validator node it is recommended that you do not connect the data node to this node, but to a separate non-validator Vega node. - -The database used by the data node is a PostgreSQL database with the Timescale extension installed. The database can be a dedicated database server, a docker container, or an embedded version of PostgreSQL with Timescale installed that is provided by Vega. - -:::note Operating system -The following instructions assume you are installing on a Ubuntu Linux machine as explained in the [server setup guide](setup-server#os-and-software). 
-::: - -## Pre-requisites - -### Vega core -Please follow the instructions in the [server setup guide](setup-server.md) to install Vega. - -### PostgreSQL and TimescaleDB full installation -We have tested and recommend using version 2.8.0 of the TimescaleDB plugin with Postgres 14. - -Refer to the [PostgreSQL documentation ↗](https://www.postgresql.org/docs/14/index.html) for more detailed information on setting up a PostgreSQL database. - -:::note Linux and MacOS guides - - - - -### Linux users -To ensure you install the correct version of TimescaleDB, you can use the notes at the bottom of the [Timescales Documentation for Debian ↗](https://docs.timescale.com/install/latest/self-hosted/installation-debian/). - - - - -### MacOS users -To ensure you install the correct version of TimescaleDB, you can use the notes at the bottom of the [Timescales Documentation for Mac ↗](https://docs.timescale.com/install/latest/self-hosted/installation-macos/). - - -::: - -:::note Database user roles -Due to operations that are required for snapshotting, the database user for the data-node must have superuser privileges. This is a limitation that we currently have due to Postgresql 14 and TimescaleDB. -::: - -### PostgreSQL and TimescaleDB docker installation -If you prefer to run PostgreSQL and TimescaleDB in a docker container, you can use the following command to start a Postgresql docker container with TimescaleDB installed: - -This guide assumes you already have Docker installed on your system. For full installation guide consult Docker's [documentation ↗](https://docs.docker.com/engine/install/ubuntu/). 
- -```Shell -docker volume create vega_pgdata - -docker run -d \ - --rm \ - --name MY_LOVELY_DB_CONTAINER \ - -e POSTGRES_USER=DATABASE_USER \ - -e POSTGRES_PASSWORD=DATABASE_PASSWORD \ - -e POSTGRES_DB=DATABASE_NAME \ - -p LOCALDB_PORT:5432 \ - -v vega_pgdata:/var/lib/postgresql/data \ - timescale/timescaledb:2.8.0-pg14 -``` - -Where: - -- `database_user` is the user name you want to use to connect to the database. -- `database_password` is the password you want to use to connect to the database. -- `database_name` is the name of the database you want to use for storing the data. -- `localdb_port` is the port you want to use to connect to the database on your local machine. (5432 is the default port for Postgresql database server and may not be available if you already have a PostgreSQL database server running on your machine and want to use Docker for testing). - -You should also consider [PostgreSQL configuration tuning](#postgresql-configuration-tuning). - -Example command to start PostgreSQL: - -```shell -docker volume create vega_pgdata - -docker run -d \ - --rm \ - --name vega_postgresql \ - -e POSTGRES_USER=DATABASE_USER \ - -e POSTGRES_PASSWORD=DATABASE_PASSWORD \ - -e POSTGRES_DB=DATABASE_NAME \ - -p 5432:5432 \ - -v vega_pgdata:/var/lib/postgresql/data \ - timescale/timescaledb:2.8.0-pg14 \ - -c "max_connections=50" \ - -c "log_destination=stderr" \ - -c "work_mem=5MB" \ - -c "huge_pages=off" \ - -c "shared_memory_type=sysv" \ - -c "dynamic_shared_memory_type=sysv" \ - -c "shared_buffers=2GB" \ - -c "temp_buffers=5MB" -``` - -### PostgreSQL configuration tuning - -The default PostgreSQL configuration is not optimised for memory usage, and can be modified. - -Find the PostgreSQL parameters in the `postgresq.conf` file. The default file path for Linux and PostgreSQL 14 is: `/etc/postgresql/14/main/postgresql.conf`. - -:::note Memory usage -Total memory usage for PostgreSQL is predictable. 
To determine the values of the parameters below, you must know how PostgreSQL uses the memory. -There is a `shared_memory` that is used between all connections and background workers. - -Each background worker and connection has its own smaller chunk of memory: - -- `work_mem` - memory available for the query buffers in the connection session. -- `temp_buffers` - memory available for accessing temporary tables by the connection session. - -You can assume that `Max RAM` utilisation can be rounded to: `shared_buffer + (temp_buffers + work_mem) * max_connections`. -::: - -The suggested parameters are below. - -#### Max connections - -New value: - -```conf -max_connections = 50 -``` - -Limiting the maximum number of connections reduces the memory usage by PostgreSQL. - -#### Huge pages - -New value: - -```conf -huge_pages = off -``` - -The default value of the `huge_pages` config is `try`. Setting it to `off` usually reduces the RAM usage, however, it increases the CPU usage on the machine. - -#### Work mem - -New value: - -```conf -work_mem = 5MB -``` - -#### Temp buffers - -New value: - -```conf -temp_buffers = 5MB -``` - -#### Shared buffers - -New value: - -```conf -shared_buffers = 2GB -``` - -This value should be set to 25% of your server’s physical memory. The 2GB value would work for a server with 8GB physical memory. - -#### Dynamic shared memory type - -New value: - -```conf -dynamic_shared_memory_type = sysv -``` - -#### Shared memory type - -New value: - -```conf -shared_memory_type = sysv -``` - -The two above parameters determine how your operating system manages the shared memory. - -If your operating system supports the POSIX standard, you may want to use the `map` value both for the `dynamic_shared_memory_type` and `shared_memory_type`. But the `sysv` value is more portable than `map`. There is no significant difference in [performance ↗](https://lists.dragonflybsd.org/pipermail/kernel/attachments/20120913/317c1aab/attachment-0001.pdf). 
- -## Generate configuration files - -### Vega and Tendermint configuration -Before you can use Vega, you need to generate the default configuration files for Vega and Tendermint. You can then alter those to the specific requirements. - -The below command will create home paths (if they don't already exist) and generate the configuration in the paths you chose. - -```shell -vega init --home="YOUR_VEGA_HOME_PATH" --tendermint-home="YOUR_TENDERMINT_HOME_PATH" full -``` - -To update your node configuration, such as to set up ports for the APIs, edit the config file: - -```shell -"YOUR_VEGA_HOME_PATH"/config/node/config.toml -``` - -:::note Setting up validator node -For more information about setting up a validator node, see the [validator node setup guide](setup-validator.md). -::: - -### Data node configuration - -#### Data node retention profiles -When starting a data node, you can choose the data retention configuration for your data node, depending on the use case for the node. The retention policy details can all be fine-tuned manually, as well. - -There are 3 retention policy configurations: -* **Standard (default)**: The node retains data according to the default retention policies, which assume a data node retains some data over time, but not all data -* **Lite**: The node retains enough data to be able to provide the latest state to clients, and produce network history segments. 
This mode saves enough to provide the current state of accounts, assets, balances, delegations, liquidity provisions, live orders, margin levels, markets, network limits, network parameters, node details, parties, positions -* **Archive**: The node retains all data - -To run a node that doesn't use the standard default retention, use one of the following flags when running the `init` command: - -* For a standard node, no flag -* For an archive node, use `--archive` -* For a lite node, use `--lite` - -If you want to tweak the retention policy once the initial configuration has been generated, set it on per-table basis in the data node's `config.toml`. - -For example: - -```toml -[[SQLStore.RetentionPolicies]] - HypertableOrCaggName = "balances" - DataRetentionPeriod = "7 days" -``` - -Additionally, you can set the chunk interval for Timescale hypertables that are used to store historical data. Default values are chosen by Vega and are applied when the database migrations are run. The chunk interval determines how much data is stored in each chunk and affects the amount of RAM used by the database, as recent chunks are kept in memory in order to make querying faster. To change the chunk interval, set it on a per-table basis in the data node's `config.toml`. - -For example: - -```toml -[[SQLStore.ChunkIntervals]] - HypertableName = "orders" - ChunkInterval = "2 hours" -``` - -## Generate config -To generate the configuration files you need for the data node, you can use the following command: - -```shell -vega datanode init --home="YOUR_DATA_NODE_HOME_PATH" "CHAIN_ID" -``` - -Find the `CHAIN_ID` by going to the relevant network genesis file in the relevant networks repo. - -Visit [networks ↗](https://github.com/vegaprotocol/networks/) for mainnet and other validator-run networks, or [networks-internal ↗](https://github.com/vegaprotocol/networks-internal) for Vega-run testnet networks. 
- -To update your data node configuration, such as to set up ports for the APIs or database credentials, edit the config file: - -```shell -"YOUR_DATA_NODE_HOME_PATH"/config/data-node/config.toml -``` - -## Configure nodes - -### Vega -To configure your Vega node to work with a data node you need to update the `[Broker.Socket]` section of the Vega configuration file `YOUR_VEGA_HOME_PATH/config/node/config.toml` from false to: - -```toml - [Broker.Socket] - ... - Enabled = true - ... -``` - -:::note -While it's possible to run the data node and Vega node on separate machines, it's not recommended given the volume of data that will be transferred between the two. -::: - -### Data node database -Data node database configuration is defined under the `[SQLStore.ConnectionConfig]` section of the data node configuration file `YOUR_DATA_NODE_HOME_PATH/config/data-node/config.toml`: - -```toml - [SQLStore.ConnectionConfig] - Host = "localhost" - Port = 5432 - Username = "USER_NAME" - Password = "PASSWORD" - Database = "DATABASE_NAME" - MaxConnLifetime = "30m0s" - MaxConnLifetimeJitter = "5m0s" -``` - -You should ensure the database configuration matches those of the database you created in the pre-requisite steps. - -### Resetting the data node -:::warning -Running the following command will remove all data from the data node and is not recoverable. -::: - -To reset the data node and remove all data, execute the command: - -```shell -vega datanode unsafe_reset_all -``` - -After this is done you can repopulate the data node by replaying the chain or by [initialising it from network history](../how-to/restart-data-node-from-network-history.md). - -### Embedded Postgres -:::warning -This is not recommended for use in production, but you can use it to test or learn about the system. -::: - -If you do not have access to, or do not want to use a PostgreSQL database server, or a Postgres Docker container, it is possible to run a data node with an embedded version of Postgres. 
You can enable this by setting the flag: - -```toml -[SQLStore] - ... - UseEmbedded = true - ... -``` - -This will cause data node to download a specially prepared Postgresql package which is extracted to your local machine if it doesn't exist. A separate Postgresql process will be spawned by data node using the credentials you specified in the database configuration section. Once data node is stopped, the child Postgresql process will be stopped automatically. - -You can launch PostgreSQL in its own separate process using the data node embedded PostgreSQL binaries by running the following command: - -```shell -vega datanode postgres run --home="YOUR_DATA_NODE_HOME_PATH" -``` - -In either case, the files for the database will be stored in the data node `state` folder located at `YOUR_DATA_NODE_HOME_PATH/state/data-node/storage/sqlstore`. - -### Buffered event source -When a data node is restarted from snapshots, it is possible for the event queue to become flooded, causing the Vega core to panic when the event queue is full and stop both the Vega core and data node. - -To prevent this, the buffered event source flag is set to true by default. You can confirm this by looking at the following config section: - -```toml -[Broker] - ... - UseBufferedEventSource = true - ... -``` - -## Start Vega and data node -It is recommended to start the data node before starting the Vega node. By default if the `Broker.Socket.Enabled` flag is set to true, the Vega node will attempt to connect to the data node on startup. It will continue to try and connect for one minute before giving up. 
- -**If you're using [Vega Visor](setup-server#install-visor)**, start your data node by running the service manager of your choice and use the following command: - -```shell -visor run --home "VISOR_HOME_PATH" -``` - -If not using Vega Visor, to start the data node, run the following command: - -```shell -vega datanode start --home="YOUR_DATA_NODE_HOME_PATH" -``` - -## Fetch network history -After starting a data node, you can load in a segment of network history, if you want your node to have more data than provided by the current height. This is particularly useful if you're running an archive node. - -Before you can fetch network history, you will need to update your data node configuration file to add a list of bootstrap peers, as by default this is left empty and depends on which network you are connecting to: - -```toml -[NetworkHistory] -... - [NetworkHistory.Store] - ... - BootstrapPeers = ["/path/to/bootstrap-peer/1","/path/to/bootstrap-peer/2"] -``` - -To get a list of bootstrap peers available for your network, you can make a HTTP request to the API service for your chosen network: `/api/v2/networkhistory`. - -```json -{ - "ipfsAddress": "/ip4//tcp/4001/p2p/", - "swarmKey": "/key/swarm/psk/1.0.0/\n/base16/\n", - "swarmKeySeed": "", - "connectedPeers": [ - "/ip4//tcp/4001/p2p/", - ... - ] -} -``` - - -To see how much network history your data node has, run the following command: - -```shell -vega datanode network-history show --home="YOUR_DATA_NODE_HOME_PATH" -``` - -### Fetching network history for a new data node - -If you are building a new data node, you may have no history and you will see this message: - -```shell -No network history is available. 
Use the fetch command to fetch network history -``` - -In this case, it is possible to get the latest history segment ids from your network history bootstrap peers: - -```shell -vega datanode network-history latest-history-segment-from-peers --home="YOUR_DATA_NODE_HOME_PATH" -``` - -This will return a list of peers you can fetch network history from: - -```text -Most Recent History Segments: - -Peer:, Swarm Key:, Segment{from_height:75001 to_height:75300 history_segment_id:"some-segment-id" previous_history_segment_id:"some-other-segment-id"} -... -``` - -:::note -It is possible that some data peers are behind and may not have the latest data. However, the `history_segment_id` should be the same across all nodes that are at the same height. -::: - - -To fetch a network history segment, run the command below. Use the ID of the segment you want (for example, the latest) followed by the number of blocks prior to the segment's height that you want fetch. `2000` is used in the following example. This will result in all blocks from height N-2000 to N being retrieved. - -```shell -vega datanode network-history fetch 2000 --home="YOUR_DATA_NODE_HOME_PATH" -``` - -Once the network history segments have been downloaded, running: - -```shell -vega datanode network-history show --home="YOUR_DATA_NODE_HOME_PATH" -``` - -should display the network history you have: - -```text -Available contiguous history spans: - -Contiguous history from block height XXXXX to XXXXX, from segment id: to - - -Datanode contains no data -``` - -### Loading network history into data node -Now that you have downloaded historical data you can load the history into the data node using: - -```shell -vega datanode network-history load --home="YOUR_DATA_NODE_HOME_PATH" -``` - -:::note -Data node must not be running when you use this command. 
If you attempt to use `network-history load` while data node is running, you will get an error: - -```text -datanode must be shutdown before data can be loaded -``` -::: - -You will be notified that the load command will force all existing connections to the data node database will be closed automatically and prompted if you want to continue. Enter `y` and data node will check how much history you have, tell you what it will load, and prompts you to confirm if you want to continue. Enter `y` again and the data node will load the history you have fetched. - -This process may take a very long time depending on how much history you have retrieved and are loading into the database. Some processes in the restoration can take a long time to complete and it may look like the process is not doing anything. Do not try to terminate the process early or you may end up with a corrupted data node database. - - -## Data node recovery from network history - -It is possible to start a data node and initialise it using network history automatically. By default, this process is disabled, and if enabled will only fetch - if you don't have the snapshot locally - the last segment before loading it into the data node. This is to allow users to quickly initialise a data node without fetching a lot of data and start participating while the data node is fetching more data in the background. - -To enable this feature, set the `AutoInitialiseFromNetworkHistory` setting in the data node configuration file to `true`. It is safe to leave this setting to true. - -If the data node already contains data that should be removed before loading from network history this can be done using the following command: - - -`vega datanode unsafe_reset_all` - -If you want to fetch more than the last segment, you may also set the `MinimumBlockCount` configuration setting. 
- -If you want to initialise the data node automatically up to a specific segment and X blocks before it, you can set both the `ToSegment` and `MinimumBlockCount` configuration settings. - -```toml -AutoInitialiseFromNetworkHistory = true - -[NetworkHistory.Initialise] - ToSegment = "" - MinimumBlockCount = -``` - -To initialise the data node with the latest segment, you can leave `ToSegment` empty. By default, this will ensure you initialise the data node with the latest segment with the number of blocks that are specified in `MinimumBlockCount`. The default `MinimumBlockCount` is 1. - -If you are trying to initialise the data node with a large number of blocks, or have a slow internet connection for example, it is possible that the network history download will fail due to a timeout. Your data node logs may see an error such as: - -```text -2023-03-29T14:45:44.516+0100 ERROR datanode.start.persistentPre backoff@v2.2.1+incompatible/retry.go:37 failed to fetch history blocks: failed to fetch history:could not write out the fetched history segment: context deadline exceeded -``` - -To extend the timeout, you may set the `Timeout` setting for `NetworkHistory.Initialise`: - -```toml -[NetworkHistory.Initialise] - Timeout = "15m0s" -``` - -Additionally you can also set the `FetchRetryMax` configuration setting: - -```toml -[NetworkHistory.Initialise] - FetchRetryMax = 5 -``` - -Retries will find segments that have already been downloaded and not try to download them again. By default the process will wait 1 second between retries, but you can change this by setting the `RetryTimeout` configuration setting: - -```toml -[NetworkHistory.Initialise] - RetryTimeout = "5s" -``` - - -## Network history troubleshooting - -### Network history cannot find any history - -When using `latest-history-from-peers` command, the peers returns no history. There can be a number of reasons why this is not working: - -1. 
You have copied a configuration file from another machine and the PeerID has been duplicated. -2. You are using an out-of-date ChainID. -3. You have not configured any network peers. - -If you have copied a configuration file from another machine and the other machine already exists on the network, running `network-history` commands may fail due to duplicated `PeerID` you have copied to the new machine. You should use the `vega datanode init` command to create a new configuration file, and copy the `PeerID` from that into your configuration file, or update the newly generated configuration file with configuration settings you have obtained from the existing configuration file. - -If the network's `ChainID` has been updated and the `ChainID` your data node is configured with is out of date, the mismatched `ChainID` will prevent your request from being accepted. You will need to update the `ChainID` in your data node configuration file to the appropriate ChainID. - -If you have not configured any network peers, follow the instructions found under [Fetch Network History](#fetch-network-history) to add the peers to your data node configuration file. - -## Configure data node APIs -In order for clients to communicate with data nodes, we expose a set of APIs and methods for reading data. - -There are currently three protocols to communicate with the data node APIs: - -### gRPC -gRPC is an open source remote procedure call (RPC) system initially developed at Google. In data node the gRPC API features streaming of events in addition to standard procedure calls. - -The default port (configurable) for the gRPC API is `3007` and matches the [gRPC protobuf definition ↗](https://github.com/vegaprotocol/vega/tree/develop/protos). 
- -gRPC configurations are defined under the `[Gateway.Node]` section of the data node configuration file `YOUR_DATA_NODE_HOME_PATH/config/data-node/config.toml`: - -```toml - [Gateway.Node] - Port = 3007 - IP = "0.0.0.0" -``` - -### GraphQL -[GraphQL ↗](https://graphql.org/) is an open-source data query and manipulation language for APIs, and a runtime for fulfilling queries with existing data, originally developed at Facebook. The [Console ↗](https://github.com/vegaprotocol/frontend-monorepo/tree/develop/apps/trading) uses the GraphQL API to retrieve data including streams of events. - -The GraphQL API is defined by a [schema ↗](https://github.com/vegaprotocol/vega/blob/master/datanode/gateway/graphql/schema.graphql). External clients will use this schema to communicate with Vega. - -Queries can be tested using the GraphQL playground app which is bundled with a node. The default port (configurable) for the playground app is `3008` accessing this in a web browser will show a web app for testing custom queries, mutations and subscriptions. - -The GraphQL default port and other configuration options can be found in the data node configuration file `YOUR_DATA_NODE_HOME_PATH/config/data-node/config.toml` under the `Gateway.GraphQL` section: - -```toml - [Gateway.GraphQL] - Port = 3008 - IP = "0.0.0.0" - Enabled = true - ComplexityLimit = 0 - HTTPSEnabled = false - AutoCertDomain = "" - CertificateFile = "" - KeyFile = "" - Endpoint = "/graphql" -``` - -#### HTTPS -The REST and GraphQL API gateway can be configured to use secure http connections. - -**GraphQL subscriptions do not work properly unless the HTTPS is enabled**. - -You will need your data node to be reachable over the internet with a proper fully qualified domain name, and a valid signed certificate. 
You may either: - -* Provide data node with a path to an existing signed certificate and corresponding private key -* Configure data node to create a certificate for you, and automatically request a signature via `LetsEncrypt` - -In the former case, where you already have a certificate and corresponding private key file, you can specify them as follows: - -```toml - [Gateway] - HTTPSEnabled = true - CertificateFile = "/path/to/certificate/file" - KeyFile = "/path/to/key/file" -``` - -You can buy a certificate from a verified source and save the obtained file to your preferred location. It is advised that the certificate and key files have a permission mask of `0600` and the directory where they are located as `0700`. - -Many administrators prefer to use a tool called `certbot` for generating and signing free certificates via `LetsEncrypt`. To obtain a signed certificate with this method: -* [Install certbot ↗](https://www.inmotionhosting.com/support/website/ssl/lets-encrypt-ssl-ubuntu-with-certbot/) -* Run `certbot certonly --standalone` to generate certificate -* Place the generated `fullchain.pem` into the `Gateway.CertificateFile` location and corresponding `privkey.pem` to `Gateway.KeyFile`. -* Read the [configuration considerations](https://serverfault.com/questions/790772/best-practices-for-setting-a-cron-job-for-lets-encrypt-certbot-renewal) for certbot in crontab. - -Data node can optionally perform a similar role to `certbot` and manage creation and signing of certificates automatically via LetsEncrypt. To enable this feature, specify an `AutoCertDomain` instead of `CertificateFile` and `KeyFile` paths in the `[Gateway]` section data node's configuration file. 
For example: - -```toml - [Gateway] - HTTPSEnabled = true - AutoCertDomain = "my.lovely.domain.com" -``` - -**It is a hard requirement of the `LetsEncrypt` validation process that the the server answering its challenge is running on the standard HTTPS port (443).** By default, the GraphQL API listens on port 3008, and the REST API runs on 3009, so the validation will not succeed. This means if you wish to make use of data node's automatic certificate management, you must do one of the following: - -* Forward port 443 on your machine to the GraphQL or REST API port using `iptables` or similar other network configuration CLI. Example: `iptables`: `iptables -A PREROUTING -t nat -p tcp --dport 443 -j DNAT --to-destination :3008` -* Proxy pass to port 3008 by using reverse proxy server. Some example sources on how to set one up: - - [`caddy`](https://caddyserver.com/docs/quick-starts/reverse-proxy) - - [`nginx`](https://docs.nginx.com/nginx/admin-guide/web-server/reverse-proxy/) - - [`httpd`](https://httpd.apache.org/docs/2.4/howto/reverse_proxy.html) -* Directly listen on port 443 instead of the default with either the GraphQL or REST gateways in data node by specifying the following configuration: - -```toml - [Gateway.GraphQL] - Port = 443 -``` - -or - -```toml - [Gateway.REST] - Port = 443 -``` -Note that Linux systems generally require processes listening on ports under 1024 to either run as root, or be specifically granted permission, e.g. by launching with the following: - -```shell -setcap cap_net_bind_service=ep vega datanode run -``` - -#### GraphQL complexity -Currently the GraphQL complexity limit is globally set to 3750. - -This setting is theoretical at the moment and will be refined and have different levels for different queries/resolvers in the future. - -The intention behind this limit is to prevent the VEGA system from being abused by heavy queries (DOS). The complexity level is mostly affected by the number of objects a query contains. 
So the heaviest ones we currently have in the system are: - -| Query | Items | Complexity | -| ----- | ----- | ---------- | -| SimpleMarkets (embedded candles) | 1 candle | 151 | -| SimpleMarkets (embedded candles) | 91 candle | 788 | -| MarketInfo (embedded candles) | 1 candle | 399 | -| MarketInfo (embedded candles) | 91 candles | 1036 | -| Orders (embedded orders) | 1 order | 163 | -| Orders (embedded orders) | 80 orders | 4003 | -| Trades (embedded trades) | 1 trades | 118 | -| Trades (embedded trades) | 75 trades | 1393 | -| Positions (embedded positions) | 1 position | 129 | -| Positions (embedded positions) | 40 positions | 2500 | - -The approximate number of positions queries by customers is 40. - -The GraphQL will return error for queries that have complexity above the set limit: "GraphQL error: Query is too complex to execute" and will not proceed with execution. - -### REST -REST provides a standard between computer systems on the web, making it easier for systems to communicate with each other. It is arguably simpler to work with than gRPC and GraphQL. In Vega the REST API is a reverse proxy to the gRPC API, however it does not support streaming. - -The default port (configurable) for the REST API is `3009` and we use a reverse proxy to the gRPC API to deliver the REST API implementation. - -## Further reading -For more information about data node and developing on data node please see the data node [README ↗](https://github.com/vegaprotocol/vega/blob/master/datanode/README.md) - -## Data node troubleshooting - -### Block height on begin block is too high - -If you start data node and you receive an error that looks like: - -```text -block height on begin block, XXXXXX, is too high, the height of the last processed block is XXXXXX -``` - -This indicates that the core node block height is ahead of the data node it is connected to. - -There are a number of ways this can happen: - -1. 
Data node has been added and connected to a core node that has been running for a while and no history has been loaded into data node. -2. Core has been started from a snapshot, but data node has been had no history loaded. -3. Data node was restarted with `WipeOnStartup` set to true. -4. Core has been started from snapshot, and data node has been started from an earlier snapshot than core. - -Data node requires that the first block of data it receives from core is no more that the last block height received by data node + 1. If the height of the block received from core is lower than the last block height received by data node, the events from core are ignored until events from the appropriate next block is received. - -To fix this problem, use the [network-history load](#loading-network-history-into-data-node), or use [AutoInitialiseFromNetworkHistory](#data-node-recovery-from-network-history) to ensure the data-node contains the necessary history before starting the core node. From ef3df8dad024ffc81f127d8610a00460a5920104 Mon Sep 17 00:00:00 2001 From: daniel1302 Date: Mon, 1 Jan 2024 13:11:46 +0100 Subject: [PATCH 14/15] feat: fix rebase --- docs/node-operators/get-started/setup-datanode.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/node-operators/get-started/setup-datanode.md b/docs/node-operators/get-started/setup-datanode.md index e1464cf2f..9ee0ba015 100644 --- a/docs/node-operators/get-started/setup-datanode.md +++ b/docs/node-operators/get-started/setup-datanode.md @@ -329,9 +329,6 @@ For example: ChunkInterval = "2 hours" ``` -<<<<<<< HEAD -## Resetting the data node -======= ## Generate config To generate the configuration files you need for the data node, you can use the following command: @@ -382,7 +379,6 @@ Data node database configuration is defined under the `[SQLStore.ConnectionConfi You should ensure the database configuration matches those of the database you created in the pre-requisite steps. 
### Resetting the data node ->>>>>>> c7bbf785 (remove duplicate) :::warning Running the following command will remove all data from the data node and is not recoverable. ::: From 3ec2ef19c13e3e180a23cbad7c03729714d5e587 Mon Sep 17 00:00:00 2001 From: daniel1302 Date: Mon, 1 Jan 2024 16:18:05 +0100 Subject: [PATCH 15/15] feat: update docs for latest version --- .../restart-data-node-from-network-history.md | 62 +++++++++++++------ docs/node-operators/how-to/use-snapshots.md | 2 + .../restart-data-node-from-network-history.md | 52 +++++++++++----- .../node-operators/how-to/use-snapshots.md | 2 + 4 files changed, 83 insertions(+), 35 deletions(-) diff --git a/docs/node-operators/how-to/restart-data-node-from-network-history.md b/docs/node-operators/how-to/restart-data-node-from-network-history.md index 9557e1e65..36f113253 100644 --- a/docs/node-operators/how-to/restart-data-node-from-network-history.md +++ b/docs/node-operators/how-to/restart-data-node-from-network-history.md @@ -15,7 +15,7 @@ import TabItem from '@theme/TabItem'; ## What is network history? -Network history is a mechanism in the data node software that allows for sharing chunks of information between other data nodes connected to the network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need the full network history. +Network history is a mechanism in the data node software that allows for sharing chunks of information between other data nodes connected to the network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. 
Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need the full-network history. ## Requirements @@ -36,7 +36,7 @@ You must have a data node already configured and running. If you do not have one ### 1. Stop data node if it is running -If you're using Visor, you must stop it to control your node. +If you're using Visor, you must stop it, and it will stop the data-node as well. Otherwise, you must stop the Vega and data node processes. @@ -58,17 +58,25 @@ systemctl stop vega; rm -rf /home/vega/vega_home/state/data-node/ # remove Vega state -vega unsafe_reset_all --home +vega unsafe_reset_all --home "YOUR_VEGA_HOME" # remove Tendermint state -vega tm unsafe_reset_all --home +vega tm unsafe_reset_all --home "YOUR_TENDERMINT_HOME" ``` -### 3. Update config +### 3. Clear the PostgreSQL database + +```sql title="PostgreSQL console" +DROP DATABASE ; +CREATE DATABASE WITH owner='' +``` + + +### 4. Update config #### a. Data node config -Data node config is located in the `/config/data-node/config.toml` file. +Data node config is located in the `YOUR_VEGA_HOME/config/data-node/config.toml` file. 
Update the following parameters in your `config.toml` file for the data node: @@ -76,7 +84,7 @@ Update the following parameters in your `config.toml` file for the data node: -```toml +```toml title="YOUR_VEGA_HOME/config/data-node/config.toml" AutoInitialiseFromNetworkHistory = true [SQLStore] @@ -88,14 +96,15 @@ AutoInitialiseFromNetworkHistory = true BootstrapPeers = ["/dns/n00.testnet.vega.rocks/tcp/4001/ipfs/12D3KooWNiWcT93S3P3eiHqGq4a6feaD2cUfbWw9AxgdVt8RzTHJ","/dns/n06.testnet.vega.rocks/tcp/4001/ipfs/12D3KooWMSaQevxg1JcaFxWTpxMjKw1J13bLVLmoxbeSJ5gpXjRh","/dns/n07.testnet.vega.rocks/tcp/4001/ipfs/12D3KooWACJuzchZQH8Tz1zNmkGCatgcS2DUoiQnMFaALVMo7DpC"] [NetworkHistory.Initialise] - TimeOut = "4h" + TimeOut = "96h" + MinimumBlockCount = 10000 ``` -```toml +```toml title="YOUR_VEGA_HOME/config/data-node/config.toml" AutoInitialiseFromNetworkHistory = true [SQLStore] @@ -107,40 +116,46 @@ AutoInitialiseFromNetworkHistory = true BootstrapPeers = ["/dns/n00.validators-testnet.vega.rocks/tcp/4001/ipfs/12D3KooWQbCMy5echT1sMKwRQh8GJJk5zmHmg6VNg1qEbpysNACN","/dns/n02.validators-testnet.vega.rocks/tcp/4001/ipfs/12D3KooWHffX2tdw2phH7ai8GCo2K3ehJfnLRATve5otVr4D3ggK","/dns/metabase00.validators-testnet.vega.rocks/tcp/4001/ipfs/12D3KooWKPDZ1s5FM8YewZVeRb9XwaQ7PdaoyD84hFnKmVbn94gN"] [NetworkHistory.Initialise] - TimeOut = "4h" + TimeOut = "96h" + MinimumBlockCount = 10000 ``` + +:::note Number of blocks to sync +The `NetworkHistory.Initialise.MinimumBlockCount` parameter sets how many blocks your node is going to download before it starts. +::: + #### b. Vega core node config -The config is located in the `/config/node/config.toml`. Update the following parameters in your `config.toml` file for the Vega core: +The config is located in the `YOUR_VEGA_HOME/config/node/config.toml`. 
Update the following parameters in your `config.toml` file for the Vega core: -```toml +```toml title="YOUR_VEGA_HOME/config/node/config.toml" [Snapshot] StartHeight = 0 [Broker] [Broker.Socket] - DialTimeout = "4h" + DialTimeout = "96h" ``` -```toml +```toml title="YOUR_VEGA_HOME/config/node/config.toml" [Snapshot] StartHeight = 0 [Broker] [Broker.Socket] - DialTimeout = "4h" + DialTimeout = "96h" ``` @@ -171,7 +186,7 @@ To update Tendermint, you need to know the trust block and height. To collect th Then select one of the latest pairs for block height and hash. -Once you have the trusted block, you can update the following parameters in the `/config/config.toml` file: +Once you have the trusted block, you can update the following parameters in the `YOUR_TENDERMINT_HOME/config/config.toml` file: @@ -228,7 +243,16 @@ trust_hash = "9edf8b5779aa79e96ac95256a7c671b855990be518a9c7cbb98eb8694918b004" -### 4. Start your node +#### d. Vegavisor config + +The config is located in `YOUR_VEGAVISOR_HOME/config.toml` + +```toml title="YOUR_VEGAVISOR_HOME/config.toml" +maxNumberOfFirstConnectionRetries = 172800 +... +``` + +### 5. Start your node :::caution Start data node first If you are not using Visor, you MUST start your data node before starting the Vega core. @@ -263,7 +287,7 @@ This step is critical, otherwise you may end with a corrupted node after next re #### a. Disable statesync in Tendermint config -Open the `/config/config.toml` file and update the following parameter: +Open the `YOUR_TENDERMINT_HOME/config/config.toml` file and update the following parameter: ```toml [statesync] @@ -272,7 +296,7 @@ enable = false #### b. 
Disable wiping the data node database -Open the `/config/data-node/config.toml` file and update the following parmater: +Open the `YOUR_VEGA_HOME/config/data-node/config.toml` file and update the following parameter: ```toml AutoInitialiseFromNetworkHistory = false diff --git a/docs/node-operators/how-to/use-snapshots.md b/docs/node-operators/how-to/use-snapshots.md index f8787b76e..cf1a8c246 100644 --- a/docs/node-operators/how-to/use-snapshots.md +++ b/docs/node-operators/how-to/use-snapshots.md @@ -81,6 +81,8 @@ The default action when starting a node is for it to load from the latest snapsh As an example, if the local snapshot list looks like the below: ```shell +# vega tools snapshot --home="YOUR_VEGA_HOME" + Snapshots available: 2 Height 901, version: 4, size 92, hash: 562414bb5be3ccc8403fbd030d06eebc799bfef5ca25b02ad360fec349cb4bc8 Height 601, version: 3, size 92, hash: 72a2edd960cf3340ae94bf092991f923850738144789959124590675798fefd9 diff --git a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md index 2cb72bffe..35a37478b 100644 --- a/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md +++ b/versioned_docs/version-v0.73/node-operators/how-to/restart-data-node-from-network-history.md @@ -15,7 +15,7 @@ import TabItem from '@theme/TabItem'; ## What is network history? -Network history is a mechanism in the data node software that allows for sharing chunks of information between other data nodes connected to the network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need the full network history. 
+Network history is a mechanism in the data node software that allows for sharing chunks of information between other data nodes connected to the network. For example, when you are interested in a specific period of data from the Vega network, you can use network history to download this data from other nodes **if they have it**. Usually, you are interested in the last few blocks required to start a new data node or a data node after a crash - in those cases, you do not need the full-network history. ## Requirements @@ -35,7 +35,7 @@ You must have a data node already configured and running. If you do not have one ### 1. Stop data node if it is running -If you're using Visor, you must stop it to control your node. +If you're using Visor, you must stop it, and it will stop the data-node as well. Otherwise, you must stop the Vega and data node processes. @@ -57,21 +57,28 @@ systemctl stop vega; rm -rf /home/vega/vega_home/state/data-node/ # remove Vega state -vega unsafe_reset_all --home +vega unsafe_reset_all --home "YOUR_VEGA_HOME" # remove Tendermint state -vega tm unsafe_reset_all --home +vega tm unsafe_reset_all --home "YOUR_TENDERMINT_HOME" ``` -### 3. Update config +### 3. Clear the PostgreSQL database + +```sql title="PostgreSQL console" +DROP DATABASE ; +CREATE DATABASE WITH owner='' +``` + +### 4. Update config #### a. Data node config -Data node config is located in the `/config/data-node/config.toml` file. +Data node config is located in the `YOUR_VEGA_HOME/config/data-node/config.toml` file. 
Update the following parameters in your `config.toml` file for the mainnet data node: -```toml +```toml title="YOUR_VEGA_HOME/config/data-node/config.toml" AutoInitialiseFromNetworkHistory = true [SQLStore] @@ -84,22 +91,26 @@ AutoInitialiseFromNetworkHistory = true [NetworkHistory.Initialise] - TimeOut = "4h" + TimeOut = "96h" MinimumBlockCount = 10000 ``` +:::note Number of blocks to sync +The `NetworkHistory.Initialise.MinimumBlockCount` parameter sets how many blocks your node is going to download before it starts. +::: + #### b. Vega core node config -The config is located in the `/config/node/config.toml`. Update the following parameters in your `config.toml` file for the Vega core for mainnet: +The config is located in the `YOUR_VEGA_HOME/config/node/config.toml`. Update the following parameters in your `config.toml` file for the Vega core for mainnet: -```toml +```toml title="YOUR_VEGA_HOME/config/node/config.toml" [Snapshot] StartHeight = 0 [Broker] [Broker.Socket] - DialTimeout = "4h" + DialTimeout = "96h" ``` #### c. Tendermint config @@ -113,7 +124,7 @@ To update Tendermint, you need to know the trust block and height. To collect th Then select one of the latest pairs for block height and hash. -Once you have the trusted block, you can update the following parameters in the `/config/config.toml` file: +Once you have the trusted block, you can update the following parameters in the `YOUR_TENDERMINT_HOME/config/config.toml` file: ```toml [statesync] @@ -137,7 +148,16 @@ trust_height = 26530300 trust_hash = "19d3510c1bda5d05a88429bb5a1f182f9b037c5b0975800cc18fe3bf8c75061b" ``` -### 4. Start your node +#### d. Vegavisor config + +The config is located in `YOUR_VEGAVISOR_HOME/config.toml` + +```toml title="YOUR_VEGAVISOR_HOME/config.toml" +maxNumberOfFirstConnectionRetries = 172800 +... +``` + +### 5. Start your node :::caution Start data node If you are not using Visor, you MUST start your data node before starting the Vega core. 
@@ -152,7 +172,7 @@ systemctl start data-node; systemctl start vega; ``` -Your node should start in a several minutes. +Your node should start in several minutes, depending on how many blocks you are going to sync. :::info If you use Visor, you may see the following messages in the logs; please ignore them. It is just Visor checking if node has already started: @@ -172,7 +192,7 @@ This step is critical, otherwise you may end with a corrupted node after next re #### a. Disable statesync in Tendermint config -Open the `/config/config.toml` file and update the following parameter: +Open the `YOUR_TENDERMINT_HOME/config/config.toml` file and update the following parameter: ```toml [statesync] @@ -181,7 +201,7 @@ enable = false #### b. Disable wiping the data node database -Open the `/config/data-node/config.toml` file and update the following parmater: +Open the `YOUR_VEGA_HOME/config/data-node/config.toml` file and update the following parameter: ```toml AutoInitialiseFromNetworkHistory = false diff --git a/versioned_docs/version-v0.73/node-operators/how-to/use-snapshots.md b/versioned_docs/version-v0.73/node-operators/how-to/use-snapshots.md index f8787b76e..cf1a8c246 100644 --- a/versioned_docs/version-v0.73/node-operators/how-to/use-snapshots.md +++ b/versioned_docs/version-v0.73/node-operators/how-to/use-snapshots.md @@ -81,6 +81,8 @@ The default action when starting a node is for it to load from the latest snapsh As an example, if the local snapshot list looks like the below: ```shell +# vega tools snapshot --home="YOUR_VEGA_HOME" + Snapshots available: 2 Height 901, version: 4, size 92, hash: 562414bb5be3ccc8403fbd030d06eebc799bfef5ca25b02ad360fec349cb4bc8 Height 601, version: 3, size 92, hash: 72a2edd960cf3340ae94bf092991f923850738144789959124590675798fefd9