1
// This file is part of Substrate.
2

            
3
// Copyright (C) Parity Technologies (UK) Ltd.
4
// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
5

            
6
// This program is free software: you can redistribute it and/or modify
7
// it under the terms of the GNU General Public License as published by
8
// the Free Software Foundation, either version 3 of the License, or
9
// (at your option) any later version.
10

            
11
// This program is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
// GNU General Public License for more details.
15

            
16
// You should have received a copy of the GNU General Public License
17
// along with this program. If not, see <https://www.gnu.org/licenses/>.
18

            
19
use std::time::SystemTime;
20

            
21
use crate::config::Configuration;
22
use futures_timer::Delay;
23
use prometheus_endpoint::{register, Gauge, GaugeVec, Opts, PrometheusError, Registry, U64};
24
use sc_client_api::{ClientInfo, UsageProvider};
25
use sc_network::{config::Role, NetworkStatus, NetworkStatusProvider};
26
use sc_network_sync::{SyncStatus, SyncStatusProvider};
27
use sc_telemetry::{telemetry, TelemetryHandle, SUBSTRATE_INFO};
28
use sc_transaction_pool_api::{MaintainedTransactionPool, PoolStatus};
29
use sc_utils::metrics::register_globals;
30
use sp_api::ProvideRuntimeApi;
31
use sp_runtime::traits::{Block, NumberFor, SaturatedConversion, UniqueSaturatedInto};
32
use std::{
33
	sync::Arc,
34
	time::{Duration, Instant},
35
};
36

            
37
struct PrometheusMetrics {
38
	// generic info
39
	block_height: GaugeVec<U64>,
40
	number_leaves: Gauge<U64>,
41
	ready_transactions_number: Gauge<U64>,
42

            
43
	// I/O
44
	database_cache: Gauge<U64>,
45
	state_cache: Gauge<U64>,
46
}
47

            
48
impl PrometheusMetrics {
49
	fn setup(
50
		registry: &Registry,
51
		name: &str,
52
		version: &str,
53
		roles: u64,
54
	) -> Result<Self, PrometheusError> {
55
		register(
56
			Gauge::<U64>::with_opts(
57
				Opts::new(
58
					"substrate_build_info",
59
					"A metric with a constant '1' value labeled by name, version",
60
				)
61
				.const_label("name", name)
62
				.const_label("version", version),
63
			)?,
64
			registry,
65
		)?
66
		.set(1);
67

            
68
		register(
69
			Gauge::<U64>::new("substrate_node_roles", "The roles the node is running as")?,
70
			registry,
71
		)?
72
		.set(roles);
73

            
74
		register_globals(registry)?;
75

            
76
		let start_time_since_epoch =
77
			SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap_or_default();
78
		register(
79
			Gauge::<U64>::new(
80
				"substrate_process_start_time_seconds",
81
				"Number of seconds between the UNIX epoch and the moment the process started",
82
			)?,
83
			registry,
84
		)?
85
		.set(start_time_since_epoch.as_secs());
86

            
87
		Ok(Self {
88
			// generic internals
89
			block_height: register(
90
				GaugeVec::new(
91
					Opts::new("substrate_block_height", "Block height info of the chain"),
92
					&["status"],
93
				)?,
94
				registry,
95
			)?,
96

            
97
			number_leaves: register(
98
				Gauge::new("substrate_number_leaves", "Number of known chain leaves (aka forks)")?,
99
				registry,
100
			)?,
101

            
102
			ready_transactions_number: register(
103
				Gauge::new(
104
					"substrate_ready_transactions_number",
105
					"Number of transactions in the ready queue",
106
				)?,
107
				registry,
108
			)?,
109

            
110
			// I/ O
111
			database_cache: register(
112
				Gauge::new("substrate_database_cache_bytes", "RocksDB cache size in bytes")?,
113
				registry,
114
			)?,
115
			state_cache: register(
116
				Gauge::new("substrate_state_cache_bytes", "State cache size in bytes")?,
117
				registry,
118
			)?,
119
		})
120
	}
121
}
122

            
123
/// A `MetricsService` periodically sends general client and
124
/// network state to the telemetry as well as (optionally)
125
/// a Prometheus endpoint.
126
pub struct MetricsService {
127
	metrics: Option<PrometheusMetrics>,
128
	last_update: Instant,
129
	last_total_bytes_inbound: u64,
130
	last_total_bytes_outbound: u64,
131
	telemetry: Option<TelemetryHandle>,
132
}
133

            
134
impl MetricsService {
135
	/// Creates a `MetricsService` that only sends information
136
	/// to the telemetry.
137
	pub fn new(telemetry: Option<TelemetryHandle>) -> Self {
138
		MetricsService {
139
			metrics: None,
140
			last_total_bytes_inbound: 0,
141
			last_total_bytes_outbound: 0,
142
			last_update: Instant::now(),
143
			telemetry,
144
		}
145
	}
146

            
147
	/// Creates a `MetricsService` that sends metrics
148
	/// to prometheus alongside the telemetry.
149
	pub fn with_prometheus(
150
		telemetry: Option<TelemetryHandle>,
151
		registry: &Registry,
152
		config: &Configuration,
153
	) -> Result<Self, PrometheusError> {
154
		let role_bits = match config.role {
155
			Role::Full => 1u64,
156
			// 2u64 used to represent light client role
157
			Role::Authority { .. } => 4u64,
158
		};
159

            
160
		PrometheusMetrics::setup(
161
			registry,
162
			&config.network.node_name,
163
			&config.impl_version,
164
			role_bits,
165
		)
166
		.map(|p| MetricsService {
167
			metrics: Some(p),
168
			last_total_bytes_inbound: 0,
169
			last_total_bytes_outbound: 0,
170
			last_update: Instant::now(),
171
			telemetry,
172
		})
173
	}
174

            
175
	/// Returns a never-ending `Future` that performs the
176
	/// metric and telemetry updates with information from
177
	/// the given sources.
178
	pub async fn run<TBl, TExPool, TCl, TNet, TSync>(
179
		mut self,
180
		client: Arc<TCl>,
181
		transactions: Arc<TExPool>,
182
		network: TNet,
183
		syncing: TSync,
184
	) where
185
		TBl: Block,
186
		TCl: ProvideRuntimeApi<TBl> + UsageProvider<TBl>,
187
		TExPool: MaintainedTransactionPool<Block = TBl, Hash = <TBl as Block>::Hash>,
188
		TNet: NetworkStatusProvider,
189
		TSync: SyncStatusProvider<TBl>,
190
	{
191
		let mut timer = Delay::new(Duration::from_secs(0));
192
		let timer_interval = Duration::from_secs(5);
193

            
194
		loop {
195
			// Wait for the next tick of the timer.
196
			(&mut timer).await;
197

            
198
			// Try to get the latest network information.
199
			let net_status = network.status().await.ok();
200

            
201
			// Try to get the latest syncing information.
202
			let sync_status = syncing.status().await.ok();
203

            
204
			// Update / Send the metrics.
205
			self.update(&client.usage_info(), &transactions.status(), net_status, sync_status);
206

            
207
			// Schedule next tick.
208
			timer.reset(timer_interval);
209
		}
210
	}
211

            
212
	fn update<T: Block>(
213
		&mut self,
214
		info: &ClientInfo<T>,
215
		txpool_status: &PoolStatus,
216
		net_status: Option<NetworkStatus>,
217
		sync_status: Option<SyncStatus<T>>,
218
	) {
219
		let now = Instant::now();
220
		let elapsed = (now - self.last_update).as_secs();
221
		self.last_update = now;
222

            
223
		let best_number = info.chain.best_number.saturated_into::<u64>();
224
		let best_hash = info.chain.best_hash;
225
		let finalized_number: u64 = info.chain.finalized_number.saturated_into::<u64>();
226

            
227
		// Update/send metrics that are always available.
228
		telemetry!(
229
			self.telemetry;
230
			SUBSTRATE_INFO;
231
			"system.interval";
232
			"height" => best_number,
233
			"best" => ?best_hash,
234
			"txcount" => txpool_status.ready,
235
			"finalized_height" => finalized_number,
236
			"finalized_hash" => ?info.chain.finalized_hash,
237
			"used_state_cache_size" => info.usage.as_ref()
238
				.map(|usage| usage.memory.state_cache.as_bytes())
239
				.unwrap_or(0),
240
		);
241

            
242
		if let Some(metrics) = self.metrics.as_ref() {
243
			metrics.block_height.with_label_values(&["finalized"]).set(finalized_number);
244
			metrics.block_height.with_label_values(&["best"]).set(best_number);
245

            
246
			if let Ok(leaves) = u64::try_from(info.chain.number_leaves) {
247
				metrics.number_leaves.set(leaves);
248
			}
249

            
250
			metrics.ready_transactions_number.set(txpool_status.ready as u64);
251

            
252
			if let Some(info) = info.usage.as_ref() {
253
				metrics.database_cache.set(info.memory.database_cache.as_bytes() as u64);
254
				metrics.state_cache.set(info.memory.state_cache.as_bytes() as u64);
255
			}
256
		}
257

            
258
		// Update/send network status information, if any.
259
		if let Some(net_status) = net_status {
260
			let num_peers = net_status.num_connected_peers;
261
			let total_bytes_inbound = net_status.total_bytes_inbound;
262
			let total_bytes_outbound = net_status.total_bytes_outbound;
263

            
264
			let diff_bytes_inbound = total_bytes_inbound - self.last_total_bytes_inbound;
265
			let diff_bytes_outbound = total_bytes_outbound - self.last_total_bytes_outbound;
266
			let (avg_bytes_per_sec_inbound, avg_bytes_per_sec_outbound) = if elapsed > 0 {
267
				self.last_total_bytes_inbound = total_bytes_inbound;
268
				self.last_total_bytes_outbound = total_bytes_outbound;
269
				(diff_bytes_inbound / elapsed, diff_bytes_outbound / elapsed)
270
			} else {
271
				(diff_bytes_inbound, diff_bytes_outbound)
272
			};
273

            
274
			telemetry!(
275
				self.telemetry;
276
				SUBSTRATE_INFO;
277
				"system.interval";
278
				"peers" => num_peers,
279
				"bandwidth_download" => avg_bytes_per_sec_inbound,
280
				"bandwidth_upload" => avg_bytes_per_sec_outbound,
281
			);
282
		}
283

            
284
		if let Some(sync_status) = sync_status {
285
			if let Some(metrics) = self.metrics.as_ref() {
286
				let best_seen_block: Option<u64> =
287
					sync_status.best_seen_block.map(|num: NumberFor<T>| {
288
						UniqueSaturatedInto::<u64>::unique_saturated_into(num)
289
					});
290

            
291
				metrics
292
					.block_height
293
					.with_label_values(&["sync_target"])
294
					.set(best_seen_block.unwrap_or(best_number));
295
			}
296
		}
297
	}
298
}