rcmgr: Use prometheus SDK for rcmgr metrics (#2044)
* Replace OpenCensus with Prometheus Go SDK
Allows for much more efficient metric collecting
* PR comments
* Try a different way of testing noallocs
* CI fiddling
* CI fiddling
* Undo debug change
* Return early
* Debug
* Try AllocsPerRun
* Try gosched in noop
* Use AllocsPerRun
* Try without cover
* Use tag
* Revert "Try without cover"
This reverts commit e91b1e8f0d0ffbe6842a37925eb72e0177bf0773.
* Cleanup debug code
* Use global string slice pool
"legendFormat":"max {{dir}} streams per peer – {{instance}}",
@ -664,7 +589,7 @@
"type":"prometheus",
"uid":"${DS_PROMETHEUS}"
},
"description":"How many peers have N-0.1 streams open",
"description":"How many peers have N streams open",
"fieldConfig":{
"defaults":{
"color":{
@ -675,7 +600,8 @@
"mode":"absolute",
"steps":[
{
"color":"green"
"color":"green",
"value":null
},
{
"color":"red",
@ -690,11 +616,13 @@
"h":8,
"w":12,
"x":0,
"y":46
"y":37
},
"id":46,
"options":{
"displayMode":"gradient",
"minVizHeight":10,
"minVizWidth":0,
"orientation":"auto",
"reduceOptions":{
"calcs":[
@ -705,7 +633,7 @@
},
"showUnfilled":true
},
"pluginVersion":"8.4.5",
"pluginVersion":"9.3.6",
"targets":[
{
"datasource":{
@ -713,7 +641,7 @@
"uid":"${DS_PROMETHEUS}"
},
"exemplar":false,
"expr":"sum without (instance) (rcmgr_peer_streams_bucket{dir=\"inbound\"}-rcmgr_peer_streams_negative_bucket{dir=\"inbound\"})",
"expr":"sum without (instance) (rcmgr_peer_streams_bucket{dir=\"inbound\"}-rcmgr_previous_peer_streams_bucket{dir=\"inbound\"})",
"format":"heatmap",
"hide":false,
"interval":"",
@ -729,7 +657,7 @@
"type":"prometheus",
"uid":"${DS_PROMETHEUS}"
},
"description":"How many peers have N-0.1 streams open",
"description":"How many peers have N streams open",
"fieldConfig":{
"defaults":{
"color":{
@ -740,7 +668,8 @@
"mode":"absolute",
"steps":[
{
"color":"green"
"color":"green",
"value":null
},
{
"color":"red",
@ -755,11 +684,13 @@
"h":8,
"w":12,
"x":12,
"y":46
"y":37
},
"id":47,
"options":{
"displayMode":"gradient",
"minVizHeight":10,
"minVizWidth":0,
"orientation":"auto",
"reduceOptions":{
"calcs":[
@ -770,7 +701,7 @@
},
"showUnfilled":true
},
"pluginVersion":"8.4.5",
"pluginVersion":"9.3.6",
"targets":[
{
"datasource":{
@ -778,7 +709,7 @@
"uid":"${DS_PROMETHEUS}"
},
"exemplar":false,
"expr":"sum without (instance) (rcmgr_peer_streams_bucket{dir=\"outbound\"}-rcmgr_peer_streams_negative_bucket{dir=\"outbound\"})",
"expr":"sum without (instance) (rcmgr_peer_streams_bucket{dir=\"outbound\"}-rcmgr_previous_peer_streams_bucket{dir=\"outbound\"})",
"format":"heatmap",
"hide":false,
"interval":"",
@ -795,7 +726,7 @@
"h":1,
"w":24,
"x":0,
"y":54
"y":45
},
"id":29,
"panels":[],
@ -811,14 +742,19 @@
"h":9,
"w":24,
"x":0,
"y":55
"y":46
},
"id":31,
"options":{
"code":{
"language":"plaintext",
"showLineNumbers":false,
"showMiniMap":false
},
"content":"# Libp2p Connections\n\nBroken down by [Resource Scope](https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/README.md#resource-scopes). \nScopes represent what is imposing limits on this resource. For connections, we have three main scopes:\n\n1. System. The total number of connections owned by the process. Includes both application usable connections + the number of transient connections.\n2. Transient. The total number of connections that are being upgraded into usable connections in the process.\n3. Peer. The total number of connections associated with this peer. When a connection has this scope it is usable by the application.\n\nAn example of a System connection is a connection you can open a libp2p stream on and send data.\nA transient connection is not yet usable for application data since it may be negotiating \na security handshake or a multiplexer.\n\nConnections start in the transient scope and move over to the System and Peer scopes once they are ready to be used.\n\nIt would be unusual to see a lot of transient connections. It would also be unusual to see a peer with a lot of connections.",
"mode":"markdown"
},
"pluginVersion":"8.4.5",
"pluginVersion":"9.3.6",
"title":"libp2p Connections",
"type":"text"
},
@ -833,6 +769,8 @@
"mode":"palette-classic"
},
"custom":{
"axisCenteredZero":false,
"axisColorMode":"text",
"axisLabel":"",
"axisPlacement":"auto",
"barAlignment":0,
@ -865,7 +803,8 @@
"mode":"absolute",
"steps":[
{
"color":"green"
"color":"green",
"value":null
},
{
"color":"red",
@ -880,14 +819,15 @@
"h":8,
"w":12,
"x":0,
"y":64
"y":55
},
"id":33,
"options":{
"legend":{
"calcs":[],
"displayMode":"list",
"placement":"bottom"
"placement":"bottom",
"showLegend":true
},
"tooltip":{
"mode":"single",
@ -921,6 +861,8 @@
"mode":"palette-classic"
},
"custom":{
"axisCenteredZero":false,
"axisColorMode":"text",
"axisLabel":"",
"axisPlacement":"auto",
"barAlignment":0,
@ -953,7 +895,8 @@
"mode":"absolute",
"steps":[
{
"color":"green"
"color":"green",
"value":null
},
{
"color":"red",
@ -968,14 +911,15 @@
"h":8,
"w":12,
"x":12,
"y":64
"y":55
},
"id":36,
"options":{
"legend":{
"calcs":[],
"displayMode":"list",
"placement":"bottom"
"placement":"bottom",
"showLegend":true
},
"tooltip":{
"mode":"single",
@ -1007,14 +951,19 @@
"h":3,
"w":24,
"x":0,
"y":72
"y":63
},
"id":38,
"options":{
"content":"These are aggregated stats. They are grouped by buckets. Each bucket represents how many peers have N number of connections.\n\nDue to a quirk in [opencensus](https://github.com/census-instrumentation/opencensus-go/blob/v0.23.0/stats/view/aggregation_data.go#L195) the bucket values have to be a bit bigger than the integer values.\nSo subtract 0.1 from the number to get the true number of connections. e.g. If a peer has 3 connections, it'll be put in the 3.1 bucket. \n",
"code":{
"language":"plaintext",
"showLineNumbers":false,
"showMiniMap":false
},
"content":"These are aggregated stats. They are grouped by buckets. Each bucket represents how many peers have N number of connections.",
conns=stats.Int64(metricNamespace+"connections","Number of Connections",stats.UnitDimensionless)
peerConns=stats.Int64(metricNamespace+"peer/connections","Number of connections this peer has",stats.UnitDimensionless)
peerConnsNegative=stats.Int64(metricNamespace+"peer/connections_negative","Number of connections this peer had. This is used to get the current connection number per peer histogram by subtracting this from the peer/connections histogram",stats.UnitDimensionless)
streams=stats.Int64(metricNamespace+"streams","Number of Streams",stats.UnitDimensionless)
peerStreams=stats.Int64(metricNamespace+"peer/streams","Number of streams this peer has",stats.UnitDimensionless)
peerStreamsNegative=stats.Int64(metricNamespace+"peer/streams_negative","Number of streams this peer had. This is used to get the current streams number per peer histogram by subtracting this from the peer/streams histogram",stats.UnitDimensionless)
memory=stats.Int64(metricNamespace+"memory","Amount of memory reserved as reported to the Resource Manager",stats.UnitDimensionless)
peerMemory=stats.Int64(metricNamespace+"peer/memory","Amount of memory currently reseved for peer",stats.UnitDimensionless)
peerMemoryNegative=stats.Int64(metricNamespace+"peer/memory_negative","Amount of memory previously reseved for peer. This is used to get the current memory per peer histogram by subtracting this from the peer/memory histogram",stats.UnitDimensionless)
connMemory=stats.Int64(metricNamespace+"conn/memory","Amount of memory currently reseved for the connection",stats.UnitDimensionless)
connMemoryNegative=stats.Int64(metricNamespace+"conn/memory_negative","Amount of memory previously reseved for the connection. This is used to get the current memory per connection histogram by subtracting this from the conn/memory histogram",stats.UnitDimensionless)
fds=stats.Int64(metricNamespace+"fds","Number of fds as reported to the Resource Manager",stats.UnitDimensionless)
blockedResources=stats.Int64(metricNamespace+"blocked_resources","Number of resource requests blocked",stats.UnitDimensionless)
// Lets us build a histogram of our current state. See https://github.com/libp2p/go-libp2p-resource-manager/pull/54#discussion_r911244757 for more information.
Help:"Number of connections this peer previously had. This is used to get the current connection number per peer histogram by subtracting this from the peer_connections histogram",