Initial import

Peter Fern 2018-11-08 19:39:50 +11:00
commit 21327404c9
13 changed files with 1270 additions and 0 deletions

.gitignore (new file)
@@ -0,0 +1,3 @@
zfs_exporter
.build/
.tarballs/

.promu.yml (new file)
@@ -0,0 +1,32 @@
verbose: true
go:
version: 1.11.2
cgo: false
repository:
path: github.com/pdf/zfs_exporter
build:
prefix: .build
binaries:
- name: zfs_exporter
flags: -a -tags netgo
ldflags: |
-s
-X github.com/prometheus/common/version.Version={{.Version}}
-X github.com/prometheus/common/version.Revision={{.Revision}}
-X github.com/prometheus/common/version.Branch={{.Branch}}
-X github.com/prometheus/common/version.BuildUser={{user}}@{{host}}
-X github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}}
tarball:
prefix: .build
files:
- LICENSE
crossbuild:
platforms:
- linux/amd64
- linux/386
- freebsd/amd64
- freebsd/386
- solaris/amd64
- solaris/386
- darwin/amd64
- darwin/386

LICENSE (new file)
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Peter Fern
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Makefile (new file)
@@ -0,0 +1,5 @@
include Makefile.common
.PHONY: build
build: export GO111MODULE=on
build: common-build

Makefile.common (new file)
@@ -0,0 +1,137 @@
# Copyright 2018 The Prometheus Authors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A common Makefile that includes rules to be reused in different prometheus projects.
# !!! Open PRs only against the prometheus/prometheus/Makefile.common repository!
# Example usage:
# Create the main Makefile in the root project directory.
# include Makefile.common
# customTarget:
# @echo ">> Running customTarget"
#
# Ensure GOBIN is not set during build so that promu is installed to the correct path
unexport GOBIN
GO ?= go
GOFMT ?= $(GO)fmt
FIRST_GOPATH := $(firstword $(subst :, ,$(shell $(GO) env GOPATH)))
PROMU := $(FIRST_GOPATH)/bin/promu
STATICCHECK := $(FIRST_GOPATH)/bin/staticcheck
GOVENDOR := $(FIRST_GOPATH)/bin/govendor
pkgs = ./...
PREFIX ?= $(shell pwd)
BIN_DIR ?= $(shell pwd)
DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD))
DOCKER_REPO ?= pdf
.PHONY: all
all: style staticcheck unused build test
# This rule is used to forward a target like "build" to "common-build". This
# allows a new "build" target to be defined in a Makefile which includes this
# one and override "common-build" without override warnings.
%: common-% ;
.PHONY: common-style
common-style:
@echo ">> checking code style"
@fmtRes=$$($(GOFMT) -d $$(find . -path ./vendor -prune -o -name '*.go' -print)); \
if [ -n "$${fmtRes}" ]; then \
echo "gofmt checking failed!"; echo "$${fmtRes}"; echo; \
echo "Please ensure you are using $$($(GO) version) for formatting code."; \
exit 1; \
fi
.PHONY: common-check_license
common-check_license:
@echo ">> checking license header"
@licRes=$$(for file in $$(find . -type f -iname '*.go' ! -path './vendor/*') ; do \
awk 'NR<=3' $$file | grep -Eq "(Copyright|generated|GENERATED)" || echo $$file; \
done); \
if [ -n "$${licRes}" ]; then \
echo "license header checking failed:"; echo "$${licRes}"; \
exit 1; \
fi
.PHONY: common-test-short
common-test-short:
@echo ">> running short tests"
$(GO) test -short $(pkgs)
.PHONY: common-test
common-test:
@echo ">> running all tests"
$(GO) test -race $(pkgs)
.PHONY: common-format
common-format:
@echo ">> formatting code"
$(GO) fmt $(pkgs)
.PHONY: common-vet
common-vet:
@echo ">> vetting code"
$(GO) vet $(pkgs)
.PHONY: common-staticcheck
common-staticcheck: $(STATICCHECK)
@echo ">> running staticcheck"
$(STATICCHECK) -ignore "$(STATICCHECK_IGNORE)" $(pkgs)
.PHONY: common-unused
common-unused: $(GOVENDOR)
@echo ">> running check for unused packages"
@$(GOVENDOR) list +unused | grep . && exit 1 || echo 'No unused packages'
.PHONY: common-build
common-build: promu
@echo ">> building binaries"
$(PROMU) build --prefix $(PREFIX)
.PHONY: common-tarball
common-tarball: promu
@echo ">> building release tarball"
$(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR)
.PHONY: common-docker
common-docker:
docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" .
.PHONY: common-docker-publish
common-docker-publish:
docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)"
.PHONY: common-docker-tag-latest
common-docker-tag-latest:
docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):latest"
.PHONY: promu
promu:
GOOS= GOARCH= $(GO) get -u github.com/prometheus/promu
.PHONY: proto
proto:
@echo ">> generating code from proto files"
@./scripts/genproto.sh
.PHONY: $(STATICCHECK)
$(STATICCHECK):
GOOS= GOARCH= $(GO) get -u honnef.co/go/tools/cmd/staticcheck
.PHONY: $(GOVENDOR)
$(GOVENDOR):
GOOS= GOARCH= $(GO) get -u github.com/kardianos/govendor

README.md (new file)
@@ -0,0 +1,74 @@
# ZFS Exporter
Prometheus exporter for ZFS (pools, filesystems, snapshots and volumes). Other implementations exist, but their performance can be quite variable, producing occasional timeouts (and associated alerts). This exporter was built with a few features aimed at letting users avoid collecting more than they need, and at ensuring that timeouts cannot occur while still eventually producing useful data:
- __Pool selection__ - allow the user to select which pools are collected
- __Multiple collectors__ - allow the user to select which data types are collected (pools, filesystems, snapshots and volumes)
- __Collection deadline and caching__ - if the collection duration exceeds the configured deadline, cached data from the last run will be returned for any metrics that have not yet been collected, and the current collection run will continue in the background. Collections will not run concurrently, so a slow system is not made slower - if an existing collection is still running, cached data will be returned. This pattern is sketched below.
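For illustration, here is a minimal Go sketch of the deadline-and-cache pattern - the names and timings are invented for the example, and the real logic lives in `collector/collector.go`:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// collect simulates a slow collection run, delivering its result on out.
func collect(out chan<- string) {
	time.Sleep(2 * time.Second) // pretend the zfs CLI is slow today
	out <- "fresh data"
}

func main() {
	const deadline = time.Second
	cache := "cached data from the previous run"

	ctx, cancel := context.WithTimeout(context.Background(), deadline)
	defer cancel()

	out := make(chan string, 1) // buffered so the goroutine never blocks
	go collect(out)

	select {
	case v := <-out: // collection beat the deadline
		fmt.Println("serving:", v)
	case <-ctx.Done(): // deadline exceeded - fall back to the cache
		fmt.Println("serving:", cache)
		// The collection goroutine keeps running, and in the real
		// exporter would refresh the cache for the next scrape.
	}
}
```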
## Installation
Download the [latest release](https://github.com/pdf/zfs_exporter/releases/latest) for your platform, and unpack it somewhere on your filesystem.
You may also build the latest version using Go v1.11+ via `go get`:
```bash
go get -u github.com/pdf/zfs_exporter
```
## Usage
```
usage: zfs_exporter [<flags>]

Flags:
  -h, --help            Show context-sensitive help (also try --help-long and
                        --help-man).
      --collector.dataset-filesystem
                        Enable the dataset-filesystem collector (default:
                        enabled)
      --collector.dataset-snapshot
                        Enable the dataset-snapshot collector (default:
                        disabled)
      --collector.dataset-volume
                        Enable the dataset-volume collector (default: enabled)
      --collector.pool  Enable the pool collector (default: enabled)
      --web.listen-address=":9134"
                        Address on which to expose metrics and web interface.
      --web.telemetry-path="/metrics"
                        Path under which to expose metrics.
      --deadline=8s     Maximum duration that a collection should run before
                        returning cached data. Should be set to a value
                        shorter than your scrape timeout duration. The current
                        collection run will continue and update the cache when
                        complete (default: 8s)
      --pool=POOL ...   Name of the pool(s) to collect, repeat for multiple
                        pools (default: all pools).
      --log.level="info"
                        Only log messages with the given severity or above.
                        Valid levels: [debug, info, warn, error, fatal]
      --log.format="logger:stderr"
                        Set the log target and format. Example:
                        "logger:syslog?appname=bob&local=7" or
                        "logger:stdout?json=true"
      --version         Show application version.
```
Collectors that are enabled by default may be disabled by prefixing the flag with `--no-`, e.g.:
```
zfs_exporter --no-collector.dataset-filesystem
```
## Caveats
The exporter may need to be run as root on some platforms (e.g. Linux prior to ZFS v0.7.0).
Whilst inspiration was taken from some of the alternative ZFS collectors, metric names may not be compatible.
## Alternatives
In no particular order, here are some alternative implementations:
- https://github.com/eliothedeman/zfs_exporter
- https://github.com/ncabatoff/zfs-exporter
- https://github.com/eripa/prometheus-zfs

VERSION (new file)
@@ -0,0 +1 @@
0.0.1

collector/collector.go (new file)
@@ -0,0 +1,277 @@
package collector
import (
"context"
"fmt"
"sort"
"strings"
"sync"
"time"
"github.com/mistifyio/go-zfs"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"gopkg.in/alecthomas/kingpin.v2"
)
const (
defaultEnabled = true
defaultDisabled = false
namespace = `zfs`
helpDefaultStateEnabled = `enabled`
helpDefaultStateDisabled = `disabled`
)
var (
collectorStates = make(map[string]State)
scrapeDurationDescName = prometheus.BuildFQName(namespace, `scrape`, `collector_duration_seconds`)
scrapeDurationDesc = prometheus.NewDesc(
scrapeDurationDescName,
`zfs_exporter: Duration of a collector scrape.`,
[]string{`collector`},
nil,
)
scrapeSuccessDescName = prometheus.BuildFQName(namespace, `scrape`, `collector_success`)
scrapeSuccessDesc = prometheus.NewDesc(
scrapeSuccessDescName,
`zfs_exporter: Whether a collector succeeded.`,
[]string{`collector`},
nil,
)
)
type factoryFunc func() (Collector, error)
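// State holds a collector's enabled flag (bound to a kingpin flag at registration) and its factory function.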
type State struct {
Enabled *bool
factory factoryFunc
}
type Collector interface {
update(ch chan<- metric, pools []*zfs.Zpool) error
}
type metric struct {
name string
prometheus prometheus.Metric
}
type desc struct {
name string
prometheus *prometheus.Desc
}
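// ZFSCollector implements prometheus.Collector, coordinating the enabled collectors and applying the collection deadline and metric cache described in the README.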
type ZFSCollector struct {
Deadline time.Duration
Pools []string
Collectors map[string]State
cache map[string]prometheus.Metric
done chan struct{}
mu sync.RWMutex
}
// Describe implements the prometheus.Collector interface.
func (c *ZFSCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- scrapeDurationDesc
ch <- scrapeSuccessDesc
}
// Collect implements the prometheus.Collector interface.
func (c *ZFSCollector) Collect(ch chan<- prometheus.Metric) {
c.mu.RLock()
select {
case <-c.done:
c.mu.RUnlock()
default:
c.mu.RUnlock()
c.sendCached(ch, make(map[string]struct{}))
return
}
c.mu.Lock()
c.done = make(chan struct{})
c.mu.Unlock()
mu := sync.Mutex{}
ctx, cancel := context.WithTimeout(context.Background(), c.Deadline)
defer cancel()
proxy := make(chan metric)
cache := make(map[string]prometheus.Metric)
timeout := make(chan struct{})
wg := sync.WaitGroup{}
wg.Add(len(c.Collectors))
// Upon exceeding deadline, send cached data for any metrics that have not already been reported.
go func() {
select {
case <-ctx.Done():
mu.Lock()
cacheIndex := make(map[string]struct{}, len(cache))
c.mu.Lock()
for name, metric := range cache {
c.cache[name] = metric
cacheIndex[name] = struct{}{}
}
c.mu.Unlock()
c.sendCached(ch, cacheIndex)
close(timeout) // assert timeout for flow control in other goroutines
mu.Unlock()
case <-c.done:
}
}()
// Close the proxy channel upon collector completion.
go func() {
wg.Wait()
close(proxy)
}()
// Cache metrics as they come in via the proxy channel, and ship them out if we've not exceeded the deadline.
go func() {
for metric := range proxy {
mu.Lock()
cache[metric.name] = metric.prometheus
select {
case <-timeout:
mu.Unlock()
continue
default:
ch <- metric.prometheus
mu.Unlock()
}
}
// Signal completion.
c.mu.Lock()
c.cache = cache
close(c.done)
c.mu.Unlock()
}()
pools, err := getPools(c.Pools)
if err != nil {
log.Errorf("Could not find pools: %s", err)
return
}
for name, state := range c.Collectors {
if !*state.Enabled {
wg.Done()
continue
}
collector, err := state.factory()
if err != nil {
log.Errorf("Could not instantiate collector (%s): %s", name, err)
continue
}
go func(name string, collector Collector) {
execute(ctx, name, collector, proxy, pools)
wg.Done()
}(name, collector)
}
// Wait for either timeout or completion.
select {
case <-timeout:
case <-c.done:
}
}
// sendCached sends cached values that do not appear in the current cacheIndex.
func (c *ZFSCollector) sendCached(ch chan<- prometheus.Metric, cacheIndex map[string]struct{}) {
c.mu.RLock()
defer c.mu.RUnlock()
for name, metric := range c.cache {
if _, ok := cacheIndex[name]; ok {
continue
}
ch <- metric
}
}
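// NewZFSCollector returns a ZFSCollector for the named pools. The done channel starts closed so that the first Collect call begins a fresh collection run.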
func NewZFSCollector(deadline time.Duration, pools []string) (*ZFSCollector, error) {
sort.Strings(pools)
done := make(chan struct{})
close(done)
return &ZFSCollector{
Deadline: deadline,
Pools: pools,
Collectors: collectorStates,
cache: make(map[string]prometheus.Metric),
done: done,
}, nil
}
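// registerCollector defines a kingpin flag for enabling the named collector and records its enabled state and factory in the package-level collectorStates map.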
func registerCollector(collector string, isDefaultEnabled bool, factory factoryFunc) {
helpDefaultState := helpDefaultStateDisabled
if isDefaultEnabled {
helpDefaultState = helpDefaultStateEnabled
}
flagName := fmt.Sprintf("collector.%s", collector)
flagHelp := fmt.Sprintf("Enable the %s collector (default: %s)", collector, helpDefaultState)
defaultValue := fmt.Sprintf("%t", isDefaultEnabled)
flag := kingpin.Flag(flagName, flagHelp).Default(defaultValue).Bool()
collectorStates[collector] = State{
Enabled: flag,
factory: factory,
}
}
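// getPools resolves the configured pool names to zpool handles, or returns all pools when none are configured.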
func getPools(pools []string) ([]*zfs.Zpool, error) {
// Get all pools if not explicitly configured.
if len(pools) == 0 {
zpools, err := zfs.ListZpools()
if err != nil {
return nil, err
}
return zpools, nil
}
// Configured pools may not exist, so append available pools as they're found, rather than allocating up front.
zpools := make([]*zfs.Zpool, 0)
for _, name := range pools {
pool, err := zfs.GetZpool(name)
if err != nil {
log.Warnln("Pool unavailable:", name)
continue
}
zpools = append(zpools, pool)
}
return zpools, nil
}
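// execute runs a single collector against the given pools, emitting its metrics via ch along with scrape duration and success metrics. Collectors that complete after the deadline are logged as delayed and marked unsuccessful.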
func execute(ctx context.Context, name string, collector Collector, ch chan<- metric, pools []*zfs.Zpool) {
begin := time.Now()
err := collector.update(ch, pools)
duration := time.Since(begin)
var success float64
if err != nil {
log.Errorf("ERROR: %s collector failed after %fs: %s", name, duration.Seconds(), err)
success = 0
} else {
select {
case <-ctx.Done():
log.Warnf("DELAYED: %s collector completed after %fs: %s", name, duration.Seconds(), ctx.Err())
success = 0
default:
log.Debugf("OK: %s collector succeeded after %fs.", name, duration.Seconds())
success = 1
}
}
ch <- metric{
name: scrapeDurationDescName,
prometheus: prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name),
}
ch <- metric{
name: scrapeSuccessDescName,
prometheus: prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name),
}
}
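// expandMetricName builds a unique cache key for a metric by joining its label context and metric name.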
func expandMetricName(prefix string, context ...string) string {
return strings.Join(append(context, prefix), `-`)
}

collector/dataset.go (new file)
@@ -0,0 +1,271 @@
package collector
import (
"fmt"
zfs "github.com/mistifyio/go-zfs"
"github.com/prometheus/client_golang/prometheus"
)
func init() {
registerCollector(`dataset-filesystem`, defaultEnabled, newFilesystemCollector)
registerCollector(`dataset-snapshot`, defaultDisabled, newSnapshotCollector)
registerCollector(`dataset-volume`, defaultEnabled, newVolumeCollector)
}
type datasetCollector struct {
kind string
usedBytes desc
availableBytes desc
writtenBytes desc
volumeSizeBytes desc
logicalUsedBytes desc
usedByDatasetBytes desc
quotaBytes desc
referencedBytes desc
}
func (c *datasetCollector) update(ch chan<- metric, pools []*zfs.Zpool) error {
for _, pool := range pools {
if err := c.updatePoolMetrics(ch, pool); err != nil {
return err
}
}
return nil
}
func (c *datasetCollector) updatePoolMetrics(ch chan<- metric, pool *zfs.Zpool) error {
var (
datasets []*zfs.Dataset
err error
)
switch c.kind {
case zfs.DatasetFilesystem:
datasets, err = zfs.Filesystems(pool.Name)
case zfs.DatasetSnapshot:
datasets, err = zfs.Snapshots(pool.Name)
case zfs.DatasetVolume:
datasets, err = zfs.Volumes(pool.Name)
}
if err != nil {
return err
}
for _, dataset := range datasets {
if err = c.updateDatasetMetrics(ch, pool, dataset); err != nil {
return err
}
}
return nil
}
func (c *datasetCollector) updateDatasetMetrics(ch chan<- metric, pool *zfs.Zpool, dataset *zfs.Dataset) error {
labels := []string{dataset.Name, pool.Name, c.kind}
// Metrics shared by all dataset types.
ch <- metric{
name: expandMetricName(c.usedBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.usedBytes.prometheus,
prometheus.GaugeValue,
float64(dataset.Used),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.writtenBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.writtenBytes.prometheus,
prometheus.GaugeValue,
float64(dataset.Written),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.logicalUsedBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.logicalUsedBytes.prometheus,
prometheus.GaugeValue,
float64(dataset.Logicalused),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.referencedBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.referencedBytes.prometheus,
prometheus.GaugeValue,
float64(dataset.Referenced),
labels...,
),
}
// Metrics shared by multiple dataset types.
switch c.kind {
case zfs.DatasetFilesystem, zfs.DatasetVolume:
ch <- metric{
name: expandMetricName(c.availableBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.availableBytes.prometheus,
prometheus.GaugeValue,
float64(dataset.Avail),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.usedByDatasetBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.usedByDatasetBytes.prometheus,
prometheus.GaugeValue,
float64(dataset.Usedbydataset),
labels...,
),
}
}
// Metrics specific to individual dataset types.
switch c.kind {
case zfs.DatasetFilesystem:
ch <- metric{
name: expandMetricName(c.quotaBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.quotaBytes.prometheus,
prometheus.GaugeValue,
float64(dataset.Quota),
labels...,
),
}
case zfs.DatasetVolume:
ch <- metric{
name: expandMetricName(c.volumeSizeBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.volumeSizeBytes.prometheus,
prometheus.GaugeValue,
float64(dataset.Volsize),
labels...,
),
}
}
return nil
}
func newDatasetCollector(kind string) (Collector, error) {
switch kind {
case zfs.DatasetFilesystem, zfs.DatasetSnapshot, zfs.DatasetVolume:
default:
return nil, fmt.Errorf("unknown dataset type: %s", kind)
}
const subsystem = `dataset`
var (
labels = []string{
`name`,
`pool`,
`type`,
}
usedBytesName = prometheus.BuildFQName(namespace, subsystem, `used_bytes`)
availableBytesName = prometheus.BuildFQName(namespace, subsystem, `available_bytes`)
writtenBytesName = prometheus.BuildFQName(namespace, subsystem, `written_bytes`)
volumeSizeBytesName = prometheus.BuildFQName(namespace, subsystem, `volume_size_bytes`)
logicalUsedBytesName = prometheus.BuildFQName(namespace, subsystem, `logical_used_bytes`)
usedByDatasetBytesName = prometheus.BuildFQName(namespace, subsystem, `used_by_dataset_bytes`)
quotaBytesName = prometheus.BuildFQName(namespace, subsystem, `quota_bytes`)
referencedBytesName = prometheus.BuildFQName(namespace, subsystem, `referenced_bytes`)
)
return &datasetCollector{
kind: kind,
usedBytes: desc{
name: usedBytesName,
prometheus: prometheus.NewDesc(
usedBytesName,
`The amount of space in bytes consumed by this dataset and all its descendents.`,
labels,
nil,
),
},
availableBytes: desc{
name: availableBytesName,
prometheus: prometheus.NewDesc(
availableBytesName,
`The amount of space in bytes available to the dataset and all its children.`,
labels,
nil,
),
},
writtenBytes: desc{
name: writtenBytesName,
prometheus: prometheus.NewDesc(
writtenBytesName,
`The amount of referenced space in bytes written to this dataset since the previous snapshot.`,
labels,
nil,
),
},
volumeSizeBytes: desc{
name: volumeSizeBytesName,
prometheus: prometheus.NewDesc(
volumeSizeBytesName,
`The logical size of the volume in bytes.`,
labels,
nil,
),
},
logicalUsedBytes: desc{
name: logicalUsedBytesName,
prometheus: prometheus.NewDesc(
logicalUsedBytesName,
`The amount of space in bytes that is "logically" consumed by this dataset and all its descendents.`,
labels,
nil,
),
},
usedByDatasetBytes: desc{
name: usedByDatasetBytesName,
prometheus: prometheus.NewDesc(
usedByDatasetBytesName,
`The amount of space in bytes used by this dataset itself, which would be freed if the dataset were destroyed.`,
labels,
nil,
),
},
quotaBytes: desc{
name: quotaBytesName,
prometheus: prometheus.NewDesc(
quotaBytesName,
`The amount of space in bytes this dataset and its descendents can consume.`,
labels,
nil,
),
},
referencedBytes: desc{
name: referencedBytesName,
prometheus: prometheus.NewDesc(
referencedBytesName,
`The amount of data in bytes that is accessible by this dataset, which may or may not be shared with other datasets in the pool.`,
labels,
nil,
),
},
}, nil
}
func newFilesystemCollector() (Collector, error) {
return newDatasetCollector(zfs.DatasetFilesystem)
}
func newSnapshotCollector() (Collector, error) {
return newDatasetCollector(zfs.DatasetSnapshot)
}
func newVolumeCollector() (Collector, error) {
return newDatasetCollector(zfs.DatasetVolume)
}

collector/pool.go (new file)
@@ -0,0 +1,271 @@
package collector
import (
"fmt"
"github.com/mistifyio/go-zfs"
"github.com/prometheus/client_golang/prometheus"
)
func init() {
registerCollector(`pool`, defaultEnabled, newPoolCollector)
}
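// healthCode maps zpool health strings to stable numeric values; the ordering must match the help text generated in newPoolCollector.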
type healthCode int
const (
online healthCode = iota
degraded
faulted
offline
unavail
removed
)
type poolCollector struct {
health desc
allocatedBytes desc
sizeBytes desc
freeBytes desc
fragmentationPercent desc
readOnly desc
freeingBytes desc
leakedBytes desc
dedupRatio desc
}
func (c *poolCollector) update(ch chan<- metric, pools []*zfs.Zpool) error {
for _, pool := range pools {
if err := c.updatePoolMetrics(ch, pool); err != nil {
return err
}
}
return nil
}
func (c *poolCollector) updatePoolMetrics(ch chan<- metric, pool *zfs.Zpool) error {
health, err := healthCodeFromString(pool.Health)
if err != nil {
return err
}
var readOnly float64
if pool.ReadOnly {
readOnly = 1
}
labels := []string{pool.Name}
ch <- metric{
name: expandMetricName(c.health.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.health.prometheus,
prometheus.GaugeValue,
float64(health),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.allocatedBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.allocatedBytes.prometheus,
prometheus.GaugeValue,
float64(pool.Allocated),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.sizeBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.sizeBytes.prometheus,
prometheus.GaugeValue,
float64(pool.Size),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.freeBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.freeBytes.prometheus,
prometheus.GaugeValue,
float64(pool.Free),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.fragmentationPercent.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.fragmentationPercent.prometheus,
prometheus.GaugeValue,
float64(pool.Fragmentation),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.readOnly.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.readOnly.prometheus,
prometheus.GaugeValue,
readOnly,
labels...,
),
}
ch <- metric{
name: expandMetricName(c.freeingBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.freeingBytes.prometheus,
prometheus.GaugeValue,
float64(pool.Freeing),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.leakedBytes.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.leakedBytes.prometheus,
prometheus.GaugeValue,
float64(pool.Leaked),
labels...,
),
}
ch <- metric{
name: expandMetricName(c.dedupRatio.name, labels...),
prometheus: prometheus.MustNewConstMetric(
c.dedupRatio.prometheus,
prometheus.GaugeValue,
pool.DedupRatio,
labels...,
),
}
return nil
}
func newPoolCollector() (Collector, error) {
const subsystem = `pool`
var (
labels = []string{`pool`}
healthName = prometheus.BuildFQName(namespace, subsystem, `health`)
allocatedBytesName = prometheus.BuildFQName(namespace, subsystem, `allocated_bytes`)
sizeBytesName = prometheus.BuildFQName(namespace, subsystem, `size_bytes`)
freeBytesName = prometheus.BuildFQName(namespace, subsystem, `free_bytes`)
fragmentationPercentName = prometheus.BuildFQName(namespace, subsystem, `fragmentation_percent`)
readOnlyName = prometheus.BuildFQName(namespace, subsystem, `readonly`)
freeingBytesName = prometheus.BuildFQName(namespace, subsystem, `freeing_bytes`)
leakedBytesName = prometheus.BuildFQName(namespace, subsystem, `leaked_bytes`)
dedupRatioName = prometheus.BuildFQName(namespace, subsystem, `deduplication_ratio`)
)
return &poolCollector{
health: desc{
name: healthName,
prometheus: prometheus.NewDesc(
healthName,
fmt.Sprintf("Health status code for the pool [%d: %s, %d: %s, %d: %s, %d: %s, %d: %s, %d: %s].",
online, zfs.ZpoolOnline, degraded, zfs.ZpoolDegraded, faulted, zfs.ZpoolFaulted, offline, zfs.ZpoolOffline, unavail, zfs.ZpoolUnavail, removed, zfs.ZpoolRemoved),
labels,
nil,
),
},
allocatedBytes: desc{
name: allocatedBytesName,
prometheus: prometheus.NewDesc(
allocatedBytesName,
`Amount of storage space in bytes within the pool that has been physically allocated.`,
labels,
nil,
),
},
sizeBytes: desc{
name: sizeBytesName,
prometheus: prometheus.NewDesc(
sizeBytesName,
`Total size in bytes of the storage pool.`,
labels,
nil,
),
},
freeBytes: desc{
name: freeBytesName,
prometheus: prometheus.NewDesc(
freeBytesName,
`The amount of free space in bytes available in the pool.`,
labels,
nil,
),
},
fragmentationPercent: desc{
name: fragmentationPercentName,
prometheus: prometheus.NewDesc(
fragmentationPercentName,
`Fragmentation percentage of the pool.`,
labels,
nil,
),
},
readOnly: desc{
name: readOnlyName,
prometheus: prometheus.NewDesc(
readOnlyName,
`Read-only status of the pool [0: read-write, 1: read-only].`,
labels,
nil,
),
},
freeingBytes: desc{
name: freeingBytesName,
prometheus: prometheus.NewDesc(
freeingBytesName,
`The amount of space in bytes remaining to be freed following the destruction of a file system or snapshot.`,
labels,
nil,
),
},
leakedBytes: desc{
name: leakedBytesName,
prometheus: prometheus.NewDesc(
leakedBytesName,
`Number of leaked bytes in the pool.`,
labels,
nil,
),
},
dedupRatio: desc{
name: dedupRatioName,
prometheus: prometheus.NewDesc(
dedupRatioName,
`The deduplication ratio specified for the pool, expressed as a multiplier.`,
labels,
nil,
),
},
}, nil
}
func healthCodeFromString(status string) (healthCode, error) {
switch status {
case zfs.ZpoolOnline:
return online, nil
case zfs.ZpoolDegraded:
return degraded, nil
case zfs.ZpoolFaulted:
return faulted, nil
case zfs.ZpoolOffline:
return offline, nil
case zfs.ZpoolUnavail:
return unavail, nil
case zfs.ZpoolRemoved:
return removed, nil
}
return -1, fmt.Errorf(`unknown pool health status: %s`, status)
}

go.mod (new file)
@@ -0,0 +1,22 @@
module github.com/pdf/zfs_exporter
require (
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973 // indirect
github.com/gogo/protobuf v1.1.1 // indirect
github.com/golang/protobuf v1.2.0 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
github.com/mistifyio/go-zfs v2.1.2-0.20180321011823-d5b163290a48+incompatible
github.com/pborman/uuid v1.2.0 // indirect
github.com/prometheus/client_golang v0.9.1
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910 // indirect
github.com/prometheus/common v0.0.0-20181020173914-7e9e6cabbd39
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d // indirect
github.com/sirupsen/logrus v1.2.0 // indirect
golang.org/x/crypto v0.0.0-20181106171534-e4dc69e5b2fd // indirect
golang.org/x/net v0.0.0-20181106065722-10aee1819953 // indirect
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f // indirect
golang.org/x/sys v0.0.0-20181106135930-3a76605856fd // indirect
gopkg.in/alecthomas/kingpin.v2 v2.2.6
)

go.sum (new file)
@@ -0,0 +1,51 @@
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc h1:cAKDfWh5VpdgMhJosfJnn5/FoN2SRZ4p7fJNX58YPaU=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf h1:qet1QNfXsQxTZqLG4oE62mJzwPIB8+Tee4RNCL9ulrY=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973 h1:xJ4a3vCFaGF/jqvzLMYoU8P317H5OQ+Via4RmuPwCS0=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/google/uuid v1.0.0 h1:b4Gk+7WdP/d3HZH8EJsZpvV7EtDOgaZLtnaNGIu1adA=
github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mistifyio/go-zfs v2.1.2-0.20180321011823-d5b163290a48+incompatible h1:ykF7RTauaW3BBWZPcJz4dPU5aZCbi6SQiKhIl3WxdUs=
github.com/mistifyio/go-zfs v2.1.2-0.20180321011823-d5b163290a48+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4=
github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g=
github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v0.9.1 h1:K47Rk0v/fkEfwfQet2KWhscE0cJzjgCCDBG2KHZoVno=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910 h1:idejC8f05m9MGOsuEi1ATq9shN03HrxNkD/luQvxCv8=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/common v0.0.0-20181020173914-7e9e6cabbd39 h1:Cto4X6SVMWRPBkJ/3YHn1iDGDGc/Z+sW+AEMKHMVvN4=
github.com/prometheus/common v0.0.0-20181020173914-7e9e6cabbd39/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d h1:GoAlyOgbOEIFdaDqxJVlbOQ1DtGmZWs/Qau0hIlk+WQ=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/sirupsen/logrus v1.2.0 h1:juTguoYk5qI21pwyTXY3B3Y5cOTH3ZUyZCg1v/mihuo=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793 h1:u+LnwYTOOW7Ukr/fppxEb1Nwz0AtPflrblfvUudpo+I=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20181106171534-e4dc69e5b2fd h1:VtIkGDhk0ph3t+THbvXHfMZ8QHgsBO39Nh52+74pq7w=
golang.org/x/crypto v0.0.0-20181106171534-e4dc69e5b2fd/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/net v0.0.0-20181106065722-10aee1819953 h1:LuZIitY8waaxUfNIdtajyE/YzA/zyf0YxXG27VpLrkg=
golang.org/x/net v0.0.0-20181106065722-10aee1819953/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33 h1:I6FyU15t786LL7oL/hn43zqTuEGr4PN7F4XJ1p4E3Y8=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181106135930-3a76605856fd h1:5lx5yH6109ClL0rlBzOj++ZkX/njUT+RVgTO2RMbmZo=
golang.org/x/sys v0.0.0-20181106135930-3a76605856fd/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=

zfs_exporter.go (new file)
@@ -0,0 +1,105 @@
package main
import (
"fmt"
"net/http"
"github.com/pdf/zfs_exporter/collector"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/log"
"github.com/prometheus/common/version"
"gopkg.in/alecthomas/kingpin.v2"
)
func init() {
prometheus.MustRegister(version.NewCollector("zfs_exporter"))
}
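// handler wires the ZFS collector into a fresh registry per request, and serves it alongside the default process metrics.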
func handler(c *collector.ZFSCollector) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
registry := prometheus.NewRegistry()
if err := registry.Register(c); err != nil {
serr := fmt.Sprintf("Couldn't register collector: %s", err)
log.Errorln(serr)
w.WriteHeader(http.StatusInternalServerError)
if _, err = w.Write([]byte(serr)); err != nil {
log.Warnln(`Couldn't write response:`, err)
}
return
}
gatherers := prometheus.Gatherers{
prometheus.DefaultGatherer,
registry,
}
h := promhttp.InstrumentMetricHandler(
registry,
promhttp.HandlerFor(gatherers,
promhttp.HandlerOpts{
ErrorLog: log.NewErrorLogger(),
ErrorHandling: promhttp.ContinueOnError,
}),
)
h.ServeHTTP(w, r)
}
}
func main() {
var (
listenAddress = kingpin.Flag("web.listen-address", "Address on which to expose metrics and web interface.").Default(":9134").String()
metricsPath = kingpin.Flag("web.telemetry-path", "Path under which to expose metrics.").Default("/metrics").String()
deadline = kingpin.Flag("deadline", "Maximum duration that a collection should run before returning cached data. Should be set to a value shorter than your scrape timeout duration. The current collection run will continue and update the cache when complete (default: 8s)").Default("8s").Duration()
pools = kingpin.Flag("pool", "Name of the pool(s) to collect, repeat for multiple pools (default: all pools).").Strings()
)
log.AddFlags(kingpin.CommandLine)
kingpin.Version(version.Print("zfs_exporter"))
kingpin.HelpFlag.Short('h')
kingpin.Parse()
log.Infoln("Starting zfs_exporter", version.Info())
log.Infoln("Build context", version.BuildContext())
c, err := collector.NewZFSCollector(*deadline, *pools)
if err != nil {
log.Fatalf("Couldn't create collector: %s", err)
}
log.Infof("Enabling pools:")
for _, p := range c.Pools {
log.Infof(" - %s", p)
}
if len(c.Pools) == 0 {
log.Infof(" - (all)")
}
log.Infof("Enabling collectors:")
for n, c := range c.Collectors {
if *c.Enabled {
log.Infof(" - %s", n)
}
}
http.HandleFunc(*metricsPath, handler(c))
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
_, err = w.Write([]byte(`<html>
<head><title>ZFS Exporter</title></head>
<body>
<h1>ZFS Exporter</h1>
<p><a href="` + *metricsPath + `">Metrics</a></p>
</body>
</html>`))
if err != nil {
log.Errorln(err)
}
})
log.Infoln("Listening on", *listenAddress)
err = http.ListenAndServe(*listenAddress, nil)
if err != nil {
log.Fatal(err)
}
}