专业的编程技术博客社区

网站首页 > 博客文章 正文

深入理解prometheus:如何为表达式抽取和注入Labels

baijin 2024-08-17 10:42:22 博客文章 14 ℃ 0 评论

本文分析PromQL基本组成,以及如何抽取和注入Labels,先上结论和方法:

如何抽取Labels

最新的Prometheus代码中已经给出了对应的函数

https://github.com/prometheus/prometheus/blob/cd18da36058aee37c237d765864a1a59f263bb96/promql/parser/ast.go#L319

// ExtractSelectors walks expr and returns the label matchers of every
// VectorSelector it encounters, one slice per selector, in traversal order.
// The result is nil when expr contains no VectorSelector.
func ExtractSelectors(expr Expr) [][]*labels.Matcher {
	var found [][]*labels.Matcher
	collect := func(n Node, _ []Node) error {
		if sel, isSelector := n.(*VectorSelector); isSelector {
			found = append(found, sel.LabelMatchers)
		}
		return nil
	}
	Inspect(expr, collect)
	return found
}

如何注入Labels

没有提供,但可以根据上面的代码实现一个

// InjectSelectors appends every matcher in selectors to the LabelMatchers of
// each VectorSelector found in expr. The AST is modified in place.
//
// When selectors is empty the expression is left untouched and not traversed.
// The returned error is always nil; it exists so that callers can treat the
// function like other fallible AST rewrites.
func InjectSelectors(expr Expr, selectors []*labels.Matcher) error {
	if len(selectors) == 0 {
		// Nothing to inject, so there is no reason to walk the tree.
		return nil
	}
	Inspect(expr, func(node Node, _ []Node) error {
		if vs, ok := node.(*VectorSelector); ok {
			vs.LabelMatchers = append(vs.LabelMatchers, selectors...)
		}
		return nil
	})
	return nil
}

测试代码

import (
	"testing"

	"github.com/prometheus/prometheus/pkg/labels"
	"github.com/prometheus/prometheus/promql/parser"
)

// genQuery parses the PromQL expression qs, appends an equality matcher for
// every name/value pair in lset to each vector selector in the expression,
// and returns the rewritten expression rendered back to a string.
//
// A nil lset returns qs unchanged without parsing it. If qs cannot be parsed,
// the parse error is returned together with an empty string.
func genQuery(qs string, lset map[string]string) (string, error) {
	if lset == nil {
		return qs, nil
	}
	expr, err := parser.ParseExpr(qs)
	if err != nil {
		return "", err
	}
	// Build the matchers once and reuse them for every selector. Map iteration
	// order is unspecified, so the order in which they are appended is too.
	selectors := make([]*labels.Matcher, 0, len(lset))
	for k, v := range lset {
		selectors = append(selectors, labels.MustNewMatcher(labels.MatchEqual, k, v))
	}
	parser.Inspect(expr, func(node parser.Node, _ []parser.Node) error {
		if vs, ok := node.(*parser.VectorSelector); ok {
			vs.LabelMatchers = append(vs.LabelMatchers, selectors...)
		}
		return nil
	})
	return expr.String(), nil
}

// TestGenQuery feeds a set of sample PromQL expressions through genQuery with
// two extra labels and logs each rewritten expression. It fails only when
// genQuery returns an error; the rewritten output itself is not asserted.
func TestGenQuery(t *testing.T) {
	extraLabels := map[string]string{
		"tcs_product": "cvm",
		"tcs_type":    "cvm",
	}
	queries := []string{
		"node_cpu_usage > 0",
		"rate(node_cpu_total{node=\"n1\"}[1m]) > rate(node_cpu_total{node=\"n2\"}[1m])",
		"container_cpu_limit_usage / avg_over_time(container_cpu_limit_usage[1d] offset 1d) > 1.01",
		"container_cpu_limit_usage > 0 and container_memory_limit_usage > 0",
		"container_cpu_limit_usage > 0.5 and container_memory_limit_usage > 0.5 or container_cpu_limit_usage > 0.8",
		`sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
		/
		sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)`,
	}
	for i := range queries {
		rewritten, err := genQuery(queries[i], extraLabels)
		if err != nil {
			t.Fatal(err)
		}
		t.Log(rewritten)
	}
}
=== RUN   TestGenQuery
    xxx_test.go:52: node_cpu_usage{tcs_product="cvm",tcs_type="cvm"} > 0
    xxx_test.go:52: rate(node_cpu_total{node="n1",tcs_product="cvm",tcs_type="cvm"}[1m]) > rate(node_cpu_total{node="n2",tcs_product="cvm",tcs_type="cvm"}[1m])
    xxx_test.go:52: container_cpu_limit_usage{tcs_product="cvm",tcs_type="cvm"} / avg_over_time(container_cpu_limit_usage{tcs_product="cvm",tcs_type="cvm"}[1d] offset 1d) > 1.01
    xxx_test.go:52: container_cpu_limit_usage{tcs_product="cvm",tcs_type="cvm"} > 0 and container_memory_limit_usage{tcs_product="cvm",tcs_type="cvm"} > 0
    xxx_test.go:52: container_cpu_limit_usage{tcs_product="cvm",tcs_type="cvm"} > 0.5 and container_memory_limit_usage{tcs_product="cvm",tcs_type="cvm"} > 0.5 or container_cpu_limit_usage{tcs_product="cvm",tcs_type="cvm"} > 0.8
    xxx_test.go:52: sum without(instance, pod) (rate(apiserver_request_duration_seconds_sum{subresource!="log",tcs_product="cvm",tcs_type="cvm",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) / sum without(instance, pod) (rate(apiserver_request_duration_seconds_count{subresource!="log",tcs_product="cvm",tcs_type="cvm",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m]))
--- PASS: TestGenQuery (0.00s)
PASS
ok  	xxx_test	0.024s


实现原理分析

PromQL基本接口和类型

PromQL抽象的接口类型,目前有11种Node或者说Expr接口类型,但基本类型为:NumberLiteral, StringLiteral, VectorSelector, MatrixSelector。前两个本身就没有任何标签,后两个有明确的结构体来保存标签,其中MatrixSelector嵌套了VectorSelector。其他类型只是这四种基本类型的组合,抽取和注入labels最根本是要对VectorSelector进行操作。

接口定义代码

https://github.com/prometheus/prometheus/blob/cd18da36058aee37c237d765864a1a59f263bb96/promql/parser/ast.go#L26

// Node is the interface implemented by every node in a PromQL AST.
//
// Convention: wherever many node types are enumerated together, for example
// in a type switch or in a run of method definitions such as String() or
// PromQLExpr(), list them in this order:
//
//   - Statements
//   - statement types (alphabetical)
//   - ...
//   - Expressions
//   - expression types (alphabetical)
//   - ...
type Node interface {
	// String returns a textual representation of the node which, when parsed
	// as part of a valid query, yields the given node again.
	String() string

	// PositionRange returns the position of the AST Node in the query string.
	PositionRange() PositionRange
}

// Statement is a generic interface for all statements.
// It extends Node with a marker method.
type Statement interface {
	Node

	// PromQLStmt is a marker method: it ensures that no other type
	// accidentally implements the interface.
	// nolint:unused
	PromQLStmt()
}

// EvalStmt holds an expression together with the time range and step over
// which it should be evaluated.
type EvalStmt struct {
	Expr Expr // Expression to be evaluated.

	// The time boundaries for the evaluation. If Start equals End, a single
	// instant is evaluated.
	Start, End time.Time
	// Time between two evaluated instants for the range [Start:End].
	Interval time.Duration
}

// PromQLStmt is the empty marker method required by the Statement interface.
func (*EvalStmt) PromQLStmt() {}

// Expr is a generic interface for all expression types.
// It extends Node with the value type of the expression and a marker method.
type Expr interface {
	Node

	// Type returns the type the expression evaluates to. It does not perform
	// in-depth checks, as those are done at parsing time.
	Type() ValueType
	// PromQLExpr is a marker method: it ensures that no other types
	// accidentally implement the interface.
	PromQLExpr()
}

// Expressions is a list of expression nodes. According to the original
// comment the list type itself implements Node; the methods are not shown here.
type Expressions []Expr

类型代码

// AggregateExpr represents an aggregation operation on a Vector.
// Grouping and Without together describe the grouping clause: Grouping lists
// the labels and Without selects whether they are dropped or kept.
type AggregateExpr struct {
	Op       ItemType // The used aggregation operation.
	Expr     Expr     // The Vector expression over which is aggregated.
	Param    Expr     // Parameter used by some aggregators.
	Grouping []string // The labels by which to group the Vector.
	Without  bool     // Whether to drop the given labels rather than keep them.
	PosRange PositionRange
}

// BinaryExpr represents a binary expression between two child expressions.
type BinaryExpr struct {
	Op       ItemType // The operation of the expression.
	LHS, RHS Expr     // The operands on the respective sides of the operator.

	// The matching behavior for the operation if both operands are Vectors.
	// If they are not, this field is nil.
	VectorMatching *VectorMatching

	// For a comparison operator: return 0/1 rather than filtering.
	ReturnBool bool
}

// Call represents a function call: the function and the argument expressions
// passed to it.
type Call struct {
	Func *Function   // The function that was called.
	Args Expressions // Arguments used in the call.

	// NOTE(review): presumably the span of the call in the query string — confirm.
	PosRange PositionRange
}

// MatrixSelector represents a Matrix selection: a wrapped vector selector
// plus a range duration.
type MatrixSelector struct {
	// It is safe to assume that this is a *VectorSelector
	// if the parser hasn't returned an error.
	VectorSelector Expr
	Range          time.Duration

	// NOTE(review): presumably the end position of the selector in the query
	// string — confirm.
	EndPos Pos
}

// SubqueryExpr represents a subquery over an inner expression.
type SubqueryExpr struct {
	Expr  Expr
	Range time.Duration
	// OriginalOffset is the actual offset that was set in the query.
	// This never changes.
	OriginalOffset time.Duration
	// Offset is the offset used during the query execution. It is calculated
	// from the original offset, the @ modifier time, the eval time, and the
	// subquery offsets in the AST tree.
	Offset     time.Duration
	Timestamp  *int64
	StartOrEnd ItemType // Set when @ is used with start() or end()
	Step       time.Duration

	// NOTE(review): presumably the end position of the subquery in the query
	// string — confirm.
	EndPos Pos
}

// NumberLiteral represents a number. It stores the literal's value as a
// float64.
type NumberLiteral struct {
	Val float64

	// NOTE(review): presumably the span of the literal in the query string — confirm.
	PosRange PositionRange
}

// ParenExpr wraps an expression so that operator precedence cannot take it
// apart; Expr holds the wrapped expression.
type ParenExpr struct {
	Expr     Expr
	PosRange PositionRange
}

// StringLiteral represents a string. Val holds the literal's value.
type StringLiteral struct {
	Val      string
	PosRange PositionRange
}

// UnaryExpr represents a unary operation on another expression: Op is the
// operator and Expr its operand.
// Currently unary operations are only supported for Scalars.
type UnaryExpr struct {
	Op   ItemType
	Expr Expr

	// NOTE(review): presumably where the expression starts in the query
	// string — confirm.
	StartPos Pos
}

// StepInvariantExpr represents a query which evaluates to the same result
// irrespective of the evaluation time, provided the raw samples from TSDB
// remain unchanged. Currently this is only used for engine optimisations and
// the parser does not produce it.
type StepInvariantExpr struct {
	Expr Expr
}

// String returns the string form of the wrapped expression.
func (e *StepInvariantExpr) String() string { return e.Expr.String() }

// PositionRange returns the position range of the wrapped expression.
func (e *StepInvariantExpr) PositionRange() PositionRange { return e.Expr.PositionRange() }

// VectorSelector represents a Vector selection. It is the node type that
// carries the label matchers of a query (LabelMatchers).
type VectorSelector struct {
	Name string
	// OriginalOffset is the actual offset that was set in the query.
	// This never changes.
	OriginalOffset time.Duration
	// Offset is the offset used during the query execution. It is calculated
	// from the original offset, the @ modifier time, the eval time, and the
	// subquery offsets in the AST tree.
	Offset        time.Duration
	Timestamp     *int64
	StartOrEnd    ItemType // Set when @ is used with start() or end()
	LabelMatchers []*labels.Matcher

	// The unexpanded seriesSet populated at query preparation time.
	UnexpandedSeriesSet storage.SeriesSet
	Series              []storage.Series

	PosRange PositionRange
}

可以看到Expr这个interface是由接口Node、一个Type() ValueType函数和一个标记方法PromQLExpr()组成,ValueType定义如下:

// Value is a generic interface for values resulting from a query evaluation.
type Value interface {
	// Type returns the ValueType of the value.
	Type() ValueType
	// String returns a string form of the value.
	String() string
}

// ValueType describes the type of a value. It is a string so that the
// constants below carry their own internal names.
type ValueType string

// The valid value types. DocumentedType maps the vector and matrix values to
// the terms used in the user-facing documentation.
const (
	ValueTypeNone   ValueType = "none"
	ValueTypeVector ValueType = "vector"
	ValueTypeScalar ValueType = "scalar"
	ValueTypeMatrix ValueType = "matrix"
	ValueTypeString ValueType = "string"
)

// DocumentedType translates an internal value type into the term used in the
// user-facing documentation: ValueTypeVector becomes "instant vector" and
// ValueTypeMatrix becomes "range vector". Every other type is returned as its
// plain string value.
func DocumentedType(t ValueType) string {
	if t == ValueTypeVector {
		return "instant vector"
	}
	if t == ValueTypeMatrix {
		return "range vector"
	}
	return string(t)
}

从上面可以看出,vector=instant vector=VectorSelector,matrix=range vector=MatrixSelector

同时回顾下Prometheus文档中关于四种类型的定义:

  • Instant vector - a set of time series containing a single sample for each time series, all sharing the same timestamp
  • Range vector - a set of time series containing a range of data points over time for each time series
  • Scalar - a simple numeric floating point value
  • String - a simple string value; currently unused

根据前面的分析,MatrixSelector嵌套了VectorSelector,那么只有VectorSelector具有Labels,可以抽取和修改,那首先需要解析PromQL语法并找到VectorSelector。继续查看Prometheus的代码,发现已经有对应的辅助函数可以简化我们的实现:

// Visitor allows visiting a Node and its child nodes. Walk invokes the Visit
// method for each node, additionally passing the path that leads to the node.
// If Visit returns a non-nil visitor w and no error, Walk visits each child of
// node with w and afterwards calls w.Visit(nil, nil).
type Visitor interface {
	// Visit handles a single node. Returning a nil visitor or a non-nil error
	// stops Walk from descending into the node's children.
	Visit(node Node, path []Node) (w Visitor, err error)
}

// Walk performs a depth-first traversal of the AST rooted at node, which must
// not be nil. It first calls v.Visit(node, path). If that call reports an
// error or returns a nil visitor, Walk stops and returns that error (nil in
// the latter case). Otherwise it recurses with the returned visitor into each
// node returned by Children(node), passing the path extended by node, and
// stops at the first error. Finally it calls Visit(nil, nil) on the returned
// visitor and returns the resulting error.
func Walk(v Visitor, node Node, path []Node) error {
	next, err := v.Visit(node, path)
	if err != nil || next == nil {
		return err
	}

	path = append(path, node)
	for _, child := range Children(node) {
		if walkErr := Walk(next, child, path); walkErr != nil {
			return walkErr
		}
	}

	// Signal the end of this subtree to the visitor.
	_, err = next.Visit(nil, nil)
	return err
}

// ExtractSelectors walks expr and returns the label matchers of every
// VectorSelector it encounters, one slice per selector, in traversal order.
// The result is nil when expr contains no VectorSelector.
func ExtractSelectors(expr Expr) [][]*labels.Matcher {
	var found [][]*labels.Matcher
	collect := func(n Node, _ []Node) error {
		if sel, isSelector := n.(*VectorSelector); isSelector {
			found = append(found, sel.LabelMatchers)
		}
		return nil
	}
	Inspect(expr, collect)
	return found
}

// inspector adapts a plain function to the Visitor interface.
type inspector func(Node, []Node) error

// Visit calls f(node, path). If f fails, the error is returned with a nil
// visitor; otherwise f itself is returned as the visitor for the children.
func (f inspector) Visit(node Node, path []Node) (Visitor, error) {
	err := f(node, path)
	if err == nil {
		return f, nil
	}
	return nil, err
}

// Inspect traverses an AST in depth-first order by handing f to Walk. It
// starts by calling f(node, path); node must not be nil. As long as f returns
// a nil error, f is invoked recursively for the children of node.
func Inspect(node Node, f inspector) {
	// Inspect has no error result, so the error from Walk is discarded on purpose.
	_ = Walk(f, node, nil)
}

// Children returns the direct child nodes of a syntax tree node.
// Leaf nodes (number literals, string literals and vector selectors) yield an
// empty, non-nil slice. An unknown node type causes a panic.
func Children(node Node) []Node {
	// A type switch is used on purpose: per the original authors it performs
	// significantly better here than dispatching through interfaces.
	switch n := node.(type) {
	case *EvalStmt:
		return []Node{n.Expr}
	case Expressions:
		// Go cannot convert a slice of one interface type to another, so the
		// elements are copied one by one.
		children := make([]Node, 0, len(n))
		for _, e := range n {
			children = append(children, e)
		}
		return children
	case *AggregateExpr:
		// Build an exactly-sized slice for each combination so that no slice
		// ever has to grow.
		hasExpr, hasParam := n.Expr != nil, n.Param != nil
		switch {
		case hasExpr && hasParam:
			return []Node{n.Expr, n.Param}
		case hasExpr:
			return []Node{n.Expr}
		case hasParam:
			return []Node{n.Param}
		default:
			return nil
		}
	case *BinaryExpr:
		return []Node{n.LHS, n.RHS}
	case *Call:
		// Same element-wise copy as for Expressions.
		args := make([]Node, 0, len(n.Args))
		for _, arg := range n.Args {
			args = append(args, arg)
		}
		return args
	case *SubqueryExpr:
		return []Node{n.Expr}
	case *ParenExpr:
		return []Node{n.Expr}
	case *UnaryExpr:
		return []Node{n.Expr}
	case *MatrixSelector:
		return []Node{n.VectorSelector}
	case *StepInvariantExpr:
		return []Node{n.Expr}
	case *NumberLiteral, *StringLiteral, *VectorSelector:
		// Leaves have no children.
		return []Node{}
	default:
		panic(errors.Errorf("promql.Children: unhandled node type %T", node))
	}
}

其中Inspect函数可以接受一个带有接口Node的入参函数f,然后对语法树中的每个Node调用函数f进行对应处理,而且ExtractSelectors还提供了一个很好的例子,可以说很贴心了。

知其然,知其所以然,下面继续分析下内部是如何实现的。

首先,PromQL分析后得到的是一棵抽象语法树,其中树的每个节点都实现了Node接口,回顾下刚开始的Node接口定义:

// Node is the interface implemented by every node in a PromQL AST.
//
// Convention: wherever many node types are enumerated together, for example
// in a type switch or in a run of method definitions such as String() or
// PromQLExpr(), list them in this order:
//
//   - Statements
//   - statement types (alphabetical)
//   - ...
//   - Expressions
//   - expression types (alphabetical)
//   - ...
type Node interface {
	// String returns a textual representation of the node which, when parsed
	// as part of a valid query, yields the given node again.
	String() string

	// PositionRange returns the position of the AST Node in the query string.
	PositionRange() PositionRange
}

然后,对于语法树中的每个节点需要一个遍历的方法,Prometheus代码也提供了对应的函数Walk:

// Walk performs a depth-first traversal of the AST rooted at node, which must
// not be nil. It first calls v.Visit(node, path). If that call reports an
// error or returns a nil visitor, Walk stops and returns that error (nil in
// the latter case). Otherwise it recurses with the returned visitor into each
// node returned by Children(node), passing the path extended by node, and
// stops at the first error. Finally it calls Visit(nil, nil) on the returned
// visitor and returns the resulting error.
func Walk(v Visitor, node Node, path []Node) error {
	next, err := v.Visit(node, path)
	if err != nil || next == nil {
		return err
	}

	path = append(path, node)
	for _, child := range Children(node) {
		if walkErr := Walk(next, child, path); walkErr != nil {
			return walkErr
		}
	}

	// Signal the end of this subtree to the visitor.
	_, err = next.Visit(nil, nil)
	return err
}

Walk从根开始深度优先遍历所有节点,然后依次调用Visitor的Visit方法,Visitor是抽象出来的一个需要实现具体访问操作的接口。

正常情况需要我们自己定义一个struct,然后实现Visitor接口,不过像这种通用操作,在Golang中通常会使用一种叫函数类型转换的方式(参照http.HandlerFunc)将该实现简化:

// inspector adapts a plain function to the Visitor interface.
type inspector func(Node, []Node) error

// Visit calls f(node, path). If f fails, the error is returned with a nil
// visitor; otherwise f itself is returned as the visitor for the children.
func (f inspector) Visit(node Node, path []Node) (Visitor, error) {
	err := f(node, path)
	if err == nil {
		return f, nil
	}
	return nil, err
}

有了这个inspector类型,我们只需要实现一个函数签名为func(Node, []Node) error的函数,然后转换为inspector类型,自动就实现了Visit方法,也就可以满足Walk函数了,这里官方也给出了实现:

// Inspect traverses an AST in depth-first order by handing f to Walk. It
// starts by calling f(node, path); node must not be nil. As long as f returns
// a nil error, f is invoked recursively for the children of node.
func Inspect(node Node, f inspector) {
	// Inspect has no error result, so the error from Walk is discarded on purpose.
	_ = Walk(f, node, nil)
}

OK,到这里,所有分析完毕,各位看官理解了吗?如有疑问,欢迎留言

本文暂时没有评论,来添加一个吧(●'◡'●)

欢迎 发表评论:

最近发表
标签列表