WIP: update idx

Dieterbe · Dieterbe · commit 1c511b762db0 · 2018-03-19T13:45:25.000+01:00
* update MetricDefinition serialization
* use MKeys internally instead of string IDs
* support msg.Point
* while we're at it, also document all Index function parameters
diff --git a/idx/cassandra/cassandra.go b/idx/cassandra/cassandra.go
@@ -264,7 +264,7 @@ func (c *CasIdx) Stop() {
 	c.session.Close()
 }
 
-func (c *CasIdx) AddOrUpdate(data *schema.MetricData, partition int32) idx.Archive {
+func (c *CasIdx) AddOrUpdate(point msg.Point, partition int32) idx.Archive {
 	pre := time.Now()
 	existing, inMemory := c.MemoryIdx.Get(data.Id)
 	archive := c.MemoryIdx.AddOrUpdate(data, partition)
@@ -358,8 +358,14 @@ func (c *CasIdx) load(defs []schema.MetricDefinition, iter cqlIterator, cutoff u
 	var tags []string
 	cutoff64 := int64(cutoff)
 	for iter.Scan(&id, &orgId, &partition, &name, &metric, &interval, &unit, &mtype, &tags, &lastupdate) {
+		mkey, err := schema.MKeyFromString(id)
+		if err != nil {
+			log.Error(3, "cassandra-idx: load() could not parse ID %q: %s -> skipping", id, err)
+			continue
+		}
+
 		mdef := &schema.MetricDefinition{
-			Id:         id,
+			Id:         mkey,
 			OrgId:      orgId,
 			Partition:  partition,
 			Name:       name,
@@ -413,7 +419,7 @@ func (c *CasIdx) processWriteQueue() {
 		for !success {
 			if err := c.session.Query(
 				qry,
-				req.def.Id,
+				req.def.Id.String(),
 				req.def.OrgId,
 				req.def.Partition,
 				req.def.Name,
diff --git a/idx/idx.go b/idx/idx.go
@@ -6,6 +6,8 @@ import (
 	"errors"
 	"time"
 
+	"github.com/grafana/metrictank/msg"
+
 	schema "gopkg.in/raintank/schema.v1"
 )
 
@@ -58,37 +60,35 @@ type MetricIndex interface {
 
 	// AddOrUpdate makes sure a metric is known in the index,
 	// and should be called for every received metric.
-	AddOrUpdate(*schema.MetricData, int32) Archive
+	AddOrUpdate(point msg.Point, partition int32) Archive
 
 	// Get returns the archive for the requested id.
-	Get(string) (Archive, bool)
+	Get(key schema.MKey) (Archive, bool)
 
 	// GetPath returns the archives under the given path.
-	GetPath(int, string) []Archive
+	GetPath(orgId int, path string) []Archive
 
-	// Delete deletes items from the index for the given org and query.
+	// Delete deletes items from the index.
 	// If the pattern matches a branch node, then
 	// all leaf nodes on that branch are deleted. So if the pattern is
 	// "*", all items in the index are deleted.
 	// It returns a copy of all of the Archives deleted.
-	Delete(int, string) ([]Archive, error)
+	Delete(orgId int, pattern string) ([]Archive, error)
 
-	// Find searches the index.  The method is passed an OrgId, a query
-	// pattern and a unix timestamp. Searches should return all nodes that match for
-	// the given OrgId and OrgId -1.  The pattern should be handled in the same way
-	// Graphite would. see https://graphite.readthedocs.io/en/latest/render_api.html#paths-and-wildcards
-	// And the unix stimestamp is used to ignore series that have been stale since
-	// the timestamp.
-	Find(int, string, int64) ([]Node, error)
+	// Find searches the index for matching nodes.
+	// * orgId can be -1. for any other orgId, nodes that match under orgId -1 are returned as well.
+	// * pattern is handled like graphite does. see https://graphite.readthedocs.io/en/latest/render_api.html#paths-and-wildcards
+	// * from is a unix timestamp. series not updated since then are excluded.
+	Find(orgId int, pattern string, from int64) ([]Node, error)
 
 	// List returns all Archives for the passed OrgId, or for all organisations if -1 is provided.
-	List(int) []Archive
+	List(orgId int) []Archive
 
 	// Prune deletes all metrics from the index for the passed org where
 	// the last time the metric was seen is older then the passed timestamp. If the org
 	// passed is -1, then the all orgs should be examined for stale metrics to be deleted.
 	// It returns all Archives deleted and any error encountered.
-	Prune(int, time.Time) ([]Archive, error)
+	Prune(orgId int, oldest time.Time) ([]Archive, error)
 
 	// FindByTag takes a list of expressions in the format key<operator>value.
 	// The allowed operators are: =, !=, =~, !=~.
@@ -98,24 +98,24 @@ type MetricIndex interface {
 	// where the LastUpdate time is >= from will be returned as results.
 	// The returned results are not deduplicated and in certain cases it is possible
 	// that duplicate entries will be returned.
-	FindByTag(int, []string, int64) ([]Node, error)
+	FindByTag(orgId int, expressions []string, from int64) ([]Node, error)
 
 	// Tags returns a list of all tag keys associated with the metrics of a given
 	// organization. The return values are filtered by the regex in the second parameter.
 	// If the third parameter is >0 then only metrics will be accounted of which the
 	// LastUpdate time is >= the given value.
-	Tags(int, string, int64) ([]string, error)
+	Tags(orgId int, filter string, from int64) ([]string, error)
 
 	// FindTags generates a list of possible tags that could complete a
 	// given prefix. It also accepts additional tag conditions to further narrow
 	// down the result set in the format of graphite's tag queries
-	FindTags(int, string, []string, int64, uint) ([]string, error)
+	FindTags(orgId int, prefix string, expressions []string, from int64, limit uint) ([]string, error)
 
 	// FindTagValues generates a list of possible values that could
 	// complete a given value prefix. It requires a tag to be specified and only values
 	// of the given tag will be returned. It also accepts additional conditions to
 	// further narrow down the result set in the format of graphite's tag queries
-	FindTagValues(int, string, string, []string, int64, uint) ([]string, error)
+	FindTagValues(orgId int, tag string, prefix string, expressions []string, from int64, limit uint) ([]string, error)
 
 	// TagDetails returns a list of all values associated with a given tag key in the
 	// given org. The occurrences of each value is counted and the count is referred to by
@@ -124,9 +124,9 @@ type MetricIndex interface {
 	// the values before accounting for them.
 	// If the fourth parameter is > 0 then only those metrics of which the LastUpdate
 	// time is >= the from timestamp will be included.
-	TagDetails(int, string, string, int64) (map[string]uint64, error)
+	TagDetails(orgId int, key string, filter string, from int64) (map[string]uint64, error)
 
 	// DeleteTagged deletes the specified series from the tag index and also the
 	// DefById index.
-	DeleteTagged(int, []string) ([]Archive, error)
+	DeleteTagged(orgId int, paths []string) ([]Archive, error)
 }
diff --git a/idx/memory/memory.go b/idx/memory/memory.go
@@ -63,15 +63,15 @@ type Tree struct {
 	Items map[string]*Node // key is the full path of the node.
 }
 
-type IdSet map[string]struct{} // set of ids
+type IdSet map[schema.MKey]struct{} // set of ids
 
 func (ids IdSet) String() string {
 	var res string
 	for id := range ids {
 		if len(res) > 0 {
 			res += " "
 		}
-		res += id
+		res += id.String()
 	}
 	return res
 
@@ -80,7 +80,7 @@ func (ids IdSet) String() string {
 type TagValue map[string]IdSet    // value -> set of ids
 type TagIndex map[string]TagValue // key -> list of values
 
-func (t *TagIndex) addTagId(name, value string, id string) {
+func (t *TagIndex) addTagId(name, value string, id schema.MKey) {
 	ti := *t
 	if _, ok := ti[name]; !ok {
 		ti[name] = make(TagValue)
@@ -91,7 +91,7 @@ func (t *TagIndex) addTagId(name, value string, id string) {
 	ti[name][value][id] = struct{}{}
 }
 
-func (t *TagIndex) delTagId(name, value string, id string) {
+func (t *TagIndex) delTagId(name, value string, id schema.MKey) {
 	ti := *t
 
 	delete(ti[name][value], id)
@@ -155,7 +155,7 @@ func (defs defByTagSet) defs(id int, fullName string) map[*schema.MetricDefiniti
 type Node struct {
 	Path     string
 	Children []string
-	Defs     []string
+	Defs     []schema.MKey
 }
 
 func (n *Node) HasChildren() bool {
@@ -179,7 +179,7 @@ type MemoryIdx struct {
 
 	// used for both hierarchy and tag index, so includes all MDs, with
 	// and without tags. It also mixes all orgs into one flat map.
-	defById map[string]*idx.Archive // by ID string
+	defById map[schema.MKey]*idx.Archive
 
 	// used by hierarchy index only
 	tree map[int]*Tree // by orgId
@@ -191,7 +191,7 @@ type MemoryIdx struct {
 
 func New() *MemoryIdx {
 	return &MemoryIdx{
-		defById:     make(map[string]*idx.Archive),
+		defById:     make(map[schema.MKey]*idx.Archive),
 		defByTagSet: make(defByTagSet),
 		tree:        make(map[int]*Tree),
 		tags:        make(map[int]TagIndex),
@@ -359,7 +359,7 @@ func (m *MemoryIdx) add(def *schema.MetricDefinition) idx.Archive {
 		root := &Node{
 			Path:     "",
 			Children: make([]string, 0),
-			Defs:     make([]string, 0),
+			Defs:     make([]schema.MKey, 0),
 		}
 		m.tree[def.OrgId] = &Tree{
 			Items: map[string]*Node{"": root},
@@ -396,7 +396,7 @@ func (m *MemoryIdx) add(def *schema.MetricDefinition) idx.Archive {
 		tree.Items[branch] = &Node{
 			Path:     branch,
 			Children: []string{prevNode},
-			Defs:     make([]string, 0),
+			Defs:     make([]schema.MKey, 0),
 		}
 
 		prevPos = pos
@@ -416,15 +416,15 @@ func (m *MemoryIdx) add(def *schema.MetricDefinition) idx.Archive {
 	tree.Items[path] = &Node{
 		Path:     path,
 		Children: []string{},
-		Defs:     []string{def.Id},
+		Defs:     []schema.MKey{def.Id},
 	}
 	m.defById[def.Id] = archive
 	statAdd.Inc()
 
 	return *archive
 }
 
-func (m *MemoryIdx) Get(id string) (idx.Archive, bool) {
+func (m *MemoryIdx) Get(id schema.MKey) (idx.Archive, bool) {
 	pre := time.Now()
 	m.RLock()
 	defer m.RUnlock()
diff --git a/idx/memory/tag_query.go b/idx/memory/tag_query.go
@@ -9,6 +9,8 @@ import (
 	"sync"
 	"sync/atomic"
 
+	schema "gopkg.in/raintank/schema.v1"
+
 	"github.com/grafana/metrictank/idx"
 	"github.com/raintank/worldping-api/pkg/log"
 )
@@ -97,8 +99,8 @@ type TagQuery struct {
 
 	startWith match // choses the first clause to generate the initial result set (one of EQUAL PREFIX MATCH MATCH_TAG PREFIX_TAG)
 
-	index TagIndex                // the tag index, hierarchy of tags & values, set by Run()/RunGetTags()
-	byId  map[string]*idx.Archive // the metric index by ID, set by Run()/RunGetTags()
+	index TagIndex                     // the tag index, hierarchy of tags & values, set by Run()/RunGetTags()
+	byId  map[schema.MKey]*idx.Archive // the metric index by ID, set by Run()/RunGetTags()
 
 	wg *sync.WaitGroup
 }
@@ -333,7 +335,7 @@ func NewTagQuery(expressions []string, from int64) (TagQuery, error) {
 }
 
 // getInitialByEqual generates the initial resultset by executing the given equal expression
-func (q *TagQuery) getInitialByEqual(expr kv, idCh chan string, stopCh chan struct{}) {
+func (q *TagQuery) getInitialByEqual(expr kv, idCh chan schema.MKey, stopCh chan struct{}) {
 	defer q.wg.Done()
 
 KEYS:
@@ -349,7 +351,7 @@ KEYS:
 }
 
 // getInitialByPrefix generates the initial resultset by executing the given prefix match expression
-func (q *TagQuery) getInitialByPrefix(expr kv, idCh chan string, stopCh chan struct{}) {
+func (q *TagQuery) getInitialByPrefix(expr kv, idCh chan schema.MKey, stopCh chan struct{}) {
 	defer q.wg.Done()
 
 VALUES:
@@ -371,7 +373,7 @@ VALUES:
 }
 
 // getInitialByMatch generates the initial resultset by executing the given match expression
-func (q *TagQuery) getInitialByMatch(expr kvRe, idCh chan string, stopCh chan struct{}) {
+func (q *TagQuery) getInitialByMatch(expr kvRe, idCh chan schema.MKey, stopCh chan struct{}) {
 	defer q.wg.Done()
 
 	// shortcut if value == nil.
@@ -412,7 +414,7 @@ VALUES2:
 
 // getInitialByTagPrefix generates the initial resultset by creating a list of
 // metric IDs of which at least one tag starts with the defined prefix
-func (q *TagQuery) getInitialByTagPrefix(idCh chan string, stopCh chan struct{}) {
+func (q *TagQuery) getInitialByTagPrefix(idCh chan schema.MKey, stopCh chan struct{}) {
 	defer q.wg.Done()
 
 TAGS:
@@ -437,7 +439,7 @@ TAGS:
 
 // getInitialByTagMatch generates the initial resultset by creating a list of
 // metric IDs of which at least one tag matches the defined regex
-func (q *TagQuery) getInitialByTagMatch(idCh chan string, stopCh chan struct{}) {
+func (q *TagQuery) getInitialByTagMatch(idCh chan schema.MKey, stopCh chan struct{}) {
 	defer q.wg.Done()
 
 TAGS:
@@ -461,8 +463,8 @@ TAGS:
 // getInitialIds asynchronously collects all ID's of the initial result set.  It returns:
 // a channel through which the IDs of the initial result set will be sent
 // a stop channel, which when closed, will cause it to abort the background worker.
-func (q *TagQuery) getInitialIds() (chan string, chan struct{}) {
-	idCh := make(chan string, 1000)
+func (q *TagQuery) getInitialIds() (chan schema.MKey, chan struct{}) {
+	idCh := make(chan schema.MKey, 1000)
 	stopCh := make(chan struct{})
 	q.wg.Add(1)
 
@@ -492,7 +494,7 @@ func (q *TagQuery) getInitialIds() (chan string, chan struct{}) {
 // all required tests in order to decide whether this metric should be part
 // of the final result set or not
 // in map/reduce terms this is the reduce function
-func (q *TagQuery) testByAllExpressions(id string, def *idx.Archive, omitTagFilters bool) bool {
+func (q *TagQuery) testByAllExpressions(id schema.MKey, def *idx.Archive, omitTagFilters bool) bool {
 	if !q.testByFrom(def) {
 		return false
 	}
@@ -695,7 +697,7 @@ func (q *TagQuery) testByTagPrefix(def *idx.Archive) bool {
 }
 
 // testByEqual filters a given metric by the defined "=" expressions
-func (q *TagQuery) testByEqual(id string, exprs []kv, not bool) bool {
+func (q *TagQuery) testByEqual(id schema.MKey, exprs []kv, not bool) bool {
 	for _, e := range exprs {
 		indexIds := q.index[e.key][e.value]
 
@@ -722,7 +724,7 @@ func (q *TagQuery) testByEqual(id string, exprs []kv, not bool) bool {
 // required tests to decide whether a metric should be part of the final
 // result set or not
 // it returns the final result set via the given resCh parameter
-func (q *TagQuery) filterIdsFromChan(idCh, resCh chan string) {
+func (q *TagQuery) filterIdsFromChan(idCh, resCh chan schema.MKey) {
 	for id := range idCh {
 		var def *idx.Archive
 		var ok bool
@@ -772,14 +774,14 @@ func (q *TagQuery) sortByCost() {
 }
 
 // Run executes the tag query on the given index and returns a list of ids
-func (q *TagQuery) Run(index TagIndex, byId map[string]*idx.Archive) IdSet {
+func (q *TagQuery) Run(index TagIndex, byId map[schema.MKey]*idx.Archive) IdSet {
 	q.index = index
 	q.byId = byId
 
 	q.sortByCost()
 
 	idCh, _ := q.getInitialIds()
-	resCh := make(chan string)
+	resCh := make(chan schema.MKey)
 
 	// start the tag query workers. they'll consume the ids on the idCh and
 	// evaluate for each of them whether it satisfies all the conditions
@@ -836,7 +838,7 @@ func (q *TagQuery) getMaxTagCount() int {
 // according to the criteria associated with this query
 // those that pass all the tests will have their relevant tags extracted, which
 // are then pushed into the given tag channel
-func (q *TagQuery) filterTagsFromChan(idCh chan string, tagCh chan string, stopCh chan struct{}, omitTagFilters bool) {
+func (q *TagQuery) filterTagsFromChan(idCh chan schema.MKey, tagCh chan string, stopCh chan struct{}, omitTagFilters bool) {
 	// used to prevent that this worker thread will push the same result into
 	// the chan twice
 	resultsCache := make(map[string]struct{})
@@ -953,7 +955,7 @@ func (q *TagQuery) tagFilterMatchesName() bool {
 
 // RunGetTags executes the tag query and returns all the tags of the
 // resulting metrics
-func (q *TagQuery) RunGetTags(index TagIndex, byId map[string]*idx.Archive) map[string]struct{} {
+func (q *TagQuery) RunGetTags(index TagIndex, byId map[schema.MKey]*idx.Archive) map[string]struct{} {
 	q.index = index
 	q.byId = byId