You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
480 lines
11 KiB
480 lines
11 KiB
package main
|
|
|
|
import (
|
|
"context"
|
|
"os"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/gophercloud/gophercloud"
|
|
"github.com/gophercloud/gophercloud/openstack"
|
|
"github.com/gophercloud/gophercloud/openstack/compute/v2/servers"
|
|
"github.com/gophercloud/gophercloud/pagination"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
datamodels "gitlab.com/cyclops-utilities/datamodels"
|
|
eeEvent "github.com/Cyclops-Labs/cyclops-4-hpc.git/services/eventsengine/client/event_management"
|
|
eeModels "github.com/Cyclops-Labs/cyclops-4-hpc.git/services/eventsengine/models"
|
|
l "gitlab.com/cyclops-utilities/logging"
|
|
)
|
|
|
|
var (
|
|
collector = "Servers"
|
|
objects = "vm"
|
|
collectionStart int64
|
|
vmCount int
|
|
terminatedCount int
|
|
dropCount int
|
|
remotelist []*eeModels.MinimalState
|
|
client *gophercloud.ServiceClient
|
|
)
|
|
|
|
// collect handles the process of retrieving the information from the system.
|
|
func collect() {
|
|
|
|
l.Trace.Printf("[COLLECTION] The collection process has been started.\n")
|
|
|
|
collectionStart = time.Now().UnixNano()
|
|
|
|
metricTime.With(prometheus.Labels{"type": "Collection Start Time"}).Set(float64(collectionStart))
|
|
|
|
// Here comes the logic to retrieve the information from the system.
|
|
opts := gophercloud.AuthOptions{
|
|
DomainName: cfg.OpenStack.Domain,
|
|
IdentityEndpoint: cfg.OpenStack.Keystone,
|
|
Password: cfg.OpenStack.Password,
|
|
Username: cfg.OpenStack.User,
|
|
}
|
|
|
|
if len(cfg.OpenStack.Project) > 0 {
|
|
|
|
opts.TenantName = cfg.OpenStack.Project
|
|
|
|
}
|
|
|
|
provider, e := openstack.AuthenticatedClient(opts)
|
|
|
|
if e != nil {
|
|
|
|
l.Error.Printf("[COLLECTION] Error authenticating against OpenStack. Error: %v", e)
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
client, e = openstack.NewComputeV2(provider, gophercloud.EndpointOpts{
|
|
Region: cfg.OpenStack.Region,
|
|
})
|
|
|
|
if e != nil {
|
|
|
|
l.Error.Printf("[COLLECTION] Error creating compute collector client. Error: %v", e)
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
serveropts := servers.ListOpts{
|
|
AllTenants: true, //set this to true to list VMs from all tenants if policy allows it
|
|
}
|
|
|
|
l.Trace.Printf("[COLLECTION] Querying events engine service for list of known and not terminated servers")
|
|
|
|
resourceType := "server"
|
|
eeParams := eeEvent.NewListStatesParams().WithResource(&resourceType).WithRegion(&cfg.OpenStack.Region)
|
|
|
|
ctx, _ := context.WithTimeout(context.Background(), 300*time.Second)
|
|
r, e := reportClient.EventManagement.ListStates(ctx, eeParams)
|
|
|
|
// Clears the remotelist between runs
|
|
remotelist = nil
|
|
|
|
if e != nil {
|
|
|
|
l.Warning.Printf("[COLLECTION] Something went wrong while retrieving the usage from the system, check with the administrator. Error: %v.\n", e)
|
|
|
|
} else {
|
|
|
|
remotelist = r.Payload
|
|
|
|
}
|
|
|
|
metricCount.With(prometheus.Labels{"type": "Total VMs from EventEngine"}).Set(float64(len(remotelist)))
|
|
|
|
l.Trace.Printf("[COLLECTION] (BEFORE) Existing count of servers at remote [ %v ].\n", len(remotelist))
|
|
|
|
eeCount := len(remotelist)
|
|
|
|
pager := servers.List(client, serveropts)
|
|
|
|
vmCount = 0
|
|
|
|
e = pager.EachPage(extractPage)
|
|
|
|
if e != nil {
|
|
|
|
l.Error.Printf("[COLLECTION] Error processing the lists of active resources. Error: %v\n", e)
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
metricCount.With(prometheus.Labels{"type": "Total VMs reported by OS API"}).Set(float64(vmCount))
|
|
|
|
metricCount.With(prometheus.Labels{"type": "Total VMs reported by OS API (to terminated state)"}).Set(float64(terminatedCount))
|
|
|
|
metricCount.With(prometheus.Labels{"type": "Total VMs DROPPED due to unknown flavor"}).Set(float64(dropCount))
|
|
|
|
l.Trace.Printf("[COLLECTION] (AFTER) Remaining count of servers at remote which were left unprocessed [ %v ].\n", len(remotelist))
|
|
|
|
//now for all remaining servers send terminated status
|
|
for _, object := range remotelist {
|
|
|
|
l.Trace.Printf("[COLLECTION] Sending terminated event for server [ %v ] for project [ %v ] with ID [ %v ].\n", object.ResourceID, object.ResourceName, object.Account)
|
|
|
|
evTime := int64(time.Now().Unix())
|
|
evLast := getStatus("terminated")
|
|
|
|
// events reports to be sent.
|
|
event := eeModels.Event{
|
|
Account: object.Account,
|
|
EventTime: &evTime,
|
|
LastEvent: &evLast,
|
|
MetaData: object.MetaData,
|
|
Region: cfg.OpenStack.Region,
|
|
ResourceID: object.ResourceID,
|
|
ResourceName: object.ResourceName,
|
|
ResourceType: "server",
|
|
}
|
|
|
|
report(event)
|
|
|
|
}
|
|
|
|
metricCount.With(prometheus.Labels{"type": "Total VMs forcefully TERMINATED"}).Set(float64(len(remotelist)))
|
|
|
|
metricTime.With(prometheus.Labels{"type": "Collection Processing Time"}).Set(float64(time.Now().UnixNano()-collectionStart) / float64(time.Millisecond))
|
|
|
|
l.Warning.Printf("[COLLECTION] Completed.\n - OS Report: %v\n - EE Report: %v\n - Droped: %v\n - OS Terminated: %v\n - Forced Termination: %v\n - Processing Time: %v[ms]\n", vmCount, eeCount, dropCount, terminatedCount, len(remotelist), float64(time.Now().UnixNano()-collectionStart)/float64(time.Millisecond))
|
|
|
|
l.Trace.Printf("[COLLECTION] The collection process has been finished.\n")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// extractPage is the handler function invoked to process each page collected
|
|
// from the server list.
|
|
// Parameters:
|
|
// - page: Pagination.Page reference of the page to be processed.
|
|
// Returns:
|
|
// - ok: a bool to mark the state of the processing.
|
|
// - e: an error reference raised in case of something goes wrong.
|
|
func extractPage(page pagination.Page) (ok bool, e error) {
|
|
|
|
var serverList []servers.Server
|
|
|
|
serverList, e = servers.ExtractServers(page)
|
|
|
|
if e != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
allProjectsLoop:
|
|
for _, s := range serverList {
|
|
|
|
// Filter by project id:
|
|
for _, filter := range cfg.ProjectFilters {
|
|
|
|
if strings.Contains(s.TenantID, filter) && filter != "" {
|
|
|
|
l.Debug.Printf("[COLLECTION] The Project [ %v ] matches filter [ %v ] and won't be further processed.", s.TenantID, filter)
|
|
|
|
continue allProjectsLoop
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Filter by project name:
|
|
for _, filter := range cfg.NameFilters {
|
|
|
|
if strings.Contains(strings.ToLower(s.Name), strings.ToLower(filter)) && filter != "" {
|
|
|
|
l.Debug.Printf("[COLLECTION] The Project [ %v ] matches filter [ %v ] and won't be further processed.", s.Name, filter)
|
|
|
|
continue allProjectsLoop
|
|
|
|
}
|
|
|
|
}
|
|
|
|
vmCount++
|
|
|
|
// "s" will be a servers.Server
|
|
var imageid, flavorid, imagename, imageosflavor, flavorname string
|
|
|
|
for k, val := range s.Image {
|
|
|
|
switch v := val.(type) {
|
|
|
|
case string:
|
|
|
|
if strings.Compare(k, "id") == 0 {
|
|
|
|
imageid = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for k, val := range s.Flavor {
|
|
|
|
switch v := val.(type) {
|
|
|
|
case string:
|
|
|
|
if strings.Compare(k, "id") == 0 {
|
|
|
|
flavorid = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
l.Trace.Printf("%+v, %v", client, imageid)
|
|
|
|
imagename, imageosflavor, e := getFromImageCache(client, imageid)
|
|
|
|
if e != nil {
|
|
|
|
l.Error.Printf("[COLLECTION] Error while getting the image id [ %+v ]. Error: %v\n", imageid, e)
|
|
|
|
}
|
|
|
|
flavorname, e = getFromFlavorCache(client, flavorid)
|
|
|
|
if e != nil {
|
|
|
|
l.Error.Printf("[COLLECTION] Error while getting the flavor id [ %+v ]. Error: %v\n", flavorid, e)
|
|
|
|
}
|
|
|
|
if len(flavorname) == 0 {
|
|
|
|
l.Warning.Printf("[COLLECTION] Found VM - Name:[%s], TenantID:[%s], Status:[%s], ID:[%s], ImageID:[%s], ImageName:[%s], ImageOSFlavor:[%s], FlavorId:[%s], FlavorName:[%s] :: with missing FlavorName, skipping record!",
|
|
s.Name, s.TenantID, s.Status, s.ID, imageid, imagename, imageosflavor, flavorid, flavorname)
|
|
|
|
dropCount++
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
l.Info.Printf("[COLLECTION] Found VM - Name:[%s], TenantID:[%s], Status:[%s], ID:[%s], ImageID:[%s], ImageName:[%s], ImageOSFlavor:[%s], FlavorId:[%s], FlavorName:[%s]",
|
|
s.Name, s.TenantID, s.Status, s.ID, imageid, imagename, imageosflavor, flavorid, flavorname)
|
|
|
|
// Potential problem with these filters are if clients create their
|
|
// VMs with the filter strings those will not be billed.
|
|
// It will be better to actually filter out all resources within a given
|
|
// tenant, so if rally or tempest testrun create resources exclusively
|
|
// belonging to a specific tenant then that tenant can be filtered out.
|
|
// TODO: TBD with SWITCH!
|
|
|
|
// Here comes the transformation of the information retrieved into either
|
|
metadata := make(datamodels.JSONdb)
|
|
metadata["imageid"] = imageid
|
|
metadata["imagename"] = imagename
|
|
metadata["imageosflavor"] = imageosflavor
|
|
metadata["flavorid"] = flavorid
|
|
metadata["flavorname"] = flavorname
|
|
metadata["region"] = cfg.OpenStack.Region
|
|
|
|
// TODO: MAke more generic and customizable via config file
|
|
if value, exists := s.Metadata["schedule_frequency"]; exists && value == "never" {
|
|
|
|
metadata["PlanOverride"] = true
|
|
|
|
}
|
|
|
|
evTime := int64(time.Now().Unix())
|
|
evLast := getStatus(s.Status)
|
|
|
|
if evLast == "terminated" {
|
|
|
|
terminatedCount++
|
|
|
|
}
|
|
|
|
// events reports to be sent.
|
|
event := eeModels.Event{
|
|
Account: s.TenantID,
|
|
EventTime: &evTime,
|
|
LastEvent: &evLast,
|
|
MetaData: metadata,
|
|
Region: cfg.OpenStack.Region,
|
|
ResourceID: s.ID,
|
|
ResourceName: s.Name,
|
|
ResourceType: "server",
|
|
}
|
|
|
|
report(event)
|
|
|
|
//if this object exists in remote list then lets remove it
|
|
for i, object := range remotelist {
|
|
|
|
if strings.Compare(object.Account, s.TenantID) == 0 &&
|
|
strings.Compare(object.ResourceID, s.ID) == 0 &&
|
|
strings.Compare(object.ResourceName, s.Name) == 0 {
|
|
|
|
l.Debug.Printf("[COLLECTION] Event send cleaned from the processing list..\n")
|
|
|
|
remotelist = append(remotelist[:i], remotelist[i+1:]...)
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ok = true
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// getStatus job is to normalize the event state returned by the collectors.
|
|
// Parameters:
|
|
// - state: string returned by the system.
|
|
// Returns:
|
|
// - status: normalized state to be returned.
|
|
func getStatus(state string) (status string) {
|
|
|
|
switch strings.ToUpper(state) {
|
|
|
|
case "ACTIVE":
|
|
|
|
status = "active"
|
|
|
|
case "ATTACHING":
|
|
|
|
status = "active"
|
|
|
|
case "AVAILABLE":
|
|
|
|
status = "active"
|
|
|
|
case "BUILD":
|
|
|
|
status = "inactive"
|
|
|
|
case "CREATING":
|
|
|
|
status = "active"
|
|
|
|
case "DELETED":
|
|
|
|
status = "terminated"
|
|
|
|
case "DELETING":
|
|
|
|
status = "terminated"
|
|
|
|
case "DETACHING":
|
|
|
|
status = "active"
|
|
|
|
case "DOWN":
|
|
|
|
status = "inactive"
|
|
|
|
case "ERROR":
|
|
|
|
status = "error"
|
|
|
|
case "ERROR_DELETING":
|
|
|
|
status = "error"
|
|
|
|
case "EXTENDING":
|
|
|
|
status = "inactive"
|
|
|
|
case "HARD_DELETED":
|
|
|
|
status = "terminated"
|
|
|
|
case "IN-USE":
|
|
|
|
status = "active"
|
|
|
|
case "MAINTENANCE":
|
|
|
|
status = "active"
|
|
|
|
case "PAUSED":
|
|
|
|
status = "inactive"
|
|
|
|
case "RESCUED":
|
|
|
|
status = "active"
|
|
|
|
case "RESIZE":
|
|
|
|
status = "active"
|
|
|
|
case "RESIZED":
|
|
|
|
status = "active"
|
|
|
|
case "RESERVED":
|
|
|
|
status = "active"
|
|
|
|
case "RETYPING":
|
|
|
|
status = "inactive"
|
|
|
|
case "SHELVED_OFFLOADED":
|
|
|
|
status = "terminated"
|
|
|
|
case "SHUTOFF":
|
|
|
|
status = "inactive"
|
|
|
|
case "SOFT_DELETED":
|
|
|
|
status = "terminated"
|
|
|
|
case "STOPPED":
|
|
|
|
status = "inactive"
|
|
|
|
case "SUSPENDED":
|
|
|
|
status = "inactive"
|
|
|
|
case "TERMINATED":
|
|
|
|
status = "terminated"
|
|
|
|
case "VERIFY_RESIZE":
|
|
|
|
status = "active"
|
|
|
|
}
|
|
|
|
l.Trace.Printf("[REPORT] State received from the system [ %v ] normalized to [ %v ]", state, status)
|
|
|
|
return status
|
|
|
|
}
|
|
|