# Current File : //opt/cloudlinux/venv/lib64/python3.11/site-packages/cl_plus/collectors/node_exporter.py
# coding=utf-8
#
# Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2020 All Rights Reserved
#
# Licensed under CLOUD LINUX LICENSE AGREEMENT
# http://cloudlinux.com/docs/LICENCE.TXT
#

import os
import platform
import re
from requests import PreparedRequest, RequestException
from prometheus_client.parser import text_string_to_metric_families
from typing import Dict, AnyStr, List
from itertools import groupby

from .collector_base import CollectorBase
from cl_plus.utils.unix_socket import UnixHTTPAdapter
from clcommon.utils import get_virt_type
from cl_plus.consts import METRIC_NAME, METRIC_LABELS, METRIC_VALUE


class NodeExporterCollector(CollectorBase):
    def __init__(self, _logger):
        """
        Set up collector defaults
        :param _logger: logger object, handed over to the base class constructor
        """
        super().__init__(_logger)
        # Node Exporter is queried with a plain HTTP request sent through a unix socket
        self._socket_file = '/var/run/cl_node_exporter.socket'
        self._metrics_url = 'http://localhost/metrics'
        # Error state of the last NE request; None until the first request is done
        self._is_ne_error = None
        # Virtualization type, used to pick the node_disk device filter
        self.virt_type = get_virt_type()
        # Only samples with these names get into the collector output
        self._required_metrics = [
            'node_filesystem_avail_bytes',
            'node_filesystem_size_bytes',
            'node_cpu_seconds_total',
            'node_filesystem_free_bytes',
            'node_memory_MemTotal_bytes',
            'node_memory_MemAvailable_bytes',
            'node_boot_time_seconds',
            'node_context_switches_total',
            'node_disk_io_time_seconds_total',
            'node_disk_read_bytes_total',
            'node_disk_read_time_seconds_total',
            'node_disk_reads_completed_total',
            'node_disk_write_time_seconds_total',
            'node_disk_writes_completed_total',
            'node_disk_written_bytes_total',
            'node_network_receive_bytes_total',
            'node_network_transmit_bytes_total',
            'node_filefd_allocated',
            'node_load1',
            'node_load15',
            'node_load5',
            'node_hwmon_temp_celsius',
        ]

    def init(self):
        """
        Initialize Node Exporter collector
        :return: None
        """
        self._aggregated_data = {}
        self._logger.info("Node Exporter collector init")

    def _get_data_from_socket(self):
        """
        Request raw metrics text from Node Exporter through its unix socket
        :return: response body as text
        :raises: HTTPError (from raise_for_status) when NE answers with
            an error status; errors raised by the adapter while sending
            are passed to the caller as is
        """
        request = PreparedRequest()
        request.prepare(
            method='GET',
            url=self._metrics_url,
        )
        unix_http_adapter = UnixHTTPAdapter(self._socket_file)
        try:
            # send() is inside the try block so the adapter is closed
            # even if the request itself fails
            response = unix_http_adapter.send(request)
            response.raise_for_status()
            # Read the body before the adapter is closed
            return response.text
        finally:
            unix_http_adapter.close()

    @staticmethod
    def _is_metric_value_exclude(labels):
        """
        Determines is metric value with supposed labels need exclude
        :param labels: labels for mount dictionary to check. Example:
            {'device': '/dev/sda1', 'fstype': 'xfs', 'mountpoint': '/usr/share/cagefs-skeleton'}
        :return: True/False - Exclude/not exclude metric value from collector output
        """
        if 'mountpoint' not in labels:
            return False
        # Check for special case if CageFs skeleton path (or it part) is symlink to some another dir
        path_to_check = os.path.realpath('/usr/share/cagefs-skeleton')
        if labels['mountpoint'].startswith(path_to_check) or '/virtfs/' in labels['mountpoint']:
            return True
        return False

    @staticmethod
    def _calculate_available_mem(metrics):
        """
        Calculates available mem metric
        according to article:
        https://access.redhat.com/solutions/406773
        """
        return metrics['node_memory_MemFree_bytes'] + \
            metrics['node_memory_SReclaimable_bytes'] + \
            metrics['node_memory_Buffers_bytes'] + \
            metrics['node_memory_Cached_bytes']

    def _prepare_node_disk_entries(self, raw_ne_output: AnyStr) -> AnyStr:
        """
        Different virtualization types display all devices
        in the /proc/diskstats with different titles
        Node exporter provides default filtering, but it doesn't work
        with non-kvm virtual machines (at least, XEN)

        Example on kvm /proc/diskstats output:
            vda ...
            vda1 ...
            sr0 ...
            loop0 ...

        And default NE behaviour will leave only vda and sr0 devices

        Example on XEN:
            xvda1 ...

        And default NE behaviour will ignore this entry

        So this method checks virt type and leaves only suitable
        devices (depending on vm type)

        :param raw_ne_output: node exporter output including all devices
        for node_disk metrics
        :return: output with skipped lines
        """
        # Set default regexp value (just like NE)
        # All lines with node_disk metrics and devices to filter
        # Example of raw output with kvm virtualization:
        # # HELP node_disk_io_now The number of I/Os currently in progress.
        # # TYPE node_disk_io_now gauge
        # node_disk_io_now{device="loop0"} 0
        # node_disk_io_now{device="sr0"} 0
        # node_disk_io_now{device="vda"} 0
        # node_disk_io_now{device="vda1"} 0
        # After the regexp substring:
        # # HELP node_disk_io_now The number of I/Os currently in progress.
        # # TYPE node_disk_io_now gauge
        # node_disk_io_now{device="sr0"} 0
        # node_disk_io_now{device="vda"} 0
        reg = "node_disk.*(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d.*\n"
        if self.virt_type == "xen":
            # Regex, that will include `xvd*{num}` and `vd*{num}` devices
            reg = "node_disk.*(ram|loop|fd|(h|s)d[a-z]|nvme\\d+n\\d+p)\\d.*\n"
        return re.sub(reg, "", raw_ne_output)

    @staticmethod
    def filter_samples_for_xen(family) -> List:
        """
        Checks for presence of devices without numbers in a title and
        ignores all devices with numbers (if any)
        If there are only numbered devices - doesn't filter anything

        Samples are grouped by device base (device title with all digits
        removed). Samples of one device base don't have to follow each other
        in the family: groups are built over the whole sample list and are
        returned in the order their device base was first seen.

        Example of received node_disk metric family:
        Name: node_disk_* Labels: {'device': 'xvda'} Value: 811395.84
        Name: node_disk_* Labels: {'device': 'xvda1'} Value: 1114.75
        Name: node_disk_* Labels: {'device': 'xvda2'} Value: 0.0
        Name: node_disk_* Labels: {'device': 'xvdb1'} Value: 813760.16
        Name: node_disk_* Labels: {'device': 'xvdb2'} Value: 872.06
        Name: node_disk_* Labels: {'device': 'xvdb3'} Value: 0.0
        Expected result of filtering:
        `xvda` group (since we have sample with device without num):
        Name: node_disk_* Labels: {'device': 'xvda'} Value: 811395.84
        `xvdb` group (since all samples devices are with num):
        Name: node_disk_* Labels: {'device': 'xvdb1'} Value: 813760.16
        Name: node_disk_* Labels: {'device': 'xvdb2'} Value: 872.06
        Name: node_disk_* Labels: {'device': 'xvdb3'} Value: 0.0

        :param family: parsed Metric object with all samples
        :return: list of needed samples
        """
        # Grouping samples of metric by device base
        # (device value without numbers).
        # A dict is used instead of itertools.groupby, because groupby joins
        # only adjacent items and would split a device group if its samples
        # are not contiguous. Dict keeps insertion order, so the result order
        # is the same as before for contiguous input.
        samples_by_device_base = {}
        for sample in family.samples:
            device_base = re.sub(r"\d+", "", sample[METRIC_LABELS]["device"])
            samples_by_device_base.setdefault(device_base, []).append(sample)

        filtered_family_samples = []
        for device_base, group in samples_by_device_base.items():
            # First sample whose device has a title without numbers (`xvda`)
            whole_device_sample = next(
                (sample for sample in group
                 if sample[METRIC_LABELS]["device"] == device_base),
                None)
            if whole_device_sample is not None:
                # Leave only this sample from device group
                filtered_family_samples.append(whole_device_sample)
            else:
                # There is no device without a number in title
                # Add info about all devices in the group
                filtered_family_samples.extend(group)
        return filtered_family_samples

    def _get_data_from_ne(self):
        """
        Retrieve new data from Node Exporter
        :return: Dict with NE metrics:
            {
                'metric_name': [{'value': 1}],
                'metric_name2': [{'value': 1, 'labels': {...}}, ...]
            }
            or None if data can't be received or parsed

        Only metrics listed in self._required_metrics are returned, samples
        of unneeded mounts are skipped.
        Errors are written to the log once per failure streak: a failure is
        logged only if the previous cycle didn't fail, and the error state
        is cleared only after a completely successful cycle.
        """
        # Error state left by the previous cycle (None before the first one)
        failed_before = self._is_ne_error
        # Get data from NE
        try:
            node_exporter_output = self._get_data_from_socket()
        except RequestException as err:
            if not failed_before:
                self._logger.warning("[Node exporter collector] socket get_data error: %s", err)
            self._is_ne_error = True
            return None
        try:
            node_exporter_metrics = {}
            # Sets are used for fast membership checks in the per-sample loop
            required_metrics = frozenset(self._required_metrics)
            # metrics to calculate availableMEM for cl6
            memory_metrics = frozenset(('node_memory_MemFree_bytes', 'node_memory_SReclaimable_bytes',
                                        'node_memory_Cached_bytes', 'node_memory_Buffers_bytes'))
            memory_metrics_values = {}
            is_xen = self.virt_type == "xen"
            # Filter diskstats entries
            node_exporter_output = self._prepare_node_disk_entries(
                node_exporter_output)
            for family in text_string_to_metric_families(node_exporter_output):
                # Exclude all unneeded samples for node_disk and XEN VMs
                if is_xen and family.name.startswith("node_disk"):
                    family.samples = self.filter_samples_for_xen(family)
                for sample in family.samples:
                    # sample example
                    # Name: node_cpu_seconds_total Labels: {'cpu': '0', 'mode': 'idle'} Value: 811395.84
                    # Name: node_cpu_seconds_total Labels: {'cpu': '0', 'mode': 'iowait'} Value: 1114.75
                    # Name: node_cpu_seconds_total Labels: {'cpu': '1', 'mode': 'idle'} Value: 813760.16
                    name = sample[METRIC_NAME]            # metric name
                    labels = sample[METRIC_LABELS]        # labels dict
                    value = sample[METRIC_VALUE]          # metric value

                    # Memory values are remembered even if the metric itself
                    # is not required - they may be needed for the el6 fallback
                    if name in memory_metrics:
                        memory_metrics_values[name] = value

                    if name not in required_metrics:
                        continue
                    # Skip unneeded mounts
                    if self._is_metric_value_exclude(labels):
                        continue
                    metric_data = {"value": value}
                    # 'labels' key is present only for samples that have labels
                    if labels:
                        metric_data["labels"] = labels
                    node_exporter_metrics.setdefault(name, []).append(metric_data)

            # NE has not reported MemAvailable on a kernel with 'el6' in its
            # release string - calculate the value from other memory metrics
            if 'el6' in platform.release() and \
                    node_exporter_metrics.get('node_memory_MemAvailable_bytes') is None:
                available_mem = self._calculate_available_mem(memory_metrics_values)
                node_exporter_metrics['node_memory_MemAvailable_bytes'] = [{'value': available_mem}]
        except Exception:
            # Top-level boundary of the collector cycle: any parsing problem
            # is logged (once per failure streak) and reported as "no data"
            if not failed_before:
                import traceback
                self._logger.warning("[Node exporter collector] generic get_data error: %s",
                                     traceback.format_exc())
            self._is_ne_error = True
            return None
        # Both receiving and parsing succeeded
        self._is_ne_error = False
        return node_exporter_metrics

    def _collect_new_data(self, new_ne_data_dict: Dict):
        """
        Add new NE data
        :param new_ne_data_dict: New data from NE

        income node exporter dict:
        {
            'metric_name': [
                {'value': 1}
            ],
            'metric_name2': [
                {'value': 1, 'labels': {...}},
                {'value': 1, 'labels': {...}}
            ]
        }

        aggregated data:
        {
            'metric_name': [
                {'value': [1, 2, 3]}
            ],
            'metric_name2': [
                {'value': [1, 2, 3], 'labels': {...}},
                {'value': [1, 3, 5], 'labels': {...}}
            ]
        }
        """
        for metric_name, metric_values_list in new_ne_data_dict.items():
            for metric_values_dict in metric_values_list:
                if metric_name not in self._aggregated_data:
                    self._aggregated_data[metric_name] = []

                if 'labels' not in metric_values_dict:
                    if self._aggregated_data[metric_name]:
                        self._aggregated_data[metric_name][0]['value'].append(metric_values_dict['value'])
                    else:
                        self._aggregated_data[metric_name].append({
                            'value': [metric_values_dict['value']]
                        })
                else:
                    for aggregated_item_dict in self._aggregated_data[metric_name]:
                        if metric_values_dict['labels'] == aggregated_item_dict['labels']:
                            aggregated_item_dict['value'].append(metric_values_dict['value'])
                            break
                    else:
                        self._aggregated_data[metric_name].append({
                            'value': [metric_values_dict['value']],
                            'labels': metric_values_dict['labels']
                        })

    def aggregate_new_data(self):
        """
        Retrieve and aggregate new data
        :return None
        """
        json_dict = self._get_data_from_ne()
        # Exit if no data
        if not json_dict:
            return
        # New data present - aggregate
        self._collect_new_data(json_dict)

    def get_averages(self):
        """
        Get collector's averages data
        :return: dict
            {
             "node_exporter":
              {
                "go_gc_duration_seconds": [ { "labels":  {"quantile": "0"} , "value": "0" },
                                            { "labels":  {"quantile": "0.25"} , "value": "0" },
                                            { "labels":  {"quantile": "0.5"} , "value": "0" },
                                            { "labels":  {"quantile": "0.75"} , "value": "0" },
                                            { "labels":  {"quantile": "1"} , "value": "0" }
                                          ],
                "go_gc_duration_seconds_sum": [ { "value": "0" } ]
              }
            }
            or None if can't get data
        """
        if not self._aggregated_data:
            return None

        # calculate average for each metric
        for _, metric_values_list in self._aggregated_data.items():
            for metric_data in metric_values_list:
                # {'metric_name': [{'value': [1,2,3,...], 'labels': {}}, ...], ...} ->
                # {'metric_name': [{'value': average([1,2,3,...]), 'labels': {}}, ...], ...}
                metric_data['value'] = sum(metric_data['value']) // len(metric_data['value'])
        ret_dict = self._aggregated_data.copy()
        # Prepare to next collecting cycle - reset all needed variables
        self._aggregated_data = {}
        return {"node_exporter": ret_dict}