# NOTE(review): non-Python extraction residue removed; original header kept below as comments.
# Directory: /opt/alt/python37/lib/python3.7/site-packages/ssa/modules/
# Current file: /opt/alt/python37/lib/python3.7/site-packages/ssa/modules/processor.py
# -*- coding: utf-8 -*-
# Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2021 All Rights Reserved
#
# Licensed under CLOUD LINUX LICENSE AGREEMENT
# http://cloudlinux.com/docs/LICENSE.TXT
"""
This module contains RequestProcessor class
"""
import logging
import os
import shelve
import sys
import time
import traceback
from datetime import datetime, timedelta, date
from threading import Thread, RLock, current_thread
from typing import Callable, Any

from .autotracer import AutoTracer
from .common import Common
from .decision_maker import DecisionMaker
from .stat_sender import StatisticsSender
from ..internal.constants import storage_file
from ..internal.exceptions import SSAError
from ..internal.utils import (
    singleton,
    url_split,
    switch_schedstats
)


@singleton
class RequestProcessor(Common):
    """
    SSA Request processor implementation.
    Only one instance is allowed to be created.

    Collects per-domain/per-URL request counters in ``self.total_struct``
    (an in-memory dict, periodically dumped to a ``shelve`` file) and runs
    a daily DecisionMaker/AutoTracer pass in a background thread.
    """

    def __init__(self):
        super().__init__()
        self.logger = logging.getLogger('req_processor')
        self.logger.info('Processor enabled: %s', __package__)
        # enable throttling detection kernel mechanism on service start
        switch_schedstats(enabled=True)
        # load_stats() is provided by Common; second item is the stored date
        # marker -- absence means the on-disk storage is stale/absent
        self.total_struct, total_struct_date = self.load_stats()
        if not total_struct_date:
            self.cleanup()
        # RLock: handle() and dump_collected_stats_to_file() both touch
        # total_struct from different threads
        self._lock = RLock()
        self.decision_maker = DecisionMaker()
        self.sender = StatisticsSender()
        self.auto_tracer = AutoTracer()
        self.start_background_routine()

    @property
    def configured_duration(self) -> int:
        """
        Return config file value multiplied by 1000000,
        as we receive duration in microseconds
        """
        return self.requests_duration * 1000000

    def send_stats(self, report: dict) -> None:
        """
        Call Statistics Sender; failures are logged, never raised.
        """
        try:
            self.sender.send(report)
        except SSAError as e:
            self.logger.error('StatisticsSender failed: %s', str(e))

    def start_background_routine(self) -> None:
        """
        Start dumper|DecisionMaker thread in background
        """
        t = Thread(target=self.background_routine, daemon=True)
        t.start()
        self.logger.info('[%s] Routine started', t.name)

    def background_routine(self) -> None:
        """
        Dumps collected stats to file once an hour.
        Runs DecisionMaker once a day.
        Cleanup storage after DecisionMaker run.
        """
        while True:
            tick = datetime.now()
            if tick.minute == 0:
                self.logger.info('[%s] Routine thread launching dump (%s)',
                                 current_thread().name, tick)
                self.dump_collected_stats_to_file()
                if tick.hour == 0:
                    # midnight: run the daily analysis pipeline
                    self.logger.info(
                        '[%s] Routine thread launching AutoTracer (%s)',
                        current_thread().name, tick)
                    self._safe_exec(self.auto_tracer)
                    self.logger.info(
                        '[%s] Routine thread launching DecisionMaker (%s)',
                        current_thread().name, tick)
                    report = self._safe_exec(self.decision_maker)
                    self.cleanup()
                    # _safe_exec returns None when DecisionMaker raised;
                    # skip sending in that case instead of failing again
                    if report:
                        self._safe_exec(self.send_stats, report)
                    # attempt to enable throttling detection kernel mechanism
                    # in case it was accidentally switched off
                    switch_schedstats(enabled=True)
                # sleep past minute 0 so this hour is not processed twice
                self._simple_sleep(60)
            else:
                self._sleep_till_next_hour(tick.minute)

    def _safe_exec(self, action: Callable, *args) -> Any:
        """
        Call requested Callable with given args and capture any exception.
        Returns the callable's result, or None if it raised.
        """
        try:
            return action(*args)
        except Exception:
            et, ev, _ = sys.exc_info()
            self.logger.critical('%s failed with exception %s, %s',
                                 str(action), et, ev,
                                 extra={'orig_traceback':
                                        traceback.format_exc()})

    def _simple_sleep(self, to_sleep: int = 15 * 60) -> None:
        """
        Log and sleep given number of seconds or 15 minutes by default
        """
        self.logger.info('[%s] Routine thread sleeping for (%s)',
                         current_thread().name, to_sleep)
        time.sleep(to_sleep)

    def _sleep_till_next_hour(self, start_minute: int) -> None:
        """
        Sleep the number of minutes remaining till next hour
        """
        sleep_for = (timedelta(hours=1) - timedelta(
            minutes=start_minute)).total_seconds()
        self._simple_sleep(int(sleep_for))

    def dump_collected_stats_to_file(self) -> dict:
        """
        Dump collected stats to file.
        Returns the dumped mapping, or an empty dict on I/O failure.
        """
        with self._lock:
            self.logger.debug('[%s] Acquires lock to dump stats',
                              current_thread().name)
            self.add_current_date()
            try:
                with shelve.open(storage_file) as db:
                    for item, value in self.total_struct.items():
                        db[item] = value
                    dump_result = dict(db)
            except OSError as e:
                self.logger.error(
                    'Failed to dump data', extra={'err': str(e)})
                dump_result = dict()
            self.logger.debug('[%s] Released lock to dump stats',
                              current_thread().name)
        return dump_result

    @staticmethod
    def get_interval_for(timestamp: int) -> int:
        """
        Takes an hour of a day, to which the given timestamp belongs
        """
        return datetime.fromtimestamp(timestamp).hour

    def add_current_date(self) -> None:
        """
        Adds current date to the 'total struct' dict
        before dumping to a file
        """
        self.total_struct['current_date'] = date.today()

    def add_domain(self, name: str, is_wp: bool) -> None:
        """
        Add new domain sub-struct if it is not already present
        """
        if name not in self.total_struct:
            self.logger.debug('[%s] New domain received: %s',
                              current_thread().name, name)
            self.total_struct[name] = dict(
                domain_total_reqs=[0] * 24,
                is_a_wordpress_domain=is_wp
            )

    def add_url(self, domain: str, url: str) -> None:
        """
        Add new URL sub-struct if it is not already present
        """
        if url not in self.total_struct[domain]:
            self.logger.debug('[%s] New URL received: %s',
                              current_thread().name, url)
            self.total_struct[domain][url] = dict(
                url_total_reqs=[0] * 24,
                url_slow_reqs=[0] * 24,
                url_throttled_reqs=[0] * 24,
                durations=list()
            )

    def update_data(self, *, domain: str, url: str, timestamp: int,
                    duration: float, throttled: bool) -> None:
        """
        Update request counters for given domain and url,
        save request duration
        """
        interval = self.get_interval_for(timestamp)
        self.total_struct[domain]['domain_total_reqs'][interval] += 1
        self.total_struct[domain][url]['url_total_reqs'][interval] += 1
        if duration > self.configured_duration:
            self.total_struct[domain][url]['url_slow_reqs'][interval] += 1
        if throttled:
            if 'url_throttled_reqs' not in self.total_struct[domain][url]:
                # `url_throttled_reqs` was added in 0.2-1, and this field
                # could be missing in existing stats on real servers, so we
                # need to check for its existence and add it if it is missing
                self.total_struct[domain][url]['url_throttled_reqs'] = \
                    [0] * 24
            self.total_struct[domain][url]['url_throttled_reqs'][interval] += 1
        self.total_struct[domain][url]['durations'].append(duration)
        self.logger.info('[%s] Request to %s processed',
                         current_thread().name, url)
        self.logger.debug('[%s] %s',
                          current_thread().name, self.total_struct)

    def handle(self, data: dict) -> None:
        """
        Process given request data
        """
        url = data.get('url')
        if self.is_ignored(url):
            self.logger.debug('%s ignored', url)
            return
        domain, uri = url_split(url)
        with self._lock:
            self.logger.debug('[%s] Acquires lock to handle request counters',
                              current_thread().name)
            self.add_domain(domain, is_wp=bool(data.get('wordpress')))
            self.add_url(domain, url)
            # NOTE(review): assumes 'timestamp' and 'duration' keys are
            # always present -- int(None)/float(None) would raise here
            self.update_data(domain=domain, url=url,
                             timestamp=int(data.get('timestamp')),
                             duration=float(data.get('duration')),
                             throttled=bool(data.get('hitting_limits')))
            self.logger.debug('[%s] Released lock to handle request counters',
                              current_thread().name)

    def cleanup(self) -> None:
        """
        Cleanup storage and total_struct
        """
        self.logger.info('RequestProcessor cleanup...')
        try:
            os.unlink(storage_file)
        except OSError:
            self.logger.info('Already no storage file')
        self.total_struct.clear()