# Source code for components.connectors

import logging
import asyncio
import requests
import aiohttp
import browser_cookie3
import webbrowser

from typing import Protocol, runtime_checkable
from queue import Queue
from datetime import datetime
from tqdm.asyncio import tqdm
from time import sleep

from .containers import ReportProtocol


# Module-level logger, named after this module's import path, shared by every
# connector defined in this file.
logger_main = logging.getLogger(__name__)


@runtime_checkable
class Connector(Protocol):
    """Structural (duck-typed) interface for connector objects.

    Because the protocol is ``runtime_checkable``, ``isinstance`` checks only
    verify that the members exist, not their signatures or types.

    :param queue: Shared queue object.
    :type queue: Queue
    :param timeout: Request's timeout value in seconds.
    :type timeout: int
    :param headers: Headers required to establish the connection.
    :type headers: dict[str, str]
    """

    queue: Queue
    timeout: int
    headers: dict[str, str]

    def check_connection(self) -> bool:
        """Checks connection with given domain.

        :return: Flag, True if connection is established, False otherwise.
        :rtype: bool
        """
        ...

    async def report_gathering(self, reports: list[ReportProtocol], session: aiohttp.ClientSession) -> None:
        """Collects asynchronous responses from the servers.

        :param reports: Collection of ReportProtocol objects.
        :type reports: list[ReportProtocol]
        :param session: HTTP client session object to handle request in transaction.
        :type session: aiohttp.ClientSession
        """
        ...


class SfdcConnector:
    """Concrete connector for SFDC (Salesforce).

    Authenticates with the ``sid`` session cookie read from MS Edge's cookie
    jar and downloads reports asynchronously, putting every successfully
    downloaded report on the shared queue.

    NOTE(review): the ``Connector`` protocol in this module declares
    ``report_gathering``; this class does not define it and exposes
    ``handle_requests`` as its request entry point instead.

    :param queue: Shared queue object.
    :type queue: Queue
    :param domain: Base URL of the SFDC instance; report ids are appended
        directly to it.
    :type domain: str
    :param verbose: CLI parameter used as switch between progress bar and
        logging to stdout on INFO level. Defaults to False.
    :type verbose: bool
    :param timeout: Request's timeout value in seconds. Defaults to 900.
    :type timeout: int
    :param headers: Headers required to establish the connection. Defaults to
        {'Content-Type': 'application/csv', 'X-PrettyPrint': '1'}.
    :type headers: dict[str, str]
    """

    def __init__(self,
                 queue: Queue,
                 *,
                 domain: str,
                 verbose: bool = False,
                 timeout: int = 900,
                 headers: dict[str, str] = {'Content-Type': 'application/csv', 'X-PrettyPrint': '1'}):
        """Constructor method for SfdcConnector.

        The connection is NOT checked here; call :meth:`check_connection`
        explicitly before requesting reports.

        :param queue: Shared, thread-safe queue.
        :type queue: Queue
        :param domain: Base URL of the SFDC instance.
        :type domain: str
        :param verbose: Flag, if True switches to verbose mode (logging on
            INFO level), if False shows progress bar. Defaults to False.
        :type verbose: bool
        :param timeout: Response timeout in seconds. Defaults to 900.
        :type timeout: int
        :param headers: Headers for the request. Defaults to
            {'Content-Type': 'application/csv', 'X-PrettyPrint': '1'}.
        :type headers: dict[str, str]
        """
        self.queue: Queue = queue
        self.verbose: bool = verbose
        self.domain: str = domain
        self.timeout: int = timeout
        # Take a private copy: `_parse_headers` writes the Authorization entry
        # into this dict, and without the copy that write would land in the
        # shared default argument (or in the caller's own dict).
        self.headers: dict[str, str] = dict(headers)
        self.check: bool = False
        self.sid: str = ""
        self.sid_valid: bool = False
        self.edge_path: str = '"C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe" --profile-directory=Default %s'

    def _convert_domain_for_cookies_lookup(self) -> str:
        """Converts the domain URL into the key used for the sid cookie lookup.

        :return: Domain without the ``https://`` scheme and without slashes.
        :rtype: str
        """
        logger_main.debug("Parsing domain key for cookies")
        return self.domain.replace('https://', '').replace('/', '')

    def _parse_headers(self) -> None:
        """Writes the ``Authorization`` header built from the current sid."""
        logger_main.debug("Parsing headers for SFDC request check")
        # filter(None, ...) drops an empty sid, leaving just 'Bearer '.
        self.headers['Authorization'] = ''.join(
            filter(None, ['Bearer ', self.sid]))

    def _intercept_sid(self) -> str:
        """Intercepts sid from MS Edge's CookieJar.

        :return: Intercepted `sid` or empty string if `sid` doesn't exist or
            the cookie jar could not be read.
        :rtype: str
        """
        logger_main.info('SID interception started')
        cookie_domain = self._convert_domain_for_cookies_lookup()
        try:
            logger_main.debug("Trying to access MS Edge's CookieJar")
            cookie_jar = browser_cookie3.edge()
            logger_main.debug("Retrieving SID entry from CookieJar")
            sid = next(
                (cookie.value for cookie in cookie_jar
                 if cookie.name == 'sid' and cookie.domain == cookie_domain),
                "")
        except Exception:
            # Best effort: an unreadable cookie jar is treated as "no sid".
            # `Exception` (not a bare except) keeps Ctrl+C working while the
            # caller polls this method in a loop.
            logger_main.debug("SID entry not there", exc_info=True)
            return ""
        if not sid:
            logger_main.debug("SID entry not there")
        return sid or ""

    def _open_sfdc_site(self) -> None:
        """Opens the SFDC website in MS Edge so the user can log in, then
        polls the cookie jar until a `sid` shows up.
        """
        logger_main.warning(
            'SID not found! -> Login to SFDC -> SalesForce webpage will open shortly')
        sleep(2)

        logger_main.debug('Openning SFDC webside to log in to SalesForce')
        webbrowser.get(self.edge_path).open(self.domain)

        logger_main.debug(
            "Starting 30 sec sleep to let user log in to SalesForce")
        sleep(30)

        while not self.sid:
            self.sid = self._intercept_sid()
            logger_main.info('intercepting SID! Hold on tight!')
            sleep(2)

    def _sid_check(self) -> bool:
        """Checks SID validity for given SFDC domain.

        The sid is considered valid when the response carries
        ``Cache-Control: private``. On failure the stored sid is cleared.

        :return: Flag, True if SID was valid, False when wasn't.
        :rtype: bool
        """
        # An explicit timeout keeps an unresponsive server from blocking forever.
        response = requests.get(self.domain,
                                cookies={'sid': self.sid},
                                allow_redirects=True,
                                timeout=self.timeout)
        # .get(): a response without the header means "not valid", not KeyError.
        if response.headers.get('Cache-Control') == 'private':
            logger_main.info('SID ok!')
            self.sid_valid = True
            self.check = True
            return True

        logger_main.critical('SID not ok!!!')
        self.sid = ""
        self.sid_valid = False
        self.check = False
        return False

    def check_connection(self) -> bool:
        """Checks the connection with given domain.

        Loops until a sid taken from the browser passes :meth:`_sid_check`,
        opening the SFDC login page whenever the sid is missing or rejected.

        :return: Always True; the method only returns once a valid sid exists.
        :rtype: bool
        """
        # NOTE(review): headers are built before the sid is known, so the
        # Authorization value is just 'Bearer ' on a first call; requests
        # authenticate through the `sid` cookie. Confirm this is intended.
        self._parse_headers()

        while not self.check:
            self.sid = self._intercept_sid()
            logger_main.info("SID checking in progress ...")
            if not self.sid:
                self._open_sfdc_site()
            logger_main.info('SID found!')

            logger_main.debug("Checking SID validity")
            if not self.sid_valid and not self._sid_check():
                # A stale cookie would otherwise be re-read and re-checked in
                # a tight loop forever; ask the user to log in again instead.
                self._open_sfdc_site()

        return True

    def _parse_report_url(self, report: ReportProtocol) -> str:
        """Parses report object url.

        :param report: Instance of `ReportProtocol`.
        :type report: ReportProtocol
        :return: Domain, report id and export parameters concatenated.
        :rtype: str
        """
        return self.domain + report.id + report.export_params

    async def _request_report(self, report: ReportProtocol, session: aiohttp.ClientSession) -> None:
        """Sends asynchronous request for a single report within shared session.

        Retries until the report is valid or 20 attempts were made. Response
        status handling:

        - 200: response is saved in `ReportProtocol.response`,
          `ReportProtocol.valid` set to True, report is put to the `queue`.
          A payload error while reading the body triggers another attempt.
        - 404: error in response, `ReportProtocol.valid` set to False, no retries.
        - 500: request timeout, `ReportProtocol.valid` set to False, another attempt.
        - any other: unknown error, `ReportProtocol.valid` set to False,
          another attempt.

        :param report: Instance of `ReportProtocol`.
        :type report: ReportProtocol
        :param session: Shared session object.
        :type session: aiohttp.ClientSession
        """
        report.created_date = datetime.now()
        report_url = self._parse_report_url(report)
        # aiohttp expects a ClientTimeout; a bare number is a deprecated form.
        request_timeout = aiohttp.ClientTimeout(total=self.timeout)

        logger_main.info("%s -> Sending request", report.name)
        logger_main.debug(
            "Sending asynchronous report request with params: %s, %s", report_url, self.headers)

        while not report.valid and report.attempt_count < 20:
            async with session.get(report_url,
                                   headers=self.headers,
                                   cookies={'sid': str(self.sid)},
                                   timeout=request_timeout,
                                   allow_redirects=True) as r:
                report.attempt_count += 1

                if r.status == 200:
                    logger_main.info(
                        "%s -> Request successful, retrieving content", report.name)
                    try:
                        report.response = await r.text()
                        report.valid = True
                        logger_main.debug(
                            "Sending the content to the queue for processing, %s elements in the queue before transfer",
                            self.queue.qsize())
                        self.queue.put(report)
                        logger_main.debug(
                            '%s succesfuly downloaded and put to the queue', report.name)
                    except aiohttp.ClientPayloadError as e:
                        # The body was cut off mid-stream; `valid` is still
                        # False, so the loop makes another attempt.
                        logger_main.warning(
                            '%s is invalid, Unexpected end of stream, SFDC just broke the connection -> %s',
                            report.name, e)
                        continue
                elif r.status == 404:
                    logger_main.error(
                        "%s is invalid, Report does not exist - check ID, SFDC respond with status %s - %s",
                        report.name, r.status, r.reason)
                    report.valid = False
                    # A missing report will not appear on retry.
                    break
                elif r.status == 500:
                    logger_main.warning(
                        "%s is invalid, Timeout, SFDC respond with status %s - %s",
                        report.name, r.status, r.reason)
                    report.valid = False
                else:
                    logger_main.warning(
                        "%s is invalid, Unknown Error, SFDC respond with status %s - %s",
                        report.name, r.status, r.reason)
                    report.valid = False

    async def _toggle_progress_bar(self, tasks: list[asyncio.Task]) -> None:
        """Shows a progress bar over the tasks unless verbose mode is on.

        :param tasks: Collection of asynchronous request tasks.
        :type tasks: list[asyncio.Task]
        """
        print("")
        if not self.verbose:
            for finished in tqdm.as_completed(tasks, total=len(tasks)):
                await finished

    def _create_async_tasks(self, reports: list[ReportProtocol], session: aiohttp.ClientSession) -> list[asyncio.Task]:
        """Creates collection of asynchronous request tasks.

        :param reports: Collection of `ReportProtocol` instances.
        :type reports: list[ReportProtocol]
        :param session: Shared, asynchronous session.
        :type session: aiohttp.ClientSession
        :return: Collection of asynchronous request tasks.
        :rtype: list[asyncio.Task]
        """
        logger_main.debug("Creating tasks for asynchronous processing")
        return [asyncio.create_task(self._request_report(report, session))
                for report in reports]

    async def _report_request_all(self, reports: list[ReportProtocol], session: aiohttp.ClientSession) -> None:
        """Orchestrates entire process of processing tasks.

        :param reports: Collection of `ReportProtocol` instances.
        :type reports: list[ReportProtocol]
        :param session: Shared aiohttp session.
        :type session: aiohttp.ClientSession
        """
        tasks = self._create_async_tasks(reports, session)
        await self._toggle_progress_bar(tasks)
        # In verbose mode nothing has awaited the tasks yet; gather waits for
        # them (and is a no-op for tasks that already finished).
        await asyncio.gather(*tasks)

    async def handle_requests(self, reports: list[ReportProtocol]) -> None:
        """Creates session and processes asynchronous tasks.

        :param reports: Collection of `ReportProtocol` instances.
        :type reports: list[ReportProtocol]
        """
        logger_main.debug("Awaiting responses")

        timeout = aiohttp.ClientTimeout(total=self.timeout)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            await self._report_request_all(reports, session)
# Script entry guard: running this file directly does nothing.
if __name__ == '__main__': pass