diff --git a/brotab/api.py b/brotab/api.py index 4f6d471..a94bdc9 100644 --- a/brotab/api.py +++ b/brotab/api.py @@ -1,29 +1,30 @@ import io +import json +import logging +import socket import sys import time -import socket -import logging -import json -from traceback import print_exc -from urllib.error import URLError, HTTPError -from urllib.request import Request, urlopen -from urllib.parse import quote_plus -from functools import partial from collections.abc import Mapping - +from functools import partial +from http.client import RemoteDisconnected +from traceback import print_exc from typing import List +from urllib.error import HTTPError +from urllib.error import URLError +from urllib.parse import quote_plus +from urllib.request import Request +from urllib.request import urlopen -from brotab.inout import edit_tabs_in_editor from brotab.inout import MultiPartForm -from brotab.parallel import call_parallel +from brotab.inout import edit_tabs_in_editor from brotab.operations import infer_delete_and_move_commands +from brotab.parallel import call_parallel from brotab.tab import parse_tab_lines from brotab.utils import encode_query - logger = logging.getLogger('brotab') -HTTP_TIMEOUT = 10.0 # 2 # 10.0 +HTTP_TIMEOUT = 10.0 MAX_NUMBER_OF_TABS = 5000 @@ -35,6 +36,7 @@ class SingleMediatorAPI(object): """ This API is designed to work with a single mediator. """ + def __init__(self, prefix, host='localhost', port=4625, startup_timeout=None): self._prefix = '%s.' % prefix self._host = host @@ -53,6 +55,10 @@ def wait_for_startup(self, timeout_msec: int) -> bool: time.sleep(0.050) return False + @property + def browser(self) -> str: + return self._browser + @property def ready(self): return self._browser != '' @@ -68,8 +74,8 @@ def prefix_tabs(self, tabs): return list(map(self.prefix_tab, tabs)) def unprefix_tabs(self, tabs): - N = len(self._prefix) - return [tab[N:] + num = len(self._prefix) + return [tab[num:] if tab.startswith(self._prefix) else tab for tab in tabs] @@ -86,7 +92,7 @@ def _get_pid(self): """Get process ID from the mediator.""" try: return int(self._get('/get_pid')) - except (URLError, HTTPError, socket.timeout) as e: + except (URLError, HTTPError, socket.timeout, RemoteDisconnected) as e: logger.info('_get_pid failed: %s', e) return -1 @@ -94,13 +100,13 @@ def _get_browser(self): """Get browser name from the mediator.""" try: return self._get('/get_browser') - except (URLError, HTTPError, socket.timeout) as e: + except (URLError, HTTPError, socket.timeout, RemoteDisconnected) as e: logger.info('_get_browser failed: %s', e) return '' def close_tabs(self, args): tabs = ','.join(tab_id for _prefix, _window_id, - tab_id in self._split_tabs(args)) + tab_id in self._split_tabs(args)) return self._get('/close_tabs/%s' % tabs) def activate_tab(self, args: List[str], focused: bool): @@ -108,7 +114,7 @@ def activate_tab(self, args: List[str], focused: bool): return # args: ['a.1.2'] - prefix, window_id, tab_id = args[0].split('.') + _prefix, _window_id, tab_id = args[0].split('.') self._get('/activate_tab/%s%s' % (tab_id, '?focused=1' if focused else '')) def get_active_tabs(self, args) -> List[str]: @@ -152,9 +158,6 @@ def list_tabs(self, args): result = self._get('/list_tabs') lines = [] for line in result.splitlines()[:num_tabs]: - # for line in result.split('\n')[:num_tabs]: - # line = '%s%s' % (self._prefix, line) - # print(line) lines.append(line) return self.prefix_tabs(lines) @@ -203,12 +206,12 @@ def get_words(self, tab_ids, match_regex, join_with): 'SingleMediatorAPI: get_words: %s, match_regex: %s, join_with: %s', tab_id, match_regex, join_with) words |= set(self._get( - '/get_words/%s/?match_regex=%s&join_with=%s' % (tab_id, match_regex, join_with) + '/get_words/%s?match_regex=%s&join_with=%s' % (tab_id, match_regex, join_with) ).splitlines()) if not tab_ids: words = set(self._get( - '/get_words/?match_regex=%s&join_with=%s' % (match_regex, join_with) + '/get_words?match_regex=%s&join_with=%s' % (match_regex, join_with) ).splitlines()) return sorted(list(words)) @@ -219,7 +222,7 @@ def get_text_or_html(self, command, args, delimiter_regex, replace_with): num_tabs = int(args[0]) result = self._get( - '/%s/?delimiter_regex=%s&replace_with=%s' % ( + '/%s?delimiter_regex=%s&replace_with=%s' % ( command, encode_query(delimiter_regex), encode_query(replace_with), @@ -279,10 +282,6 @@ def ready_apis(self): return [api for api in self._apis if api.ready] def close_tabs(self, args): - # if len(args) == 0: - # print('Usage: brotab_client.py close_tabs <#tab ...>') - # return 2 - for api in self._apis: api.close_tabs(args) @@ -373,7 +372,7 @@ def get_words(self, tab_ids, match_regex, join_with): start = time.time() words |= set(api.get_words(tab_ids, match_regex, join_with)) delta = time.time() - start - #print('DELTA', delta, file=sys.stderr) + # print('DELTA', delta, file=sys.stderr) return sorted(list(words)) def _get_text_or_html(self, api, getter, args, delimiter_regex, replace_with): @@ -407,4 +406,3 @@ def get_html(self, args, delimiter_regex, replace_with): tabs.extend(self._get_text_or_html(api, api.get_html, args, delimiter_regex, replace_with)) return tabs - diff --git a/brotab/inout.py b/brotab/inout.py index 8317062..8f91781 100644 --- a/brotab/inout.py +++ b/brotab/inout.py @@ -1,13 +1,14 @@ import io +import mimetypes import os -import sys -import uuid import select import socket +import sys import tempfile -import mimetypes +import uuid +from subprocess import CalledProcessError +from subprocess import check_call from tempfile import NamedTemporaryFile -from subprocess import check_call, CalledProcessError from typing import Iterable from brotab.platform import get_editor @@ -35,11 +36,11 @@ def get_mediator_ports() -> Iterable: return range(MIN_MEDIATOR_PORT, MAX_MEDIATOR_PORT) -def get_free_tcp_port(start=1025, end=65536, host='127.0.0.1'): +def get_available_tcp_port(start=1025, end=65536, host='127.0.0.1'): for port in range(start, end): if not is_port_accepting_connections(port, host): return port - return RuntimeError('Cannot find free port in range %d:%d' % (start, end)) + return RuntimeError('Cannot find available port in range %d:%d' % (start, end)) def is_port_accepting_connections(port, host='127.0.0.1'): @@ -82,6 +83,7 @@ def edit_tabs_in_editor(tabs_before): except CalledProcessError: return None + def read_stdin(): if select.select([sys.stdin, ], [], [], 1.0)[0]: return sys.stdin.read() @@ -118,8 +120,8 @@ def add_file(self, fieldname, filename, fileHandle, body = fileHandle.read() if mimetype is None: mimetype = ( - mimetypes.guess_type(filename)[0] or - 'application/octet-stream' + mimetypes.guess_type(filename)[0] or + 'application/octet-stream' ) self.files.append((fieldname, filename, mimetype, body)) return @@ -133,7 +135,7 @@ def _form_data(name): def _attached_file(name, filename): return ('Content-Disposition: file; ' 'name="{}"; filename="{}"\r\n').format( - name, filename).encode('utf-8') + name, filename).encode('utf-8') @staticmethod def _content_type(ct): diff --git a/brotab/main.py b/brotab/main.py index bde430e..4c95874 100644 --- a/brotab/main.py +++ b/brotab/main.py @@ -49,47 +49,42 @@ import os import re +import shutil import sys import time -import shutil -import logging -from string import ascii_lowercase from argparse import ArgumentParser from functools import partial from itertools import groupby -from urllib.parse import quote_plus, quote - -from typing import Tuple, List - -from brotab.inout import is_port_accepting_connections -from brotab.inout import read_stdin +from string import ascii_lowercase +from typing import List +from typing import Tuple +from urllib.parse import quote_plus + +from brotab.api import MultipleMediatorsAPI +from brotab.api import SingleMediatorAPI +from brotab.const import DEFAULT_GET_HTML_DELIMITER_REGEX +from brotab.const import DEFAULT_GET_HTML_REPLACE_WITH +from brotab.const import DEFAULT_GET_TEXT_DELIMITER_REGEX +from brotab.const import DEFAULT_GET_TEXT_REPLACE_WITH +from brotab.const import DEFAULT_GET_WORDS_JOIN_WITH +from brotab.const import DEFAULT_GET_WORDS_MATCH_REGEX from brotab.inout import get_mediator_ports from brotab.inout import in_temp_dir +from brotab.inout import is_port_accepting_connections +from brotab.inout import read_stdin from brotab.inout import stdout_buffer_write +from brotab.mediator.log import init_brotab_logger from brotab.platform import is_windows +from brotab.platform import make_windows_path_double_sep +from brotab.platform import register_native_manifest_windows_brave from brotab.platform import register_native_manifest_windows_chrome from brotab.platform import register_native_manifest_windows_firefox -from brotab.platform import register_native_manifest_windows_brave -from brotab.platform import make_windows_path_double_sep -from brotab.utils import split_tab_ids, get_file_size, encode_query -from brotab.search.query import query from brotab.search.index import index -from brotab.api import SingleMediatorAPI, MultipleMediatorsAPI -from brotab.const import \ - DEFAULT_GET_WORDS_MATCH_REGEX, \ - DEFAULT_GET_WORDS_JOIN_WITH, \ - DEFAULT_GET_TEXT_DELIMITER_REGEX, \ - DEFAULT_GET_TEXT_REPLACE_WITH, \ - DEFAULT_GET_HTML_DELIMITER_REGEX, \ - DEFAULT_GET_HTML_REPLACE_WITH - - -FORMAT = '%(asctime)-15s %(levelname)-10s %(message)s' -logging.basicConfig( - format=FORMAT, - filename=in_temp_dir('brotab.log'), - level=logging.DEBUG) -logger = logging.getLogger('brotab') +from brotab.search.query import query +from brotab.utils import get_file_size +from brotab.utils import split_tab_ids + +logger = init_brotab_logger('brotab') logger.info('Logger has been created') @@ -193,6 +188,7 @@ def query_tabs(args): for tab in api.query_tabs(queryInfo): print(tab) + def index_tabs(args): if args.tsv is None: args.tsv = in_temp_dir('tabs.tsv') @@ -247,7 +243,7 @@ def get_words(args): words = api.get_words(args.tab_ids, args.match_regex, args.join_with) print('\n'.join(words)) delta = time.time() - start - #print('DELTA TOTAL', delta, file=sys.stderr) + # print('DELTA TOTAL', delta, file=sys.stderr) def get_text_or_html(getter, args): @@ -287,7 +283,7 @@ def get_html(args): def show_duplicates(args): # I'm not using uniq here because it's not easy to get duplicates # only by a single column. awk is much easier in this regard. - #print('bt list | sort -k3 | uniq -f2 -D | cut -f1 | bt close') + # print('bt list | sort -k3 | uniq -f2 -D | cut -f1 | bt close') print("Show duplicates by Title:") print( "bt list | sort -k2 | awk -F$'\\t' '{ if (a[$2]++ > 0) print }' | cut -f1 | bt close") @@ -365,7 +361,8 @@ def install_mediator(args): register_native_manifest_windows_brave(destination) print('Link to Firefox extension: https://addons.mozilla.org/en-US/firefox/addon/brotab/') - print('Link to Chrome (Chromium)/Brave extension: https://chrome.google.com/webstore/detail/brotab/mhpeahbikehnfkfnmopaigggliclhmnc/') + print( + 'Link to Chrome (Chromium)/Brave extension: https://chrome.google.com/webstore/detail/brotab/mhpeahbikehnfkfnmopaigggliclhmnc/') def executejs(args): @@ -462,55 +459,55 @@ def parse_args(args): prefix_chars='-+') parser_query_tabs.set_defaults(func=query_tabs) parser_query_tabs.add_argument('+active', action='store_const', const=True, default=None, - help='tabs are active in their windows') + help='tabs are active in their windows') parser_query_tabs.add_argument('-active', action='store_const', const=False, default=None, - help='tabs are not active in their windows') + help='tabs are not active in their windows') parser_query_tabs.add_argument('+pinned', action='store_const', const=True, default=None, - help='tabs are pinned') + help='tabs are pinned') parser_query_tabs.add_argument('-pinned', action='store_const', const=False, default=None, - help='tabs are not pinned') + help='tabs are not pinned') parser_query_tabs.add_argument('+audible', action='store_const', const=True, default=None, - help='tabs are audible') + help='tabs are audible') parser_query_tabs.add_argument('-audible', action='store_const', const=False, default=None, - help='tabs are not audible') + help='tabs are not audible') parser_query_tabs.add_argument('+muted', action='store_const', const=True, default=None, - help='tabs are muted') + help='tabs are muted') parser_query_tabs.add_argument('-muted', action='store_const', const=False, default=None, - help='tabs not are muted') + help='tabs not are muted') parser_query_tabs.add_argument('+highlighted', action='store_const', const=True, default=None, - help='tabs are highlighted') + help='tabs are highlighted') parser_query_tabs.add_argument('-highlighted', action='store_const', const=False, default=None, - help='tabs not are highlighted') + help='tabs not are highlighted') parser_query_tabs.add_argument('+discarded', action='store_const', const=True, default=None, - help='tabs are discarded i.e. unloaded from memory but still visible in the tab strip.') + help='tabs are discarded i.e. unloaded from memory but still visible in the tab strip.') parser_query_tabs.add_argument('-discarded', action='store_const', const=False, default=None, - help='tabs are not discarded i.e. unloaded from memory but still visible in the tab strip.') + help='tabs are not discarded i.e. unloaded from memory but still visible in the tab strip.') parser_query_tabs.add_argument('+autoDiscardable', action='store_const', const=True, default=None, - help='tabs can be discarded automatically by the browser when resources are low.') + help='tabs can be discarded automatically by the browser when resources are low.') parser_query_tabs.add_argument('-autoDiscardable', action='store_const', const=False, default=None, - help='tabs cannot be discarded automatically by the browser when resources are low.') + help='tabs cannot be discarded automatically by the browser when resources are low.') parser_query_tabs.add_argument('+currentWindow', action='store_const', const=True, default=None, - help='tabs are in the current window.') + help='tabs are in the current window.') parser_query_tabs.add_argument('-currentWindow', action='store_const', const=False, default=None, - help='tabs are not in the current window.') + help='tabs are not in the current window.') parser_query_tabs.add_argument('+lastFocusedWindow', action='store_const', const=True, default=None, - help='tabs are in the last focused window.') + help='tabs are in the last focused window.') parser_query_tabs.add_argument('-lastFocusedWindow', action='store_const', const=False, default=None, - help='tabs are not in the last focused window.') + help='tabs are not in the last focused window.') parser_query_tabs.add_argument('-status', type=str, choices=['loading', 'complete'], - help='whether the tabs have completed loading i.e. loading or complete.') + help='whether the tabs have completed loading i.e. loading or complete.') parser_query_tabs.add_argument('-title', type=str, - help='match page titles against a pattern.') + help='match page titles against a pattern.') parser_query_tabs.add_argument('-url', type=str, action='append', - help='match tabs against one or more URL patterns. Fragment identifiers are not matched. see https://developer.chrome.com/extensions/match_patterns') + help='match tabs against one or more URL patterns. Fragment identifiers are not matched. see https://developer.chrome.com/extensions/match_patterns') parser_query_tabs.add_argument('-windowId', type=int, - help='the ID of the parent window, or windows.WINDOW_ID_CURRENT for the current window.') + help='the ID of the parent window, or windows.WINDOW_ID_CURRENT for the current window.') parser_query_tabs.add_argument('-windowType', type=str, choices=['normal', 'popup', 'panel', 'app', 'devtools'], - help='the type of window the tabs are in.') + help='the type of window the tabs are in.') parser_query_tabs.add_argument('-index', type=int, - help='the position of the tabs within their windows.') + help='the position of the tabs within their windows.') parser_query_tabs.add_argument('-info', type=str, - help=''' + help=''' the queryInfo parameter as outlined here: https://developer.chrome.com/extensions/tabs#method-query. all other query arguments are ignored if this argument is present. ''') @@ -586,7 +583,7 @@ def parse_args(args): ''') parser_get_text.set_defaults(func=get_text) parser_get_text.add_argument('tab_ids', type=str, nargs='*', - help='Tab IDs to get text from') + help='Tab IDs to get text from') parser_get_text.add_argument('--tsv', type=str, default=None, help='tsv file to save results to') parser_get_text.add_argument('--cleanup', action='store_true', @@ -606,7 +603,7 @@ def parse_args(args): ''') parser_get_html.set_defaults(func=get_html) parser_get_html.add_argument('tab_ids', type=str, nargs='*', - help='Tab IDs to get text from') + help='Tab IDs to get text from') parser_get_html.add_argument('--tsv', type=str, default=None, help='tsv file to save results to') parser_get_html.add_argument('--cleanup', action='store_true', @@ -650,7 +647,7 @@ def parse_args(args): parser_install_mediator.add_argument('--tests', action='store_true', default=False, help='install testing version of ' - 'manifest for chromium') + 'manifest for chromium') parser_install_mediator.set_defaults(func=install_mediator) return parser.parse_args(args) diff --git a/brotab/mediator/brotab_mediator.py b/brotab/mediator/brotab_mediator.py index 8ae9eda..de7e327 100755 --- a/brotab/mediator/brotab_mediator.py +++ b/brotab/mediator/brotab_mediator.py @@ -1,329 +1,19 @@ #!/usr/bin/env python3 -import json import logging import logging.handlers -import struct import os -import sys +import re import socket -import signal -import requests -from urllib.parse import quote_plus, unquote_plus -from typing import List -import flask -from flask import request - -from brotab.utils import encode_query, decode_query from brotab.inout import get_mediator_ports from brotab.inout import is_port_accepting_connections -from brotab.inout import in_temp_dir -from brotab.const import \ - DEFAULT_GET_WORDS_MATCH_REGEX, \ - DEFAULT_GET_WORDS_JOIN_WITH, \ - DEFAULT_GET_TEXT_DELIMITER_REGEX, \ - DEFAULT_GET_TEXT_REPLACE_WITH, \ - DEFAULT_GET_HTML_DELIMITER_REGEX, \ - DEFAULT_GET_HTML_REPLACE_WITH - -app = flask.Flask(__name__) - -FORMAT = '%(asctime)-15s %(process)-5d %(levelname)-10s %(message)s' -MAX_LOG_SIZE = 50 * 1024 * 1024 -LOG_FILENAME = in_temp_dir('brotab_mediator.log') -LOG_BACKUP_COUNT = 1 - -logger = logging.getLogger('brotab_mediator') -logger.setLevel(logging.DEBUG) -handler = logging.handlers.RotatingFileHandler( - filename=LOG_FILENAME, - maxBytes=MAX_LOG_SIZE, - backupCount=LOG_BACKUP_COUNT, -) -handler.setFormatter(logging.Formatter(FORMAT)) -logger.addHandler(handler) -logger.info('Logger has been created') - -DEFAULT_HTTP_IFACE = '127.0.0.1' -DEFAULT_MIN_HTTP_PORT = 4625 -DEFAULT_MAX_HTTP_PORT = DEFAULT_MIN_HTTP_PORT + 10 -actual_port = None - -DEFAULT_GET_WORDS_MATCH_REGEX = encode_query(DEFAULT_GET_WORDS_MATCH_REGEX) -DEFAULT_GET_WORDS_JOIN_WITH = encode_query(DEFAULT_GET_WORDS_JOIN_WITH) -DEFAULT_GET_TEXT_DELIMITER_REGEX = encode_query(DEFAULT_GET_TEXT_DELIMITER_REGEX) -DEFAULT_GET_TEXT_REPLACE_WITH = encode_query(DEFAULT_GET_TEXT_REPLACE_WITH) -DEFAULT_GET_HTML_DELIMITER_REGEX = encode_query(DEFAULT_GET_HTML_DELIMITER_REGEX) -DEFAULT_GET_HTML_REPLACE_WITH = encode_query(DEFAULT_GET_HTML_REPLACE_WITH) - - -def create_browser_remote_api(transport=None): - if transport is None: - transport = StdTransport(sys.stdin.buffer, sys.stdout.buffer) - return BrowserRemoteAPI(transport) - - -class StdTransport: - def __init__(self, input_file, output_file): - self._in = input_file - self._out = output_file - - def send(self, message): - encoded = self._encode(message) - logger.info('SENDING: %s', message) - self._out.write(encoded['length']) - self._out.write(encoded['content']) - self._out.flush() - - def recv(self): - raw_rength = self._in.read(4) - if len(raw_rength) == 0: - sys.exit(0) - message_length = struct.unpack('@I', raw_rength)[0] - message = self._in.read(message_length).decode('utf8') - logger.info('RECEIVED: %s', message.encode('utf8')) - return json.loads(message) - - def _encode(self, message): - encoded_content = json.dumps(message).encode('utf8') - encoded_length = struct.pack('@I', len(encoded_content)) - return {'length': encoded_length, 'content': encoded_content} - - -class BrowserRemoteAPI: - """ - Communicates with a browser using stdin/stdout. This mediator is supposed - to be run by the browser after a request from the helper extension. - """ - - def __init__(self, transport): - self._transport = transport - - def list_tabs(self): - command = {'name': 'list_tabs'} - self._transport.send(command) - return self._transport.recv() - - def query_tabs(self, query_info: str): - logger.info('query info: %s', query_info) - command = {'name': 'query_tabs', 'query_info': query_info} - self._transport.send(command) - return self._transport.recv() - - def move_tabs(self, move_triplets: str): - """ - :param move_triplets: Comma-separated list of: - - """ - logger.info('move_tabs, move_triplets: %s', move_triplets) - - triplets = [list(map(int, triplet.split(' '))) - for triplet in move_triplets.split(',')] - logger.info('moving tab ids: %s', triplets) - command = {'name': 'move_tabs', 'move_triplets': triplets} - self._transport.send(command) - return self._transport.recv() - - def open_urls(self, urls: List[str], window_id=None): - """ - Open specified list of URLs in a window, specified by window_id. - - If window_id is None, currently active window is used. - """ - logger.info('open urls: %s', urls) - - command = {'name': 'open_urls', 'urls': urls} - if window_id is not None: - command['window_id'] = window_id - self._transport.send(command) - return self._transport.recv() - - def close_tabs(self, tab_ids: str): - """ - :param tab_ids: Comma-separated list of tab IDs to close. - """ - int_tab_ids = [int(id_) for id_ in tab_ids.split(',')] - logger.info('closing tab ids: %s', int_tab_ids) - command = {'name': 'close_tabs', 'tab_ids': int_tab_ids} - self._transport.send(command) - return self._transport.recv() - - def new_tab(self, query): - url = "https://www.google.com/search?q=%s" % quote_plus(query) - logger.info('opening url: %s', url) - command = {'name': 'new_tab', 'url': url} - self._transport.send(command) - return self._transport.recv() - - def activate_tab(self, tab_id: int, focused: bool): - logger.info('activating tab id: %s', tab_id) - command = {'name': 'activate_tab', 'tab_id': tab_id, 'focused': focused} - self._transport.send(command) - - def get_active_tabs(self) -> str: - logger.info('getting active tabs') - command = {'name': 'get_active_tabs'} - self._transport.send(command) - return self._transport.recv() - - def get_words(self, tab_id, match_regex, join_with): - logger.info('getting tab words: %s', tab_id) - command = { - 'name': 'get_words', - 'tab_id': tab_id, - 'match_regex': match_regex, - 'join_with': join_with, - } - self._transport.send(command) - return self._transport.recv() - - def get_text(self, delimiter_regex, replace_with): - logger.info('getting text, delimiter_regex=%s, replace_with=%s', - delimiter_regex, replace_with) - command = { - 'name': 'get_text', - 'delimiter_regex': delimiter_regex, - 'replace_with': replace_with, - } - self._transport.send(command) - return self._transport.recv() - - def get_html(self, delimiter_regex, replace_with): - logger.info('getting html, delimiter_regex=%s, replace_with=%s', - delimiter_regex, replace_with) - command = { - 'name': 'get_html', - 'delimiter_regex': delimiter_regex, - 'replace_with': replace_with, - } - self._transport.send(command) - return self._transport.recv() - - def get_browser(self): - logger.info('getting browser name') - command = {'name': 'get_browser'} - self._transport.send(command) - return self._transport.recv() - - -browser = create_browser_remote_api() -logger.info('BrowserRemoteAPI has been created') - - -@app.route('/shutdown') -def shutdown(): - # Taken from: https://stackoverflow.com/a/17053522/258421 - logger.info('Shutting down the server...') - func = request.environ.get('werkzeug.server.shutdown') - if func is None: - raise RuntimeError('Not running with the Werkzeug Server') - func() - return 'OK' - - -@app.route('/list_tabs') -def list_tabs(): - tabs = browser.list_tabs() - return '\n'.join(tabs) - - -@app.route('/query_tabs/') -def query_tabs(query_info): - tabs = browser.query_tabs(query_info) - return '\n'.join(tabs) - - -@app.route('/move_tabs/') -def move_tabs(move_triplets): - return browser.move_tabs(unquote_plus(move_triplets)) - - -@app.route('/open_urls', methods=['POST']) -@app.route('/open_urls/', methods=['POST']) -def open_urls(window_id=None): - urls = request.files.get('urls') - if urls is None: - return 'ERROR: Please provide urls file in the request' - urls = urls.stream.read().decode('utf8').splitlines() - logger.info('Open urls (window_id = %s): %s', window_id, urls) - return browser.open_urls(urls, window_id) - - -@app.route('/close_tabs/') -def close_tabs(tab_ids): - return browser.close_tabs(tab_ids) - - -@app.route('/new_tab/') -def new_tab(query): - return browser.new_tab(query) - - -@app.route('/activate_tab/') -def activate_tab(tab_id): - focused = bool(request.args.get('focused', False)) - browser.activate_tab(tab_id, focused) - return 'OK' - - -@app.route('/get_active_tabs') -def get_active_tabs(): - return browser.get_active_tabs() - - -@app.route('/get_words/') -@app.route('/get_words//') -def get_words(tab_id=None): - tab_id = int(tab_id) if is_valid_integer(tab_id) else None - match_regex = request.args.get('match_regex', DEFAULT_GET_WORDS_MATCH_REGEX) - join_with = request.args.get('join_with', DEFAULT_GET_WORDS_JOIN_WITH) - words = browser.get_words(tab_id, - decode_query(match_regex), - decode_query(join_with)) - logger.info('words for tab_id %s (match_regex %s, join_with %s): %s', - tab_id, match_regex, join_with, words) - return '\n'.join(words) - - -@app.route('/get_text/') -def get_text(): - delimiter_regex = request.args.get('delimiter_regex', DEFAULT_GET_TEXT_DELIMITER_REGEX) - replace_with = request.args.get('replace_with', DEFAULT_GET_TEXT_REPLACE_WITH) - lines = browser.get_text(decode_query(delimiter_regex), - decode_query(replace_with)) - return '\n'.join(lines) - - -@app.route('/get_html/') -def get_html(): - delimiter_regex = request.args.get('delimiter_regex', DEFAULT_GET_HTML_DELIMITER_REGEX) - replace_with = request.args.get('replace_with', DEFAULT_GET_HTML_REPLACE_WITH) - lines = browser.get_html(decode_query(delimiter_regex), - decode_query(replace_with)) - return '\n'.join(lines) - - -@app.route('/get_pid') -def get_pid(): - return str(os.getpid()) - - -@app.route('/get_browser') -def get_browser(): - return browser.get_browser() - - -@app.route('/') -def root_handler(): - lines = [] - for rule in app.url_map.iter_rules(): - line = unquote_plus('%s\t%s' % (rule.endpoint, rule)) - logger.info('endpoint type: %s', type(rule.endpoint)) - lines.append(line) - return '\n'.join(lines) + '\n' - - +from brotab.mediator import sig +from brotab.mediator.const import DEFAULT_HTTP_IFACE +from brotab.mediator.http_server import MediatorHttpServer +from brotab.mediator.log import disable_click_echo +from brotab.mediator.log import logger +from brotab.mediator.remote_api import default_remote_api # TODO: # 1. Run HTTP server and accept the following commands: # - /list_tabs @@ -341,32 +31,13 @@ def root_handler(): # make sure this threaded reader and server reader are mutually exclusive. # TODO: all commands should be synchronous and should only terminate after # the action has been actually executed in the browser. +from brotab.mediator.transport import default_transport -def is_valid_integer(str_value): - try: - return int(str_value) >= 0 - except (ValueError, TypeError): - return False - - -def signal_pipe(e): - logger.info('Pipe has been closed...') - requests.get('http://%s:%s/shutdown' % (DEFAULT_HTTP_IFACE, actual_port)) +# app = flask.Flask(__name__) -def disable_click_echo(): - """Stupid flask started using click which unconditionally prints stupid - messages""" - def numb_echo(*args, **kwargs): - pass - - import click - click.echo = numb_echo - click.secho = numb_echo - - -def monkeypatch_socket_bind(): +def monkeypatch_socket_bind_allow_port_reuse(): """Allow port reuse by default""" socket.socket._bind = socket.socket.bind @@ -374,48 +45,55 @@ def my_socket_bind(self, *args, **kwargs): logger.info('Custom bind called: %s, %s', args, kwargs) self.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) return socket.socket._bind(self, *args, **kwargs) + socket.socket.bind = my_socket_bind -def run_mediator(port: int, remote_api, no_logging=False): - global browser - # reassign this variable again so that tests could mock it - browser = remote_api - # TODO: does not really work, I still see logs in unittests - if no_logging: - log = logging.getLogger('werkzeug') - log.setLevel(logging.ERROR) - log.disabled = True - app.logger.disabled = True - from flask.logging import default_handler - app.logger.removeHandler(default_handler) - return app.run(DEFAULT_HTTP_IFACE, port, debug=False, threaded=False) +def blacklist_loggers(): + blacklist = r'.*pyppeteer.*|.*urllib.*|.*socks.*|.*requests.*|.*dotenv.*' + for name in logging.root.manager.loggerDict: + match = re.match(blacklist, name) is not None + # print(name, match) + if match: + logger = logging.getLogger(name) + logger.setLevel(logging.ERROR) + logger.propagate = False -def main(): - monkeypatch_socket_bind() +def mediator_main(): + monkeypatch_socket_bind_allow_port_reuse() disable_click_echo() - global actual_port port_range = list(get_mediator_ports()) + transport = default_transport() + remote_api = default_remote_api(transport) + host = DEFAULT_HTTP_IFACE + for port in port_range: - logger.info('Starting mediator on %s:%s...', - DEFAULT_HTTP_IFACE, port) + logger.info('Starting mediator on %s:%s...', host, port) if is_port_accepting_connections(port): continue - actual_port = port try: - run_mediator(port, create_browser_remote_api()) - logger.info('Exiting mediator...') + server = MediatorHttpServer(host, port, remote_api) + process = server.run.in_process() + sig.setup(server.run.shutdown) + process.join() + logger.info('Exiting mediator pid=%s on %s:%s...', os.getpid(), host, port) break except OSError as e: + # TODO: fixme: we won't get this if we run in a process logger.info('Cannot bind on port %s: %s', port, e) - except BrokenPipeError: - signal_pipe(e) + except BrokenPipeError as e: + # TODO: probably also won't work with processes, also a race + sig.pipe(server.run.shutdown, e) else: logger.error('No TCP ports available for bind in range %s', port_range) +def main(): + mediator_main() + + if __name__ == '__main__': main() diff --git a/brotab/mediator/const.py b/brotab/mediator/const.py new file mode 100644 index 0000000..147dcba --- /dev/null +++ b/brotab/mediator/const.py @@ -0,0 +1,17 @@ +from brotab.const import DEFAULT_GET_HTML_DELIMITER_REGEX +from brotab.const import DEFAULT_GET_HTML_REPLACE_WITH +from brotab.const import DEFAULT_GET_TEXT_DELIMITER_REGEX +from brotab.const import DEFAULT_GET_TEXT_REPLACE_WITH +from brotab.const import DEFAULT_GET_WORDS_JOIN_WITH +from brotab.const import DEFAULT_GET_WORDS_MATCH_REGEX +from brotab.utils import encode_query + +DEFAULT_HTTP_IFACE = '127.0.0.1' +DEFAULT_MIN_HTTP_PORT = 4625 +DEFAULT_MAX_HTTP_PORT = DEFAULT_MIN_HTTP_PORT + 10 +DEFAULT_GET_WORDS_MATCH_REGEX = encode_query(DEFAULT_GET_WORDS_MATCH_REGEX) +DEFAULT_GET_WORDS_JOIN_WITH = encode_query(DEFAULT_GET_WORDS_JOIN_WITH) +DEFAULT_GET_TEXT_DELIMITER_REGEX = encode_query(DEFAULT_GET_TEXT_DELIMITER_REGEX) +DEFAULT_GET_TEXT_REPLACE_WITH = encode_query(DEFAULT_GET_TEXT_REPLACE_WITH) +DEFAULT_GET_HTML_DELIMITER_REGEX = encode_query(DEFAULT_GET_HTML_DELIMITER_REGEX) +DEFAULT_GET_HTML_REPLACE_WITH = encode_query(DEFAULT_GET_HTML_REPLACE_WITH) diff --git a/brotab/mediator/http_server.py b/brotab/mediator/http_server.py new file mode 100644 index 0000000..68d1bb1 --- /dev/null +++ b/brotab/mediator/http_server.py @@ -0,0 +1,140 @@ +import os +from urllib.parse import unquote_plus + +from flask import Flask +from flask import request +from flask import url_for + +from brotab.mediator.const import DEFAULT_GET_HTML_DELIMITER_REGEX +from brotab.mediator.const import DEFAULT_GET_HTML_REPLACE_WITH +from brotab.mediator.const import DEFAULT_GET_TEXT_DELIMITER_REGEX +from brotab.mediator.const import DEFAULT_GET_TEXT_REPLACE_WITH +from brotab.mediator.const import DEFAULT_GET_WORDS_JOIN_WITH +from brotab.mediator.const import DEFAULT_GET_WORDS_MATCH_REGEX +from brotab.mediator.log import logger +from brotab.mediator.remote_api import BrowserRemoteAPI +from brotab.mediator.runner import Runner +from brotab.mediator.support import is_valid_integer +from brotab.utils import decode_query + + +class MediatorHttpServer: + def __init__(self, host: str, port: str, remote_api: BrowserRemoteAPI): + self.host: str = host + self.port: str = port + self.remote_api: BrowserRemoteAPI = remote_api + self.pid: int = os.getpid() + self.app = Flask(__name__) + self._setup_routes() + + def target() -> None: + self.app.url_map.strict_slashes = False + self.app.run(host=host, port=port, debug=False, threaded=False) + + self.run = Runner(target) + + def _setup_routes(self) -> None: + logger.info('Starting mediator http server on %s:%s pid=%s', self.host, self.port, self.pid) + self.app.register_error_handler(ConnectionError, self.error_handler) + self.app.register_error_handler(TimeoutError, self.error_handler) + self.app.register_error_handler(ValueError, self.error_handler) + self.app.route('/', methods=['GET'])(self.route_index) + self.app.route('/shutdown', methods=['GET'])(self.shutdown) + self.app.route('/list_tabs', methods=['GET'])(self.list_tabs) + self.app.route('/query_tabs/', methods=['GET'])(self.query_tabs) + self.app.route('/move_tabs/', methods=['GET'])(self.move_tabs) + self.app.route('/open_urls/', methods=['POST'])(self.open_urls) + self.app.route('/open_urls', methods=['POST'])(self.open_urls) + self.app.route('/close_tabs/', methods=['GET'])(self.close_tabs) + self.app.route('/new_tab/', methods=['GET'])(self.new_tab) + self.app.route('/activate_tab/', methods=['GET'])(self.activate_tab) + self.app.route('/get_active_tabs', methods=['GET'])(self.get_active_tabs) + self.app.route('/get_words/', methods=['GET'])(self.get_words) + self.app.route('/get_words', methods=['GET'])(self.get_words) + self.app.route('/get_text', methods=['GET'])(self.get_text) + self.app.route('/get_html', methods=['GET'])(self.get_html) + self.app.route('/get_pid', methods=['GET'])(self.get_pid) + self.app.route('/get_browser', methods=['GET'])(self.get_browser) + + def error_handler(self, e: Exception): + logger.exception('Shutting down mediator http server due to exception: %s', e) + self.run.shutdown() + return '' + + def route_index(self): + links = [] + for rule in self.app.url_map.iter_rules(): + args = {v: v for v in rule.arguments or {}} + url = url_for(rule.endpoint, **args) + links.append('%s\v%s' % (url, rule.endpoint)) + return '\n'.join(links) + + def shutdown(self): + self.run.shutdown() + return 'OK' + + def list_tabs(self): + tabs = self.remote_api.list_tabs() + return '\n'.join(tabs) + + def query_tabs(self, query_info): + tabs = self.remote_api.query_tabs(query_info) + return '\n'.join(tabs) + + def move_tabs(self, move_triplets): + return self.remote_api.move_tabs(unquote_plus(move_triplets)) + + def open_urls(self, window_id=None): + urls = request.files.get('urls') + if urls is None: + return 'ERROR: Please provide urls file in the request' + urls = urls.stream.read().decode('utf8').splitlines() + logger.info('Open urls (window_id = %s): %s', window_id, urls) + return self.remote_api.open_urls(urls, window_id) + + def close_tabs(self, tab_ids): + return self.remote_api.close_tabs(tab_ids) + + def new_tab(self, query): + return self.remote_api.new_tab(query) + + def activate_tab(self, tab_id): + focused = bool(request.args.get('focused', False)) + self.remote_api.activate_tab(tab_id, focused) + return 'OK' + + def get_active_tabs(self): + return self.remote_api.get_active_tabs() + + def get_words(self, tab_id=None): + tab_id = int(tab_id) if is_valid_integer(tab_id) else None + match_regex = request.args.get('match_regex', DEFAULT_GET_WORDS_MATCH_REGEX) + join_with = request.args.get('join_with', DEFAULT_GET_WORDS_JOIN_WITH) + words = self.remote_api.get_words(tab_id, + decode_query(match_regex), + decode_query(join_with)) + logger.info('words for tab_id %s (match_regex %s, join_with %s): %s', + tab_id, match_regex, join_with, words) + return '\n'.join(words) + + def get_text(self): + delimiter_regex = request.args.get('delimiter_regex', DEFAULT_GET_TEXT_DELIMITER_REGEX) + replace_with = request.args.get('replace_with', DEFAULT_GET_TEXT_REPLACE_WITH) + lines = self.remote_api.get_text(decode_query(delimiter_regex), + decode_query(replace_with)) + return '\n'.join(lines) + + def get_html(self): + delimiter_regex = request.args.get('delimiter_regex', DEFAULT_GET_HTML_DELIMITER_REGEX) + replace_with = request.args.get('replace_with', DEFAULT_GET_HTML_REPLACE_WITH) + lines = self.remote_api.get_html(decode_query(delimiter_regex), + decode_query(replace_with)) + return '\n'.join(lines) + + def get_pid(self): + logger.info('getting pid') + return str(os.getpid()) + + def get_browser(self): + logger.info('getting browser name') + return self.remote_api.get_browser() diff --git a/brotab/mediator/log.py b/brotab/mediator/log.py index e69de29..d1c528c 100644 --- a/brotab/mediator/log.py +++ b/brotab/mediator/log.py @@ -0,0 +1,63 @@ +import logging +import logging.handlers +from traceback import format_stack + +from brotab.inout import in_temp_dir + + +def _init_logger(tag, filename: str): + FORMAT = '%(asctime)-15s %(process)-5d %(levelname)-10s %(filename)s:%(lineno)d:%(funcName)s %(message)s' + MAX_LOG_SIZE = 50 * 1024 * 1024 + LOG_BACKUP_COUNT = 1 + + log = logging.getLogger('brotab') + log.setLevel(logging.DEBUG) + handler = logging.handlers.RotatingFileHandler( + filename=filename, + maxBytes=MAX_LOG_SIZE, + backupCount=LOG_BACKUP_COUNT, + ) + handler.setFormatter(logging.Formatter(FORMAT)) + log.addHandler(handler) + log.info('Logger has been created (%s)', tag) + return log + + +def init_brotab_logger(tag: str): + return _init_logger(tag, in_temp_dir('brotab.log')) + + +def init_mediator_logger(tag: str): + return _init_logger(tag, in_temp_dir('brotab_mediator.log')) + + +def disable_logging(): + # disables flask request logging + log = logging.getLogger('werkzeug') + log.setLevel(logging.ERROR) + log.disabled = True + # TODO: investigate this, maybe we can redirect werkzeug from stdout to a file + # log.handlers = [] + # disables my own logging in log_and_suppress_exceptions + # app.logger.disabled = True + # from flask.logging import default_handler + # app.logger.removeHandler(default_handler) + + +def disable_click_echo(): + """Stupid flask started using click which unconditionally prints stupid + messages""" + + def numb_echo(*args, **kwargs): + pass + + import click + click.echo = numb_echo + click.secho = numb_echo + + +def stack(): + return '\n'.join(format_stack()) + + +logger = init_mediator_logger('mediator') diff --git a/brotab/mediator/remote_api.py b/brotab/mediator/remote_api.py index e69de29..349a254 100644 --- a/brotab/mediator/remote_api.py +++ b/brotab/mediator/remote_api.py @@ -0,0 +1,128 @@ +from typing import List +from urllib.parse import quote_plus + +from brotab.mediator.log import logger +from brotab.mediator.transport import Transport +from brotab.mediator.transport import default_transport + + +class BrowserRemoteAPI: + """ + Communicates with a browser using stdin/stdout. This mediator is supposed + to be run by the browser after a request from the helper extension. + """ + + def __init__(self, transport: Transport): + self._transport: Transport = transport + + def list_tabs(self): + command = {'name': 'list_tabs'} + self._transport.send(command) + return self._transport.recv() + + def query_tabs(self, query_info: str): + logger.info('query info: %s', query_info) + command = {'name': 'query_tabs', 'query_info': query_info} + self._transport.send(command) + return self._transport.recv() + + def move_tabs(self, move_triplets: str): + """ + :param move_triplets: Comma-separated list of: + + """ + logger.info('move_tabs, move_triplets: %s', move_triplets) + + triplets = [list(map(int, triplet.split(' '))) + for triplet in move_triplets.split(',')] + logger.info('moving tab ids: %s', triplets) + command = {'name': 'move_tabs', 'move_triplets': triplets} + self._transport.send(command) + return self._transport.recv() + + def open_urls(self, urls: List[str], window_id=None): + """ + Open specified list of URLs in a window, specified by window_id. + + If window_id is None, currently active window is used. + """ + logger.info('open urls: %s', urls) + + command = {'name': 'open_urls', 'urls': urls} + if window_id is not None: + command['window_id'] = window_id + self._transport.send(command) + return self._transport.recv() + + def close_tabs(self, tab_ids: str): + """ + :param tab_ids: Comma-separated list of tab IDs to close. + """ + int_tab_ids = [int(id_) for id_ in tab_ids.split(',')] + logger.info('closing tab ids: %s', int_tab_ids) + command = {'name': 'close_tabs', 'tab_ids': int_tab_ids} + self._transport.send(command) + return self._transport.recv() + + def new_tab(self, query: str): + url = "https://www.google.com/search?q=%s" % quote_plus(query) + logger.info('opening url: %s', url) + command = {'name': 'new_tab', 'url': url} + self._transport.send(command) + return self._transport.recv() + + def activate_tab(self, tab_id: int, focused: bool): + logger.info('activating tab id: %s', tab_id) + command = {'name': 'activate_tab', 'tab_id': tab_id, 'focused': focused} + self._transport.send(command) + + def get_active_tabs(self) -> str: + logger.info('getting active tabs') + command = {'name': 'get_active_tabs'} + self._transport.send(command) + return self._transport.recv() + + def get_words(self, tab_id: str, match_regex: str, join_with: str): + logger.info('getting tab words: %s', tab_id) + command = { + 'name': 'get_words', + 'tab_id': tab_id, + 'match_regex': match_regex, + 'join_with': join_with, + } + self._transport.send(command) + return self._transport.recv() + + def get_text(self, delimiter_regex: str, replace_with: str): + logger.info('getting text, delimiter_regex=%s, replace_with=%s', + delimiter_regex, replace_with) + command = { + 'name': 'get_text', + 'delimiter_regex': delimiter_regex, + 'replace_with': replace_with, + } + self._transport.send(command) + return self._transport.recv() + + def get_html(self, delimiter_regex: str, replace_with: str): + logger.info('getting html, delimiter_regex=%s, replace_with=%s', + delimiter_regex, replace_with) + command = { + 'name': 'get_html', + 'delimiter_regex': delimiter_regex, + 'replace_with': replace_with, + } + self._transport.send(command) + return self._transport.recv() + + def get_browser(self): + logger.info('getting browser name') + command = {'name': 'get_browser'} + self._transport.send(command) + return self._transport.recv() + + +def default_remote_api(transport: Transport = None) -> BrowserRemoteAPI: + if transport is None: + transport = default_transport() + return BrowserRemoteAPI(transport) diff --git a/brotab/mediator/runner.py b/brotab/mediator/runner.py index e69de29..e4bad94 100644 --- a/brotab/mediator/runner.py +++ b/brotab/mediator/runner.py @@ -0,0 +1,80 @@ +import os +import signal +import time +from multiprocessing import Process +from typing import Callable +from typing import Optional + +from psutil import pid_exists + +from brotab.mediator.log import disable_logging +from brotab.mediator.log import logger + + +class NotStarted(Exception): + pass + + +class Runner: + def __init__(self, target: Callable[[], None]): + self.target = target + self._shutdown: Optional[Callable] = None + + def shutdown(self) -> None: + # TODO: break this to test ctrl-c + if not self._shutdown: + raise NotStarted('start the runner first') + logger.info('Runner: calling terminate: %s', self._shutdown) + self._shutdown() + + def _here(self) -> None: + # global browser + # TODO: fix this + # reassign this variable again so that tests could mock it + # browser = remote_api + # TODO: does not really work, I still see logs in unittests + # global logger + logger.info('Started mediator process, pid=%s', os.getpid()) + disable_logging() + + def shutdown(): + pid = os.getpid() + logger.info('Runner: shutdown in here, os.kill(%s)', pid) + os.kill(pid, signal.SIGTERM) + + self._shutdown = shutdown + return self.target() + + # def in_thread(self, port: int) -> Thread: + # thread = Thread(target=lambda: self.here(port)) + # thread.daemon = True + # thread.start() + # return thread + + def in_process(self) -> Process: + process = Process(target=self._here) + process.daemon = True + process.start() + + def shutdown(): + logger.info('Runner: shutdown in in_process, process.terminate') + process.terminate() + + self._shutdown = shutdown + self._watcher(os.getppid(), interval=1.0) + return process + + def _watcher(self, parent_pid: int, interval: float) -> Process: + def watch(): + logger.info('Watching parent process pid=%s', parent_pid) + while True: + time.sleep(interval) + if not pid_exists(parent_pid): + logger.info('Parent process died pid=%s, shutting down mediator', parent_pid) + self.shutdown() + break + + process = Process(target=watch) + process.daemon = True + process.start() + return process diff --git a/brotab/mediator/sig.py b/brotab/mediator/sig.py index e69de29..d29c617 100644 --- a/brotab/mediator/sig.py +++ b/brotab/mediator/sig.py @@ -0,0 +1,18 @@ +import signal +from typing import Callable + +from brotab.mediator.log import logger + + +def pipe(shutdown: Callable, e): + logger.info('Pipe has been closed (%s)', e) + shutdown() + + +def setup(shutdown: Callable): + def handler(signum, _frame): + logger.info('Got signal %s', signum) + shutdown() + + signal.signal(signal.SIGINT, handler) + signal.signal(signal.SIGTERM, handler) diff --git a/brotab/mediator/support.py b/brotab/mediator/support.py index e69de29..39fc46f 100644 --- a/brotab/mediator/support.py +++ b/brotab/mediator/support.py @@ -0,0 +1,5 @@ +def is_valid_integer(str_value): + try: + return int(str_value) >= 0 + except (ValueError, TypeError): + return False \ No newline at end of file diff --git a/brotab/mediator/transport.py b/brotab/mediator/transport.py index e69de29..0556157 100644 --- a/brotab/mediator/transport.py +++ b/brotab/mediator/transport.py @@ -0,0 +1,57 @@ +import json +import struct +import sys +from abc import ABC +from abc import abstractmethod +from typing import BinaryIO + +from brotab.mediator.log import logger + + +class Transport(ABC): + @abstractmethod + def send(self, command: dict) -> None: + pass + + @abstractmethod + def recv(self) -> dict: + pass + + +def default_transport() -> Transport: + return StdTransport(sys.stdin.buffer, sys.stdout.buffer) + + +class TransportError(Exception): + pass + + +class StdTransport(Transport): + def __init__(self, input_file: BinaryIO, output_file: BinaryIO): + self._in: BinaryIO = input_file + self._out: BinaryIO = output_file + + def send(self, command: dict) -> None: + encoded = self._encode(command) + logger.info('StdTransport SENDING: %s', command) + self._out.write(encoded['length']) + logger.info('StdTransport SENT length') + self._out.write(encoded['content']) + logger.info('StdTransport SENT content') + self._out.flush() + logger.info('StdTransport SENT flush') + + def recv(self) -> dict: + logger.info('StdTransport RECEIVING') + raw_length = self._in.read(4) + if len(raw_length) == 0: + raise TransportError('StdTransport: cannot read, raw_length is empty') + message_length = struct.unpack('@I', raw_length)[0] + message = self._in.read(message_length).decode('utf8') + logger.info('RECEIVED: %s', message.encode('utf8')) + return json.loads(message) + + def _encode(self, message): + encoded_content = json.dumps(message).encode('utf8') + encoded_length = struct.pack('@I', len(encoded_content)) + return {'length': encoded_length, 'content': encoded_content} diff --git a/brotab/tests/mocks.py b/brotab/tests/mocks.py index 78a8c29..4c71907 100644 --- a/brotab/tests/mocks.py +++ b/brotab/tests/mocks.py @@ -1,5 +1,2 @@ - - - class BrowserPortMock: pass diff --git a/brotab/tests/test_integration.py b/brotab/tests/test_integration.py index a4d524a..713d943 100644 --- a/brotab/tests/test_integration.py +++ b/brotab/tests/test_integration.py @@ -9,7 +9,7 @@ from brotab.tests.utils import wait_net_service -from brotab.mediator.brotab_mediator import DEFAULT_MIN_HTTP_PORT +from brotab.mediator.const import DEFAULT_MIN_HTTP_PORT from brotab.tab import parse_tab_lines diff --git a/brotab/tests/test_main.py b/brotab/tests/test_main.py index a3d17a2..394d118 100644 --- a/brotab/tests/test_main.py +++ b/brotab/tests/test_main.py @@ -1,73 +1,103 @@ -from uuid import uuid4 -from time import sleep -from threading import Thread +import os +from multiprocessing import Queue from string import ascii_letters +from time import sleep +from typing import List from unittest import TestCase from unittest.mock import patch -from typing import List +from uuid import uuid4 -from brotab.main import run_commands -from brotab.main import create_clients -from brotab.inout import get_free_tcp_port +from brotab.api import SingleMediatorAPI +from brotab.inout import MIN_MEDIATOR_PORT +from brotab.inout import get_available_tcp_port from brotab.inout import in_temp_dir from brotab.inout import spit -from brotab.inout import MIN_MEDIATOR_PORT -from brotab.api import SingleMediatorAPI -from brotab.mediator.brotab_mediator import run_mediator -from brotab.mediator.brotab_mediator import create_browser_remote_api - +from brotab.main import create_clients +from brotab.main import run_commands +from brotab.mediator.const import DEFAULT_HTTP_IFACE +from brotab.mediator.http_server import MediatorHttpServer +from brotab.mediator.log import logger +from brotab.mediator.remote_api import default_remote_api +from brotab.mediator.transport import Transport from brotab.tests.utils import assert_file_absent -from brotab.tests.utils import assert_file_not_empty from brotab.tests.utils import assert_file_contents +from brotab.tests.utils import assert_file_not_empty from brotab.tests.utils import assert_sqlite3_table_contents -class MockedLoggingTransport: +class MockedLoggingTransport(Transport): + MAX_SIZE = 1000 + def __init__(self): - self.reset() + self._sent = Queue(self.MAX_SIZE) + self._received = Queue(self.MAX_SIZE) + def reset(self): - self.sent = [] - self.received = [] - def send(self, message): - self.sent.append(message) + self._read_queue(self._sent) + self._read_queue(self._received) + + def _read_queue(self, queue: Queue) -> list: + result = [] + while not queue.empty(): + result.append(queue.get()) + return result + + @property + def sent(self): + result = self._read_queue(self._sent) + logger.warning('MAKING SENT LIST (=%s) pid=%s', result, os.getpid()) + return result + + @property + def received(self): + logger.warning('MAKING RECEIVED LIST pid=%s', os.getpid()) + return self._read_queue(self._received) + + def received_extend(self, values) -> None: + logger.warning('EXTENDING RECEIVED LIST (values=%s) pid=%s', values, os.getpid()) + for value in values: + self._received.put(value) + + def send(self, message) -> None: + self._sent.put(message) + logger.warning('Sent message (pid=%s): %s, empty=%s', os.getpid(), message, self._sent.empty()) + def recv(self): - if self.received: - result = self.received[0] - self.received = self.received[1:] + if not self._received.empty(): + result = self._received.get() + logger.warning('Received message (pid=%s): %s, empty=%s', os.getpid(), result, self._received.empty()) return result - - -def _run_mediator_in_thread(port, transport, remote_api=None) -> Thread: - remote_api = create_browser_remote_api(transport) if remote_api is None else remote_api - thread = Thread(target=lambda: run_mediator(port, remote_api, no_logging=True)) - thread.daemon = True - thread.start() - return thread + logger.warning('Nothing to receive (pid=%s)', os.getpid()) class MockedMediator: def __init__(self, prefix='a', port=None, remote_api=None): - self.port = get_free_tcp_port() if port is None else port + self.port = get_available_tcp_port() if port is None else port self.transport = MockedLoggingTransport() - self.transport.received = ['mocked'] - self.thread = _run_mediator_in_thread(self.port, self.transport, remote_api) + self.remote_api = default_remote_api(self.transport) if remote_api is None else remote_api + self.server = MediatorHttpServer(DEFAULT_HTTP_IFACE, self.port, self.remote_api) + self.process = self.server.run.in_process() + self.transport.received_extend(['mocked']) # TODO: why 2 times? should be only 1 self.api = SingleMediatorAPI(prefix, port=self.port, startup_timeout=1) - assert self.api._browser == 'mocked' + assert self.api.browser == 'mocked' self.transport.reset() + def shutdown_and_wait(self): - self.api.shutdown() - self.thread.join() + self.server.shutdown() + self.process.join() + def __enter__(self): return self + def __exit__(self, type_, value, tb): self.shutdown_and_wait() -def _run_commands(commands): - with MockedMediator('a') as mediator: - get_mediator_ports_mock.side_effect = \ - [range(mediator.port, mediator.port + 1)] - run_commands(commands) +# def _run_commands(commands): +# with MockedMediator('a') as mediator: +# get_mediator_ports_mock.side_effect = \ +# [range(mediator.port, mediator.port + 1)] +# run_commands(commands) class DummyBrowserRemoteAPI: @@ -77,26 +107,37 @@ class DummyBrowserRemoteAPI: def list_tabs(self): return ['1.1\ttitle\turl'] + def query_tabs(self, query_info: str): raise NotImplementedError() + def move_tabs(self, move_triplets: str): raise NotImplementedError() + def open_urls(self, urls: List[str], window_id=None): raise NotImplementedError() + def close_tabs(self, tab_ids: str): raise NotImplementedError() + def new_tab(self, query): raise NotImplementedError() + def activate_tab(self, tab_id: int, focused: bool): raise NotImplementedError() + def get_active_tabs(self) -> str: return '1.1' + def get_words(self, tab_id, match_regex, join_with): return ['a', 'b'] + def get_text(self, delimiter_regex, replace_with): return ['1.1\ttitle\turl\tbody'] + def get_html(self, delimiter_regex, replace_with): return ['1.1\ttitle\turl\tsome body'] + def get_browser(self): return 'mocked' @@ -118,7 +159,7 @@ def run_mocked_mediators(count, default_port_offset, delay): print('Ready') for mediator in mediators: print(mediator.port) - mediators[0].thread.join() + mediators[0].process.join() def run_mocked_mediator_current_thread(port): @@ -128,7 +169,7 @@ def run_mocked_mediator_current_thread(port): python -c 'from brotab.tests.test_main import run_mocked_mediator_current_thread as run; run(4635)' """ remote_api = DummyBrowserRemoteAPI() - port = get_free_tcp_port() if port is None else port + port = get_available_tcp_port() if port is None else port run_mediator(port, remote_api, no_logging=False) @@ -184,7 +225,7 @@ def test_activate_focused_ok(self): class TestText(WithMediator): def test_text_no_arguments_ok(self): - self.mediator.transport.received.extend([ + self.mediator.transport.received_extend([ 'mocked', ['1.1\ttitle\turl\tbody'], ]) @@ -199,7 +240,7 @@ def test_text_no_arguments_ok(self): assert output == [b'a.1.1\ttitle\turl\tbody\n'] def test_text_with_tab_id_ok(self): - self.mediator.transport.received.extend([ + self.mediator.transport.received_extend([ 'mocked', [ '1.1\ttitle\turl\tbody', @@ -220,7 +261,7 @@ def test_text_with_tab_id_ok(self): class TestHtml(WithMediator): def test_html_no_arguments_ok(self): - self.mediator.transport.received.extend([ + self.mediator.transport.received_extend([ 'mocked', ['1.1\ttitle\turl\tbody'], ]) @@ -235,7 +276,7 @@ def test_html_no_arguments_ok(self): assert output == [b'a.1.1\ttitle\turl\tbody\n'] def test_html_with_tab_id_ok(self): - self.mediator.transport.received.extend([ + self.mediator.transport.received_extend([ 'mocked', [ '1.1\ttitle\turl\tbody', @@ -253,9 +294,10 @@ def test_html_with_tab_id_ok(self): ] assert output == [b'a.1.2\ttitle\turl\tbody\na.1.3\ttitle\turl\tbody\n'] + class TestIndex(WithMediator): def test_index_no_arguments_ok(self): - self.mediator.transport.received.extend([ + self.mediator.transport.received_extend([ 'mocked', ['1.1\ttitle\turl\tbody'], ]) @@ -270,16 +312,16 @@ def test_index_no_arguments_ok(self): assert self.mediator.transport.sent == [ {'name': 'get_browser'}, {'delimiter_regex': '/\\n|\\r|\\t/g', - 'name': 'get_text', 'replace_with': '" "'}, + 'name': 'get_text', 'replace_with': '" "'}, ] assert_file_not_empty(sqlite_filename) assert_file_not_empty(tsv_filename) assert_file_contents(tsv_filename, 'a.1.1\ttitle\turl\tbody\n') assert_sqlite3_table_contents( - sqlite_filename, 'tabs', 'a.1.1\ttitle\turl\tbody') + sqlite_filename, 'tabs', 'a.1.1\ttitle\turl\tbody') def test_index_custom_filename(self): - self.mediator.transport.received.extend([ + self.mediator.transport.received_extend([ 'mocked', ['1.1\ttitle\turl\tbody'], ]) @@ -299,6 +341,12 @@ def test_index_custom_filename(self): assert_file_not_empty(tsv_filename) assert_file_contents(tsv_filename, 'a.1.1\ttitle\turl\tbody\n') assert_sqlite3_table_contents( - sqlite_filename, 'tabs', 'a.1.1\ttitle\turl\tbody') + sqlite_filename, 'tabs', 'a.1.1\ttitle\turl\tbody') assert_file_absent(sqlite_filename) assert_file_absent(tsv_filename) + +# tests todo: +# 1. mediator cannot write/read, terminates +# 2. terminate mediator on ctrl-c, sigint, sigterm +# 3. terminate mediator when parent terminates +# 4. make sure that stdin & stdout passed to mediator are passed correctly to http server process