diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..b62c5f1 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,40 @@ +on: + push: + branches: [ "main" ] + +jobs: + deploy: + name: Deploy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Executing remote ssh command + uses: appleboy/ssh-action@master + with: + host: ${{ secrets.HOST }} + port: ${{ secrets.PORT }} + username: ${{ secrets.USER }} + key: ${{ secrets.SSH }} + script: | + echo "Stoping old version" + if [ -d ~/parser_avito ]; + then + cd ~/parser_avito + make stop + fi + + echo "Removing old version" + cd ~ + rm -rf parser_avito + # yes | sudo docker system prune -a + + echo "Pulling new version" + GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" git clone git@github.com:altservices/parser_avito.git + + # echo "Copying keys" + # cp ~/.secrets/parser_avito.env ~/parser_avito/.env + + echo "Starting new version" + cd ~/parser_avito + make run diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9e1c9cd --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Cache +.DS_Store +__pycache__/ + +# Environment +venv/ +env/ + +# Data +downloaded_files/ +result/ +viewed.txt diff --git a/AvitoParser.py b/AvitoParser.py deleted file mode 100755 index cb55c18..0000000 --- a/AvitoParser.py +++ /dev/null @@ -1,331 +0,0 @@ -#!/usr/bin/env python3 - -""" -AvitoParser - Поиск объявлений на avito.ru по цене или ключевым словам -by Duff89 (https://github.com/Duff89) -""" -__version__ = 1.09 - -import re - -import customtkinter - -import time -import tkinter -import threading -import webbrowser -import configparser - -from loguru import logger -from notifiers.logging import NotificationHandler - -from parser_cls import AvitoParse - -customtkinter.set_appearance_mode("dark") - - -class Window(customtkinter.CTk): - def __init__(self): - super().__init__() - self.geometry("720x800") - self.width_entry_field = 500 - self.resizable(width=True, height=True) - self.title(f"AvitoParser v. {__version__}") - self.is_run = False - self.main_windows_init() - self.logger_widget_init() - self.tg_logger_init = False - - """Центрируем окно относительно экрана""" - self.update_idletasks() - screen_width = self.winfo_screenwidth() - screen_height = self.winfo_screenheight() - window_width = self.winfo_width() - window_height = self.winfo_height() - x_pos = (screen_width - window_width) // 2 - y_pos = (screen_height - window_height) // 2 - self.geometry(f"{window_width}x{window_height}+{x_pos}+{y_pos}") - - self.checkbox_frame = FeedbackFrame(self) - self.checkbox_frame.grid(row=11, column=0, columnspan=2, pady=10, sticky="s") - - def main_windows_init(self): - """Инициализация всех полей""" - self.set_up() - self.token_label = customtkinter.CTkLabel(self, text="Token:") - self.token_label.grid(row=0, column=0, padx=10, pady=5, sticky="w") - self.token_entry = customtkinter.CTkEntry(self, width=self.width_entry_field, - placeholder_text="Введите токен вашего Telegram бота") - self.token_entry.grid(row=0, column=1, pady=5, sticky='w') - self.token_entry.insert(0, self.tg_token_env) - - self.chat_id_label = customtkinter.CTkLabel(self, text="Chat ID:") - self.chat_id_label.grid(row=1, column=0, padx=10, pady=5, sticky="w") - self.chat_id_entry = customtkinter.CTkEntry(self, width=self.width_entry_field, - placeholder_text="Введите ID чата вашего диалога в Telegram") - self.chat_id_entry.grid(row=1, column=1, pady=5, sticky='w') - self.chat_id_entry.insert(0, self.chat_id_env) - - self.key_label = customtkinter.CTkLabel(self, text="Ключевые слова:") - self.key_label.grid(row=2, column=0, padx=10, pady=5, sticky="w") - self.key_entry = customtkinter.CTkEntry(self, width=self.width_entry_field, - placeholder_text="Через запятую(регистр не важен)") - self.key_entry.grid(row=2, column=1, pady=5, sticky='w') - self.key_entry.insert(0, self.keys_env) - - self.ads_label = customtkinter.CTkLabel(self, text="Количество страниц:") - self.ads_label.grid(row=3, column=0, padx=10, pady=5, sticky="w") - self.ads_entry = customtkinter.CTkEntry(self, width=self.width_entry_field, - placeholder_text="Сколько страниц проверять каждый раз") - self.ads_entry.grid(row=3, column=1, pady=5, sticky='w') - self.ads_entry.insert(0, self.num_ads_env) - - self.freq_label = customtkinter.CTkLabel(self, text="Пауза в минутах:") - self.freq_label.grid(row=4, column=0, padx=10, pady=5, sticky="w") - self.freq_entry = customtkinter.CTkEntry(self, width=self.width_entry_field, - placeholder_text="Пауза между повторами. В минутах") - self.freq_entry.grid(row=4, column=1, pady=5, sticky='w') - self.freq_entry.insert(0, self.freq_env) - - self.url_label = customtkinter.CTkLabel(self, text="Url:") - self.url_label.grid(row=5, column=0, padx=10, pady=5, sticky="w") - self.url_entry = customtkinter.CTkEntry(self, width=self.width_entry_field, - placeholder_text="Адрес с которого нужно начинать") - self.url_entry.grid(row=5, column=1, pady=5, sticky='w') - self.url_entry.insert(0, self.start_url_env) - - self.clear_button = customtkinter.CTkButton(self, text="X", width=1, - command=lambda: self.url_entry.delete(0, 1000)) - self.clear_button.grid(row=5, column=3) - - self.min_price_label = customtkinter.CTkLabel(self, text="Минимальная цена:") - self.min_price_label.grid(row=6, column=0, padx=10, pady=5, sticky="w") - self.min_price_entry = customtkinter.CTkEntry(self, width=self.width_entry_field, - placeholder_text="Цена больше либо равна введенному значению") - self.min_price_entry.grid(row=6, column=1, pady=5, sticky='w') - self.min_price_entry.insert(0, str(self.min_price_env)) - - self.max_price_label = customtkinter.CTkLabel(self, text="Максимальная цена:") - self.max_price_label.grid(row=7, column=0, padx=10, pady=5, sticky="w") - self.max_price_entry = customtkinter.CTkEntry(self, width=self.width_entry_field, - placeholder_text="Цена меньше либо равна введенному значению") - self.max_price_entry.grid(row=7, column=1, pady=5, sticky='w') - self.max_price_entry.insert(0, str(self.max_price_env)) - - """гео""" - self.geo_label = customtkinter.CTkLabel(self, text="Ограничение по городу:") - self.geo_label.grid(row=8, column=0, padx=10, pady=5, sticky="w") - self.geo_entry = customtkinter.CTkEntry(self, width=self.width_entry_field, - placeholder_text="Введите текст, который должен быть в" - " адресе обязательно") - self.geo_entry.grid(row=8, column=1, pady=5, sticky='w') - self.geo_entry.insert(0, str(self.geo_env)) - - self.test_button = customtkinter.CTkButton(self, text="Получить тестовое уведомление", - command=self.telegram_log_test) - self.test_button.grid(row=9, column=1, pady=5, padx=(0, 6), sticky="ew") - - """debug mode""" - self.check_var = customtkinter.StringVar(value="off") - self.checkbox = customtkinter.CTkCheckBox(self, text="режим отладки", - variable=self.check_var, onvalue="on", offvalue="off") - self.checkbox.grid(row=11, column=0) - - """Кнопка старт. Старт работы""" - self.start_button = customtkinter.CTkButton(self, - text="Старт", - command=lambda: self.is_run or - threading.Thread( - target=self.start_scraping).start()) - self.start_button.grid(row=9, column=0, padx=5, pady=5, sticky="ew") - - """Размещаем кнопку Стоп""" - self.stop_button = customtkinter.CTkButton(self, text="Стоп", command=self.stop_scraping) - self.stop_button.grid(row=9, column=0, padx=5, pady=5, sticky="ew") - # прячем кнопку стоп - self.stop_button.grid_forget() - - def switch_action(self): - """Смена видимости кнопок старт и стоп""" - if self.start_button._last_geometry_manager_call is None: - self.start_button.grid(row=9, column=0, padx=5, pady=5, sticky="ew") - self.stop_button.grid_forget() - else: - self.stop_button.grid(row=9, column=0, padx=5, pady=5, sticky="ew") - self.start_button.grid_forget() - - def telegram_log_test(self): - """Тестирование отправки сообщения в telegram""" - # if not self.tg_logger_init: - self.logger_tg() - token = self.token_entry.get() - chat_id = self.chat_id_entry.get() - if all([token, chat_id]): - logger.success('test') - - logger.info('Если сообщение пришло к Вам в telegram - значит всё настроено правильно. Если нет - ' - 'результат парсинга всегда можно посмотреть в папке result или ниже') - return None - logger.info("Должны быть заполнены поля ТОКЕН TELEGRAM и CHAT ID TELEGRAM") - - def start_scraping(self): - """Кнопка старт. Запуск""" - self.logger_tg() - - """Если URL все-таки не заполнен""" - url = self.url_entry.get() - if not url: - logger.info("Внимание! URL - обязательный параметр. Пример ссылки:") - logger.info("https://www.avito.ru/moskva/remont_i_stroitelstvo/sadovaya_tehnika-ASgBAgICAURYnAI") - return - """Прячем кнопку старт""" - self.is_run = True - self.switch_action() - self.update() - logger.info("Начинаем поиск") - - """Сохраняем конфиг""" - self.save_config() - - """Основной цикл""" - while self.is_run: - self.run_parse() - if not self.is_run: - break - logger.info("Проверка завершена") - logger.info(f"Пауза {self.frequency} минут") - for _ in range(int(float(self.frequency) * 60)): - time.sleep(1) - if not self.is_run: - break - - """Прячем кнопку Стоп и показываем старт""" - logger.info("Успешно остановлено") - self.switch_action() - self.stop_button.configure(text='Стоп', state='normal') - self.update() - - def stop_scraping(self): - """Кнопка стоп. Остановка работы""" - - logger.info("Идет остановка. Пожалуйста, подождите") - self.is_run = False - self.stop_button.configure(text='Останавливаюсь', state='disabled') - self.update() - - def set_up(self): - """Работа с настройками""" - - self.config = configparser.ConfigParser() # создаём объекта парсера - self.config.read("settings.ini", encoding='utf-8') # читаем конфиг - try: - """Багфикс проблем с экранированием""" - self.start_url_env = self.config["Avito"]["URL"] - except Exception: - with open('settings.ini') as file: - line_url = file.readlines()[1] - regex = r"http.+" - self.start_url_env = re.search(regex, line_url)[0] - self.chat_id_env = self.config["Avito"]["CHAT_ID"] - self.tg_token_env = self.config["Avito"]["TG_TOKEN"] - self.num_ads_env = self.config["Avito"]["NUM_ADS"] - self.freq_env = self.config["Avito"]["FREQ"] - self.keys_env = self.config["Avito"]["KEYS"] - self.max_price_env = self.config["Avito"].get("MAX_PRICE", "0") - self.min_price_env = self.config["Avito"].get("MIN_PRICE", "0") - self.geo_env = self.config["Avito"].get("GEO", "") - - def save_config(self): - """Сохраняет конфиг""" - self.config["Avito"]["TG_TOKEN"] = self.token_entry.get() - self.config["Avito"]["CHAT_ID"] = self.chat_id_entry.get() - self.config["Avito"]["URL"] = str(self.url_entry.get()).replace('%', '%%') # bugfix - self.config["Avito"]["NUM_ADS"] = self.ads_entry.get() - self.config["Avito"]["FREQ"] = self.freq_entry.get() - self.config["Avito"]["KEYS"] = self.key_entry.get() - self.config["Avito"]["MAX_PRICE"] = self.max_price_entry.get() - self.config["Avito"]["MIN_PRICE"] = self.min_price_entry.get() - self.config["Avito"]["GEO"] = self.geo_entry.get() - with open('settings.ini', 'w', encoding='utf-8') as configfile: - self.config.write(configfile) - - def logger_tg(self): - """Логирование в telegram""" - token = self.token_entry.get() - chat_id = self.chat_id_entry.get() - if self.tg_logger_init: - return - if token and chat_id: - params = { - 'token': token, - 'chat_id': chat_id - } - tg_handler = NotificationHandler("telegram", defaults=params) - - """Все логи уровня SUCCESS и выше отсылаются в телегу""" - logger.add(tg_handler, level="SUCCESS", format="{message}") - self.tg_logger_init = True - return None - logger.info("Данные для отправки в telegram не заполнены. Результат будет сохранен в файл и выведен здесь") - - def logger_widget_init(self): - """Инициализация логирования в widget""" - self.log_widget = customtkinter.CTkTextbox(self, wrap="word", width=650, height=300, text_color="#00ff26") - self.log_widget.grid(row=10, padx=5, pady=(10, 0), column=0, columnspan=2) - logger.add(self.logger_text_widget, format="{time:HH:mm:ss} - {message}") - logger.info("Запуск AvitoParser") - logger.info("Чтобы начать работу, проверьте, чтобы поле URL было заполненными, " - "остальное на Ваше усмотрение. Нужна помощь - нажмите на ссылку внизу окна.") - logger.info("Удачного поиска !!!") - - def logger_text_widget(self, message): - """Логирование в log_widget (окно приложения)""" - self.log_widget.insert(tkinter.END, message) - self.log_widget.see(tkinter.END) - - def run_parse(self): - """Запуск парсера""" - url = self.url_entry.get() - num_ads = self.ads_entry.get() or 5 - keys = self.key_entry.get() - self.frequency = self.freq_entry.get() or 5 - max_price = self.max_price_entry.get() or 1000000 - min_price = self.min_price_entry.get() or 0 - geo = self.geo_entry.get() or None - debug_status = 0 - if self.checkbox.get() == 'on': - debug_status = 1 - - AvitoParse( - url=url, - count=int(num_ads), - keysword_list=keys.split(","), - max_price=int(max_price), - min_price=int(min_price), - geo=geo, - debug_mode=debug_status - ).parse() - - -class FeedbackFrame(customtkinter.CTkFrame): - def __init__(self, master): - super().__init__(master) - - link_label = customtkinter.CTkLabel(self, text="Связаться с автором или сообщить о проблеме", - text_color="grey60", cursor="hand2") - link_label.grid(column=1, row=1, padx=10, pady=5, ) - link_label.bind("", lambda e: webbrowser.open_new("https://github.com/Duff89/parser_avito#%D" - "0%BF%D1%80%D0%BE%D0%B1%D0%BB%D0%B5%D0%BC%D1%8B")) - - link_label = customtkinter.CTkLabel(self, text="Поддержать развитие проекта", - text_color="grey60", cursor="hand2") - link_label.grid(column=1, row=2, padx=10) - link_label.bind("", lambda e: webbrowser.open_new("https://github.com/Duff89/parser_avito" - "#%D0%BF%D0%BE%D0%B4%D0%B4%D0%B5%D1%80%D0%B6%D0%B" - "A%D0%B0-%D1%80%D0%B0%D0%B7%D0%B2%D0%B8%D1%82%D0" - "%B8%D1%8F-%D0%BF%D1%80%D0%BE%D0%B5%D0%BA%" - "D1%82%D0%B0")) - - -if __name__ == '__main__': - Window().mainloop() diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5e714f4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,44 @@ +# Pull base image +FROM python:3.10 + +# Set work directory +WORKDIR /app + +# Установка зависимостей для Selenium и Chrome +RUN apt update +RUN apt install -y \ + wget \ + xvfb \ + unzip \ + libxi6 \ + libgconf-2-4 \ + libnss3 + +# Установка Chrome +RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb +RUN dpkg -i google-chrome-stable_current_amd64.deb; apt-get -fy install + +# # Скачивание и распаковка Chrome +# RUN wget -q https://storage.googleapis.com/chrome-for-testing-public/122.0.6261.69/linux64/chrome-linux64.zip \ +# && unzip chrome-linux64.zip -d /opt/ \ +# && ln -s /opt/chrome-linux/chrome /usr/bin/google-chrome \ +# && rm chrome-linux64.zip + +# # Установка ChromeDriver +# RUN wget https://storage.googleapis.com/chrome-for-testing-public/122.0.6261.69/linux64/chromedriver-linux64.zip +# RUN unzip chromedriver-linux64.zip -d /usr/bin/ +# # RUN mv chromedriver /usr/bin/chromedriver +# # RUN chmod +x /usr/bin/chromedriver + +RUN apt-get update && apt-get install -y chromium-driver +RUN chmod +x /usr/bin/chromedriver + +# Copy project +COPY . /app + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 + +# Install dependencies +RUN pip install -r requirements.txt diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..d98f761 --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +dev: + docker compose -p parser_avito up --build + +run: + docker compose -f compose.prod.yml -p parser_avito up --build -d + +stop: + docker compose -f compose.prod.yml -p parser_avito stop + +check: + docker ps --filter name="^parser_avito" --format "table {{.ID}}\t{{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}" + +connect: + docker exec -it `docker ps -a | grep parser_avito-script | cut -d ' ' -f 1` bash + +logs: + docker logs `docker ps -a | grep parser_avito-script | cut -d ' ' -f 1` + +set: + cp nginx.prod.conf /etc/nginx/sites-enabled/alt.conf + sudo chmod 0755 ~ + sudo chmod -R a+w ~/parser_avito/result/ + sudo chmod 0700 ~/.ssh + sudo chmod -R 0600 ~/.ssh/* + sudo systemctl restart nginx + sudo certbot --nginx diff --git a/compose.prod.yml b/compose.prod.yml new file mode 100644 index 0000000..4cd5d17 --- /dev/null +++ b/compose.prod.yml @@ -0,0 +1,40 @@ +version: "3.9" +services: + script: + image: parser_avito/script + build: . + restart: unless-stopped + volumes: + - /root/parser_avito/result:/app/result + command: bash -c "cd /app && python parser_cls.py" + +# jitsu: +# image: jitsucom/server:latest +# ports: +# - "8000:8001" +# depends_on: +# - db +# environment: +# DATABASE_URL: postgres://jitsu:asdr1234@db/jitsu?sslmode=disable +# CONFIGURATOR_SECRET: asdr1234 +# volumes: +# - ./jitsu.conf:/home/jitsu/config +# - /root/data/jitsu:/var/lib/jitsu/data + +# db: +# image: postgres:13 +# ports: +# - "5432:5432" +# environment: +# POSTGRES_USER: jitsu +# POSTGRES_PASSWORD: asdr1234 +# POSTGRES_DB: jitsu +# volumes: +# - ./postgresql.conf:/etc/postgresql/postgresql.conf +# - ./pg_hba.conf:/etc/postgresql/pg_hba.conf +# - /root/data/pg:/var/lib/postgresql/data + +# volumes: +# jitsu-config: +# jitsu-data: +# postgres-data: diff --git a/jitsu.conf b/jitsu.conf new file mode 100644 index 0000000..1cb88f9 --- /dev/null +++ b/jitsu.conf @@ -0,0 +1,37 @@ +server: + log_level: info # Adjust logging level as needed (debug, info, warning, error) + +database: + type: postgres + postgres: + host: db # Matches the service name in docker-compose + port: 5432 # Default PostgreSQL port + dbname: jitsu # Database name as specified in docker-compose environment + schema: public + user: jitsu # Matches the user specified in docker-compose environment + password: asdr1234 # Ensure this matches what you've set in docker-compose + sslmode: disable # Adjust according to your SSL setup + +# destinations: +# bigquery_example: # Destination name +# type: bigquery # Destination type (bigquery, postgres, clickhouse, etc.) +# mode: stream # Insert mode (stream or batch) +# data_layout: +# table_name_template: events_{{.EventName}} # Naming pattern for tables +# google: +# project_id: your_project_id # Your Google Cloud project ID +# json_key: your_json_key # Your JSON key for authentication + +# postgres_example: +# type: postgres +# mode: batch +# data_layout: +# table_name_template: events_{{.EventName}} +# postgres: +# host: localhost # Or another host where your PostgreSQL is +# port: 5432 +# dbname: your_dbname +# user: your_user +# password: your_password +# parameters: # Any additional connection parameters +# sslmode: disable diff --git a/lib/req.py b/lib/req.py new file mode 100644 index 0000000..ade8fe2 --- /dev/null +++ b/lib/req.py @@ -0,0 +1,40 @@ +import aiohttp + + +async def fetch( + url, + payload=None, + type_req="post", + type_data="json", + headers=None, +): + if payload is None: + payload = {} + + async with aiohttp.ClientSession() as session: + if type_req == "post": + req = session.post + elif type_req == "put": + req = session.put + elif type_req == "delete": + req = session.delete + elif type_req == "patch": + req = session.patch + elif type_req == "options": + req = session.options + else: + req = session.get + + async with req( + url, + headers=headers, + **{type_data: payload}, + ) as response: + code = response.status + + try: + data = await response.json() + except: # noqa: E722 + data = await response.text() + + return code, data diff --git a/nginx.prod.conf b/nginx.prod.conf new file mode 100644 index 0000000..a2f0390 --- /dev/null +++ b/nginx.prod.conf @@ -0,0 +1,11 @@ +server { + listen 80; # TODO: add http2 after certbot + server_name alt.chill.services; + + root /; + + location /data/parser/ { + alias /root/parser_avito/result/; + add_header Access-Control-Allow-Origin "*"; + } +} diff --git a/parser_cls.py b/parser_cls.py index cec5b69..a954aa3 100755 --- a/parser_cls.py +++ b/parser_cls.py @@ -1,3 +1,4 @@ +import asyncio import os import random import time @@ -8,23 +9,26 @@ from loguru import logger from locator import LocatorAvito +from lib.req import fetch + class AvitoParse: """ Парсинг товаров на avito.ru """ - def __init__(self, - url: str, - keysword_list: list, - count: int = 10, - tg_token: str = None, - max_price: int = 0, - min_price: int = 0, - geo: str = None, - debug_mode: int = 0 - - ): + def __init__( + self, + url: str, + keysword_list: list, + count: int = 10, + tg_token: str = None, + max_price: int = 0, + min_price: int = 0, + geo: str = None, + delay: int = 0, + debug_mode: int = 0, + ): self.url = url self.keys_word = keysword_list self.count = count @@ -34,28 +38,34 @@ def __init__(self, self.max_price = int(max_price) self.min_price = int(min_price) self.geo = geo + self.delay = delay self.debug_mode = debug_mode - def __get_url(self): + async def __get_url(self): self.driver.open(self.url) if "Доступ ограничен" in self.driver.get_title(): - time.sleep(10) + await asyncio.sleep(10) raise Exception("Перезапуск из-за блокировки IP") self.driver.open_new_window() # сразу открываем и вторую вкладку self.driver.switch_to_window(window=0) - def __paginator(self): + async def __paginator(self): """Кнопка далее""" - logger.info('Страница успешно загружена. Просматриваю объявления') - self.__create_file_csv() + + logger.info("Страница успешно загружена. Просматриваю объявления") + # self.__create_file_csv() + while self.count > 0: - self.__parse_page() - time.sleep(random.randint(5, 7)) + await self.__parse_page() + + await asyncio.sleep(random.randint(5, 7)) """Проверяем есть ли кнопка далее""" if self.driver.find_elements(LocatorAvito.NEXT_BTN[1], by="css selector"): - self.driver.find_element(LocatorAvito.NEXT_BTN[1], by="css selector").click() + self.driver.find_element( + LocatorAvito.NEXT_BTN[1], by="css selector" + ).click() self.count -= 1 logger.debug("Следующая страница") else: @@ -63,17 +73,18 @@ def __paginator(self): break @logger.catch - def __parse_page(self): + async def __parse_page(self): """Парсит открытую страницу""" """Ограничение количества просмотренных объявлений""" - if os.path.isfile('viewed.txt'): - with open('viewed.txt', 'r') as file: + + if os.path.isfile("viewed.txt"): + with open("viewed.txt", "r") as file: self.viewed_list = list(map(str.rstrip, file.readlines())) if len(self.viewed_list) > 5000: self.viewed_list = self.viewed_list[-900:] else: - with open('viewed.txt', 'w') as file: + with open("viewed.txt", "w") as file: self.viewed_list = [] titles = self.driver.find_elements(LocatorAvito.TITLES[1], by="css selector") @@ -84,55 +95,72 @@ def __parse_page(self): if title.find_elements(*LocatorAvito.DESCRIPTIONS): description = title.find_element(*LocatorAvito.DESCRIPTIONS).text else: - description = '' + description = "" url = title.find_element(*LocatorAvito.URL).get_attribute("href") price = title.find_element(*LocatorAvito.PRICE).get_attribute("content") ads_id = title.get_attribute("data-item-id") - items.append({ - 'name': name, - 'description': description, - 'url': url, - 'price': price, - 'ads_id': ads_id - }) + items.append( + { + "name": name, + "description": description, + "url": url, + "price": price, + "ads_id": ads_id, + } + ) for data in items: - ads_id = data.pop('ads_id') - name = data.get('name') - description = data.get('description') - url = data.get('url') - price = data.get('price') - + ads_id = data.pop("ads_id") + name = data.get("name") + description = data.get("description") + url = data.get("url") + price = data.get("price") + if self.is_viewed(ads_id): continue self.viewed_list.append(ads_id) """Определяем нужно ли нам учитывать ключевые слова""" - if self.keys_word != ['']: - if any([item.lower() in (description.lower() + name.lower()) for item in self.keys_word]) \ - and \ - self.min_price <= int( - price) <= self.max_price: + if self.keys_word != [""]: + if ( + any( + [ + item.lower() in (description.lower() + name.lower()) + for item in self.keys_word + ] + ) + and self.min_price <= int(price) <= self.max_price + ): self.data.append(self.__parse_full_page(url, data)) """Проверка адреса если нужно""" - if self.geo and self.geo.lower() not in self.data[-1].get("geo", self.geo.lower()): + if self.geo and self.geo.lower() not in self.data[-1].get( + "geo", self.geo.lower() + ): continue """Отправляем в телеграм""" self.__pretty_log(data=data) - self.__save_data(data=data) + await self.__save_data(data=data) + elif self.min_price <= int(price) <= self.max_price: self.data.append(self.__parse_full_page(url, data)) """Проверка адреса если нужно""" - if self.geo and self.geo.lower() not in self.data[-1].get("geo", self.geo.lower()): + if self.geo and self.geo.lower() not in self.data[-1].get( + "geo", self.geo.lower() + ): continue """Отправляем в телеграм""" self.__pretty_log(data=data) - self.__save_data(data=data) + await self.__save_data(data=data) + else: continue + if self.delay: + logger.info(f"Пауза {self.delay} сек") + await asyncio.sleep(self.delay) + def __pretty_log(self, data): """Красивый вывод""" logger.success( @@ -142,7 +170,8 @@ def __pretty_log(self, data): f'Просмотров: {data.get("views", "-")}\n' f'Дата публикации: {data.get("date_public", "-")}\n' f'Продавец: {data.get("seller_name", "-")}\n' - f'Ссылка: {data.get("url", "-")}\n') + f'Ссылка: {data.get("url", "-")}\n' + ) def __parse_full_page(self, url: str, data: dict) -> dict: """Парсит для доп. информации открытое объявление на отдельной вкладке""" @@ -151,36 +180,48 @@ def __parse_full_page(self, url: str, data: dict) -> dict: """Если не дождались загрузки""" try: - self.driver.wait_for_element(LocatorAvito.TOTAL_VIEWS[1], by="css selector", timeout=10) + self.driver.wait_for_element( + LocatorAvito.TOTAL_VIEWS[1], by="css selector", timeout=10 + ) except Exception: """Проверка на бан по ip""" if "Доступ ограничен" in self.driver.get_title(): - logger.success("Доступ ограничен: проблема с IP. \nПоследние объявления будут без подробностей") + logger.success( + "Доступ ограничен: проблема с IP. \nПоследние объявления будут без подробностей" + ) self.driver.switch_to_window(window=0) logger.debug("Не дождался загрузки страницы") return data """Гео""" - if self.geo and self.driver.find_elements(LocatorAvito.GEO[1], by="css selector"): + if self.geo and self.driver.find_elements( + LocatorAvito.GEO[1], by="css selector" + ): geo = self.driver.find_element(LocatorAvito.GEO[1], by="css selector").text data["geo"] = geo.lower() """Количество просмотров""" if self.driver.find_elements(LocatorAvito.TOTAL_VIEWS[1], by="css selector"): - total_views = self.driver.find_element(LocatorAvito.TOTAL_VIEWS[1]).text.split()[0] + total_views = self.driver.find_element( + LocatorAvito.TOTAL_VIEWS[1] + ).text.split()[0] data["views"] = total_views """Дата публикации""" if self.driver.find_elements(LocatorAvito.DATE_PUBLIC[1], by="css selector"): - date_public = self.driver.find_element(LocatorAvito.DATE_PUBLIC[1], by="css selector").text + date_public = self.driver.find_element( + LocatorAvito.DATE_PUBLIC[1], by="css selector" + ).text if "· " in date_public: - date_public = date_public.replace("· ", '') + date_public = date_public.replace("· ", "") data["date_public"] = date_public """Имя продавца""" if self.driver.find_elements(LocatorAvito.SELLER_NAME[1], by="css selector"): - seller_name = self.driver.find_element(LocatorAvito.SELLER_NAME[1], by="css selector").text + seller_name = self.driver.find_element( + LocatorAvito.SELLER_NAME[1], by="css selector" + ).text data["seller_name"] = seller_name """Возвращается на вкладку №1""" @@ -193,23 +234,36 @@ def is_viewed(self, ads_id: str) -> bool: return True return False - def __save_data(self, data: dict): - """Сохраняет результат в файл keyword*.csv""" - with open(f"result/{self.title_file}.csv", mode="a", newline='', encoding='utf-8', errors='ignore') as file: - writer = csv.writer(file) - writer.writerow([ - data.get("name", '-'), - data.get("price", '-'), - data.get("url", '-'), - data.get("description", '-'), - data.get("views", '-'), - data.get("date_public", '-'), - data.get("seller_name", 'no'), - data.get("geo", '-') - ]) + async def __save_data(self, data: dict): + logger.info("JITSU") + + link = "http://104.248.143.154:8080/api/s/s2s/track" + code, res = await fetch( + link, + data, + headers={ + "X-Write-Key": "BaUKuuGZToJLSFmBtpwLK4KObUyFNmX1:LdWfwlhLbcvr8dgUNsfNDDT1NkSaIsKE", + }, + ) + + print("REQ", link, data) + print("RES", code, res) + + # with open(f"result/{self.title_file}.csv", mode="a", newline='', encoding='utf-8', errors='ignore') as file: + # writer = csv.writer(file) + # writer.writerow([ + # data.get("name", '-'), + # data.get("price", '-'), + # data.get("url", '-'), + # data.get("description", '-'), + # data.get("views", '-'), + # data.get("date_public", '-'), + # data.get("seller_name", 'no'), + # data.get("geo", '-') + # ]) """сохраняет просмотренные объявления""" - with open('viewed.txt', 'w') as file: + with open("viewed.txt", "w") as file: for item in set(self.viewed_list): file.write("%s\n" % item) @@ -218,7 +272,9 @@ def __is_csv_empty(self) -> bool: """Пустой csv или нет""" os.makedirs(os.path.dirname("result/"), exist_ok=True) try: - with open(f"result/{self.title_file}.csv", 'r', encoding='utf-8', errors='ignore') as file: + with open( + f"result/{self.title_file}.csv", "r", encoding="utf-8", errors="ignore" + ) as file: reader = csv.reader(file) try: # Попытка чтения первой строки @@ -235,46 +291,50 @@ def __create_file_csv(self): """Создает файл и прописывает названия если нужно""" if self.__is_csv_empty: - with open(f"result/{self.title_file}.csv", 'a', encoding='utf-8', errors='ignore') as file: + with open( + f"result/{self.title_file}.csv", "a", encoding="utf-8", errors="ignore" + ) as file: writer = csv.writer(file) - writer.writerow([ - "Название", - "Цена", - "Ссылка", - "Описание", - "Просмотров", - "Дата публикации", - "Продавец", - "Адрес" - ]) + writer.writerow( + [ + "Название", + "Цена", + "Ссылка", + "Описание", + "Просмотров", + "Дата публикации", + "Продавец", + "Адрес", + ] + ) def __get_file_title(self) -> str: """Определяет название файла""" - if self.keys_word != ['']: + if self.keys_word != [""]: title_file = "-".join(list(map(str.lower, self.keys_word))) else: - title_file = 'all' + title_file = "all" return title_file - def parse(self): + async def parse(self): """Метод для вызова""" - with SB(uc=True, - headed=True if self.debug_mode else False, - headless=True if not self.debug_mode else False, - page_load_strategy="eager", - block_images=True, - #skip_js_waits=True, - ) as self.driver: + with SB( + uc=True, + headed=True if self.debug_mode else False, + headless=True if not self.debug_mode else False, + page_load_strategy="eager", + block_images=True, + # skip_js_waits=True, + ) as self.driver: try: - self.__get_url() - self.__paginator() + await self.__get_url() + await self.__paginator() except Exception as error: logger.error(f"Ошибка: {error}") -if __name__ == '__main__': - """Здесь заменить данные на свои""" +async def main(): import configparser config = configparser.ConfigParser() # создаём объекта парсера @@ -284,7 +344,7 @@ def parse(self): """Багфикс проблем с экранированием""" url = config["Avito"]["URL"] # начальный url except Exception: - with open('settings.ini') as file: + with open("settings.ini") as file: line_url = file.readlines()[1] regex = r"http.+" url = re.search(regex, line_url)[0] @@ -292,16 +352,14 @@ def parse(self): token = config["Avito"]["TG_TOKEN"] num_ads = config["Avito"]["NUM_ADS"] freq = config["Avito"]["FREQ"] + delay = config["Avito"]["DELAY"] or "0" keys = config["Avito"]["KEYS"] max_price = config["Avito"].get("MAX_PRICE", "0") or "0" min_price = config["Avito"].get("MIN_PRICE", "0") or "0" geo = config["Avito"].get("GEO", "") or "" if token and chat_id: - params = { - 'token': token, - 'chat_id': chat_id - } + params = {"token": token, "chat_id": chat_id} tg_handler = NotificationHandler("telegram", defaults=params) """Все логи уровня SUCCESS и выше отсылаются в телегу""" @@ -309,19 +367,26 @@ def parse(self): while True: try: - AvitoParse( + await AvitoParse( url=url, count=int(num_ads), keysword_list=keys.split(","), max_price=int(max_price), min_price=int(min_price), - geo=geo + geo=geo, + delay=int(delay), ).parse() - logger.info("Пауза") - time.sleep(int(freq) * 60) + logger.info(f"Пауза {int(freq)} мин") + await asyncio.sleep(int(freq) * 60) except Exception as error: logger.error(error) - logger.error('Произошла ошибка, но работа будет продолжена через 30 сек. ' - 'Если ошибка повторится несколько раз - перезапустите скрипт.' - 'Если и это не поможет - обратитесь к разработчику по ссылке ниже') - time.sleep(30) + logger.error( + "Произошла ошибка, но работа будет продолжена через 30 сек. " + "Если ошибка повторится несколько раз - перезапустите скрипт." + "Если и это не поможет - обратитесь к разработчику по ссылке ниже" + ) + await asyncio.sleep(30) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/pg_hba.conf b/pg_hba.conf new file mode 100644 index 0000000..f85358f --- /dev/null +++ b/pg_hba.conf @@ -0,0 +1,8 @@ +# TYPE DATABASE USER ADDRESS METHOD + +# "local" is for Unix domain socket connections only +local all all peer +# IPv4 local connections: +host all all 0.0.0.0/0 md5 +# IPv6 local connections: +host all all ::/0 md5 diff --git a/postgresql.conf b/postgresql.conf new file mode 100644 index 0000000..128ef1a --- /dev/null +++ b/postgresql.conf @@ -0,0 +1 @@ +listen_addresses = '*' diff --git a/requirements.txt b/requirements.txt index eaa6ea8..58df29e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ loguru==0.7.0 notifiers==1.3.3 seleniumbase==4.22.5 undetected-chromedriver==3.4.6 - +aiohttp diff --git a/settings.ini b/settings.ini index a483011..2829c57 100644 --- a/settings.ini +++ b/settings.ini @@ -1,11 +1,11 @@ [Avito] -url = https://www.avito.ru/moskva/predlozheniya_uslug/transport_perevozki/arenda_spetstekhniki-ASgBAgICAkSYC8SfAZoL3J8B?bt=1&q=%%D1%%83%%D1%%81%%D0%%BB%%D1%%83%%D0%%B3%%D0%%B8 -chat_id = -tg_token = -num_ads = 3 +url = https://www.avito.ru/sankt-peterburg/kvartiry/sdam/posutochno/-ASgBAgICAkSSA8gQ8AeSUg?context=H4sIAAAAAAAA_0q0MrSqLraysFJKK8rPDUhMT1WyLrYyNLNSKk5NLErOcMsvyg3PTElPLVGyrgUEAAD__xf8iH4tAAAA&f=ASgBAgECAkSSA8gQ8AeSUgFFqC0feyJmcm9tIjoyMDI0MDMwMSwidG8iOjIwMjQwMzEwfQ +chat_id = 136563129 +tg_token = +num_ads = 1 freq = 1 -keys = +delay = 60 +keys = max_price = 100000 min_price = 0 -geo = - +geo =