Skip to content

Commit

Permalink
created config functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Midnight95 committed Oct 3, 2023
1 parent 9bc74af commit 432b998
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 58 deletions.
64 changes: 22 additions & 42 deletions page_analyzer/app.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import os
import requests
from datetime import date
from urllib.parse import urlparse

from dotenv import load_dotenv
from bs4 import BeautifulSoup
from flask import (
Flask,
render_template,
Expand All @@ -16,14 +14,18 @@
from validators.url import url

from page_analyzer.db import Database
from page_analyzer.parsers import get_last_status_codes

from page_analyzer.config import (
get_url_config,
get_urls_checks,
get_last_status_codes
)

load_dotenv()
app = Flask(__name__)
app.secret_key = os.getenv('SECRET_KEY')
db_url = os.getenv('DATABASE_URL')


# Validator
def validate(addr: str):
if not url:
return 'URL обязателен'
Expand All @@ -38,9 +40,10 @@ def normalize(addr: str):
return f'{normalized_addr.scheme}://{normalized_addr.netloc}'


# App block
app = Flask(__name__)
app.secret_key = os.getenv('SECRET_KEY')
def render_url(id, table, col):
    """Fetch row(s) from *table* where *col* matches *id*.

    Opens a short-lived connection for the single query.

    NOTE(review): ``Database`` and ``db_url`` are module-level names defined
    elsewhere in app.py; ``db.render`` is assumed to return rows already
    materialized (safe to use after the connection closes) — confirm
    against page_analyzer/db.py.
    """
    # Context manager ensures the DB connection is released even on error.
    with Database(db_url) as db:
        site = db.render(table=table, item=id, col=col)
        return site


@app.route('/')
Expand All @@ -52,8 +55,8 @@ def index():
def get_urls():
with Database(db_url) as db:
sites = db.render(table='urls')
checks = get_last_status_codes(db.render(table='urls_checks'))
return render_template('urls.html', sites=sites, checks=checks)
latest_checks = get_last_status_codes(db.render(table='urls_checks'))
return render_template('urls.html', sites=sites, checks=latest_checks)


@app.post('/urls')
Expand All @@ -76,20 +79,15 @@ def post_urls():

else:
flash('Страница успешно добавлена', 'success')
urls = get_url_config(data)
id = db.insert(
table='urls',
cols=('name', 'created_at'),
data=(data, date.today())
cols=urls.keys(),
data=urls.values()
)
return redirect(url_for('url_info', id=id))


def render_url(id, table, col):
with Database(db_url) as db:
site = db.render(table=table, item=id, col=col)
return site


@app.get('/urls/<id>')
def url_info(id):
site = render_url(id=id, table='urls', col='id')[0]
Expand All @@ -99,40 +97,22 @@ def url_info(id):

@app.post('/urls/<id>/checks')
def check_url(id):
addr = render_url(id=id, table='urls', col='id')[0]['name']
try:
page = requests.get(
render_url(id=id, table='urls', col='id')[0]['name'],
timeout=5
)
page = requests.get(addr, timeout=5)
page.raise_for_status()
except requests.RequestException:
flash('Произошла ошибка при проверке', 'error')
return redirect(url_for('url_info', id=id))

soup = BeautifulSoup(page.text, 'html.parser')
crutch = soup.find('meta', attrs={'name': 'description'})
if crutch:
crutch = crutch['content']
checks = get_urls_checks(page, id)

with Database(db_url) as db:
db.insert(
table='urls_checks',
cols=(
'url_id',
'status_code',
'h1',
'title',
'description',
'created_at'
),
data=(
id,
page.status_code,
soup.h1.string,
soup.title.string,
crutch,
date.today(),
)
cols=checks.keys(),
data=checks.values()
)

flash('Страница успешно проверена', 'success')
return redirect(url_for('url_info', id=id))
45 changes: 45 additions & 0 deletions page_analyzer/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from datetime import date
from bs4 import BeautifulSoup


def get_url_config(name) -> dict:
    """Build the column/value mapping for inserting a new URL row.

    Args:
        name: the normalized URL to store.

    Returns:
        dict whose keys mirror the ``urls`` table columns
        (``name`` and ``created_at``).
    """
    # The insert timestamp is always "today" at call time.
    return dict(name=name, created_at=date.today())


def get_urls_checks(page, id) -> dict:
    """Extract SEO fields from a fetched page for a ``urls_checks`` row.

    Args:
        page: a ``requests`` response object; only ``.text`` and
            ``.status_code`` are read.
        id: primary key of the ``urls`` row this check belongs to.

    Returns:
        dict whose keys mirror the ``urls_checks`` table columns; the
        ``h1``, ``title`` and ``description`` values are ``None`` when
        the corresponding element is absent from the page.
    """
    soup = BeautifulSoup(page.text, 'html.parser')

    h1 = soup.h1.string if soup.h1 else None
    title = soup.title.string if soup.title else None
    # Use .get(): a <meta name="description"> tag that lacks a ``content``
    # attribute would raise KeyError with direct subscripting.
    meta = soup.find('meta', attrs={'name': 'description'})
    description = meta.get('content') if meta else None

    return {
        'url_id': id,
        'status_code': page.status_code,
        'h1': h1,
        'title': title,
        'description': description,
        'created_at': date.today()
    }


def get_last_status_codes(checks):
    """Reduce check rows to the most recent check per URL.

    Args:
        checks: iterable of mappings, each with ``id``, ``url_id``,
            ``status_code`` and ``created_at`` keys.

    Returns:
        dict mapping ``url_id`` to a dict with the ``id``,
        ``status_code`` and ``created_at`` of that URL's latest check.
    """
    latest = {}

    for check in checks:
        current = latest.get(check['url_id'])
        # A higher check id means a newer check; also take the first
        # entry seen for a url_id that has no recorded check yet.
        if current is None or current['id'] < check['id']:
            latest[check['url_id']] = {
                'id': check['id'],
                'status_code': check['status_code'],
                'created_at': check['created_at'],
            }

    return latest
16 changes: 0 additions & 16 deletions page_analyzer/parsers.py

This file was deleted.

0 comments on commit 432b998

Please sign in to comment.