feat(dev_utils): remove sec-api.io integration from Debug Dashboard
Elijas committed Oct 12, 2023
1 parent 1a8ef9f commit 2dbdc33
Showing 6 changed files with 181 additions and 201 deletions.
README.md — 21 changes: 5 additions & 16 deletions

@@ -68,30 +68,19 @@ pip install sec-parser
 
 # Usage
 
-Before using the parser, you need to create an account at https://sec-api.io/. The first 100 API calls are free. After creating an account, you will receive an API key. This key should be added to your environment variables. You can do this using the following *bash* command:
+To retrieve the most recent 10-Q SEC EDGAR document in HTML format for Apple, follow these steps:
 
-```bash
-# Replace "your key here" with your actual key.
-# An example key might look like "aef7f2f22c8b3456de55"
-export SECAPIO_API_KEY="your key here"
-```
+```
 
-> **Note**
-sec-api.io is a third-party service that is not affiliated with `sec-parser`. We are planning to move away from this service in the near future and download the documents directly from the SEC EDGAR website.
 
-> **Note**
-The parser utilizes caching, so multiple calls to retrieve the same data will not consume your API calls limit.
+report into a collection of semantic elements extracted from the document.
 
-Once you have set up your API key, you can start using the parser in your *Python* code. Start by importing the `sec_parser` module as shown below:
+The following code snippet demonstrates how to do this:
 
 ```python
 import sec_parser as sp
 
-# Fetch and parse the latest Apple 10-Q report
-tree = sp.parse_latest("10-Q", ticker="AAPL")
-
-# Display the tree structure of the parsed document
-print(tree.render())
+elements = sp.SecParser().parse(html)
 ```
 Here is an example of the output you can expect:
 ```
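The rewritten Usage section parses an `html` string that the surrounding README text (outside this hunk) is expected to provide. Below is a minimal end-to-end sketch of the new flow, assuming the `sec-downloader` and `sec-edgar-downloader` packages behave as in the `get_latest_10q_html` helper introduced later in this commit; the company name and email are placeholders required by SEC EDGAR's fair-access policy:

```python
from sec_downloader import DownloadStorage
from sec_edgar_downloader import Downloader

import sec_parser as sp

# Download the latest Apple 10-Q filing from SEC EDGAR into a temporary
# folder, mirroring the call pattern of the new get_latest_10q_html helper.
storage = DownloadStorage(filter_pattern="**/*.htm*")
with storage as path:
    dl = Downloader("MyCompanyName", "my.email@example.com", path)
    dl.get("10-Q", "AAPL", limit=1, download_details=True)
html = storage.get_file_contents()[0].content

# Parse the filing HTML into a flat list of semantic elements.
elements = sp.SecParser().parse(html)
print(f"Parsed {len(elements)} semantic elements")
```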
dev_utils/debug_dashboard/app.py — 77 changes: 37 additions & 40 deletions

@@ -1,4 +1,3 @@
-import os
 from collections import Counter
 from dataclasses import dataclass
 from itertools import zip_longest
@@ -14,8 +13,7 @@
 import sec_parser.semantic_elements as se
 from dev_utils.debug_dashboard.general_utils import interleave_lists
 from dev_utils.debug_dashboard.sec_data_retrieval import (
-    download_html,
-    get_metadata,
+    get_latest_10q_html,
     get_semantic_elements,
     get_semantic_tree,
 )
@@ -79,7 +77,7 @@ def streamlit_app(
         do_element_render_html = True
         selected_step = 0
         do_interleave = False
-        use_tree_view = True
+        use_tree_view = False
         show_text_length = False
 
     if not HIDE_UI_ELEMENTS:
@@ -89,14 +87,15 @@
         with PassthroughContext():  # replace with st.expander("") if needed
             FIND_BY_TICKER = "Ticker symbols"
             ENTER_URL_DIRECTLY = "URLs"
-            data_source_option = sac.segmented(
-                items=[
-                    sac.SegmentedItem(label=FIND_BY_TICKER),
-                    sac.SegmentedItem(label=ENTER_URL_DIRECTLY),
-                ],
-                size="xs",
-                grow=True,
-            )
+            # data_source_option = sac.segmented(
+            #     items=[
+            #         sac.SegmentedItem(label=FIND_BY_TICKER),
+            #         sac.SegmentedItem(label=ENTER_URL_DIRECTLY),
+            #     ],
+            #     size="xs",
+            #     grow=True,
+            # )
+            data_source_option = FIND_BY_TICKER
             selected_ticker = data_source_option == FIND_BY_TICKER
             selected_url = data_source_option == ENTER_URL_DIRECTLY
             if selected_ticker:
@@ -140,32 +139,26 @@ def streamlit_app(
                 if not input_urls:
                     st.info("Please enter at least one URL.")
                     st.stop()
-            section_1_2, all_sections = st_radio(
-                "Select Report Sections",
-                ["Only MD&A", "All Report Sections"],
-                horizontal=True,
-                help="MD&A stands for Management Discussion and Analysis. It's a section of a company's annual report in which management discusses numerous aspects of the company, such as market dynamics, operating results, risk factors, and more.",
-            )
-            if section_1_2:
-                sections = ["part1item2"]
-            elif all_sections:
-                sections = None
+            section_1_2 = False
+            all_sections = True
+            # section_1_2, all_sections = st_radio(
+            #     "Select Report Sections",
+            #     ["Only MD&A", "All Report Sections"],
+            #     horizontal=True,
+            #     help="MD&A stands for Management Discussion and Analysis. It's a section of a company's annual report in which management discusses numerous aspects of the company, such as market dynamics, operating results, risk factors, and more.",
+            # )
+            # if section_1_2:
+            #     sections = ["part1item2"]
+            # elif all_sections:
+            #     sections = None
 
     assert tickers or input_urls
     for ticker in tickers:
-        metadata = get_metadata(
-            doc="10-Q",
-            latest_from_ticker=ticker,
-        )
+        metadata = None
         metadatas.append(metadata)
-        url = metadata["linkToFilingDetails"]
-        html = download_html(
-            doc="10-Q",
-            url=url,
-            sections=sections,
-            ticker=ticker,
-        )
-        htmls_urls.append(url)
+        # url = metadata["linkToFilingDetails"]
+        html = get_latest_10q_html(ticker=ticker)
+        htmls_urls.append(ticker)
         htmls.append(html)
     for url in input_urls:
         html = download_html(
@@ -174,7 +167,7 @@ def streamlit_app(
             sections=sections,
             ticker=None,
         )
-        metadata = get_metadata(doc="10-Q", url=url)
+        metadata = None
         metadatas.append(metadata)
         htmls_urls.append(url)
         htmls.append(html)
@@ -295,11 +288,10 @@ def format_cls(cls):
                 value=True,
             )
         if selected_step == 3:
-            use_expanders = st.checkbox(
-                "Merged view",
-                value=not use_tree_view,
+            use_tree_view = st.checkbox(
+                "Tree view",
+                value=use_tree_view,
             )
-            use_tree_view = not use_expanders
     with right:
         if selected_step == 2:
             element_column_count = st.number_input(
@@ -345,7 +337,10 @@ def format_cls(cls):
 
 def get_label(metadata, url):
     if not metadata:
-        return url.split("/")[-1]
+        if url and "/" in url:
+            return url.split("/")[-1]
+        else:
+            return url
     company_name = normalize_company_name(metadata["companyName"])
     form_type = metadata["formType"]
     filed_at = parse(metadata["filedAt"]).astimezone(tzutc()).strftime("%b %d, %Y")
@@ -355,6 +350,8 @@ def get_label(metadata, url):
     return f"**{company_name}** | {form_type} filed on {filed_at} for the period ended {period_of_report}"
 
 def get_buttons(metadata, url, *, align="end"):
+    if "/" not in url:
+        return
     if metadata:
         url_buttons = [
             {
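With sec-api.io metadata gone, the ticker path stores the ticker itself in `htmls_urls`, so `get_label` and `get_buttons` now branch on whether the string contains a "/". A standalone sketch of the new fallback, with hypothetical example values:

```python
def get_label(metadata, url):
    # Only the metadata-less branch is reproduced here; after this commit,
    # the dashboard always passes metadata=None.
    if not metadata:
        if url and "/" in url:
            return url.split("/")[-1]  # URL input: label by the last path segment
        else:
            return url  # ticker input: the stored "url" is just the ticker

print(get_label(None, "AAPL"))  # -> AAPL
print(get_label(None, "https://www.sec.gov/Archives/aapl-20230701.htm"))  # -> aapl-20230701.htm
```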
dev_utils/debug_dashboard/sec_data_retrieval.py — 51 changes: 14 additions & 37 deletions

@@ -1,53 +1,30 @@
-from typing import Optional
+from pathlib import Path
 
 import streamlit as st
+from sec_downloader import DownloadStorage
+from sec_edgar_downloader import Downloader
 
 import sec_parser as sp
 from dev_utils.debug_dashboard.cache_utils import cache_to_file
 
 
-@st.cache_data(
-    experimental_allow_widgets=True,
-    show_spinner="Retrieving SEC EDGAR document...",
-)
-@cache_to_file(
-    cache_by_keys={"latest_from_ticker", "doc", "url", "sections"},
-    cache_dir=".cache/metadata",
-)
-def get_metadata(
-    _secapi_api_key: str,  # prefix _ prevents hashing in st.cache_data
-    *,
-    doc: sp.DocumentType | str,
-    url: str | None = None,
-    latest_from_ticker: str | None = None,
-) -> str:
-    from sec_edgar_downloader import Downloader
-    retriever = sp.SecapioDataRetriever(api_key=_secapi_api_key)
-    return retriever.retrieve_report_metadata(
-        doc,
-        url=url,
-        latest_from_ticker=latest_from_ticker,
-    )
+EDGAR_CLIENT_NAME = "Alphanome.AI"
+EDGAR_CLIENT_EMAIL = "[email protected]"
 
 
-@st.cache_data(
-    experimental_allow_widgets=True,
-    show_spinner="Retrieving SEC EDGAR document...",
-)
-@cache_to_file(
-    cache_by_keys={"url", "ticker", "doc", "sections"},
-    cache_dir=".cache/html",
-)
-def download_html(
-    _secapi_api_key: str,  # prefix _ prevents hashing in st.cache_data
+def get_latest_10q_html(
     *,
-    ticker: str,  # added just to make the cache write ticker as part of the filename
-    doc: sp.DocumentType | str,
-    url: str,
-    sections: Optional[list[sp.SectionType | str]] = None,
+    ticker: str,
 ) -> str:
-    retriever = sp.SecapioDataRetriever(api_key=_secapi_api_key)
-    return retriever.get_report_html(doc, url, sections=sections)
+    ticker = ticker.upper().strip()
+    assert ticker, "Ticker must not be empty"
+    storage = DownloadStorage(filter_pattern="**/*.htm*")
+    with storage as path:
+        dl = Downloader(EDGAR_CLIENT_NAME, EDGAR_CLIENT_EMAIL, path)
+        dl.get("10-Q", ticker, limit=1, download_details=True)
+    return storage.get_file_contents()[0].content
 
 
 @st.cache_resource
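A usage sketch for the new helper, assuming `dev_utils` is importable (for example, when run from the repository root):

```python
from dev_utils.debug_dashboard.sec_data_retrieval import get_latest_10q_html

# The ticker is keyword-only and normalized inside the helper, so
# "aapl " and "AAPL" fetch the same latest 10-Q filing.
html = get_latest_10q_html(ticker="aapl ")
print(html[:200])
```

Note that, as the hunk above shows, the new helper drops the `@st.cache_data` and `cache_to_file` decorators that wrapped the sec-api.io calls, so each call downloads the filing again.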
