16.0.0, making things super simple
imahdimir committed Oct 16, 2023
1 parent 31b7bfb commit 76fdc5a
Showing 6 changed files with 58 additions and 91 deletions.
34 changes: 17 additions & 17 deletions .github/workflows/publish-on-pip.yml
@@ -10,7 +10,7 @@ name: Upload Python Package

on:
release:
types: [published]
types: [ published ]

permissions:
contents: read
@@ -21,19 +21,19 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build package
run: python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build package
run: python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
11 changes: 6 additions & 5 deletions README.md
@@ -7,12 +7,13 @@ pip install githubdata
# Quick Start

```python
from githubdata.utils import get_data_fr_github_without_double_clone
from githubdata import get_data_wo_double_clone

# Github data repository url
url = 'https://github.com/imahdimir/d-TSETMC_ID-2-FirmTicker'
# GitHub "Data Repository" url/path
url = 'imahdimir/d-TSETMC_ID-2-FirmTicker'

df = get_data_fr_github_without_double_clone(url)
# get the data as a pandas DataFrame
df = get_data_wo_double_clone(url)
```

- Easy as That!
***Easy as that!***
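
For context (not part of the committed README text), the renamed `get_data_wo_double_clone` also accepts a `remove_cache` flag, visible in the `src/githubdata/utils.py` diff further down. A minimal sketch of using it, reusing the same example repository:

```python
from githubdata import get_data_wo_double_clone

# Read the data, then drop the local clone (kept under 'GitHubData/' by default)
df = get_data_wo_double_clone('imahdimir/d-TSETMC_ID-2-FirmTicker',
                              remove_cache=True)
print(df.shape)
```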
22 changes: 7 additions & 15 deletions pyproject.toml
@@ -4,43 +4,35 @@ build-backend = "hatchling.build"

[project]
name = "githubdata"
version = "15.0.0"
version = "16.0.0"
authors = [{ name = "Mahdi Mir", email = "[email protected]" }]
description = "A simple Python package to easily download from and manage a GitHub \"Data repository\""
readme = "README.md"
license = { file = "LICENSE" }
dependencies = [
"giteasy",
"pandas",
"fastparquet",
"pyarrow",
"openpyxl",
"mirutil",
"persiantools"
"pandas",
]
classifiers = [
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: Implementation :: PyPy",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Utilities"]
"Topic :: Utilities"
]
keywords = [
"TSE",
"Finance",
"data",
"cleaning",
"data_cleaning",
"Tehran",
"Stocks"]
"Stocks"
]

[project.urls]
"Homepage" = "https://github.com/imahdimir/githubdata"
4 changes: 3 additions & 1 deletion src/githubdata/__init__.py
@@ -1,2 +1,4 @@
from .github_data_repo import default_containing_dir
from .github_data_repo import default_githubdata_dir
from .github_data_repo import GitHubDataRepo
from .utils import clone_overwrite_a_repo__ret_gdr_obj
from .utils import get_data_wo_double_clone
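
A short usage sketch (not from the commit) of the four names the new `__init__.py` exports, assuming the published 16.0.0 package matches this file:

```python
from githubdata import (
    GitHubDataRepo,
    clone_overwrite_a_repo__ret_gdr_obj,
    default_githubdata_dir,
    get_data_wo_double_clone,
)

print(default_githubdata_dir)  # Path('GitHubData/'), the default clone location

# Force a fresh clone and keep the repo object around for further calls
gdr = clone_overwrite_a_repo__ret_gdr_obj('imahdimir/d-TSETMC_ID-2-FirmTicker')
df = gdr.read_data()
```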
53 changes: 21 additions & 32 deletions src/githubdata/github_data_repo.py
@@ -4,23 +4,16 @@

from pathlib import Path

import pandas as pd
from giteasy import GitHubRepo
from mirutil.df import read_data_according_to_type as rdatt

data_file_suffixes = {
'.xlsx' : None ,
'.prq' : None ,
'.csv' : None ,
}

default_containing_dir = Path('GitHubData/')
default_githubdata_dir = Path('GitHubData/')

class GitHubDataRepo(GitHubRepo) :

def __init__(self ,
repo_url ,
local_path = None ,
containing_dir = default_containing_dir ,
containing_dir = default_githubdata_dir ,
committing_usr = None ,
token = None
) :
@@ -30,34 +23,30 @@ def __init__(self ,
committing_usr = committing_usr ,
token = token)

self.data_suf: str | None = None
"""
"""

self.data_fp: Path | None = None

self.set_data_fps()
# run on init
self.set_data_fp()

def clone_overwrite(self , depth = 1) :
super().clone_overwrite(depth = depth)
self.set_data_fps()

def ret_sorted_fpns_by_suf(self , suffix) :
ls = list(self.local_path.glob(f'*{suffix}'))
return sorted(ls)

def _set_defualt_data_suffix(self) :
for ky in data_file_suffixes.keys() :
fps = self.ret_sorted_fpns_by_suf(ky)
if len(fps) != 0 :
self.data_suf = ky
return

def set_data_fps(self) :
self._set_defualt_data_suffix()
if self.data_suf is None :
return
fps = self.ret_sorted_fpns_by_suf(self.data_suf)
self.data_fp = fps[0]
self.set_data_fp()

def set_data_fp(self) :
fps = self.local_path.glob('*.parquet')
# get the first fp, or None if no parquet file exists
self.data_fp = next(fps , None)

def read_data(self) :
"""
Reads the data from the local path if it exists; otherwise clones the repo and reads the data.
:return: pandas.DataFrame
"""
if not self.local_path.exists() :
self.clone_overwrite()
return rdatt(self.data_fp)
df = pd.read_parquet(self.data_fp)
return df
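
The rewritten `set_data_fp` replaces the old suffix-scanning helpers with a single `next()` over a glob. A standalone illustration of that idiom, with a hypothetical local directory name:

```python
from pathlib import Path

import pandas as pd

# Hypothetical local clone path, for illustration only
repo_dir = Path('GitHubData/d-TSETMC_ID-2-FirmTicker')

# Path.glob is lazy; next(..., None) returns the first *.parquet match,
# or None when no parquet file exists (a missing directory also yields nothing)
data_fp = next(repo_dir.glob('*.parquet'), None)

if data_fp is not None:
    df = pd.read_parquet(data_fp)  # the same call read_data() now makes
    print(df.head())
```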
25 changes: 4 additions & 21 deletions src/githubdata/utils.py
@@ -3,18 +3,14 @@
"""

import shutil

import pandas as pd
from persiantools.jdatetime import JalaliDateTime

from .github_data_repo import GitHubDataRepo

def get_data_fr_github_without_double_clone(github_url , remove_cache = False
) -> pd.DataFrame :
def get_data_wo_double_clone(github_url , remove_cache = False
) -> pd.DataFrame :
"""
gets data from a GitHub data repo, without cloning it twice.
if it is already cloned, it will read the data from the local path.
Gets data from a GitHub data repo without cloning it twice; if it is already cloned, it reads the data from the local path.
:param github_url:
:param remove_cache: if True, it will remove the cloned repo after reading the data.
@@ -26,20 +22,7 @@ def get_data_fr_github_without_double_clone(github_url , remove_cache = False
gd.rmdir()
return df

def clone_overwrite_a_repo_return_gdr_obj(gd_url) :
def clone_overwrite_a_repo__ret_gdr_obj(gd_url) :
gdr = GitHubDataRepo(gd_url)
gdr.clone_overwrite()
return gdr

def replace_old_data_with_new_and_iso_jdate_title(gdt , df_fpn) :
gdt.data_fp.unlink()

tjd = JalaliDateTime.now().strftime('%Y-%m-%d')
fp = gdt.local_path / f'{tjd}.prq'

shutil.copy(df_fpn , fp)
print(f'Replaced {df_fpn} to {fp}')

def push_to_github_by_code_url(gdt , github_url) :
msg = 'Updated by ' + github_url
gdt.commit_and_push(msg , branch = 'main')
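
Pieced together from the hunks above (so a sketch, not a verbatim copy of `utils.py`), the renamed helper is roughly equivalent to:

```python
import pandas as pd

from githubdata import GitHubDataRepo


def get_data_wo_double_clone_sketch(github_url, remove_cache=False) -> pd.DataFrame:
    gd = GitHubDataRepo(github_url)
    df = gd.read_data()  # read_data() clones the repo only if no local copy exists
    if remove_cache:
        gd.rmdir()       # delete the local clone after reading
    return df
```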
