Skip to content

Commit

Permalink
Merge pull request #838 from opendatalab/release-0.9.0
Browse files Browse the repository at this point in the history
Release 0.9.0
  • Loading branch information
myhloli authored Nov 1, 2024
2 parents 765c6d7 + 1402479 commit 3a42ebb
Show file tree
Hide file tree
Showing 591 changed files with 98,911 additions and 951 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cla.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
path-to-document: 'https://github.com/opendatalab/MinerU/blob/master/MinerU_CLA.md' # e.g. a CLA or a DCO document
# branch should not be protected
branch: 'master'
allowlist: myhloli,dt-yy,Focusshang,renpengli01,icecraft,drunkpig,wangbinDL,qiangqiang199,GDDGCZ518,papayalove,conghui,quyuan
allowlist: myhloli,dt-yy,Focusshang,renpengli01,icecraft,drunkpig,wangbinDL,qiangqiang199,GDDGCZ518,papayalove,conghui,quyuan,LollipopsAndWine

# the followings are the optional inputs - If the optional inputs are not given, then default values will be taken
#remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository)
Expand Down
20 changes: 9 additions & 11 deletions .github/workflows/cli.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,18 @@ on:
paths-ignore:
- "cmds/**"
- "**.md"
- "**.yml"
pull_request:
branches:
- "master"
- "dev"
paths-ignore:
- "cmds/**"
- "**.md"
- "**.yml"
workflow_dispatch:
jobs:
cli-test:
runs-on: pdf
timeout-minutes: 120
timeout-minutes: 240
strategy:
fail-fast: true

Expand All @@ -33,16 +31,16 @@ jobs:
with:
fetch-depth: 2

- name: install
- name: install&test
run: |
echo $GITHUB_WORKSPACE && sh tests/retry_env.sh
- name: unit test
run: |
cd $GITHUB_WORKSPACE && export PYTHONPATH=. && coverage run -m pytest tests/test_unit.py --cov=magic_pdf/ --cov-report term-missing --cov-report html
source activate mineru
conda env list
pip show coverage
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
- name: cli test
run: |
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
cd $GITHUB_WORKSPACE && pytest -m P0 -s -v tests/test_cli/test_cli_sdk.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
Expand Down
55 changes: 55 additions & 0 deletions .github/workflows/daily.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: mineru
on:
schedule:
- cron: '0 22 * * *' # 每天晚上 10 点执行
jobs:
cli-test:
runs-on: pdf
timeout-minutes: 240
strategy:
fail-fast: true

steps:
- name: PDF cli
uses: actions/checkout@v3
with:
fetch-depth: 2

- name: install&test
run: |
source activate mineru
conda env list
pip show coverage
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
needs: cli-test
runs-on: pdf
steps:
- name: get_actor
run: |
metion_list="dt-yy"
echo $GITHUB_ACTOR
if [[ $GITHUB_ACTOR == "drunkpig" ]]; then
metion_list="xuchao"
elif [[ $GITHUB_ACTOR == "myhloli" ]]; then
metion_list="zhaoxiaomeng"
elif [[ $GITHUB_ACTOR == "icecraft" ]]; then
metion_list="xurui1"
fi
echo $metion_list
echo "METIONS=$metion_list" >> "$GITHUB_ENV"
echo ${{ env.METIONS }}
- name: notify
run: |
echo ${{ secrets.USER_ID }}
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}
61 changes: 61 additions & 0 deletions .github/workflows/huigui.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: mineru
on:
push:
branches:
- "master"
- "dev"
paths-ignore:
- "cmds/**"
- "**.md"
workflow_dispatch:
jobs:
cli-test:
runs-on: pdf
timeout-minutes: 240
strategy:
fail-fast: true

steps:
- name: PDF cli
uses: actions/checkout@v3
with:
fetch-depth: 2

- name: install&test
run: |
source activate mineru
conda env list
pip show coverage
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
needs: cli-test
runs-on: pdf
steps:
- name: get_actor
run: |
metion_list="dt-yy"
echo $GITHUB_ACTOR
if [[ $GITHUB_ACTOR == "drunkpig" ]]; then
metion_list="xuchao"
elif [[ $GITHUB_ACTOR == "myhloli" ]]; then
metion_list="zhaoxiaomeng"
elif [[ $GITHUB_ACTOR == "icecraft" ]]; then
metion_list="xurui1"
fi
echo $metion_list
echo "METIONS=$metion_list" >> "$GITHUB_ENV"
echo ${{ env.METIONS }}
- name: notify
run: |
echo ${{ secrets.USER_ID }}
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}
22 changes: 0 additions & 22 deletions .github/workflows/update_base.yml

This file was deleted.

89 changes: 50 additions & 39 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,39 +1,50 @@
*.tar
*.tar.gz
venv*/
envs/
slurm_logs/

sync1.sh
data_preprocess_pj1
data-preparation1
__pycache__
*.log
*.pyc
.vscode
debug/
*.ipynb
.idea

# vscode history
.history

.DS_Store
.env

bad_words/
bak/

app/tests/*
temp/
tmp/
tmp
.vscode
.vscode/
ocr_demo

/app/common/__init__.py
/magic_pdf/config/__init__.py
source.dev.env

tmp
*.tar
*.tar.gz
*.zip
venv*/
envs/
slurm_logs/

sync1.sh
data_preprocess_pj1
data-preparation1
__pycache__
*.log
*.pyc
.vscode
debug/
*.ipynb
.idea

# vscode history
.history

.DS_Store
.env

bad_words/
bak/

app/tests/*
temp/
tmp/
tmp
.vscode
.vscode/
ocr_demo
.coveragerc
/app/common/__init__.py
/magic_pdf/config/__init__.py
source.dev.env

tmp

projects/web/node_modules
projects/web/dist

projects/web_demo/web_demo/static/
cli_debug/
debug_utils/

# sphinx docs
_build/
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ repos:
rev: 5.0.4
hooks:
- id: flake8
args: ["--max-line-length=120", "--ignore=E131,E125,W503,W504,E203"]
args: ["--max-line-length=150", "--ignore=E131,E125,W503,W504,E203"]
- repo: https://github.com/PyCQA/isort
rev: 5.11.5
hooks:
Expand All @@ -12,11 +12,12 @@ repos:
rev: v0.32.0
hooks:
- id: yapf
args: ["--style={based_on_style: google, column_limit: 120, indent_width: 4}"]
args: ["--style={based_on_style: google, column_limit: 150, indent_width: 4}"]
- repo: https://github.com/codespell-project/codespell
rev: v2.2.1
hooks:
- id: codespell
args: ['--skip', '*.json']
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
Expand Down
16 changes: 16 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
version: 2

build:
os: ubuntu-22.04
tools:
python: "3.10"

formats:
- epub

python:
install:
- requirements: docs/zh_cn/requirements.txt

sphinx:
configuration: docs/zh_cn/conf.py
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ RUN python3 -m venv /opt/mineru_venv
RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
pip3 install --upgrade pip && \
wget https://gitee.com/myhloli/MinerU/raw/master/requirements-docker.txt && \
pip3 install -r requirements-docker.txt --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple && \
pip3 install -r requirements-docker.txt --extra-index-url https://wheels.myhloli.com -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/"

# Copy the configuration file template and install magic-pdf latest
Expand Down
1 change: 1 addition & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
Expand Up @@ -659,3 +659,4 @@ specific requirements.
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.

Loading

0 comments on commit 3a42ebb

Please sign in to comment.