Skip to content

Commit

Permalink
Merge branch 'rvankoert:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
rvankoert authored Apr 3, 2024
2 parents 96aefa9 + 5d39dee commit 2a37dfa
Show file tree
Hide file tree
Showing 85 changed files with 9,457 additions and 4,748 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name: Pylint

on: [pull_request]

jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint
pip install -r requirements.txt
- name: Analysing the code with pylint
run: |
pylint $(git ls-files 'src/*.py') --extension-pkg-whitelist=cv2,word_beam_search --generated-members=cv2,word_beam_search --ignored-modules=tensorflow.*,skimage.filters --disable=import-error --fail-under=8
32 changes: 23 additions & 9 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ jobs:

steps:
- name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.9
python-version: '3.10'

- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libgl1-mesa-glx
Expand All @@ -23,23 +23,30 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Test utils
- name: Test tokenizer
run: |
python -m unittest tests/test_utils.py
python -m unittest tests/test_tokenizer.py
env:
TF_CPP_MIN_LOG_LEVEL: '2'

- name: Test DataGenerator
- name: Test DataLoader
if: always()
run: |
python -m unittest tests/test_datagenerator.py
python -m unittest tests/test_dataloader.py
env:
TF_CPP_MIN_LOG_LEVEL: '2'

- name: Test DataLoader
- name: Test data augment layers
if: always()
run: |
python -m unittest tests/test_dataloader.py
python -m unittest tests/test_data_augments.py
env:
TF_CPP_MIN_LOG_LEVEL: '2'

- name: Test DataManager
if: always()
run: |
python -m unittest tests/test_datamanager.py
env:
TF_CPP_MIN_LOG_LEVEL: '2'

Expand All @@ -63,3 +70,10 @@ jobs:
python -m unittest tests/test_model_replacement.py
env:
TF_CPP_MIN_LOG_LEVEL: '2'

- name: Test custom LR schedule
if: always()
run: |
python -m unittest tests/test_lr_schedule.py
env:
TF_CPP_MIN_LOG_LEVEL: '2'
268 changes: 174 additions & 94 deletions README.md

Large diffs are not rendered by default.

82 changes: 82 additions & 0 deletions configs/default.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{
"args": {
"augmentation": {
"aug_binarize_otsu": false,
"aug_binarize_sauvola": false,
"aug_blur": false,
"aug_distort_jpeg": false,
"aug_elastic_transform": false,
"aug_invert": false,
"aug_multiply": 1,
"aug_random_crop": false,
"aug_random_shear": false,
"aug_random_width": false,
"visualize_augments": false
},
"decoding": {
"beam_width": 10,
"corpus_file": null,
"greedy": false,
"wbs_smoothing": 0.1
},
"depr": {
"channels": 3,
"config_file_output": null,
"do_inference": false,
"do_train": true,
"height": 64,
"no_auto": false,
"output_charlist": null,
"thaw": false,
"use_mask": true
},
"general": {
"batch_size": 4,
"charlist": null,
"config_file": null,
"gpu": "0",
"output": "output",
"seed": 42
},
"inference": {
"inference_list": null,
"results_file": "output/results.txt"
},
"learning_rate": {
"decay_per_epoch": false,
"decay_rate": 0.99,
"decay_steps": -1,
"learning_rate": 0.0003,
"linear_decay": false,
"optimizer": "adamw",
"warmup_ratio": 0.0
},
"misc": {
"deterministic": false,
"normalization_file": null
},
"model": {
"freeze_conv_layers": false,
"freeze_dense_layers": false,
"freeze_recurrent_layers": false,
"model": null,
"model_name": null,
"replace_final_layer": false,
"replace_recurrent_layer": null,
"use_float32": false
},
"training": {
"do_validate": false,
"early_stopping_patience": 20,
"epochs": 40,
"max_queue_size": 256,
"output_checkpoints": false,
"steps_per_epoch": null,
"test_list": null,
"train_list": null,
"training_verbosity_mode": "auto",
"validation_list": null,
"width": 65536
}
}
}
59 changes: 59 additions & 0 deletions configs/finetuning.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"args": {
"augmentation": {
"aug_binarize_otsu": false,
"aug_binarize_sauvola": false,
"aug_blur": false,
"aug_distort_jpeg": false,
"aug_elastic_transform": true,
"aug_invert": false,
"aug_multiply": 1,
"aug_random_crop": true,
"aug_random_shear": false,
"aug_random_width": true,
"visualize_augments": false
},
"decoding": {
"beam_width": 10,
"greedy": false
},
"general": {
"batch_size": 64,
"gpu": "0",
"output": "output",
"seed": 42
},
"learning_rate": {
"decay_per_epoch": false,
"decay_rate": 0.95,
"decay_steps": -1,
"learning_rate": 0.0003,
"linear_decay": true,
"optimizer": "rmsprop",
"warmup_ratio": 0.0
},
"misc": {
"normalization_file": "/path/to/normalization.json"
},
"model": {
"freeze_conv_layers": true,
"freeze_dense_layers": false,
"freeze_recurrent_layers": false,
"model": "/path/to/model/",
"model_name": "My-Finetuned-Loghi-Model",
"replace_final_layer": true,
"replace_recurrent_layer": null,
"use_float32": false
},
"training": {
"do_validate": true,
"early_stopping_patience": 10,
"epochs": 50,
"max_queue_size": 256,
"output_checkpoints": true,
"train_list": "/path/to/train.txt",
"training_verbosity_mode": "auto",
"validation_list": "/path/to/val.txt"
}
}
}
28 changes: 28 additions & 0 deletions configs/inference.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"args": {
"decoding": {
"beam_width": 10,
"corpus_file": null,
"greedy": false,
"wbs_smoothing": 0.1
},
"general": {
"batch_size": 200,
"gpu": "0",
"output": "output",
"seed": 42
},
"inference": {
"inference_list": "/path/to/inference.txt",
"results_file": "output/results.txt"
},
"misc": {
"check_missing_files": false,
"deterministic": false
},
"model": {
"model": "/path/to/model/",
"use_float32": false
}
}
}
26 changes: 26 additions & 0 deletions configs/testing.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"args": {
"decoding": {
"beam_width": 10,
"greedy": false
},
"general": {
"batch_size": 200,
"gpu": "0",
"output": "output",
"seed": 42
},
"misc": {
"deterministic": false,
"normalization_file": "/path/to/normalization.json"
},
"model": {
"model": "/path/to/model/",
"use_float32": false
},
"training": {
"max_queue_size": 256,
"test_list": "/path/to/test.txt"
}
}
}
57 changes: 57 additions & 0 deletions configs/training.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"args": {
"augmentation": {
"aug_binarize_otsu": false,
"aug_binarize_sauvola": false,
"aug_blur": false,
"aug_distort_jpeg": false,
"aug_elastic_transform": true,
"aug_invert": false,
"aug_multiply": 1,
"aug_random_crop": true,
"aug_random_shear": false,
"aug_random_width": true,
"visualize_augments": false
},
"decoding": {
"beam_width": 10,
"greedy": false
},
"depr": {
"channels": 1
},
"general": {
"batch_size": 64,
"gpu": "0",
"output": "output",
"seed": 42
},
"learning_rate": {
"decay_per_epoch": false,
"decay_rate": 0.95,
"decay_steps": -1,
"learning_rate": 0.0003,
"linear_decay": false,
"optimizer": "rmsprop",
"warmup_ratio": 0.0
},
"misc": {
"normalization_file": "/path/to/normalization.json"
},
"model": {
"model": "recommended",
"model_name": "My-Loghi-Model",
"use_float32": false
},
"training": {
"do_validate": true,
"early_stopping_patience": 20,
"epochs": 100,
"max_queue_size": 256,
"output_checkpoints": true,
"train_list": "/path/to/train.txt",
"training_verbosity_mode": "auto",
"validation_list": "/path/to/val.txt"
}
}
}
27 changes: 27 additions & 0 deletions configs/validation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"args": {
"decoding": {
"beam_width": 10,
"greedy": false
},
"general": {
"batch_size": 200,
"gpu": "0",
"output": "output",
"seed": 42
},
"misc": {
"deterministic": false,
"normalization_file": "/path/to/normalization.json"
},
"model": {
"model": "/path/to/model/",
"use_float32": false
},
"training": {
"do_validate": true,
"max_queue_size": 256,
"validation_list": "/path/to/validation.txt"
}
}
}
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: loghi-htr
channels:
- conda-forge
dependencies:
- python=3.10
- python=3.11
- pip
- pip:
- -r requirements.txt
Loading

0 comments on commit 2a37dfa

Please sign in to comment.