From 2590595dcb4fe00956363c3946a2949646584428 Mon Sep 17 00:00:00 2001 From: devlux76 Date: Sat, 25 Dec 2021 08:27:08 -0700 Subject: [PATCH 1/9] Addressing issue #84 --- eyecite/helpers.py | 12 +++++------- tests/test_FindTest.py | 6 ++++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/eyecite/helpers.py b/eyecite/helpers.py index d5f6c42..c367a85 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -40,18 +40,16 @@ def get_court_by_paren(paren_string: str) -> Optional[str]: Does not work on SCOTUS, since that court lacks parentheticals, and needs to be handled after disambiguation has been completed. """ - court_str = strip_punct(paren_string) - + #remove punctuation and convert to upper case + court_str = re.sub(r'[^\w\s]', '',paren_string).upper() court_code = None if court_str: - # Map the string to a court, if possible. + # Map the string to a court, if possible. for court in courts: - # Use startswith because citations are often missing final period, - # e.g. "2d Cir" - if court["citation_string"].startswith(court_str): + #remove punctuation and convert to upper case because punctuation is often unreliable + if re.sub(r'[^\w\s]', '',court["citation_string"]).upper() == court_str: court_code = court["id"] break - return court_code diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index ad7ee6a..dbc6572 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -114,6 +114,12 @@ def test_find_citations(self): [case_citation(metadata={'plaintiff': 'lissner', 'defendant': 'test'}, year=1982)]), + # Test to disambiguate SC & Supreme Court + ('lissner v. test, 263 F.Supp. 26 (S.C. 1967)', + [case_citation(volume='263',page='26', year=1967, reporter='F.Supp.', + metadata={'plaintiff': 'lissner', + 'defendant' : 'test', + 'court' : 'sc'})]), # Test with court and extra information ('bob lissner v. test 1 U.S. 12, 347-348 (4th Cir. 1982)', [case_citation(page='12', year=1982, From 77bd55d570c260a220c247a181ec8d73c57a9082 Mon Sep 17 00:00:00 2001 From: devlux76 Date: Sat, 25 Dec 2021 08:41:45 -0700 Subject: [PATCH 2/9] Fixed S.C. to SC to match the original issue report --- tests/test_FindTest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index dbc6572..f8d80da 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -115,7 +115,7 @@ def test_find_citations(self): 'defendant': 'test'}, year=1982)]), # Test to disambiguate SC & Supreme Court - ('lissner v. test, 263 F.Supp. 26 (S.C. 1967)', + ('lissner v. test, 263 F.Supp. 26 (SC 1967)', [case_citation(volume='263',page='26', year=1967, reporter='F.Supp.', metadata={'plaintiff': 'lissner', 'defendant' : 'test', From cfce4b8b1830ea0f219e48b8f42df2cf6d8d9985 Mon Sep 17 00:00:00 2001 From: Michael Lissner Date: Mon, 27 Dec 2021 13:25:18 -0800 Subject: [PATCH 3/9] feat(dx): Add pre-commit config and deps --- .pre-commit-config.yaml | 38 ++++++++ poetry.lock | 192 +++++++++++++++++++++++++++++++++++++++- pyproject.toml | 7 +- 3 files changed, 235 insertions(+), 2 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9e5e054 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,38 @@ +repos: + - repo: https://github.com/asottile/pyupgrade + rev: v2.29.1 + hooks: + - id: pyupgrade + args: [--py37-plus] + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: check-added-large-files + - id: check-ast + - id: check-json + - id: check-merge-conflict + - id: check-toml + - id: check-yaml + - id: fix-byte-order-marker + - id: fix-encoding-pragma + args: [--remove] + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + exclude: ^tests/examples/pacer/nef/s3/.*\.txt$ + + - repo: https://github.com/ikamensh/flynt/ + rev: '0.69' + hooks: + - id: flynt + args: [--line-length=79, --transform-concats] + + - repo: https://github.com/psf/black + rev: 21.12b0 + hooks: + - id: black + + - repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + name: isort (python) diff --git a/poetry.lock b/poetry.lock index 36c5db4..619b1a5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -19,6 +19,21 @@ lazy-object-proxy = ">=1.4.0" typed-ast = {version = ">=1.4.0,<1.5", markers = "implementation_name == \"cpython\" and python_version < \"3.8\""} wrapt = ">=1.11,<1.13" +[[package]] +name = "backports.entry-points-selectable" +version = "1.1.1" +description = "Compatibility shim providing selectable entry points for older implementations" +category = "dev" +optional = false +python-versions = ">=2.7" + +[package.dependencies] +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] +testing = ["pytest", "pytest-flake8", "pytest-cov", "pytest-black (>=0.3.7)", "pytest-mypy", "pytest-checkdocs (>=2.4)", "pytest-enabler (>=1.0.1)"] + [[package]] name = "black" version = "20.8b1" @@ -41,6 +56,14 @@ typing-extensions = ">=3.7.4" colorama = ["colorama (>=0.4.3)"] d = ["aiohttp (>=3.3.2)", "aiohttp-cors"] +[[package]] +name = "cfgv" +version = "3.3.1" +description = "Validate configuration and produce human readable error messages." +category = "dev" +optional = false +python-versions = ">=3.6.1" + [[package]] name = "click" version = "7.1.2" @@ -76,6 +99,14 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "distlib" +version = "0.3.4" +description = "Distribution utilities" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "exrex" version = "0.10.5" @@ -84,6 +115,18 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "filelock" +version = "3.4.2" +description = "A platform independent file lock." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo (>=2021.8.17b43)", "sphinx (>=4.1)", "sphinx-autodoc-typehints (>=1.12)"] +testing = ["covdefaults (>=1.2.0)", "coverage (>=4)", "pytest (>=4)", "pytest-cov", "pytest-timeout (>=1.4.2)"] + [[package]] name = "flake8" version = "3.9.0" @@ -106,6 +149,17 @@ category = "dev" optional = false python-versions = ">=3.6.1,<4.0" +[[package]] +name = "identify" +version = "2.4.0" +description = "File identification library for Python" +category = "dev" +optional = false +python-versions = ">=3.6.1" + +[package.extras] +license = ["ukkonen"] + [[package]] name = "importlib-metadata" version = "3.4.0" @@ -240,6 +294,14 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "nodeenv" +version = "1.6.0" +description = "Node.js virtual environment builder" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "pathspec" version = "0.8.1" @@ -260,6 +322,35 @@ python-versions = ">= 3.6" mako = "*" markdown = ">=3.0" +[[package]] +name = "platformdirs" +version = "2.4.1" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["Sphinx (>=4)", "furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)"] +test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)"] + +[[package]] +name = "pre-commit" +version = "2.16.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +category = "dev" +optional = false +python-versions = ">=3.6.1" + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +toml = "*" +virtualenv = ">=20.0.8" + [[package]] name = "pyahocorasick" version = "1.4.1" @@ -314,6 +405,14 @@ python-versions = "*" Jinja2 = "*" pylint = "*" +[[package]] +name = "pyyaml" +version = "6.0" +description = "YAML parser and emitter for Python" +category = "dev" +optional = false +python-versions = ">=3.6" + [[package]] name = "regex" version = "2021.4.4" @@ -373,6 +472,26 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "virtualenv" +version = "20.10.0" +description = "Virtual Python Environment builder" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[package.dependencies] +"backports.entry-points-selectable" = ">=1.0.4" +distlib = ">=0.3.1,<1" +filelock = ">=3.2,<4" +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} +platformdirs = ">=2,<3" +six = ">=1.9.0,<2" + +[package.extras] +docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=21.3)"] +testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "packaging (>=20.0)"] + [[package]] name = "wrapt" version = "1.12.1" @@ -396,7 +515,7 @@ testing = ["pytest (>=3.5,!=3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-flake [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "7cff438060073441e9c8f1a72e36d41b6312b89048fd0c76c33f796db61bdffc" +content-hash = "ba70894a8bfb3755f7e321c01b47b0f5282e9caed056c09b34a5e7e836a106e9" [metadata.files] appdirs = [ @@ -407,9 +526,17 @@ astroid = [ {file = "astroid-2.5.1-py3-none-any.whl", hash = "sha256:21d735aab248253531bb0f1e1e6d068f0ee23533e18ae8a6171ff892b98297cf"}, {file = "astroid-2.5.1.tar.gz", hash = "sha256:cfc35498ee64017be059ceffab0a25bedf7548ab76f2bea691c5565896e7128d"}, ] +"backports.entry-points-selectable" = [ + {file = "backports.entry_points_selectable-1.1.1-py2.py3-none-any.whl", hash = "sha256:7fceed9532a7aa2bd888654a7314f864a3c16a4e710b34a58cfc0f08114c663b"}, + {file = "backports.entry_points_selectable-1.1.1.tar.gz", hash = "sha256:914b21a479fde881635f7af5adc7f6e38d6b274be32269070c53b698c60d5386"}, +] black = [ {file = "black-20.8b1.tar.gz", hash = "sha256:1c02557aa099101b9d21496f8a914e9ed2222ef70336404eeeac8edba836fbea"}, ] +cfgv = [ + {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"}, + {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"}, +] click = [ {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, @@ -425,9 +552,17 @@ courts-db = [ diff-match-patch-python = [ {file = "diff_match_patch_python-1.0.2.tar.gz", hash = "sha256:5a833417344def272ad7dee7c5d455cf3aaf4fb0ffb58029d73e29512dd3ed48"}, ] +distlib = [ + {file = "distlib-0.3.4-py2.py3-none-any.whl", hash = "sha256:6564fe0a8f51e734df6333d08b8b94d4ea8ee6b99b5ed50613f731fd4089f34b"}, + {file = "distlib-0.3.4.zip", hash = "sha256:e4b58818180336dc9c529bfb9a0b58728ffc09ad92027a3f30b7cd91e3458579"}, +] exrex = [ {file = "exrex-0.10.5.tar.gz", hash = "sha256:3fb8b18fd9832eaff8b13dc042a4f63b13c5d684ee069f70a23ddfc6bcb708f3"}, ] +filelock = [ + {file = "filelock-3.4.2-py3-none-any.whl", hash = "sha256:cf0fc6a2f8d26bd900f19bf33915ca70ba4dd8c56903eeb14e1e7a2fd7590146"}, + {file = "filelock-3.4.2.tar.gz", hash = "sha256:38b4f4c989f9d06d44524df1b24bd19e167d851f19b50bf3e3559952dddc5b80"}, +] flake8 = [ {file = "flake8-3.9.0-py2.py3-none-any.whl", hash = "sha256:12d05ab02614b6aee8df7c36b97d1a3b2372761222b19b58621355e82acddcff"}, {file = "flake8-3.9.0.tar.gz", hash = "sha256:78873e372b12b093da7b5e5ed302e8ad9e988b38b063b61ad937f26ca58fc5f0"}, @@ -439,6 +574,10 @@ hyperscan = [ {file = "hyperscan-0.2.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:fd0d0fe64484443b9e5ee1e8b156a30f2ba91494b1343fae5c943130ff847607"}, {file = "hyperscan-0.2.0.tar.gz", hash = "sha256:10cb8939d7db85d522ed319031ff5ab86fd0133126b986290f01aa83dbfb9ff7"}, ] +identify = [ + {file = "identify-2.4.0-py2.py3-none-any.whl", hash = "sha256:eba31ca80258de6bb51453084bff4a923187cd2193b9c13710f2516ab30732cc"}, + {file = "identify-2.4.0.tar.gz", hash = "sha256:a33ae873287e81651c7800ca309dc1f84679b763c9c8b30680e16fbfa82f0107"}, +] importlib-metadata = [ {file = "importlib_metadata-3.4.0-py3-none-any.whl", hash = "sha256:ace61d5fc652dc280e7b6b4ff732a9c2d40db2c0f92bc6cb74e07b73d53a1771"}, {file = "importlib_metadata-3.4.0.tar.gz", hash = "sha256:fa5daa4477a7414ae34e95942e4dd07f62adf589143c875c133c1e53c4eff38d"}, @@ -616,6 +755,10 @@ mypy-extensions = [ {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, ] +nodeenv = [ + {file = "nodeenv-1.6.0-py2.py3-none-any.whl", hash = "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7"}, + {file = "nodeenv-1.6.0.tar.gz", hash = "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b"}, +] pathspec = [ {file = "pathspec-0.8.1-py2.py3-none-any.whl", hash = "sha256:aa0cb481c4041bf52ffa7b0d8fa6cd3e88a2ca4879c533c9153882ee2556790d"}, {file = "pathspec-0.8.1.tar.gz", hash = "sha256:86379d6b86d75816baba717e64b1a3a3469deb93bb76d613c9ce79edc5cb68fd"}, @@ -623,6 +766,14 @@ pathspec = [ pdoc3 = [ {file = "pdoc3-0.10.0.tar.gz", hash = "sha256:5f22e7bcb969006738e1aa4219c75a32f34c2d62d46dc9d2fb2d3e0b0287e4b7"}, ] +platformdirs = [ + {file = "platformdirs-2.4.1-py3-none-any.whl", hash = "sha256:1d7385c7db91728b83efd0ca99a5afb296cab9d0ed8313a45ed8ba17967ecfca"}, + {file = "platformdirs-2.4.1.tar.gz", hash = "sha256:440633ddfebcc36264232365d7840a970e75e1018d15b4327d11f91909045fda"}, +] +pre-commit = [ + {file = "pre_commit-2.16.0-py2.py3-none-any.whl", hash = "sha256:758d1dc9b62c2ed8881585c254976d66eae0889919ab9b859064fc2fe3c7743e"}, + {file = "pre_commit-2.16.0.tar.gz", hash = "sha256:fe9897cac830aa7164dbd02a4e7b90cae49630451ce88464bca73db486ba9f65"}, +] pyahocorasick = [ {file = "pyahocorasick-1.4.1.tar.gz", hash = "sha256:fe076da3b0b20dbb619b0fb6478af8766b06679c0e359a2bfb189d3f07ddeecf"}, ] @@ -642,6 +793,41 @@ pylint-json2html = [ {file = "pylint-json2html-0.3.0.tar.gz", hash = "sha256:1a7a3b4e018ba6b46cf44c20de5f3b7b3bd0d5604963456dae6ea733d2ee5ac8"}, {file = "pylint_json2html-0.3.0-py3-none-any.whl", hash = "sha256:b646a6b1e190e730967219cd4ae9bb8217e218cd8f34ecc7f15d0803cb13f9d8"}, ] +pyyaml = [ + {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, + {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, + {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, + {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, + {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, + {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, + {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, + {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, + {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, + {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, + {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, + {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, + {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, + {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, + {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, +] regex = [ {file = "regex-2021.4.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:619d71c59a78b84d7f18891fe914446d07edd48dc8328c8e149cbe0929b4e000"}, {file = "regex-2021.4.4-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:47bf5bf60cf04d72bf6055ae5927a0bd9016096bf3d742fa50d9bf9f45aa0711"}, @@ -738,6 +924,10 @@ typing-extensions = [ {file = "typing_extensions-3.7.4.3-py3-none-any.whl", hash = "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918"}, {file = "typing_extensions-3.7.4.3.tar.gz", hash = "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c"}, ] +virtualenv = [ + {file = "virtualenv-20.10.0-py2.py3-none-any.whl", hash = "sha256:4b02e52a624336eece99c96e3ab7111f469c24ba226a53ec474e8e787b365814"}, + {file = "virtualenv-20.10.0.tar.gz", hash = "sha256:576d05b46eace16a9c348085f7d0dc8ef28713a2cabaa1cf0aea41e8f12c9218"}, +] wrapt = [ {file = "wrapt-1.12.1.tar.gz", hash = "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7"}, ] diff --git a/pyproject.toml b/pyproject.toml index 269f8ec..dead9c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,13 +48,18 @@ hyperscan = ">= 0.1.5" exrex = "^0.10.5" roman = "^3.3" pdoc3 = "^0.10.0" +pre-commit = "^2.16.0" [tool.black] include = '''.*\.pyi?$''' line-length = 79 [tool.isort] -profile = "black" +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true line_length = 79 [tool.pylint.master] From 1fed0e1afb9f92b3704b49f1bc46d54a03d0e68d Mon Sep 17 00:00:00 2001 From: Michael Lissner Date: Mon, 27 Dec 2021 13:26:49 -0800 Subject: [PATCH 4/9] cleanup(all): Run pre-commit hook --- CHANGES.md | 24 ++++++++++++------------ eyecite/find.py | 4 ++-- eyecite/helpers.py | 13 ++++++++----- eyecite/resolve.py | 2 +- eyecite/tokenizers.py | 6 ++---- tests/test_TokenizeTest.py | 4 +--- 6 files changed, 26 insertions(+), 27 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d79b938..fdb3da7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,7 +11,7 @@ Changes: - None yet Fixes: - - Initial support for finding short cites with non-standard regexes, including fixing short cite extraction for `Mich.`, `N.Y.2d` and `Pa.`. + - Initial support for finding short cites with non-standard regexes, including fixing short cite extraction for `Mich.`, `N.Y.2d` and `Pa.`. ## Current @@ -22,7 +22,7 @@ Features: - Autogenerated documentation Changes: - - This version lands one more iteration of the APIs to make them more consistent. Sorry. Hopefully this will be the last of its kind for a while. The need for these changes became obvious when we began generating documentation. The changes are all in name only, not in functionality. So: 1) the `annotate` function is renamed as `annotate_citations`; 2) The `find_citations` module has been renamed `find` (so, do `from eyecite.find import get_citations` instead of `from eyecite.find_citations import get_citations`); 3) The `cleaners` module is now named `clean`; and 4) The `clean_text` function has been moved from `utils` to `clean` (so, do `from eyecite.clean import clean_text` instead of `from eyecite.utils import clean_text`). + - This version lands one more iteration of the APIs to make them more consistent. Sorry. Hopefully this will be the last of its kind for a while. The need for these changes became obvious when we began generating documentation. The changes are all in name only, not in functionality. So: 1) the `annotate` function is renamed as `annotate_citations`; 2) The `find_citations` module has been renamed `find` (so, do `from eyecite.find import get_citations` instead of `from eyecite.find_citations import get_citations`); 3) The `cleaners` module is now named `clean`; and 4) The `clean_text` function has been moved from `utils` to `clean` (so, do `from eyecite.clean import clean_text` instead of `from eyecite.utils import clean_text`). **2.2.0 - 2021-06-04** @@ -35,15 +35,15 @@ Features: - We now use page-based heuristics while looking up the citation that a pin cite refers to. For example, if an opinion says: > 1 U.S. 200. blah blah. 2 We Missed This 20. blah blah. Id. at 22. - + We might miss the second citation for whatever reason. The pin cite refers to the second citation, not the first, and you can be sure of that because the first citation begins on page 200 and the pin cite references page 22. When resolving the pin cite, we will no longer link it up to the first citation. - + Similarly, an analysis of the Caselaw Access Project's dataset indicates that all but the longest ~300 cases are shorter than 150 pages, so we also now ignore pin cites that don't make sense according to that heuristic. For example, this (made up) pin cite is also likely wrong because it's overwhelmingly unlikely that `1 U.S. 200` is 632 pages long: > 1 U.S. 200 blah blah 1 U.S. 832 - - The longest case in the Caselaw Access Project collection is [United States v. Philip Morris USA, Inc](https://cite.case.law/f-supp-2d/449/1/), at 986 pages, in case you were wondering. Figures. - + + The longest case in the Caselaw Access Project collection is [United States v. Philip Morris USA, Inc](https://cite.case.law/f-supp-2d/449/1/), at 986 pages, in case you were wondering. Figures. + [Issue #74][74], [PR #79][79]. Changes: @@ -84,15 +84,15 @@ Changes: Fixes: - Fixes crashing errors on some partial supra, id, and short form citations. - Fixes unbalanced tags created by annotation. - - Fixes year parsing to move away from `isdigit`, which can capture + - Fixes year parsing to move away from `isdigit`, which can capture unicode superscript numbers like "123 U.S. 456 (196⁴)" - Allow years all the way back to 1600 instead of 1754. Anybody got a citation from before then? - - Page number matching is tightened to be much more strict about how it - matches Roman numerals. This change will prevent some citations from being - matched if they have extremely common Roman numerals. See #56 for a full + - Page number matching is tightened to be much more strict about how it + matches Roman numerals. This change will prevent some citations from being + matched if they have extremely common Roman numerals. See #56 for a full discussion. - + **2.0.2** - Adds missing dependency to toml file, nukes setup.py and requirements.txt. We're now fully in the poetry world. diff --git a/eyecite/find.py b/eyecite/find.py index b865135..910c729 100644 --- a/eyecite/find.py +++ b/eyecite/find.py @@ -121,10 +121,10 @@ def _extract_full_citation( # journals). Get the set of all sources that matched, preferring exact # matches to variations: token = cast(CitationToken, words[index]) - cite_sources = set( + cite_sources = { e.reporter.source for e in (token.exact_editions or token.variation_editions) - ) + } # get citation_class based on cite_sources citation_class: Type[ResourceCitation] diff --git a/eyecite/helpers.py b/eyecite/helpers.py index c367a85..b75c327 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -40,14 +40,17 @@ def get_court_by_paren(paren_string: str) -> Optional[str]: Does not work on SCOTUS, since that court lacks parentheticals, and needs to be handled after disambiguation has been completed. """ - #remove punctuation and convert to upper case - court_str = re.sub(r'[^\w\s]', '',paren_string).upper() + # remove punctuation and convert to upper case + court_str = re.sub(r"[^\w\s]", "", paren_string).upper() court_code = None if court_str: - # Map the string to a court, if possible. + # Map the string to a court, if possible. for court in courts: - #remove punctuation and convert to upper case because punctuation is often unreliable - if re.sub(r'[^\w\s]', '',court["citation_string"]).upper() == court_str: + # remove punctuation and convert to upper case because punctuation is often unreliable + if ( + re.sub(r"[^\w\s]", "", court["citation_string"]).upper() + == court_str + ): court_code = court["id"] break return court_code diff --git a/eyecite/resolve.py b/eyecite/resolve.py index 7127f75..d10290a 100644 --- a/eyecite/resolve.py +++ b/eyecite/resolve.py @@ -141,7 +141,7 @@ def _resolve_shortcase_citation( candidates.append((full_citation, resource)) # Remove duplicates and only accept if one candidate remains - if len(set(resource for full_citation, resource in candidates)) == 1: + if len({resource for full_citation, resource in candidates}) == 1: return candidates[0][1] # Otherwise, if there is an antecedent guess, try to refine further diff --git a/eyecite/tokenizers.py b/eyecite/tokenizers.py index ffe61dd..7a96f00 100644 --- a/eyecite/tokenizers.py +++ b/eyecite/tokenizers.py @@ -362,9 +362,7 @@ class AhocorasickTokenizer(Tokenizer): def __post_init__(self): """Set up helpers to narrow down possible extractors.""" # Build a set of all extractors that don't list required strings - self.unfiltered_extractors = set( - e for e in EXTRACTORS if not e.strings - ) + self.unfiltered_extractors = {e for e in EXTRACTORS if not e.strings} # Build a pyahocorasick filter for all case-sensitive extractors self.case_sensitive_filter = self.make_ahocorasick_filter( (s, e) @@ -445,7 +443,7 @@ def on_match(index, start, end, flags, context): byte_to_str_offset = {} last_byte_offset = 0 str_offset = 0 - byte_offsets = sorted(set(i for m in matches for i in m[1])) + byte_offsets = sorted({i for m in matches for i in m[1]}) for byte_offset in byte_offsets: try: str_offset += len( diff --git a/tests/test_TokenizeTest.py b/tests/test_TokenizeTest.py index ad2dc16..ba542d7 100644 --- a/tests/test_TokenizeTest.py +++ b/tests/test_TokenizeTest.py @@ -99,7 +99,5 @@ def test_extractor_filter(self): ("id.", "ibid."), } extractors = AhocorasickTokenizer().get_extractors(text) - extractor_strings = set( - tuple(e.strings) for e in extractors if e.strings - ) + extractor_strings = {tuple(e.strings) for e in extractors if e.strings} self.assertEqual(expected_strings, extractor_strings) From a575a05670b3fa55ceb1e406bb1a0af5e26d2066 Mon Sep 17 00:00:00 2001 From: Michael Lissner Date: Mon, 27 Dec 2021 13:29:02 -0800 Subject: [PATCH 5/9] fix(git): Ignore cleanup revs --- .git-blame-ignore-revs | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..fa5bb63 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,13 @@ +# This file lists commits that changed large sections of the code and are best +# ignored by git blame (which tools like PyCharm use for their "annotate" +# feature). +# +# To use this file, go to the root of this project, and run: +# +# git config blame.ignoreRevsFile .git-blame-ignore-revs +# +# That'll tell git to use this file. For this to work, you need Git 2.23.0 +# (released late 2019) or later. + +# Run pre-commit +1fed0e1afb9f92b3704b49f1bc46d54a03d0e68d From fd46c1e7044575ddc9af54303680a58e61818a58 Mon Sep 17 00:00:00 2001 From: Michael Lissner Date: Mon, 27 Dec 2021 13:37:37 -0800 Subject: [PATCH 6/9] feat(dx): Adds pre-commit hook for flake --- .pre-commit-config.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9e5e054..8cf1936 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -36,3 +36,8 @@ repos: hooks: - id: isort name: isort (python) + + - repo: https://github.com/pycqa/flake8 + rev: 3.9.0 + hooks: + - id: flake8 From 09c3589b28cb65933fcbf910aa78a30d8355b845 Mon Sep 17 00:00:00 2001 From: Michael Lissner Date: Mon, 27 Dec 2021 13:37:59 -0800 Subject: [PATCH 7/9] cleanup(flake): Fix formatting --- eyecite/helpers.py | 4 ++-- tests/test_FindTest.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/eyecite/helpers.py b/eyecite/helpers.py index b75c327..1055fc5 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -23,7 +23,6 @@ POST_SHORT_CITATION_REGEX, YEAR_REGEX, ) -from eyecite.utils import strip_punct BACKWARD_SEEK = 28 # Median case name length in the CL db is 28 (2016-02-26) @@ -46,7 +45,8 @@ def get_court_by_paren(paren_string: str) -> Optional[str]: if court_str: # Map the string to a court, if possible. for court in courts: - # remove punctuation and convert to upper case because punctuation is often unreliable + # remove punctuation and convert to upper case because punctuation + # is often unreliable if ( re.sub(r"[^\w\s]", "", court["citation_string"]).upper() == court_str diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index f8d80da..9464ebf 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -116,10 +116,12 @@ def test_find_citations(self): year=1982)]), # Test to disambiguate SC & Supreme Court ('lissner v. test, 263 F.Supp. 26 (SC 1967)', - [case_citation(volume='263',page='26', year=1967, reporter='F.Supp.', - metadata={'plaintiff': 'lissner', - 'defendant' : 'test', - 'court' : 'sc'})]), + [case_citation(volume='263', page='26', year=1967, + reporter='F.Supp.', metadata={ + 'plaintiff': 'lissner', + 'defendant' : 'test', + 'court' : 'sc' + })]), # Test with court and extra information ('bob lissner v. test 1 U.S. 12, 347-348 (4th Cir. 1982)', [case_citation(page='12', year=1982, From 1fce984505e95db6722b64b76489949b32fe1549 Mon Sep 17 00:00:00 2001 From: Michael Lissner Date: Mon, 27 Dec 2021 14:32:58 -0800 Subject: [PATCH 8/9] feat(ci): More commit hooks --- .pre-commit-config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8cf1936..be9dd67 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,6 +13,8 @@ repos: - id: check-merge-conflict - id: check-toml - id: check-yaml + - id: debug-statements + - id: detect-private-key - id: fix-byte-order-marker - id: fix-encoding-pragma args: [--remove] From f2dbd7f9be9fea8be2cae8a875281bf7d0216311 Mon Sep 17 00:00:00 2001 From: Michael Lissner Date: Mon, 24 Jan 2022 15:42:29 -0800 Subject: [PATCH 9/9] feat(ci): Only run one test workflow per PR at a time --- .github/workflows/tests.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d13a0e5..e808e5c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -44,3 +44,9 @@ jobs: - name: Run tests run: python -m unittest discover -s tests -p 'test_*.py' + +# Cancel the current workflow (tests) for pull requests (head_ref) only. See: +# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#example-using-a-fallback-value +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true \ No newline at end of file