diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7a115172..08a67bb4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,8 +21,8 @@ jobs: uses: dschep/install-pipenv-action@v1 - name: Install dependencies run: | - # install dependencies according to the lock file - pipenv install --dev --ignore-pipfile --python ${{ steps.setup-python.outputs.python-version }} + # install dependencies according to the pip file + pipenv install --dev --skip-lock --python ${{ steps.setup-python.outputs.python-version }} pipenv run python -m spacy download en_core_web_sm - name: Run test with pytest run: | diff --git a/Pipfile b/Pipfile index 4b732bbe..d0dcb74c 100644 --- a/Pipfile +++ b/Pipfile @@ -25,6 +25,7 @@ scipy = "*" pylint = "*" importlib-metadata = "*" atomicwrites = "*" +wordcloud = "*" [pipenv] allow_prereleases = true diff --git a/Pipfile.lock b/Pipfile.lock index be6a297c..dcef15d3 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "6b3bc1c36fb65fe6bdb73ed8dec80bcae9330690e3c1033ffeeb05209f5ce6dc" + "sha256": "f55093d03061582fa85ff34a474bb93b1e958b4213717869478f9d11bdfa859d" }, "pipfile-spec": 6, "requires": {}, @@ -55,11 +55,11 @@ }, "astroid": { "hashes": [ - "sha256:ad63b8552c70939568966811a088ef0bc880f99a24a00834abd0e3681b514f91", - "sha256:bea3f32799fbb8581f58431c12591bc20ce11cbc90ad82e2ea5717d94f2080d5" + "sha256:4db03ab5fc3340cf619dbc25e42c2cc3755154ce6009469766d7143d1fc2ee4e", + "sha256:8a398dfce302c13f14bab13e2b14fe385d32b73f4e4853b9bdfb64598baa1975" ], - "markers": "python_version >= '3.6'", - "version": "==2.5.3" + "markers": "python_version ~= '3.6'", + "version": "==2.5.6" }, "async-generator": { "hashes": [ @@ -134,11 +134,11 @@ }, "cachetools": { "hashes": [ - "sha256:1d9d5f567be80f7c07d765e21b814326d78c61eb0c3a637dffc0e5d1796cb2e2", - "sha256:f469e29e7aa4cff64d8de4aad95ce76de8ea1125a16c68e0d93f65c3c3dc92e9" + "sha256:2cc0b89715337ab6dbba85b5b50effe2b0c74e035d83ee8ed637cf52f12ae001", + "sha256:61b5ed1e22a0924aed1d23b478f37e8d52549ff8a961de2909c69bf950020cff" ], "markers": "python_version ~= '3.5'", - "version": "==4.2.1" + "version": "==4.2.2" }, "catalogue": { "hashes": [ @@ -222,14 +222,6 @@ "index": "pypi", "version": "==2.1.11" }, - "colorama": { - "hashes": [ - "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b", - "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2" - ], - "markers": "sys_platform == 'win32' and sys_platform == 'win32' and sys_platform == 'win32'", - "version": "==0.4.4" - }, "commonmark": { "hashes": [ "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60", @@ -321,51 +313,13 @@ ], "version": "==2.0.5" }, - "cython": { - "hashes": [ - "sha256:03f6bbb380ad0acb744fb06e42996ea217e9d00016ca0ff6f2e7d60f580d0360", - "sha256:05e8cfd3a3a6087aec49a1ae08a89171db991956209406d1e5576f9db70ece52", - "sha256:05eb79efc8029d487251c8a2702a909a8ba33c332e06d2f3980866541bd81253", - "sha256:094d28a34c3fa992ae02aea1edbe6ff89b3cc5870b6ee38b5baeb805dc57b013", - "sha256:0c70e842e52e2f50cc43bad43b5e5bc515f30821a374e544abb0e0746f2350ff", - "sha256:1dcdaa319558eb924294a554dcf6c12383ec947acc7e779e8d3622409a7f7d28", - "sha256:1fc5bdda28f25fec44e4721677458aa509d743cd350862270309d61aa148d6ff", - "sha256:280573a01d9348d44a42d6a9c651d9f7eb1fe9217df72555b2a118f902996a10", - "sha256:298ceca7b0f0da4205fcb0b7c9ac9e120e2dafffd5019ba1618e84ef89434b5a", - "sha256:4074a8bff0040035673cc6dd365a762476d6bff4d03d8ce6904e3e53f9a25dc8", - "sha256:41e7068e95fbf9ec94b41437f989caf9674135e770a39cdb9c00de459bafd1bc", - "sha256:47e5e1502d52ef03387cf9d3b3241007961a84a466e58a3b74028e1dd4957f8c", - "sha256:521340844cf388d109ceb61397f3fd5250ccb622a1a8e93559e8de76c80940a9", - "sha256:6c53338c1811f8c6d7f8cb7abd874810b15045e719e8207f957035c9177b4213", - "sha256:75c2dda47dcc3c77449712b1417bb6b89ec3b7b02e18c64262494dceffdf455e", - "sha256:773c5a98e463b52f7e8197254b39b703a5ea1972aef3a94b3b921515d77dd041", - "sha256:78c3068dcba300d473fef57cdf523e34b37de522f5a494ef9ee1ac9b4b8bbe3f", - "sha256:7bc18fc5a170f2c1cef5387a3d997c28942918bbee0f700e73fd2178ee8d474d", - "sha256:7f89eff20e4a7a64b55210dac17aea711ed8a3f2e78f2ff784c0e984302583dd", - "sha256:89458b49976b1dee5d89ab4ac943da3717b4292bf624367e862e4ee172fcce99", - "sha256:986f871c0fa649b293061236b93782d25c293a8dd8117c7ba05f8a61bdc261ae", - "sha256:a0f495a4fe5278aab278feee35e6102efecde5176a8a74dd28c28e3fc5c8d7c7", - "sha256:a14aa436586c41633339415de82a41164691d02d3e661038da533be5d40794a5", - "sha256:b8ab3ab38afc47d8f4fe629b836243544351cef681b6bdb1dc869028d6fdcbfb", - "sha256:bb487881608ebd293592553c618f0c83316f4f13a64cb18605b1d2fb9fd3da3e", - "sha256:c0b24bfe3431b3cb7ced323bca813dbd13aca973a1475b512d3331fd0de8ec60", - "sha256:c7894c06205166d360ab2915ae306d1f7403e9ce3d3aaeff4095eaf98e42ce66", - "sha256:d4039bb7f234ad32267c55e72fd49fb56078ea102f9d9d8559f6ec34d4887630", - "sha256:e4d6bb8703d0319eb04b7319b12ea41580df44fd84d83ccda13ea463c6801414", - "sha256:e8fab9911fd2fa8e5af407057cb8bdf87762f983cba483fa3234be20a9a0af77", - "sha256:f3818e578e687cdb21dc4aa4a3bc6278c656c9c393e9eda14dd04943f478863d", - "sha256:fe666645493d72712c46e4fbe8bec094b06aec3c337400479e9704439c9d9586" - ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", - "version": "==0.29.14" - }, "decorator": { "hashes": [ - "sha256:d9f2d2863183a3c0df05f4b786f2e6b8752c093b3547a558f287bf3022fd2bf4", - "sha256:f2e71efb39412bfd23d878e896a51b07744f2e2250b2e87d158e76828c5ae202" + "sha256:6f201a6c4dac3d187352661f508b9364ec8091217442c9478f1f83c003a0f060", + "sha256:945d84890bb20cc4a2f4a31fc4311c0c473af65ea318617f13a7257c9a58bc98" ], "markers": "python_version >= '3.5'", - "version": "==5.0.6" + "version": "==5.0.7" }, "defusedxml": { "hashes": [ @@ -466,11 +420,11 @@ }, "ipython": { "hashes": [ - "sha256:9c900332d4c5a6de534b4befeeb7de44ad0cc42e8327fa41b7685abde58cec74", - "sha256:c0ce02dfaa5f854809ab7413c601c4543846d9da81010258ecdab299b542d199" + "sha256:3455b020a895710c4366e8d1b326e5ee6aa684607907fc96895e7b8359569f49", + "sha256:69178f32bf9c6257430b6f592c3ae230c32861a1966d2facec454e09078e232d" ], "markers": "python_version >= '3.3'", - "version": "==7.22.0" + "version": "==7.23.0" }, "ipython-genutils": { "hashes": [ @@ -705,6 +659,14 @@ "index": "pypi", "version": "==3.4.1" }, + "matplotlib-inline": { + "hashes": [ + "sha256:5cf1176f554abb4fa98cb362aa2b55c500147e4bdbb07e3fda359143e1da0811", + "sha256:f41d5ff73c9f5385775d5c0bc13b424535c8402fe70ea8210f93e11f3683993e" + ], + "markers": "python_version >= '3.5'", + "version": "==0.1.2" + }, "mccabe": { "hashes": [ "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", @@ -775,11 +737,11 @@ }, "nltk": { "hashes": [ - "sha256:1235660f52ab10fda34d5277096724747f767b2903e1c0c4e14bde013552c9ba", - "sha256:cbc2ed576998fcf7cd181eeb3ca029e5f0025b264074b4beb57ce780673f8b86" + "sha256:240e23ab1ab159ef9940777d30c7c72d7e76d91877099218a7585370c11f6b9e", + "sha256:57d556abed621ab9be225cc6d2df1edce17572efb67a3d754630c9f8381503eb" ], "markers": "python_version >= '3.5'", - "version": "==3.6.1" + "version": "==3.6.2" }, "notebook": { "hashes": [ @@ -829,25 +791,25 @@ }, "pandas": { "hashes": [ - "sha256:09761bf5f8c741d47d4b8b9073288de1be39bbfccc281d70b889ade12b2aad29", - "sha256:0f27fd1adfa256388dc34895ca5437eaf254832223812afd817a6f73127f969c", - "sha256:43e00770552595c2250d8d712ec8b6e08ca73089ac823122344f023efa4abea3", - "sha256:46fc671c542a8392a4f4c13edc8527e3a10f6cb62912d856f82248feb747f06e", - "sha256:475b7772b6e18a93a43ea83517932deff33954a10d4fbae18d0c1aba4182310f", - "sha256:4d821b9b911fc1b7d428978d04ace33f0af32bb7549525c8a7b08444bce46b74", - "sha256:5e3c8c60541396110586bcbe6eccdc335a38e7de8c217060edaf4722260b158f", - "sha256:621c044a1b5e535cf7dcb3ab39fca6f867095c3ef223a524f18f60c7fee028ea", - "sha256:72ffcea00ae8ffcdbdefff800284311e155fbb5ed6758f1a6110fc1f8f8f0c1c", - "sha256:8a051e957c5206f722e83f295f95a2cf053e890f9a1fba0065780a8c2d045f5d", - "sha256:97b1954533b2a74c7e20d1342c4f01311d3203b48f2ebf651891e6a6eaf01104", - "sha256:9f5829e64507ad10e2561b60baf285c470f3c4454b007c860e77849b88865ae7", - "sha256:a93e34f10f67d81de706ce00bf8bb3798403cabce4ccb2de10c61b5ae8786ab5", - "sha256:d59842a5aa89ca03c2099312163ffdd06f56486050e641a45d926a072f04d994", - "sha256:dbb255975eb94143f2e6ec7dadda671d25147939047839cd6b8a4aff0379bb9b", - "sha256:df6f10b85aef7a5bb25259ad651ad1cc1d6bb09000595cab47e718cbac250b1d" + "sha256:167693a80abc8eb28051fbd184c1b7afd13ce2c727a5af47b048f1ea3afefff4", + "sha256:2111c25e69fa9365ba80bbf4f959400054b2771ac5d041ed19415a8b488dc70a", + "sha256:298f0553fd3ba8e002c4070a723a59cdb28eda579f3e243bc2ee397773f5398b", + "sha256:2b063d41803b6a19703b845609c0b700913593de067b552a8b24dd8eeb8c9895", + "sha256:2cb7e8f4f152f27dc93f30b5c7a98f6c748601ea65da359af734dd0cf3fa733f", + "sha256:52d2472acbb8a56819a87aafdb8b5b6d2b3386e15c95bde56b281882529a7ded", + "sha256:612add929bf3ba9d27b436cc8853f5acc337242d6b584203f207e364bb46cb12", + "sha256:649ecab692fade3cbfcf967ff936496b0cfba0af00a55dfaacd82bdda5cb2279", + "sha256:68d7baa80c74aaacbed597265ca2308f017859123231542ff8a5266d489e1858", + "sha256:8d4c74177c26aadcfb4fd1de6c1c43c2bf822b3e0fc7a9b409eeaf84b3e92aaa", + "sha256:971e2a414fce20cc5331fe791153513d076814d30a60cd7348466943e6e909e4", + "sha256:9db70ffa8b280bb4de83f9739d514cd0735825e79eef3a61d312420b9f16b758", + "sha256:b730add5267f873b3383c18cac4df2527ac4f0f0eed1c6cf37fcb437e25cf558", + "sha256:bd659c11a4578af740782288cac141a322057a2e36920016e0fc7b25c5a4b686", + "sha256:c601c6fdebc729df4438ec1f62275d6136a0dd14d332fc0e8ce3f7d2aadb4dd6", + "sha256:d0877407359811f7b853b548a614aacd7dea83b0c0c84620a9a643f180060950" ], "markers": "python_full_version >= '3.7.1'", - "version": "==1.2.3" + "version": "==1.2.4" }, "pandocfilters": { "hashes": [ @@ -863,6 +825,14 @@ "markers": "python_version >= '3.6'", "version": "==0.8.2" }, + "pexpect": { + "hashes": [ + "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937", + "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c" + ], + "markers": "sys_platform != 'win32'", + "version": "==4.8.0" + }, "pickleshare": { "hashes": [ "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", @@ -987,6 +957,14 @@ ], "version": "==3.15.8" }, + "ptyprocess": { + "hashes": [ + "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", + "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" + ], + "markers": "os_name != 'nt'", + "version": "==0.7.0" + }, "py": { "hashes": [ "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", @@ -995,6 +973,37 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.10.0" }, + "pyarrow": { + "hashes": [ + "sha256:07d445dfe55eb7401afb7806ad47ce59b889cf50d6f5bfdb9e90371ef642e2e0", + "sha256:0f1d38f10c11a49f57f979010dce252c7102fea9c0424b4c2bfa1306b3aa3db3", + "sha256:0f2f289fa6a23a97622b0e1bbb4f9ff8440bee5182078c1f326ddc17ba680406", + "sha256:16de8d92de9173e64d1f0298b84cb03b3fe27786468a0caf8caabf34eef22852", + "sha256:20d5c17ef4d0144a39bf550db79abb16b3ab75e43813757375b842623852ade8", + "sha256:239606b385e3cd1d5dab598ccef8105fc258dbad1cf0c44295f8c1ca754ac62c", + "sha256:4a97ad44b2ce67c655296255df6e6c0c4d9c22426f964ceb912d3db013c14bbc", + "sha256:4b6cfa6ba09b1d205320116fad97487ff5976ea469748d23243d39f3c24ffee2", + "sha256:4cf77ac6ca87e0b1c6da4153c00d8af7d631e4d97c59b315f6a11e8d694bf531", + "sha256:547d49a3eee9386054ea8801133e573d1e0226d5f298f9b1d24a110c4873c83d", + "sha256:5f2fbff6c2eee6d81b38d4c8202b5a36ec7f506ebb84e6415950ab9f41995218", + "sha256:606dbfc128eec5673f48fd15e30c2cc23acdcdee3b5ab5f923078c9f787d6608", + "sha256:6a1cef994caf5da24d2bfc30e8bfee6a32c797292404cb33202c6896ca0a8f71", + "sha256:75187f0c4bab5259fb76808b4567850c5b94fc0fb54fdbdccccad029db5a1ca9", + "sha256:79bf9a6324f3e22d11ce405b0efb1efa8bca18560d6e53b5ea05495ef458ea8f", + "sha256:8910f11923ae453c89cac4c2a7322d5db7b9f7c60d2a4d48212ca72cd716aa12", + "sha256:8b655d955ff71bc5efd5a7575575df6d62d4b9d95354070c589be31498f379e7", + "sha256:8f8396766bb14ab609dcfe07eb1ecbe269d72f8601adb13076e733451dc7ffe6", + "sha256:977cac82e5e9eeed4c9d0b8da7941b903df922c15e650841f12b72987eb0332b", + "sha256:98cd697c56c549d50496a3497a6abd34490ece57afaaa3c96f5961c6cce8db67", + "sha256:ab4d5dfc79b0bec9bb5030b06d065afc9f7085487b04a58f6dc97111016203f2", + "sha256:af02d8da74a46951ab41df6c5a0cbd00c419a3394e38c82f1d9f7b60159e9c8b", + "sha256:eae3cbf83b210995bcf1bc30dcf39072381165739f913930498fc50a540e87f7", + "sha256:ecad11625a532242c5ab513340cffc989a2f69b13440da5a4a539fad582f9109", + "sha256:ed78652628653aeb77cd013de637c3dfd064f4770985f002ec7595954383688e" + ], + "markers": "python_version < '3.9'", + "version": "==4.0.0" + }, "pycparser": { "hashes": [ "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", @@ -1005,10 +1014,10 @@ }, "pydeck": { "hashes": [ - "sha256:9f77d28b45504010c48cc7a43bbc2108749862f6738f94dba2e9ad16a39b0be1", - "sha256:a431484424e92f75454cd5066935241d9244bc8c78afe478a7f83143878f28d0" + "sha256:24ffadfba72cf610a413d49bd9542f2f4fd745f33d6535dd339b121e9e084be8", + "sha256:e0d1f36e5cf0f8181f82d25a5f33381b8296caaac671f355afe4a660964d0dea" ], - "version": "==0.6.1" + "version": "==0.6.2" }, "pygments": { "hashes": [ @@ -1020,11 +1029,11 @@ }, "pylint": { "hashes": [ - "sha256:209d712ec870a0182df034ae19f347e725c1e615b2269519ab58a35b3fcbbe7a", - "sha256:bd38914c7731cdc518634a8d3c5585951302b6e2b6de60fbb3f7a0220e21eeee" + "sha256:586d8fa9b1891f4b725f587ef267abe2a1bad89d6b184520c7f07a253dd6e217", + "sha256:f7e2072654a6b6afdf5e2fb38147d3e2d2d43c89f648637baab63e026481279b" ], "index": "pypi", - "version": "==2.7.4" + "version": "==2.8.2" }, "pyparsing": { "hashes": [ @@ -1064,38 +1073,6 @@ ], "version": "==2021.1" }, - "pywin32": { - "hashes": [ - "sha256:1c204a81daed2089e55d11eefa4826c05e604d27fe2be40b6bf8db7b6a39da63", - "sha256:27a30b887afbf05a9cbb05e3ffd43104a9b71ce292f64a635389dbad0ed1cd85", - "sha256:350c5644775736351b77ba68da09a39c760d75d2467ecec37bd3c36a94fbed64", - "sha256:60a8fa361091b2eea27f15718f8eb7f9297e8d51b54dbc4f55f3d238093d5190", - "sha256:638b68eea5cfc8def537e43e9554747f8dee786b090e47ead94bfdafdb0f2f50", - "sha256:8151e4d7a19262d6694162d6da85d99a16f8b908949797fd99c83a0bfaf5807d", - "sha256:a3b4c48c852d4107e8a8ec980b76c94ce596ea66d60f7a697582ea9dce7e0db7", - "sha256:b1609ce9bd5c411b81f941b246d683d6508992093203d4eb7f278f4ed1085c3f", - "sha256:d7e8c7efc221f10d6400c19c32a031add1c4a58733298c09216f57b4fde110dc", - "sha256:fbb3b1b0fbd0b4fc2a3d1d81fe0783e30062c1abed1d17c32b7879d55858cfae" - ], - "markers": "sys_platform == 'win32'", - "version": "==300" - }, - "pywinpty": { - "hashes": [ - "sha256:1e525a4de05e72016a7af27836d512db67d06a015aeaf2fa0180f8e6a039b3c2", - "sha256:2740eeeb59297593a0d3f762269b01d0285c1b829d6827445fcd348fb47f7e70", - "sha256:2d7e9c881638a72ffdca3f5417dd1563b60f603e1b43e5895674c2a1b01f95a0", - "sha256:33df97f79843b2b8b8bc5c7aaf54adec08cc1bae94ee99dfb1a93c7a67704d95", - "sha256:5fb2c6c6819491b216f78acc2c521b9df21e0f53b9a399d58a5c151a3c4e2a2d", - "sha256:8fc5019ff3efb4f13708bd3b5ad327589c1a554cb516d792527361525a7cb78c", - "sha256:b358cb552c0f6baf790de375fab96524a0498c9df83489b8c23f7f08795e966b", - "sha256:dbd838de92de1d4ebf0dce9d4d5e4fc38d0b7b1de837947a18b57a882f219139", - "sha256:dd22c8efacf600730abe4a46c1388355ce0d4ab75dc79b15d23a7bd87bf05b48", - "sha256:e854211df55d107f0edfda8a80b39dfc87015bef52a8fe6594eb379240d81df2" - ], - "markers": "os_name == 'nt'", - "version": "==0.5.7" - }, "pyzmq": { "hashes": [ "sha256:13465c1ff969cab328bc92f7015ce3843f6e35f8871ad79d236e4fbc85dbe4cb", @@ -1190,67 +1167,67 @@ }, "scikit-learn": { "hashes": [ - "sha256:0567a2d29ad08af98653300c623bd8477b448fe66ced7198bef4ed195925f082", - "sha256:087dfede39efb06ab30618f9ab55a0397f29c38d63cd0ab88d12b500b7d65fd7", - "sha256:1adf483e91007a87171d7ce58c34b058eb5dab01b5fee6052f15841778a8ecd8", - "sha256:259ec35201e82e2db1ae2496f229e63f46d7f1695ae68eef9350b00dc74ba52f", - "sha256:3c4f07f47c04e81b134424d53c3f5e16dfd7f494e44fd7584ba9ce9de2c5e6c1", - "sha256:4562dcf4793e61c5d0f89836d07bc37521c3a1889da8f651e2c326463c4bd697", - "sha256:4ddd2b6f7449a5d539ff754fa92d75da22de261fd8fdcfb3596799fadf255101", - "sha256:54be0a60a5a35005ad69c75902e0f5c9f699db4547ead427e97ef881c3242e6f", - "sha256:5580eba7345a4d3b097be2f067cc71a306c44bab19e8717a30361f279c929bea", - "sha256:7b04691eb2f41d2c68dbda8d1bd3cb4ef421bdc43aaa56aeb6c762224552dfb6", - "sha256:826b92bf45b8ad80444814e5f4ac032156dd481e48d7da33d611f8fe96d5f08b", - "sha256:83b21ff053b1ff1c018a2d24db6dd3ea339b1acfbaa4d9c881731f43748d8b3b", - "sha256:8772b99d683be8f67fcc04789032f1b949022a0e6880ee7b75a7ec97dbbb5d0b", - "sha256:895dbf2030aa7337649e36a83a007df3c9811396b4e2fa672a851160f36ce90c", - "sha256:8aa1b3ac46b80eaa552b637eeadbbce3be5931e4b5002b964698e33a1b589e1e", - "sha256:9599a3f3bf33f73fed0fe06d1dfa4e6081365a58c1c807acb07271be0dce9733", - "sha256:99349d77f54e11f962d608d94dfda08f0c9e5720d97132233ebdf35be2858b2d", - "sha256:9a24d1ccec2a34d4cd3f2a1f86409f3f5954cc23d4d2270ba0d03cf018aa4780", - "sha256:9bed8a1ef133c8e2f13966a542cb8125eac7f4b67dcd234197c827ba9c7dd3e0", - "sha256:9c6097b6a9b2bafc5e0f31f659e6ab5e131383209c30c9e978c5b8abdac5ed2a", - "sha256:9dfa564ef27e8e674aa1cc74378416d580ac4ede1136c13dd555a87996e13422", - "sha256:a0334a1802e64d656022c3bfab56a73fbd6bf4b1298343f3688af2151810bbdf", - "sha256:a29460499c1e62b7a830bb57ca42e615375a6ab1bcad053cd25b493588348ea8", - "sha256:a36e159a0521e13bbe15ca8c8d038b3a1dd4c7dad18d276d76992e03b92cf643", - "sha256:abe835a851610f87201819cb315f8d554e1a3e8128912783a31e87264ba5ffb7", - "sha256:c13ebac42236b1c46397162471ea1c46af68413000e28b9309f8c05722c65a09", - "sha256:c3deb3b19dd9806acf00cf0d400e84562c227723013c33abefbbc3cf906596e9", - "sha256:c658432d8a20e95398f6bb95ff9731ce9dfa343fdf21eea7ec6a7edfacd4b4d9", - "sha256:c7f4eb77504ac586d8ac1bde1b0c04b504487210f95297235311a0ab7edd7e38", - "sha256:d54dbaadeb1425b7d6a66bf44bee2bb2b899fe3e8850b8e94cfb9c904dcb46d0", - "sha256:ddb52d088889f5596bc4d1de981f2eca106b58243b6679e4782f3ba5096fd645", - "sha256:ed9d65594948678827f4ff0e7ae23344e2f2b4cabbca057ccaed3118fdc392ca", - "sha256:fab31f48282ebf54dd69f6663cd2d9800096bad1bb67bbc9c9ac84eb77b41972" + "sha256:038f4e9d6ef10e1f3fe82addc3a14735c299866eb10f2c77c090410904828312", + "sha256:06ffdcaaf81e2a3b1b50c3ac6842cfb13df2d8b737d61f64643ed61da7389cde", + "sha256:0e71ce9c7cbc20f6f8b860107ce15114da26e8675238b4b82b7e7cd37ca0c087", + "sha256:1eec963fe9ffc827442c2e9333227c4d49749a44e592f305398c1db5c1563393", + "sha256:2754c85b2287333f9719db7f23fb7e357f436deed512db3417a02bf6f2830aa5", + "sha256:2db429090b98045d71218a9ba913cc9b3fe78e0ba0b6b647d8748bc6d5a44080", + "sha256:39b7e3b71bcb1fe46397185d6c1a5db1c441e71c23c91a31e7ad8cc3f7305f9a", + "sha256:3cbd734e1aefc7c5080e6b6973fe062f97c26a1cdf1a991037ca196ce1c8f427", + "sha256:40556bea1ef26ef54bc678d00cf138a63069144a0b5f3a436eecd8f3468b903e", + "sha256:48f273836e19901ba2beecd919f7b352f09310ce67c762f6e53bc6b81cacf1f0", + "sha256:49ec0b1361da328da9bb7f1a162836028e72556356adeb53342f8fae6b450d47", + "sha256:4e6198675a6f9d333774671bd536668680eea78e2e81c0b19e57224f58d17f37", + "sha256:5beaeb091071625e83f5905192d8aecde65ba2f26f8b6719845bbf586f7a04a1", + "sha256:5ff3e4e4cf7592d36541edec434e09fb8ab9ba6b47608c4ffe30c9038d301897", + "sha256:62214d2954377fcf3f31ec867dd4e436df80121e7a32947a0b3244f58f45e455", + "sha256:7be1b88c23cfac46e06404582215a917017cd2edaa2e4d40abe6aaff5458f24b", + "sha256:8fac72b9688176922f9f54fda1ba5f7ffd28cbeb9aad282760186e8ceba9139a", + "sha256:90a297330f608adeb4d2e9786c6fda395d3150739deb3d42a86d9a4c2d15bc1d", + "sha256:a2a47449093dcf70babc930beba2ca0423cb7df2fa5fd76be5260703d67fa574", + "sha256:ae19ac105cf7ce8c205a46166992fdec88081d6e783ab6e38ecfbe45729f3c39", + "sha256:ae426e3a52842c6b6d77d00f906b6031c8c2cfdfabd6af7511bb4bc9a68d720e", + "sha256:cbdb0b3db99dd1d5f69d31b4234367d55475add31df4d84a3bd690ef017b55e2", + "sha256:cdf24c1b9bbeb4936456b42ac5bd32c60bb194a344951acb6bfb0cddee5439a4", + "sha256:d14701a12417930392cd3898e9646cf5670c190b933625ebe7511b1f7d7b8736", + "sha256:d177fe1ff47cc235942d628d41ee5b1c6930d8f009f1a451c39b5411e8d0d4cf", + "sha256:d5bf9c863ba4717b3917b5227463ee06860fc43931dc9026747de416c0a10fee", + "sha256:dd968a174aa82f3341a615a033fa6a8169e9320cbb46130686562db132d7f1f0", + "sha256:f0ed4483c258fb23150e31b91ea7d25ff8495dba108aea0b0d4206a777705350", + "sha256:f18c3ed484eeeaa43a0d45dc2efb4d00fc6542ccdcfa2c45d7b635096a2ae534", + "sha256:f1d2108e770907540b5248977e4cff9ffaf0f73d0d13445ee938df06ca7579c6", + "sha256:f3ec00f023d84526381ad0c0f2cff982852d035c921bbf8ceb994f4886c00c64", + "sha256:f74429a07fedb36a03c159332b914e6de757176064f9fed94b5f79ebac07d913", + "sha256:fec42690a2eb646b384eafb021c425fab48991587edb412d4db77acc358b27ce" ], "markers": "python_version >= '3.6'", - "version": "==0.24.1" + "version": "==0.24.2" }, "scipy": { "hashes": [ - "sha256:03f1fd3574d544456325dae502facdf5c9f81cbfe12808a5e67a737613b7ba8c", - "sha256:0c81ea1a95b4c9e0a8424cf9484b7b8fa7ef57169d7bcc0dfcfc23e3d7c81a12", - "sha256:1fba8a214c89b995e3721670e66f7053da82e7e5d0fe6b31d8e4b19922a9315e", - "sha256:37f4c2fb904c0ba54163e03993ce3544c9c5cde104bcf90614f17d85bdfbb431", - "sha256:50e5bcd9d45262725e652611bb104ac0919fd25ecb78c22f5282afabd0b2e189", - "sha256:6ca1058cb5bd45388041a7c3c11c4b2bd58867ac9db71db912501df77be2c4a4", - "sha256:77f7a057724545b7e097bfdca5c6006bed8580768cd6621bb1330aedf49afba5", - "sha256:816951e73d253a41fa2fd5f956f8e8d9ac94148a9a2039e7db56994520582bf2", - "sha256:96620240b393d155097618bcd6935d7578e85959e55e3105490bbbf2f594c7ad", - "sha256:993c86513272bc84c451349b10ee4376652ab21f312b0554fdee831d593b6c02", - "sha256:adf7cee8e5c92b05f2252af498f77c7214a2296d009fc5478fc432c2f8fb953b", - "sha256:bc52d4d70863141bb7e2f8fd4d98e41d77375606cde50af65f1243ce2d7853e8", - "sha256:c1d3f771c19af00e1a36f749bd0a0690cc64632783383bc68f77587358feb5a4", - "sha256:d744657c27c128e357de2f0fd532c09c84cd6e4933e8232895a872e67059ac37", - "sha256:e3e9742bad925c421d39e699daa8d396c57535582cba90017d17f926b61c1552", - "sha256:e547f84cd52343ac2d56df0ab08d3e9cc202338e7d09fafe286d6c069ddacb31", - "sha256:e89091e6a8e211269e23f049473b2fde0c0e5ae0dd5bd276c3fc91b97da83480", - "sha256:e9da33e21c9bc1b92c20b5328adb13e5f193b924c9b969cd700c8908f315aa59", - "sha256:ffdfb09315896c6e9ac739bb6e13a19255b698c24e6b28314426fd40a1180822" + "sha256:01b38dec7e9f897d4db04f8de4e20f0f5be3feac98468188a0f47a991b796055", + "sha256:10dbcc7de03b8d635a1031cb18fd3eaa997969b64fdf78f99f19ac163a825445", + "sha256:19aeac1ad3e57338723f4657ac8520f41714804568f2e30bd547d684d72c392e", + "sha256:1b21c6e0dc97b1762590b70dee0daddb291271be0580384d39f02c480b78290a", + "sha256:1caade0ede6967cc675e235c41451f9fb89ae34319ddf4740194094ab736b88d", + "sha256:23995dfcf269ec3735e5a8c80cfceaf384369a47699df111a6246b83a55da582", + "sha256:2a799714bf1f791fb2650d73222b248d18d53fd40d6af2df2c898db048189606", + "sha256:3274ce145b5dc416c49c0cf8b6119f787f0965cd35e22058fe1932c09fe15d77", + "sha256:33d1677d46111cfa1c84b87472a0274dde9ef4a7ef2e1f155f012f5f1e995d8f", + "sha256:44d452850f77e65e25b1eb1ac01e25770323a782bfe3a1a3e43847ad4266d93d", + "sha256:9e3302149a369697c6aaea18b430b216e3c88f9a61b62869f6104881e5f9ef85", + "sha256:a75b014d3294fce26852a9d04ea27b5671d86736beb34acdfc05859246260707", + "sha256:ad7269254de06743fb4768f658753de47d8b54e4672c5ebe8612a007a088bd48", + "sha256:b30280fbc1fd8082ac822994a98632111810311a9ece71a0e48f739df3c555a2", + "sha256:b79104878003487e2b4639a20b9092b02e1bad07fc4cf924b495cf413748a777", + "sha256:d449d40e830366b4c612692ad19fbebb722b6b847f78a7b701b1e0d6cda3cc13", + "sha256:d647757373985207af3343301d89fe738d5a294435a4f2aafb04c13b4388c896", + "sha256:f68eb46b86b2c246af99fcaa6f6e37c7a7a413e1084a794990b877f2ff71f7b6", + "sha256:fdf606341cd798530b05705c87779606fcdfaf768a8129c348ea94441da15b04" ], "index": "pypi", - "version": "==1.6.2" + "version": "==1.6.3" }, "send2trash": { "hashes": [ @@ -1495,26 +1472,26 @@ }, "watchdog": { "hashes": [ - "sha256:035f4816daf3c62e03503c267620f3aa8fc7472df85ff3ef1e0c100ea1ed2744", - "sha256:0f7e9de9ba84af15e9e9fc29c3b13c972daa4d2b11de29aa86b26a26bc877c06", - "sha256:13c9ff58508dce55ba416eb0ef7af5aa5858558f2ec51112f099fd03503b670b", - "sha256:19675b8d1f00dabe74a0e66d87980623250d9360a21612e8c27b70a4b214ceeb", - "sha256:1cd715c4fb803581ded8943f39a51f21c17375d009ca9e3398d6b20638863a70", - "sha256:1f518a6940cde8720b8826a705c164e6b9bd6cf8c00f14269ffac51e017e06ec", - "sha256:3e933f3567c4521dd1a5d59fd54a522cae90bebcbeb8b74b84a2f33c90f08388", - "sha256:41b1a773f364f232b5bc184688e8d60451745d9e0971ac60c648bd47be8f4733", - "sha256:532fedd993e75554671faa36cd04c580ced3fae084254a779afbbd8aaf00566b", - "sha256:74528772516228f6a015a647027057939ff0b695a0b864cb3037e8e1aabc7ca0", - "sha256:89102465764e453609463cf620e744da1b0aa1f9f321b05961e2e7e15b3c9d8b", - "sha256:a412b1914e27f67b0a10e1ee19b5d035a9f7c115a062bbbd640653d9820ba4c8", - "sha256:ac6adbdf32e1d180574f9d0819e80259ae48e68727e80c3d950ed5a023714c3e", - "sha256:adda34bfe6db05485c1dfcd98232bdec385f991fe16358750c2163473eefb985", - "sha256:d2fcbc15772a82cd139c803a513c45b0fbc72a10a8a34dc2a8b429110b6f1236", - "sha256:d54e187b76053982180532cb7fd31152201c438b348c456f699929f8a89e786d", - "sha256:e0114e48ee981b38e328eaa0d5a625c7b4fc144b8dc7f7637749d6b5f7fefb0e" + "sha256:0ba2c2526dc81a241e3b0f018a447a5ca634fa1f01fc5fa2a07c87ee04d730a7", + "sha256:1583ee70d78226d897cbc85cfd5b88108340450e0214a704a4919443434d3c32", + "sha256:3bf9132a6c609ca9fa96df3e8309acaee930b1faf46212d7c296f2bce03f5264", + "sha256:4288d3a984324db492e57aa169666238a2578f0af5a081685526608fb9f6bd61", + "sha256:4607156004c36c1cbd8b693b9516a3463646159299404e007cc292f51934c930", + "sha256:51ad0342ecb4733796e94980f2430b2333e12ead973d3e18f5514cd77a24fa85", + "sha256:6b760cf207bf4d533155853904047e75eb3a2d629bfd320c3f4f8d07abbc38d5", + "sha256:7fea9428ab2cd577d7dc571036b2450361802c43d7a546e72eead95c21239c9a", + "sha256:819d7e77594aa3108f9ad9e896b003914ec778fdf9827d9f940ca5e6db5416ce", + "sha256:8e82707878e3defc46e8a4d861cde0c89561fba8a91b3609742213fa9d07fc16", + "sha256:9fe3ea15f9020aa7f129f700341850ee768730c67e89d8256c224c4f26c86670", + "sha256:a06e595e05cc31a882031367e0f6c2b19160b1d7e881857c16121e3cda32494d", + "sha256:a679d03f52a9e3ea3efb77b459e9eba05c7c687f48e4d17ce20bc0e36f867d9a", + "sha256:b565838318e134e41d78f056194dff6bbd7b85fef67f9ee6f01168146bf04048", + "sha256:b8727f84a3c7dd21138d92186181d015af45168e0af895ffdc553a28019494ef", + "sha256:be4c578fa148a9cbc64451a3af26c4ee55d59c33f5438bcffc1956092df0888b", + "sha256:c678b51fb89c76816004c8234ac827977c23912c3826e263adbfb5dbe161e8dc" ], "markers": "platform_system != 'Darwin'", - "version": "==2.0.2" + "version": "==2.0.3" }, "wcwidth": { "hashes": [ @@ -1537,6 +1514,28 @@ ], "version": "==3.5.1" }, + "wordcloud": { + "hashes": [ + "sha256:2d5f63cf1f65126d6d1b8d55acae15961c5bd55e6ef8978bf269831dafcd1408", + "sha256:2e1fc5991f52a0e191873025f5d8cf852eb1c39f2579d43e6d2589dc04d620d9", + "sha256:2e85b1a6b8211436d47f902837c866c682c5d00046706f1d8e72cc0cd9210d64", + "sha256:41dafafe3768675ba8285d21ac68bd53191b50da85aba2a7fb35964d1649156f", + "sha256:6450f67c207f2ab4513c4aac803226427b39f7fc8d3498c0b41a03834b2fc426", + "sha256:6ff0ca777c801cd2a38deef30019f02fe35df1be9c31f6c301a34bd45006e26b", + "sha256:90e9cff2c6939b5e1fa87d2a2c5d79c8ac2eafc4c9b9bc1bcb865ca5fde7e758", + "sha256:9ed09d2d2916514845e42b0cc5dd86f65d4f0f89ab4bf164cc9a628d9db5a76b", + "sha256:a02b27400bc4b2e4efdd8377e8cd138496b6b6e15a082d9c7b7c7b74a9ee8a55", + "sha256:a4a626df47eacec207988e1ffdb5bd16b2b62ac83ea2442a0043080c0c5103ac", + "sha256:cd8c44a21a3207da813c7b7e486ed41fe517eb7b8535deef945437f4cea2b245", + "sha256:daa7d72f0004fcfea5f649f1683dbe47c4a9c5f721aabeb41278a4ee33f21424", + "sha256:e0d3f03e4f27faf543859b4bcd229d320361d065cf4786f149081adea4ee68e8", + "sha256:e6ef771aac17c1cf8558c8d5ef025796184066d7b78f8118aefe011fb0d22952", + "sha256:f190eb3efcc17765a4ed9d78001fdb713988886b93e54b2758fcf4f6c519ad51", + "sha256:f49f5213e978fb5d8ff8f6e6067de657e81ca863cab549b198579f58e1430e7c" + ], + "index": "pypi", + "version": "==1.8.1" + }, "wrapt": { "hashes": [ "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7" @@ -1560,14 +1559,6 @@ ], "version": "==1.4.4" }, - "atomicwrites": { - "hashes": [ - "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197", - "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a" - ], - "index": "pypi", - "version": "==1.4.0" - }, "attrs": { "hashes": [ "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", @@ -1578,10 +1569,11 @@ }, "black": { "hashes": [ - "sha256:1c02557aa099101b9d21496f8a914e9ed2222ef70336404eeeac8edba836fbea" + "sha256:bff7067d8bc25eb21dcfdbc8c72f2baafd9ec6de4663241a52fb904b304d391f", + "sha256:fc9bcf3b482b05c1f35f6a882c079dc01b9c7795827532f4cc43c0ec88067bbc" ], "index": "pypi", - "version": "==20.8b1" + "version": "==21.4b2" }, "click": { "hashes": [ @@ -1591,14 +1583,6 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==7.1.2" }, - "colorama": { - "hashes": [ - "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b", - "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2" - ], - "markers": "sys_platform == 'win32' and sys_platform == 'win32' and sys_platform == 'win32'", - "version": "==0.4.4" - }, "coverage": { "hashes": [ "sha256:004d1880bed2d97151facef49f08e255a20ceb6f9432df75f4eef018fdd5a78c", @@ -1659,11 +1643,11 @@ }, "flake8": { "hashes": [ - "sha256:12d05ab02614b6aee8df7c36b97d1a3b2372761222b19b58621355e82acddcff", - "sha256:78873e372b12b093da7b5e5ed302e8ad9e988b38b063b61ad937f26ca58fc5f0" + "sha256:1aa8990be1e689d96c745c5682b687ea49f2e05a443aff1f8251092b0014e378", + "sha256:3b9f848952dddccf635be78098ca75010f073bfe14d2c6bda867154bea728d2a" ], "index": "pypi", - "version": "==3.9.0" + "version": "==3.9.1" }, "iniconfig": { "hashes": [ @@ -1810,49 +1794,6 @@ ], "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==0.10.2" - }, - "typed-ast": { - "hashes": [ - "sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace", - "sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff", - "sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266", - "sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528", - "sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6", - "sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808", - "sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4", - "sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363", - "sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341", - "sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04", - "sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41", - "sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e", - "sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3", - "sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899", - "sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805", - "sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c", - "sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c", - "sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39", - "sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a", - "sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3", - "sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7", - "sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f", - "sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075", - "sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0", - "sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40", - "sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428", - "sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927", - "sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3", - "sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f", - "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65" - ], - "version": "==1.4.3" - }, - "typing-extensions": { - "hashes": [ - "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918", - "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c", - "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f" - ], - "version": "==3.7.4.3" } } } diff --git a/src/analyzer.py b/src/analyzer.py index f9f0424b..14734157 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -1,14 +1,17 @@ """Text Proprocessing""" from collections import Counter + +from . import markdown as md + from textblob import TextBlob import pandas as pd + import re import string from typing import List, Tuple import spacy from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer -from . import markdown as md PARSER = spacy.load("en_core_web_sm") @@ -142,6 +145,19 @@ def noun_phrase(input_text): return n_phrase_lst +def concatenate(responses_df): + """Remove stop words from and return contcatenated string of all words.""" + words_str = '' + for i, row in responses_df.iterrows(): + for col in range(len(responses_df.columns)): + val = row[col] + tokens = val.split() + for i in range(len(tokens)): + tokens[i] = tokens[i].lower() + words_str += " ".join(tokens)+" " + return words_str + + def top_polarized_word(tokens_column): """Create columns for positive and negative words""" # Start off with empty lists diff --git a/streamlit_web.py b/streamlit_web.py index 52121c7a..161b3623 100644 --- a/streamlit_web.py +++ b/streamlit_web.py @@ -21,6 +21,7 @@ import src.topic_modeling as tm import src.visualization as vis +from wordcloud import WordCloud, STOPWORDS # resources/sample_reflections/lab1, resources/sample_reflections/lab2 @@ -28,14 +29,19 @@ SPACY_MODEL_NAMES = ["en_core_web_sm", "en_core_web_md"] preprocessed_df = pd.DataFrame() main_df = pd.DataFrame() +sample = [] assignments = None assign_text = None stu_id = None success_msg = None debug_mode = False + +json_lst = [] + main_md_dict = None + def main(): """main streamlit function""" # Title @@ -165,6 +171,7 @@ def retreive_data(data_retreive): return True + @st.cache(allow_output_mutation=True) def load_model(name): """load spacy model""" @@ -264,11 +271,12 @@ def frequency(): def overall_freq(freq_range): - """page fore overall word frequency""" + """page for overall word frequency.""" plots_range = st.sidebar.slider( "Select the number of plots per row", 1, 5, value=3 ) freq_df = pd.DataFrame(columns=["assignments", "word", "freq"]) + # calculate word frequency of each assignments for item in assignments: # combined text of the whole assignment @@ -288,6 +296,13 @@ def overall_freq(freq_range): ) ) + responses_end = len(main_df.columns) - 3 + responses_df = main_df[main_df.columns[1:responses_end]] + responses_df.replace("", "NA") + + frequency_word_cloud(responses_df) + + freq_df.to_csv('frequency_archives' + os.path.sep + str(item) + '.csv') def student_freq(freq_range): """page for individual student's word frequency""" @@ -331,6 +346,12 @@ def student_freq(freq_range): ) ) + responses_end = len(stu_assignment.columns) - 3 + responses_df = stu_assignment[stu_assignment.columns[1:responses_end]] + responses_df.replace("", "NA") + + frequency_word_cloud(responses_df) + def question_freq(freq_range): """page for individual question's word frequency""" @@ -377,6 +398,23 @@ def question_freq(freq_range): plots_per_row=plots_range, ) ) + frequency_word_cloud(question_df) + + +def frequency_word_cloud(responses_df): + """Build wordcloud out of page's responses.""" + # concatenate all words into normalized string and make into wordcloud + words = az.concatenate(responses_df) + cloud_stopwords = set(STOPWORDS) + wordcloud = (WordCloud(width = 800, height = 800, + background_color = 'white', + stopwords = cloud_stopwords, + min_font_size = 10).generate(words)) + + # plot wordcloud by temporarily savings as a file and displaying + wordcloud.to_file("resources/images/word_cloud.png") + st.image("resources/images/word_cloud.png") + os.remove("resources/images/word_cloud.png") def sentiment(): diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index 66a89cf9..dd5f5166 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -1,4 +1,5 @@ """Test module for analyzer.py""" + import pytest import src.analyzer as az import pandas as pd @@ -146,7 +147,7 @@ def test_sentence_tokenize(): def test_tfidf(): - """test tfidf return result""" + """Test tfidf return result.""" input_tokens = [ "test", "tokenize", @@ -161,6 +162,22 @@ def test_tfidf(): assert vector is not None +def test_concatenate(): + """Test for contcatenated string of all words.""" + input_dict = { + "What was the most important technical skill that you practiced?": + ["Using pipenv and pytest", "Naming variables in Python"], + "What was the most important professional skill that you practiced?": + ["Communicating with a team remotely", "Resolving issues by talking \ + to teammates"] + } + input_df = pd.DataFrame(input_dict) + output = az.concatenate(input_df) + expected = "using pipenv and pytest communicating with a team remotely \ +naming variables in python resolving issues by talking to teammates " + assert output == expected + + def test_top_polarized_word(): """Tests if the positive/negative words columns are created""" df = pd.DataFrame(columns=[cts.TOKEN, cts.POSITIVE, cts.NEGATIVE]) diff --git a/text_classifier b/text_classifier new file mode 100644 index 00000000..24c4f853 Binary files /dev/null and b/text_classifier differ