[gleam] complete word-count

joaofnds · Jun 26, 2024 · 7c1bb4e · 7c1bb4e
1 parent 55d8f41
commit 7c1bb4e
Showing 7 changed files with 299 additions and 0 deletions.
diff --git a/gleam/word-count/.gitignore b/gleam/word-count/.gitignore
@@ -0,0 +1,4 @@
+*.beam
+*.ez
+build
+erl_crash.dump
diff --git a/gleam/word-count/HELP.md b/gleam/word-count/HELP.md
@@ -0,0 +1,32 @@
+# Help
+
+## Running the tests
+
+To run the tests, run the command `gleam test` from within the exercise directory.
+
+## Submitting your solution
+
+You can submit your solution using the `exercism submit src/word_count.gleam` command.
+This command will upload your solution to the Exercism website and print the solution page's URL.
+
+It's possible to submit an incomplete solution which allows you to:
+
+- See how others have completed the exercise
+- Request help from a mentor
+
+## Need to get help?
+
+If you'd like help solving the exercise, check the following pages:
+
+- The [Gleam track's documentation](https://exercism.org/docs/tracks/gleam)
+- The [Gleam track's programming category on the forum](https://forum.exercism.org/c/programming/gleam)
+- [Exercism's programming category on the forum](https://forum.exercism.org/c/programming/5)
+- The [Frequently Asked Questions](https://exercism.org/docs/using/faqs)
+
+Should those resources not suffice, you could submit your (incomplete) solution to request mentoring.
+
+To get help if you're having trouble, you can use one of the following resources:
+
+- [gleam.run](https://gleam.run/documentation/) is the official Gleam documentation.
+- [Discord](https://discord.gg/Fm8Pwmy) is the Discord channel.
+- [StackOverflow](https://stackoverflow.com/questions/tagged/gleam) can be used to search for your problem and see if it has been answered already. You can also ask and answer questions.
diff --git a/gleam/word-count/README.md b/gleam/word-count/README.md
@@ -0,0 +1,71 @@
+# Word Count
+
+Welcome to Word Count on Exercism's Gleam Track.
+If you need help running the tests or submitting your code, check out `HELP.md`.
+
+## Introduction
+
+You teach English as a foreign language to high school students.
+
+You've decided to base your entire curriculum on TV shows.
+You need to analyze which words are used, and how often they're repeated.
+
+This will let you choose the simplest shows to start with, and to gradually increase the difficulty as time passes.
+
+## Instructions
+
+Your task is to count how many times each word occurs in a subtitle of a drama.
+
+The subtitles from these dramas use only ASCII characters.
+
+The characters often speak in casual English, using contractions like _they're_ or _it's_.
+Though these contractions come from two words (e.g. _we are_), the contraction (_we're_) is considered a single word.
+
+Words can be separated by any form of punctuation (e.g. ":", "!", or "?") or whitespace (e.g. "\t", "\n", or " ").
+The only punctuation that does not separate words is the apostrophe in contractions.
+
+Numbers are considered words.
+If the subtitles say _It costs 100 dollars._ then _100_ will be its own word.
+
+Words are case insensitive.
+For example, the word _you_ occurs three times in the following sentence:
+
+> You come back, you hear me? DO YOU HEAR ME?
+
+The ordering of the word counts in the results doesn't matter.
+
+Here's an example that incorporates several of the elements discussed above:
+
+- simple words
+- contractions
+- numbers
+- case insensitive words
+- punctuation (including apostrophes) to separate words
+- different forms of whitespace to separate words
+
+`"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.`
+
+The mapping for this subtitle would be:
+
+```text
+123: 1
+agent: 1
+cried: 1
+fled: 1
+i: 1
+password: 2
+so: 1
+special: 1
+that's: 1
+the: 2
+```
+
+## Source
+
+### Created by
+
+- @lpil
+
+### Based on
+
+This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour.
diff --git a/gleam/word-count/gleam.toml b/gleam/word-count/gleam.toml
@@ -0,0 +1,12 @@
+name = "word_count"
+version = "0.1.0"
+
+[dependencies]
+gleam_bitwise = "~> 1.2"
+gleam_otp = "~> 0.7 or ~> 1.0"
+gleam_stdlib = "~> 0.32 or ~> 1.0"
+simplifile = "~> 1.0"
+gleam_erlang = ">= 0.25.0 and < 1.0.0"
+
+[dev-dependencies]
+exercism_test_runner = "~> 1.4"
diff --git a/gleam/word-count/manifest.toml b/gleam/word-count/manifest.toml
@@ -0,0 +1,27 @@
+# This file was generated by Gleam
+# You typically do not need to edit this file
+
+packages = [
+  { name = "argv", version = "1.0.1", build_tools = ["gleam"], requirements = [], otp_app = "argv", source = "hex", outer_checksum = "A6E9009E50BBE863EB37D963E4315398D41A3D87D0075480FC244125808F964A" },
+  { name = "exercism_test_runner", version = "1.7.0", build_tools = ["gleam"], requirements = ["argv", "gap", "glance", "gleam_community_ansi", "gleam_erlang", "gleam_json", "gleam_stdlib", "simplifile"], otp_app = "exercism_test_runner", source = "hex", outer_checksum = "2FC1BADB19BEC2AE77BFD2D3A606A014C85412A7B874CAFC4BA8CF04B0B257CD" },
+  { name = "gap", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_community_ansi", "gleam_stdlib"], otp_app = "gap", source = "hex", outer_checksum = "2EE1B0A17E85CF73A0C1D29DA315A2699117A8F549C8E8D89FA8261BE41EDEB1" },
+  { name = "glance", version = "0.8.2", build_tools = ["gleam"], requirements = ["gleam_stdlib", "glexer"], otp_app = "glance", source = "hex", outer_checksum = "ACF09457E8B564AD7A0D823DAFDD326F58263C01ACB0D432A9BEFDEDD1DA8E73" },
+  { name = "gleam_bitwise", version = "1.3.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_bitwise", source = "hex", outer_checksum = "B36E1D3188D7F594C7FD4F43D0D2CE17561DE896202017548578B16FE1FE9EFC" },
+  { name = "gleam_community_ansi", version = "1.4.0", build_tools = ["gleam"], requirements = ["gleam_community_colour", "gleam_stdlib"], otp_app = "gleam_community_ansi", source = "hex", outer_checksum = "FE79E08BF97009729259B6357EC058315B6FBB916FAD1C2FF9355115FEB0D3A4" },
+  { name = "gleam_community_colour", version = "1.3.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_community_colour", source = "hex", outer_checksum = "A49A5E3AE8B637A5ACBA80ECB9B1AFE89FD3D5351FF6410A42B84F666D40D7D5" },
+  { name = "gleam_erlang", version = "0.25.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_erlang", source = "hex", outer_checksum = "054D571A7092D2A9727B3E5D183B7507DAB0DA41556EC9133606F09C15497373" },
+  { name = "gleam_json", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "thoas"], otp_app = "gleam_json", source = "hex", outer_checksum = "8B197DD5D578EA6AC2C0D4BDC634C71A5BCA8E7DB5F47091C263ECB411A60DF3" },
+  { name = "gleam_otp", version = "0.10.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "0B04FE915ACECE539B317F9652CAADBBC0F000184D586AAAF2D94C100945D72B" },
+  { name = "gleam_stdlib", version = "0.36.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "C0D14D807FEC6F8A08A7C9EF8DFDE6AE5C10E40E21325B2B29365965D82EB3D4" },
+  { name = "glexer", version = "0.7.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "glexer", source = "hex", outer_checksum = "4484942A465482A0A100936E1E5F12314DB4B5AC0D87575A7B9E9062090B96BE" },
+  { name = "simplifile", version = "1.5.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "EB9AA8E65E5C1E3E0FDCFC81BC363FD433CB122D7D062750FFDF24DE4AC40116" },
+  { name = "thoas", version = "0.4.1", build_tools = ["rebar3"], requirements = [], otp_app = "thoas", source = "hex", outer_checksum = "4918D50026C073C4AB1388437132C77A6F6F7C8AC43C60C13758CC0ADCE2134E" },
+]
+
+[requirements]
+exercism_test_runner = { version = "~> 1.4" }
+gleam_bitwise = { version = "~> 1.2" }
+gleam_erlang = { version = ">= 0.25.0 and < 1.0.0"}
+gleam_otp = { version = "~> 0.7 or ~> 1.0" }
+gleam_stdlib = { version = "~> 0.32 or ~> 1.0" }
+simplifile = { version = "~> 1.0" }
diff --git a/gleam/word-count/src/word_count.gleam b/gleam/word-count/src/word_count.gleam
@@ -0,0 +1,14 @@
+import gleam/dict.{type Dict}
+import gleam/list
+import gleam/regex
+import gleam/string
+
+/// Counts how often each word occurs in `input`, ignoring case.
+///
+/// A word is a run of ASCII letters and digits, optionally followed by a
+/// single apostrophe and another such run (a contraction like "don't").
+/// Everything else (whitespace and every other punctuation character,
+/// underscores included) separates words.
+///
+/// Returns a dict from each lowercased word to its number of occurrences;
+/// input that contains no words yields an empty dict.
+pub fn count_words(input: String) -> Dict(String, Int) {
+  // The input is lowercased before scanning, so the pattern only needs
+  // lowercase letters. An explicit class is used instead of `\w` because
+  // `\w` also matches `_`, which would glue "snake_case" into one word.
+  let assert Ok(re) = regex.from_string("[a-z0-9]+('[a-z0-9]+)?")
+
+  input
+  |> string.lowercase
+  |> regex.scan(re, _)
+  // Group identical matches together, then reduce each group to its size.
+  |> list.group(fn(match) { match.content })
+  |> dict.map_values(fn(_, matches) { list.length(matches) })
+}
diff --git a/gleam/word-count/test/word_count_test.gleam b/gleam/word-count/test/word_count_test.gleam
@@ -0,0 +1,139 @@
+import exercism/should
+import exercism/test_runner
+import gleam/dict
+import word_count
+
+/// Entry point of the test module: hands control to the Exercism test runner.
+// NOTE(review): presumably the runner discovers and runs the `*_test`
+// functions in this module; confirm against exercism_test_runner.
+pub fn main() {
+  test_runner.main()
+}
+
+/// A single word maps to a count of one.
+pub fn count_one_word_test() {
+  let counts = word_count.count_words("word")
+  let expected = dict.from_list([#("word", 1)])
+
+  should.equal(counts, expected)
+}
+
+/// Distinct space-separated words are each counted once.
+pub fn count_one_of_each_word_test() {
+  let counts = word_count.count_words("one of each")
+  let expected = dict.from_list([#("one", 1), #("of", 1), #("each", 1)])
+
+  should.equal(counts, expected)
+}
+
+/// A word that appears several times accumulates its occurrences.
+pub fn multiple_occurrences_of_a_word_test() {
+  let counts = word_count.count_words("one fish two fish red fish blue fish")
+  let expected =
+    dict.from_list([
+      #("one", 1),
+      #("fish", 4),
+      #("two", 1),
+      #("red", 1),
+      #("blue", 1),
+    ])
+
+  should.equal(counts, expected)
+}
+
+/// Commas with no surrounding whitespace still separate words.
+pub fn handles_cramped_lists_test() {
+  let counts = word_count.count_words("one,two,three")
+  let expected = dict.from_list([#("one", 1), #("two", 1), #("three", 1)])
+
+  should.equal(counts, expected)
+}
+
+/// A comma followed by a newline acts as a single separator between words.
+pub fn handles_expanded_lists_test() {
+  let counts = word_count.count_words("one,\ntwo,\nthree")
+  let expected = dict.from_list([#("one", 1), #("two", 1), #("three", 1)])
+
+  should.equal(counts, expected)
+}
+
+/// Punctuation characters are dropped and never become part of a word.
+pub fn ignore_punctuation_test() {
+  let counts =
+    word_count.count_words("car: carpet as java: javascript!!&@$%^&")
+  let expected =
+    dict.from_list([
+      #("car", 1),
+      #("carpet", 1),
+      #("as", 1),
+      #("java", 1),
+      #("javascript", 1),
+    ])
+
+  should.equal(counts, expected)
+}
+
+/// Numbers are counted as words in their own right.
+pub fn include_numbers_test() {
+  let counts = word_count.count_words("testing, 1, 2 testing")
+  let expected = dict.from_list([#("testing", 2), #("1", 1), #("2", 1)])
+
+  should.equal(counts, expected)
+}
+
+/// Words that differ only in letter case share one lowercase entry.
+pub fn normalize_case_test() {
+  let counts = word_count.count_words("go Go GO Stop stop")
+  let expected = dict.from_list([#("go", 3), #("stop", 2)])
+
+  should.equal(counts, expected)
+}
+
+/// Apostrophes inside contractions are kept, while the quotes wrapping the
+/// whole sentence are discarded.
+pub fn with_apostrophes_test() {
+  let counts =
+    word_count.count_words(
+      "'First: don't laugh. Then: don't cry. You're getting it.'",
+    )
+  let expected =
+    dict.from_list([
+      #("first", 1),
+      #("don't", 2),
+      #("laugh", 1),
+      #("then", 1),
+      #("cry", 1),
+      #("you're", 1),
+      #("getting", 1),
+      #("it", 1),
+    ])
+
+  should.equal(counts, expected)
+}
+
+/// A word wrapped in single quotes counts the same as the bare word.
+pub fn with_quotations_test() {
+  let counts =
+    word_count.count_words("Joe can't tell between 'large' and large.")
+  let expected =
+    dict.from_list([
+      #("joe", 1),
+      #("can't", 1),
+      #("tell", 1),
+      #("between", 1),
+      #("large", 2),
+      #("and", 1),
+    ])
+
+  should.equal(counts, expected)
+}
+
+/// Words that are prefixes of other words ("a", "app", "apple") are counted
+/// separately.
+pub fn substrings_from_the_beginning_test() {
+  let counts =
+    word_count.count_words("Joe can't tell between app, apple and a.")
+  let expected =
+    dict.from_list([
+      #("joe", 1),
+      #("can't", 1),
+      #("tell", 1),
+      #("between", 1),
+      #("app", 1),
+      #("apple", 1),
+      #("and", 1),
+      #("a", 1),
+    ])
+
+  should.equal(counts, expected)
+}
+
+/// Leading and repeated spaces do not produce empty words.
+pub fn multiple_spaces_not_detected_as_a_word_test() {
+  let counts = word_count.count_words(" multiple   whitespaces")
+  let expected = dict.from_list([#("multiple", 1), #("whitespaces", 1)])
+
+  should.equal(counts, expected)
+}
+
+/// Runs of mixed separators (commas, newlines, spaces, quotes) do not
+/// produce words of their own.
+pub fn alternating_word_separators_not_detected_as_a_word_test() {
+  let counts = word_count.count_words(",\n,one,\n ,two \n 'three'")
+  let expected = dict.from_list([#("one", 1), #("two", 1), #("three", 1)])
+
+  should.equal(counts, expected)
+}
+
+/// A quoted contraction loses its surrounding quotes but keeps its inner
+/// apostrophe, so it is counted together with the unquoted contraction.
+pub fn quotation_for_word_with_apostrophe_test() {
+  let counts = word_count.count_words("can, can't, 'can't'")
+  let expected = dict.from_list([#("can", 1), #("can't", 2)])
+
+  should.equal(counts, expected)
+}