diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..cf2096e
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,23 @@
+name: test
+
+on:
+  push:
+    branches:
+      - master
+      - main
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: erlef/setup-beam@v1
+        with:
+          otp-version: "26.0.2"
+          gleam-version: "0.32.4"
+          rebar3-version: "3"
+          # elixir-version: "1.15.4"
+      - run: gleam deps download
+      - run: gleam test
+      - run: gleam format --check src test
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..170cca9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.beam
+*.ez
+build
+erl_crash.dump
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5c8a542
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
+# glubs
+
+[![Package Version](https://img.shields.io/hexpm/v/glubs)](https://hex.pm/packages/glubs)
+[![Hex Docs](https://img.shields.io/badge/hex-docs-ffaff3)](https://hexdocs.pm/glubs/)
+
+## Quick start
+
+```sh
+gleam run   # Run the project
+gleam test  # Run the tests
+gleam shell # Run an Erlang shell
+```
+
+## Installation
+
+If available on Hex this package can be added to your Gleam project:
+
+```sh
+gleam add glubs
+```
+
+and its documentation can be found at <https://hexdocs.pm/glubs>.
diff --git a/gleam.toml b/gleam.toml
new file mode 100644
index 0000000..4dede06
--- /dev/null
+++ b/gleam.toml
@@ -0,0 +1,16 @@
+name = "glubs"
+version = "0.1.0"
+
+# Fill out these fields if you intend to generate HTML documentation or publish
+# your project to the Hex package manager.
+#
+# description = ""
+# licences = ["Apache-2.0"]
+# repository = { type = "github", user = "username", repo = "project" }
+# links = [{ title = "Website", href = "https://gleam.run" }]
+
+[dependencies]
+gleam_stdlib = "~> 0.32"
+
+[dev-dependencies]
+gleeunit = "~> 1.0"
diff --git a/manifest.toml b/manifest.toml
new file mode 100644
index 0000000..92768e9
--- /dev/null
+++ b/manifest.toml
@@ -0,0 +1,11 @@
+# This file was generated by Gleam
+# You typically do not need to edit this file
+
+packages = [
+  { name = "gleam_stdlib", version = "0.32.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "ABF00CDCCB66FABBCE351A50060964C4ACE798F95A0D78622C8A7DC838792577" },
+  { name = "gleeunit", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "D3682ED8C5F9CAE1C928F2506DE91625588CC752495988CBE0F5653A42A6F334" },
+]
+
+[requirements]
+gleam_stdlib = { version = "~> 0.32" }
+gleeunit = { version = "~> 1.0" }
diff --git a/src/glubs.gleam b/src/glubs.gleam
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/glubs.gleam
@@ -0,0 +1 @@
+
diff --git a/src/glubs/webvtt.gleam b/src/glubs/webvtt.gleam
new file mode 100644
index 0000000..2c61ec6
--- /dev/null
+++ b/src/glubs/webvtt.gleam
@@ -0,0 +1,146 @@
+import gleam/option.{None, Some}
+import gleam/string
+import gleam/result
+import gleam/list
+import gleam/int
+
+/// A single lexical unit of a WebVTT cue payload.
+pub type Token {
+  /// An opening tag such as `<v.loud Rob>`: the tag name, its dot-separated
+  /// classes and, when present, the text after the first space.
+  StartTag(
+    tag: String,
+    classes: List(String),
+    annotation: option.Option(String),
+  )
+  /// A run of text between tags.
+  Text(content: String)
+  /// A timestamp tag such as `<00:19.500>`, converted to milliseconds.
+  Timestamp(ms: Int)
+  /// A closing tag such as `</v>`.
+  EndTag(tag: String)
+}
+
+/// Reasons a cue payload cannot be tokenized.
+pub type TokenizationError {
+  /// A `<` was found without a matching `>`.
+  InvalidStartToken
+  /// A `</` was found without a matching `>`.
+  InvalidEndToken
+}
+
+/// Splits a cue payload into tokens, in the order they appear.
+///
+/// Returns an error when a tag is opened but never closed with `>`.
+pub fn tokenize(payload: String) -> Result(List(Token), TokenizationError) {
+  payload
+  |> do_tokenize([])
+  |> result.map(list.reverse)
+}
+
+// Tokens are prepended to `acc`, so the result is in reverse order.
+fn do_tokenize(
+  payload: String,
+  acc: List(Token),
+) -> Result(List(Token), TokenizationError) {
+  case payload {
+    "" -> Ok(acc)
+    // Must come before the `"<"` branch, which would also match `</`.
+    "</" <> rest -> {
+      case string.split_once(rest, on: ">") {
+        Ok(#(tag, rest)) -> {
+          do_tokenize(rest, [EndTag(tag: tag), ..acc])
+        }
+        Error(Nil) -> {
+          Error(InvalidEndToken)
+        }
+      }
+    }
+    "<" <> rest -> {
+      case string.split_once(rest, on: ">") {
+        Ok(#(tag, rest)) -> {
+          // Anything that is not a valid timestamp is treated as a start tag.
+          case parse_timestamp(tag) {
+            Ok(ts) -> do_tokenize(rest, [Timestamp(ts), ..acc])
+            Error(_) -> do_tokenize(rest, [parse_start_tag(tag), ..acc])
+          }
+        }
+        Error(Nil) -> {
+          Error(InvalidStartToken)
+        }
+      }
+    }
+    text -> {
+      case string.split_once(text, on: "<") {
+        Ok(#(content, rest)) -> {
+          do_tokenize("<" <> rest, [Text(content), ..acc])
+        }
+        Error(Nil) -> Ok([Text(text), ..acc])
+      }
+    }
+  }
+}
+
+// Parses the inside of a start tag: `tag.class1.class2 annotation`.
+fn parse_start_tag(input: String) -> Token {
+  case string.split_once(input, on: " ") {
+    Ok(#(tag_and_classes, annotation)) -> {
+      let #(tag, classes) = parse_tag_and_classes(tag_and_classes)
+      StartTag(tag: tag, classes: classes, annotation: Some(annotation))
+    }
+    Error(_) -> {
+      let #(tag, classes) = parse_tag_and_classes(input)
+      StartTag(tag: tag, classes: classes, annotation: None)
+    }
+  }
+}
+
+// Splits `tag.class1.class2` into the tag name and its classes.
+fn parse_tag_and_classes(input: String) -> #(String, List(String)) {
+  case string.split(input, on: ".") {
+    [tag, ..classes] -> #(tag, classes)
+    // Not expected from `string.split`; present so the match is total
+    // instead of relying on a partial `let` pattern.
+    [] -> #(input, [])
+  }
+}
+
+// Parses `hh:mm:ss.ttt` or `mm:ss.ttt` into milliseconds.
+fn parse_timestamp(input: String) -> Result(Int, Nil) {
+  case string.split(input, on: ":") {
+    [hours, minutes, seconds_and_ms] -> {
+      use hours <- result.try(int.parse(hours))
+      use minutes <- result.try(int.parse(minutes))
+      use #(seconds, ms) <- result.try(split_seconds(seconds_and_ms))
+
+      Ok({ seconds + minutes * 60 + hours * 60 * 60 } * 1000 + ms)
+    }
+
+    [minutes, seconds_and_ms] -> {
+      use minutes <- result.try(int.parse(minutes))
+      use #(seconds, ms) <- result.try(split_seconds(seconds_and_ms))
+
+      Ok({ seconds + minutes * 60 } * 1000 + ms)
+    }
+
+    // Any other number of `:`-separated parts (one, or four and more) is not
+    // a timestamp.
+    _ -> Error(Nil)
+  }
+}
+
+// Parses `ss.ttt` (or plain `ss`) into seconds and milliseconds. The digits
+// after the dot are read as a millisecond count as-is.
+fn split_seconds(input: String) -> Result(#(Int, Int), Nil) {
+  case string.split_once(input, on: ".") {
+    Ok(#(seconds, ms)) -> {
+      use seconds <- result.try(int.parse(seconds))
+      use ms <- result.try(int.parse(ms))
+      Ok(#(seconds, ms))
+    }
+    Error(_) -> {
+      use seconds <- result.try(int.parse(input))
+      Ok(#(seconds, 0))
+    }
+  }
+}
diff --git a/test/glubs/webvtt_test.gleam b/test/glubs/webvtt_test.gleam
new file mode 100644
index 0000000..df404f6
--- /dev/null
+++ b/test/glubs/webvtt_test.gleam
@@ -0,0 +1,55 @@
+import gleeunit/should
+import gleam/option.{None, Some}
+import glubs/webvtt.{EndTag, StartTag, Text, Timestamp}
+
+pub fn tokenize_text_test() {
+  "Hello"
+  |> webvtt.tokenize()
+  |> should.equal(Ok([Text("Hello")]))
+}
+
+pub fn tokenize_voice_test() {
+  "<v.loud.shout Rob>Hello</v>"
+  |> webvtt.tokenize()
+  |> should.equal(Ok([
+    StartTag("v", classes: ["loud", "shout"], annotation: Some("Rob")),
+    Text("Hello"),
+    EndTag("v"),
+  ]))
+}
+
+pub fn timestamp_tag_test() {
+  "Hello <00:19.500>Phil. <01:00:00.500>How are you?"
+  |> webvtt.tokenize()
+  |> should.equal(Ok([
+    Text("Hello "),
+    Timestamp(19_500),
+    Text("Phil. "),
+    Timestamp(3_600_500),
+    Text("How are you?"),
+  ]))
+}
+
+pub fn complex_test() {
+  "<v Phil>Hi!\n<v.loud.shout Rob>Hello <i>mate!</i></v>"
+  |> webvtt.tokenize()
+  |> should.equal(Ok([
+    StartTag("v", classes: [], annotation: Some("Phil")),
+    Text("Hi!\n"),
+    StartTag("v", classes: ["loud", "shout"], annotation: Some("Rob")),
+    Text("Hello "),
+    StartTag("i", classes: [], annotation: None),
+    Text("mate!"),
+    EndTag("i"),
+    EndTag("v"),
+  ]))
+}
+
+pub fn colon_in_annotation_test() {
+  "<v Rob: a:b:c>Hi"
+  |> webvtt.tokenize()
+  |> should.equal(Ok([
+    StartTag("v", classes: [], annotation: Some("Rob: a:b:c")),
+    Text("Hi"),
+  ]))
+}
diff --git a/test/glubs_test.gleam b/test/glubs_test.gleam
new file mode 100644
index 0000000..ecd12ad
--- /dev/null
+++ b/test/glubs_test.gleam
@@ -0,0 +1,5 @@
+import gleeunit
+
+pub fn main() {
+  gleeunit.main()
+}