Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
philipgiuliani committed Nov 16, 2023
0 parents commit b2a9624
Show file tree
Hide file tree
Showing 9 changed files with 248 additions and 0 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
name: test

on:
push:
branches:
- master
- main
pull_request:

jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: erlef/setup-beam@v1
with:
otp-version: "26.0.2"
gleam-version: "0.32.4"
rebar3-version: "3"
# elixir-version: "1.15.4"
- run: gleam deps download
- run: gleam test
- run: gleam format --check src test
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
*.beam
*.ez
build
erl_crash.dump
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# glubs

[![Package Version](https://img.shields.io/hexpm/v/glubs)](https://hex.pm/packages/glubs)
[![Hex Docs](https://img.shields.io/badge/hex-docs-ffaff3)](https://hexdocs.pm/glubs/)

## Quick start

```sh
gleam run # Run the project
gleam test # Run the tests
gleam shell # Run an Erlang shell
```

## Installation

If available on Hex, this package can be added to your Gleam project:

```sh
gleam add glubs
```

and its documentation can be found at <https://hexdocs.pm/glubs>.
16 changes: 16 additions & 0 deletions gleam.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name = "glubs"
version = "0.1.0"

# Fill out these fields if you intend to generate HTML documentation or publish
# your project to the Hex package manager.
#
# description = ""
# licences = ["Apache-2.0"]
# repository = { type = "github", user = "username", repo = "project" }
# links = [{ title = "Website", href = "https://gleam.run" }]

[dependencies]
gleam_stdlib = "~> 0.32"

[dev-dependencies]
gleeunit = "~> 1.0"
11 changes: 11 additions & 0 deletions manifest.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# This file was generated by Gleam
# You typically do not need to edit this file

packages = [
{ name = "gleam_stdlib", version = "0.32.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "ABF00CDCCB66FABBCE351A50060964C4ACE798F95A0D78622C8A7DC838792577" },
{ name = "gleeunit", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "D3682ED8C5F9CAE1C928F2506DE91625588CC752495988CBE0F5653A42A6F334" },
]

[requirements]
gleam_stdlib = { version = "~> 0.32" }
gleeunit = { version = "~> 1.0" }
1 change: 1 addition & 0 deletions src/glubs.gleam
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

120 changes: 120 additions & 0 deletions src/glubs/webvtt.gleam
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import gleam/option.{None, Some}
import gleam/string
import gleam/result
import gleam/list
import gleam/int

/// A single lexical unit produced by `tokenize`.
pub type Token {
/// An opening tag such as `<v.loud Rob>`: the tag name, the dot-separated
/// classes that follow it, and the optional text after the first space.
StartTag(
tag: String,
classes: List(String),
annotation: option.Option(String),
)
/// A run of plain text found outside of any `<...>` tag.
Text(content: String)
/// A timestamp tag such as `<00:19.500>`, converted to milliseconds.
Timestamp(ms: Int)
/// A closing tag such as `</v>`, holding the text between `</` and `>`.
EndTag(tag: String)
}

/// Reasons why `tokenize` can fail.
pub type TokenizationError {
/// A `<` was found without a closing `>` after it.
InvalidStartToken
/// A `</` was found without a closing `>` after it.
InvalidEndToken
}

/// Splits a cue payload into a list of tokens, in source order.
///
/// Returns an error when a tag is opened with `<` or `</` but never closed
/// with `>`.
pub fn tokenize(payload: String) -> Result(List(Token), TokenizationError) {
  // The worker prepends tokens as it goes, so its result is back-to-front.
  case do_tokenize(payload, []) {
    Ok(reversed) -> Ok(list.reverse(reversed))
    Error(reason) -> Error(reason)
  }
}

/// Recursive worker behind `tokenize`.
///
/// Consumes `payload` from the front and prepends each recognised token to
/// `acc`, so the returned list is in reverse source order.
fn do_tokenize(
  payload: String,
  acc: List(Token),
) -> Result(List(Token), TokenizationError) {
  case payload {
    // Nothing left to read.
    "" -> Ok(acc)

    // Closing tag: everything up to the next `>` is the tag name.
    "</" <> after_open ->
      case string.split_once(after_open, on: ">") {
        Error(Nil) -> Error(InvalidEndToken)
        Ok(#(name, remainder)) ->
          do_tokenize(remainder, [EndTag(tag: name), ..acc])
      }

    // Opening tag or timestamp: try the timestamp reading first and fall
    // back to a start tag when it does not parse.
    "<" <> after_open ->
      case string.split_once(after_open, on: ">") {
        Error(Nil) -> Error(InvalidStartToken)
        Ok(#(inner, remainder)) -> {
          let token = case parse_timestamp(inner) {
            Ok(ms) -> Timestamp(ms)
            Error(_) -> parse_start_tag(inner)
          }
          do_tokenize(remainder, [token, ..acc])
        }
      }

    // Plain text: runs until the next `<`, or to the end of the payload.
    text ->
      case string.split_once(text, on: "<") {
        Error(Nil) -> Ok([Text(text), ..acc])
        Ok(#(content, remainder)) ->
          // `split_once` drops the separator, so put the `<` back.
          do_tokenize("<" <> remainder, [Text(content), ..acc])
      }
  }
}

/// Builds a `StartTag` from the text found between `<` and `>`.
///
/// Everything after the first space is the annotation; the part before it
/// is the tag name followed by its dot-separated classes.
fn parse_start_tag(input: String) -> Token {
  let #(head, annotation) = case string.split_once(input, on: " ") {
    Ok(#(before, after)) -> #(before, Some(after))
    Error(_) -> #(input, None)
  }
  let #(tag, classes) = parse_tag_and_classes(head)

  StartTag(tag: tag, classes: classes, annotation: annotation)
}

/// Splits `tag.class1.class2` into the tag name and its list of classes.
///
/// The first dot-separated segment is the tag name; any remaining segments
/// are returned as classes in their original order.
fn parse_tag_and_classes(input: String) -> #(String, List(String)) {
  case string.split(input, on: ".") {
    [tag, ..classes] -> #(tag, classes)
    // An empty list is not expected from `string.split`; this clause makes
    // the match total so it does not rely on a partial `let` pattern.
    [] -> #(input, [])
  }
}

/// Parses a timestamp of the form `hh:mm:ss.ttt` or `mm:ss.ttt` into a
/// total number of milliseconds.
///
/// Returns `Error(Nil)` when the input does not have two or three
/// colon-separated parts, or when any part fails to parse as a number.
fn parse_timestamp(input: String) -> Result(Int, Nil) {
  case string.split(input, on: ":") {
    [hours, minutes, seconds_and_ms] -> {
      use hours <- result.try(int.parse(hours))
      use minutes <- result.try(int.parse(minutes))
      use #(seconds, ms) <- result.try(split_seconds(seconds_and_ms))

      Ok({ seconds + minutes * 60 + hours * 60 * 60 } * 1000 + ms)
    }

    [minutes, seconds_and_ms] -> {
      use minutes <- result.try(int.parse(minutes))
      use #(seconds, ms) <- result.try(split_seconds(seconds_and_ms))

      Ok({ seconds + minutes * 60 } * 1000 + ms)
    }

    // Any other number of segments (none, one, or four and more) is not a
    // timestamp. A catch-all keeps input such as `1:2:3:4` from falling
    // through the case expression and crashing.
    _ -> Error(Nil)
  }
}

/// Parses the `ss` or `ss.ttt` part of a timestamp into whole seconds and
/// milliseconds.
///
/// Without a `.` the whole input is read as seconds and the milliseconds
/// are `0`. With a `.`, the fraction must be exactly three characters long
/// and must not be negative; anything else yields `Error(Nil)`.
fn split_seconds(input: String) -> Result(#(Int, Int), Nil) {
  case string.split_once(input, on: ".") {
    Ok(#(seconds, fraction)) -> {
      use seconds <- result.try(int.parse(seconds))
      use ms <- result.try(int.parse(fraction))

      // The fraction is a fixed-width millisecond field. Reading a shorter
      // one as a plain integer would turn "19.5" into 5 ms, so it is
      // rejected rather than silently misread.
      case string.length(fraction) == 3 && ms >= 0 {
        True -> Ok(#(seconds, ms))
        False -> Error(Nil)
      }
    }
    Error(_) -> {
      use seconds <- result.try(int.parse(input))
      Ok(#(seconds, 0))
    }
  }
}
46 changes: 46 additions & 0 deletions test/glubs/webvtt_test.gleam
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import gleeunit/should
import gleam/option.{None, Some}
import glubs/webvtt.{EndTag, StartTag, Text, Timestamp}

// A payload without any tags becomes a single text token.
pub fn tokenize_text_test() {
  let expected = Ok([Text("Hello")])

  webvtt.tokenize("Hello")
  |> should.equal(expected)
}

// A voice tag carries its classes and its annotation.
pub fn tokenize_voice_test() {
  let expected =
    Ok([
      StartTag("v", classes: ["loud", "shout"], annotation: Some("Rob")),
      Text("Hello"),
      EndTag("v"),
    ])

  webvtt.tokenize("<v.loud.shout Rob>Hello</v>")
  |> should.equal(expected)
}

// Both the `mm:ss.ttt` and the `hh:mm:ss.ttt` forms become millisecond
// timestamps between the surrounding text tokens.
pub fn timestamp_tag_test() {
  let expected =
    Ok([
      Text("Hello "),
      Timestamp(19_500),
      Text("Phil. "),
      Timestamp(3_600_500),
      Text("How are you?"),
    ])

  webvtt.tokenize("Hello <00:19.500>Phil. <01:00:00.500>How are you?")
  |> should.equal(expected)
}

// Several voices, a nested tag and a line break in one payload.
pub fn complex_test() {
  let expected =
    Ok([
      StartTag("v", classes: [], annotation: Some("Phil")),
      Text("Hi!\n"),
      StartTag("v", classes: ["loud", "shout"], annotation: Some("Rob")),
      Text("Hello "),
      StartTag("i", classes: [], annotation: None),
      Text("mate!"),
      EndTag("i"),
      EndTag("v"),
    ])

  webvtt.tokenize("<v Phil>Hi!\n<v.loud.shout Rob>Hello <i>mate!</i></v>")
  |> should.equal(expected)
}
5 changes: 5 additions & 0 deletions test/glubs_test.gleam
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import gleeunit

/// Entry point of the test module; delegates to `gleeunit.main`.
pub fn main() {
gleeunit.main()
}

0 comments on commit b2a9624

Please sign in to comment.