Skip to content

Commit

Permalink
structured according to PkgTemplate (#4)
Browse files Browse the repository at this point in the history
* structured according to PkgTemplate

* structured according to PkgTemplate, other changes

* dependency changes

* dependency changes

* code improvements
  • Loading branch information
splendidbug authored Aug 15, 2024
1 parent 4390d9d commit 9318509
Show file tree
Hide file tree
Showing 26 changed files with 977 additions and 453 deletions.
3 changes: 3 additions & 0 deletions .JuliaFormatter.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# See https://domluna.github.io/JuliaFormatter.jl/stable/ for a list of options
style = "sciml"
ignore = ["knowledge_packs"]
7 changes: 7 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/" # Location of package manifests
schedule:
interval: "weekly"
76 changes: 76 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
name: CI
on:
push:
branches:
- main
tags: ["*"]
pull_request:
workflow_dispatch:
concurrency:
# Skip intermediate builds: always.
# Cancel intermediate builds: only if it is a pull request build.
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
runs-on: ${{ matrix.os }}
timeout-minutes: 60
permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created
actions: write
contents: read
strategy:
fail-fast: false
matrix:
version:
- "1.10"
os:
- ubuntu-latest
arch:
- x64
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v2
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
# - uses: codecov/codecov-action@v4
# with:
# files: lcov.info
# token: ${{ secrets.CODECOV_TOKEN }}
# fail_ci_if_error: false
docs:
name: Documentation
runs-on: ubuntu-latest
permissions:
actions: write # needed to allow julia-actions/cache to proactively delete old caches that it has created
contents: write
statuses: write
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
with:
version: "1"
- uses: julia-actions/cache@v2
- name: Configure doc environment
shell: julia --project=docs --color=yes {0}
run: |
using Pkg
Pkg.develop(PackageSpec(path=pwd()))
Pkg.instantiate()
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-docdeploy@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
- name: Run doctests
shell: julia --project=docs --color=yes {0}
run: |
using Documenter: DocMeta, doctest
using DocsScraper
DocMeta.setdocmeta!(DocsScraper, :DocTestSetup, :(using DocsScraper); recursive=true)
doctest(DocsScraper)
16 changes: 16 additions & 0 deletions .github/workflows/CompatHelper.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: CompatHelper
on:
schedule:
- cron: 0 0 1 * *
workflow_dispatch:
jobs:
CompatHelper:
runs-on: ubuntu-latest
steps:
- name: Pkg.add("CompatHelper")
run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- name: CompatHelper.main()
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
run: julia -e 'using CompatHelper; CompatHelper.main()'
31 changes: 31 additions & 0 deletions .github/workflows/TagBot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: TagBot
on:
issue_comment:
types:
- created
workflow_dispatch:
inputs:
lookback:
default: "3"
permissions:
actions: read
checks: read
contents: write
deployments: read
issues: read
discussions: read
packages: read
pages: read
pull-requests: read
repository-projects: read
security-events: read
statuses: read
jobs:
TagBot:
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
runs-on: ubuntu-latest
steps:
- uses: JuliaRegistries/TagBot@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
ssh: ${{ secrets.DOCUMENTER_KEY }}
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# Ignore .env files
.env
knowledge_packs/
Manifest.toml
Manifest.toml
/Manifest.toml
/docs/Manifest.toml
/docs/build/
.vscode/**
**/.DS_Store
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) Shreyas Agrawal @splendidbug and contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
46 changes: 37 additions & 9 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,22 +1,50 @@
name = "RAGKit"
uuid = "74e640d8-05f4-4b4f-8742-56fc934b3f17"
authors = ["Shreyas Agrawal <[email protected]>"]
name = "DocsScraper"
uuid = "bd71d052-5e08-40cc-a492-eb4e8da4b649"
authors = ["Shreyas Agrawal @splendidbug and contributors"]
version = "0.1.0"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
DotEnv = "4dc1fcf4-5e3b-5448-94ab-0c38ec0385c1"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615"
Gumbo = "708ec375-b3d6-5a57-a7ce-8257bf98657a"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
Inflate = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
PromptingTools = "670122d1-24a8-4d70-bfce-740807c42192"
URIParser = "30578b45-9adc-5946-b283-645ec420af67"
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[compat]
AbstractTrees = "0.4.5"
Gumbo = "0.8.2"
HTTP = "1.10.4"
URIs = "1.5.1"
AbstractTrees = "0.4"
Aqua = "0.8"
Dates = "1"
EzXML = "1.2"
Gumbo = "0.8"
HDF5 = "0.17"
HTTP = "1.10"
Inflate = "0.1"
LinearAlgebra = "1"
PromptingTools = "0.48"
SHA = "0.7"
Serialization = "1"
SparseArrays = "1"
Tar = "1"
Test = "1"
URIs = "1.5"
Unicode = "1"
julia = "1.10"
JSON = "0.21"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Aqua", "Test"]
15 changes: 15 additions & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
DocsScraper = "bd71d052-5e08-40cc-a492-eb4e8da4b649"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DotEnv = "4dc1fcf4-5e3b-5448-94ab-0c38ec0385c1"
EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615"
Gumbo = "708ec375-b3d6-5a57-a7ce-8257bf98657a"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
Inflate = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9"
LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589"
PromptingTools = "670122d1-24a8-4d70-bfce-740807c42192"
Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
URIParser = "30578b45-9adc-5946-b283-645ec420af67"
URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
24 changes: 24 additions & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using DocsScraper
using Documenter

# Register `using DocsScraper` as the doctest setup expression for the package
# (applied with `recursive = true`), so doctest snippets can call package names
# without importing the package themselves.
DocMeta.setdocmeta!(DocsScraper, :DocTestSetup, :(using DocsScraper); recursive = true)

# Build the site. The HTML format and the page list are named locally first so
# the `makedocs` call itself stays short; the `let` keeps these helper names
# out of the script's global scope.
let html_format = Documenter.HTML(;
        repolink = "https://github.com/splendidbug/DocsScraper.jl",
        canonical = "https://splendidbug.github.io/DocsScraper.jl",
        edit_link = "main",
        assets = String[]),
    site_pages = ["API Index" => "index.md"]

    makedocs(;
        modules = [DocsScraper],
        authors = "Shreyas Agrawal @splendidbug and contributors",
        sitename = "DocsScraper.jl",
        repo = "https://github.com/splendidbug/DocsScraper.jl/blob/{commit}{path}#{line}",
        format = html_format,
        pages = site_pages
    )
end

# Deploy the built documentation for the GitHub repository, with `main` as the
# development branch.
deploydocs(; repo = "github.com/splendidbug/DocsScraper.jl", devbranch = "main")
8 changes: 8 additions & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Reference

```@index
```

```@autodocs
Modules = [DocsScraper]
```
19 changes: 11 additions & 8 deletions src/RAGKit.jl → src/DocsScraper.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module RAGKit
module DocsScraper
using HTTP, Gumbo, AbstractTrees, URIs
using Gumbo: HTMLDocument, HTMLElement
using EzXML
Expand All @@ -9,20 +9,23 @@ using LinearAlgebra, Unicode, SparseArrays
using HDF5
using Tar
using Inflate

using SHA
using Serialization, URIs
# using Regex

# using Robots
using Dates
using JSON

include("parser.jl")
include("crawl.jl")
include("extract_urls.jl")
include("preparation.jl")
include("extract_package_name.jl")
export get_package_name

include("make_embeddings.jl")
export make_embeddings
include("make_knowledge_packs.jl")
export make_knowledge_packs

include("user_preferences.jl")
include("utils.jl")
export remove_urls_from_index, urls_for_metadata

end
end
Loading

0 comments on commit 9318509

Please sign in to comment.