forked from os2loop/os2loop
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 87843f5
Showing
14 changed files
with
758 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Review workflow: checks that the changelog has been updated and runs the
# Markdown and shell script coding standards checks for the `ai-stuff` branch.
on:
  push:
    branches:
      - 'ai-stuff'
  pull_request:
    branches:
      - 'ai-stuff'

name: Review

jobs:
  changelog:
    runs-on: ubuntu-latest
    name: Changelog should be updated
    # `github.base_ref` is only set for pull request events. On push there is
    # no base branch to compare against, so the job is skipped instead of
    # passing by accident on a failing `git diff origin/`.
    if: github.event_name == 'pull_request'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: Git fetch
        run: git fetch

      - name: Check that changelog has been updated.
        env:
          # Passed through the environment rather than interpolated into the
          # script text.
          BASE_REF: ${{ github.base_ref }}
        run: |
          # `git diff --exit-code` exits with 0 when CHANGELOG.md is identical
          # to the base branch, 1 when it differs, and another non-zero status
          # on errors (e.g. an unknown ref). Only "differs" is a pass.
          status=0
          git diff --exit-code "origin/${BASE_REF}" -- CHANGELOG.md || status=$?
          case "$status" in
            0) echo "::error::CHANGELOG.md has not been updated"; exit 1 ;;
            1) exit 0 ;;
            *) exit "$status" ;;
          esac

  coding-standards-markdown:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Coding standards
        run: |
          docker run --rm --volume "$PWD:/md" peterdavehello/markdownlint markdownlint '**/*.md'

  coding-standards-shellcheck:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Coding standards
        run: |
          docker run --rm --volume "$PWD:/mnt" koalaman/shellcheck:stable */*.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{
  // markdownlint configuration: start from the default rule set and adjust
  // the individual rules listed below.
  "default": true,

  // Line length: allow up to 120 characters and do not apply the limit to
  // code blocks and tables.
  // https://github.com/DavidAnson/markdownlint/blob/main/doc/md013.md
  "line-length": {
    "line_length": 120,
    "code_blocks": false,
    "tables": false
  },

  // Duplicate headings: only compare sibling headings.
  // https://github.com/DavidAnson/markdownlint/blob/main/doc/md024.md
  "no-duplicate-heading": {
    "siblings_only": true
  },

  // Inline HTML: allow the elements used for collapsed sections on GitHub.
  // https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections#creating-a-collapsed-section
  // https://github.com/DavidAnson/markdownlint/blob/main/doc/md033.md
  "no-inline-html": {
    "allowed_elements": ["details", "summary"]
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Changelog | ||
|
||
All notable changes to this project will be documented in this file. | ||
|
||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), | ||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). | ||
|
||
## [Unreleased] | ||
|
||
### Added | ||
|
||
- Data export script | ||
|
||
[Unreleased]: https://github.com/itk-dev/os2loop/tree/ai-stuff |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# OS2Loop AI stuff | ||
|
||
``` shell | ||
git clone --branch ai-stuff https://github.com/itk-dev/os2loop os2loop-ai-stuff | ||
``` | ||
|
||
## Scripts | ||
|
||
``` shell | ||
./os2loop-ai-stuff/data-export/export.sh project-dir site-uri
``` | ||
|
||
## Development | ||
|
||
``` shell | ||
task | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Task definitions for local development. Running `task` without arguments
# lists the available tasks.
version: '3'

tasks:
  # Default task: print the task list.
  default:
    silent: true
    cmds:
      - task --list

  # Aggregate task: applies the automatic fixes first, then runs ShellCheck.
  # NOTE(review): coding-standards:check:markdownlint is not invoked here -
  # confirm that relying on the `--fix` run in coding-standards:apply is
  # intended.
  coding-standards:check:
    desc: "Apply coding standards and run checks"
    cmds:
      - task: coding-standards:apply
      - task: coding-standards:check:shellcheck

  # Aggregate task for everything that can be fixed automatically.
  coding-standards:apply:
    desc: "Apply coding standards"
    cmds:
      - task: coding-standards:apply:markdownlint

  # Lint all Markdown files and fix what can be fixed in place.
  coding-standards:apply:markdownlint:
    desc: "Run markdownlint-cli (https://github.com/igorshubovych/markdownlint-cli)"
    cmds:
      - docker run --rm --volume "$PWD:/md" peterdavehello/markdownlint markdownlint '**/*.md' --fix

  # Lint all Markdown files without modifying them.
  coding-standards:check:markdownlint:
    desc: "Run markdownlint-cli (https://github.com/igorshubovych/markdownlint-cli)"
    cmds:
      - docker run --rm --volume "$PWD:/md" peterdavehello/markdownlint markdownlint '**/*.md'

  # Check the shell scripts found one directory level below the working
  # directory.
  coding-standards:check:shellcheck:
    desc: "Run ShellCheck (https://github.com/koalaman/shellcheck)"
    cmds:
      - docker run --rm --volume "$PWD:/mnt" koalaman/shellcheck:stable */*.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
*.csv | ||
*.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#!/usr/bin/env bash
# Strict mode: -e (errexit), -E (errtrace), -C (noclobber), -u (nounset) and
# pipefail. Because of noclobber, existing files must be overwritten with `>|`.
set -eECu -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'

# Absolute path of the directory that contains this script.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||
# Print an optional message followed by the usage line on stderr, then exit
# with status 1.
#
# Arguments:
#   $1 - optional message printed on its own line before the usage line.
usage() {
  local message="${1:-}"

  if [[ -n "$message" ]]; then
    printf '%s\n' "$message" >&2
  fi

  printf 'Usage: %s project-dir site-uri\n' "${BASH_SOURCE[0]}" >&2
  exit 1
}
|
||
# Two positional arguments are required: the project directory and the site
# URI that is passed to drush.
[[ $# -ge 2 ]] || usage "Too few arguments"

project_dir="$1"
uri="$2"

[[ -n "$project_dir" ]] || usage "Invalid project directory"

if [[ ! -d "$project_dir" ]]; then
  printf 'Project directory "%s" does not exist\n' "$project_dir" >&2
  exit 1
fi

[[ -n "$uri" ]] || usage "Invalid site-uri"

# The drush binary is addressed relative to the project directory below.
cd "$project_dir"
|
||
# Collect the SQL export definitions located next to this script. With
# nullglob an unmatched pattern expands to nothing instead of to itself, so a
# missing export file is detected here rather than producing junk output.
shopt -s nullglob
filenames=("$script_dir"/export_*.sql)
shopt -u nullglob

if (( ${#filenames[@]} == 0 )); then
  echo "No export_*.sql files found in $script_dir" >&2
  exit 1
fi

# Run every export query twice: once through the Drupal database API (JSON)
# and once through the SQL command line client (CSV). Each export is
# best-effort: a failure is reported on stderr and the loop continues.
for filename in "${filenames[@]}"; do
  echo "$filename"

  # JSON

  # https://tldp.org/LDP/abs/html/string-manipulation.html
  output_filename=${filename/%.sql/.json}
  # https://github.com/drush-ops/drush/issues/3071#issuecomment-347929777
  vendor/bin/drush --uri="$uri" php:eval "return \Drupal::database()->query(file_get_contents('$filename'))->fetchAll()" --format=json >| "$output_filename" \
    || echo "Warning: JSON export of $filename failed" >&2
  echo "$output_filename"

  # CSV

  output_filename=${filename/%.sql/.csv}
  # Convert the tab separated client output to comma separated values. Fields
  # that contain a comma and are not already wrapped in double quotes get
  # quoted, with embedded double quotes doubled.
  # https://stackoverflow.com/a/22421445/2502647
  vendor/bin/drush --uri="$uri" sql:cli < "$filename" | awk 'BEGIN { FS="\t"; OFS="," } {
    rebuilt=0
    for(i=1; i<=NF; ++i) {
      if ($i ~ /,/ && $i !~ /^".*"$/) {
        gsub("\"", "\"\"", $i)
        $i = "\"" $i "\""
        rebuilt=1
      }
    }
    # Assigning a field forces awk to rebuild the record with OFS.
    if (!rebuilt) { $1=$1 }
    # Emit the converted record (without this the CSV file stays empty).
    print
  }' >| "$output_filename" \
    || echo "Warning: CSV export of $filename failed" >&2
  echo "$output_filename"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
-- Export of nodes of type os2loop_documents_collection together with their
-- path alias, the documents in each collection, shared fields and tags.
--
-- This file is executed both through the SQL command line client and through
-- the Drupal database API, so it holds a single statement without a trailing
-- statement delimiter.
SELECT
    n_fd.nid,
    n_fd.type,
    n_fd.title,
    pa.`alias` AS relative_url,
    DATE_FORMAT(FROM_UNIXTIME(n_fd.created), '%Y-%m-%dT%H:%i:%s') AS created,
    DATE_FORMAT(FROM_UNIXTIME(n_fd.changed), '%Y-%m-%dT%H:%i:%s') AS `changed`,
    dci.document_node_ids,
    dci.document_relative_urls,
    -- All content values are rich text (HTML and div encoded).
    docscol_content.os2loop_documents_dc_content_value AS content,
    docs_ib.os2loop_documents_info_box_value AS info_box,
    approval_date.os2loop_shared_approval_date_value AS approval_date,
    `subject`.`name` AS `subject`,
    tags.tags,
    `owner`.os2loop_shared_owner_value AS `owner`,
    rev_date.os2loop_shared_rev_date_value AS review_date,
    `version`.os2loop_shared_version_value AS `version`
FROM node_field_data AS n_fd
-- NOTE(review) - assumes at most one path_alias row per node path, otherwise
-- rows are duplicated. Confirm against the data.
LEFT JOIN path_alias AS pa
    ON pa.path = CONCAT('/node/', n_fd.nid)
-- The table os2loop_documents_collection_item associates document collections
-- (collection_id is the nid of the collection) with documents (document_id is
-- the nid of the document), except for 20 document collections. For example
-- /rammedelegation (nid 3807) is a collection of links to SharePoint documents
-- and /medicinhaandtering (nid 3827) is a link to the collection
-- /instruks-korrekt-haandtering-af-medicin-i-sundhed-og-omsorg-mso (nid 4188).
-- Of 805 documents, 164 are not assigned to a document collection.
LEFT JOIN (
    SELECT
        doc_col_itm.collection_id,
        JSON_ARRAYAGG(doc_col_itm.document_id) AS document_node_ids,
        JSON_ARRAYAGG(doc_pa.`alias`) AS document_relative_urls
    FROM os2loop_documents_collection_item AS doc_col_itm
    LEFT JOIN path_alias AS doc_pa
        ON doc_pa.path = CONCAT('/node/', doc_col_itm.document_id)
    GROUP BY doc_col_itm.collection_id
) AS dci
    ON dci.collection_id = n_fd.nid
-- Contains only records from the document collection bundle (all with delta 0,
-- so top placement).
LEFT JOIN node__os2loop_documents_dc_content AS docscol_content
    ON docscol_content.entity_id = n_fd.nid
-- Info boxes are shared with other bundles, so restrict to this bundle.
LEFT JOIN node__os2loop_documents_info_box AS docs_ib
    ON docs_ib.entity_id = n_fd.nid
    AND docs_ib.bundle = 'os2loop_documents_collection'
LEFT JOIN node__os2loop_shared_approval_date AS approval_date
    ON approval_date.entity_id = n_fd.nid
-- Subject term name of the collection.
LEFT JOIN (
    SELECT
        n_ss.entity_id,
        subject_tt_fd.name
    FROM node__os2loop_shared_subject AS n_ss
    LEFT JOIN taxonomy_term_field_data AS subject_tt_fd
        ON subject_tt_fd.tid = n_ss.os2loop_shared_subject_target_id
    WHERE n_ss.bundle = 'os2loop_documents_collection'
) AS `subject`
    ON `subject`.entity_id = n_fd.nid
LEFT JOIN node__os2loop_shared_owner AS `owner`
    ON `owner`.entity_id = n_fd.nid
LEFT JOIN node__os2loop_shared_rev_date AS rev_date
    ON rev_date.entity_id = n_fd.nid
-- Tag term names of the collection, aggregated as one JSON array per node.
LEFT JOIN (
    SELECT
        n_st.entity_id,
        JSON_ARRAYAGG(tt_fd.name) AS tags
    FROM node__os2loop_shared_tags AS n_st
    LEFT JOIN taxonomy_term_field_data AS tt_fd
        ON tt_fd.tid = n_st.os2loop_shared_tags_target_id
    WHERE n_st.bundle = 'os2loop_documents_collection'
    GROUP BY n_st.entity_id
) AS tags
    ON tags.entity_id = n_fd.nid
LEFT JOIN node__os2loop_shared_version AS `version`
    ON `version`.entity_id = n_fd.nid
WHERE n_fd.type = 'os2loop_documents_collection'
-- Stable row order so that repeated exports can be compared.
ORDER BY n_fd.nid
Oops, something went wrong.