Skip to content

Commit

Permalink
Merge pull request #62 from fhdsl/cansavvy/write-csv
Browse files Browse the repository at this point in the history
Saving github data to our ITN thing
  • Loading branch information
cansavvy authored Jan 19, 2024
2 parents 1faf179 + 7f8dbfb commit 025074b
Show file tree
Hide file tree
Showing 15 changed files with 270 additions and 77 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
resources/*
^doc$
^Meta$
inst/extdata/docker/*
74 changes: 74 additions & 0 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Candace Savonen Jan 2024
#
# Manually triggered workflow that builds the Docker image defined in
# inst/extdata/docker and, when requested, pushes it to Dockerhub under the
# tag supplied at dispatch time.

name: Build Docker

on:
  workflow_dispatch:
    inputs:
      dockerhubpush:
        description: 'Push to Dockerhub?'
        required: true
        default: 'false'
      tag:
        description: 'What tag to use?'
        required: true
        default: 'none'

jobs:
  build-docker:
    name: Build Docker image
    runs-on: ubuntu-latest

    steps:
      - name: checkout repo
        uses: actions/checkout@v3

      - name: Login as jhudsl-robot
        run: |
          # --global rather than --system: this job does not run as root, so
          # the system-wide git config is not writable.
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          git config --local user.email "[email protected]"
          git config --local user.name "jhudsl-robot"

      # Set up Docker build (only needed once per job)
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      # Setup layer cache
      - name: Cache Docker layers
        uses: actions/cache@v2
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-

      # The Dockerfile COPYs git_token.txt, so the file has to be written into
      # the build context. The secret is passed through the environment rather
      # than interpolated into the command line.
      - name: Get token
        env:
          GH_PAT: ${{ secrets.GH_PAT }}
        run: printf '%s\n' "$GH_PAT" > inst/extdata/docker/git_token.txt

      # Build docker image
      # The context is the directory holding the Dockerfile and the files it
      # copies. The image name matches the container used by write-data.yaml.
      - name: Build Docker image
        uses: docker/build-push-action@v2
        with:
          push: false
          load: true
          context: inst/extdata/docker
          file: inst/extdata/docker/Dockerfile
          tags: cansav09/metricminer:latest

      # Login to Dockerhub
      - name: Login to DockerHub
        if: ${{ github.event.inputs.dockerhubpush != 'false' }}
        uses: docker/login-action@v1
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Push the Docker image if set to true from a manual trigger
      # The user-supplied tag goes through an environment variable so it is
      # never evaluated as shell code.
      - name: Push Docker image if manual trigger set to true
        if: ${{ github.event.inputs.dockerhubpush != 'false' }}
        env:
          IMAGE_TAG: ${{ github.event.inputs.tag }}
        run: |
          docker tag cansav09/metricminer:latest "cansav09/metricminer:$IMAGE_TAG"
          docker push "cansav09/metricminer:$IMAGE_TAG"
80 changes: 55 additions & 25 deletions .github/workflows/write-data.yaml
Original file line number Diff line number Diff line change
@@ -1,31 +1,19 @@

name: Writing GitHub Metrics to Googlesheet
on:
workflow_dispatch:
pull_request:
schedule:
# - cron: '*/2 * * * *'
- cron: '0 0 * * 0'

jobs:
write-data:
name: Write Data
runs-on: ubuntu-latest
steps:
- name: Set up R
uses: r-lib/actions/setup-r@v2

- name: Install googlesheets4
uses: r-lib/actions/setup-r-dependencies@v2
with:
packages: |
any::googlesheets4
- name: Install remotes
run: |
Rscript -e "install.packages('remotes')"
- name: Install metricminer from Github
run: |
Rscript -e "remotes::install_github("fhdsl/metricminer")"
- name: Check out repository
uses: actions/checkout@v3
container:
image: cansav09/metricminer

steps:
- name: Authorize metricminer
env:
METRICMINER_CALENDLY: ${{ secrets.METRICMINER_CALENDLY }}
Expand All @@ -34,17 +22,59 @@ jobs:
METRICMINER_GOOGLE_REFRESH: ${{ secrets.METRICMINER_GOOGLE_REFRESH }}
run: |
# Authorize Calendly
auth_from_secret("calendly", token = Sys.getenv("METRICMINER_CALENDLY"))
metricminer::auth_from_secret("calendly", token = Sys.getenv("METRICMINER_CALENDLY"))
# Authorize GitHub
auth_from_secret("github", token = Sys.getenv("METRICMINER_GITHUB_PAT"))
metricminer::auth_from_secret("github", token = Sys.getenv("METRICMINER_GITHUB_PAT"))
# Authorize Google
auth_from_secret("google",
metricminer::auth_from_secret("google",
refresh_token = Sys.getenv("METRICMINER_GOOGLE_REFRESH"),
access_token = Sys.getenv("METRICMINER_GOOGLE_ACCESS"),
cache = TRUE
)
shell: Rscript {0}

- name: Write data
run: # Rscript -e 'source("R/write-data.R")'
run: |
# Collect time-course GitHub metrics for the tracked repositories and merge
# them into the shared Google Sheet, keeping only distinct rows.
gsheet <- "https://docs.google.com/spreadsheets/d/1lk3vMgE4CNuACrI1mzHrv6AsvXbdhw1zJENMkIEhrZs/edit#gid=0"

# Repositories to collect metrics for, written as "owner/repo".
repo_names <- c(
  "fhdsl/metricminer",
  "fhdsl/metricminer.org",
  "jhudsl/ottrpal",
  "jhudsl/ari",
  "jhudsl/cow",
  "jhudsl/ottrproject.org",
  "jhudsl/ottr_docker",
  "jhudsl/ottr-reports",
  "fhdsl/conrad",
  "jhudsl/text2speech",
  "jhudsl/OTTR_Quizzes",
  "jhudsl/OTTR_Template",
  "jhudsl/OTTR_Template_Website",
  "jhudsl/ITCR_Tables",
  "jhudsl/ITN_Platforms",
  "fhdsl/Choosing_Genomics_Tools",
  "jhudsl/Informatics_Research_Leadership",
  "jhudsl/Documentation_and_Usability",
  "jhudsl/Reproducibility_in_Cancer_Informatics",
  "jhudsl/Adv_Reproducibility_in_Cancer_Informatics",
  "fhdsl/GitHub_Automation_for_Scientists",
  "jhudsl/Computing_for_Cancer_Informatics",
  "fhdsl/Overleaf_and_LaTeX_for_Scientific_Articles",
  "fhdsl/Ethical_Data_Handling_for_Cancer_Research",
  "fhdsl/AI_for_Decision_Makers",
  "fhdsl/AI_for_Efficient_Programming",
  "fhdsl/NIH_Data_Sharing"
)

repo_metrics <- metricminer::get_multiple_repos_metrics(
  repo_names = repo_names,
  time_course = TRUE
)

# Plain nested calls instead of `%>%`: no package that exports the pipe is
# attached in this script, so `%>%` would not be found.
existing_data <- googlesheets4::read_sheet(gsheet)
combine_data <- dplyr::distinct(dplyr::bind_rows(existing_data, repo_metrics))

datasheet <- metricminer::write_to_gsheet(
  gsheet = gsheet,
  input = combine_data,
  overwrite = TRUE
)
shell: Rscript {0}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ local_auth.R
local_auth_2.R
/doc/
/Meta/
git_token.txt
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(auth_from_secret)
export(authorize)
export(automate_storage)
export(calendly_get)
export(clean_ga_metrics)
export(clean_repo_metrics)
Expand Down Expand Up @@ -33,7 +34,6 @@ export(request_ga)
export(request_google_forms)
export(write_playlist_details)
export(write_to_gsheet)
export(write_to_table)
import(dplyr)
importFrom(assertthat,assert_that)
importFrom(assertthat,is.string)
Expand Down
13 changes: 10 additions & 3 deletions R/github.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ get_user_repo_list <- function(owner, count = "all", data_format = "dataframe",
#' @param repo The repository name. So for `https://github.com/fhdsl/metricminer`, it would be `fhdsl/metricminer`
#' @param count How many items would you like to receive? Put "all" to retrieve all records.
#' @param data_format Default is to return a curated data frame. However if you'd like to see the raw information returned from GitHub set format to "raw".
#' @param time_course Should the time course data be collected or only the summary metrics?
#' @return Information regarding a github account
#' @importFrom gh gh
#' @importFrom purrr map
Expand Down Expand Up @@ -212,7 +213,9 @@ get_github_metrics <- function(repo, token = NULL, count = "all", data_format =

results <-
dplyr::full_join(clones_data, views_data, by = "timestamp",
suffix = c("_clones", "_views"))
suffix = c("_clones", "_views")) %>%
dplyr::mutate(repo = paste0(c(owner, repo), collapse = "/"),
.before = dplyr::everything())
} else {

results <- clean_repo_metrics(
Expand Down Expand Up @@ -280,6 +283,7 @@ get_github_repo_summary <- function(repo, token = NULL, count = "all", data_form
#' @param token You can provide the Personal Access Token key directly or this function will attempt to grab a PAT that was stored using the `authorize("github")` function
#' @param repo_names a character vector of repositories you'd like to collect metrics from.
#' @param data_format Default is to return a curated data frame. However if you'd like to see the raw information returned from GitHub set format to "raw".
#' @param time_course Should the time course data be collected or only the summary metrics?
#' @return Information regarding a github account
#' @importFrom gh gh
#' @importFrom purrr map
Expand All @@ -291,9 +295,11 @@ get_github_repo_summary <- function(repo, token = NULL, count = "all", data_form
#'
#' repo_names <- c("fhdsl/metricminer", "jhudsl/OTTR_Template")
#' some_repos_metrics <- get_multiple_repos_metrics(repo_names = repo_names)
#'
#' some_repos_metrics <- get_multiple_repos_metrics(repo_names = repo_names, time_course = TRUE)
#' }
#'
get_multiple_repos_metrics <- function(repo_names = NULL, token = NULL, data_format = "dataframe") {
get_multiple_repos_metrics <- function(repo_names = NULL, token = NULL, data_format = "dataframe", time_course = FALSE) {
if (is.null(token)) {
# Get auth token
token <- get_token(app_name = "github", try = TRUE)
Expand All @@ -308,7 +314,8 @@ get_multiple_repos_metrics <- function(repo_names = NULL, token = NULL, data_for
get_github_metrics(
token = token,
repo = repo,
data_format = data_format
data_format = data_format,
time_course = time_course
)
})

Expand Down
25 changes: 12 additions & 13 deletions R/write-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,24 +87,23 @@ write_to_gsheet <- function(input, token = NULL, gsheet = NULL, overwrite = FALS
return(gsheet_output)
}



#' Writes data to a tabular file
#' @description This is a function to write metricminer data to a tabular file
#' Creates GitHub Action which automatically writes to googlesheet
#' @description This is a function to write metricminer data to a GoogleSheet
#' @param input input data to write to a googlesheet
#' @param file_path A file path where the table should be saved to
#' @param token OAuth token from Google login.
#' @param gsheet Optionally a googlesheet to write to
#' @param overwrite TRUE/FALSE overwrite if there is data at the destination
#' @param table_type CSV and TSV are options. CSV is default
#' @return The file path where the data has been written
#' @importFrom utils menu installed.packages
#' @param append_rows TRUE/FALSE should the data be appended to the data?
#' @param sheet Index or name of the worksheet you want to write to. Forwarded to googlesheets4::write_sheet or googlesheets4::append_sheet to indicate what sheet it should be written to.
#' @param new_sheet default is FALSE. But if it is anything else will be used as the name for a new worksheet that will be made and written to.
#' @param ... these parameters are sent to googlesheets4::write_sheet.
#' @return The googlesheet URL where the data has been written
#' @importFrom googlesheets4 read_sheet sheet_add write_sheet
#' @export
#' @examples \dontrun{
#'
#' authorize("github")
#' repo_list <- get_user_repo_list(owner = "metricminer")
#'
#' write_to_table(repo_list)
#' }
write_to_table <- function(input, file_path, overwrite, table_type) {
automate_storage <- function() {
  # Placeholder: no behaviour is implemented yet, so a call takes no
  # arguments, does nothing, and returns NULL.
  NULL
}

17 changes: 17 additions & 0 deletions inst/extdata/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Image based on rocker/tidyverse with the packages listed in
# github_package_list.tsv installed from GitHub by install_github.R.
# The build context must contain install_github.R, git_token.txt and
# github_package_list.tsv, because all three are copied in below.
FROM rocker/tidyverse:4.0.2
LABEL maintainer="[email protected]"
WORKDIR /rocker-build/

COPY install_github.R .
# NOTE(review): copying the token file stores it in an image layer, and
# deleting the file in a later build step does not remove it from that layer.
# Consider a BuildKit secret mount instead - confirm before publishing images.
COPY git_token.txt .
COPY github_package_list.tsv .


# Install packages from github
RUN Rscript install_github.R \
--packages github_package_list.tsv \
--token git_token.txt


# Set final workdir for commands
WORKDIR /home/rstudio
1 change: 1 addition & 0 deletions inst/extdata/docker/github_package_list.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fhdsl/metricminer HEAD
52 changes: 52 additions & 0 deletions inst/extdata/docker/install_github.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env Rscript

# Install the GitHub packages listed in a TSV file, authenticating with a
# personal access token (PAT) read from a file.
#
# Options:
#   -p, --packages  Path to a TSV whose first column is the GitHub package name
#                   (e.g. jhudsl/ottrpal) and whose second column is the ref to
#                   install. Defaults to "github_package_list.tsv".
#   --token         Path to a file whose first line is the GitHub PAT. Required.
#
# The token file is deleted when the script ends, whether or not the installs
# succeeded.

if (!requireNamespace("optparse", quietly = TRUE)) {
  install.packages("optparse")
}

library(optparse)

################################ Set up options ################################
# Set up optparse options
option_list <- list(
  make_option(
    opt_str = c("-p", "--packages"), type = "character",
    default = "github_package_list.tsv",
    help = "Path to a TSV with a list of packages to be installed through Github,
where file where the first column is the github package name e.g.
jhudsl/ottrpal and the second column is the commit ID to be installed
(to be supplied to the ref argument).
",
    metavar = "character"
  ),
  make_option(
    opt_str = c("--token"), type = "character",
    default = NULL,
    help = "GITHUB PAT file",
    metavar = "character"
  )
)

# Parse options
opt <- parse_args(OptionParser(option_list = option_list))

# --token has no usable default, so fail early with a readable message rather
# than letting readLines() choke on NULL or a missing path.
if (is.null(opt$token) || !file.exists(opt$token)) {
  stop("A path to an existing GitHub PAT file must be given with --token",
    call. = FALSE
  )
}

tryCatch(
  {
    # Read in the token (first line only)
    token <- trimws(readLines(opt$token, n = 1, warn = FALSE))

    # An empty file would otherwise end up as the literal string "NA"
    if (length(token) == 0 || is.na(token) || !nzchar(token)) {
      stop("The token file is empty: ", opt$token, call. = FALSE)
    }

    # Reset GITHUB PAT to be token (setting it replaces any previous value)
    Sys.setenv(GITHUB_PAT = token)

    # set up list of packages to install
    packages <- readr::read_tsv(opt$packages,
      col_names = c("package_name", "ref")
    )

    # One install per row; columns are matched to the arguments by name
    purrr::pwalk(
      packages,
      function(package_name, ref) {
        remotes::install_github(package_name,
          auth_token = token,
          ref = ref
        )
      }
    )
  },
  # Remove the file after we are done, even when an install failed
  finally = invisible(file.remove(opt$token))
)
Loading

0 comments on commit 025074b

Please sign in to comment.