Skip to content

Commit

Permalink
Merge pull request #713 from dileventi/data
Browse files Browse the repository at this point in the history
atlas-group to atlas fix
  • Loading branch information
dileventi authored Dec 16, 2023
2 parents 42e2d6a + bfc02a0 commit f059342
Show file tree
Hide file tree
Showing 13 changed files with 21 additions and 21 deletions.
2 changes: 1 addition & 1 deletion evaluation/benchmarks/bio/bio-align/genome-diff.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# bacteria), and any regions with less than 10 supporting reads.

# Requires: samtools, minimap2, bcftools
# Data: atlas-group.cs.brown.edu/data/bio/R1.fastq.gz atlas-group.cs.brown.edu/data/bio/R2.fastq.gz atlas-group.cs.brown.edu/data/bio/ref.fa
# Data: atlas.cs.brown.edu/data/bio/R1.fastq.gz atlas.cs.brown.edu/data/bio/R2.fastq.gz atlas.cs.brown.edu/data/bio/ref.fa

# https://github.com/samtools/samtools/releases/latest
# https://github.com/lh3/minimap2
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/bio/bio-align/genquality.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# http://thegenomefactory.blogspot.com/2019/09/25-reasons-assemblies-dont-make-it-into.html

# Requires: csvkit
# Data: atlas-group.cs.brown.edu/data/bio/genbank.txt
# Data: atlas.cs.brown.edu/data/bio/genbank.txt

IN=./input/genbank.txt
OUT=./output/out.txt
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/bio/bio1/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ mkdir -p input
mkdir -p output
cd input
if [[ ! -f R1.fastq ]]; then
wget atlas-group.cs.brown.edu/data/bio/{R1.fastq.gz,R2.fastq.gz,ref.fa}
wget atlas.cs.brown.edu/data/bio/{R1.fastq.gz,R2.fastq.gz,ref.fa}

gunzip R1.fastq.gz
gunzip R2.fastq.gz
Expand Down
4 changes: 2 additions & 2 deletions evaluation/benchmarks/max-temp/max-temp-preprocess.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/bin/bash

sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' |
sed 's;^;atlas.cs.brown.edu/data/noaa/;' |
sed 's;$;/;' |
xargs -r -n 1 curl -s |
grep gz |
tr -s ' \n' |
cut -d ' ' -f9 |
sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' |
sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' |
sed 's;^;atlas.cs.brown.edu/data/noaa/;' |
xargs -n1 curl -s |
gunzip
2 changes: 1 addition & 1 deletion evaluation/benchmarks/max-temp/max-temp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

FROM=${FROM:-2015}
TO=${TO:-2015}
IN=${IN:-'atlas-group.cs.brown.edu/data/noaa/'}
IN=${IN:-'atlas.cs.brown.edu/data/noaa/'}
fetch=${fetch:-"curl -s"}

seq $FROM $TO |
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/max-temp/temp-analytics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

FROM=${FROM:-2015}
TO=${TO:-2015}
IN=${IN:-'atlas-group.cs.brown.edu/data/noaa/'}
IN=${IN:-'atlas.cs.brown.edu/data/noaa/'}
fetch=${fetch:-"curl -s"}

data_file=temperatures.txt
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/nlp/input/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ setup_dataset() {
cd pg
if [[ "$1" == "--full" ]]; then
echo 'N.b.: download/extraction will take about 10min'
wget atlas-group.cs.brown.edu/data/pg.tar.xz # FIXME: moving to PG soon
wget atlas.cs.brown.edu/data/pg.tar.xz # FIXME: moving to PG soon
if [ $? -ne 0 ]; then
cat <<-'EOF' | sed 's/^ *//'
Downloading input dataset failed, thus need to manually rsync all books from project gutenberg:
Expand Down
8 changes: 4 additions & 4 deletions evaluation/benchmarks/oneliners/input/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ setup_dataset() {
fi

if [ ! -f ./1M.txt ]; then
curl -sf 'atlas-group.cs.brown.edu/data/dummy/1M.txt' > 1M.txt
curl -sf 'atlas.cs.brown.edu/data/dummy/1M.txt' > 1M.txt
if [ $? -ne 0 ]; then
echo 'cannot find 1M.txt -- please contact the developers of pash'
exit 1
Expand All @@ -51,7 +51,7 @@ setup_dataset() {
fi

if [ ! -f ./1G.txt ]; then
curl -sf 'atlas-group.cs.brown.edu/data/dummy/1G.txt' > 1G.txt
curl -sf 'atlas.cs.brown.edu/data/dummy/1G.txt' > 1G.txt
if [ $? -ne 0 ]; then
echo 'cannot find 1G.txt -- please contact the developers of pash'
exit 1
Expand All @@ -61,7 +61,7 @@ setup_dataset() {

# download wamerican-insane dictionary and sort according to machine
if [ ! -f ./dict.txt ]; then
curl -sf 'atlas-group.cs.brown.edu/data/dummy/dict.txt' | sort > dict.txt
curl -sf 'atlas.cs.brown.edu/data/dummy/dict.txt' | sort > dict.txt
if [ $? -ne 0 ]; then
echo 'cannot find dict.txt -- please contact the developers of pash'
exit 1
Expand All @@ -70,7 +70,7 @@ setup_dataset() {
fi

if [ ! -f ./all_cmds.txt ]; then
curl -sf 'atlas-group.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt
curl -sf 'atlas.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt
if [ $? -ne 0 ]; then
# This should be OK for tests, no need for abort
ls /usr/bin/* > all_cmds.txt
Expand Down
4 changes: 2 additions & 2 deletions evaluation/intro/input/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ cd $(dirname $0)


if [ ! -f ./100M.txt ]; then
curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/100M.txt' > 100M.txt
curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/100M.txt' > 100M.txt
if [ $? -ne 0 ]; then
# Pipe curl through tac (twice) in order to consume all the output from curl.
# This way, curl can write the whole page and not emit an error code.
Expand All @@ -23,7 +23,7 @@ if [ ! -f ./100M.txt ]; then
fi

if [ ! -f ./words ]; then
curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/words' > words
curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/words' > words
if [ $? -ne 0 ]; then
curl -sf 'https://zenodo.org/record/7650885/files/words' > words
if [ $? -ne 0 ]; then
Expand Down
2 changes: 1 addition & 1 deletion evaluation/other/more-scripts/page-count.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# Requires: libimage-exiftool-perl, bc
# Data:
# atlas-group.cs.brown.edu/data/large.pdf
# atlas.cs.brown.edu/data/large.pdf
# More data:
# https://arxiv.org/help/bulk_data

Expand Down
2 changes: 1 addition & 1 deletion evaluation/other/more-scripts/spell.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# TODO: `groff` is an interesting "pure", whose wrapper only needs to split input
# TODO: files carefully.

# Data: atlas-group.cs.brown.edu/data/dummy/ronn.1
# Data: atlas.cs.brown.edu/data/dummy/ronn.1
# dict depends on the system (and has to be sorted), so we assume it exists
dict=./input/dict.txt

Expand Down
6 changes: 3 additions & 3 deletions evaluation/tests/input/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ esac
[ "$1" = "-c" ] && rm-files 1M.txt all_cmds.txt words sorted_words 10M.txt

if [ ! -f ./1M.txt ]; then
curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/1M.txt' > 1M.txt
curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/1M.txt' > 1M.txt
if [ $? -ne 0 ]; then
curl -f 'https://zenodo.org/record/7650885/files/1M.txt' > 1M.txt
if [ $? -ne 0 ]; then
Expand All @@ -29,7 +29,7 @@ fi

if [ ! -f ./all_cmds.txt ]; then
if [ "$(hostname)" = "deathstar" ]; then
curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt
curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt
if [ $? -ne 0 ]; then
curl -f 'https://zenodo.org/record/7650885/files/all_cmds.txt' > all_cmds.txt || eexit "all_cmds not found"
fi
Expand All @@ -40,7 +40,7 @@ if [ ! -f ./all_cmds.txt ]; then
fi

if [ ! -f ./words ]; then
curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/words' > words
curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/words' > words
if [ $? -ne 0 ]; then
curl -f 'https://zenodo.org/record/7650885/files/words' > words
if [ $? -ne 0 ]; then
Expand Down
4 changes: 2 additions & 2 deletions evaluation/tests/sed-test.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
cat $PASH_TOP/evaluation/tests/input/1M.txt |
sed 's;^d;da;' |
sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' |
sed 's;^;atlas.cs.brown.edu/data/noaa/;' |
sed 's;$;/;' |
sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' |
sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' |
sed 's;^;atlas.cs.brown.edu/data/noaa/;' |
sed "s#^#$WIKI#" |
sed s/\$/'0s'/ |
sed 1d |
Expand Down

0 comments on commit f059342

Please sign in to comment.