Skip to content

Commit

Permalink
Allow no annotations; some input validation
Browse files Browse the repository at this point in the history
  • Loading branch information
Phlya committed Feb 10, 2023
1 parent 070d32f commit 46cf6a7
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 21 deletions.
8 changes: 6 additions & 2 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,14 @@ fields_to_match: null
# comparisons
fields_to_differ:
cell_type: hESCs

# Annotations file with two columns: annotation name ("bedname") and "file" (URLs or local file)
# Downloaded bed files will be stored in beds_folder
# Downloaded bed files will be stored in beds_folder.
# Provide nothing here if you don't want to use any annotations.
annotations: config/annotations.tsv

# Rules about correspondence between samples and annotations can be specified here
# Provide nothing here if all annotations apply to all samples.
samples_annotations_combinations: config/samples_annotations.tsv

# folder definition is optional
Expand Down Expand Up @@ -113,7 +117,7 @@ pileups:
local_rescaled: '--local --rescale --rescale_pad 1'
by_strand_by_distance: "--by_strand --by_distance"
by_strand_local: "--by_strand --local"
by_strand_distal: "--by_strand --maxdist 1000000"
by_strand_distal: "--by_strand --subset 10000 --maxdist 1000000"
by_window_short_range: "--by_window --subset 1000 --maxdist 2000000"
by_window_long_range: "--by_window --subset 1000 --mindist 2000000"

Expand Down
2 changes: 1 addition & 1 deletion config/samples_annotations.tsv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
sample CTCF_test_bed ENCODE_CREs_HFF_bed
test_cool x x
test_cool_HFF x x
61 changes: 43 additions & 18 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,23 @@ def will_download(link):
return False if parsed_path.scheme == "" else True


samples_df = pd.read_csv(
config["samples"], sep="\t", header=0, comment="#", dtype={"do_dots": bool}
)
try:
samples_df = pd.read_csv(
config["samples"],
sep="\t",
header=0,
comment="#",
dtype={"do_dots": bool, "do_tads": bool},
)
except:
raise ValueError("Could not read file with samples, please ensure it exists")
if "sample" not in samples_df.columns:
raise ValueError(
'Column "sample" has to be in the file with description of samples'
)
if "file" not in samples_df.columns:
raise ValueError('Column "file" has to be in the file with description of samples')

samples_df = samples_df.fillna(value=False)
samples_df.loc[:, "will_download"] = samples_df.file.apply(will_download)
samples_df.loc[:, "local_path"] = samples_df.apply(
Expand Down Expand Up @@ -140,14 +154,20 @@ local_bedpe_names = {
for bedpefile in bedpefiles_local
}

bed_df = pd.read_csv(config["annotations"], sep="\t", header=0, comment="#")
bed_df.loc[:, "will_download"] = bed_df.file.apply(will_download)
bed_df.loc[:, "local_path"] = bed_df.apply(
lambda x: make_local_path(x.bedname, x.format) if x.will_download else x.file,
axis=1,
result_type="reduce",
)
bed_df = bed_df.set_index("bedname").replace("-", np.nan)
if config["annotations"]:
bed_df = pd.read_csv(config["annotations"], sep="\t", header=0, comment="#")
bed_df.loc[:, "will_download"] = bed_df.file.apply(will_download)
bed_df.loc[:, "local_path"] = bed_df.apply(
lambda x: make_local_path(x.bedname, x.format) if x.will_download else x.file,
axis=1,
result_type="reduce",
)
bed_df = bed_df.set_index("bedname").replace("-", np.nan)
else:
bed_df = pd.DataFrame(
columns=["bedname", "file", "format", "will_download", "local_path"]
+ list(config["pileups"]["arguments"])
)

pileup_params = config["pileups"]["arguments"]

Expand All @@ -164,13 +184,18 @@ bedfiles = list(bedfiles_dict.keys())
bedtype_dict = dict(bed_df["format"])
# bedpe_pileups_mindist, bedpe_pileups_maxdist = config['bedpe_pileups_distance_limits']

samples_annotations = ~pd.read_csv(
config["samples_annotations_combinations"],
sep="\t",
header=0,
index_col=0,
comment="#",
).isna()
if config["samples_annotations_combinations"]:
samples_annotations = ~pd.read_csv(
config["samples_annotations_combinations"],
sep="\t",
header=0,
index_col=0,
comment="#",
).isna()
else:
samples_annotations = pd.DataFrame(
np.ones((len(samples), len(bedfiles))), index=samples, columns=bedfiles
).astype(bool)

### Data resolutions
if config["eigenvector"]["do"]:
Expand Down

0 comments on commit 46cf6a7

Please sign in to comment.