-
Notifications
You must be signed in to change notification settings - Fork 0
/
format.R
125 lines (118 loc) · 4.07 KB
/
format.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# This file includes functions to convert the file format from python-based AnnData (https://anndata.readthedocs.io/en/latest/anndata.AnnData.html)
# to R-based Seurat (https://satijalab.org/seurat/) format or ExpressionSet (https://www.rdocumentation.org/packages/Biobase/versions/2.32.0/topics/ExpressionSet) format.
#' @title read_h5
#' @description
#' This function reads the AnnData h5ad file and save the .X, .obs and .raw.X data as a list.
#' @details
#' var_names are saved as rownames, obs_names are saved as colnames.
#' @param
#' h5file: the AnnData h5ad file.
#'
#' @return
#' A list of 3 values: X is the .X of AnnData, rawX is the .raw.X of AnnData, and obs is the .obs of AnnData.
#' @example
#' x <- read_h5("example.h5")
read_h5<-function(h5file){
suppressPackageStartupMessages(library(reticulate))
suppressPackageStartupMessages(library(Matrix))
handle <- import("anndata", convert = FALSE)
ad <- handle$read_h5ad(h5file)
obs <- py_to_r(ad$obs)
if ("n_counts" %in% colnames(x = obs)) {
colnames(x = obs) <- gsub(
pattern = "n_counts",
replacement = "nUMI",
x = colnames(x = obs)
)
}
genenames <- rownames(py_to_r(ad$var))
cellnames <- rownames(obs)
ncells = length(cellnames)
ngenes = length(genenames)
# Read X
if("indptr" %in% names(ad$X)){
ad.X <- sparseMatrix(
i = as.numeric(x = ad$X$indices),
p = as.numeric(x = ad$X$indptr),
x = as.numeric(x = ad$X$data),
index1 = FALSE,
dims=c(ngenes,
ncells)
)
}else{
ad.X <- t(py_to_r(ad$X))
}
rownames(ad.X) <- genenames
colnames(ad.X) <- cellnames
# Read raw
ad.raw.X = NA
if("raw" %in% names(ad)){
if("X" %in% names(ad$raw)){
genenames <- rownames(py_to_r(ad$raw$var))
ngenes = length(genenames)
if("indptr" %in% names(ad$raw$X)){
ad.raw.X <- sparseMatrix(
i = as.numeric(x = ad$raw$X$indices),
p = as.numeric(x = ad$raw$X$indptr),
x = as.numeric(x = ad$raw$X$data),
index1 = FALSE,
dims=c(ngenes,
ncells)
)
}else{
ad.raw.X <- t(py_to_r(ad$raw$X))
}
rownames(ad.raw.X) <- genenames
colnames(ad.raw.X) <- cellnames
}else{
genenames <- rownames(py_to_r(ad$var))
ngenes = length(genenames)
if("indptr" %in% names(ad$X)){
ad.raw.X <- sparseMatrix(
i = as.numeric(x = ad$X$indices),
p = as.numeric(x = ad$X$indptr),
x = as.numeric(x = ad$X$data),
index1 = FALSE,
dims=c(ngenes,
ncells)
)
}else{
ad.raw.X <- t(py_to_r(ad$X))
}
rownames(ad.raw.X) <- genenames
colnames(ad.raw.X) <- cellnames
}
}
return(list(X=ad.X, rawX=ad.raw.X, obs=obs))
}
#' @title anndata2expressionset
#'
#' @description
#' Convert the AnnData h5ad file to ExpressionSet file.
#' @details
#'
#' @param h5file: the AnnData h5ad file.
#' @param esfile: the ExpressionSet file to be saved.
#'
#' @return
#' NA
#' @example
#' anndata2expressionset('anndata.h5', 'expr.rds')
anndata2expressionset<-function(h5file, esfile){
require(Biobase)
df = read_h5(h5file)
ad = Biobase::ExpressionSet(assayData = as.matrix(df$X),
phenoData = Biobase::AnnotatedDataFrame(df$obs))
saveRDS(ad, esfile)
}
# main
# usage: !/nfs/leia/research/ma/chichau/bin/anaconda3/envs/r4-base/bin/Rscript format.R input.h5 output.rds
# use sceasy(https://github.com/cellgeni/sceasy) to convert the Anndata file to Seurat file.
args <- commandArgs(trailingOnly=TRUE)
library(sceasy)
library(reticulate)
loompy <- reticulate::import('loompy')
sceasy::convertFormat(args[1],
from="anndata",
to="seurat",
outFile=args[2])