Untitled
unknown
r
3 years ago
2.6 kB
7
Indexable
library(tidyverse)
library(glue)
fq_dir <- file.path(getwd(), "fastq_fastp")
tools_path <- file.path("/home", "thomas", "NGS", "tools")
ref_fasta_file <- file.path("/home", "thomas",
"Documents", "Programmes", "ncbi_ref", "hg38_imgt.fa")
transcripts_index_dir <- file.path("/home", "thomas", "Documents",
"Programmes", "ncbi_ref", "hg38_imgt.sindex")
salmon_path <- file.path(tools_path, "salmon-1.9.0_linux_x86_64", "bin",
"salmon")
salmon_id_threads <- 16
salmon_threads <- 16
output_dir <- file.path(getwd(), "salmon_map_quant")
if (!dir.exists(output_dir)) dir.create(output_dir)
samples <- dir(fq_dir, full.names = FALSE)
samples <- rev(samples)
print(samples)
if (!file.exists(transcripts_index_dir)) {
if (!file.exists(ref_fasta_file)) {
stop(str_glue("No reference transcriptome fasta {ref_fasta_file}"))
}
print(str_glue("Building salmon index for {ref_fasta_file}"))
command <- str_glue(
"{salmon_path} index ",
"-t {ref_fasta_file} ", # Transcript fasta file.
"-i {transcripts_index_dir} ", # Salmon index.
"-p {salmon_id_threads}" # Number of threads.
)
print(command)
system(command)
}
n_samples <- length(samples)
for (i in 1:n_samples) {
sample_name <- samples[i]
output_dir_case <- file.path(output_dir, sample_name)
if (!dir.exists(output_dir_case)) {
fastq_dir <- file.path(fq_dir, sample_name)
r1_file <- file.path(fastq_dir, "R1.fq.gz")
r2_file <- file.path(fastq_dir, "R2.fq.gz")
print(str_glue("Quantifying sample: {sample_name} {i}/{n_samples}"))
command <- str_glue(
"{salmon_path} quant ",
"-i {transcripts_index_dir} ",
"-l IU ", # reads R1 and R2 should face
"-1 {r1_file} ",
"-2 {r2_file} ",
"-o {output_dir_case} ",
"--seqBias ",
"--gcBias ",
"--posBias ",
"-p {salmon_threads} "
)
print(command)
dir.create(output_dir_case)
system(command)
print(paste0(rep("=", 50), collapse = ""))
}
}
load_edger <- function(salmon_dir, samples,
conditions = "A") {
require(dplyr)
files <- file.path(salmon_dir, samples, "quant.sf")
print(files)
s <- file.exists(files)
t <- tibble(
files = files[s],
names = samples[s],
condition = conditions
)
print(t)
se <- tximeta(t)
y <- makeDGEList(se)
y <- calcNormFactors(y)
y <- estimateDisp(y)
y
}Editor is loading...