Untitled

mail@pastecode.io avatar
unknown
r
2 years ago
2.6 kB
0
Indexable
Never
library(tidyverse)
library(glue)

fq_dir <- file.path(getwd(), "fastq_fastp")
tools_path <- file.path("/home", "thomas", "NGS", "tools")

ref_fasta_file <- file.path("/home", "thomas",
    "Documents", "Programmes", "ncbi_ref", "hg38_imgt.fa")

transcripts_index_dir <- file.path("/home", "thomas", "Documents",
    "Programmes", "ncbi_ref", "hg38_imgt.sindex")

salmon_path <- file.path(tools_path, "salmon-1.9.0_linux_x86_64", "bin",
    "salmon")

salmon_id_threads <- 16
salmon_threads    <- 16

output_dir <- file.path(getwd(), "salmon_map_quant")
if (!dir.exists(output_dir)) dir.create(output_dir)

samples <- dir(fq_dir, full.names = FALSE)
samples <- rev(samples)
print(samples)

if (!file.exists(transcripts_index_dir)) {
    if (!file.exists(ref_fasta_file)) {
        stop(str_glue("No reference transcriptome fasta {ref_fasta_file}"))
    }

    print(str_glue("Building salmon index for {ref_fasta_file}"))
    command <- str_glue(
        "{salmon_path} index ",
            "-t {ref_fasta_file} ",        # Transcript fasta file.
            "-i {transcripts_index_dir} ", # Salmon index.
            "-p {salmon_id_threads}"       # Number of threads.
    )
    print(command)
    system(command)
}

n_samples <- length(samples)
for (i in 1:n_samples) {
    sample_name <- samples[i]
    output_dir_case <- file.path(output_dir, sample_name)

    if (!dir.exists(output_dir_case)) {
        fastq_dir <- file.path(fq_dir, sample_name)
        r1_file <- file.path(fastq_dir, "R1.fq.gz")
        r2_file <- file.path(fastq_dir, "R2.fq.gz")

        print(str_glue("Quantifying sample: {sample_name} {i}/{n_samples}"))

        command <- str_glue(
            "{salmon_path} quant ",
                "-i {transcripts_index_dir} ",
                "-l IU ", # reads R1 and R2 should face
                "-1 {r1_file} ",
                "-2 {r2_file} ",
                "-o {output_dir_case} ",
                "--seqBias ",
                "--gcBias ",
                "--posBias ",
                "-p {salmon_threads} "
        )
        print(command)
        dir.create(output_dir_case)
        system(command)
        print(paste0(rep("=", 50), collapse = ""))
    }
}



load_edger <- function(salmon_dir, samples,
conditions = "A") {
    require(dplyr)
    files <- file.path(salmon_dir, samples, "quant.sf")
    print(files)
    s <- file.exists(files)
    t <- tibble(
        files = files[s],
        names = samples[s],
        condition = conditions
    )
    print(t)
    se <- tximeta(t)

    y <- makeDGEList(se)
    y <- calcNormFactors(y)
    y <- estimateDisp(y)
    y
}