Skip to content

Commit 375c9b0

Browse files
author
Tobias Hofmann
committed
updated readme
1 parent 263c30f commit 375c9b0

File tree

3 files changed

+22
-20
lines changed

3 files changed

+22
-20
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# <img src="images/secapr_logo.png" width="100"> SEquence CApture PRocessor (SECAPR)
22

3-
**Original Publication: https://doi.org/10.7287/peerj.preprints.26477v3**
3+
**Original Publication: https://doi.org/10.7717/peerj.5175**
44

55
___
66

@@ -23,6 +23,6 @@ Some functions in this pipeline are inspired by the scripts from the [Phyluce pi
2323

2424
#### Please cite:
2525

26-
**Andermann T**, Cano Á, Zizka A, Bacon C, Antonelli A. (2018) SECAPR - A bioinformatics pipeline for the rapid and user-friendly processing of Illumina sequences, from raw reads to alignments. PeerJ Preprints. doi: 10.7287/peerj.preprints.26477v3
26+
**Andermann T.**, Cano Á., Zizka A., Bacon C., Antonelli A. 2018. SECAPR - A bioinformatics pipeline for the rapid and user-friendly processing of targeted enriched Illumina sequences, from raw reads to alignments. PeerJ 6:e5175. DOI: 10.7717/peerj.5175.
2727

2828
Faircloth BC. 2015. PHYLUCE is a software package for the analysis of conserved genomic loci. bioRxiv. doi: 10.1101/027904.

recipe/meta.yaml

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{% set version = "1.1.10" %}
1+
{% set version = "1.1.11" %}
22

33
package:
44
name: secapr
@@ -7,45 +7,46 @@ package:
77
source:
88
fn: secapr_{{ version }}.tar.gz
99
url: https://github.com/AntonelliLab/seqcap_processor/archive/v{{ version }}.tar.gz
10-
sha256: 175830fa23619a374719c12340504fb50f9b00d6136b49c2a925873ecda687d1
10+
sha256: a14cea6d58154759da84c9626b17b6010626b907980e9e5bbde5ee9c2be5a3ca
1111

1212
build:
1313
skip: True # [not py27]
14-
number: 0
15-
script: $PYTHON setup.py install --single-version-externally-managed --record=record.txt
14+
number: 2
15+
script: python -m pip install --no-deps --ignore-installed .
1616
entry_points:
1717
- secapr = secapr.__main__:main
1818

1919
requirements:
20-
build:
20+
host:
2121
- python
22+
- pip
2223
- setuptools
2324
- biopython
2425
- pandas
2526
- r-base
2627

2728
run:
2829
- python
29-
- biopython ==1.68
30-
- emboss ==6.5.7
30+
- biopython
31+
- emboss
3132
- bcftools ==1.8
32-
- samtools ==0.1.19
33-
- picard ==1.126
33+
- samtools 0.1.19
34+
- picard
3435
- seqtk >=1.0.82,<=1.2
3536
- bwa >=0.7
3637
- lastz
3738
- mafft >=7.2
38-
- muscle ==3.8.31
39-
- trimmomatic ==0.33
39+
- muscle
40+
- trimmomatic
4041
- abyss
4142
- trinity
42-
- fastqc 0.11*
43-
- pandas ==0.22.0
44-
- numpy ==1.14
45-
- cogent ==1.5.3
43+
- fastqc
44+
- pandas
45+
- numpy
46+
- cogent
4647
- r-base
4748
- r-tidyverse
48-
49+
4950
test:
5051
imports:
5152
- secapr
@@ -56,4 +57,4 @@ about:
5657
home: 'https://github.com/AntonelliLab/seqcap_processor'
5758
license: MIT
5859
license_file: LICENSE
59-
summary: 'Process sequence-capture FASTQ files into alignments for phylogenetic analyses. Integrates allele phasing, producing haplotype alignments.'
60+
summary: 'Process sequence-capture FASTQ files into alignments for phylogenetic analyses. Integrates allele phasing, producing haplotype alignments.'

secapr/quality_check.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ def main(args):
5757
p.communicate()
5858

5959
# write the r-plotting script to file
60-
r_plotting_script = 'opt <- c()\nopt$input_folder = workdir\nopt$output_file =paste0(workdir, "/QC_plots.pdf")\n\n#load fastQC summaries and create per test table\ninp <- list.files(opt$input_folder, pattern = ".zip")\n\n\nfastqc_results <- lapply(inp, function(k){\n unzip(paste(opt$input_folder, k, sep = "/"),exdir = opt$input_folder)\n inpu <- read_delim(paste(paste(gsub(".zip", "", paste(opt$input_folder,k, sep = "/"))), \n "summary.txt", sep = "/"), delim = "\t")\n out <- as_data_frame(t(inpu[, 1])) %>%\n mutate(sample.id = names(inpu)[3])\n names(out) <- c(gsub(" ", "_", unlist(inpu[,2])), "sample_id")\n unlink(x = paste(opt$input_folder, gsub(".zip", "", k), sep = "/"), \n recursive = T, force = T)\n \n return(out)\n})\n\noutp <- do.call("rbind.data.frame", fastqc_results)%>%\n select(ID = sample_id,\n PBQ = Per_base_sequence_quality,\n PTQ = Per_tile_sequence_quality,\n PSQ = Per_sequence_quality_scores,\n PBC = Per_base_sequence_content,\n SGC = Per_sequence_GC_content,\n PBN = Per_base_N_content,\n SLD = Sequence_Length_Distribution,\n SDL = Sequence_Duplication_Levels,\n ORS = Overrepresented_sequences,\n AdC = Adapter_Content)\n\n#change table format\nret <- outp %>% \n group_by(ID) %>%\n gather(test, status, PBQ:AdC)\n\n#plot how many samples failed the test\nqc.fail <- ggplot()+\n geom_bar(data = ret, aes(x = test, fill = status), stat = "count", position = "dodge")+\n theme_bw()\n\n#plot which sample failed which test\nqc.samples <- ggplot()+\n geom_tile(data = ret, aes(y = ID, x = test, fill = as.factor(status)))+\n scale_fill_discrete(name = "status")+\n xlab("FastQC test")+\n ylab("Samples")+\n theme_bw()+\n theme(\n axis.text.y = element_blank()\n )\n\n#plot pdf\npdf(opt$output_file)\nprint(qc.fail)\nprint(qc.samples)\ndev.off()\n\npng(gsub(".pdf", "1.png", opt$output_file))\nprint(qc.fail)\ndev.off()\n\npng(gsub(".pdf", "2.png", opt$output_file))\nprint(qc.samples)\ndev.off()\n\n#table with samples that faild a test\nfail <- ret %>%\n 
filter(status == "FAIL")\n\n#get the ID number of the failed samples\nfail.samp <- fail %>%\n filter(!duplicated(ID)) %>%\n select(ID)%>%\n unlist() %>%\n parse_number()%>%\n unique() %>%\n sort()'
60+
r_plotting_script = 'opt <- c()\nopt$input_folder = workdir\nopt$output_file =paste0(workdir, "/QC_plots.pdf")\n\n#load fastQC summaries and create per test table\ninp <- list.files(opt$input_folder, pattern = ".zip")\n\n\nfastqc_results <- lapply(inp, function(k){\n unzip(paste(opt$input_folder, k, sep = "/"),exdir = opt$input_folder)\n inpu <- read_delim(paste(paste(gsub(".zip", "", paste(opt$input_folder,k, sep = "/"))), \n "summary.txt", sep = "/"), delim = "\t")\n out <- as_data_frame(t(inpu[, 1])) %>%\n mutate(sample.id = names(inpu)[3])\n names(out) <- c(gsub(" ", "_", unlist(inpu[,2])), "sample_id")\n unlink(x = paste(opt$input_folder, gsub(".zip", "", k), sep = "/"), \n recursive = T, force = T)\n \n return(out)\n})\n\nn_cols = length(fastqc_results[[1]])\nif (n_cols==11){\n outp <- do.call("rbind.data.frame", fastqc_results)%>%\n select(ID = sample_id,\n PBQ = Per_base_sequence_quality,\n PTQ = Per_tile_sequence_quality,\n PSQ = Per_sequence_quality_scores,\n PBC = Per_base_sequence_content,\n SGC = Per_sequence_GC_content,\n PBN = Per_base_N_content,\n SLD = Sequence_Length_Distribution,\n SDL = Sequence_Duplication_Levels,\n ORS = Overrepresented_sequences,\n AdC = Adapter_Content)\n #change table format\n ret <- outp %>% \n group_by(ID) %>%\n gather(test, status, PBQ:AdC)\n\n}\nif (n_cols==12){\n outp <- do.call("rbind.data.frame", fastqc_results)%>%\n select(ID = sample_id,\n PBQ = Per_base_sequence_quality,\n PTQ = Per_tile_sequence_quality,\n PSQ = Per_sequence_quality_scores,\n PBC = Per_base_sequence_content,\n SGC = Per_sequence_GC_content,\n PBN = Per_base_N_content,\n SLD = Sequence_Length_Distribution,\n SDL = Sequence_Duplication_Levels,\n ORS = Overrepresented_sequences,\n AdC = Adapter_Content,\n KmC = Kmer_Content)\n #change table format\n ret <- outp %>% \n group_by(ID) %>%\n gather(test, status, PBQ:KmC)\n}\n\n#plot how many samples failed the test\nqc.fail <- ggplot()+\n geom_bar(data = ret, aes(x = test, fill = status), stat = "count", 
position = "dodge")+\n theme_bw()\n\n#plot which sample failed which test\nqc.samples <- ggplot()+\n geom_tile(data = ret, aes(y = ID, x = test, fill = as.factor(status)))+\n scale_fill_discrete(name = "status")+\n xlab("FastQC test")+\n ylab("Samples")+\n theme_bw()+\n theme(\n axis.text.y = element_blank()\n )\n\n#plot pdf\npdf(opt$output_file)\nprint(qc.fail)\nprint(qc.samples)\ndev.off()\n\npng(gsub(".pdf", "1.png", opt$output_file))\nprint(qc.fail)\ndev.off()\n\npng(gsub(".pdf", "2.png", opt$output_file))\nprint(qc.samples)\ndev.off()\n\n#table with samples that faild a test\nfail <- ret %>%\n filter(status == "FAIL")\n\n#get the ID number of the failed samples\nfail.samp <- fail %>%\n filter(!duplicated(ID)) %>%\n select(ID)%>%\n unlist() %>%\n parse_number()%>%\n unique() %>%\n sort()'
61+
6162
add_to_script = 'library(tidyverse)\nworkdir = "%s"\n' %out_folder
6263
new_r_plotting_script = add_to_script + r_plotting_script
6364
r_script_path = os.path.join(out_folder,'fastqc_visualization.r')

0 commit comments

Comments
 (0)