trimmomatic trimmomatic se

trimmomatic se

Version:
0.36
Identifier: TL_a2e03a_ef.4a
Tool
"""
bio/trimmomatic/se

Snakemake wrapper to trim reads with trimmomatic in SE mode with the help of pigz.
pigz is a parallel implementation of gzip. Trimmomatic spends most of its time
compressing and decompressing instead of trimming sequences. By using process
substitution (<(command), >(command)), we can accelerate trimmomatic a lot.
Consider providing this wrapper with at least 1 extra thread for each gzipped
input or output file.
"""
11
# Module metadata for the wrapper.
__author__ = "Johannes Köster, Jorge Langa"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
16
17
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts
20
def distribute_threads(input_file, output_file, available_threads):
    """Distribute available threads between trimmomatic and any pigz instances.

    A file counts as gzipped when its name ends in ``.gz``. Each gzipped file
    is a candidate for its own pigz process.

    Args:
        input_file: Name of the input reads file.
        output_file: Name of the trimmed output file.
        available_threads: Total number of threads to share out.

    Returns:
        A tuple ``(trimmomatic_threads, pigz_input_threads,
        pigz_output_threads)``. A pigz count is 0 when the corresponding file
        is not gzipped or when there are too few threads to give every
        process at least one.
    """
    gzipped_input_files = 1 if input_file.endswith(".gz") else 0
    gzipped_output_files = 1 if output_file.endswith(".gz") else 0

    # Even share if trimmomatic and every pigz process got the same amount.
    potential_threads_per_process = available_threads // (
        1 + gzipped_input_files + gzipped_output_files
    )
    if potential_threads_per_process <= 0:
        # not enough threads for pigz
        return available_threads, 0, 0

    # decompressing pigz creates at most 4 threads
    pigz_input_threads = (
        min(4, potential_threads_per_process) if gzipped_input_files else 0
    )
    # Whatever the decompressor does not use is split evenly between the
    # compressing pigz and trimmomatic.
    pigz_output_threads = (
        (available_threads - pigz_input_threads) // (1 + gzipped_output_files)
        if gzipped_output_files
        else 0
    )
    trimmomatic_threads = (
        available_threads - pigz_input_threads - pigz_output_threads
    )
    return trimmomatic_threads, pigz_input_threads, pigz_output_threads
50
51
def compose_input_gz(filename, threads):
    """Build the input argument, decompressing through pigz when possible.

    Args:
        filename: Name of the input file.
        threads: Number of threads granted to the decompressing pigz.

    Returns:
        A ``<(pigz ...)`` process-substitution expression when ``filename``
        ends in ``.gz`` and ``threads`` is positive; otherwise ``filename``
        unchanged.
    """
    use_pigz = filename.endswith(".gz") and threads > 0
    if not use_pigz:
        return filename
    return "<(pigz -p {threads} --decompress --stdout {filename})".format(
        threads=threads, filename=filename
    )
58
59
def compose_output_gz(filename, threads, compression_level):
    """Build the output argument, compressing through pigz when possible.

    Args:
        filename: Name of the output file.
        threads: Number of threads granted to the compressing pigz.
        compression_level: Text placed verbatim on the pigz command line
            (for example ``"-5"``).

    Returns:
        A ``>(pigz ... > filename)`` process-substitution expression when
        ``filename`` ends in ``.gz`` and ``threads`` is positive; otherwise
        ``filename`` unchanged.
    """
    use_pigz = filename.endswith(".gz") and threads > 0
    if not use_pigz:
        return filename
    return ">(pigz -p {threads} {compression_level} > {filename})".format(
        threads=threads, compression_level=compression_level, filename=filename
    )
66
67
# Settings read from the rule; "extra" and "compression_level" are optional.
extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
# Handed verbatim to pigz when the output is compressed through it.
compression_level = snakemake.params.get("compression_level", "-5")
# The individual trimming steps are joined into one space-separated argument list.
trimmer = " ".join(snakemake.params.trimmer)

# Distribute threads
trimmomatic_threads, input_threads, output_threads = distribute_threads(
    snakemake.input[0], snakemake.output[0], snakemake.threads
)

# Collect files
# NOTE: the names `input` and `output` are referenced by the {input} and
# {output} placeholders in the command template below, so they must not be
# renamed without updating the template as well.
input = compose_input_gz(snakemake.input[0], input_threads)
output = compose_output_gz(snakemake.output[0], output_threads, compression_level)

shell(
    "trimmomatic SE -threads {trimmomatic_threads} "
    "{java_opts} {extra} {input} {output} {trimmer} {log}"
)
87