File Coverage

blib/lib/Sanger/CGP/AlleleCount/Genotype.pm
Criterion Covered Total %
branch 13 36 36.1
subroutine 15 16 93.7
pod 5 5 100.0
total 33 57 57.8


line bran sub pod code
1       package Sanger::CGP::AlleleCount::Genotype;
2        
3       ##########LICENCE##########
4       # Copyright (c) 2014-2018 Genome Research Ltd.
5       #
6       # Author: CASM/Cancer IT
7       #
8       # This file is part of alleleCount.
9       #
10       # alleleCount is free software: you can redistribute it and/or modify it under
11       # the terms of the GNU Affero General Public License as published by the Free
12       # Software Foundation; either version 3 of the License, or (at your option) any
13       # later version.
14       #
15       # This program is distributed in the hope that it will be useful, but WITHOUT
16       # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17       # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
18       # details.
19       #
20       # You should have received a copy of the GNU Affero General Public License
21       # along with this program. If not, see .
22       ##########LICENCE##########
23        
24   2   use strict;
25        
26   2   use Carp;
27   2   use English qw( -no_match_vars );
28   2   use warnings FATAL => 'all';
29        
30   2   use FindBin qw($Bin);
31   2   use File::Which qw(which);
32   2   use File::Temp qw(tempdir);
33        
34   2   use Sanger::CGP::AlleleCount;
35        
36   2   use Const::Fast qw(const);
37        
38       const my $MIN_MAPQ => 35;
39       const my $MIN_PBQ => 30;
40       const my $FLAG_REQ => 2;
41       const my $FLAG_FILT => 4+8+256+512+1024+2048; # 3852
42        
43       =item new
44        
45       Null constructor
46        
47       =cut
48        
49       sub new {
50   3 1 my ($class) = @_;
51       my $self = { };
52       bless $self, $class;
53       return $self;
54       }
55        
56       =item configure
57        
58       Set up the object for the current analysis.
59        
60       $genotype->configure('my.bam', $min_pbq, $min_mapq [, $fasta])
61        
62       =cut
63        
64       sub configure {
65   12 1 my ($self, $bam_file, $min_pbq, $min_mapq, $fasta) = @_;
66 50     $self->{'$fasta'} = $fasta if(defined $fasta);
67       $self->{'min_pbq'} = $min_pbq // $MIN_PBQ;
68       $self->{'min_mapq'} = $min_mapq // $MIN_MAPQ;
69       }
70        
71       =item _wrap_c_alleleCounter
72        
73       Generic function to generate allele counts from a chr\tpos\n formatted file (1-based)
74       by calling the C version.
75        
76       Calling function should provided an intermediate path for the output if data needs reformatting for
77       SNP6 style loci input files.
78        
79       =cut
80        
81       sub _wrap_c_alleleCounter {
82   12   my ($self, $hts_file, $out_file, $clean_loci) = @_;
83       my $command = sprintf _alleleCounter_c().
84       ' --loci-file=%s'.
85       ' --hts-file=%s'.
86       ' --output-file=%s'.
87       ' --min-base-qual=%d'.
88       ' --min-map-qual=%d'.
89       ' --required-flag=%d'.
90       ' --filtered-flag=%d',
91       ($clean_loci, $hts_file, $out_file,
92       $self->{'min_pbq'}, $self->{'min_mapq'},
93       $FLAG_REQ, $FLAG_FILT);
94 50     if(defined $self->{'$fasta'}) {
95       $command .= ' --ref-file='.$self->{'$fasta'};
96       }
97 50     if($ENV{ALLELE_C_SILENT}) { # only used for test harness
98       $command .= ' 2> /dev/null'
99       }
100 50     system($command) && die $!;
101       return;
102       }
103        
104       =item get_full_snp6_profile
105        
106       Writes tab seperated allelic counts and depth to specified FH
107       Uses all snps defined in file used by ngs_cn (format slightly different)
108        
109       =cut
110       sub get_full_snp6_profile {
111   6 1 my ($self, $bam_file, $out_file, $loci_file, $min_pbq, $min_mapq, $fasta) = @_;
112       $self->configure($bam_file, $min_pbq, $min_mapq, $fasta);
113        
114       my %stored;
115       # process the original loci file
116       my $tmpdir = tempdir( CLEANUP => 1 );
117       my $tmp_loci = $tmpdir.'/loci_tmp.out';
118       my $tmp_out = $tmpdir.'/out_tmp.out';
119 50     open my $N_LOC, '>', $tmp_loci or croak "Unable to open $tmp_loci for writing: $OS_ERROR\n";
120 50     open my $SNP6, '<', $loci_file or croak "Unable to open $loci_file for reading: $OS_ERROR\n";
121       while(my $line = <$SNP6>) {
122       chomp $line;
123       my ($chr, $pos, undef, undef, $allA, $allB) = split /\s/, $line;
124       $stored{"$chr:$pos:A"} = uc $allA;
125       $stored{"$chr:$pos:B"} = uc $allB;
126       printf $N_LOC "%s\t%d\n", $chr, $pos;
127       }
128       close $N_LOC;
129       close $SNP6;
130        
131       _wrap_c_alleleCounter($self, $bam_file, $tmp_out, $tmp_loci);
132        
133 50     open my $cfh, '<', $tmp_out or croak "Unable to open $loci_file for reading: $OS_ERROR\n";
134 50     open my $ofh, '>', $out_file or croak "Unable to open $out_file for writing: $OS_ERROR\n";
135       # header
136 50     print $ofh "#CHR\tPOS\tCount_Allele_A\tCount_Allele_B\tGood_depth\n" or croak "Failed to write line: $OS_ERROR\n";
137        
138       while(my $line = <$cfh>) {
139 100     next if($line =~ m/^#/);
140       chomp $line;
141       my ($chr, $pos, $c_a, $c_c, $c_g, $c_t, $depth) = split /\t/, $line;
142       my %tmp = ('A', $c_a, 'C', $c_c, 'G', $c_g, 'T', $c_t);
143       printf $ofh "%s\t%d\t%d\t%d\t%d\n", $chr,
144       $pos,
145       $tmp{$stored{"$chr:$pos:A"}},
146       $tmp{$stored{"$chr:$pos:B"}},
147       $depth;
148       }
149       close $cfh;
150       close $ofh;
151       return 1;
152       }
153        
154       =item get_full_loci_profile
155        
156       Writes tab seperated allelic counts and depth to specified FH
157       Uses all loci defined in specified file
158        
159       =cut
160       sub get_full_loci_profile {
161   6 1 my ($self, $bam_file, $out_file, $loci_file, $min_pbq, $min_mapq, $fasta) = @_;
162       $self->configure($bam_file, $min_pbq, $min_mapq, $fasta);
163       _wrap_c_alleleCounter($self, $bam_file, $out_file, $loci_file);
164       return 1;
165       }
166        
167       =item gender_chk
168        
169       Writes the chromosome name for the Male sex chromosome as defined by loci file and 'Y/N'
170       indicating presence of any of the SNPs. E.g.
171        
172       chrX Y
173        
174       or
175        
176       X N
177        
178       =cut
179       sub gender_chk {
180   0 1 my ($self, $bam_file, $out_file, $loci_file, $min_pbq, $min_mapq, $fasta) = @_;
181       $self->configure($bam_file, $min_pbq, $min_mapq, $fasta);
182       my $tmpdir = tempdir( CLEANUP => 1 );
183       my $tmp_out = $tmpdir.'/gender_chk.out';
184       _wrap_c_alleleCounter($self, $bam_file, $tmp_out, $loci_file);
185        
186       my $sex_chr;
187       my $is_male = 'N';
188 0     open my $fh, '<', $tmp_out or croak 'Unable to open '.$tmp_out.' for reading';
189       while(my $line = <$fh>) {
190 0     next if($line =~ /^#/);
191       chomp $line;
192       my ($chr, $pos, $depth) = (split /\t/, $line)[0,1,-1];
193 0     if(defined $sex_chr) {
194 0     die "Only loci expected on the 'male' sex chromosome should be included in: $loci_file\n\tYou have $sex_chr & $chr so far!\n" if($chr ne $sex_chr);
195       }
196       else {
197       $sex_chr = $chr;
198       }
199 0     if($depth > 5) {
200       $is_male = 'Y';
201       # technically we could stop here, but we should check all the chrs to make sure this isn't the wrong LOCI file
202       }
203       }
204       close $fh;
205        
206 0     open my $ofh, '>', $out_file or croak 'Unable to open '.$out_file.' for writing';
207       printf $ofh "%s\t%s\n", $sex_chr, $is_male;
208       close $ofh;
209       return;
210       }
211        
212       sub _alleleCounter_c {
213   12   my $l_bin = $Bin.'/../../c/bin';
214       my $prog = 'alleleCounter';
215       my $path = File::Spec->catfile($l_bin, $prog);
216 50     $path = which($prog) unless(-e $path);
217 50     die "Failed to find alleleCounter in path or local bin folder ($l_bin)\n\tPATH: $ENV{PATH}\n" unless(defined $path && -e $path);
218       return $path;
219       }
220        
221       1;