#!/usr/bin/perl -w
#
# findModifiedPeaklists.pl
#
# Some modifications fall off very easily (O-GlcnAc) and as a consequence
# modified AA don't appear in spectra, but the additional mass is in the
# pre-cursor mass. This script looks for the modification that has fallen
# off and keeps only those peaklists with the correct peak. Then the
# modification's mass - 1 is subtracted from the PEPMASS and the new set
# of peaklists returned for submission to library searches.
#
# ver date author description
# 0.7.3 2011.01.23 Aenoch Fix to run on Windows outside of cygwin.
# 0.7.2 2010.08.26 Aenoch Output peaklists that have --peak_mass
# present. (0.7.1 was never finished)
# 0.7 2009.12.21 Aenoch Add filter for min_intensity as % of base
# peak and of total intensity, allow
# tolerances in ppm.
# 0.6 2006.03.23 Aenoch Adapt to work on LTQ files.
# 0.4 2005.01.10 Aenoch Another correction for looking for
# precursor with +3 and higher charge.
# 0.3 2004.11.30 Aenoch Correct looking for precursor with
# charge state, increase tolerance default.
# 0.1 2004.08.17 Aenoch Initial version.
use strict;
use Getopt::Long;
# $/ = "\r\n";
my ( $param_peak_mass,
$param_peak_mass_tolerance,
$param_add_filename_to_title,
$param_find_precursor,
$param_precursor_mass_tolerance,
$param_count,
$param_help,
$param_verbose,
$param_version,
$param_print,
$param_output_filename,
$param_min_intensity,
$param_min_intensity_type,
$param_oglcnac
) = ();
&GetOptions( "peakmass=f" => \$param_peak_mass,
"count=n" => \$param_count,
"oglcnac" => \$param_oglcnac,
"output=s" => \$param_output_filename,
"min_intensity=f" => \$param_min_intensity,
"min_intensity_type=s" => \$param_min_intensity_type,
"tolerance=f" => \$param_peak_mass_tolerance,
"find_precursor" => \$param_find_precursor,
"print" => \$param_print,
"precursor_tolerance=f" => \$param_precursor_mass_tolerance,
"add_filename_to_title" => \$param_add_filename_to_title,
"verbose" => \$param_verbose,
"version" => \$param_version,
"help" => \$param_help );
my $version = "0.7.3";
if ( $param_version ) {
print "version = $version\n";
exit;
}
if ( $param_help or $#ARGV == -1 ) {
$0 =~ /[^\/]*$/;
my $program_name = $&;
die <<EOF;
usage: $program_name [parameters] mascot_tmp_file1 [mascot_tmp_file2...]
Perl script to find peaklists that contain peaks with --peakmass masses and
extract them from Mascot.dll generated peaklists. Subtract ( --peakmass -
1 ) / charge from the PEPMASS value to simulate the modification falling
off the precursor peptide. Output to STDOUT or to the named file. Can read
multiple tmp files; output concatenated.
Parameters ( [] optional; S == string; N == integer; F == floating point )
[--help] Print this message.
[--peakmass=F] Look for peaklists with a peak with this mass.
[--tolerance=F] Peaks +/- this tolerance will match --peakmass, default
of 0.03.
[--min_intensity=F] Minimum intensity in order to be called a peak.
Defaults to 0.0.
[--min_intensity_type=S]
How to use the min_intensity. If "absolute", then use
the min_intensity as it. If "percent_base_peak", then
only use if greater than or equal to min_intensity
percent of the MSMSM base peak. If "percent_tic", then
only use if greater than or equal to min_intensity
percent of the total MSMS ion current.
Default to "absolute".
[--find_precursor] Look for the precursor mass after the modification has
fallen off.
[--precursor_tolerance=F]
Accept precursor mass without modification +/- this
tolerance. Default to 0.4.
[--output=S] Output to the named file.
[--oglcnac] Subtract the mass for O-GlcnAC modifications, overrides
the --peakmass parameter with 204.08; --tolerance
parameter with 0.03 can be overridden.
[--add_filename_to_title]
Append the filename to the TITLE= line in the peaklist.
[--print] Quick hack to print out the peaklists that match the
--peakmass but not necessarily the --find_precursor.
In fact, don't use this with --find_precursor or those
peaklists will be duplicated.
EOF
}
our $VERBOSE = ( defined $param_verbose ? $param_verbose : 0 );
my ( $line, $pepmass, $reduced_pepmass, $charge, $elution, $file, $peak,
$output, $pepmass_count ) = ();
my @pl = ();
if ( $param_output_filename ) {
open WH, ">$param_output_filename" or die "cannot open file \"$param_output_filename\" for write\n";
$output = \*WH;
}
else {
$output = \*STDOUT;
}
if ( $param_oglcnac ) {
$param_peak_mass = 204.08;
print STDERR "using oglcnac definitions with --peakmass=$param_peak_mass\n" if $::VERBOSE;
}
if ( not $param_peak_mass ) {
die "need to specify --peakmass or a named modification (e.g., --oglcnac)\n\n";
}
#
# set defaults
#
$param_min_intensity = ( $param_min_intensity ? $param_min_intensity : 0.0 );
$param_min_intensity_type = ( $param_min_intensity ? $param_min_intensity : "absolute" );
$param_count = ( defined $param_count ? $param_count : 1 );
$param_peak_mass_tolerance = ( defined $param_peak_mass_tolerance ?
$param_peak_mass_tolerance :
0.03 );
if ( $param_find_precursor ) {
$param_precursor_mass_tolerance = ( defined $param_precursor_mass_tolerance ? $param_precursor_mass_tolerance : 0.4 );
}
if ( $::VERBOSE ) {
print STDERR <<EOF;
parameters:
--peakmass=$param_peak_mass
--tolerance=$param_peak_mass_tolerance
--min_intensity=$param_min_intensity
--min_intensity_type=$param_min_intensity_type
--find_precursor $param_find_precursor
--precursor_tolerance=$param_precursor_mass_tolerance
--output=$param_output_filename
--add_filename_to_title=$param_add_filename_to_title
EOF
}
$pepmass_count = 0;
foreach $file ( @ARGV ) {
open RH, $file or die "cannot open file \"$file\" for read\n";
if ( $::VERBOSE ) {
print STDERR "FILENAME = $file\n";
}
print STDERR "looking for peaklist\n";
while ( ($pepmass, $charge, $elution) = &get_next_peaklist( \*RH, \@pl ) ) {
if ( $pepmass and $charge ) {
$pepmass_count++;
if ( &has_peak( $param_peak_mass, $param_peak_mass_tolerance, \@pl ) >= &min_intensity( \@pl ) ) {
## if ( &has_peak( $param_peak_mass, $param_peak_mass_tolerance, \@pl ) >= $param_min_intensity ) {
print STDERR "found peak at elution = $elution\n" if $::VERBOSE;
if ( $param_print ) {
print $output @pl;
}
if ( $param_find_precursor ) {
if ( $charge > 1 ) {
$peak = ( $pepmass * $charge - ( $charge - 2 ) - ( $param_peak_mass - 1 ) ) / ( $charge - 1 );
unless ( &has_peak( $peak, $param_precursor_mass_tolerance, \@pl ) >= $param_min_intensity ) {
@pl = ();
next;
}
print STDERR "found precursor after losing modification at elution = $elution\n" if $::VERBOSE;
&subtract_thompson_from_peaklist( ($param_peak_mass - 1) / $charge, \@pl );
if ( $param_add_filename_to_title ) {
&add_to_title( \@pl, $file );
}
print $output @pl;
}
}
}
}
else {
# print $output @pl;
}
@pl = ();
}
close RH;
}
close $output if $param_output_filename;
exit 0;
## end main ##
sub get_next_peaklist {
my $rh = shift @_;
my $array = shift @_;
my $line;
my $return_value = 0;
my $index = 0;
my $pepmass = 0;
my $charge = 0;
my $elution = 0;
ION: while ( $line = <$rh> ) {
SW: for ( $line ) {
( /^BEGIN IONS/ ) and do {
# not the first read line
if ( $index ) {
# rewind the file handle
seek $rh, - length( $line ), '01';
$return_value = "not_peaklist";
last ION;
}
};
( /^PEPMASS=/ ) and do {
$pepmass = $';
$pepmass =~ s/\r?\n//;
$pepmass =~ s/\s+.*//;
};
( /^CHARGE=/ ) and do {
$charge = $';
$charge =~ s/\r?\n//;
$charge =~ s/[+-]//;
};
( /^TITLE=Elution from: ([^ ]*)/ ) and do {
$elution = $1;
};
( /Elution: ([^ ]*) min/ ) and do {
$elution = $1;
};
( /\(rt=([\d\.]*)\)/ ) and do {
$elution = $1;
};
( 1 ) and do {
if ( $index == 0 ) {
}
$index++;
push @$array, $line;
};
( /^END IONS/ ) and do {
$return_value = "peaklist";
last ION;
};
}
}
if ( eof( $rh ) and not $return_value ) {
return ();
}
elsif ( $return_value eq "not_peaklist" ) {
return( 0, 0, 0 );
}
else {
return( $pepmass, $charge, $elution );
}
}
## end get_next_peaklist ##
sub subtract_thompson_from_peaklist {
my $thompson_to_subtract = shift @_;
my $pl = shift @_;
my ( $old_pepmass, $new_pepmass, $i ) = ();
for ( $i = 0; $i < scalar @$pl; $i++ ) {
if ( $$pl[$i] =~ /PEPMASS=/ ) {
$old_pepmass = $';
$old_pepmass =~ s/\r?\n//;
$old_pepmass =~ s/\s+.*//;
$new_pepmass = $old_pepmass - $thompson_to_subtract;
$$pl[$i] = "PEPMASS=$new_pepmass" . $/;
}
}
}
## end subtract_thompson_from_peaklist ##
sub has_peak {
my $target_mass = shift @_;
my $param_peak_mass_tolerance = shift @_;
my $pl = shift @_;
my ( $line, $mass, $intensity ) = ();
$charge =~ s/[+-]//;
foreach $line ( @$pl ) {
if ( $line =~ /^\d/ ) {
( $mass, $intensity ) = split /\s/, $line;
$intensity =~ s/\r?\n//;
if ( ( ($target_mass - $param_peak_mass_tolerance) <= $mass ) and
( $mass <= ($target_mass + $param_peak_mass_tolerance) ) ) {
return $intensity;
}
}
}
return -1;
}
## end has_peak ##
sub add_to_title {
my $pl = shift @_;
my $text = shift @_;
my $i = 0;
for ( $i = 0; $i < scalar @$pl; $i++ ) {
if ( $$pl[$i] =~ /TITLE=/ ) {
$$pl[$i] =~ s/\r?\n/ $text$&/;
}
}
}
## end add_to_title
sub min_intensity {
my $pl = shift @_;
if ( $param_min_intensity_type eq "percent_base_peak" ) {
my $base_peak = &find_base_peak( $pl );
if ( not defined $base_peak ) {
$base_peak = 0;
}
return( $param_min_intensity * $base_peak );
}
if ( $param_min_intensity_type eq "percent_tic" ) {
my $tic = &find_tic( $pl );
if ( not defined $tic ) {
$tic = 0;
}
return( $param_min_intensity * $tic );
}
# This is the default. If none of the above match, then use this.
return $param_min_intensity;
}
## end min_intensity
sub find_base_peak {
my $pl = shift @_;
my ( $line, $mz, $intensity, $max_intensity ) = ();
$max_intensity = 0;
foreach $line ( @$pl ) {
$line =~ s/\r?\n//;
if ( $line =~ /^\d/ ) {
( $mz, $intensity ) = split /\s+/, $line;
if ( $intensity > $max_intensity ) {
$max_intensity = $intensity;
}
}
}
return $max_intensity;
}
## end find_base_peak
sub find_tic {
my $pl = shift @_;
my ( $line, $mz, $intensity, $total ) = ();
$total = 0;
foreach $line ( @$pl ) {
$line =~ s/\r?\n//;
if ( $line =~ /^\d/ ) {
( $mz, $intensity ) = split /\s+/, $line;
$total += $intensity;
}
}
return $total;
}
## end find_tic
__END__