#!/usr/bin/perl # Jana K. Shen and Jason A. Wallace # University of Oklahoma # July 2009 # compute pKa values using HH or generalized HH equation # cptpKa.pl [name] [.sx file 1] [.sx file 2]... # name = name of protein or project # output: all.pka (if multiple titration data points are given) # file.pka (if only one titration data point is given) while (@ARGV) { $file = shift @ARGV; push (@files, $file); } $name = shift @files; $nfile = @files; print "Total number of .sx files is $nfile\n"; system "rm -f $name-res-*.dat $name-res-*.fit"; # For only .sx, pKa is calculated using HH equation # S = 1/(1+10^(pka-pH)) if ($nfile == 1){ $nres = 0; $oldres = 0; open INP, "<$file"; $pKaFile = $file; $pKaFile =~ s/\.sx/\.pka/; if (-e "$pKaFile") {system "rm -f $pKaFile";} open (OUT,">>$pKaFile"); while () { (/#/) && (next); @L = split(/\s+/,$_); ($L[0] !~ /\d/) && (shift @L); ($L[1] == $oldres) && (next); $nres++; $resnum = $L[1]; $pH = $L[2]; $S = $L[3]; $oldres = $L[1]; if (($S < 0.1) || ($S > 0.9)) { printf OUT "%4d NAN\n", $resnum; next; } $S = 1/$S - 1; $pKa = $pH + log($S)/log(10); printf OUT "%4d %5.2f 1.0\n", $resnum, $pKa; } print "Total number of titrating residues is $nres\n"; exit; } # If there are two or more .sx files, # we first collect S values at different pH for each titrating residue foreach $file (@files) { $nres = 0; $oldres = 0; open INP, "<$file"; while () { (/#/) && (next); @L = split(/\s+/,$_); ($L[0] !~ /\d/) && (shift @L); ($L[1] == $oldres) && (next); $nres++; $resnum = $L[1]; $pH = $L[2]; $S = $L[3]; $oldres = $L[1]; $ires[$nres] = $resnum; $outf = "$name-res-$resnum.dat"; open (OUT, ">>$outf"); printf OUT "%4.1f %5.2f\n", $pH, $S; } } close OUT; print "Total number of titrating residues is $nres\n"; # Prepare Xmgrace template file for fitting to generalized HH equation # S = 1/(1+10^(n*(pka-pH))) if (-e "xmgrace_template") {system "rm -f xmgrace_template";} open (OUT, ">> xmgrace_template"); print OUT 'fit formula "y = 1/(1 + 10^( a0*(a1-x)))"'; print OUT "\nfit with 2 parameters"; print OUT "\nfit prec 0.001"; print OUT "\na0 = 1.0\na0 constraints on"; print OUT "\na0min = 0.1\na0max = 2.0\n"; print OUT "\na1 = 4.0\na1 constraints on"; print OUT "\na1min = 0\na1max = 14\nnonlfit (s0, 5)\n"; close OUT; # Compute pKa's using Xmgrace fitting for ($nn=1; $nn <= $nres; $nn++) { $resnum = $ires[$nn]; system "xmgrace -hardcopy $name-res-$resnum.dat -batch xmgrace_template > $name-res-$resnum.fit"; system "rm -f $name-res-$resnum.ps"; } # Make gnuplots for S vs pH with the fitted curve and data points # We first prepare a gnuplot template for plotting if (-e "gnuplot_template") {system "rm -f gnuplot_template";} open (OUT, ">> gnuplot_template"); print OUT "set term post\n"; print OUT "set output 'NAME.ps'; set title 'NAME'; set pointsize 2.5\n"; print OUT "set xlabel 'pH'; set ylabel 'S'; set xrange [0:14]; set yrange [-.1:1.1]\n"; print OUT "set key right bottom\n"; print OUT "plot 'NAME.dat' ti 'data' pt 2, 1 / (1 + 10**(HILL*(PKA-x))) ti 'fit'\n"; close OUT; # Write out pKa values in a file and make gnuplot plots if (-e "$name.pka") {system "rm -f $name.pka"}; open (OUT, ">>$name.pka"); for ($nn=1; $nn <= $nres; $nn++) { $resnum = $ires[$nn]; open INP, "< $name-res-$resnum.fit"; $fit = 0; while () { if (/Computed values/) {$fit = 1; next;} if ($fit == 1) { if (/a1 = /) { @L = split(/\s+/,$_); ($L[0] !~ /\w/) && (shift @L); $pka = $L[2]; print "res $resnum pka $pka\n"; } if (/a0 = /) { @L = split(/\s+/,$_); ($L[0] !~ /\w/) && (shift @L); $hill = $L[2]; print "res $resnum hill $hill\n"; } } if (/Correlation/) { @L = split(/\s+/,$_); ($L[0] !~ /\w/) && (shift @L); $correl = $L[2]; print "res $resnum correlation $correl\n";print "\n"; } } printf OUT "%4d %5.2f %5.2f %5.2f\n", $resnum, $pka, $hill, $correl; system "sed s/NAME/$name-res-$resnum/g gnuplot_template | sed s/HILL/$hill/ | sed s/PKA/$pka/ > $name-gnuplot"; system "gnuplot $name-gnuplot"; } system "rm -f $name-gnuplot gnuplot_template xmgrace_template"; exit;