#!/usr/bin/perl
#
#       -------------------------------------------------------------
#	Author:  Brian Hargreaves	Dec 4/02.
#       -------------------------------------------------------------
#
#	===>>> Applescript Version <<<===
#
#	(Takes inputs from STDIN)
#
#	This program takes the output from PubMed, and makes a 
#	bibtex .bib entry.  It is designed to use the data from
#	the clipboard, after you copy highlighted citations from
#	the results of a PubMed search.
#
#	This script parses the entries, by looking for the
#	calendar year.  Entries are separated by some text with
#	a "]" between them, as will be the case from PubMed.
#
#	The script can also take a .bib file and try to use
#	the journal abbreviations (@string...) in the .bib file
#	instead of the Index Medicus journal names.
#
#	- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#
# -----------------------------------------------------------------------
# 	Full Instructions
# -----------------------------------------------------------------------
#
#	1.  Go to http://www.pubmed.org
#
#	2.  Fill out the search string to find your paper, for example 
#		"Haacke EM, Wielopolski, Modic"
#
#	3.  Highlight from (including) the first author name to
#	   	the line with PMID, copy and paste into a text
#	   	file (called "ref" for this example).
#
#	4.  Run "pubmedbib.pl ref Haacke90"
#
#	5.  This should spit out the following text, which you can
#	    	cut and paste into your .bib file:
#
#	    @article{haacke90, 
#	        author=    {E.~M.~Haacke and P.~A.~Wielopolski and 
#	    	                    J.~A.~Tkach and M.~T.~Modic},
#	        title=     {Steady-state free precession imaging in the 
#	        		presence of motion: application for 
#	        		improved visualization of the cerebrospinal 
#	        		fluid},
#               journal=   rad,
#               volume=    {175}, 
#               number=    {2}, 
#               pages=     {545--552}, 
#               year=      {1990} 
#           }
#
#
#	6.  Write to your local scientific society and suggest that
#	    they continue to support LaTeX.
#
# -----------------------------------------------------------------------
#
#	NOTES:
#		I'm NOT much of a PERL programmer!  I have no doubt that
#		a competent PERL programmer could do this in 50 lines or
#		less!
#		
#		Feel free to edit this for your own liking.
#
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# Start of CVS Log Entries 
# -----------------------------------------------------------------------
#
#	$Log: pubmedbib.pl,v $
#	Revision 1.5  2004/08/26 20:44:35  brian
#	minor edits
#	
#
# -----------------------------------------------------------------------
# End of CVS Log Entries 
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
#

$err = 0;			
$numentries = 0;		# Number of entries found.
$delimiter = "---***---\n";	# Used to separate .bib entries in output.

if (@ARGV)
	{
	@abbreviationslist = ReadBibAbbreviations(shift(@ARGV));
	}

if ($err eq 0)
  
   {
   $line = <STDIN>;

   while (defined($line))
    {
	   #
	   # -------- AUTHOR LIST ----------
	   #
	   #
    $done = 0;

    	{
    	if (defined($line))
    		{
		#if ($line=~ /([A-Za-z].+)$/)  # Trim up to first letter.
			#{
			#$line = $1;
			#}
		$line =~ s/([A-Za-z].+)$/$1/ig;	# Trim up to first letter.
	
		# Read in and add to $line until we have a year.

		$moreline = " ";
		while ((!($line =~ /20[0-9][0-9]/ )) and
		       (!($line =~ /19[0-9][0-9]/ )) and
			(defined($moreline)) )
			{
			$moreline = <STDIN>;
			$line = $line . $moreline;
			}	

		#print("Line to parse:  <$line> \n\n");

		@fields = ParseCitation($line);

		$line = $fields[7];		# Keep residual text.

		#print("Unused: $line \n");

		@authorlist = ParseAuthors($fields[0]);
		shift(@authorlist);

		$title = $fields[1];
		$location = $fields[2];
		$year = $fields[3];
		$vol = $fields[4];
		$num = $fields[5];
		$pages = $fields[6];

		}
	else
		{
		$err=1;
		}
	
    	
	if (defined($title))
		{
		$title = CleanUpTitle($title);
  		$location = AbbreviateTitle($location,@abbreviationslist); 
		}
	else
		{
		$err=1;
		}

	}

	



    if ($err eq 0)
        {
	#print("Author 1 is $authorlist[0]. \n");

	if ($authorlist[0] =~ /~([A-Z][a-zA-Z]+)/)
		{
		$authlname = $1;
		if ($year =~ /([0-9][0-9][0-9][0-9])/ )
			{
			$label = $authlname . $1;
			}
		}
	if (!defined($label))
		{
		$label = "New-Reference";
		}	



	# ---------- Print BibTex Entry ------------
	#
	print("\n");
	$numentries = $numentries + 1;
	if ($numentries > 1)
		{
		print("$delimiter");
		}
        print("\@article{$label, \n");
		#
    		# Print author list, trying to keep from line wrapping.
		#
        print("    author=    {");
        $linepos = 20;		# keep track of position.
        while(scalar(@authorlist)>0)
    	    {
	    $thisauthor = shift(@authorlist);
	    if (scalar(@authorlist)>0)
		{
		$thisauthor = $thisauthor . " and ";
		}
	    $linepos = $linepos + length($thisauthor);
	    if ($linepos > 70)
		{
		print("\n                ");
		$linepos=20 + length($thisauthor);
		}
	    print("$thisauthor");
	    }
        print("},\n");

		#
    		# Print title, trying to keep from line wrapping.
		#
        @titlewords = String2Words($title);

        print("    title=     {");
        $linepos = 20;		# keep track of position.
        while(scalar(@titlewords)>0)
    	    {
	    $thisword = shift(@titlewords);
	    $linepos = $linepos + length($thisword);
	    if ($linepos > 70)
		{
		print("\n                ");
		$linepos=20 + length($thisword);
		}
	    if (scalar(@titlewords)>0)
		{
		print("$thisword ");
		}
	    else
		{
		print("$thisword");
		}
	    }
        print("},\n");
    
        print("    journal=   $location,\n");
        print("    volume=    $vol, \n");
        print("    number=    $num, \n");
        print("    pages=     $pages, \n");
        print("    year=      $year \n");
        print("}\n\n");

	$done=0;


	# ----- Check if line has a Number:  (ie 2:) in it.  If so,
	#	Get rid of it.  If not, discard and read in next line.
	

	while (defined($line) and (!($line =~ /[0-9]\:(.*)$/ )) )
		{
		$line = <STDIN>;
		}

	if ($line =~ /[0-9]\:(.*)$/ )
		{
		$line = $1;
		}


	#while(scalar(@authorlist)>0)
		#{
		#shift(@authorlist); 
		#}
    	#while(scalar(@titlewords)>0)
		#{
		#shift(@titlewords); 
		#}
    	#$title="";
    	#$author="";


	if (defined($line))
		{
		$done=0;
		}
	else
		{
		$done=1;
		}
        }
	
	#print("Now Examining residual... $line \n\n");
   
    if ($err == 1)
	{
	$line = <STDIN>;
	} 
    }
   }




sub ParseAuthors
#
#	Takes a string and returns the author names in an array.
#	
#	OUTPUT:
#		0 - residual text after period (.)
#		1-n - author names, like J.~A.~Smith
#
	{
	my ($authline) = $_[0];
	my (@authlist);
	my ($done) = 0;

	#while (scalar(@authlist)>0)
		#{
		#pop(@authlist);
		#}

	while ($done eq 0)
	    {
		    #print("Parsing...\n");
	    if ($authline =~ /\s*(\S+)\s([A-Z]+)([\,\.])(.*)/)
		{
		$authlast = $1;
		$authinit = $2;


		if ($3 eq ".")
			{
			$done=1;
			}
		$authline = $4;
		#print("Author = $1, $2   Sep = <$3>  Rem = <$4> \n");
		#print("done = $done, authline = $authline \n");
		$authinit =~ s/([A-Z])/$1\.\~/ig;
		$fullauth = $authinit . $authlast;
		#print("Bib author:  $fullauth \n");

		push(@authlist,$fullauth);
		}
	    else
		{
		$done=1;
		}
	    }
	if (!defined($authline))
		{
		$authline = " ";	# Put anything there!
		}
	unshift(@authlist,$authline);
	return(@authlist);
	}




sub String2Words
#
#	Takes a string and returns words in an array.
#	
#
	{
	my ($line) = $_[0];
	while (scalar(@wordlist)>0)
		{
		pop(@wordlist);
		}

	while (length($line)>0)
		{
			#print("line: $line\n");
		if ($line =~ /^\s*(\S+)(\s*.*)$/)
			{
			$word=$1;
			$line = $2;
			push(@wordlist,$word);
			#print("Word:  $word \n");
			}
		else
			{
			$line = "";
			}
		}
	return(@wordlist)
	}



sub dumparray

#
	{
	my (@arr) = @_;
	while (scalar(@arr) > 0)
		{
		$elem = shift(@arr);
		print("array ->  $elem \n");
		}
	return 0;
	}


sub ReadBibAbbreviations
#
#	Subroutine opens the given bib file.  Looks for 
#	lines that match @string{abb = "Full Name "}
#
#	Returns an array with ("abb1","Full Name",abb2,"Full Name2", ...)
#
	{
	my ($fname) = $_[0];


	my ($noerr) = (defined(open(BIBFILE,$fname)));
	my ($abblist);
	my ($fullname);
	my ($abbrev);
	if ($noerr==1)
		{
		$line = <BIBFILE>;
		while (defined($line))
			{
			if ($line=~/^\@string\{([a-z]*)\s*\=\s*\"(.*?)\"\s*\}/)
				{
				push(@abblist,$1);
				$abbrev = $1;
				$fullname = $2;
				$fullname =~ s/\s$//ig;
				$fullname = "\"" . $fullname . "\"";

				push(@abblist,$fullname);
				#print("Abbrev:  <$abbrev> \n");
				#print("Full:    <$fullname> \n");
				}
			$line = <BIBFILE>;
			}
		close (BIBFILE);
		}
	return(@abblist);
	}


sub ParseCitation
#
#	Subroutine parses the author/title/journal - splits
#	it to @result = ($authorlist, $title, $journal)

	{
	my ($parseline) = $_[0];
	my ($authtj);
	my ($volnumpage);
	my ($authorlist);
	my ($title);
	my ($journal);
	my ($year);
	my ($volume);
	my ($number);
	my ($pages);
	my ($residualtext);
  	my (@retval);
	

	if ($parseline=~ /^(.*?)\s((19|20)[0-9][0-9])\s.*?;(.*)$/ )
  	    {
	    $authtj = $1;
	    $year = "{$2}";
	    $volnumpage = $4;
			
	    #print("1: $1,  2:$2,  3:$3,  4:$4,  5:$5 \n\n");


	    if ($authtj =~ /^(.+?)\.\s(.+(\.|\?|\!))\s(.+?)\.\s*$/ )
		{
		$authorlist = "$1.";
		$title = $2;
           	$journal = $4;

		$title =~ s/Related Articles,/ /ig;
		$title =~ s/Links/ /ig;
		$title =~ s/\s*(.*)$/$1/ig;

		$journal =~ s/\s*(.*)$/$1/ig;
		$journal = "{\"$journal\"}";


		}


	    if ($volnumpage =~ /^\s*([0-9]+)(.*)$/)
		{
		$vol = "{$1}";
		$rest = $2;
		if ($rest =~ /^\s*\(([0-9]+)\)(.*)$/)
		    {
		    $num = "{$1}";
		    $rest = $2;
		    }
		else
		    {
		    $num = "{}";
		    }
		if ($rest =~ /^\s*\:\s*([0-9]+)\-([0-9]+)(.*)$/)
		    {
		    $fpage = $1;
		    $lpage = $2;
			# Replace form 344-6 with
			# 344-346, etc.
			#
		    $lenfpage = length($fpage);
		    $lenlpage = length($lpage);
		    $dlen = $lenfpage - $lenlpage;
		    if ($dlen > 0)
			{
			$pref = substr($fpage,0,$dlen);
			$lpage = $pref . $lpage;
			}
		    $pages = "{$fpage--$lpage}";
		    $rest = $3;
		    }
		else
		    {
		    $pages = "{}";
		    }
		}
	    }
	if (1 eq 0)
		{
    		print("authtj:  <$authtj> \n");
		print("Authors:  <$authorlist> \n ");
		print("Title  :  <$title> \n ");
		print("Journal:  <$journal> \n ");
    		print("VolNumPage:  <$volnumpage> \n");
		print("Volume:  <$vol> \n");
		print("Number:  <$num> \n");
		print("Pages:  <$pages> \n");
		}

	push(@retval,$authorlist);
	push(@retval,$title);
	push(@retval,$journal);
	push(@retval,$year);
	push(@retval,$vol);
	push(@retval,$num);
	push(@retval,$pages);
	push(@retval,$rest);

	return (@retval);
	}



sub CleanUpTitle
#
#	Subroutine removes some stuff from the title,
#	and trie to preserve capitalization in LaTeX
#
	
        {
	my ($title) = $_[0];

        #Remove whitespace and ending period.

        $title =~ s/^(\s*)(\S.+)(\.)(\s*)$/$2/ig;
        #print("Adding {}s to title: $title \n");

        #
        #       Try to keep certain things capitalized.
        #
        $title =~ s/(T[1|2])/\{$1\}/ig;         # T1 or T2 -> {T1/2}
        $title =~ s/(MR\s)/\{MR\} /ig;          # {MR}
        $title =~ s/(MRI)/\{MRI\}/ig;           # {MRI}
        $title =~ s/(FSE)/\{FSE\}/ig;           # {MRI}
        $title =~ s/(GRE)/\{GRE\}/ig;           # {MRI}
        $title =~ s/(RARE)/\{RARE\}/ig;         # {MRI}
        $title =~ s/(dGE\{MRI\}C)/\{dGEMRIC\}/ig;       # {dGEMRIC}
        $title =~ s/([0-9]\s*T)/\{$1\}/ig;      # 1.5 T or 3 T
        $title =~ s/(DTPA)/\{DTPA\}/ig; # {dGEMRIC}
        $title =~ s/(Gd)/\{Gd\}/ig;     # {dGEMRIC}

        if ($title =~/^(\s*)Abstract(.*)$/ )    # Remove "Abstract"
                {
                $title = $2;
                }

        #print("Title: <$title> \n");
	return ($title);
	}


sub AbbreviateTitle
#
#	Subroutine tries to abbreviate title (input argument 0), 
#	using BibTex abbreviations in arguments 1-n.
#	
    {
    my ($location) = shift(@_);
    my (@abbreviationslist) = @_;
    my ($count);
    my ($abb);
    my ($fullname);

    	# ---------- Try to abbreviate journal title.  Would be nice
	# 		to pass the .bib file and search it, but...
	#
    if (0 eq 1)
        {
    	$location =~ s/^\{\"Radiology\"\}/rad/ig;		# Radiology.
    	$location =~ s/^\{\"Magn\ Reson\ Med"\}/mrm/ig;		# MRM.
    	$location =~ s/^\{\"J\ Cardiovasc\ Magn\ Reson"\}/jcmr/ig;	# JCMR.
    	$location =~ s/^\{\"J\ Magn\ Reson\ Imaging"\}/jmri/ig;	# JMRI.
    	$location =~ s/^\{\"AJR Am J Roentgenol"\}/ajr/ig;	# AJR.
    	$location =~ s/^\{\"J\ Magn\ Reson"\}/jmr/ig;	# JMRI.
	$location =~ s/^\{\"Radiographics"\}/radgraph/ig;	# Radiographics

	#$location =~ s/^\{\""\}//ig;	# Template to add these.
	}

    if (1 eq 1)
	# ---- Tries to abbreviate title from @string{} entries
	# ---- that are in the .bib file passed as a parameter.
	{
	$count = 0;
	while ($count < scalar(@abbreviationslist))
		{
		$abb = $abbreviationslist[$count];
		$fullname = $abbreviationslist[$count+1];

		$fullname = "\^\\\{" . $fullname . "\\\}";
		$location =~ s/$fullname/$abb/ig;
		$count = $count + 2;
		}
	}

    return($location); 
    }
