#!/usr/bin/perl
#
#       -------------------------------------------------------------
#	pubmedbib.pl
#
#	Author:  Brian Hargreaves	Dec 4/02.
#       -------------------------------------------------------------
#
#	This program takes the output from PubMed, and makes a 
#	bibtex .bib entry.
#
#	- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#
#	Usage:  pubmedbib.pl  <text file> <label>  
#
#		
#	    INPUT:	
#		<text file> = file containing text from PubMed that 
#				begins with the first author, and includes
#				the line PMID ...
#				
#		<label>	= label to use in .bib file.
#	- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#
# -----------------------------------------------------------------------
# 	Full Instructions
# -----------------------------------------------------------------------
#
#	1.  Go to http://www.pubmed.org
#
#	2.  Fill out the search string to find your paper, for example 
#		"Haacke EM, Wielopolski, Modic"
#
#	3.  Highlight from (including) the first author name to
#	   	the line with PMID, copy and paste into a text
#	   	file (called "ref" for this example).
#
#	4.  Run "pubmedbib.pl ref Haacke90"
#
#	5.  This should spit out the following text, which you can
#	    	cut and paste into your .bib file:
#
#	    @article{haacke90, 
#	        author=    {E.~M.~Haacke and P.~A.~Wielopolski and 
#	    	                    J.~A.~Tkach and M.~T.~Modic},
#	        title=     {Steady-state free precession imaging in the 
#	        		presence of motion: application for 
#	        		improved visualization of the cerebrospinal 
#	        		fluid},
#               journal=   rad,
#               volume=    {175}, 
#               number=    {2}, 
#               pages=     {545--552}, 
#               year=      {1990} 
#           }
#
#
#	6.  Write to your local scientific society and suggest that
#	    they continue to support LaTeX.
#
# -----------------------------------------------------------------------
#
#	NOTES:
#		I'm NOT much of a PERL programmer!  I have no doubt that
#		a competent PERL programmer could do this in 50 lines or
#		less.
#		
#		Feel free to edit this for your own liking.
#
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
#



# =========== GET Command Line Arguments  ===========
$err = 0;

if (@ARGV)
    {
    $pubmedfile = shift (@ARGV);
    $label = shift (@ARGV);
    }
else
    {
    print("Not enough arguments - See pubmedbib.pl text for usage.\n\n");
    $err = 1;
    }


if ($err eq 0)
  {
  if (defined(open(PMFILE,$pubmedfile)))
    {
	   #
	   # -------- AUTHOR LIST ----------
	   #
	   #
    $line = <PMFILE>;
    $done = 0;

    while ($done eq 0)
    	{
    	if (defined($line))
    		{
		if ($line =~ /\S/)
			{
			@authorlist = ParseAuthors($line);
			if (scalar(@authorlist)>0)
				{
				$done = 1;
				}
			}
		$line = <PMFILE>;
		}
	else
		{
		$done = 1;
		$err = 1;
		}
	}
		#
		# ------ Skip to Title ------
		#
    if ($err eq 0)
    	{
        $done = 0;
	}

    while ($done eq 0)
        {
	if (defined($line))
		{
		if ($line =~ /Related Articles/)
			{
			$line = <PMFILE>;
			}
		else
		    {
	   	    if ($line =~ /\S/)
		    	{
			$done = 1;
			}
		    else
		    	{
			$line = <PMFILE>;
			}
		    }
		}
	else
		{
		$done = 1;
		$err = 1;
		}
	}
    if ($err eq 0)
    	{
	if (defined($line))
		{
		$title = $line;
			#Remove whitespace and ending period.
		$title =~ s/^(\s*)(\S.+)(\.)(\s*)$/$2/ig;	
		#
		#	Try to keep certain things capitalized.
		#	
		$title =~ s/(T[1|2])/\{$1\}/ig;		# T1 or T2 -> {T1/2}
		$title =~ s/(MR\s)/\{MR} /ig;		# {MR}
		$title =~ s/(MRI)/\{MRI}/ig;		# {MRI} 
		$title =~ s/([0-9]\s*T)/\{$1\}/ig;	# 1.5 T or 3 T

		#print("Title: <$title> \n");
	
		$line = <PMFILE>;
		$done = 0;
		}
	else
		{
		$err=1;
		}

	}
    if ($err eq 0)
    	{
	if (defined($line))
		{
		$line =~ s/^(\s*)(\S.+)(\.)(\s*)$/$2/ig;
		if ($line =~ /^(.+)\.\s*([0-9][0-9][0-9][0-9]).*\;(.*)$/)
			{
			$location = "{\"$1\"}";
			$year = "{$2}";
			$volnumpage = $3;

			if ($volnumpage =~ /^\s*([0-9]+)(.*)$/)
				{
				$vol = "{$1}";
				$rest = $2;
				if ($rest =~ /^\s*\(([0-9]+)\)(.*)$/)
					{
					$num = "{$1}";
					$rest = $2;
					}
				else
					{
					$num = "{}";
					}
				if ($rest =~ /^\s*\:\s*([0-9]+)\-([0-9]+)/)
					{
					$fpage = $1;
					$lpage = $2;
						# Replace form 344-6 with
						# 344-346, etc.
						#
					$lenfpage = length($fpage);
					$lenlpage = length($lpage);
					$dlen = $lenfpage - $lenlpage;
					if ($dlen > 0)
						{
						$pref = substr($fpage,0,$dlen);
						$lpage = $pref . $lpage;
						}
					$pages = "{$fpage--$lpage}";
					}
				}
			}
		}
	}

    	# ---------- Try to abbreviate title.  Would be nice
	# 		to pass the .bib file and search it, but...
	#
    if (1 eq 1)
        {
    	$location =~ s/^\{\"Radiology\"\}/rad/ig;		# Radiology.
    	$location =~ s/^\{\"Magn\ Reson\ Med"\}/mrm/ig;		# MRM.
    	$location =~ s/^\{\"J\ Cardiovasc\ Magn\ Reson"\}/jcmr/ig;	# JCMR.
    	$location =~ s/^\{\"J\ Magn\ Reson\ Imaging"\}/jmri/ig;	# JMRI.
    	$location =~ s/^\{\"AJR Am J Roentgenol"\}/ajr/ig;	# AJR.
	$location =~ s/^\{\"Radiographics"\}/radgraph/ig;	# Radiographics


	#$location =~ s/^\{\""\}//ig;	# Template to add these.
	}







    if ($err eq 0)
        {

	# ---------- Print BibTex Entry ------------
	#
	print("\n");
        print("\@article{$label, \n");
		#
    		# Print author list, trying to keep from line wrapping.
		#
        print("    author=    {");
        $linepos = 20;		# keep track of position.
        while(scalar(@authorlist)>0)
    	    {
	    $thisauthor = shift(@authorlist);
	    if (scalar(@authorlist)>0)
		{
		$thisauthor = $thisauthor . " and ";
		}
	    $linepos = $linepos + length($thisauthor);
	    if ($linepos > 70)
		{
		print("\n                ");
		$linepos=20 + length($thisauthor);
		}
	    print("$thisauthor");
	    }
        print("},\n");

		#
    		# Print title, trying to keep from line wrapping.
		#
        @titlewords = String2Words($title);

        print("    title=     {");
        $linepos = 20;		# keep track of position.
        while(scalar(@titlewords)>0)
    	    {
	    $thisword = shift(@titlewords);
	    $linepos = $linepos + length($thisword);
	    if ($linepos > 70)
		{
		print("\n                ");
		$linepos=20 + length($thisword);
		}
	    if (scalar(@titlewords)>0)
		{
		print("$thisword ");
		}
	    else
		{
		print("$thisword");
		}
	    }
        print("},\n");
    
        print("    journal=   $location,\n");
        print("    volume=    $vol, \n");
        print("    number=    $num, \n");
        print("    pages=     $pages, \n");
        print("    year=      $year \n");
        print("}\n\n");
        }
    }
  else
    {
    print("Could not open input file.\n");
    }
  }




sub ParseAuthors
#
#	Takes a string and returns the author names in an array.
#	
#
	{
	my ($authline) = $_[0];

	$done=0;
	while ($done eq 0)
	    {
		    #print("Parsing...\n");
	    if ($authline =~ /\s*(\S+)\s([A-Z]+)([\,\.])(.*)/)
		{
		$authlast = $1;
		$authinit = $2;


		if ($3 eq ".")
			{
			$done=1;
			}
		$authline = $4;
		#print("Author = $1, $2   Sep = <$3>  Rem = <$4> \n");
		#print("done = $done, authline = $authline \n");
		$authinit =~ s/([A-Z])/$1\.\~/ig;
		$fullauth = $authinit . $authlast;
		#print("Bib author:  $fullauth \n");

		push(@authlist,$fullauth);
		}
	    }
	return(@authlist);
	}




sub String2Words
#
#	Takes a string and returns words in an array.
#	
#
	{
	my ($line) = $_[0];
	while (length($line)>0)
		{
			#print("line: $line\n");
		if ($line =~ /^\s*(\S+)(\s*.*)$/)
			{
			$word=$1;
			$line = $2;
			push(@wordlist,$word);
			#print("Word:  $word \n");
			}
		else
			{
			$line = "";
			}
		}
	return(@wordlist)
	}


