#!/usr/bin/perl # # ------------------------------------------------------------- # Author: Brian Hargreaves Dec 4/02. # ------------------------------------------------------------- # # ===>>> Applescript Version <<<=== # # (Takes inputs from STDIN) # # This program takes the output from PubMed, and makes a # bibtex .bib entry. It is designed to use the data from # the clipboard, after you copy highlighted citations from # the results of a PubMed search. # # This script parses the entries, by looking for the # calendar year. Entries are separated by some text with # a "]" between them, as will be the case from PubMed. # # The script can also take a .bib file and try to use # the journal abbreviations (@string...) in the .bib file # instead of the Index Medicus journal names. # # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # ----------------------------------------------------------------------- # Full Instructions # ----------------------------------------------------------------------- # # 1. Go to http://www.pubmed.org # # 2. Fill out the search string to find your paper, for example # "Haacke EM, Wielopolski, Modic" # # 3. Highlight from (including) the first author name to # the line with PMID, copy and paste into a text # file (called "ref" for this example). # # 4. Run "pubmedbib.pl ref Haacke90" # # 5. This should spit out the following text, which you can # cut and paste into your .bib file: # # @article{haacke90, # author= {E.~M.~Haacke and P.~A.~Wielopolski and # J.~A.~Tkach and M.~T.~Modic}, # title= {Steady-state free precession imaging in the # presence of motion: application for # improved visualization of the cerebrospinal # fluid}, # journal= rad, # volume= {175}, # number= {2}, # pages= {545--552}, # year= {1990} # } # # # 6. Write to your local scientific society and suggest that # they continue to support LaTeX. # # ----------------------------------------------------------------------- # # NOTES: # I'm NOT much of a PERL programmer! I have no doubt that # a competent PERL programmer could do this in 50 lines or # less! # # Feel free to edit this for your own liking. # # ----------------------------------------------------------------------- # ----------------------------------------------------------------------- # Start of CVS Log Entries # ----------------------------------------------------------------------- # # $Log: pubmedbib.pl,v $ # Revision 1.5 2004/08/26 20:44:35 brian # minor edits # # # ----------------------------------------------------------------------- # End of CVS Log Entries # ----------------------------------------------------------------------- # ----------------------------------------------------------------------- # ----------------------------------------------------------------------- # ----------------------------------------------------------------------- # $err = 0; $numentries = 0; # Number of entries found. $delimiter = "---***---\n"; # Used to separate .bib entries in output. if (@ARGV) { @abbreviationslist = ReadBibAbbreviations(shift(@ARGV)); } if ($err eq 0) { $line = ; while (defined($line)) { # # -------- AUTHOR LIST ---------- # # $done = 0; { if (defined($line)) { #if ($line=~ /([A-Za-z].+)$/) # Trim up to first letter. #{ #$line = $1; #} $line =~ s/([A-Za-z].+)$/$1/ig; # Trim up to first letter. # Read in and add to $line until we have a year. $moreline = " "; while ((!($line =~ /20[0-9][0-9]/ )) and (!($line =~ /19[0-9][0-9]/ )) and (defined($moreline)) ) { $moreline = ; $line = $line . $moreline; } #print("Line to parse: <$line> \n\n"); @fields = ParseCitation($line); $line = $fields[7]; # Keep residual text. #print("Unused: $line \n"); @authorlist = ParseAuthors($fields[0]); shift(@authorlist); $title = $fields[1]; $location = $fields[2]; $year = $fields[3]; $vol = $fields[4]; $num = $fields[5]; $pages = $fields[6]; } else { $err=1; } if (defined($title)) { $title = CleanUpTitle($title); $location = AbbreviateTitle($location,@abbreviationslist); } else { $err=1; } } if ($err eq 0) { #print("Author 1 is $authorlist[0]. \n"); if ($authorlist[0] =~ /~([A-Z][a-zA-Z]+)/) { $authlname = $1; if ($year =~ /([0-9][0-9][0-9][0-9])/ ) { $label = $authlname . $1; } } if (!defined($label)) { $label = "New-Reference"; } # ---------- Print BibTex Entry ------------ # print("\n"); $numentries = $numentries + 1; if ($numentries > 1) { print("$delimiter"); } print("\@article{$label, \n"); # # Print author list, trying to keep from line wrapping. # print(" author= {"); $linepos = 20; # keep track of position. while(scalar(@authorlist)>0) { $thisauthor = shift(@authorlist); if (scalar(@authorlist)>0) { $thisauthor = $thisauthor . " and "; } $linepos = $linepos + length($thisauthor); if ($linepos > 70) { print("\n "); $linepos=20 + length($thisauthor); } print("$thisauthor"); } print("},\n"); # # Print title, trying to keep from line wrapping. # @titlewords = String2Words($title); print(" title= {"); $linepos = 20; # keep track of position. while(scalar(@titlewords)>0) { $thisword = shift(@titlewords); $linepos = $linepos + length($thisword); if ($linepos > 70) { print("\n "); $linepos=20 + length($thisword); } if (scalar(@titlewords)>0) { print("$thisword "); } else { print("$thisword"); } } print("},\n"); print(" journal= $location,\n"); print(" volume= $vol, \n"); print(" number= $num, \n"); print(" pages= $pages, \n"); print(" year= $year \n"); print("}\n\n"); $done=0; # ----- Check if line has a Number: (ie 2:) in it. If so, # Get rid of it. If not, discard and read in next line. while (defined($line) and (!($line =~ /[0-9]\:(.*)$/ )) ) { $line = ; } if ($line =~ /[0-9]\:(.*)$/ ) { $line = $1; } #while(scalar(@authorlist)>0) #{ #shift(@authorlist); #} #while(scalar(@titlewords)>0) #{ #shift(@titlewords); #} #$title=""; #$author=""; if (defined($line)) { $done=0; } else { $done=1; } } #print("Now Examining residual... $line \n\n"); if ($err == 1) { $line = ; } } } sub ParseAuthors # # Takes a string and returns the author names in an array. # # OUTPUT: # 0 - residual text after period (.) # 1-n - author names, like J.~A.~Smith # { my ($authline) = $_[0]; my (@authlist); my ($done) = 0; #while (scalar(@authlist)>0) #{ #pop(@authlist); #} while ($done eq 0) { #print("Parsing...\n"); if ($authline =~ /\s*(\S+)\s([A-Z]+)([\,\.])(.*)/) { $authlast = $1; $authinit = $2; if ($3 eq ".") { $done=1; } $authline = $4; #print("Author = $1, $2 Sep = <$3> Rem = <$4> \n"); #print("done = $done, authline = $authline \n"); $authinit =~ s/([A-Z])/$1\.\~/ig; $fullauth = $authinit . $authlast; #print("Bib author: $fullauth \n"); push(@authlist,$fullauth); } else { $done=1; } } if (!defined($authline)) { $authline = " "; # Put anything there! } unshift(@authlist,$authline); return(@authlist); } sub String2Words # # Takes a string and returns words in an array. # # { my ($line) = $_[0]; while (scalar(@wordlist)>0) { pop(@wordlist); } while (length($line)>0) { #print("line: $line\n"); if ($line =~ /^\s*(\S+)(\s*.*)$/) { $word=$1; $line = $2; push(@wordlist,$word); #print("Word: $word \n"); } else { $line = ""; } } return(@wordlist) } sub dumparray # { my (@arr) = @_; while (scalar(@arr) > 0) { $elem = shift(@arr); print("array -> $elem \n"); } return 0; } sub ReadBibAbbreviations # # Subroutine opens the given bib file. Looks for # lines that match @string{abb = "Full Name "} # # Returns an array with ("abb1","Full Name",abb2,"Full Name2", ...) # { my ($fname) = $_[0]; my ($noerr) = (defined(open(BIBFILE,$fname))); my ($abblist); my ($fullname); my ($abbrev); if ($noerr==1) { $line = ; while (defined($line)) { if ($line=~/^\@string\{([a-z]*)\s*\=\s*\"(.*?)\"\s*\}/) { push(@abblist,$1); $abbrev = $1; $fullname = $2; $fullname =~ s/\s$//ig; $fullname = "\"" . $fullname . "\""; push(@abblist,$fullname); #print("Abbrev: <$abbrev> \n"); #print("Full: <$fullname> \n"); } $line = ; } close (BIBFILE); } return(@abblist); } sub ParseCitation # # Subroutine parses the author/title/journal - splits # it to @result = ($authorlist, $title, $journal) { my ($parseline) = $_[0]; my ($authtj); my ($volnumpage); my ($authorlist); my ($title); my ($journal); my ($year); my ($volume); my ($number); my ($pages); my ($residualtext); my (@retval); if ($parseline=~ /^(.*?)\s((19|20)[0-9][0-9])\s.*?;(.*)$/ ) { $authtj = $1; $year = "{$2}"; $volnumpage = $4; #print("1: $1, 2:$2, 3:$3, 4:$4, 5:$5 \n\n"); if ($authtj =~ /^(.+?)\.\s(.+(\.|\?|\!))\s(.+?)\.\s*$/ ) { $authorlist = "$1."; $title = $2; $journal = $4; $title =~ s/Related Articles,/ /ig; $title =~ s/Links/ /ig; $title =~ s/\s*(.*)$/$1/ig; $journal =~ s/\s*(.*)$/$1/ig; $journal = "{\"$journal\"}"; } if ($volnumpage =~ /^\s*([0-9]+)(.*)$/) { $vol = "{$1}"; $rest = $2; if ($rest =~ /^\s*$([0-9]+)$(.*)$/) { $num = "{$1}"; $rest = $2; } else { $num = "{}"; } if ($rest =~ /^\s*\:\s*([0-9]+)\-([0-9]+)(.*)$/) { $fpage = $1; $lpage = $2; # Replace form 344-6 with # 344-346, etc. # $lenfpage = length($fpage); $lenlpage = length($lpage); $dlen = $lenfpage - $lenlpage; if ($dlen > 0) { $pref = substr($fpage,0,$dlen); $lpage = $pref . $lpage; } $pages = "{$fpage--$lpage}"; $rest = $3; } else { $pages = "{}"; } } } if (1 eq 0) { print("authtj: <$authtj> \n"); print("Authors: <$authorlist> \n "); print("Title : <$title> \n "); print("Journal: <$journal> \n "); print("VolNumPage: <$volnumpage> \n"); print("Volume: <$vol> \n"); print("Number: <$num> \n"); print("Pages: <$pages> \n"); } push(@retval,$authorlist); push(@retval,$title); push(@retval,$journal); push(@retval,$year); push(@retval,$vol); push(@retval,$num); push(@retval,$pages); push(@retval,$rest); return (@retval); } sub CleanUpTitle # # Subroutine removes some stuff from the title, # and trie to preserve capitalization in LaTeX # { my ($title) = $_[0]; #Remove whitespace and ending period. $title =~ s/^(\s*)(\S.+)(\.)(\s*)$/$2/ig; #print("Adding {}s to title: $title \n"); # # Try to keep certain things capitalized. # $title =~ s/(T[1|2])/\{$1\}/ig; # T1 or T2 -> {T1/2} $title =~ s/(MR\s)/\{MR\} /ig; # {MR} $title =~ s/(MRI)/\{MRI\}/ig; # {MRI} $title =~ s/(FSE)/\{FSE\}/ig; # {MRI} $title =~ s/(GRE)/\{GRE\}/ig; # {MRI} $title =~ s/(RARE)/\{RARE\}/ig; # {MRI} $title =~ s/(dGE\{MRI\}C)/\{dGEMRIC\}/ig; # {dGEMRIC} $title =~ s/([0-9]\s*T)/\{$1\}/ig; # 1.5 T or 3 T $title =~ s/(DTPA)/\{DTPA\}/ig; # {dGEMRIC} $title =~ s/(Gd)/\{Gd\}/ig; # {dGEMRIC} if ($title =~/^(\s*)Abstract(.*)$/ ) # Remove "Abstract" { $title = $2; } #print("Title: <$title> \n"); return ($title); } sub AbbreviateTitle # # Subroutine tries to abbreviate title (input argument 0), # using BibTex abbreviations in arguments 1-n. # { my ($location) = shift(@_); my (@abbreviationslist) = @_; my ($count); my ($abb); my ($fullname); # ---------- Try to abbreviate journal title. Would be nice # to pass the .bib file and search it, but... # if (0 eq 1) { $location =~ s/^\{\"Radiology\"\}/rad/ig; # Radiology. $location =~ s/^\{\"Magn\ Reson\ Med"\}/mrm/ig; # MRM. $location =~ s/^\{\"J\ Cardiovasc\ Magn\ Reson"\}/jcmr/ig; # JCMR. $location =~ s/^\{\"J\ Magn\ Reson\ Imaging"\}/jmri/ig; # JMRI. $location =~ s/^\{\"AJR Am J Roentgenol"\}/ajr/ig; # AJR. $location =~ s/^\{\"J\ Magn\ Reson"\}/jmr/ig; # JMRI. $location =~ s/^\{\"Radiographics"\}/radgraph/ig; # Radiographics #$location =~ s/^\{\""\}//ig; # Template to add these. } if (1 eq 1) # ---- Tries to abbreviate title from @string{} entries # ---- that are in the .bib file passed as a parameter. { $count = 0; while ($count < scalar(@abbreviationslist)) { $abb = $abbreviationslist[$count]; $fullname = $abbreviationslist[$count+1]; $fullname = "\^\\\{" . $fullname . "\\\}"; $location =~ s/$fullname/$abb/ig; $count = $count + 2; } } return($location); }