#!/usr/bin/perl
#
#       -------------------------------------------------------------
#	checkbib
#	Author:  Brian Hargreaves	March 2003
#       -------------------------------------------------------------
#
#	This program runs through a .bbl file and looks for 
#	duplicate references.
#
#	- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#
#	Usage:  checkbib  <.bbl file> 
#
#		
#	- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#
# -----------------------------------------------------------------------
# 	Full Instructions
# -----------------------------------------------------------------------
#	When making LaTeX documents with several authors, it 
#	is common to use more than one .bib file.  This opens the
#	possibility for two citations to the same article in different
#	.bib files, and thus for duplicate listings in the references
#	section.
#
#	After running LaTeX/BibTeX, a .bbl file is generated in the
#	desired bibliography style.
#
#	This script tries to extract the volume, first page and year
#	for each article.  Then it compares these to each other article,
#	and prints the labels of any duplicate references so that they
#	can be eliminated.
#
# -----------------------------------------------------------------------
#
#	NOTES:
#		I'm NOT much of a PERL programmer!  I have no doubt that
#		a competent PERL programmer could do this in 50 lines or
#		less.
#		
#		Feel free to edit this for your own liking.
#
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
#



# =========== GET Command Line Arguments  ===========
use strict;
use warnings;

# Extract the comparable fields of every reference in a .bbl file.
#
# Parameters:
#   $lines - array ref holding the lines of the .bbl file, in file order.
#
# Returns two array refs:
#   1. one hash per reference whose details were recognised, with the keys
#      label, line (1-based line number of its \bibitem), volume, fpage
#      and year;
#   2. the labels of the references whose details were not recognised.
#
# Every reference is confined to the lines between its own \bibitem and the
# next one, so a short entry (fewer than two \newblock lines) can never
# swallow the entry that follows it.
sub _parse_bbl_entries {
    my ($lines) = @_;
    my @entries;
    my @unparsed;

    # 0-based indices of the lines that open a reference.
    my @starts = grep { $lines->[$_] =~ /\\bibitem/ } 0 .. $#{$lines};

    for my $k (0 .. $#starts) {
        my $first = $starts[$k];
        my $last  = $k < $#starts ? $starts[ $k + 1 ] - 1 : $#{$lines};

        # Plain form first: \bibitem{label}.
        my $label;
        if ($lines->[$first] =~ /bibitem\{(\S+)\}/) {
            $label = $1;
        }
        else {
            # Styles such as natbib/alpha emit \bibitem[text]{label}, and the
            # bracketed text may wrap onto the following line(s).
            my $entry_text = join('', @{$lines}[ $first .. $last ]);
            if ($entry_text =~ /\\bibitem\s*\[.*?\]\s*\{(\S+)\}/s) {
                $label = $1;
            }
        }

        # A \bibitem with no recognisable label is skipped; it must not end
        # the scan of the rest of the file.
        next if !defined $label;

        # The publication details are expected on the second \newblock line
        # of the entry (the first one normally carries the title).
        my @blocks = grep { $lines->[$_] =~ /newblock/ } $first .. $last;
        if (@blocks < 2) {
            push(@unparsed, $label);
            next;
        }
        my $at      = $blocks[1];
        my $details = $lines->[$at];

        my ($volume, $fpage, $year);

        # Journal form:  {\em Journal}, volume(issue):first--last, year
        if ($details =~ /newblock\s*(\{.*\})\,\s*([0-9]+).*:([0-9]+)\-+([0-9]+).*\,\s*([0-9][0-9][0-9][0-9])/) {
            ($volume, $fpage, $year) = ($2, $3, $5);
        }

        # Proceedings form may wrap, so it is matched against the details
        # line joined with the line after it (when that line still belongs
        # to this entry).  A proceedings match overrides a journal match.
        my $joined = $at < $last ? $details . $lines->[ $at + 1 ] : $details;
        if ($joined =~ /newblock\s*In\s(\{.*\})\,\s*page\s([0-9]+)\,[\s\S]*\,\s*([0-9][0-9][0-9][0-9])/) {
            ($volume, $fpage, $year) = (0, $2, $3);
        }

        if (defined $year) {
            push(@entries, {
                label  => $label,
                line   => $first + 1,
                volume => $volume,
                fpage  => $fpage,
                year   => $year,
            });
        }
        else {
            push(@unparsed, $label);
        }
    }

    return (\@entries, \@unparsed);
}

# Find every pair of references that share first page, volume and year.
#
# Parameters:
#   $entries - array ref of hashes as returned by _parse_bbl_entries.
#
# Returns an array ref of [earlier, later] pairs, ordered by the earlier
# entry and then by the later one.
sub _find_duplicate_entries {
    my ($entries) = @_;
    my @pairs;

    for my $i (0 .. $#{$entries}) {
        my $left = $entries->[$i];
        for my $j ($i + 1 .. $#{$entries}) {
            my $right = $entries->[$j];

            # Numeric comparison on purpose: "007" and "7" are the same page.
            next if $left->{fpage}  != $right->{fpage};
            next if $left->{volume} != $right->{volume};
            next if $left->{year}   != $right->{year};

            push(@pairs, [ $left, $right ]);
        }
    }

    return \@pairs;
}

# Script entry point: read the .bbl file named by the first argument and
# report the references that look like duplicates on standard output.
#
# Parameters:
#   @args - the command-line arguments; only the first one is used.
sub _run_checkbib {
    my @args = @_;

    if (!@args) {
        print("Usage:  checkbib file.bbl \n\n         See checkbib.pl text for details.\n\n");
        return;
    }
    my $bblfile = $args[0];

    my @lines;
    if ($bblfile eq '-') {
        # Kept from the original two-argument open: "-" means standard input.
        @lines = <STDIN>;
    }
    else {
        # Three-argument open: the name is always treated as a file to read,
        # never as a mode prefix or a command to run.
        my $fh;
        if (!open($fh, '<', $bblfile)) {
            print("Could not open input file.\n");
            return;
        }
        @lines = <$fh>;
        close($fh);
    }

    my ($entries, $unparsed) = _parse_bbl_entries(\@lines);

    for my $label (@{$unparsed}) {
        print("Warning:  Could not extract info for citation $label.  Ignoring.\n");
    }
    if (@{$unparsed}) {
        print("\n(Probable causes of these warnings are long book titles,\n");
        print(" page numbers containing letters, or no volume number,\n");
        print(" ie in publications that are in print.)\n\n");
    }

    my $totalrefs = scalar(@{$entries});
    print("Total of $totalrefs non-book references found. \n");

    my $pairs = _find_duplicate_entries($entries);
    for my $pair (@{$pairs}) {
        my ($left, $right) = @{$pair};
        printf("Match --> %s [%s] == %s [%s] \n",
            $left->{label}, $left->{line}, $right->{label}, $right->{line});
    }

    my $matchcount = scalar(@{$pairs});
    if ($matchcount == 0) {
        print("No duplicate references found.\n");
    }
    else {
        print("\n$matchcount duplicate references found.\n");
    }
    return;
}

_run_checkbib(@ARGV);





