view unixSoft/bin/OCLC-to-bibtex.awk @ 378:20e47bc8eea9

hgrc: disable hg-git and hgsubversion It hasn't been common for me to use either of these extensions for some time, so just drop both of them from the main hgrc. Instead I'll enable them as-needed in individual repositories.
author Augie Fackler <durin42@gmail.com>
date Thu, 09 Jun 2011 20:20:04 -0500
parents c30d68fbd368
children
line wrap: on
line source

# 
# OCLC-to-bibtex.awk is a GAWK script to convert the export format of the 
# OCLC databases to BibTeX. It processes the input and tries to convert it into
# BibTeX entries which are written to a file in /tmp. This file is then opened 
# using the program specified in "prog" (by default: emacsclient). 
#
# NOTE: It does not do an extensive job of testing what kind of publications
# are being processed. It has some rudimentary checks for discovering whether
# the processed publications are either InBooks or Articles.
# 
# Hedderik van Rijn, 020912-020914
#
# Do whatever you want with this script, but if you improve it, please send me a copy! 
# email: hvr-OCLC@van-rijn.org
#

# Set up the output file name, the version banner and the external viewer
# command before any input is read.
#   tmpfile      - BibTeX output file; systime() is embedded in the name
#   oclc_version - name/version string shown in the banner
#   prog         - command used to open tmpfile once all input is processed
#   atEnd        - text appended to that command line ("&" backgrounds it)
BEGIN {
   tmpfile = "/tmp/tobib." systime() ".tmp.bib";

   # Hyphens are not legal in awk identifiers ("oclc-version" is a
   # subtraction expression), so the variable name uses an underscore.
   oclc_version = "OCLC-to-bibtex v0.1";

   # External interactive progs
#   prog = "xless ";
   prog = "emacsclient ";
#  prog = "open -a TextEdit ";
   atEnd = "&";

   # (Indirect) Output to stdout
#  prog = "cat ";
#  atEnd = "";

   # Banner goes to stdout, not to tmpfile.
   print "# Exported from the OCLC FirstSearch PsychINFO database using " oclc_version;
}

# -------------------------------------------------------------------------
# (awk has no C-style block comments: a line of the form "/* ... */" is a
# regular-expression pattern rule, so the separator is written with "#".)

# A "Label:" token in the first or second field marks the start of a new
# record field. Any multi-line Descriptor or Abstract that is still being
# collected therefore ends here and receives its closing brace.
($1 ~ /[A-Za-z]+:/ || $2 ~ /[A-Za-z]+:/) {

  if (inDescriptor == 1) {
    inDescriptor = 0;
    keywords = keywords "}";
  }

  if (inAbstract == 1) {
    inAbstract = 0;
    abstract = abstract "}";
  }
}

# Continuation lines: a non-empty line whose first two fields carry no
# "Label:" token belongs to the field currently being collected.
#   - while a Descriptor is open, the line is appended as a further keyword
#   - while an Abstract is open, the line is appended to the abstract text
# Empty lines are excluded (NF > 0): they separate records and must not add
# a dangling ", " or " " to the open field.
(NF > 0 && !match($1,/[A-Za-z()]+:/) && !match($2,/[A-Za-z()]+:/)) {

  if (inDescriptor == 1) {
    keywords = keywords ", " $0;
  }

  if (inAbstract == 1) {
    abstract = abstract " " $0;
  }
}



# Turn an "Author(s):" line into the BibTeX author field.
#   author     - all fields after the label, joined by single spaces, with
#                stand-alone ";" tokens replaced by "and"
#   mainauthor - lower-cased tokens of the first author up to and including
#                the first token that contains a comma (commas removed);
#                used later as the first part of the citation key
$1 == "Author(s):" {
  # Drop everything from "Affiliation:" onwards; this re-splits the fields.
  gsub(/Affiliation:.*/,"")

  author = "\tauthor = {";
  inFirst = 1;
  for (k = 2; k <= NF; k++) {
    if ($k == ";") {
      # A lone semicolon separates two authors.
      $k = "and";
      inFirst = 0;
    }
    author = author $k (k < NF ? " " : "");
    if (inFirst) {
      mainauthor = mainauthor tolower($k);
    }
    # The token carrying the comma is the last one taken for the key.
    if ($k ~ /,/) {
      inFirst = 0;
    }
  }
  author = author "}";
  gsub(",","",mainauthor)
}

# A "Descriptor:" line opens the keywords field. The label and any
# "(Major):" marker are stripped from the line; the remaining text becomes
# the first keyword. inDescriptor stays set so that following unlabeled
# lines are appended until the next labeled line closes the group.
$1 == "Descriptor:" {
  gsub(/Descriptor:[ \t]+/,"")
  gsub(/\(Major\):[ \t]+/,"")
  keywords = sprintf("\tkeywords = {{%s", $0);
  inDescriptor = 1;
}

# An "Identifier:" line contributes a second brace group to the keywords
# field and supplies the field's final closing brace.
$1 == "Identifier:" {
  # The descriptor group, if any, is finished at this point.
  inDescriptor = 0;
  gsub(/Identifier:[ \t]+/,"")
  if (keywords == "") {
    # No Descriptor line opened the field: open it here so the "}}" appended
    # below is balanced.
    keywords = "\tkeywords = {";
  }
  keywords = keywords "{" $0 "}}";
}

# Parse a "Source:" line and fill in the publication-specific fields.
#
# "Source: In: ..." is treated as a book chapter (type 1):
#   - the last field is the page range
#   - the fields after the last "Ed;" marker, up to field NF-2, form the
#     book title
#   - the fields from 3 up to and including that "Ed;" marker are the editors
#   NOTE(review): only the literal token "Ed;" is recognized; whether the
#   export also uses "Eds;" could not be determined from this file.
#
# Anything else is treated as a journal article (type 2):
#   - the fields before the "Vol" marker form the journal name
#   - the token after "Vol" is "<volume>(<number>),"
#   - then an optional non-numeric token (month) followed by the year
#   - the last field is the page range
#
# Both marker searches are bounded by the field count, so a line lacking
# "Ed;" or "Vol" no longer runs into negative field indexes or loops forever.
$1 == "Source:" {
  if ($2 == "In:") {
    type = 1; # In Book

    # Page range: drop a trailing period, then use BibTeX's "--" range dash.
    pageTok = $NF;
    sub(/\.$/, "", pageTok);
    pages = "\tpages = {" pageTok "}";
    gsub("-","--",pages)

    # Position of the last "Ed;" marker; 2 (the "In:" field) when absent,
    # which leaves the editor list empty and gives everything to the title.
    edPos = 2;
    for (i = NF - 2; i > 2; i--) {
      if ($i == "Ed;") {
        edPos = i;
        break;
      }
    }

    booktitle = "";
    for (i = edPos + 1; i <= NF - 2; i++) {
      booktitle = (booktitle == "" ? $i : booktitle " " $i);
    }
    gsub(";","",booktitle);
    booktitle = "\tbooktitle = {" booktitle "}";
    gsub("\\.}","}",booktitle);   # no period right before the closing brace

    editors = "";
    for (i = 3; i <= edPos; i++) {
      editors = (editors == "" ? $i : editors " " $i);
    }
    gsub(" Ed;","",editors);
    gsub("; "," and ",editors);
    gsub(";","",editors);
    # BibTeX's standard field name is "editor" (singular).
    editors = "\teditor = {" editors "}";
  } else {
    type = 2; # Journal

    # Find the "Vol" marker without running past the last field.
    volPos = 0;
    for (i = 2; i <= NF; i++) {
      if ($i == "Vol") {
        volPos = i;
        break;
      }
    }

    # Journal name: fields before "Vol"; without a marker, everything except
    # the final (pages) field. Joined without a trailing space.
    lastJournalField = (volPos > 0 ? volPos - 1 : NF - 1);
    journalName = "";
    for (i = 2; i <= lastJournalField; i++) {
      journalName = (journalName == "" ? $i : journalName " " $i);
    }
    journal = "\tjournal = {" journalName "}";

    if (volPos > 0) {
      volTok = $(volPos + 1);

      # Volume: the token with any "(number)" part and trailing comma removed.
      vol = volTok;
      sub(/\(.*\),?/,"",vol)
      sub(/,$/,"",vol)
      volume = "\tvolume = {" vol "}"

      # Number: the text between the parentheses, empty when there are none.
      num = "";
      if (volTok ~ /\(/) {
        num = volTok;
        sub(/.*\(/,"",num)
        sub(/\),?/,"",num)
      }
      number = "\tnumber = {" num "}"

      i = volPos + 2;
      if ($i+1 == 1) { # Skip the month if necessary (non-numeric token)
        i++;
      }
      yearTok = $i;
      sub(",","",yearTok);
      year = "\tyear = {" yearTok "}";
      # The citation key uses the year without its first two digits.
      sub("[0-9][0-9]","",yearTok);
      mainyear = yearTok;
    } else {
      # Without a "Vol" marker volume and number cannot be located. year and
      # mainyear are left alone so the "Accession" rule can still supply them.
      volume = "\tvolume = {}"
      number = "\tnumber = {}"
    }

    pages = "\tpages = {" $NF "}";
    gsub("-","--",pages)
    gsub("\\.","",pages)

  }

}  

# Build the BibTeX title field from a "Title:" line.
# Capitalisation is protected with braces: a word that equals its own
# upper-case form is wrapped as a whole, otherwise each capital letter is
# wrapped individually.
# Only a period ending the last word is dropped. It is removed before the
# braces are added, so interior all-caps tokens such as "U.S." keep theirs.
$1 == "Title:" {
  title = "\ttitle = {";
  for (i=2;i<=NF;i++) {
    word = $i;
    if (i == NF) {
      sub(/\.$/,"",word);
    }
    if (word == toupper(word)) {
      word = "{" word "}";
    } else {
      gsub(/[A-Z]/,"{&}",word);
    }
    title = title word;
    if (i<NF) {
      title = title " ";
    }
  }
  title = title "}";

}

# An "Abstract:" line opens the abstract field with the text that follows the
# label. inAbstract stays set so that following unlabeled lines are appended
# until the next labeled line closes the field.
$1 == "Abstract:" {
  inAbstract = 1;
  gsub(/Abstract:[ \t]*/,"")
  abstract = sprintf("\tabstract = {%s", $0);
}

## Fallback for the year: while mainyear is still empty, take the part of the
## third field (the accession number) that precedes its first "-" as the year
## and flag the guess both in the year field and in the citation key ("?").

$1 == "Accession" && mainyear == "" {
  gsub(/-.*/,"",$3);
  year = sprintf("\tyear = {%s (had to use heuristics to determine the year!)}", $3);

  # The key uses the year without its first two digits.
  sub("[0-9][0-9]","",$3);
  mainyear = $3 "?";
}

# Append one field line to tmpfile, followed by the given separator.
# Fields that were never filled in (empty string) are skipped so that no bare
# "," line ends up inside the entry.
function emitField(field, separator) {
  if (field != "") {
    print(field separator) >> tmpfile;
  }
}

# Return text with as many "}" appended as it has unmatched "{" characters.
function balanceBraces(text,    probe, missing) {
  probe = text;
  missing = gsub(/[{]/, "", probe);    # number of opening braces
  missing -= gsub(/[}]/, "", probe);   # minus number of closing braces
  while (missing-- > 0) {
    text = text "}";
  }
  return text;
}

# Append the record collected so far to tmpfile as one BibTeX entry, then
# clear all per-record state.
# Nothing is written when no author was seen (mainauthor is empty). The entry
# type is InBook when type is 1, otherwise Article; the citation key is
# "<mainauthor>:<mainyear>x".
# An abstract or keywords field still open when the record ends is closed
# here, and every collected field is reset so that a later record lacking,
# say, an abstract does not inherit the previous record's text.
function printEntry() {
  if (mainauthor != "") {
    if (type == 1) { # In Book
      typestring = "InBook";
    } else {
      typestring = "Article";
    }
    print("@" typestring "{" mainauthor ":" mainyear "x,") >> tmpfile;
    emitField(author, ",");
    emitField(title, ",");
    emitField(year, ",");
    if (type == 1) { # In Book
      emitField(booktitle, ",");
      emitField(editors, ",");
      emitField(pages, ",");
    }
    if (type == 2) { # Article
      emitField(journal, ",");
      emitField(volume, ",");
      emitField(number, ",");
      emitField(pages, ",");
    }
    emitField(balanceBraces(abstract), ",");
    emitField(balanceBraces(keywords), "");
    print("}") >> tmpfile;
    print("") >> tmpfile;
    print("") >> tmpfile;
  }

  # Start the next record from a clean slate.
  mainauthor = "";
  mainyear = "";
  author = "";
  title = "";
  year = "";
  booktitle = "";
  editors = "";
  pages = "";
  journal = "";
  volume = "";
  number = "";
  abstract = "";
  keywords = "";
  type = 0;
  inAbstract = 0;
  inDescriptor = 0;
}  

# A line without any fields ends the current record: hand the fields
# collected so far to printEntry().
!NF {
  printEntry()
}

# End of input: write out the last record (the input need not end with an
# empty line), then run the configured program on the generated file.
END {
  printEntry();
  viewerCommand = sprintf("%s %s %s", prog, tmpfile, atEnd);
  system(viewerCommand);
}