diff unixSoft/bin/OCLC-to-bibtex.awk @ 0:c30d68fbd368

Initial import from svn.
author Augie Fackler <durin42@gmail.com>
date Wed, 26 Nov 2008 10:56:09 -0600
parents
children
line wrap: on
line diff
new file mode 100755
--- /dev/null
+++ b/unixSoft/bin/OCLC-to-bibtex.awk
@@ -0,0 +1,244 @@
+# 
+# OCLC-to-bibtex.awk is a GAWK script to convert the export format of the
+# OCLC databases to BibTeX. It processes the input and tries to convert it into
+# BibTeX entries which are written to a file in /tmp. This file is then opened 
+# using the program specified in "prog" (by default: emacsclient). 
+#
+# NOTE: It does not do an extensive job of testing what kind of publications
+# are being processed. It has some rudimentary checks for discovering whether the
+# processed publications are either InBook's or Articles.
+# 
+# Hedderik van Rijn, 020912-020914
+#
+# Do whatever you want with this script, but if you improve it, please send me a copy! 
+# email: hvr-OCLC@van-rijn.org
+#
+
+# Set-up: choose the scratch output file, the program used to show the result,
+# and print a banner line on stdout.
+BEGIN {
+   # Scratch file that collects the generated BibTeX entries; the name is made
+   # unique per run by embedding the current epoch time.
+   tmpfile = "/tmp/tobib." systime() ".tmp.bib";
+
+   # awk identifiers cannot contain "-": "oclc-version" is read as the
+   # subtraction "oclc - version", so the name uses an underscore instead.
+   oclc_version = "OCLC-to-bibtex v0.1";
+
+   # External interactive progs
+#   prog = "xless ";
+   prog = "emacsclient ";
+#  prog = "open -a TextEdit ";
+   # Appended to the command line in END; "&" runs the viewer in the background.
+   atEnd = "&";
+
+   # (Indirect) Output to stdout
+#  prog = "cat ";
+#  atEnd = "";
+
+   # Banner on stdout (note the space before the version string).
+   print "# Exported from the OCLC FirstSearch PsychINFO database using " oclc_version;
+}
+
+# -------------------------------------------------------------------------
+# (The separator above must be a "#" comment: awk has no C-style comments and
+# would parse "/* ... */" as a regular-expression pattern with a print action.)
+
+# Field-start rule: a line whose first or second token looks like a field
+# label ("Title:", "Author(s):", "Accession No:", ...) begins a new field, so
+# any Descriptor or Abstract value still being accumulated from continuation
+# lines is closed with its "}". Parentheses are accepted in the label so that
+# this rule is the exact complement of the continuation rule's test.
+(match($1,/[A-Za-z()]+:/) || match($2,/[A-Za-z()]+:/)) {
+
+  if (inDescriptor == 1) {
+    keywords = keywords "}";
+    inDescriptor = 0;
+  }
+  if (inAbstract == 1) {
+    abstract = abstract "}";
+    inAbstract = 0;
+  }
+}
+
+# Continuation rule: a non-empty line that carries no field label belongs to
+# the field currently being accumulated. Descriptor continuation lines become
+# further comma-separated keywords; abstract continuation lines are joined
+# with a space. Blank lines (NF == 0) are skipped so that the record separator
+# does not leave a dangling ", " or " " at the end of the value.
+(NF > 0 && !match($1,/[A-Za-z()]+:/) && !match($2,/[A-Za-z()]+:/)) {
+
+  if (inDescriptor == 1) {
+    keywords = keywords ", " $0;
+  }
+
+  if (inAbstract == 1) {
+    abstract = abstract " " $0;
+  }
+}
+
+
+
+# Author(s): build the BibTeX author field and the citation-key prefix.
+# Everything from "Affiliation:" onwards is discarded first. A stand-alone ";"
+# token separates authors and is rewritten to "and". The key prefix
+# (mainauthor) is made of the lower-cased tokens up to and including the first
+# token that contains a comma, with the commas removed afterwards.
+$1 == "Author(s):" {
+  gsub(/Affiliation:.*/, "");
+
+  names = "";
+  inSurname = 1;
+  for (k = 2; k <= NF; k++) {
+    tok = $k;
+    if (tok == ";") {
+      tok = "and";
+      inSurname = 0;
+    }
+    # Tokens are joined with single spaces, without a trailing one.
+    names = names (k > 2 ? " " : "") tok;
+    if (inSurname)
+      mainauthor = mainauthor tolower(tok);
+    if (tok ~ /,/)
+      inSurname = 0;
+  }
+
+  author = "\tauthor = {" names "}";
+  gsub(",", "", mainauthor);
+}
+
+# Descriptor: opens the keywords field. The "Descriptor:" and "(Major):"
+# labels are stripped from the line and the remainder becomes the first
+# keyword; inDescriptor marks that following unlabelled lines continue it.
+$1 == "Descriptor:" {
+  gsub(/Descriptor:[ \t]+/, "", $0);
+  gsub(/\(Major\):[ \t]+/, "", $0);
+  keywords = sprintf("\tkeywords = {{%s", $0);
+  inDescriptor = 1;
+}
+
+# Identifier: appended to the keywords field as a second braced group, which
+# also closes the field.
+$1 == "Identifier:" {
+  # The flag tracked everywhere else is inDescriptor ("descriptor" was an
+  # unused name), so clear that one.
+  inDescriptor = 0;
+  gsub(/Identifier:[ \t]+/,"")
+  # Without a preceding Descriptor there is no "keywords = {" prefix yet;
+  # start the field here so the emitted value is still well-formed.
+  if (keywords == "") {
+    keywords = "\tkeywords = {";
+  }
+  keywords = keywords "{" $0 "}}";
+}
+
+# Source: decides the entry type and extracts the publication details.
+#   "Source: In: ..."  -> book chapter: editors, book title, pages
+#   anything else      -> journal article: journal, volume, number, year, pages
+# NOTE(review): the token layouts below are inferred from this parsing code;
+# confirm against real OCLC exports.
+$1 == "Source:" {
+  if ($2 == "In:") {
+    type = 1; # In Book
+
+    # The last token is taken as the page range; BibTeX wants "--" in ranges.
+    pages = "\tpages = {" $NF "}";
+    gsub("-","--",pages)
+
+    # Collect the book title by walking backwards from the token two before
+    # the end until the last "Ed;" marker. The "i > 2" bound stops the scan
+    # after "In:" when no "Ed;" exists, instead of running into $0 and then
+    # negative field numbers.
+    booktitle = "";
+    for (i = NF-2; i > 2 && $i != "Ed;"; i--) {
+      if (booktitle == "") {
+        booktitle = $i;
+      } else {
+        booktitle = $i " " booktitle;
+      }
+    }
+    gsub(";","",booktitle);
+    booktitle = "\tbooktitle = {" booktitle "}";
+    gsub("\\.}","}",booktitle);
+
+    # Whatever remains between "In:" and the title (including the "Ed;"
+    # marker itself, removed below) is the editor list.
+    editors = "";
+    for (; i > 2; i--) {
+      if (editors == "") {
+        editors = $i;
+      } else {
+        editors = $i " " editors;
+      }
+    }
+    gsub(" Ed;","",editors);
+    gsub("; "," and ",editors);
+    gsub(";","",editors);
+    # BibTeX's field is called "editor" ("editors" would be ignored).
+    editors = "\teditor = {" editors "}";
+  } else {
+    type = 2; # Journal
+
+    # Journal name: every token before "Vol", joined by single spaces. The
+    # loop is bounded by NF so a Source line without "Vol" cannot spin forever.
+    journal = "";
+    for (i = 2; i <= NF && $i != "Vol"; i++) {
+      journal = (journal == "" ? $i : journal " " $i);
+    }
+    journal = "\tjournal = {" journal "}";
+
+    if (i > NF) {
+      # No "Vol" token: volume and number are unknown. year/mainyear are left
+      # untouched so the Accession-number fallback can still supply them.
+      volume = "\tvolume = {}";
+      number = "\tnumber = {}";
+    } else {
+      # Token after "Vol" looks like "<volume>(<number>),".
+      i++;
+      vol = $i;
+      sub(/\(.*\),/,"",vol)
+      volume = "\tvolume = {" vol "}"
+      sub(/.*\(/,"",$i)
+      sub(/\),/,"",$i)
+      number = "\tnumber = {" $i "}"
+      i++;
+      # A token whose numeric value is 0 (e.g. a month name) is not the year.
+      if ($i+1 == 1) { # Skip the month if necessary
+        i++;
+      }
+      sub(",","",$i);
+      year = "\tyear = {" $i "}";
+      # Drop the first two digits: the citation key uses a two-digit year.
+      sub("[0-9][0-9]","",$i);
+      mainyear = $i;
+    }
+
+    # The last token is taken as the page range.
+    pages = "\tpages = {" $NF "}";
+    gsub("-","--",pages)
+    gsub("\\.","",pages)
+
+  }
+
+}
+
+# Title: build the BibTeX title field, protecting capitalisation with braces.
+# A token that equals its own upper-cased form is wrapped whole; otherwise
+# each capital letter in it is wrapped individually. A full stop directly
+# before the closing brace is dropped.
+$1 == "Title:" {
+  words = "";
+  for (k = 2; k <= NF; k++) {
+    word = $k;
+    if (word == toupper(word))
+      word = "{" word "}";
+    else
+      gsub(/[A-Z]/, "{&}", word);
+    # Tokens are joined with single spaces, without a trailing one.
+    words = words (k > 2 ? " " : "") word;
+  }
+  title = "\ttitle = {" words "}";
+  gsub("\\.}", "}", title);
+}
+
+# Abstract: opens the abstract field with the text that follows the label.
+# inAbstract marks that following unlabelled lines continue the abstract; the
+# closing brace is added once the next labelled line is seen.
+$1 == "Abstract:" {
+  inAbstract = 1;
+  gsub(/Abstract:[ \t]*/, "", $0);
+  abstract = sprintf("\tabstract = {%s", $0);
+}
+
+## Use the Accession No: for the year if the year has not been found yet. 
+
+# Only fires while no year is known: the part of the third token before the
+# first "-" is used as the year (flagged as a guess in the output), and the
+# same value minus its first two digits, followed by "?", goes into the key.
+$1 == "Accession" && mainyear == "" {
+  accYear = $3;
+  gsub(/-.*/, "", accYear);
+  year = "\tyear = {" accYear " (had to use heuristics to determine the year!)}";
+
+  sub("[0-9][0-9]", "", accYear);
+  mainyear = accYear "?";
+}
+
+# Return s with as many "}" appended as are needed to balance its "{".
+# Used for values (abstract, keywords) that may still be open when a record
+# ends before another labelled line has closed them.
+function closeBraces(s,    tmp, opens, closes) {
+  tmp = s;
+  opens = gsub(/[{]/, "", tmp);
+  closes = gsub(/[}]/, "", tmp);
+  while (closes < opens) {
+    s = s "}";
+    closes++;
+  }
+  return s;
+}
+
+# Append one "name = {value}," line to tmpfile; empty fields are skipped so
+# the entry never contains a bare "," line.
+function printField(field) {
+  if (field != "") {
+    print(field ",") >> tmpfile;
+  }
+}
+
+# Write the record collected so far to tmpfile as one BibTeX entry.
+# Nothing is written unless an author was seen (mainauthor non-empty). The
+# entry is "InBook" when type == 1 and "Article" otherwise, and its key is
+# "<mainauthor>:<mainyear>x". After an entry has been written all per-record
+# fields are cleared so that a following record lacking some field does not
+# inherit the previous record's value. mainauthor and mainyear are always
+# cleared.
+function printEntry(    typestring) {
+  if (mainauthor != "") {
+    if (type == 1) { # In Book
+      typestring = "InBook";
+    } else {
+      typestring = "Article";
+    }
+    print("@" typestring "{" mainauthor ":" mainyear "x,") >> tmpfile;
+    printField(author);
+    printField(title);
+    printField(year);
+    if (type == 1) { # In Book
+      printField(booktitle);
+      printField(editors);
+      printField(pages);
+    }
+    if (type == 2) { # Article
+      printField(journal);
+      printField(volume);
+      printField(number);
+      printField(pages);
+    }
+    printField(closeBraces(abstract));
+    # keywords is the last field and is written without a trailing comma.
+    if (keywords != "") {
+      print(closeBraces(keywords)) >> tmpfile;
+    }
+    print("}") >> tmpfile;
+    print("") >> tmpfile;
+    print("") >> tmpfile;
+
+    # Start the next record from a clean slate.
+    author = ""; title = ""; year = "";
+    booktitle = ""; editors = ""; pages = "";
+    journal = ""; volume = ""; number = "";
+    abstract = ""; keywords = "";
+    type = 0;
+    inAbstract = 0;
+    inDescriptor = 0;
+  }
+  mainauthor = "";
+  mainyear = "";
+}
+
+# A line without any fields (blank line) ends the current record: emit it.
+!NF {
+  printEntry();
+}
+
+# End of input: emit the last record (the input may not end with a blank
+# line), then hand the generated file to the configured viewer program.
+END {
+  printEntry();
+  # Close the output file first so every buffered entry is on disk before
+  # the external program opens it.
+  close(tmpfile);
+  system(prog " " tmpfile " " atEnd);
+}