Mercurial > dotfiles
diff unixSoft/bin/OCLC-to-bibtex.awk @ 0:c30d68fbd368
Initial import from svn.
author | Augie Fackler <durin42@gmail.com> |
---|---|
date | Wed, 26 Nov 2008 10:56:09 -0600 |
parents | |
children |
line wrap: on
line diff
#
# OCLC-to-bibtex.awk is a GAWK script to convert the export format of the
# OCLC databases to BibTeX. It processes the input and tries to convert it into
# BibTeX entries which are written to a file in /tmp. This file is then opened
# using the program specified in "prog" (by default: emacsclient).
#
# NOTE: It does not do an extensive job of testing what kind of publications
# are being processed. It has some rudimentary checks for discovering whether
# the processed publications are either InBook's or Articles.
#
# Hedderik van Rijn, 020912-020914
#
# Do whatever you want with this script, but if you improve it, please send me a copy!
# email: hvr-OCLC@van-rijn.org
#

# Set up the output file name, the viewer command and print a banner.
BEGIN {
    tmpfile = "/tmp/tobib." systime() ".tmp.bib";
    # awk identifiers cannot contain "-": "oclc-version" would be parsed as
    # the subtraction "oclc - version", so use a plain identifier.
    oclcVersion = "OCLC-to-bibtex v0.1";

    # External interactive progs
#   prog = "xless ";
    prog = "emacsclient ";
#   prog = "open -a TextEdit ";
    atEnd = "&";

    # (Indirect) Output to stdout
#   prog = "cat ";
#   atEnd = "";

    # The banner goes to stdout, not to tmpfile.
    print "# Exported from the OCLC FirstSearch PsychINFO database using " oclcVersion;
}

# -------------------------------------------------------------------------
# Helpers
# -------------------------------------------------------------------------

# Returns true when the current line carries a field label ("Title:",
# "Author(s):", "Accession No:", ...) in its first or second word.
# The test is deliberately loose (unanchored), so a continuation line whose
# first or second word contains "word:" is also treated as a field line.
function isFieldLine() {
    return ($1 ~ /[A-Za-z()]+:/ || $2 ~ /[A-Za-z()]+:/);
}

# Terminates a multi-line Descriptor and/or Abstract that is still open by
# appending the closing brace(s) and clearing the corresponding flag.
function closeOpenFields() {
    if (inDescriptor == 1) {
        # Closes both the inner descriptor group and the keywords field;
        # the Identifier rule re-opens the outer brace when needed.
        keywords = keywords "}}";
        inDescriptor = 0;
    }
    if (inAbstract == 1) {
        abstract = abstract "}";
        inAbstract = 0;
    }
}

# -------------------------------------------------------------------------
# Multi-line field bookkeeping (must precede the per-field rules below)
# -------------------------------------------------------------------------

# A blank line ends the record: close whatever is still open so the entry
# that a later rule prints has balanced braces.
NF == 0 {
    closeOpenFields();
    hasDescriptor = 0;
}

# Any new field label ends a running Descriptor/Abstract.
(isFieldLine()) {
    closeOpenFields();
}

# Non-blank lines without a label are continuations of the open field.
(NF > 0 && !isFieldLine()) {
    if (inDescriptor == 1) {
        keywords = keywords ", " $0;
    }

    if (inAbstract == 1) {
        abstract = abstract " " $0;
    }
}

# -------------------------------------------------------------------------
# Per-field rules
# -------------------------------------------------------------------------

# Author(s): builds the BibTeX author list (";" tokens become "and") and
# derives "mainauthor" (lower-cased first author, up to the first comma or
# ";") which is used in the citation key.
$1 == "Author(s):" {
    author = "\tauthor = {";
    gsub(/Affiliation:.*/,"")       # drop a trailing affiliation, re-splits $0
    firstauthor = 1;
    for (i = 2; i <= NF; i++) {
        if ($i == ";") {
            $i = "and";
            firstauthor = 0;
        }
        author = author $i;
        if (firstauthor) {
            mainauthor = mainauthor tolower($i);
        }
        if (match($i, ",")) {
            firstauthor = 0;
        }
        if (i < NF) {
            author = author " ";
        }
    }
    author = author "}";
    gsub(",", "", mainauthor)
}

# Descriptor: starts the keywords field; continuation lines are appended by
# the bookkeeping rule above until the next label or blank line.
$1 == "Descriptor:" {
    inDescriptor = 1;
    hasDescriptor = 1;
    gsub(/Descriptor:[ \t]+/,"")
    gsub(/\(Major\):[ \t]+/,"")
    keywords = "\tkeywords = {{" $0;
}

# Identifier: added as a second brace group, giving
#   keywords = {{descriptors}{identifier}}
# or, when the record had no Descriptor, keywords = {{identifier}}.
$1 == "Identifier:" {
    gsub(/Identifier:[ \t]+/,"")
    if (hasDescriptor && sub(/[}]$/, "", keywords)) {
        keywords = keywords "{" $0 "}}";
    } else {
        keywords = "\tkeywords = {{" $0 "}}";
    }
}

# Source: either a book chapter ("Source: In: <editors> Ed; <book title> ...
# <pages>") or a journal reference ("Source: <journal> Vol <vol>(<no>),
# [<month>] <year>, <pages>").
$1 == "Source:" {
    if ($2 == "In:") {
        type = 1; # In Book

        pages = "\tpages = {" $NF "}";
        gsub("-", "--", pages)
        gsub("\\.", "", pages)

        # Walk backwards from the word before the page marker until the last
        # "Ed;" token; stop at the label words if there is none, so we never
        # index a negative field.
        booktitle = "";
        for (i = NF - 2; i > 2 && $i != "Ed;"; i--) {
            if (booktitle == "") {
                booktitle = $i;
            } else {
                booktitle = $i " " booktitle;
            }
        }
        gsub(";", "", booktitle);
        booktitle = "\tbooktitle = {" booktitle "}";
        gsub("\\.}", "}", booktitle);

        # Everything between "In:" and the book title is the editor list.
        editors = "";
        for (; i > 2; i--) {
            if (editors == "") {
                editors = $i;
            } else {
                editors = $i " " editors;
            }
        }
        gsub(" Ed;", "", editors);
        gsub("; ", " and ", editors);
        gsub(";", "", editors);
        # BibTeX's field is called "editor", also for several editors.
        editors = "\teditor = {" editors "}";
    } else {
        type = 2; # Journal

        # Journal name: all words up to the "Vol" marker. The loop is bounded
        # by NF so a Source line without "Vol" cannot loop forever.
        journalName = "";
        for (i = 2; i <= NF && $i != "Vol"; i++) {
            if (journalName == "") {
                journalName = $i;
            } else {
                journalName = journalName " " $i;
            }
        }
        journal = "\tjournal = {" journalName "}";

        if (i > NF) {
            # No "Vol" marker: volume/number are unknown. Leave mainyear
            # alone so the Accession heuristic can still supply the year.
            volume = "\tvolume = {}";
            number = "\tnumber = {}";
            if (mainyear == "") {
                year = "\tyear = {}";
            }
        } else {
            i++;                    # token after "Vol", e.g. "12(3),"
            volToken = $i;
            sub(/,$/, "", volToken);
            if (match(volToken, /\(.*\)$/)) {
                issue = substr(volToken, RSTART + 1, RLENGTH - 2);
                vol = substr(volToken, 1, RSTART - 1);
            } else {
                issue = "";
                vol = volToken;
            }
            volume = "\tvolume = {" vol "}";
            number = "\tnumber = {" issue "}";

            i++;
            if ($i + 1 == 1) { # Skip the month if necessary (non-numeric token)
                i++;
            }
            yr = $i;
            sub(",", "", yr);
            year = "\tyear = {" yr "}";
            sub("[0-9][0-9]", "", yr);  # keep the two-digit year for the key
            mainyear = yr;
        }

        pages = "\tpages = {" $NF "}";
        gsub("-", "--", pages)
        gsub("\\.", "", pages)
    }
}

# Title: protects capitals from BibTeX case folding. All-caps words are
# braced as a whole, otherwise every capital letter is braced individually.
$1 == "Title:" {
    title = "\ttitle = {";
    for (i = 2; i <= NF; i++) {
        word = $i;
        if (word == toupper(word)) {
            word = "{" word "}";
        } else {
            gsub(/[A-Z]/, "{&}", word);
        }
        title = title word;
        if (i < NF) {
            title = title " ";
        }
    }
    title = title "}";
    gsub("\\.}", "}", title);
}

# Abstract: starts the abstract field; continuation lines are appended by the
# bookkeeping rule above until the next label or blank line.
$1 == "Abstract:" {
    gsub(/Abstract:[ \t]*/,"")
    abstract = "\tabstract = {" $0;
    inAbstract = 1;
}

# END rules run in the order they appear in the program, so this closes a
# field that is still open at end of input before a later END rule prints
# the final entry.
END {
    closeOpenFields();
}

## Use the Accession No: for the year if the year has not been found yet.
# Fallback year: when no year has been parsed for this record (mainyear is
# empty), take the part of the accession number before its first "-".
$1 == "Accession" {
    if (mainyear == "") {
        accYear = $3;
        gsub(/-.*/, "", accYear);
        year = "\tyear = {" accYear " (had to use heuristics to determine the year!)}";

        sub("[0-9][0-9]", "", accYear);     # two-digit year for the key
        mainyear = accYear "?";
    }
}

# Returns s with enough "}" appended to match the number of "{" it contains.
# Guards against a multi-line field that was still open when the record ended.
function balanceBraces(s,    opens, closes) {
    opens = gsub(/[{]/, "{", s);
    closes = gsub(/[}]/, "}", s);
    while (closes < opens) {
        s = s "}";
        closes++;
    }
    return s;
}

# Appends one "field," line to tmpfile; empty fields are skipped so that a
# missing field never produces a bare "," line.
function printField(field) {
    if (field != "") {
        print(field ",") >> tmpfile;
    }
}

# Writes the record collected so far to tmpfile as a BibTeX entry (only when
# an author was found) and then clears all per-record state, so a field that
# is missing from the next record cannot inherit this record's value.
# The citation key is "<mainauthor>:<mainyear>x".
function printEntry(    typestring) {
    if (mainauthor != "") {
        if (type == 1) { # In Book
            typestring = "InBook";
        } else {
            typestring = "Article";
        }
        print("@" typestring "{" mainauthor ":" mainyear "x,") >> tmpfile;
        printField(author);
        printField(title);
        printField(year);
        if (type == 1) { # In Book
            printField(booktitle);
            printField(editors);
            printField(pages);
        }
        if (type == 2) { # Article
            printField(journal);
            printField(volume);
            printField(number);
            printField(pages);
        }
        printField(balanceBraces(abstract));
        if (keywords != "") {
            print(balanceBraces(keywords)) >> tmpfile;
        }
        print("}") >> tmpfile;
        print("") >> tmpfile;
        print("") >> tmpfile;
    }

    # Reset everything that belongs to a single record.
    mainauthor = "";
    mainyear = "";
    author = "";
    title = "";
    year = "";
    booktitle = "";
    editors = "";
    pages = "";
    journal = "";
    volume = "";
    number = "";
    abstract = "";
    keywords = "";
    type = 0;
    inAbstract = 0;
    inDescriptor = 0;
}

# A blank line separates records.
NF == 0 {
    printEntry();
}

# Flush the last record, then hand the result to the viewer.
END {
    printEntry();
    # Make sure everything is on disk before the external program reads it.
    close(tmpfile);
    system(prog " " tmpfile " " atEnd);
}