Mercurial > dotfiles
view unixSoft/bin/OCLC-to-bibtex.awk @ 451:1828e3af6340
hgrc: enable show extension
author | Augie Fackler <raf@durin42.com> |
---|---|
date | Tue, 30 Jan 2018 09:58:32 -0500 |
parents | c30d68fbd368 |
children |
line wrap: on
line source
#
# OCLC-to-bibtex.awk is a GAWK script to convert the export format of the
# OCLC databases to BibTeX. It processes the input and tries to convert it into
# BibTeX entries which are written to a file in /tmp. This file is then opened
# using the program specified in "prog" (by default: emacsclient).
#
# NOTE: It does not do an extensive job of testing what kind of publications
# are being processed. It has some rudimentary checks for discovering whether
# the processed publications are either InBook's or Articles.
#
# Hedderik van Rijn, 020912-020914
#
# Do whatever you want with this script, but if you improve it, please send me a copy!
# email: hvr-OCLC@van-rijn.org
#
# How it works: every rule below stores one ready-to-print BibTeX field
# (e.g. "\ttitle = {...}") in a global variable; printEntry() writes those
# globals out. The rules run in file order for every input line, so the
# generic label/continuation rules must stay ahead of the per-label rules.
#

BEGIN {
  # Output file; systime() makes the name differ from run to run.
  tmpfile = "/tmp/tobib." systime() ".tmp.bib";

  # Underscore, not hyphen: awk has no hyphenated identifiers and would read
  # "oclc-version" as the subtraction "oclc - version".
  oclc_version = "OLCL-to-bibtex v0.1";

  # External interactive progs
  # prog = "xless ";
  prog = "emacsclient ";
  # prog = "open -a TextEdit ";
  atEnd = "&";

  # (Indirect) Output to stdout
  # prog = "cat ";
  # atEnd = "";

  # This banner goes to stdout, not to tmpfile.
  print "# Exported from the OLCL FirstSearch PsychINFO database using " oclc_version;
}

# -------------------------------------------------------------------------

# A line whose first or second field contains "letters:" is taken to start a
# new labelled field, so a still-open Descriptor or Abstract is closed here.
# NOTE(review): the regex is unanchored, so a continuation line whose first or
# second word ends in a colon also closes the open field.
(match($1, /[A-Za-z]+:/) || match($2, /[A-Za-z]+:/)) {
  if (inDescriptor == 1) {
    keywords = keywords "}";
    inDescriptor = 0;
  }
  if (inAbstract == 1) {
    abstract = abstract "}";
    inAbstract = 0;
  }
}

# Continuation line: neither of the first two fields looks like a label.
# The character class here also accepts parentheses, so a token such as
# "Author(s):" or "(Major):" satisfies neither this rule nor the one above:
# such a line neither closes the open field nor is appended to it.
(!match($1, /[A-Za-z()]+:/) && !match($2, /[A-Za-z()]+:/)) {
  if (inDescriptor == 1) {
    keywords = keywords ", " $0;
  }
  if (inAbstract == 1) {
    abstract = abstract " " $0;
  }
}

# Author(s): builds the author field and the citation key prefix (mainauthor).
$1 == "Author(s):" {
  author = "\tauthor = {";
  gsub(/Affiliation:.*/, "");          # drop the affiliation; re-splits $0
  firstauthor = 1;
  for (i = 2; i <= NF; i++) {
    if ($i == ";") {                   # a stand-alone ";" separates authors
      $i = "and";
      firstauthor = 0;
    }
    author = author $i;
    # The key is made of the lower-cased tokens up to and including the
    # first one that contains a comma.
    if (firstauthor) {
      mainauthor = mainauthor tolower($i);
    }
    if (match($i, ",")) {
      firstauthor = 0;
    }
    if (i < NF) {
      author = author " ";
    }
  }
  author = author "}";
  gsub(",", "", mainauthor);
}

# Descriptor: opens the keywords field; continuation lines are appended by
# the continuation rule while inDescriptor is set.
$1 == "Descriptor:" {
  inDescriptor = 1;
  gsub(/Descriptor:[ \t]+/, "");
  gsub(/\(Major\):[ \t]+/, "");
  keywords = "\tkeywords = {{" $0;
}

# Identifier: appended as a second brace group and closes the keywords field.
$1 == "Identifier:" {
  inDescriptor = 0;
  gsub(/Identifier:[ \t]+/, "");
  if (keywords == "") {
    # No Descriptor line came first: open the field here so the output
    # is still a well-formed "keywords = {{...}}".
    keywords = "\tkeywords = {{" $0 "}}";
  } else {
    keywords = keywords "{" $0 "}}";
  }
}

# Source: "In:" marks a book chapter (type 1); anything else is treated as a
# journal article (type 2).
$1 == "Source:" {
  if ($2 == "In:") {
    type = 1; # In Book
    pages = "\tpages = {" $NF "}";
    gsub("-", "--", pages);

    # Collect the book title by walking backwards from the third-to-last
    # field until the "Ed;" token. The "i > 2" bound stops the walk at the
    # start of the line when there is no "Ed;" (otherwise awk would be asked
    # for a negative field number).
    booktitle = "";
    for (i = NF - 2; i > 2 && $i != "Ed;"; i--) {
      booktitle = (booktitle == "") ? $i : ($i " " booktitle);
    }
    gsub(";", "", booktitle);
    booktitle = "\tbooktitle = {" booktitle "}";
    gsub("\\.}", "}", booktitle);

    # Everything between "In:" and the book title (including the "Ed;"
    # token itself, removed below) is the editor list.
    editors = "";
    for (; i > 2; i--) {
      editors = (editors == "") ? $i : ($i " " editors);
    }
    gsub(" Ed;", "", editors);
    gsub("; ", " and ", editors);
    gsub(";", "", editors);
    # BibTeX's field name is "editor" (singular); "editors" is ignored by
    # the standard styles.
    editors = "\teditor = {" editors "}";
  } else {
    type = 2; # Journal

    # The journal name is every field up to the "Vol" token. The "i <= NF"
    # bound keeps the loop finite when the line has no "Vol".
    journal = "";
    for (i = 2; i <= NF && $i != "Vol"; i++) {
      journal = (journal == "") ? $i : (journal " " $i);
    }
    journal = "\tjournal = {" journal "}";

    if (i > NF) {
      # No "Vol" token: there is nothing to parse volume/number/year from.
      # year and mainyear are left alone so the Accession rule can still
      # supply the year.
      volume = "\tvolume = {}";
      number = "\tnumber = {}";
    } else {
      # $i is "Vol"; the next field holds "volume(number),".
      i++;
      vol = $i;
      sub(/\(.*\),/, "", vol);
      volume = "\tvolume = {" vol "}";
      sub(/.*\(/, "", $i);
      sub(/\),/, "", $i);
      number = "\tnumber = {" $i "}";
      i++;
      # A non-numeric field evaluates to 0, so "+1 == 1" detects it.
      if ($i + 1 == 1) { # Skip the month if necessary
        i++;
      }
      sub(",", "", $i);
      year = "\tyear = {" $i "}";
      sub("[0-9][0-9]", "", $i);       # drop the first two digits for the key
      mainyear = $i;
    }

    pages = "\tpages = {" $NF "}";
    gsub("-", "--", pages);
    gsub("\\.", "", pages);
  }
}

# Title: wraps capitals in braces so they are carried through as written.
$1 == "Title:" {
  title = "\ttitle = {";
  for (i = 2; i <= NF; i++) {
    if ($i == toupper($i)) {
      $i = "{" $i "}";                 # word without lower-case letters
    } else {
      gsub(/[A-Z]/, "{&}", $i);        # brace each capital individually
    }
    title = title ($i);
    if (i < NF) {
      title = title " ";
    }
  }
  title = title "}";
  gsub("\\.}", "}", title);
}

# Abstract: opens the abstract field; continuation lines are appended by the
# continuation rule while inAbstract is set.
$1 == "Abstract:" {
  gsub(/Abstract:[ \t]*/, "");
  abstract = "\tabstract = {" $0;
  inAbstract = 1;
}

## Use the Accession No: for the year if the year has not been found yet.
$1 == "Accession" {
  # Only used when no year has been stored in mainyear yet.
  # NOTE(review): presumably the line reads "Accession No: YYYY-..." so that
  # $3 starts with the year -- confirm against a real export.
  if (mainyear == "") {
    gsub(/-.*/, "", $3);               # keep the part before the first "-"
    year = "\tyear = {" $3 " (had to use heuristics to determine the year!)}";
    sub("[0-9][0-9]", "", $3);         # drop the first two digits for the key
    mainyear = $3 "?";
  }
}

# Returns s with as many "}" appended as it has unmatched "{".
# Used to repair a field whose closing brace was never added because the
# record ended while the field was still open.
function balanceBraces(s,    probe, opened, closed) {
  probe = s;
  opened = gsub(/[{]/, "", probe);
  probe = s;
  closed = gsub(/[}]/, "", probe);
  while (closed < opened) {
    s = s "}";
    closed++;
  }
  return s;
}

# Appends one "name = {value}," line to tmpfile, skipping fields that were
# never filled in (a bare "," line is not valid BibTeX).
function emitField(field) {
  if (field != "") {
    print(field ",") >> tmpfile;
  }
}

# Writes the record collected so far to tmpfile as one BibTeX entry and
# clears the per-record state. Nothing is written when no author key was
# collected (mainauthor is empty). All inputs are globals filled in by the
# field rules; tmpfile is set in the BEGIN block.
function printEntry(    typestring) {
  if (mainauthor != "") {
    typestring = (type == 1) ? "InBook" : "Article";

    # A Descriptor without an Identifier, or an abstract that runs up to the
    # end of the record, is still missing closing braces at this point.
    abstract = balanceBraces(abstract);
    keywords = balanceBraces(keywords);

    print("@" typestring "{" mainauthor ":" mainyear "x,") >> tmpfile;
    emitField(author);
    emitField(title);
    emitField(year);
    if (type == 1) { # In Book
      emitField(booktitle);
      emitField(editors);
      emitField(pages);
    }
    if (type == 2) { # Article
      emitField(journal);
      emitField(volume);
      emitField(number);
      emitField(pages);
    }
    emitField(abstract);
    if (keywords != "") {
      print(keywords) >> tmpfile;      # last field: no trailing comma
    }
    print("}") >> tmpfile;
    print("") >> tmpfile;
    print("") >> tmpfile;

    # Forget this record's fields so that a field missing from the next
    # record is omitted instead of silently repeating this record's value.
    author = title = year = "";
    booktitle = editors = pages = "";
    journal = volume = number = "";
    abstract = keywords = "";
    type = 0;
    inAbstract = 0;
    inDescriptor = 0;
  }
  mainauthor = "";
  mainyear = "";
}

# A blank line ends the current record.
NF == 0 {
  printEntry();
}

# Flush the last record, then hand the file to the viewer. prog and atEnd are
# set in the BEGIN block. The file is closed first so that everything written
# is on disk before the external program reads it.
END {
  printEntry();
  close(tmpfile);
  system(prog " " tmpfile " " atEnd);
}