0
#
# OCLC-to-bibtex.awk is a GAWK script to convert the export format of the
# OCLC databases to BibTeX. It processes the input and tries to convert it into
# BibTeX entries, which are written to a file in /tmp. This file is then opened
# using the program specified in "prog" (by default: emacsclient).
#
# NOTE: It does not do an extensive job of testing what kind of publications
# are being processed. It has some rudimentary checks for discovering whether
# the processed publications are InBooks or Articles.
#
# Hedderik van Rijn, 020912-020914
#
# Do whatever you want with this script, but if you improve it, please send me a copy!
# email: hvr-OCLC@van-rijn.org
#
+ − 16
# Start-up: choose the output file and the viewer command, then print a
# banner on stdout.
BEGIN {
    # All BibTeX entries are appended to this file; the name embeds the
    # current time stamp returned by systime().
    tmpfile = "/tmp/tobib." systime() ".tmp.bib";

    # AWK identifiers cannot contain "-" (it would be parsed as a
    # subtraction), hence the underscore in the variable name.
    oclc_version = "OCLC-to-bibtex v0.1";

    # External interactive progs
    # prog = "xless ";
    prog = "emacsclient ";
    # prog = "open -a TextEdit ";
    atEnd = "&";

    # (Indirect) Output to stdout
    # prog = "cat ";
    # atEnd = "";

    print "# Exported from the OCLC FirstSearch PsychINFO database using " oclc_version;
}
+ − 37
# ---------------------------------------------------------------------------
# (AWK only has "#" comments; a C-style comment here would be read as a
# regular-expression pattern rule.)
#
# Start of a new field: the first or second word of the line looks like a
# field label ("Title:", "Author(s):", "(Major):", ...).  Any multi-line
# Descriptor or Abstract that is still being collected ends here, so its
# closing brace is appended.  The label expression is the same one the
# continuation rule uses, so every line is either a field start or a
# continuation.
($1 ~ /[A-Za-z()]+:/ || $2 ~ /[A-Za-z()]+:/) {
    if (inDescriptor == 1) {
        keywords = keywords "}";
        inDescriptor = 0;
    }
    if (inAbstract == 1) {
        abstract = abstract "}";
        inAbstract = 0;
    }
}
+ − 51
# Continuation line: neither of the first two words looks like a field label,
# so the text is appended to whichever multi-line field is currently open.
# Blank lines are excluded: they separate records and would otherwise leave a
# dangling ", " or " " at the end of the field.
(NF > 0 && $1 !~ /[A-Za-z()]+:/ && $2 !~ /[A-Za-z()]+:/) {
    if (inDescriptor == 1) {
        keywords = keywords ", " $0;
    }
    if (inAbstract == 1) {
        abstract = abstract " " $0;
    }
}
+ − 62
+ − 63
+ − 64
# Author line: build the BibTeX author field and derive the citation-key
# prefix (mainauthor) from the words that precede the first "," or ";".
$1 == "Author(s):" {
    # Remove any affiliation text; this also re-splits the record.
    gsub(/Affiliation:.*/, "")

    author = "\tauthor = {";
    firstauthor = 1;
    for (idx = 2; idx <= NF; idx++) {
        # A free-standing ";" separates two authors.
        if ($idx == ";") {
            $idx = "and";
            firstauthor = 0;
        }

        # Words are joined by single spaces, with none after the last one.
        author = author $idx (idx < NF ? " " : "");

        # Collect the lower-cased words of the first author's leading name
        # part; collection stops after the word that contains a comma.
        if (firstauthor) {
            mainauthor = mainauthor tolower($idx);
        }
        if ($idx ~ /,/) {
            firstauthor = 0;
        }
    }
    author = author "}";

    # The citation key must not contain the comma that ended the name part.
    gsub(",", "", mainauthor)
}
+ − 90
# Descriptor line: open the keywords field.  Two braces are opened here; the
# closing braces are appended by other rules once the descriptor list ends.
$1 == "Descriptor:" {
    # Strip the field label and the "(Major):" marker from the record.
    gsub(/Descriptor:[ \t]+/, "")
    gsub(/\(Major\):[ \t]+/, "")

    keywords = sprintf("\tkeywords = {{%s", $0);

    # Following label-less lines are appended to the keyword list.
    inDescriptor = 1;
}
+ − 97
# Identifier line: append the identifier text as a brace group of its own and
# close the keywords field.
$1 == "Identifier:" {
    # The descriptor list, if any, ends here.
    inDescriptor = 0;

    gsub(/Identifier:[ \t]+/, "")

    # Without a preceding Descriptor line the field has not been opened yet;
    # open it so the field name is present and the braces balance.
    if (keywords == "") {
        keywords = "\tkeywords = {";
    }
    keywords = keywords "{" $0 "}}";
}
+ − 103
# Source line: decide between a book chapter ("Source: In: ...") and a journal
# article, and extract the publication details for that type.
# NOTE(review): the parsing relies on the markers "Ed;" and "Vol" and on the
# page range being the last word of the line -- confirm against real exports.
$1 == "Source:" {
    if ($2 == "In:") {
        type = 1;   # In Book

        # The last word is taken as the page range; BibTeX wants "--".
        pages = "\tpages = {" $NF "}";
        gsub("-", "--", pages)

        # Scan backwards, skipping the last two words, until the "Ed;"
        # marker; the words passed on the way form the book title.  The
        # lower bound keeps the field index valid when there is no marker.
        booktitle = "";
        for (i = NF - 2; i > 2 && $i != "Ed;"; i--) {
            booktitle = (booktitle == "") ? $i : $i " " booktitle;
        }
        gsub(";", "", booktitle);
        booktitle = "\tbooktitle = {" booktitle "}";
        gsub("\\.}", "}", booktitle);

        # Everything between "In:" and the marker is the editor list.
        editors = "";
        for (; i > 2; i--) {
            editors = (editors == "") ? $i : $i " " editors;
        }
        gsub(" Ed;", "", editors);
        gsub("; ", " and ", editors);
        gsub(";", "", editors);
        # The standard BibTeX field name is "editor" (singular).
        editors = "\teditor = {" editors "}";
    } else {
        type = 2;   # Journal

        # Journal name: all words up to the "Vol" marker.  The upper bound
        # stops the scan when the marker is missing.
        srcJournalName = "";
        for (i = 2; i <= NF && $i != "Vol"; i++) {
            srcJournalName = (srcJournalName == "") ? $i : srcJournalName " " $i;
        }
        journal = "\tjournal = {" srcJournalName "}";

        # Defaults for a Source line that carries no volume information.
        volume = "\tvolume = {}";
        number = "\tnumber = {}";

        if (i < NF) {
            # The word after "Vol" looks like "26(1),": volume, then the
            # issue number in parentheses.
            i++;
            srcIssue = $i;
            vol = srcIssue;
            sub(/\(.*\),/, "", vol)
            volume = "\tvolume = {" vol "}";
            sub(/.*\(/, "", srcIssue)
            sub(/\),/, "", srcIssue)
            number = "\tnumber = {" srcIssue "}";

            i++;
            # A word whose numeric value is 0 is taken to be a month name
            # and is skipped.
            if (i < NF && $i + 1 == 1) {
                i++;
            }
            if (i <= NF) {
                srcYear = $i;
                sub(",", "", srcYear);
                year = "\tyear = {" srcYear "}";
                # The citation key uses the year without its first two digits.
                sub("[0-9][0-9]", "", srcYear);
                mainyear = srcYear;
            }
        }

        # The last word is taken as the page range, without trailing period.
        pages = "\tpages = {" $NF "}";
        gsub("-", "--", pages)
        gsub("\\.", "", pages)
    }
}
+ − 167
# Title line: build the BibTeX title field.  Capital letters are wrapped in
# braces so that BibTeX styles do not change their case: a word without any
# lower-case letters is wrapped as a whole, otherwise each capital is wrapped
# individually.
$1 == "Title:" {
    title = "\ttitle = {";
    for (i = 2; i <= NF; i++) {
        titleWord = $i;

        # Only the period that ends the title is dropped; periods inside
        # earlier words (e.g. "U.S.") are kept.
        if (i == NF) {
            sub(/\.$/, "", titleWord);
        }

        if (titleWord == toupper(titleWord)) {
            titleWord = "{" titleWord "}";
        } else {
            gsub(/[A-Z]/, "{&}", titleWord);
        }

        title = title titleWord (i < NF ? " " : "");
    }
    title = title "}";
}
+ − 185
# Abstract line: open the abstract field with the text that follows the
# label.  The field is left open so that following lines can be appended.
$1 == "Abstract:" {
    inAbstract = 1;
    gsub(/Abstract:[ \t]*/, "")
    abstract = sprintf("\tabstract = {%s", $0);
}
+ − 191
## Fall back on the "Accession No:" line for the year, but only when no year
## has been found yet: the part of the third word before its first "-" is
## used as the year.
$1 == "Accession" && mainyear == "" {
    gsub(/-.*/, "", $3);
    year = sprintf("\tyear = {%s (had to use heuristics to determine the year!)}", $3);

    # The citation key gets the year without its first two digits, marked
    # with "?" because it was guessed.
    sub("[0-9][0-9]", "", $3);
    mainyear = $3 "?";
}
+ − 204
# Append one field line, followed by a comma, to tmpfile.  Fields that were
# never filled in are skipped so that no bare "," line is written.
function printField(field) {
    if (field != "") {
        print(field ",") >> tmpfile;
    }
}

# Return text with as many "}" appended as it has unmatched "{".
function balanceBraces(text,    rest, missing) {
    rest = text;
    missing = gsub(/[{]/, "", rest);
    missing -= gsub(/[}]/, "", rest);
    while (missing-- > 0) {
        text = text "}";
    }
    return text;
}

# Write the record collected so far to tmpfile as one BibTeX entry and reset
# all per-record state.  Nothing is written when no author was found, since
# the citation key is built from mainauthor.
function printEntry() {
    if (mainauthor != "") {
        # A multi-line abstract or keyword list that was still open when the
        # record ended has not received its closing brace(s) yet.
        sub(/[ \t]+$/, "", abstract);
        sub(/[, \t]+$/, "", keywords);
        abstract = balanceBraces(abstract);
        keywords = balanceBraces(keywords);

        typestring = (type == 1) ? "InBook" : "Article";
        print("@" typestring "{" mainauthor ":" mainyear "x,") >> tmpfile;
        printField(author);
        printField(title);
        printField(year);
        if (type == 1) {    # In Book
            printField(booktitle);
            printField(editors);
            printField(pages);
        }
        if (type == 2) {    # Article
            printField(journal);
            printField(volume);
            printField(number);
            printField(pages);
        }
        printField(abstract);
        # The keywords field is last and is written without a trailing comma.
        if (keywords != "") {
            print(keywords) >> tmpfile;
        }
        print("}") >> tmpfile;
        print("") >> tmpfile;
        print("") >> tmpfile;
    }

    # Clear everything so that no field leaks into the next record.
    mainauthor = "";
    mainyear = "";
    author = "";
    title = "";
    year = "";
    booktitle = "";
    editors = "";
    pages = "";
    journal = "";
    volume = "";
    number = "";
    abstract = "";
    keywords = "";
    type = 0;
    inAbstract = 0;
    inDescriptor = 0;
}
+ − 236
# A line without any fields ends the current record: write out what has been
# collected for it.
!NF {
    printEntry();
}
+ − 240
# End of input: write the last record (the input need not end with a blank
# line) and hand the result file to the viewer command.
END {
    printEntry();

    # Close the output file first so that everything written to it is on
    # disk before the external program reads it.
    close(tmpfile);

    # atEnd is appended to the command line ("&" runs the viewer in the
    # background).
    system(prog " " tmpfile " " atEnd);
}