0
|
1 # |
|
2 # OCLC-to-bibtex.awk is a GAWK script to convert the export format of the |
|
3 # OCLC databases to BibTeX. It processes the input and tries to convert it into |
|
4 # BibTeX entries which are written to a file in /tmp. This file is then opened |
|
5 # using the program specified in "prog" (by default: emacsclient). |
|
6 # |
|
7 # NOTE: It does not do an extensive job of testing what kind of publications |
|
8 # are being processed. It has some rudimentary checks for discovering whether the |
|
9 # processed publications are either InBook's or Articles. |
|
10 # |
|
11 # Hedderik van Rijn, 020912-020914 |
|
12 # |
|
13 # Do whatever you want with this script, but if you improve it, please send me a copy! |
|
14 # email: hvr-OCLC@van-rijn.org |
|
15 # |
|
16 |
|
# Set-up: pick the scratch file and the viewer command, then print a banner
# line on stdout.
BEGIN {
    # Scratch BibTeX file; systime() is a gawk extension, so the name only
    # changes once per second.
    tmpfile = "/tmp/tobib." systime() ".tmp.bib";

    # The identifier uses '_' because '-' would be parsed as a subtraction.
    oclc_version = "OCLC-to-bibtex v0.1";

    # External interactive progs
    # prog = "xless ";
    prog = "emacsclient ";
    # prog = "open -a TextEdit ";
    atEnd = "&";

    # (Indirect) Output to stdout
    # prog = "cat ";
    # atEnd = "";

    print "# Exported from the OCLC FirstSearch PsychINFO database using " oclc_version;
}
|
37 |
|
# -------------------------------------------------------------------------
# (awk only has '#' comments; a C-style /* ... */ line is parsed as a regex
# pattern rule whose default action prints every matching record.)
|
39 |
|
# Return true when the current record starts a new labelled field, i.e. its
# first or second word contains a label such as "Title:", "Author(s):" or the
# "No:" of "Accession No:".  Both boundary rules below share this test so that
# they are exact complements of each other.
function startsField() {
    return ($1 ~ /[A-Za-z()]+:/) || ($2 ~ /[A-Za-z()]+:/);
}

# A new field label -- or the blank line that ends a record -- terminates any
# multi-line Descriptor/Abstract value that is still being collected.
startsField() || NF == 0 {
    if (inDescriptor == 1) {
        keywords = keywords "}";
        inDescriptor = 0;
    }
    if (inAbstract == 1) {
        abstract = abstract "}";
        inAbstract = 0;
    }
}

# Any other non-blank line is a continuation of the value currently open.
NF > 0 && !startsField() {
    if (inDescriptor == 1) {
        keywords = keywords ", " $0;
    }
    if (inAbstract == 1) {
        abstract = abstract " " $0;
    }
}
|
62 |
|
63 |
|
64 |
|
# "Author(s):" line: build the BibTeX author field and collect the lower-cased
# surname of the first author in mainauthor (used for the citation key).
$1 == "Author(s):" {
    # Cut off the affiliation; editing $0 also re-splits the fields.
    sub(/Affiliation:.*/, "");

    authorList = "";
    inFirstAuthor = 1;
    for (k = 2; k <= NF; k++) {
        # A free-standing ";" separates authors; BibTeX wants "and".
        if ($k == ";") {
            $k = "and";
            inFirstAuthor = 0;
        }
        authorList = authorList $k (k < NF ? " " : "");

        # The key prefix is every word up to and including the first one
        # that carries a comma (the end of the first surname).
        if (inFirstAuthor) {
            mainauthor = mainauthor tolower($k);
            if ($k ~ /,/) {
                inFirstAuthor = 0;
            }
        }
    }
    author = "\tauthor = {" authorList "}";
    gsub(",", "", mainauthor);
}
|
90 |
|
# "Descriptor:" line: start the (possibly multi-line) keyword list.  The inner
# brace group opened here is closed by the field-boundary rule; the outer one
# by the "Identifier:" rule.
$1 == "Descriptor:" {
    # Work on a copy: editing $0 re-splits the record, and a line holding only
    # the label plus whitespace would become empty and be mistaken by the
    # NF == 0 rule for the blank line that ends a record.
    terms = $0;
    gsub(/Descriptor:[ \t]+/, "", terms);
    gsub(/\(Major\):[ \t]+/, "", terms);
    keywords = "\tkeywords = {{" terms;
    inDescriptor = 1;
}
|
97 |
|
# "Identifier:" line: append the identifier as a second brace group and close
# the keywords field.
$1 == "Identifier:" {
    # Close a descriptor list that is still open (the flag is inDescriptor).
    if (inDescriptor == 1) {
        keywords = keywords "}";
        inDescriptor = 0;
    }

    # Strip the label on a copy so that $0/NF stay intact for later rules.
    ident = $0;
    gsub(/Identifier:[ \t]+/, "", ident);

    # Without a preceding Descriptor there is no field opener yet; add one so
    # the closing "}}" below stays balanced.
    if (keywords == "") {
        keywords = "\tkeywords = {";
    }
    keywords = keywords "{" ident "}}";
}
|
103 |
|
# "Source:" line: decide between a book chapter ("Source: In: ...") and a
# journal article, and fill the matching fields.
#   type 1 (book):    pages, booktitle, editors
#   type 2 (journal): journal, volume, number, year, mainyear, pages
# Both scans for the "Ed;" / "Vol" marker are bounded, so a line that lacks the
# marker can neither run into negative field numbers nor loop forever.
$1 == "Source:" {
    if ($2 == "In:") {
        type = 1;    # In Book

        # The last word is taken as the page range.
        pages = "\tpages = {" $NF "}";
        gsub("-", "--", pages);
        gsub("\\.", "", pages);

        # Walk backwards from the third-last word, collecting the book title
        # until the last "Ed;" marker (or the start of the data) is reached.
        booktitle = "";
        for (i = NF - 2; i > 2 && $i != "Ed;"; i--) {
            booktitle = (booktitle == "") ? $i : ($i " " booktitle);
        }
        gsub(";", "", booktitle);
        booktitle = "\tbooktitle = {" booktitle "}";
        gsub("\\.}", "}", booktitle);

        # Everything between "In:" and that marker is the editor list.
        editors = "";
        for (; i > 2; i--) {
            editors = (editors == "") ? $i : ($i " " editors);
        }
        gsub(" Ed;", "", editors);
        gsub("; ", " and ", editors);
        gsub(";", "", editors);
        # BibTeX's standard field name is "editor" (singular).
        editors = "\teditor = {" editors "}";
    } else {
        type = 2;    # Journal

        # The journal name is every word before "Vol".
        jname = "";
        for (i = 2; i <= NF && $i != "Vol"; i++) {
            jname = (jname == "") ? $i : (jname " " $i);
        }
        journal = "\tjournal = {" jname "}";

        vol = "";
        issue = "";
        if (i <= NF) {    # "Vol" was found at field i
            # Next word looks like "12(3)," -- volume, optional issue.
            i++;
            vol = $i;
            if (match(vol, /\(.*\)/)) {
                issue = substr(vol, RSTART + 1, RLENGTH - 2);
                vol = substr(vol, 1, RSTART - 1);
            }
            sub(/,$/, "", vol);

            i++;
            if ($i + 1 == 1) {    # Skip the month if necessary
                i++;
            }
            yr = $i;
            sub(",", "", yr);
            if (yr != "") {
                year = "\tyear = {" yr "}";
                # The citation key uses the year without its first two digits.
                sub("[0-9][0-9]", "", yr);
                mainyear = yr;
            }
        }
        volume = "\tvolume = {" vol "}";
        number = "\tnumber = {" issue "}";

        pages = "\tpages = {" $NF "}";
        gsub("-", "--", pages);
        gsub("\\.", "", pages);
    }
}
|
167 |
|
# "Title:" line: build the BibTeX title field.  Words without lower-case
# letters are braced as a whole; in all other words each capital is braced, so
# BibTeX styles do not change the case.
$1 == "Title:" {
    title = "\ttitle = {";
    for (i = 2; i <= NF; i++) {
        word = $i;
        # Drop only the sentence-final period; periods inside braced words
        # elsewhere in the title (e.g. "{U.S.}") must survive.
        if (i == NF) {
            sub(/\.$/, "", word);
        }
        if (word == toupper(word)) {
            word = "{" word "}";
        } else {
            gsub(/[A-Z]/, "{&}", word);
        }
        title = title word (i < NF ? " " : "");
    }
    title = title "}";
}
|
185 |
|
# "Abstract:" line: open the abstract field.  Continuation lines are appended
# and the closing brace is added by the field-boundary rules.
$1 == "Abstract:" {
    # Strip the label on a copy: doing it on $0 would turn a line holding only
    # "Abstract:" into an empty record, which the NF == 0 rule would take for
    # the end of the entry.
    text = $0;
    gsub(/Abstract:[ \t]*/, "", text);
    abstract = "\tabstract = {" text;
    inAbstract = 1;
}
|
191 |
|
## Fallback: when no year has been found yet, derive it from the third word of
## the "Accession No:" line, i.e. the part of that word before the first "-".

$1 == "Accession" {
    if (mainyear == "") {
        accYear = $3;
        sub(/-.*/, "", accYear);
        year = "\tyear = {" accYear " (had to use heuristics to determine the year!)}";

        # The citation key gets the year minus its first two digits, flagged
        # with "?" because it is only a guess.
        shortYear = accYear;
        sub(/[0-9][0-9]/, "", shortYear);
        mainyear = shortYear "?";
    }
}
|
204 |
|
# Return VALUE with as many "}" appended as are needed to give every "{" a
# partner, so a field that was still being collected is written out closed.
function balanceBraces(value,    tmp, opens, closes) {
    tmp = value;
    opens = gsub(/[{]/, "{", tmp);
    closes = gsub(/[}]/, "}", tmp);
    for (; closes < opens; closes++) {
        value = value "}";
    }
    return value;
}

# Append "VALUE," to file OUT unless VALUE is empty (a bare "," line would not
# be valid BibTeX).
function emitField(value, out) {
    if (value != "") {
        print(value ",") >> out;
    }
}

# Append the entry collected so far to tmpfile -- only if an author was found
# -- and then clear all per-entry state, so nothing leaks into the next record.
# Reads and resets the globals set by the field rules.
function printEntry() {
    if (mainauthor != "") {
        typestring = (type == 1) ? "InBook" : "Article";

        print("@" typestring "{" mainauthor ":" mainyear "x,") >> tmpfile;
        emitField(author, tmpfile);
        emitField(title, tmpfile);
        emitField(year, tmpfile);
        if (type == 1) {    # In Book
            emitField(booktitle, tmpfile);
            emitField(editors, tmpfile);
            emitField(pages, tmpfile);
        }
        if (type == 2) {    # Article
            emitField(journal, tmpfile);
            emitField(volume, tmpfile);
            emitField(number, tmpfile);
            emitField(pages, tmpfile);
        }
        emitField(balanceBraces(abstract), tmpfile);
        # keywords is the last field and is written without a trailing comma.
        if (keywords != "") {
            print(balanceBraces(keywords)) >> tmpfile;
        }
        print("}") >> tmpfile;
        print("") >> tmpfile;
        print("") >> tmpfile;
    }

    # Start the next record from a clean slate.
    mainauthor = mainyear = "";
    author = title = year = "";
    booktitle = editors = pages = "";
    journal = volume = number = "";
    abstract = keywords = "";
    type = 0;
    inAbstract = inDescriptor = 0;
}
|
236 |
|
# An empty record (no fields) marks the end of an entry: write it out.
!NF {
    printEntry()
}
|
240 |
|
# End of input: write the last pending entry, then open the result in the
# configured viewer.
END {
    printEntry();
    # Close the scratch file first so that everything is flushed to disk
    # before the external program reads it.
    close(tmpfile);
    system(prog " " tmpfile " " atEnd);
}