comparison unixSoft/bin/OCLC-to-bibtex.awk @ 0:c30d68fbd368

Initial import from svn.
author Augie Fackler <durin42@gmail.com>
date Wed, 26 Nov 2008 10:56:09 -0600
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c30d68fbd368
1 #
# OCLC-to-bibtex.awk is a GAWK script to convert the export format of the
# OCLC databases to BibTeX. It processes the input and tries to convert it into
# BibTeX entries, which are written to a file in /tmp. This file is then opened
# using the program specified in "prog" (by default: emacsclient).
6 #
# NOTE: It does not do an extensive job of testing what kind of publications
# are being processed. It only has some rudimentary checks to discover whether
# the processed publications are InBooks or Articles.
10 #
11 # Hedderik van Rijn, 020912-020914
12 #
13 # Do whatever you want with this script, but if you improve it, please send me a copy!
14 # email: hvr-OCLC@van-rijn.org
15 #
16
# Start-up: choose the output file and the viewer command, then print a banner
# to stdout.
BEGIN {
    # All BibTeX entries are appended to this file. systime() puts the
    # current timestamp into the name.
    tmpfile = "/tmp/tobib." systime() ".tmp.bib";

    # awk identifiers cannot contain "-": a hyphenated name is parsed as a
    # subtraction, so the version string lives in an underscore-named variable.
    oclc_version = "OLCL-to-bibtex v0.1";

    # External interactive progs
    # prog = "xless ";
    prog = "emacsclient ";
    # prog = "open -a TextEdit ";
    atEnd = "&";

    # (Indirect) Output to stdout
    # prog = "cat ";
    # atEnd = "";

    print "# Exported from the OLCL FirstSearch PsychINFO database using " oclc_version;
}
37
# -------------------------------------------------------------------------
39
# A field label (letters followed by ":") in either of the first two fields
# means a new field begins on this line, so terminate whichever multi-line
# field (abstract or descriptor list) is still being collected.
$1 ~ /[A-Za-z]+:/ || $2 ~ /[A-Za-z]+:/ {
    if (inAbstract) {
        inAbstract = 0;
        abstract = abstract "}";
    }
    if (inDescriptor) {
        inDescriptor = 0;
        keywords = keywords "}";
    }
}
51
# A line whose first two fields contain no label (letters or parentheses
# followed by ":") is a continuation line: append it to the multi-line field
# that is currently open, if any.
$1 !~ /[A-Za-z()]+:/ && $2 !~ /[A-Za-z()]+:/ {
    if (inAbstract) {
        abstract = abstract " " $0;
    }
    if (inDescriptor) {
        keywords = keywords ", " $0;
    }
}
62
63
64
# "Author(s):" line: build the BibTeX author field and collect the lower-cased
# name of the first author (up to and including the first token that contains
# a comma) in mainauthor, which is later used for the citation key.
$1 == "Author(s):" {
    # Everything from "Affiliation:" onwards is not part of the author list.
    gsub(/Affiliation:.*/, "");

    names = "";
    inFirstName = 1;
    for (k = 2; k <= NF; k++) {
        # A stand-alone ";" separates two authors.
        if ($k == ";") {
            $k = "and";
            inFirstName = 0;
        }
        names = names $k;
        if (inFirstName) {
            mainauthor = mainauthor tolower($k);
        }
        # The token carrying the comma ends the first author's key part.
        if (index($k, ",") > 0) {
            inFirstName = 0;
        }
        if (k < NF) {
            names = names " ";
        }
    }
    author = "\tauthor = {" names "}";
    gsub(",", "", mainauthor);
}
90
# "Descriptor:" line: strip the "Descriptor:" and "(Major):" labels and open
# the keywords field; following unlabelled lines are appended to it.
$1 == "Descriptor:" {
    gsub(/Descriptor:[ \t]+/, "");
    gsub(/\(Major\):[ \t]+/, "");
    keywords = sprintf("\tkeywords = {{%s", $0);
    inDescriptor = 1;
}
97
# "Identifier:" line: append the identifier text as a second brace group and
# close the keywords field.
$1 == "Identifier:" {
    # The descriptor list ends here. The flag that tracks it is inDescriptor;
    # no rule reads a variable named "descriptor".
    inDescriptor = 0;
    gsub(/Identifier:[ \t]+/, "");
    keywords = keywords "{" $0 "}}";
}
103
# "Source:" line. Two layouts are recognised:
#   "Source: In: <editors> Ed; <book title> ... <pages>"   -> type 1 (In Book)
#   "Source: <journal> Vol <vol>(<no>), [<month>] <year>, ... <pages>"
#                                                          -> type 2 (Journal)
# Sets type, pages and either booktitle/editors or journal/volume/number/
# year/mainyear.
$1 == "Source:" {
    if ($2 == "In:") {
        type = 1;   # In Book

        # The last field is the page range; BibTeX uses "--" between pages.
        pages = "\tpages = {" $NF "}";
        gsub("-", "--", pages);

        # Collect the book title by walking backwards from field NF-2 to the
        # last "Ed;" marker. The "i > 2" bound ends the scan at the start of
        # the line when no "Ed;" marker exists, so the field index can never
        # drop below zero.
        booktitle = "";
        for (i = NF - 2; i > 2 && $i != "Ed;"; i--) {
            booktitle = (booktitle == "") ? $i : ($i " " booktitle);
        }
        gsub(";", "", booktitle);
        booktitle = "\tbooktitle = {" booktitle "}";
        gsub("\\.}", "}", booktitle);    # drop a period right before "}"

        # Everything between "In:" and the title (the "Ed;" marker included)
        # is the editor list.
        editors = "";
        for (; i > 2; i--) {
            editors = (editors == "") ? $i : ($i " " editors);
        }
        gsub(" Ed;", "", editors);
        gsub("; ", " and ", editors);
        gsub(";", "", editors);
        # BibTeX's standard field name is "editor" (singular).
        editors = "\teditor = {" editors "}";
    } else {
        type = 2;   # Journal

        # The journal name is everything up to the "Vol" token. The
        # "i <= NF" bound ends the scan on lines without "Vol" instead of
        # looping forever over empty fields.
        journal = "";
        for (i = 2; i <= NF && $i != "Vol"; i++) {
            journal = (journal == "") ? $i : (journal " " $i);
        }
        journal = "\tjournal = {" journal "}";

        if (i < NF) {
            # The token after "Vol" looks like "<volume>(<number>),".
            i++;
            vol = $i;
            sub(/\(.*\),/, "", vol);
            volume = "\tvolume = {" vol "}";
            sub(/.*\(/, "", $i);
            sub(/\),/, "", $i);
            number = "\tnumber = {" $i "}";
            i++;
            # A token with numeric value 0 (e.g. a month name) is skipped.
            if ($i + 1 == 1) {
                i++;
            }
            sub(",", "", $i);
            year = "\tyear = {" $i "}";
            # The citation key uses the year without its first two digits.
            sub("[0-9][0-9]", "", $i);
            mainyear = $i;
        } else {
            # No "Vol <volume>" part on this line: leave the fields empty.
            volume = "\tvolume = {}";
            number = "\tnumber = {}";
        }

        pages = "\tpages = {" $NF "}";
        gsub("-", "--", pages);
        gsub("\\.", "", pages);
    }
}
167
# "Title:" line: build the BibTeX title field. Capitals are wrapped in braces
# so that BibTeX styles do not lower-case them: a word that has no lower-case
# letters is braced as a whole, otherwise each capital letter is braced.
$1 == "Title:" {
    # Remove only the period that ends the title. Stripping every ".}" from
    # the finished string would also eat the period of braced all-caps
    # tokens in the middle of the title, such as "{U.S.}".
    if (NF >= 2) {
        sub(/\.$/, "", $NF);
    }

    title = "\ttitle = {";
    for (i = 2; i <= NF; i++) {
        if ($i == toupper($i)) {
            $i = "{" $i "}";
        } else {
            gsub(/[A-Z]/, "{&}", $i);
        }
        title = title $i;
        if (i < NF) {
            title = title " ";
        }
    }
    title = title "}";
}
185
# "Abstract:" line: strip the label and open the abstract field; following
# unlabelled lines are appended to it.
$1 == "Abstract:" {
    inAbstract = 1;
    gsub(/Abstract:[ \t]*/, "");
    abstract = sprintf("\tabstract = {%s", $0);
}
191
## Fallback for the year: if no year has been found yet, take the part of the
## third field of the "Accession" line that precedes the first "-".
$1 == "Accession" && mainyear == "" {
    gsub(/-.*/, "", $3);
    year = sprintf("\tyear = {%s (had to use heuristics to determine the year!)}", $3);

    # The citation key gets the year without its first two digits, plus "?".
    sub("[0-9][0-9]", "", $3);
    mainyear = $3 "?";
}
204
# Append the record collected so far to tmpfile as one BibTeX entry and reset
# the per-record state. Nothing is written when no author was seen
# (mainauthor is empty). Reads and resets the script's global variables; takes
# no arguments and returns nothing.
function printEntry() {
    # A multi-line field that is still open here was never terminated by a
    # following label line, so close it now to keep the braces balanced.
    if (inAbstract == 1) {
        sub(/ +$/, "", abstract);
        abstract = abstract "}";
        inAbstract = 0;
    }
    if (inDescriptor == 1) {
        # The keywords field was opened with "{{" and nothing has closed it.
        sub(/[, ]+$/, "", keywords);
        keywords = keywords "}}";
        inDescriptor = 0;
    }

    if (mainauthor != "") {
        if (type == 1) {    # In Book
            typestring = "InBook";
        } else {
            typestring = "Article";
        }
        print("@" typestring "{" mainauthor ":" mainyear "x,") >> tmpfile;
        print(author ",") >> tmpfile;
        print(title ",") >> tmpfile;
        print(year ",") >> tmpfile;
        if (type == 1) {    # In Book
            print(booktitle ",") >> tmpfile;
            print(editors ",") >> tmpfile;
            print(pages ",") >> tmpfile;
        }
        if (type == 2) {    # Article
            print(journal ",") >> tmpfile;
            print(volume ",") >> tmpfile;
            print(number ",") >> tmpfile;
            print(pages ",") >> tmpfile;
        }
        print(abstract ",") >> tmpfile;
        print(keywords) >> tmpfile;
        print("}") >> tmpfile;
        print("") >> tmpfile;
        print("") >> tmpfile;

        # Forget every field of the entry just written, so that a following
        # record lacking one of them does not inherit the old value.
        author = "";
        title = "";
        year = "";
        booktitle = "";
        editors = "";
        pages = "";
        journal = "";
        volume = "";
        number = "";
        abstract = "";
        keywords = "";
        type = 0;
    }
    mainauthor = "";
    mainyear = "";
}
236
# A line without any fields ends the current record: write it out.
!NF {
    printEntry();
}
240
# End of input: write the last record, then open the result file with the
# configured viewer command.
END {
    printEntry();
    # Close the output file first so everything is written to disk before
    # the external program reads it.
    close(tmpfile);
    system(prog " " tmpfile " " atEnd);
}