Mercurial > dotfiles
comparison unixSoft/bin/OCLC-to-bibtex.awk @ 0:c30d68fbd368
Initial import from svn.
author | Augie Fackler <durin42@gmail.com> |
---|---|
date | Wed, 26 Nov 2008 10:56:09 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c30d68fbd368 |
---|---|
1 # | |
2 # OCLC-to-bibtex.awk is a GAWK script to convert the export format of the | |
3 # OCLC databases to BibTeX. It processes the input and tries to convert it into | |
4 # BibTeX entries which are written to a file in /tmp. This file is then opened | |
5 # using the program specified in "prog" (by default: emacsclient). | |
6 # | |
7 # NOTE: It does not do an extensive job of testing what kind of publications | |
8 # are being processed. It has some rudimentary checks of discovering wether the | |
9 # processed publications are either InBook's or Articles. | |
10 # | |
11 # Hedderik van Rijn, 020912-020914 | |
12 # | |
13 # Do whatever you want with this script, but if you improve it, please send me a copy! | |
14 # email: hvr-OCLC@van-rijn.org | |
15 # | |
16 | |
# Set up the output file name, the viewer command and the version banner
# before any input record is read.
BEGIN {
    # BibTeX entries are appended to this file. systime() is a gawk
    # extension; it makes the name differ from run to run.
    tmpfile = "/tmp/tobib." systime() ".tmp.bib";

    # awk identifiers cannot contain "-": "oclc-version" is parsed as the
    # subtraction "oclc - version", so the name uses an underscore.
    oclc_version = "OCLC-to-bibtex v0.1";

    # External interactive progs
    # prog = "xless ";
    prog = "emacsclient ";
    # prog = "open -a TextEdit ";
    atEnd = "&";

    # (Indirect) Output to stdout
    # prog = "cat ";
    # atEnd = "";

    # The banner goes to stdout, not to tmpfile.
    print "# Exported from the OCLC FirstSearch PsychINFO database using " oclc_version;
}
37 | |
# -------------------------------------------------------------------------
# (awk has no C-style block comments: the original separator line was
# parsed as a regular-expression pattern rule, so it is a "#" comment now.)

# A record whose first or second field contains "letters:" starts a new
# labeled field, so whichever multi-line field is still open gets its
# closing brace. The character class has no parentheses, so a field such
# as "(Major):" on its own does not trigger this rule.
(match($1, /[A-Za-z]+:/) || match($2, /[A-Za-z]+:/)) {

    if (inDescriptor == 1) {
        keywords = keywords "}";
        inDescriptor = 0;
    }
    if (inAbstract == 1) {
        abstract = abstract "}";
        inAbstract = 0;
    }
}
51 | |
# Continuation records: when neither of the first two fields looks like a
# label (letters and/or parentheses followed by ":"), the whole record is
# appended to whichever multi-line field is currently open.
!(match($1, /[A-Za-z()]+:/) || match($2, /[A-Za-z()]+:/)) {
    # Descriptor continuations become further comma-separated keywords.
    if (inDescriptor == 1) {
        keywords = keywords ", " $0;
    }
    # Abstract continuations are joined with a single space.
    if (inAbstract == 1) {
        abstract = abstract " " $0;
    }
}
62 | |
63 | |
64 | |
# Build the BibTeX author field from an "Author(s):" record and extend
# mainauthor (the citation-key prefix) with the lowercased words up to and
# including the first word that contains a comma.
$1 == "Author(s):" {
    # Drop any trailing affiliation text; this re-splits the record.
    gsub(/Affiliation:.*/, "");

    names = "";
    inFirstName = 1;
    for (n = 2; n <= NF; n++) {
        word = $n;
        # A stand-alone ";" separates authors; BibTeX wants "and".
        if (word == ";") {
            word = "and";
            inFirstName = 0;
        }
        if (inFirstName) {
            mainauthor = mainauthor tolower(word);
        }
        # The first comma ends the part used for the citation key.
        if (match(word, ",")) {
            inFirstName = 0;
        }
        names = (n < NF) ? names word " " : names word;
    }
    author = "\tauthor = {" names "}";
    gsub(",", "", mainauthor);
}
90 | |
# A "Descriptor:" record opens the keywords field. The label and an
# optional "(Major):" marker are stripped first; inDescriptor stays set so
# that following unlabeled records are appended as further keywords.
$1 == "Descriptor:" {
    gsub(/Descriptor:[ \t]+/, "");
    gsub(/\(Major\):[ \t]+/, "");
    # Two opening braces: the outer one is the field, the inner one groups
    # the descriptor list.
    keywords = "\tkeywords = {{" $0;
    inDescriptor = 1;
}
97 | |
# An "Identifier:" record adds a second brace group to the keywords field
# and closes the field.
$1 == "Identifier:" {
    # The flag tracking an open descriptor list is inDescriptor; the
    # original assigned to "descriptor", which nothing else reads.
    inDescriptor = 0;
    gsub(/Identifier:[ \t]+/, "");
    if (keywords == "") {
        # No Descriptor record opened the field: open it here so the
        # result still has a field name and balanced braces.
        keywords = "\tkeywords = {";
    }
    keywords = keywords "{" $0 "}}";
}
103 | |
# Parse a "Source:" record. "Source: In: ..." is treated as a book chapter
# (type 1); every other source is treated as a journal article (type 2).
$1 == "Source:" {
    if ($2 == "In:") {
        type = 1;   # In Book

        # The last field is taken as the page range.
        pages = "\tpages = {" $NF "}";
        gsub("-", "--", pages);

        # Collect the book title by walking backwards from field NF-2 to
        # the "Ed;" marker. The "i > 2" bound stops the walk at the "In:"
        # field when there is no marker; without it the index runs below
        # zero, which is a fatal error in gawk.
        booktitle = "";
        for (i = NF - 2; i > 2 && $i != "Ed;"; i--) {
            if (booktitle == "") {
                booktitle = $i;
            } else {
                booktitle = $i " " booktitle;
            }
        }
        gsub(";", "", booktitle);
        booktitle = "\tbooktitle = {" booktitle "}";
        gsub("\\.}", "}", booktitle);

        # Whatever remains between "In:" and the title (the "Ed;" marker
        # included) is the editor list.
        editors = "";
        for (; i > 2; i--) {
            if (editors == "") {
                editors = $i;
            } else {
                editors = $i " " editors;
            }
        }
        gsub(" Ed;", "", editors);
        gsub("; ", " and ", editors);
        gsub(";", "", editors);
        # The standard BibTeX field name is "editor" (singular).
        editors = "\teditor = {" editors "}";
    } else {
        type = 2;   # Journal

        # The journal name is everything up to the "Vol" marker. The
        # "i <= NF" bound ends the loop when the record has no marker;
        # without it the loop never terminates.
        name = "";
        for (i = 2; i <= NF && $i != "Vol"; i++) {
            name = (name == "") ? $i : name " " $i;
        }
        sub(/,$/, "", name);
        journal = "\tjournal = {" name "}";

        if (i > NF) {
            # No "Vol" marker: volume and issue are unknown. year and
            # mainyear are left alone so the Accession rule can supply them.
            volume = "\tvolume = {}";
            number = "\tnumber = {}";
        } else {
            # Work on copies: calling sub() on a field beyond NF would
            # extend the record and change $NF, which is read for pages.
            i++;
            issue = $i;
            vol = issue;
            sub(/\(.*\),/, "", vol);        # "26(1)," -> "26"
            volume = "\tvolume = {" vol "}";
            sub(/.*\(/, "", issue);         # "26(1)," -> "1),"
            sub(/\),/, "", issue);          # "1),"    -> "1"
            number = "\tnumber = {" issue "}";

            i++;
            if ($i + 1 == 1) {  # Skip the month if necessary
                i++;
            }
            yr = $i;
            sub(",", "", yr);
            year = "\tyear = {" yr "}";
            # The citation key uses the year without its first two digits.
            sub("[0-9][0-9]", "", yr);
            mainyear = yr;
        }

        pages = "\tpages = {" $NF "}";
        gsub("-", "--", pages);
        gsub("\\.", "", pages);
    }
}
167 | |
# Build the BibTeX title field from a "Title:" record, protecting capitals
# with braces so BibTeX styles do not change their case.
$1 == "Title:" {
    text = "";
    for (k = 2; k <= NF; k++) {
        word = $k;
        if (word == toupper(word)) {
            # No lowercase letters at all: brace the whole word.
            word = "{" word "}";
        } else {
            # Mixed case: brace each capital letter individually.
            gsub(/[A-Z]/, "{&}", word);
        }
        text = (k < NF) ? text word " " : text word;
    }
    title = "\ttitle = {" text "}";
    # Remove a period that sits directly before a closing brace.
    gsub("\\.}", "}", title);
}
185 | |
# An "Abstract:" record opens the abstract field. The closing brace is
# added later, when the next labeled record is seen; until then unlabeled
# records are appended to the abstract.
$1 == "Abstract:" {
    inAbstract = 1;
    gsub(/Abstract:[ \t]*/, "");
    abstract = "\tabstract = {" $0;
}
191 | |
## Fallback for the year: when no year has been found by the time the
## "Accession No:" record is seen, take the part of its third field that
## precedes the first "-".
$1 == "Accession" && mainyear == "" {
    accYear = $3;
    gsub(/-.*/, "", accYear);
    year = "\tyear = {" accYear " (had to use heuristics to determine the year!)}";

    # The citation key uses the year without its first two digits; the
    # "?" marks it as a guess.
    sub("[0-9][0-9]", "", accYear);
    mainyear = accYear "?";
}
204 | |
# Append "field" to the ",\n"-separated list "body" and return the result.
# Fields that were never filled in are skipped, so no bare "," line is
# written for them.
function _addField(body, field) {
    if (field == "") {
        return body;
    }
    return (body == "") ? field : body ",\n" field;
}

# Write the record collected so far to tmpfile as one BibTeX entry, then
# reset the per-record state. Nothing is written when no author has been
# seen (mainauthor is empty), e.g. for repeated blank lines.
# The extra parameters are local variables; call it as printEntry().
function printEntry(    body, opens, closes) {
    # A blank line or the end of input can arrive while the abstract or
    # the descriptor list is still open. Close it here so the brace is
    # balanced and the flag does not leak into the next record.
    if (inAbstract == 1) {
        abstract = abstract "}";
        inAbstract = 0;
    }
    if (inDescriptor == 1) {
        keywords = keywords "}";
        inDescriptor = 0;
    }

    if (mainauthor != "") {
        # The keywords field is opened with "{{" by the Descriptor rule
        # but only fully closed by an Identifier record; add whatever
        # closing braces are still missing.
        opens = gsub(/[{]/, "{", keywords);
        closes = gsub(/[}]/, "}", keywords);
        while (closes < opens) {
            keywords = keywords "}";
            closes++;
        }

        if (type == 1) { # In Book
            typestring = "InBook";
        } else {
            typestring = "Article";
        }

        body = "";
        body = _addField(body, author);
        body = _addField(body, title);
        body = _addField(body, year);
        if (type == 1) { # In Book
            body = _addField(body, booktitle);
            body = _addField(body, editors);
            body = _addField(body, pages);
        }
        if (type == 2) { # Article
            body = _addField(body, journal);
            body = _addField(body, volume);
            body = _addField(body, number);
            body = _addField(body, pages);
        }
        body = _addField(body, abstract);
        body = _addField(body, keywords);

        print("@" typestring "{" mainauthor ":" mainyear "x,") >> tmpfile;
        if (body != "") {
            print(body) >> tmpfile;
        }
        print("}") >> tmpfile;
        print("") >> tmpfile;
        print("") >> tmpfile;

        # Start the next record from a clean slate; otherwise a record
        # that lacks a field would inherit the previous record's value.
        author = "";
        title = "";
        year = "";
        booktitle = "";
        editors = "";
        pages = "";
        journal = "";
        volume = "";
        number = "";
        abstract = "";
        keywords = "";
        type = 0;
    }
    mainauthor = "";
    mainyear = "";
}
236 | |
# A record without any fields (a blank line) marks the end of a
# publication: write out what has been collected.
!NF {
    printEntry();
}
240 | |
# At end of input, write the last pending entry and hand the result file
# to the viewer configured in BEGIN.
END {
    printEntry();
    # Close the output file first so that everything written to it is
    # flushed before the external program opens it.
    close(tmpfile);
    # atEnd is appended to the command line; "&" runs the viewer in the
    # background.
    system(prog " " tmpfile " " atEnd);
}