kpeter@905
|
1 |
#! /usr/bin/env python
|
kpeter@790
|
2 |
"""
|
kpeter@790
|
3 |
BibTeX to Doxygen converter
|
kpeter@790
|
4 |
Usage: python bib2dox.py bibfile.bib > bibfile.dox
|
kpeter@790
|
5 |
|
kpeter@905
|
6 |
This file is a part of LEMON, a generic C++ optimization library.
|
kpeter@905
|
7 |
|
kpeter@905
|
8 |
**********************************************************************
|
kpeter@905
|
9 |
|
kpeter@790
|
10 |
This code is the modification of the BibTeX to XML converter
|
kpeter@905
|
11 |
by Vidar Bronken Gundersen et al.
|
kpeter@905
|
12 |
See the original copyright notices below.
|
kpeter@790
|
13 |
|
kpeter@790
|
14 |
**********************************************************************
|
kpeter@790
|
15 |
|
kpeter@790
|
16 |
Decoder for bibliographic data, BibTeX
|
kpeter@790
|
17 |
Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
|
kpeter@790
|
18 |
|
kpeter@790
|
19 |
v.8
|
kpeter@790
|
20 |
(c)2002-06-23 Vidar Bronken Gundersen
|
kpeter@790
|
21 |
http://bibtexml.sf.net/
|
kpeter@790
|
22 |
Reuse approved as long as this notification is kept.
|
kpeter@790
|
23 |
Licence: GPL.
|
kpeter@790
|
24 |
|
kpeter@790
|
25 |
Contributions/thanks to:
|
kpeter@790
|
26 |
Egon Willighagen, http://sf.net/projects/jreferences/
|
kpeter@790
|
27 |
Richard Mahoney (for providing a test case)
|
kpeter@790
|
28 |
|
kpeter@790
|
29 |
Editted by Sara Sprenkle to be more robust and handle more bibtex features.
|
kpeter@790
|
30 |
(c) 2003-01-15
|
kpeter@790
|
31 |
|
kpeter@790
|
32 |
1. Changed bibtex: tags to bibxml: tags.
|
kpeter@790
|
33 |
2. Use xmlns:bibxml="http://bibtexml.sf.net/"
|
kpeter@790
|
34 |
3. Allow spaces between @type and first {
|
kpeter@790
|
35 |
4. "author" fields with multiple authors split by " and "
|
kpeter@790
|
36 |
are put in separate xml "bibxml:author" tags.
|
kpeter@790
|
37 |
5. Option for Titles: words are capitalized
|
kpeter@790
|
38 |
only if first letter in title or capitalized inside braces
|
kpeter@790
|
39 |
6. Removes braces from within field values
|
kpeter@790
|
40 |
7. Ignores comments in bibtex file (including @comment{ or % )
|
kpeter@790
|
41 |
8. Replaces some special latex tags, e.g., replaces ~ with ' '
|
kpeter@790
|
42 |
9. Handles bibtex @string abbreviations
|
kpeter@790
|
43 |
--> includes bibtex's default abbreviations for months
|
kpeter@790
|
44 |
--> does concatenation of abbr # " more " and " more " # abbr
|
kpeter@790
|
45 |
10. Handles @type( ... ) or @type{ ... }
|
kpeter@790
|
46 |
11. The keywords field is split on , or ; and put into separate xml
|
kpeter@790
|
47 |
"bibxml:keywords" tags
|
kpeter@790
|
48 |
12. Ignores @preamble
|
kpeter@790
|
49 |
|
kpeter@790
|
50 |
Known Limitations
|
kpeter@790
|
51 |
1. Does not transform Latex encoding like math mode and special
|
kpeter@790
|
52 |
latex symbols.
|
kpeter@790
|
53 |
2. Does not parse author fields into first and last names.
|
kpeter@790
|
54 |
E.g., It does not do anything special to an author whose name is
|
kpeter@790
|
55 |
in the form LAST_NAME, FIRST_NAME
|
kpeter@790
|
56 |
In "author" tag, will show up as
|
kpeter@790
|
57 |
<bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
|
kpeter@790
|
58 |
3. Does not handle "crossref" fields other than to print
|
kpeter@790
|
59 |
<bibxml:crossref>...</bibxml:crossref>
|
kpeter@790
|
60 |
4. Does not inform user of the input's format errors. You just won't
|
kpeter@790
|
61 |
be able to transform the file later with XSL
|
kpeter@790
|
62 |
|
kpeter@790
|
63 |
You will have to manually edit the XML output if you need to handle
|
kpeter@790
|
64 |
these (and unknown) limitations.
|
kpeter@790
|
65 |
|
kpeter@790
|
66 |
"""
|
kpeter@790
|
67 |
|
kpeter@790
|
68 |
import string, re
|
kpeter@790
|
69 |
|
kpeter@790
|
70 |
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns containing backslashes are raw strings: '\s', '\w' etc. are
# invalid string escapes, and '\\\url' is a hard SyntaxError on Python 3
# (it is read as a truncated \uXXXX escape).
#

# separator between the names in an "author"/"editor" field
author_rex = re.compile(r'\s+and\s+')
# any single brace
rembraces_rex = re.compile('[{}]')
# a brace group without nested braces (captured, so split() keeps it)
capitalize_rex = re.compile('({[^}]*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile('[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile('([{}"])', re.I)

# "name = value": group 1 is the name, group 2 the rest of the line
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
# "name = value,": group 2 stops at the first comma
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# a literal LaTeX \url{...} command; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
|
kpeter@790
|
98 |
|
kpeter@790
|
99 |
#
|
kpeter@790
|
100 |
# replace LaTeX \url{...} commands in the string parameter with HTML links
|
kpeter@790
|
101 |
#
|
kpeter@790
|
102 |
def transformurls(str):
    r"""Return `str` with every \url{ADDR} turned into an HTML link.

    The address is used both as the link target and as the link text.
    """
    link_template = r'<a href="\1">\1</a>'
    return url_rex.sub(link_template, str)
|
kpeter@790
|
104 |
|
kpeter@790
|
105 |
#
|
kpeter@790
|
106 |
# return the string parameter without braces
|
kpeter@790
|
107 |
#
|
kpeter@790
|
108 |
def removebraces(str):
    """Return `str` with every '{' and '}' character deleted."""
    return re.sub(rembraces_rex, '', str)
|
kpeter@790
|
110 |
|
kpeter@790
|
111 |
#
|
kpeter@790
|
112 |
# latex-specific replacements
|
kpeter@790
|
113 |
# (do this after braces were removed)
|
kpeter@790
|
114 |
#
|
kpeter@790
|
115 |
# (LaTeX sequence, HTML replacement) pairs, applied in this order.
# The replacements are HTML entities rather than raw accented characters:
# this keeps the script and its output pure ASCII, and copychars() relies
# on the "&xyz;" form to skip over accented letters.
_LATEX_REPLACEMENTS = (
    ('~', ' '),
    ('\\\'a', '&aacute;'),
    ('\\"a', '&auml;'),
    ('\\\'e', '&eacute;'),
    ('\\"e', '&euml;'),
    ('\\\'i', '&iacute;'),
    ('\\"i', '&iuml;'),
    ('\\\'o', '&oacute;'),
    ('\\"o', '&ouml;'),
    ('\\\'u', '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),   # &utilde; does not exist
    ('\\\'A', '&Aacute;'),
    ('\\"A', '&Auml;'),
    ('\\\'E', '&Eacute;'),
    ('\\"E', '&Euml;'),
    ('\\\'I', '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ('\\\'O', '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ('\\\'U', '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),   # &Utilde; does not exist
)


def latexreplacements(line):
    """Replace a fixed set of LaTeX sequences in `line` by HTML equivalents.

    '~' becomes a plain space; the accent commands \\'x, \\"x and \\H x
    (for the vowels listed in _LATEX_REPLACEMENTS) become HTML entities.
    Braces must already have been removed, since the patterns are matched
    literally (e.g. "\\H o", not "{\\H o}").

    Uses str.replace instead of string.replace(), which no longer exists
    on Python 3.
    """
    for latex, html in _LATEX_REPLACEMENTS:
        line = line.replace(latex, html)
    return line
|
kpeter@790
|
143 |
|
kpeter@790
|
144 |
#
|
kpeter@790
|
145 |
# copy characters from a string decoding html expressions (&xyz;)
|
kpeter@790
|
146 |
#
|
kpeter@790
|
147 |
def copychars(str, ifrom, count):
    """Collect up to `count` letters of `str`, starting at index `ifrom`.

    Only ASCII letters are copied; every other character is skipped.
    An HTML entity "&xyz;" contributes exactly one character, the one
    that follows the '&' (so "&aacute;" counts as "a"); the rest of the
    entity up to and including ';' is ignored.

    Returns the collected characters as a string (possibly shorter than
    `count` if the input runs out).
    """
    letters = []
    length = len(str)
    pos = ifrom
    in_entity = False
    while pos < length and len(letters) < count:
        ch = str[pos]
        if ch == '&':
            # start of an entity: take its first character, skip the rest
            in_entity = True
            if pos + 1 < length:
                letters.append(str[pos + 1])
            pos += 2
            continue
        if in_entity:
            if ch == ';':
                in_entity = False
        elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
            letters.append(ch)
        pos += 1

    return ''.join(letters)
|
kpeter@790
|
170 |
|
kpeter@790
|
171 |
|
kpeter@790
|
172 |
#
|
kpeter@790
|
173 |
# Handle a list of authors (separated by 'and').
|
kpeter@790
|
174 |
# It gives back a dictionary with the following values:
|
kpeter@790
|
175 |
# - num: the number of authors,
|
kpeter@790
|
176 |
# - list: the list of the author names,
|
kpeter@790
|
177 |
# - text: the bibtex text (separated by commas and/or 'and')
|
kpeter@790
|
178 |
# - abbrev: abbreviation that can be used to indicate the
|
kpeter@790
|
179 |
# bibliography entries
|
kpeter@790
|
180 |
#
|
kpeter@790
|
181 |
def bibtexauthor(data):
    """Parse a BibTeX author/editor field (names separated by ' and ').

    Returns a dictionary with the keys
      - 'list':   the cleaned-up names, each in "First Last" order,
      - 'num':    the number of names,
      - 'text':   the names joined as "A", "A and B" or "A, B, and C",
      - 'abbrev': letters taken from the last word of every name
                  (three letters for a single author, one otherwise).
    """
    names = []
    for raw_name in author_rex.split(data):
        # general transformations
        name = latexreplacements(removebraces(raw_name.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        comma = name.find(',')
        if comma != -1:
            name = name[comma + 1:].strip() + ' ' + name[:comma].strip()
        names.append(name)

    # Join the names directly instead of going through a temporary '#'
    # separator, which corrupted names that themselves contain '#'.
    if len(names) <= 2:
        text = ' and '.join(names)
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    if len(names) == 1:
        letters_per_name = 3
    else:
        letters_per_name = 1
    abbrev = ''
    for name in names:
        # abbreviate from the last word of the name
        abbrev += copychars(name, name.rfind(' ') + 1, letters_per_name)

    return {
        'list': names,
        'num': len(names),
        'text': text,
        'abbrev': abbrev,
    }
|
kpeter@790
|
214 |
|
kpeter@790
|
215 |
|
kpeter@790
|
216 |
#
|
kpeter@790
|
217 |
# data = title string
|
kpeter@790
|
218 |
# @return the capitalized title (first letter is capitalized), rest are capitalized
|
kpeter@790
|
219 |
# only if capitalized inside braces
|
kpeter@790
|
220 |
#
|
kpeter@790
|
221 |
def capitalizetitle(data):
    """Normalise the capitalisation of a title string.

    The title is split into brace groups and the text between them:
      - a brace group keeps its capitalisation (its braces are removed),
      - the text before the first brace group loses its leading
        whitespace, gets its first character upper-cased and the rest
        lower-cased,
      - all other text is lower-cased.
    """
    pieces = []
    for index, phrase in enumerate(capitalize_rex.split(data)):
        # str.lstrip replaces string.lstrip(), which is gone in Python 3
        stripped = phrase.lstrip()
        if stripped.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif index == 0:
            # first word --> capitalize first letter (after spaces)
            pieces.append(stripped.capitalize())
        else:
            pieces.append(phrase.lower())

    return ''.join(pieces)
|
kpeter@790
|
240 |
|
kpeter@790
|
241 |
|
kpeter@790
|
242 |
#
|
kpeter@790
|
243 |
# @return the bibtex for the title
|
kpeter@790
|
244 |
# @param data --> title string
|
kpeter@790
|
245 |
# braces are removed from title
|
kpeter@790
|
246 |
#
|
kpeter@790
|
247 |
def bibtextitle(data, entrytype):
    """Return the output text for a title field.

    Surrounding whitespace and all braces are removed.  Titles of
    'book' and 'inbook' entries keep their capitalisation; every other
    entry type is passed through capitalizetitle() first.
    """
    stripped = data.strip()
    if entrytype not in ('book', 'inbook'):
        stripped = capitalizetitle(stripped)
    return removebraces(stripped)
|
kpeter@790
|
254 |
|
kpeter@790
|
255 |
|
kpeter@790
|
256 |
#
|
kpeter@790
|
257 |
# function to compare entry lists
|
kpeter@790
|
258 |
#
|
kpeter@790
|
259 |
def entry_cmp(x, y):
    """Three-way comparison of two entry lists by their first element.

    Returns -1, 0 or 1.  Written without the cmp() builtin, which was
    removed in Python 3.
    """
    left, right = x[0], y[0]
    return (left > right) - (left < right)
|
kpeter@790
|
261 |
|
kpeter@790
|
262 |
|
kpeter@790
|
263 |
#
|
kpeter@790
|
264 |
# print the XML for the transformed "filecont_source"
|
kpeter@790
|
265 |
#
|
kpeter@790
|
266 |
def _build_entry(entrytype, entryid, entrycont):
    """Format the parsed fields of one entry.

    Returns a list [sortkey, bibkey, entryid, text, text, ...] where the
    text items are the pieces of the bibliography line in output order.
    Fields that are missing from `entrycont` are simply left out.
    """
    def present(name):
        return entrycont.get(name, '') != ''

    def emphasize(name):
        if present(name):
            entrycont[name] = '<em>' + entrycont[name] + '</em>'

    # entry type related formattings
    if entrytype in ('book', 'inbook'):
        emphasize('title')
        if 'author' not in entrycont and 'editor' in entrycont:
            entrycont['author'] = entrycont['editor']
            entrycont['author']['text'] += ', editors'
    elif entrytype == 'article':
        emphasize('journal')
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        emphasize('booktitle')
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    if present('pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    lines = []
    if 'author' in entrycont:
        lines.append(entrycont['author']['text'] + '.')
    if present('title'):
        lines.append(entrycont['title'] + '.')
    if present('journal'):
        lines.append(entrycont['journal'] + ',')
    if present('booktitle'):
        lines.append('In ' + entrycont['booktitle'] + ',')
    if present('type'):
        eline = entrycont['type']
        if present('number'):
            eline += ' ' + entrycont['number']
        lines.append(eline + ',')
    for name in ('institution', 'publisher', 'school', 'address'):
        if present(name):
            lines.append(entrycont[name] + ',')
    if present('edition'):
        lines.append(entrycont['edition'] + ' edition,')
    if present('howpublished'):
        lines.append(entrycont['howpublished'] + ',')
    if present('volume'):
        # volume(number):pages
        eline = entrycont['volume']
        if present('number'):
            eline += '(' + entrycont['number'] + ')'
        if present('pages'):
            eline += ':' + entrycont['pages']
        lines.append(eline + ',')
    elif present('pages'):
        lines.append('pages ' + entrycont['pages'] + ',')
    if present('year'):
        if present('month'):
            lines.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            lines.append(entrycont['year'] + '.')
    if present('note'):
        lines.append(entrycont['note'] + '.')
    if present('url'):
        lines.append(entrycont['url'] + '.')

    # generate keys for sorting and for the output
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ') + 1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        # an explicit "key" field overrides the generated label
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']

    return [sortkey, bibkey, entryid] + lines


def bibtexdecoder(filecont_source):
    """Convert washed BibTeX lines into Doxygen source lines.

    `filecont_source` is an iterable of lines.  The last character of
    every line is dropped (presumably the trailing newline -- confirm
    against the caller).  An entry starts with a line matching
    "@type{id," and ends with a line consisting of "}"; the lines in
    between are "field = value" lines.

    Returns a list of output lines: for every entry, sorted by its sort
    key, a "\\section" line, an opening <div>, the formatted pieces of
    the entry, a closing </div> and an empty line.
    """
    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    endtype_rex = re.compile(r'}\s*$')

    bracefield_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')

    quotefield_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')

    filecont = []
    entrycont = None      # fields of the entry being read, None outside
    entrytype = ''
    entryid = ''

    for line in filecont_source:
        line = line[:-1]

        # encode character entities ('&' first, so that the entities
        # produced for '<' and '>' are not encoded again)
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        if pubtype_rex.match(line):
            entrycont = {}
            entrytype = pubtype_rex.sub(r'\g<1>', line).lower()
            entryid = pubtype_rex.sub(r'\g<2>', line)
            continue

        # ignore anything that is not inside an entry
        if entrycont is None:
            continue

        # end entry if just a }
        if endtype_rex.match(line):
            filecont.append(_build_entry(entrytype, entryid, entrycont))
            entrycont = None
            continue

        # field, publication info
        field = ''
        data = ''

        # field = {data} entries
        if bracedata_rex.match(line):
            field = bracefield_rex.sub(r'\g<1>', line).lower()
            data = bracedata_rex.sub(r'\g<2>', line)

        # field = "data" entries
        elif quotedata_rex.match(line):
            field = quotefield_rex.sub(r'\g<1>', line).lower()
            data = quotedata_rex.sub(r'\g<2>', line)

        # field = data entries
        elif data_rex.match(line):
            field = field_rex.sub(r'\g<1>', line).lower()
            data = data_rex.sub(r'\g<2>', line)

        if field == 'url':
            # wrap the address so that transformurls() makes it a link
            data = '\\url{' + data.strip() + '}'

        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            line = ''
        elif field == 'title':
            line = bibtextitle(data, entrytype)
        elif field != '':
            line = removebraces(transformurls(data.strip()))

        if line != '':
            entrycont[field] = latexreplacements(line)

    # sort entries by their sort key (stable, like the old cmp-based sort)
    filecont.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in filecont:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = dict.fromkeys(keytable, 0)

    # generate output
    output = []
    for entry in filecont:
        # generate output key from the bibtex key; keys used by several
        # entries get the suffix a, b, c, ...
        bibkey = entry[1]
        entryid = entry[2]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            outkey = bibkey + chr(97 + counttable[bibkey])
            counttable[bibkey] += 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(entry[3:])
        output.append('</div>')
        output.append('')

    return output
|
kpeter@790
|
473 |
|
kpeter@790
|
474 |
|
kpeter@790
|
475 |
#
|
kpeter@790
|
476 |
# return 1 iff abbr is in line but not inside braces or quotes
|
kpeter@790
|
477 |
# assumes that abbr appears only once on the line (out of braces and quotes)
|
kpeter@790
|
478 |
#
|
kpeter@790
|
479 |
def verify_out_of_braces(line, abbr):
    """Return 1 iff `abbr` occurs in `line` outside braces and quotes.

    `abbr` is matched as a whole word, ignoring case.  Returns 0
    otherwise.  Assumes that `abbr` appears only once on the line
    (out of braces and quotes).
    """
    # escape the name so that it is always matched literally
    abbr_rex = re.compile(r'\b' + re.escape(abbr) + r'\b', re.I)

    open_brace = 0
    open_quote = 0

    for phrase in delimiter_rex.split(line):
        if phrase == '{':
            open_brace += 1
        elif phrase == '}':
            open_brace -= 1
        elif phrase == '"':
            # a quote toggles between "inside" and "outside"
            open_quote = 1 - open_quote
        elif abbr_rex.search(phrase):
            if open_brace == 0 and open_quote == 0:
                return 1

    return 0
|
kpeter@790
|
503 |
|
kpeter@790
|
504 |
|
kpeter@790
|
505 |
#
|
kpeter@790
|
506 |
# a line in the form phrase1 # phrase2 # ... # phrasen
|
kpeter@790
|
507 |
# is returned as phrase1 phrase2 ... phrasen
|
kpeter@790
|
508 |
# with the correct punctuation
|
kpeter@790
|
509 |
# Bug: Doesn't always work with multiple abbreviations plugged in
|
kpeter@790
|
510 |
#
|
kpeter@790
|
511 |
def concat_line(line):
    """Resolve BibTeX '#' concatenation on a "field = value" line.

    A line of the form  field = phrase1 # phrase2 # ... # phrasen
    is returned as      field = phrase1 phrase2 ... phrasen
    with the inner quote/brace delimiters dropped and the outer ones
    turned into braces.

    Known bug: doesn't always work with multiple abbreviations plugged in.
    """
    # only look at part after equals
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    phrases = concatsplit_rex.split(rest)
    last = len(phrases) - 1

    parts = [field + ' =']
    for index, phrase in enumerate(phrases):
        phrase = phrase.strip()

        # opening delimiter: dropped inside, '"' becomes '{' at the front
        if index != 0:
            if phrase.startswith(('"', '{')):
                phrase = phrase[1:]
        elif phrase.startswith('"'):
            phrase = '{' + phrase[1:]

        # closing delimiter: dropped inside, '"' becomes '}' at the end
        if index != last:
            if phrase.endswith(('"', '}')):
                phrase = phrase[:-1]
        elif phrase.endswith('"'):
            phrase = phrase[:-1] + '}'
        elif phrase.endswith('",'):
            phrase = phrase[:-2] + '},'

        # if phrase did have \#, add the \# back
        if phrase.endswith('\\'):
            phrase = phrase + '#'

        parts.append(phrase)

    return ' '.join(parts)
|
kpeter@790
|
550 |
|
kpeter@790
|
551 |
|
kpeter@790
|
552 |
#
|
kpeter@790
|
553 |
# substitute abbreviations into filecont
|
kpeter@790
|
554 |
# @param filecont_source - string of data from file
|
kpeter@790
|
555 |
#
|
kpeter@790
|
556 |
def bibtex_replace_abbreviations(filecont_source):
    """Substitute @string abbreviations into the BibTeX text.

    `filecont_source` is the whole (washed) file as one string.  The
    default month abbreviations of BibTeX are always known; further
    ones are collected from "@string{name = value" lines.  @string,
    @comment and @preamble lines are dropped from the output.

    Returns the processed text as one string, one field per line.
    """
    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    def compile_abbr(abbr):
        # whole-word match; group 1 is an optional trailing comma
        return re.compile(r'\b' + re.escape(abbr) + r'(,?)\b', re.I)

    abbr_rex = [compile_abbr(abbr) for abbr in abbr_list]

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)

    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = False
    kept_lines = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        # NOTE(review): the flattened source does not show whether only
        # the closing line or every line of a skipped block is dropped;
        # this keeps the reading where only the line containing '}' is.
        if waiting_for_end_string and '}' in line:
            waiting_for_end_string = False
            continue

        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub(r'\g<1>', line)

            if abbr not in abbr_list:
                val = abbrdef_rex.sub(r'\g<2>', line)
                abbr_list.append(abbr)
                value_list.append(val.strip())
                abbr_rex.append(compile_abbr(abbr))
            waiting_for_end_string = True
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = True
            continue

        # replace subsequent abbreviations with the value
        for abbr, rex, value in zip(abbr_list, abbr_rex, value_list):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # A function is used as replacement so that
                    # backslashes in the value (LaTeX commands) are
                    # inserted literally instead of being parsed as
                    # regex template escapes.
                    line = rex.sub(
                        lambda match, value=value: value + match.group(1),
                        line)
                # Check for # concatenations
                # NOTE(review): nesting level assumed (inside the
                # "abbreviation found" branch) -- confirm.
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept_lines.append(line + '\n')

    filecont2 = ''.join(kept_lines)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub(r'\g<1>},' + '\n', filecont2)

    return filecont2
|
kpeter@790
|
651 |
|
kpeter@790
|
652 |
#
|
kpeter@790
|
653 |
# convert @type( ... ) to @type{ ... }
|
kpeter@790
|
654 |
#
|
kpeter@790
|
655 |
def no_outer_parens(filecont):
    """Convert parenthesis-delimited entries to brace-delimited ones.

    BibTeX allows an entry to be written either as ``@type{ ... }`` or as
    ``@type( ... )``.  This function rewrites the second form into the
    first so that later processing only has to deal with braces.

    Only the outermost pair of parentheses of such an entry is replaced;
    parentheses nested inside the entry, and parentheses that are not
    directly preceded by an ``@type`` token, are left untouched.

    filecont -- the whole BibTeX source as a single string.

    Returns the rewritten source as a string.
    """
    # The capturing group makes re.split keep every delimiter as an item
    # of its own, so the text can be rebuilt piece by piece.
    pieces = re.split(r'([(){}])', filecont)

    # An entry opener is an "@type" token (optionally followed by blanks)
    # located directly in front of the delimiter, i.e. at the very end of
    # a text piece.  Anchoring the pattern keeps an '@' that occurs
    # earlier in the text (e.g. in an e-mail address) from turning a
    # following, unrelated parenthesis into a brace.
    entry_type_rex = re.compile(r'@\w*\s*$')

    rebuilt = []
    in_paren_entry = False    # inside an entry that was opened with '('
    paren_depth = 0           # nesting depth of '(' within that entry
    after_entry_type = False  # the previous piece ended with "@type"

    for piece in pieces:
        opens_entry = (after_entry_type and piece == '('
                       and not in_paren_entry)
        after_entry_type = False

        if opens_entry:
            # The depth is restarted for every entry, so unbalanced
            # parentheses in earlier entries cannot disturb this one.
            in_paren_entry = True
            paren_depth = 1
            piece = '{'
        elif in_paren_entry and piece == '(':
            paren_depth += 1
        elif in_paren_entry and piece == ')':
            paren_depth -= 1
            if paren_depth == 0:
                # This is the parenthesis matching the entry opener.
                in_paren_entry = False
                piece = '}'
        elif entry_type_rex.search(piece):
            after_entry_type = True

        rebuilt.append(piece)

    return ''.join(rebuilt)
|
kpeter@790
|
695 |
|
kpeter@790
|
696 |
|
kpeter@790
|
697 |
#
|
kpeter@790
|
698 |
# make all whitespace into just one space
|
kpeter@790
|
699 |
# format the bibtex file into a usable form.
|
kpeter@790
|
700 |
#
|
kpeter@790
|
701 |
def bibtexwasher(filecont_source):
    """Normalize raw BibTeX source lines into a line-oriented form.

    The input lines are stripped, comment lines (starting with '%') and
    empty lines are dropped, and everything is joined into one long
    string.  Parenthesis-delimited entries are converted to braces with
    no_outer_parens(), line breaks are re-inserted after field values and
    around entries, the LaTeX escape for '&' is decoded, and abbreviations
    are expanded with bibtex_replace_abbreviations().

    filecont_source -- iterable of text lines (e.g. from readlines()).

    Returns a list of the resulting non-blank lines, each terminated by a
    newline character.
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace, ignore comments
    kept = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            kept.append(' ' + line)

    # the file is in one long string
    filecont = no_outer_parens(''.join(kept))

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # put the brace that closes an entry (depth back to 0) on its own line
    open_brace_count = 0
    rebuilt = []
    for phrase in re.split(r'([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                rebuilt.append('\n')
        rebuilt.append(phrase)
    filecont = ''.join(rebuilt)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the non-blank lines, each one terminated by a newline
    return [line + '\n'
            for line in filecont2.splitlines()
            if line != '' and line != ' ']
|
kpeter@790
|
783 |
|
kpeter@790
|
784 |
|
kpeter@790
|
785 |
def filehandler(filepath):
    """Convert the BibTeX file at filepath and print a Doxygen page.

    The file is read, passed through bibtexwasher() and bibtexdecoder(),
    and the resulting lines are written to standard output, wrapped in a
    Doxygen comment block that defines the "references" page.

    If the file cannot be read, a message is written to standard error
    and the script exits with status 1.

    filepath -- path of the BibTeX file to convert.
    """
    import sys

    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the file even if reading fails
            fd.close()
    except (IOError, OSError):
        # Without the input nothing can be converted.  Report on stderr,
        # because stdout carries the generated .dox content, and stop
        # here instead of going on with an undefined filecont_source.
        sys.stderr.write('Could not open file: %s\n' % (filepath,))
        sys.exit(1)

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    # sys.stdout.write is used instead of the print statement so that
    # this function is valid under both Python 2 and Python 3.
    write = sys.stdout.write
    write('/**\n')
    write('\\page references References\n')
    write('\n')
    for line in outdata:
        write('%s\n' % (line,))
    write('*/\n')
|
kpeter@790
|
800 |
|
kpeter@790
|
801 |
|
kpeter@790
|
802 |
# main program
|
kpeter@790
|
803 |
|
kpeter@790
|
804 |
def main():
    """Command line entry point.

    Takes the path of the BibTeX file from the first command line
    argument and passes it to filehandler().  If no argument is given, a
    message is written to standard error and the script exits with
    status 1.
    """
    import sys

    if not sys.argv[1:]:
        # stdout is normally redirected into the generated .dox file, so
        # the error goes to stderr and is signalled by the exit status.
        sys.stderr.write("No input file\n")
        sys.exit(1)

    filehandler(sys.argv[1])
|
kpeter@790
|
812 |
|
kpeter@790
|
813 |
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__": main()
|
kpeter@790
|
814 |
|
kpeter@790
|
815 |
|
kpeter@790
|
816 |
# end python script
|