1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/scripts/bib2dox.py Sat Sep 26 10:15:49 2009 +0200
1.3 @@ -0,0 +1,806 @@
1.4 +#!/usr/bin/env /usr/local/Python/bin/python2.1
1.5 +"""
1.6 + BibTeX to Doxygen converter
1.7 + Usage: python bib2dox.py bibfile.bib > bibfile.dox
1.8 +
1.9 + This code is the modification of the BibTeX to XML converter
1.10 + by Vidar Bronken Gundersen et al. See the original copyright notices below.
1.11 +
1.12 + **********************************************************************
1.13 +
1.14 + Decoder for bibliographic data, BibTeX
1.15 + Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
1.16 +
1.17 + v.8
1.18 + (c)2002-06-23 Vidar Bronken Gundersen
1.19 + http://bibtexml.sf.net/
1.20 + Reuse approved as long as this notification is kept.
1.21 + Licence: GPL.
1.22 +
1.23 + Contributions/thanks to:
1.24 + Egon Willighagen, http://sf.net/projects/jreferences/
1.25 + Richard Mahoney (for providing a test case)
1.26 +
1.27 + Edited by Sara Sprenkle to be more robust and handle more bibtex features.
1.28 + (c) 2003-01-15
1.29 +
1.30 + 1. Changed bibtex: tags to bibxml: tags.
1.31 + 2. Use xmlns:bibxml="http://bibtexml.sf.net/"
1.32 + 3. Allow spaces between @type and first {
1.33 + 4. "author" fields with multiple authors split by " and "
1.34 + are put in separate xml "bibxml:author" tags.
1.35 + 5. Option for Titles: words are capitalized
1.36 + only if first letter in title or capitalized inside braces
1.37 + 6. Removes braces from within field values
1.38 + 7. Ignores comments in bibtex file (including @comment{ or % )
1.39 + 8. Replaces some special latex tags, e.g., replaces ~ with ' '
1.40 + 9. Handles bibtex @string abbreviations
1.41 + --> includes bibtex's default abbreviations for months
1.42 + --> does concatenation of abbr # " more " and " more " # abbr
1.43 + 10. Handles @type( ... ) or @type{ ... }
1.44 + 11. The keywords field is split on , or ; and put into separate xml
1.45 + "bibxml:keywords" tags
1.46 + 12. Ignores @preamble
1.47 +
1.48 + Known Limitations
1.49 + 1. Does not transform Latex encoding like math mode and special
1.50 + latex symbols.
1.51 + 2. Does not parse author fields into first and last names.
1.52 + E.g., It does not do anything special to an author whose name is
1.53 + in the form LAST_NAME, FIRST_NAME
1.54 + In "author" tag, will show up as
1.55 + <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
1.56 + 3. Does not handle "crossref" fields other than to print
1.57 + <bibxml:crossref>...</bibxml:crossref>
1.58 + 4. Does not inform user of the input's format errors. You just won't
1.59 + be able to transform the file later with XSL
1.60 +
1.61 + You will have to manually edit the XML output if you need to handle
1.62 + these (and unknown) limitations.
1.63 +
1.64 +"""
1.65 +
1.66 +import string, re
1.67 +
# set of valid name characters (used as a regex character class)
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings so that the backslashes reach the regex
# engine untouched; the values are the same as the former plain literals,
# but no longer rely on unknown string escapes being passed through.
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile(r'[{}]')
capitalize_rex = re.compile(r'({\w*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = value" lines: field_rex keeps the whole rest of the line as value,
# data_rex stops the value at the first comma
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# LaTeX \url{...} command; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')
1.91 +
1.92 +
1.93 +#
1.94 +# return the string parameter with LaTeX \url{...} commands turned into HTML links
1.95 +#
def transformurls(str):
    """Return `str` with every LaTeX \\url{address} turned into an HTML link.

    Each occurrence matched by `url_rex` becomes
    <a href="address">address</a>; all other text is left as it is.
    """
    def as_link(found):
        # group 1 of url_rex is the text between the braces
        address = found.group(1)
        return '<a href="' + address + '">' + address + '</a>'

    return url_rex.sub(as_link, str)
1.98 +
1.99 +#
1.100 +# return the string parameter without braces
1.101 +#
def removebraces(str):
    """Return `str` with every '{' and '}' character deleted."""
    without_open = str.replace('{', '')
    return without_open.replace('}', '')
1.104 +
1.105 +#
1.106 +# latex-specific replacements
1.107 +# (do this after braces were removed)
1.108 +#
def latexreplacements(line):
    """Return `line` with a few LaTeX constructs replaced by HTML text.

    - '~' (non-breaking space) becomes a plain space;
    - acute (\\'x) and umlaut (\\"x) accents on the vowels a, e, i, o, u
      (both cases) become the HTML entities &xacute; and &xuml;;
    - the double acute accent (\\H o, \\H u) has no HTML entity of its own
      and is approximated by &otilde; / &uuml; (and the upper-case forms).

    The replacements are plain ASCII HTML entities rather than raw accented
    characters, so the result does not depend on any source or output
    encoding.  Braces are not handled here: call this after they have been
    removed.
    """
    line = line.replace('~', ' ')
    for vowel in 'aeiouAEIOU':
        line = line.replace("\\'" + vowel, '&' + vowel + 'acute;')
        line = line.replace('\\"' + vowel, '&' + vowel + 'uml;')
    for latex, entity in (('\\H o', '&otilde;'),
                          ('\\H u', '&uuml;'),    # &utilde; does not exist
                          ('\\H O', '&Otilde;'),
                          ('\\H U', '&Uuml;')):   # &Utilde; does not exist
        line = line.replace(latex, entity)
    return line
1.137 +
1.138 +#
1.139 +# copy characters from a string, decoding html expressions (&xyz;)
1.140 +#
def copychars(str, ifrom, count):
    """Collect up to `count` letters of `str`, starting at index `ifrom`.

    Only the ASCII letters A-Z and a-z are copied; any other character is
    skipped without being counted.  An HTML expression of the form "&xyz;"
    counts as one character and contributes the character that follows the
    '&'; the rest of the expression, up to and including ';', is skipped.
    """
    picked = []
    size = len(str)
    pos = ifrom
    taken = 0
    in_entity = False
    while pos < size and taken < count:
        ch = str[pos]
        pos += 1
        if ch == '&':
            # start of an html expression: take the character right after it
            in_entity = True
            if pos < size:
                picked.append(str[pos])
            taken += 1
            pos += 1
            continue
        if in_entity:
            # still inside "&xyz;": skip until the closing semicolon
            if ch == ';':
                in_entity = False
            continue
        if 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
            picked.append(ch)
            taken += 1
    return ''.join(picked)
1.164 +
1.165 +
1.166 +#
1.167 +# Handle a list of authors (separated by 'and').
1.168 +# It gives back a dictionary with the following values:
1.169 +# - num: the number of authors,
1.170 +# - list: the list of the author names,
1.171 +# - text: the bibtex text (separated by commas and/or 'and')
1.172 +# - abbrev: abbreviation that can be used for indicate the
1.173 +# bibliography entries
1.174 +#
def bibtexauthor(data):
    """Parse a BibTeX author/editor field into a dictionary.

    `data` is the raw field value with the names separated by " and ".
    The returned dictionary has these keys:
      - 'num':    the number of names,
      - 'list':   the names, braces removed, LaTeX accents replaced and
                  "Last, First" rewritten as "First Last",
      - 'text':   the names joined for display ("A", "A and B",
                  "A, B, and C"),
      - 'abbrev': a short label built by copychars from the last word of
                  each name: three letters for a single name, otherwise
                  one letter per name.
    """
    names = []
    for raw in author_rex.split(data):
        # general transformations
        name = latexreplacements(removebraces(raw.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        comma = name.find(',')
        if comma != -1:
            name = name[comma+1:].strip() + ' ' + name[:comma].strip()
        names.append(name)

    # Join the names directly instead of going through a '#' placeholder,
    # so that a name which itself contains '#' is left intact.
    num = len(names)
    if num == 1:
        text = names[0]
    elif num == 2:
        text = names[0] + ' and ' + names[1]
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    # a single author gets a three letter label, several get one letter each
    if num == 1:
        count = 3
    else:
        count = 1
    abbrev = ''.join([copychars(name, name.rfind(' ') + 1, count)
                      for name in names])

    result = {}
    result['list'] = names
    result['num'] = num
    result['text'] = text
    result['abbrev'] = abbrev
    return result
1.208 +
1.209 +
1.210 +#
1.211 +# data = title string
1.212 +# @return the capitalized title (first letter is capitalized), rest are capitalized
1.213 +# only if capitalized inside braces
1.214 +#
def capitalizetitle(data):
    """Return the title `data` with BibTeX-style capitalization applied.

    The title is cut at brace groups of the form {word}.  A piece that
    starts with '{' (after leading whitespace) keeps its capitalization and
    loses its braces.  Of the remaining pieces, the very first one is
    stripped of leading whitespace and capitalized; the others are lowered.
    """
    pieces = []
    for index, phrase in enumerate(capitalize_rex.split(data)):
        stripped = phrase.lstrip()
        if stripped.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif index == 0:
            # first word --> capitalize first letter (after spaces)
            pieces.append(stripped.capitalize())
        else:
            pieces.append(phrase.lower())
    return ''.join(pieces)
1.234 +
1.235 +
1.236 +#
1.237 +# @return the bibtex for the title
1.238 +# @param data --> title string
1.239 +# braces are removed from title
1.240 +#
def bibtextitle(data, entrytype):
    """Return the display form of the title string `data`.

    Titles of 'book' and 'inbook' entries keep their capitalization; all
    other entry types go through capitalizetitle first.  Surrounding
    whitespace and all braces are removed in both cases.
    """
    stripped = data.strip()
    if entrytype not in ('book', 'inbook'):
        stripped = capitalizetitle(stripped)
    return removebraces(stripped)
1.248 +
1.249 +
1.250 +#
1.251 +# function to compare entry lists
1.252 +#
def entry_cmp(x, y):
    """Three-way comparison of two entry lists by their first element.

    Returns -1, 0 or 1 depending on whether x[0] sorts before, equal to or
    after y[0].
    """
    left = x[0]
    right = y[0]
    if left < right:
        return -1
    if left > right:
        return 1
    return 0
1.255 +
1.256 +
1.257 +#
1.258 +# return the Doxygen/HTML table rows generated from the washed "filecont_source"
1.259 +#
def _has_value(entrycont, name):
    """Tell whether field `name` is stored in the entry with a non-empty value."""
    return name in entrycont and entrycont[name] != ''


def _emphasize(entrycont, name):
    """Wrap field `name` of the entry in <em> tags, if the entry has it."""
    if name in entrycont:
        entrycont[name] = '<em>' + entrycont[name] + '</em>'


def _format_entry(entrytype, entrycont):
    """Build the output record of one finished BibTeX entry.

    `entrytype` is the lower-cased publication type and `entrycont` maps
    lower-cased field names to their processed values ('author'/'editor'
    hold bibtexauthor dictionaries, everything else holds text).
    `entrycont` is modified in place by the type specific formatting.

    Returns a list: the sort key, the citation key, then the text
    fragments of the reference in output order.
    """
    # entry type related formatting; fields that are missing are skipped
    if entrytype in ('book', 'inbook'):
        _emphasize(entrycont, 'title')
        if 'author' not in entrycont and 'editor' in entrycont:
            entrycont['author'] = entrycont['editor']
            entrycont['author']['text'] += ', editors'
    elif entrytype == 'article':
        _emphasize(entrycont, 'journal')
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        _emphasize(entrycont, 'booktitle')
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    if _has_value(entrycont, 'pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    entry = []
    if _has_value(entrycont, 'author'):
        entry.append(entrycont['author']['text'] + '.')
    if _has_value(entrycont, 'title'):
        entry.append(entrycont['title'] + '.')
    if _has_value(entrycont, 'journal'):
        entry.append(entrycont['journal'] + ',')
    if _has_value(entrycont, 'booktitle'):
        entry.append('In ' + entrycont['booktitle'] + ',')
    if _has_value(entrycont, 'type'):
        eline = entrycont['type']
        if _has_value(entrycont, 'number'):
            eline += ' ' + entrycont['number']
        entry.append(eline + ',')
    for name in ('institution', 'publisher', 'school', 'address'):
        if _has_value(entrycont, name):
            entry.append(entrycont[name] + ',')
    if _has_value(entrycont, 'edition'):
        entry.append(entrycont['edition'] + ' edition,')
    if _has_value(entrycont, 'howpublished'):
        entry.append(entrycont['howpublished'] + ',')
    if _has_value(entrycont, 'volume'):
        # volume(number):pages
        eline = entrycont['volume']
        if _has_value(entrycont, 'number'):
            eline += '(' + entrycont['number'] + ')'
        if _has_value(entrycont, 'pages'):
            eline += ':' + entrycont['pages']
        entry.append(eline + ',')
    elif _has_value(entrycont, 'pages'):
        entry.append('pages ' + entrycont['pages'] + ',')
    if _has_value(entrycont, 'year'):
        if _has_value(entrycont, 'month'):
            entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            entry.append(entrycont['year'] + '.')
    if _has_value(entrycont, 'note'):
        entry.append(entrycont['note'] + '.')

    # generate keys for sorting and for the output
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ')+1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        # an explicit "key" field overrides the generated citation key
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']

    return [sortkey, bibkey] + entry


def bibtexdecoder(filecont_source):
    """Convert washed BibTeX lines into the rows of a Doxygen/HTML table.

    `filecont_source` is a sequence of lines in the layout produced by
    bibtexwasher: an "@type{id," line opens an entry, a line starting with
    a lone "}" closes it, and every line in between holds one
    "field = value" pair.

    Returns a list of strings.  For every entry, in sort key order, it
    contains a table row made of the citation key in brackets, a Doxygen
    \\anchor command and the formatted reference text.  Entries that share
    a citation key get the suffixes a, b, c, ...

    Lines outside an entry (before the first "@type{" line or after a
    closing "}") are ignored.
    """
    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    endtype_rex = re.compile(r'}\s*$')

    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')

    filecont = []
    entrycont = None    # fields of the entry being read, None outside entries
    entrytype = ''

    for line in filecont_source:
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities ('&' first, so the entities produced
        # for '<' and '>' are not encoded a second time)
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        if pubtype_rex.match(line):
            entrycont = {}
            entrytype = pubtype_rex.sub(r'\g<1>', line).lower()
            continue

        if entrycont is None:
            continue

        # end entry if just a }
        if endtype_rex.match(line):
            filecont.append(_format_entry(entrytype, entrycont))
            entrycont = None
            continue

        # field, publication info: try field = {data}, then field = "data",
        # then field = data
        field = ''
        data = ''
        for value_rex in (bracedata_rex, quotedata_rex, data_rex):
            if value_rex.match(line):
                field = field_rex.sub(r'\g<1>', line).lower()
                data = value_rex.sub(r'\g<2>', line)
                break

        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            line = ''
        elif field == 'title':
            line = bibtextitle(data, entrytype)
        elif field != '':
            line = removebraces(transformurls(data.strip()))

        # lines without a recognizable field name are dropped
        if field != '' and line != '':
            entrycont[field] = latexreplacements(line)

    # sort entries
    filecont.sort(entry_cmp)

    # count the bibtex keys
    keytable = {}
    counttable = {}
    for entry in filecont:
        bibkey = entry[1]
        keytable[bibkey] = keytable.get(bibkey, 0) + 1
        counttable[bibkey] = 0

    # generate output
    output = []
    for entry in filecont:
        # generate output key from the bibtex key
        bibkey = entry[1]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            # 97 is ord('a'): duplicates are numbered a, b, c, ...
            outkey = bibkey + chr(97 + counttable[bibkey])
        counttable[bibkey] += 1

        # append the entry code to the output
        output.append('<tr valign="top">\n' +
                      '<td>[' + outkey + ']</td>')
        output.append('<td>')
        output.append('\\anchor ' + outkey)
        output.extend(entry[2:])
        output.append('</td>\n</tr>')
        output.append('')

    return output
1.462 +
1.463 +
1.464 +#
1.465 +# return 1 iff abbr is in line but not inside braces or quotes
1.466 +# assumes that abbr appears only once on the line (out of braces and quotes)
1.467 +#
def verify_out_of_braces(line, abbr):
    """Return 1 if `abbr` occurs in `line` outside braces and quotes, else 0.

    `abbr` is matched as a whole word, ignoring case.  The line is cut at
    every '{', '}' and '"'; a piece counts only while no brace is open and
    no quote is pending.
    """
    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    brace_depth = 0
    inside_quotes = 0

    for token in delimiter_rex.split(line):
        if token == '{':
            brace_depth += 1
        elif token == '}':
            brace_depth -= 1
        elif token == '"':
            # a quote character toggles the "inside quotes" state
            inside_quotes = 1 - inside_quotes
        elif brace_depth == 0 and inside_quotes == 0 and word_rex.search(token):
            return 1

    return 0
1.492 +
1.493 +
1.494 +#
1.495 +# a line in the form phrase1 # phrase2 # ... # phrasen
1.496 +# is returned as phrase1 phrase2 ... phrasen
1.497 +# with the correct punctuation
1.498 +# Bug: Doesn't always work with multiple abbreviations plugged in
1.499 +#
def concat_line(line):
    """Join the '#'-concatenated parts of a "field = a # b # ..." line.

    The text after the '=' is cut at every '#'.  The delimiters ('"', '{',
    '}') between neighbouring parts are dropped, an opening '"' of the first
    part becomes '{' and a closing '"' (or '",') of the last part becomes
    '}' (or '},').  The parts are then put back together, separated by
    single spaces, after "field =".
    """
    # only look at part after equals
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    parts = concatsplit_rex.split(rest)
    last = len(parts) - 1
    joined = [field + ' =']

    for index, part in enumerate(parts):
        part = part.strip()

        # opening delimiter
        if index == 0:
            if part.startswith('"'):
                part = '{' + part[1:]
        elif part.startswith('"') or part.startswith('{'):
            part = part[1:]

        # closing delimiter
        if index != last:
            if part.endswith('"') or part.endswith('}'):
                part = part[:-1]
        elif part.endswith('"'):
            part = part[:-1] + '}'
        elif part.endswith('",'):
            part = part[:-2] + '},'

        # if the part did have \#, add the # back
        if part.endswith('\\'):
            part += '#'
        joined.append(part)

    return ' '.join(joined)
1.539 +
1.540 +
1.541 +#
1.542 +# substitute abbreviations into filecont
1.543 +# @param filecont_source - string of data from file
1.544 +#
def bibtex_replace_abbreviations(filecont_source):
    """Substitute @string abbreviations and drop non-entry blocks.

    filecont_source is the whole (washed) file as one string.  The text is
    processed line by line:
      - blank lines are skipped,
      - a line defining an abbreviation (@string{name = value) is recorded
        and removed; a name that is already known keeps its first value,
      - lines opening an @comment or @preamble block are removed,
      - in every other line the known abbreviations (the twelve month
        names plus the recorded ones) are replaced by their values when
        they stand outside braces and quotes, and '#' concatenations are
        resolved with concat_line.

    Returns the resulting text as one string, one line per "\\n".
    """
    filecont = filecont_source.splitlines()

    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan','feb','mar','apr','may','jun',
                 'jul','aug','sep','oct','nov','dec']
    value_list = ['January','February','March','April',
                  'May','June','July','August','September',
                  'October','November','December']

    # abbr_rex[k] is the compiled pattern for abbr_list[k]; the three lists
    # are kept in step and total_abbr_count is their common length
    abbr_rex = []
    total_abbr_count = 0

    # an abbreviation is matched as a whole word; an optional comma right
    # after it is captured so that it can be put back behind the value
    front = '\\b'
    back = '(,?)\\b'

    for x in abbr_list:
        abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )
        total_abbr_count = total_abbr_count + 1


    abbrdef_rex = re.compile('\s*@string\s*{\s*('+ valid_name_chars +'*)\s*=(.*)',
                             re.I)

    comment_rex = re.compile('@comment\s*{',re.I)
    preamble_rex = re.compile('@preamble\s*{',re.I)

    # set after an @string/@comment/@preamble opener until a line with '}'
    waiting_for_end_string = 0
    i = 0
    filecont2 = ''

    for line in filecont:
        if line == ' ' or line == '':
            continue

        # NOTE(review): only the line that contains the closing brace is
        # dropped here; other lines seen while waiting fall through to the
        # normal processing below -- confirm that this is intended
        if waiting_for_end_string:
            if re.search('}',line):
                waiting_for_end_string = 0
                continue

        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub('\g<1>', line)

            # the first definition of a name wins
            if abbr_list.count(abbr) == 0:
                val = abbrdef_rex.sub('\g<2>', line)
                abbr_list.append(abbr)
                value_list.append(string.strip(val))
                abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )
                total_abbr_count = total_abbr_count + 1
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line):
            waiting_for_end_string = 1
            continue

        if preamble_rex.search(line):
            waiting_for_end_string = 1
            continue


        # replace subsequent abbreviations with the value
        abbr_count = 0

        for x in abbr_list:

            if abbr_rex[abbr_count].search(line):
                if verify_out_of_braces(line,abbr_list[abbr_count]) == 1:
                    # '\g<1>' re-inserts the comma captured by `back`
                    line = abbr_rex[abbr_count].sub( value_list[abbr_count] + '\g<1>', line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)
            abbr_count = abbr_count + 1


        filecont2 = filecont2 + line + '\n'
        i = i+1


    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"','{{')
    filecont2 = filecont2.replace('"}','}}')

    afterquotevalue_rex = re.compile('"\s*,\s*')
    afterbrace_rex = re.compile('"\s*}')
    afterbracevalue_rex = re.compile('(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\g<1>},\n', filecont2)

    return filecont2
1.640 +
1.641 +#
1.642 +# convert @type( ... ) to @type{ ... }
1.643 +#
def no_outer_parens(filecont):
    """Return `filecont` with "@type( ... )" rewritten as "@type{ ... }".

    The text is cut at every parenthesis and brace.  A '(' that comes
    directly after a piece containing '@' is turned into '{'; the ')' that
    brings the count of open parentheses back to zero is turned into '}'.
    Parentheses anywhere else are left alone.
    """
    entry_start_rex = re.compile(r'@\w*')

    open_parens = 0
    in_paren_entry = False   # an "@type" was seen and its ')' is still due
    expect_opener = False    # the previous piece contained an "@type"

    rebuilt = []
    for token in re.split('([(){}])', filecont):
        if expect_opener:
            expect_opener = False
            if token == '(':
                # the entry is delimited by parentheses: convert the opener
                token = '{'
                open_parens += 1
            else:
                in_paren_entry = False

        if token == '(':
            open_parens += 1
        elif token == ')':
            open_parens -= 1
            if in_paren_entry and open_parens == 0:
                # this one closes the entry
                token = '}'
                in_paren_entry = False
        elif entry_start_rex.search(token):
            in_paren_entry = True
            expect_opener = True

        rebuilt.append(token)

    return ''.join(rebuilt)
1.684 +
1.685 +
1.686 +#
1.687 +# make all whitespace into just one space
1.688 +# format the bibtex file into a usable form.
1.689 +#
def bibtexwasher(filecont_source):
    """Normalize raw BibTeX lines into the one-item-per-line form used later.

    `filecont_source` is a sequence of lines of the .bib file.  Lines that
    are empty or start with '%' are dropped and whitespace is squeezed; the
    rest is glued into one string, "@type( ... )" is converted to
    "@type{ ... }", and line breaks are inserted again so that every entry
    header, every field and every closing brace of an entry stands on a
    line of its own.  Finally the abbreviations are substituted by
    bibtex_replace_abbreviations.

    Returns the list of the resulting non-blank lines, each ending in "\\n".
    """
    whitespace_rex = re.compile(r'\s+')
    percent_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace, ignore comments
    kept = []
    for raw in filecont_source:
        squeezed = whitespace_rex.sub(' ', raw.strip())
        if squeezed != '' and not percent_rex.match(squeezed):
            kept.append(' ' + squeezed)

    # the file is in one long string from here on
    text = no_outer_parens(''.join(kept))

    # (pattern, replacement) pairs, applied in this order
    rewrites = (
        # split lines according to preferred syntax scheme
        (r'(=\s*{[^=]*)},', '\\g<1>},\n'),
        # add new lines after commas that are after values
        (r'"\s*,', '",\n'),
        (r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n'),
        (r'(@\w*)\s*({(\s*)[^,\s]*)\s*,', '\n\n\\g<1>\\g<2>,\n'),
        # add new lines after }
        (r'"\s*}', '"\n}\n'),
        (r'}\s*,', '},\n'),
        (r'@(\w*)', '\n@\\g<1>'),
        # character encoding, reserved latex characters
        (r'{\\&}', '&'),
        (r'\\&', '&'),
    )
    for pattern, replacement in rewrites:
        text = re.sub(pattern, replacement, text)

    # put the brace that closes a top-level group on a line of its own
    depth = 0
    pieces = []
    for token in re.split('([{}])', text):
        if token == '{':
            depth += 1
        elif token == '}':
            depth -= 1
            if depth == 0:
                pieces.append('\n')
        pieces.append(token)

    substituted = bibtex_replace_abbreviations(''.join(pieces))

    # gather the lines, ignoring the blank ones
    return [line + '\n' for line in substituted.splitlines()
            if line != '' and line != ' ']
1.772 +
1.773 +
def filehandler(filepath):
    """Convert the BibTeX file `filepath` and write the Doxygen page to stdout.

    The file is washed with bibtexwasher, decoded with bibtexdecoder and the
    resulting table rows are written inside a Doxygen comment that defines
    the page "references".

    If the file cannot be opened, the message goes to stderr, so it does not
    end up in redirected output, and the program exits with status 1.
    """
    import sys

    try:
        fd = open(filepath, 'r')
    except IOError:
        sys.stderr.write('Could not open file: %s\n' % filepath)
        sys.exit(1)
    try:
        filecont_source = fd.readlines()
    finally:
        fd.close()

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    page = ['/**',
            '\\page references References',
            '',
            '<table border="0" cellspacing="5px" width="100%">',
            '']
    page.extend(outdata)
    page.extend(['</table>',
                 '',
                 '*/'])
    sys.stdout.write('\n'.join(page) + '\n')
1.793 +
1.794 +
1.795 +# main program
1.796 +
def main():
    """Run the converter on the file named by the first command line argument.

    Without an argument, "No input file" is written to stderr and the
    program exits with status 1, so the failure neither looks like success
    nor lands in redirected output.
    """
    import sys
    if not sys.argv[1:]:
        sys.stderr.write('No input file\n')
        sys.exit(1)
    filehandler(sys.argv[1])

if __name__ == "__main__": main()
1.807 +
1.808 +
1.809 +# end python script