1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/scripts/bib2dox.py Thu Nov 05 08:39:49 2009 +0100
1.3 @@ -0,0 +1,811 @@
1.4 +#!/usr/bin/env /usr/local/Python/bin/python2.1
1.5 +"""
1.6 + BibTeX to Doxygen converter
1.7 + Usage: python bib2dox.py bibfile.bib > bibfile.dox
1.8 +
1.9 + This code is the modification of the BibTeX to XML converter
1.10 + by Vidar Bronken Gundersen et al. See the original copyright notices below.
1.11 +
1.12 + **********************************************************************
1.13 +
1.14 + Decoder for bibliographic data, BibTeX
1.15 + Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
1.16 +
1.17 + v.8
1.18 + (c)2002-06-23 Vidar Bronken Gundersen
1.19 + http://bibtexml.sf.net/
1.20 + Reuse approved as long as this notification is kept.
1.21 + Licence: GPL.
1.22 +
1.23 + Contributions/thanks to:
1.24 + Egon Willighagen, http://sf.net/projects/jreferences/
1.25 + Richard Mahoney (for providing a test case)
1.26 +
1.27 + Edited by Sara Sprenkle to be more robust and handle more bibtex features.
1.28 + (c) 2003-01-15
1.29 +
1.30 + 1. Changed bibtex: tags to bibxml: tags.
1.31 + 2. Use xmlns:bibxml="http://bibtexml.sf.net/"
1.32 + 3. Allow spaces between @type and first {
1.33 + 4. "author" fields with multiple authors split by " and "
1.34 + are put in separate xml "bibxml:author" tags.
1.35 + 5. Option for Titles: words are capitalized
1.36 + only if first letter in title or capitalized inside braces
1.37 + 6. Removes braces from within field values
1.38 + 7. Ignores comments in bibtex file (including @comment{ or % )
1.39 + 8. Replaces some special latex tags, e.g., replaces ~ with ' '
1.40 + 9. Handles bibtex @string abbreviations
1.41 + --> includes bibtex's default abbreviations for months
1.42 + --> does concatenation of abbr # " more " and " more " # abbr
1.43 + 10. Handles @type( ... ) or @type{ ... }
1.44 + 11. The keywords field is split on , or ; and put into separate xml
1.45 + "bibxml:keywords" tags
1.46 + 12. Ignores @preamble
1.47 +
1.48 + Known Limitations
1.49 + 1. Does not transform Latex encoding like math mode and special
1.50 + latex symbols.
1.51 + 2. Does not parse author fields into first and last names.
1.52 + E.g., It does not do anything special to an author whose name is
1.53 + in the form LAST_NAME, FIRST_NAME
1.54 + In "author" tag, will show up as
1.55 + <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
1.56 + 3. Does not handle "crossref" fields other than to print
1.57 + <bibxml:crossref>...</bibxml:crossref>
1.58 + 4. Does not inform user of the input's format errors. You just won't
1.59 + be able to transform the file later with XSL
1.60 +
1.61 + You will have to manually edit the XML output if you need to handle
1.62 + these (and unknown) limitations.
1.63 +
1.64 +"""
1.65 +
1.66 +import string, re
1.67 +
# set of valid name characters (regex character class, used for @string names)
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings so that every backslash reaches the regex
# engine unchanged instead of depending on Python leaving unknown string
# escapes alone.
#

# separator between the names of an author/editor list
author_rex = re.compile(r'\s+and\s+')
# any single brace character
rembraces_rex = re.compile(r'[{}]')
# one brace-protected group such as "{LaTeX}" (captured, so split() keeps it)
capitalize_rex = re.compile(r'({[^}]*})')

# splits a keyword list on ',' or ';'
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line): the BibTeX '#' concatenation operator
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces (delimiters are captured)
delimiter_rex = re.compile(r'([{}"])')

# "name = rest-of-line"
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
# "name = value," where the value runs up to the first comma
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# LaTeX \url{...} command; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
1.96 +
#
# return the string parameter with LaTeX urls turned into HTML links
#
def transformurls(str):
    r"""Replace every \url{ADDRESS} in *str* by an HTML anchor.

    The address is used both as the link target and as the link text.
    Text that contains no \url{...} command is returned unchanged.
    """
    anchor_template = r'<a href="\1">\1</a>'
    return url_rex.sub(anchor_template, str)
1.102 +
#
# return the string parameter without braces
#
def removebraces(str):
    """Return *str* with every '{' and '}' character deleted."""
    # Splitting on the brace characters and gluing the pieces back together
    # drops exactly the braces and nothing else.
    pieces = rembraces_rex.split(str)
    return ''.join(pieces)
1.108 +
#
# latex-specific replacements
# (do this after braces were removed)
#

# (LaTeX accent sequence, HTML entity) pairs, applied in this order.
# Entities keep this source file pure ASCII (there is no encoding
# declaration) and are the "&xyz;" form that copychars() knows how to decode.
_LATEX_ENTITIES = (
    ("\\'a", '&aacute;'),
    ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'),
    ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'),
    ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'),
    ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),    # &utilde; does not exist
    ("\\'A", '&Aacute;'),
    ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'),
    ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),    # &Utilde; does not exist
)


def latexreplacements(line):
    """Translate a few LaTeX constructs in *line* to their HTML form.

    '~' (non-breaking space) becomes a plain space; acute (\\'x), umlaut
    (\\"x) and Hungarian umlaut (\\H x) accents on vowels become HTML
    character entities.  The brace-less spelling is expected, so call this
    after the braces have been removed.

    Returns the converted string; text without any of these sequences is
    returned unchanged.
    """
    line = line.replace('~', ' ')
    for latex, entity in _LATEX_ENTITIES:
        line = line.replace(latex, entity)
    return line
1.141 +
#
# copy characters from a string decoding html expressions (&xyz;)
#
def copychars(str, ifrom, count):
    """Collect up to *count* letters of *str*, starting at index *ifrom*.

    Only the ASCII letters A-Z and a-z are copied; every other character is
    skipped without being counted.  An HTML expression "&xyz;" counts as one
    letter: the character right after the '&' is copied and the rest of the
    expression, up to and including ';', is skipped.

    Returns the collected letters as a string (possibly shorter than
    *count*, or empty).
    """
    picked = []
    total = len(str)
    pos = ifrom
    taken = 0
    inside_entity = False

    while pos < total and taken < count:
        ch = str[pos]
        if ch == '&':
            # start of an html expression: keep its first letter only
            inside_entity = True
            if pos + 1 < total:
                picked.append(str[pos + 1])
            taken += 1
            pos += 2
            continue

        if inside_entity:
            # swallow the remainder of the expression
            if ch == ';':
                inside_entity = False
        elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
            picked.append(ch)
            taken += 1
        pos += 1

    return ''.join(picked)
1.168 +
1.169 +
#
# Handle a list of authors (separated by 'and').
# It gives back a dictionary of the following values:
# - num: the number of authors,
# - list: the list of the author names,
# - text: the bibtex text (separated by commas and/or 'and')
# - abbrev: abbreviation that can be used to indicate the
#   bibliography entries
#
def bibtexauthor(data):
    """Parse the value of an author/editor field.

    *data* is split on the word 'and'.  Each name is stripped, has its
    braces removed and its LaTeX accents converted, and a name written as
    "Last, First" is turned into "First Last".

    Returns a dictionary with the keys 'num', 'list', 'text' and 'abbrev':
    'text' reads "A", "A and B" or "A, B, and C"; 'abbrev' is built from
    the letters of the last word of each name (three letters for a single
    author, one letter per author otherwise).
    """
    names = []
    for raw_name in author_rex.split(data):
        # general transformations
        name = latexreplacements(removebraces(raw_name.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        comma = name.find(',')
        if comma != -1:
            name = name[comma + 1:].strip() + ' ' + name[:comma].strip()
        names.append(name)

    # Join the names directly; going through a marker character would
    # corrupt any name that happens to contain that character.
    if len(names) <= 2:
        text = ' and '.join(names)
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    # a lone author contributes three letters, otherwise one letter each
    if len(names) == 1:
        letters = 3
    else:
        letters = 1
    abbrev = ''
    for name in names:
        abbrev += copychars(name, name.rfind(' ') + 1, letters)

    return {'list': names, 'num': len(names), 'text': text, 'abbrev': abbrev}
1.212 +
1.213 +
#
# data = title string
# @return the capitalized title (first letter is capitalized), rest are capitalized
# only if capitalized inside braces
#
def capitalizetitle(data):
    """Normalise the capitalisation of the title string *data*.

    The title is cut into brace-protected groups and the text between them.
    Protected groups keep their spelling (only the braces are removed), the
    leading chunk is left-stripped and capitalised, and every other chunk is
    lower-cased.  Returns the reassembled title.
    """
    pieces = []
    for index, chunk in enumerate(capitalize_rex.split(data)):
        trimmed = chunk.lstrip()
        if trimmed.startswith('{'):
            # keep the chunk's capitalization the same
            pieces.append(removebraces(chunk))
        elif index == 0:
            # first chunk --> capitalize first letter (after spaces)
            pieces.append(trimmed.capitalize())
        else:
            pieces.append(chunk.lower())
    return ''.join(pieces)
1.238 +
1.239 +
#
# @return the bibtex for the title
# @param data --> title string
# braces are removed from title
#
def bibtextitle(data, entrytype):
    """Return the title *data* prepared for output.

    Titles of 'book' and 'inbook' entries keep their capitalisation; titles
    of every other entry type go through capitalizetitle() first.  In both
    cases the surrounding whitespace and all braces are removed.
    """
    stripped = data.strip()
    if entrytype not in ('book', 'inbook'):
        stripped = capitalizetitle(stripped)
    return removebraces(stripped)
1.252 +
1.253 +
#
# function to compare entry lists
#
def entry_cmp(x, y):
    """Three-way comparison of two entry lists by their first element.

    Returns -1, 0 or 1 when x[0] is smaller than, equal to or greater than
    y[0].
    """
    left, right = x[0], y[0]
    if left < right:
        return -1
    if left > right:
        return 1
    return 0
1.259 +
1.260 +
#
# build the doxygen lines for the transformed "filecont_source"
#

def _emphasize(entrycont, field):
    """Wrap the value of *field* in <em>...</em> if the entry has one."""
    if entrycont.get(field):
        entrycont[field] = '<em>' + entrycont[field] + '</em>'


def _apply_entry_type_rules(entrytype, entrycont):
    """Apply the formatting that depends on the BibTeX entry type (in place)."""
    if entrytype in ('book', 'inbook'):
        _emphasize(entrycont, 'title')
        if 'author' not in entrycont and 'editor' in entrycont:
            # An edited book is listed under its editors.  Work on a copy so
            # the stored 'editor' value is not modified as a side effect.
            editors = entrycont['editor'].copy()
            if editors['num'] == 1:
                editors['text'] += ', editor'
            else:
                editors['text'] += ', editors'
            entrycont['author'] = editors
    elif entrytype == 'article':
        _emphasize(entrycont, 'journal')
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        _emphasize(entrycont, 'booktitle')
    elif entrytype == 'techreport':
        entrycont.setdefault('type', 'Technical report')
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'


def _format_entry(entrycont):
    """Return the text lines of one entry, in their output order."""
    parts = []

    def add(name, prefix='', suffix=','):
        # append "prefix + value + suffix" for a present, non-empty field
        value = entrycont.get(name, '')
        if value:
            parts.append(prefix + value + suffix)

    number = entrycont.get('number', '')
    # BibTeX page ranges use "--"; the output uses a single dash
    pages = entrycont.get('pages', '').replace('--', '-')

    author = entrycont.get('author')
    if author:
        parts.append(author['text'] + '.')
    add('title', suffix='.')
    add('journal')
    add('booktitle', prefix='In ')

    kind = entrycont.get('type', '')
    if kind:
        if number:
            kind += ' ' + number
        parts.append(kind + ',')

    for name in ('institution', 'publisher', 'school', 'address'):
        add(name)
    add('edition', suffix=' edition,')
    add('howpublished')

    volume = entrycont.get('volume', '')
    if volume:
        # volume(number):pages
        if number:
            volume += '(' + number + ')'
        if pages:
            volume += ':' + pages
        parts.append(volume + ',')
    elif pages:
        parts.append('pages ' + pages + ',')

    year = entrycont.get('year', '')
    if year:
        month = entrycont.get('month', '')
        if month:
            parts.append(month + ' ' + year + '.')
        else:
            parts.append(year + '.')

    add('note', suffix='.')
    add('url', suffix='.')
    return parts


def _entry_keys(entrycont):
    """Return (sortkey, bibkey): the sorting key and the printed label."""
    sortkey = ''
    bibkey = 'x'
    author = entrycont.get('author')
    if author is not None:
        for name in author['list']:
            # letters of the last word of every name
            sortkey += copychars(name, name.rfind(' ') + 1, len(name))
        bibkey = author['abbrev']
    year = entrycont.get('year', '')
    if year:
        sortkey += year
        bibkey += year[-2:]
    sortkey += entrycont.get('title', '')
    key = entrycont.get('key', '')
    if key:
        # an explicit "key" field overrides the generated label
        sortkey = key + sortkey
        bibkey = key
    return sortkey, bibkey


def bibtexdecoder(filecont_source):
    """Convert washed BibTeX lines into the lines of a doxygen page body.

    *filecont_source* is a sequence of lines (a trailing newline on each
    line is ignored) holding one "@type{id," line, one "field = value" line
    per field and one "}" line per entry.

    Returns a list of strings: for every entry, sorted by its sort key, a
    "\\section id [label]" line, an opening <div>, the formatted entry
    lines, the closing </div> and an empty line.  Entries that end up with
    the same label get the suffixes a, b, c, ... in sorted order.

    Field lines and closing braces that do not belong to an open entry are
    ignored, and fields missing from an entry are simply left out.
    """
    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    endtype_rex = re.compile(r'}\s*$')
    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')

    entries = []
    entrytype = None
    entryid = ''
    entrycont = None        # fields of the entry being read, if any

    for line in filecont_source:
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities ('&' first, so the entities produced
        # for '<' and '>' are not encoded a second time)
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        started = pubtype_rex.match(line)
        if started:
            entrytype = started.group(1).lower()
            entryid = started.group(2).strip()
            entrycont = {}
            continue

        # end entry if just a }
        if endtype_rex.match(line):
            if entrycont is None:
                # closing brace without an open entry: nothing to emit
                continue
            _apply_entry_type_rules(entrytype, entrycont)
            sortkey, bibkey = _entry_keys(entrycont)
            entries.append([sortkey, bibkey, entryid] +
                           _format_entry(entrycont))
            entrycont = None
            continue

        # field, publication info; tried in this order:
        #   field = {data}   field = "data"   field = data
        field = ''
        data = ''
        for rex in (bracedata_rex, quotedata_rex, data_rex):
            found = rex.match(line)
            if found:
                field = found.group(1).lower()
                data = found.group(2)
                break

        if field == '' or entrycont is None:
            continue

        if field == 'url':
            data = '\\url{' + data.strip() + '}'

        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            continue

        if field == 'title':
            text = bibtextitle(data, entrytype)
        else:
            text = removebraces(transformurls(data.strip()))
        if text != '':
            entrycont[field] = latexreplacements(text)

    # sort entries by their sort key (the sort is stable)
    entries.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keycount = {}
    for entry in entries:
        keycount[entry[1]] = keycount.get(entry[1], 0) + 1

    # generate output
    used = {}
    output = []
    for entry in entries:
        # generate output key from the bibtex key
        bibkey = entry[1]
        entryid = entry[2]
        if keycount[bibkey] == 1:
            outkey = bibkey
        else:
            index = used.get(bibkey, 0)
            outkey = bibkey + chr(97 + index)     # 97 is 'a'
            used[bibkey] = index + 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(entry[3:])
        output.append('</div>')
        output.append('')

    return output
1.471 +
1.472 +
#
# return 1 iff abbr is in line but not inside braces or quotes
# assumes that abbr appears only once on the line (out of braces and quotes)
#
def verify_out_of_braces(line, abbr):
    """Tell whether *abbr* occurs in *line* outside braces and quotes.

    The line is cut at every '{', '}' and '"'.  While walking over the
    pieces the brace nesting depth and the open/closed state of the quotes
    are tracked; the first piece that contains *abbr* as a whole word
    (case-insensitively) while no brace and no quote is open gives 1.
    Returns 0 if there is no such piece.
    """
    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    brace_depth = 0
    in_quotes = False

    for token in delimiter_rex.split(line):
        if token == '{':
            brace_depth += 1
        elif token == '}':
            brace_depth -= 1
        elif token == '"':
            in_quotes = not in_quotes
        elif brace_depth == 0 and not in_quotes and word_rex.search(token):
            return 1

    return 0
1.501 +
1.502 +
#
# a line in the form phrase1 # phrase2 # ... # phrasen
# is returned as phrase1 phrase2 ... phrasen
# with the correct punctuation
# Bug: Doesn't always work with multiple abbreviations plugged in
#
def concat_line(line):
    """Resolve the '#' concatenations of the "field = value" line *line*.

    The value is split at every '#'.  The delimiters between the pieces are
    dropped: every piece but the first loses a leading quote or brace, and
    every piece but the last loses a trailing quote or brace.  The outer
    quotes become braces: a leading quote on the first piece turns into '{'
    and a trailing quote on the last piece (optionally followed by a comma)
    turns into '}'.  A piece ending in a backslash gets its '#' back,
    because that '#' was an escaped "\\#".

    Returns "field = piece1 piece2 ... pieceN".
    """
    # only look at part after equals
    name = field_rex.sub(r'\g<1>', line)
    value = field_rex.sub(r'\g<2>', line)

    pieces = concatsplit_rex.split(value)
    last = len(pieces) - 1
    joined = name + ' ='

    for index, piece in enumerate(pieces):
        piece = piece.strip()

        # opening delimiter
        if index == 0:
            if piece.startswith('"'):
                piece = '{' + piece[1:]
        elif piece[:1] in ('"', '{'):
            piece = piece[1:]

        # closing delimiter
        if index != last:
            if piece[-1:] in ('"', '}'):
                piece = piece[:-1]
        elif piece.endswith('",'):
            piece = piece[:-2] + '},'
        elif piece.endswith('"'):
            piece = piece[:-1] + '}'

        # if piece did have \#, add the \# back
        if piece.endswith('\\'):
            piece += '#'

        joined += ' ' + piece

    return joined
1.548 +
1.549 +
#
# substitute abbreviations into filecont
# @param filecont_source - string of data from file
#
def bibtex_replace_abbreviations(filecont_source):
    """Expand @string abbreviations and drop non-entry blocks.

    *filecont_source* is the washed file as one string.  It is processed
    line by line:

    - empty lines are dropped;
    - an "@string{name = value" line defines an abbreviation (the first
      definition of a name wins) and is dropped, as are "@comment{" and
      "@preamble{" lines; after any of these, the next line containing '}'
      is dropped as well;
    - in every other line each known abbreviation found outside braces and
      quotes is replaced by its value, and '#' concatenations are resolved
      with concat_line().

    The three-letter month names are predefined, as in BibTeX.
    Abbreviations are matched case-insensitively.

    Returns the resulting text as one string with a newline after every
    line.
    """
    lines = filecont_source.splitlines()

    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    # an abbreviation is a whole word, optionally followed by a comma
    front = r'\b'
    back = r'(,?)\b'
    abbr_rex = [re.compile(front + abbr + back, re.I) for abbr in abbr_list]

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)
    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = 0
    kept = []

    for line in lines:
        if line == ' ' or line == '':
            continue

        if waiting_for_end_string:
            if '}' in line:
                waiting_for_end_string = 0
                continue

        definition = abbrdef_rex.search(line)
        if definition:
            abbr = definition.group(1)
            # An empty name would yield a pattern matching at every word
            # boundary, so it is not registered.
            if abbr and abbr not in abbr_list:
                abbr_list.append(abbr)
                value_list.append(definition.group(2).strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace subsequent abbreviations with the value
        for abbr, rex, value in zip(abbr_list, abbr_rex, value_list):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # A function replacement inserts the value literally; a
                    # template string would interpret backslashes in the
                    # value (LaTeX commands) as regex escapes.
                    line = rex.sub(
                        lambda found, value=value: value + found.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept.append(line + '\n')

    filecont2 = ''.join(kept)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
1.649 +
#
# convert @type( ... ) to @type{ ... }
#
def no_outer_parens(filecont):
    """Rewrite entries delimited by parentheses to use braces.

    *filecont* is the file content as one string.  For every "@type"
    token that is directly followed (whitespace allowed) by '(', that
    parenthesis becomes '{' and the parenthesis closing it becomes '}'.
    Parentheses nested inside the entry and entries already written with
    braces are left as they are.

    Returns the rewritten string.
    """
    # Only an "@type" at the very end of a text chunk, i.e. right in front
    # of the delimiter that follows, starts an entry.  An '@' elsewhere in
    # the text (an e-mail address, say) must not arm the conversion.
    type_rex = re.compile(r'@\w+\s*$')

    open_paren_count = 0
    open_type = 0       # 1 while inside an entry opened with '('
    look_next = 0       # 1 when the next delimiter follows an "@type"

    rebuilt = []

    # the delimiters are captured, so they show up as pieces of their own
    for phrase in re.split('([(){}])', filecont):
        if look_next == 1:
            if phrase == '(':
                phrase = '{'
                open_paren_count = open_paren_count + 1
            else:
                # the entry uses braces already
                open_type = 0
            look_next = 0

        if phrase == '(':
            open_paren_count = open_paren_count + 1

        elif phrase == ')':
            open_paren_count = open_paren_count - 1
            if open_type == 1 and open_paren_count == 0:
                # this parenthesis closes the entry
                phrase = '}'
                open_type = 0

        elif type_rex.search(phrase):
            open_type = 1
            look_next = 1

        rebuilt.append(phrase)

    return ''.join(rebuilt)
1.693 +
1.694 +
#
# make all whitespace into just one space
# format the bibtex file into a usable form.
#
def bibtexwasher(filecont_source):
    """Normalise the raw lines of a BibTeX file for the decoder.

    *filecont_source* is the list of lines as read from the file.  The
    steps are, in order:

    1. strip every line, collapse runs of whitespace to one space, drop
       empty lines and lines starting with '%', and join the rest into one
       string (each line prefixed with a space);
    2. turn "@type( ... )" into "@type{ ... }";
    3. insert line breaks after field values, around entry headers and in
       front of every "@type", and turn an escaped ampersand into a plain one;
    4. put every brace that closes an entry on a line of its own;
    5. expand the @string abbreviations.

    Returns the list of non-blank result lines, each ending in a newline.
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace, ignore comments
    kept = []
    for raw_line in filecont_source:
        squeezed = space_rex.sub(' ', raw_line.strip())
        if squeezed == '' or comment_rex.match(squeezed):
            continue
        kept.append(' ' + squeezed)

    # the file is in one long string
    text = no_outer_parens(''.join(kept))

    # (pattern, replacement) pairs; the order matters
    rewrites = (
        # split lines according to preferred syntax scheme
        (r'(=\s*{[^=]*)},', '\\g<1>},\n'),
        # add new lines after commas that are after values
        (r'"\s*,', '",\n'),
        (r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n'),
        (r'(@\w*)\s*({(\s*)[^,\s]*)\s*,', '\n\n\\g<1>\\g<2>,\n'),
        # add new lines after }
        (r'"\s*}', '"\n}\n'),
        (r'}\s*,', '},\n'),
        (r'@(\w*)', '\n@\\g<1>'),
        # character encoding, reserved latex characters
        (r'{\\&}', '&'),
        (r'\\&', '&'),
    )
    for pattern, replacement in rewrites:
        text = re.sub(pattern, replacement, text)

    # do checking for open braces to get format correct:
    # a brace that brings the nesting depth back to zero closes an entry
    # and is moved to the start of a new line
    depth = 0
    rebuilt = []
    for token in re.split('([{}])', text):
        if token == '{':
            depth += 1
        elif token == '}':
            depth -= 1
            if depth == 0:
                rebuilt.append('\n')
        rebuilt.append(token)

    washed = bibtex_replace_abbreviations(''.join(rebuilt))

    # gather the lines, ignoring blank ones
    return [line + '\n'
            for line in washed.splitlines()
            if line not in ('', ' ')]
1.781 +
1.782 +
def filehandler(filepath):
    """Convert the BibTeX file *filepath* and write the page to stdout.

    The output is a doxygen comment block holding a "references" page whose
    body is what bibtexdecoder() produces for the washed file content.

    If the file cannot be opened or read, a message is written to stderr
    and nothing is written to stdout.  Returns None in either case.
    """
    import sys

    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the file even when reading fails
            fd.close()
    except (IOError, OSError):
        # stdout is normally redirected into the generated .dox file, so the
        # diagnostic goes to stderr; there is nothing to convert either.
        sys.stderr.write('Could not open file: %s\n' % (filepath,))
        return

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    emit = sys.stdout.write
    emit('/**\n')
    emit('\\page references References\n')
    emit('\n')
    for line in outdata:
        emit(line + '\n')
    emit('*/\n')
1.798 +
1.799 +
# main program

def main():
    """Run the converter on the file named by the first command line argument.

    Without an argument, "No input file" is written to stdout and the
    program exits.
    """
    import sys

    arguments = sys.argv[1:]
    if not arguments:
        sys.stdout.write("No input file\n")
        sys.exit()
    filehandler(arguments[0])


if __name__ == "__main__":
    main()
1.812 +
1.813 +
1.814 +# end python script