1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/scripts/bib2dox.py Tue Dec 20 18:15:14 2011 +0100
1.3 @@ -0,0 +1,816 @@
1.4 +#! /usr/bin/env python
1.5 +"""
1.6 + BibTeX to Doxygen converter
1.7 + Usage: python bib2dox.py bibfile.bib > bibfile.dox
1.8 +
1.9 + This file is a part of LEMON, a generic C++ optimization library.
1.10 +
1.11 + **********************************************************************
1.12 +
1.13 + This code is the modification of the BibTeX to XML converter
1.14 + by Vidar Bronken Gundersen et al.
1.15 + See the original copyright notices below.
1.16 +
1.17 + **********************************************************************
1.18 +
1.19 + Decoder for bibliographic data, BibTeX
1.20 + Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
1.21 +
1.22 + v.8
1.23 + (c)2002-06-23 Vidar Bronken Gundersen
1.24 + http://bibtexml.sf.net/
1.25 + Reuse approved as long as this notification is kept.
1.26 + Licence: GPL.
1.27 +
1.28 + Contributions/thanks to:
1.29 + Egon Willighagen, http://sf.net/projects/jreferences/
1.30 + Richard Mahoney (for providing a test case)
1.31 +
1.32 + Edited by Sara Sprenkle to be more robust and handle more bibtex features.
1.33 + (c) 2003-01-15
1.34 +
1.35 + 1. Changed bibtex: tags to bibxml: tags.
1.36 + 2. Use xmlns:bibxml="http://bibtexml.sf.net/"
1.37 + 3. Allow spaces between @type and first {
1.38 + 4. "author" fields with multiple authors split by " and "
1.39 + are put in separate xml "bibxml:author" tags.
1.40 + 5. Option for Titles: words are capitalized
1.41 + only if first letter in title or capitalized inside braces
1.42 + 6. Removes braces from within field values
1.43 + 7. Ignores comments in bibtex file (including @comment{ or % )
1.44 + 8. Replaces some special latex tags, e.g., replaces ~ with ' '
1.45 + 9. Handles bibtex @string abbreviations
1.46 + --> includes bibtex's default abbreviations for months
1.47 + --> does concatenation of abbr # " more " and " more " # abbr
1.48 + 10. Handles @type( ... ) or @type{ ... }
1.49 + 11. The keywords field is split on , or ; and put into separate xml
1.50 + "bibxml:keywords" tags
1.51 + 12. Ignores @preamble
1.52 +
1.53 + Known Limitations
1.54 + 1. Does not transform Latex encoding like math mode and special
1.55 + latex symbols.
1.56 + 2. Does not parse author fields into first and last names.
1.57 + E.g., It does not do anything special to an author whose name is
1.58 + in the form LAST_NAME, FIRST_NAME
1.59 + In "author" tag, will show up as
1.60 + <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
1.61 + 3. Does not handle "crossref" fields other than to print
1.62 + <bibxml:crossref>...</bibxml:crossref>
1.63 + 4. Does not inform user of the input's format errors. You just won't
1.64 + be able to transform the file later with XSL
1.65 +
1.66 + You will have to manually edit the XML output if you need to handle
1.67 + these (and unknown) limitations.
1.68 +
1.69 +"""
1.70 +
1.71 +import string, re
1.72 +
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings: the regex escapes (\s, \w, \\url) must
# reach the re module untouched, and a non-raw '\u' is a syntax error
# on Python 3.
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile(r'[{}]')
capitalize_rex = re.compile(r'({[^}]*})')

# splits a keywords field on , or ;
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "field = value" lines: group 1 is the field name, group 2 the value
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# a literal \url{...} command; group 1 is the target
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
1.101 +
#
# turn the \url{...} commands of a string into html links
#
def transformurls(str):
    r"""Return `str` with every ``\url{target}`` rewritten as an anchor.

    The target is used both as the ``href`` and as the visible link text.
    """
    anchor_template = r'<a href="\1">\1</a>'
    linked = url_rex.sub(anchor_template, str)
    return linked
1.107 +
#
# return the string parameter without braces
#
def removebraces(str):
    """Return `str` with every '{' and '}' character deleted."""
    without_open = str.replace('{', '')
    return without_open.replace('}', '')
1.113 +
#
# latex-specific replacements
# (do this after braces were removed)
#

# (latex sequence, html replacement) pairs, applied in this order.
# The accents are emitted as html entities so that the output stays
# plain ASCII and copychars() can reduce them to their base letter.
_LATEX_REPLACEMENTS = (
    ('~', ' '),
    ("\\'a", '&aacute;'),
    ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'),
    ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'),
    ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'),
    ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),    # &utilde; does not exist
    ("\\'A", '&Aacute;'),
    ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'),
    ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),    # &Utilde; does not exist
)


def latexreplacements(line):
    """Return `line` with the supported latex sequences converted for html.

    '~' becomes a space; the acute, umlaut and double-acute accent
    commands on a/e/i/o/u (both cases) become the matching html entity.
    Braces are expected to have been removed already, so only the
    brace-less spellings (e.g. \\'a, \\"o, \\H o) are recognised.
    """
    for latex, html in _LATEX_REPLACEMENTS:
        line = line.replace(latex, html)
    return line
1.146 +
#
# copy characters from a string decoding html expressions (&xyz;)
#
def copychars(str, ifrom, count):
    """Collect up to `count` letters of `str`, starting at index `ifrom`.

    Only ASCII letters are copied.  An html expression such as
    ``&aacute;`` contributes the single character that follows the '&';
    the rest of the expression, up to and including ';', is skipped.
    """
    picked = []
    total = len(str)
    pos = ifrom
    inside_entity = False
    while pos < total and len(picked) < count:
        ch = str[pos]
        if ch == '&':
            # take the character right after '&' and step over both
            inside_entity = True
            if pos + 1 < total:
                picked.append(str[pos + 1])
            pos += 2
            continue
        if inside_entity:
            if ch == ';':
                inside_entity = False
        elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
            picked.append(ch)
        pos += 1

    return ''.join(picked)
1.173 +
1.174 +
#
# Handle a list of authors (separated by 'and').
# It gives back a dictionary with the following values:
# - num: the number of authors,
# - list: the list of the author names,
# - text: the bibtex text (separated by commas and/or 'and')
# - abbrev: abbreviation that can be used for indicate the
# bibliography entries
#
def bibtexauthor(data):
    """Parse a bibtex author/editor field into a dictionary (see above).

    Every name has its braces removed and latex sequences replaced, and
    "Last, First" is turned into "First Last".  The abbreviation takes
    the first letters of the last word of each name: three letters for
    a single author, one letter per author otherwise.
    """
    names = []
    for raw_name in author_rex.split(data):
        # general transformations
        author = latexreplacements(removebraces(raw_name.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        pos = author.find(',')
        if pos != -1:
            author = author[pos+1:].strip() + ' ' + author[:pos].strip()
        names.append(author)

    num = len(names)
    # "A", "A and B", "A, B, and C" -- joined directly, so a name may
    # contain any character without disturbing the separators
    if num <= 2:
        text = ' and '.join(names)
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    if num == 1:
        letters = 3
    else:
        letters = 1
    abbrev = ''
    for author in names:
        abbrev += copychars(author, author.rfind(' ') + 1, letters)

    return {'list': names, 'num': num, 'text': text, 'abbrev': abbrev}
1.217 +
1.218 +
#
# data = title string
# @return the capitalized title (first letter is capitalized), rest are capitalized
# only if capitalized inside braces
#
def capitalizetitle(data):
    """Return `data` in sentence case, keeping {braced} parts untouched.

    The title is split into braced and unbraced phrases.  A braced
    phrase keeps its capitalization (only its braces are removed); the
    very first phrase is left-stripped and capitalized; every other
    unbraced phrase is lowercased.
    """
    pieces = []
    for index, phrase in enumerate(capitalize_rex.split(data)):
        check = phrase.lstrip()
        if check.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif index == 0:
            # first word --> capitalize first letter (after spaces)
            pieces.append(check.capitalize())
        else:
            pieces.append(phrase.lower())

    return ''.join(pieces)
1.243 +
1.244 +
#
# @return the bibtex for the title
# @param data --> title string
# @param entrytype --> lowercase bibtex entry type
# braces are removed from title
#
def bibtextitle(data, entrytype):
    """Return the stripped, brace-free title for an entry of `entrytype`.

    Titles of 'book' and 'inbook' entries keep their capitalization;
    all other titles go through capitalizetitle() first.
    """
    cleaned = data.strip()
    if entrytype not in ('book', 'inbook'):
        cleaned = capitalizetitle(cleaned)
    return removebraces(cleaned)
1.257 +
1.258 +
#
# function to compare entry lists
#
def entry_cmp(x, y):
    """Three-way compare two entry lists by their first item (the sort key).

    Returns -1, 0 or 1.  Written without the cmp() builtin, which is
    not available on Python 3.
    """
    left, right = x[0], y[0]
    return (left > right) - (left < right)
1.264 +
1.265 +
#
# build the doxygen lines for the washed bibtex lines "filecont_source"
#

# want @<alphanumeric chars><spaces>{<spaces><any chars>,
_PUBTYPE_REX = re.compile(r'@(\w*)\s*{\s*(.*),')
# a line holding nothing but the closing brace of an entry
_ENDTYPE_REX = re.compile(r'}\s*$')
# field = {data} and field = "data" lines
_BRACEDATA_REX = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')
_QUOTEDATA_REX = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')


def _present(entrycont, key):
    """Tell whether the entry has a non-empty value for `key`."""
    return entrycont.get(key, '') != ''


def _emphasize(entrycont, key):
    """Wrap the value of `key` in <em>...</em> if the entry has that field."""
    if key in entrycont:
        entrycont[key] = '<em>' + entrycont[key] + '</em>'


def _split_field(line):
    """Return the (lowercase field name, raw data) pair of a field line.

    Tries field = {data}, then field = "data", then field = data.
    Returns ('', '') if the line is none of these.
    """
    for data_pattern in (_BRACEDATA_REX, _QUOTEDATA_REX, data_rex):
        found = data_pattern.match(line)
        if found:
            return found.group(1).lower(), data_pattern.sub(r'\g<2>', line)
    return '', ''


def _apply_entrytype_formatting(entrytype, entrycont):
    """Apply the entry type related formattings to the fields in place."""
    if entrytype in ('book', 'inbook'):
        _emphasize(entrycont, 'title')
        # a book without authors is listed under its editors
        if 'author' not in entrycont and 'editor' in entrycont:
            entrycont['author'] = entrycont['editor']
            entrycont['author']['text'] += ', editors'
    elif entrytype == 'article':
        _emphasize(entrycont, 'journal')
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        _emphasize(entrycont, 'booktitle')
    elif entrytype == 'techreport':
        entrycont.setdefault('type', 'Technical report')
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'


def _entry_lines(entrycont):
    """Return the list of text lines describing one entry."""
    lines = []

    if _present(entrycont, 'pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    if _present(entrycont, 'author'):
        lines.append(entrycont['author']['text'] + '.')
    if _present(entrycont, 'title'):
        lines.append(entrycont['title'] + '.')
    if _present(entrycont, 'journal'):
        lines.append(entrycont['journal'] + ',')
    if _present(entrycont, 'booktitle'):
        lines.append('In ' + entrycont['booktitle'] + ',')
    if _present(entrycont, 'type'):
        eline = entrycont['type']
        if _present(entrycont, 'number'):
            eline += ' ' + entrycont['number']
        lines.append(eline + ',')
    for key in ('institution', 'publisher', 'school', 'address'):
        if _present(entrycont, key):
            lines.append(entrycont[key] + ',')
    if _present(entrycont, 'edition'):
        lines.append(entrycont['edition'] + ' edition,')
    if _present(entrycont, 'howpublished'):
        lines.append(entrycont['howpublished'] + ',')
    if _present(entrycont, 'volume'):
        # volume(number):pages
        eline = entrycont['volume']
        if _present(entrycont, 'number'):
            eline += '(' + entrycont['number'] + ')'
        if _present(entrycont, 'pages'):
            eline += ':' + entrycont['pages']
        lines.append(eline + ',')
    elif _present(entrycont, 'pages'):
        lines.append('pages ' + entrycont['pages'] + ',')
    if _present(entrycont, 'year'):
        if _present(entrycont, 'month'):
            lines.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            lines.append(entrycont['year'] + '.')
    if _present(entrycont, 'note'):
        lines.append(entrycont['note'] + '.')
    if _present(entrycont, 'url'):
        lines.append(entrycont['url'] + '.')

    return lines


def _entry_keys(entrycont):
    """Return the (sortkey, bibkey) pair of an entry.

    The sort key is built from the authors' last words, the year and
    the title; the bibliography key from the author abbreviation (or
    'x') and the last two characters of the year.  An explicit 'key'
    field is put in front of the sort key and replaces the
    bibliography key.
    """
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ')+1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']
    return sortkey, bibkey


def bibtexdecoder(filecont_source):
    """Convert washed bibtex lines into the lines of a doxygen page body.

    `filecont_source` is the list produced by bibtexwasher(): one
    newline-terminated line per entry header, field or closing brace.
    The result is a list of strings (without newlines) holding, for
    every entry in sort-key order, a \\section line, an opening <div>,
    the formatted reference lines, the closing </div> and an empty line.
    Entries sharing a bibliography key get the suffixes a, b, c, ...
    """
    entries = []
    entrycont = None    # fields of the entry being read; None before the first
    entrytype = ''
    entryid = ''

    for line in filecont_source:
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities (before any markup is generated, so
        # that the links, <em> tags and accent entities stay intact)
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        header = _PUBTYPE_REX.match(line)
        if header:
            entrycont = {}
            entrytype = header.group(1).lower()
            entryid = header.group(2)
            continue

        # lines outside of any entry carry nothing to decode
        if entrycont is None:
            continue

        # end entry if just a }
        if _ENDTYPE_REX.match(line):
            _apply_entrytype_formatting(entrytype, entrycont)
            entry = _entry_lines(entrycont)
            sortkey, bibkey = _entry_keys(entrycont)
            # add the entry to the file contents
            entries.append([sortkey, bibkey, entryid] + entry)
            continue

        # field, publication info
        field, data = _split_field(line)
        if field == '':
            continue

        if field == 'url':
            data = '\\url{' + data.strip() + '}'

        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            continue
        if field == 'title':
            value = bibtextitle(data, entrytype)
        else:
            value = removebraces(transformurls(data.strip()))

        if value != '':
            entrycont[field] = latexreplacements(value)

    # sort entries (by their sort key)
    entries.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in entries:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = dict.fromkeys(keytable, 0)

    # generate output
    output = []
    for entry in entries:
        # generate output key from the bibtex key
        bibkey = entry[1]
        entryid = entry[2]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            outkey = bibkey + chr(97 + counttable[bibkey])
            counttable[bibkey] += 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(entry[3:])
        output.append('</div>')
        output.append('')

    return output
1.476 +
1.477 +
#
# return 1 iff abbr is in line but not inside braces or quotes
# assumes that abbr appears only once on the line (out of braces and quotes)
#
def verify_out_of_braces(line, abbr):
    """Return 1 if `abbr` occurs as a whole word outside braces and quotes.

    The line is cut at every '{', '}' and '"'; the brace depth and the
    quote state are tracked while the pieces in between are searched
    (case-insensitively) for `abbr`.  Returns 0 if no such piece exists.
    """
    word_rex = re.compile(r'\b' + abbr + r'\b', re.I)

    depth = 0
    quoted = False

    for token in delimiter_rex.split(line):
        if token == '{':
            depth += 1
        elif token == '}':
            depth -= 1
        elif token == '"':
            quoted = not quoted
        elif depth == 0 and not quoted and word_rex.search(token):
            return 1

    return 0
1.506 +
1.507 +
#
# a line in the form phrase1 # phrase2 # ... # phrasen
# is returned as phrase1 phrase2 ... phrasen
# with the correct punctuation
# Bug: Doesn't always work with multiple abbreviations plugged in
#
def concat_line(line):
    """Join the '#'-concatenated phrases of a 'field = value' line.

    The inner delimiters (quotes or braces) between the phrases are
    dropped, and the outer quotes of the first and last phrase are
    turned into braces, so the value ends up as one delimited string.
    """
    # only look at part after equals
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    phrases = concatsplit_rex.split(rest)
    last = len(phrases) - 1

    joined = field + ' ='

    for index, phrase in enumerate(phrases):
        phrase = phrase.strip()

        # opening delimiter: dropped inside, quote -> brace at the front
        if index != 0:
            if phrase[:1] in ('"', '{'):
                phrase = phrase[1:]
        elif phrase.startswith('"'):
            phrase = phrase.replace('"', '{', 1)

        # closing delimiter: dropped inside, quote -> brace at the end
        if index != last:
            if phrase[-1:] in ('"', '}'):
                phrase = phrase[:-1]
        elif phrase.endswith('"'):
            phrase = phrase[:-1] + "}"
        elif phrase.endswith('",'):
            phrase = phrase[:-2] + "},"

        # if phrase did have \#, add the \# back
        if phrase.endswith('\\'):
            phrase = phrase + "#"
        joined = joined + ' ' + phrase

    return joined
1.553 +
1.554 +
#
# substitute abbreviations into filecont
# @param filecont_source - string of data from file
#
def bibtex_replace_abbreviations(filecont_source):
    """Expand @string abbreviations and drop @string/@comment/@preamble.

    `filecont_source` is the washed file as one string.  Its lines are
    scanned once: an @string definition registers a new abbreviation
    (the month names jan..dec are predefined), and the lines of
    @string, @comment and @preamble blocks are left out up to and
    including the next line containing '}'.  On every other line each
    known abbreviation found outside braces and quotes is replaced by
    its value, and '#' concatenations are joined.  Returns the
    resulting text with the line breaks adjusted after the substituted
    values.
    """
    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan','feb','mar','apr','may','jun',
                 'jul','aug','sep','oct','nov','dec']
    value_list = ['January','February','March','April',
                  'May','June','July','August','September',
                  'October','November','December']

    front = r'\b'
    back = r'(,?)\b'
    abbr_rex = [re.compile(front + abbr + back, re.I) for abbr in abbr_list]

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)
    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = False
    kept = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        # inside a block that is left out: skip up to the closing brace
        if waiting_for_end_string:
            if '}' in line:
                waiting_for_end_string = False
            continue

        definition = abbrdef_rex.search(line)
        if definition:
            abbr = definition.group(1)
            # an empty name would match at every word boundary
            if abbr and abbr not in abbr_list:
                abbr_list.append(abbr)
                value_list.append(definition.group(2).strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
            waiting_for_end_string = True
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = True
            continue

        # replace subsequent abbreviations with the value
        for abbr, value, rex in zip(abbr_list, value_list, abbr_rex):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # a function replacement inserts the value literally,
                    # so backslashes in it are not read as regex escapes
                    line = rex.sub(
                        lambda found, value=value: value + found.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept.append(line + '\n')

    filecont2 = ''.join(kept)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"','{{')
    filecont2 = filecont2.replace('"}','}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub(r'\g<1>},\n', filecont2)

    return filecont2
1.654 +
#
# convert @type( ... ) to @type{ ... }
#
def no_outer_parens(filecont):
    """Return `filecont` with the outer parentheses of entries as braces.

    The text is cut at every parenthesis and brace.  A '(' directly
    following a piece of text that contains '@' becomes '{', and the
    ')' that brings the parenthesis depth back to zero becomes '}'.
    Everything else is copied unchanged.
    """
    pieces = []
    depth = 0               # number of currently open parentheses
    in_entry = False        # an '@' piece was seen and is not closed yet
    expect_opener = False   # the previous piece contained an '@'

    # do checking for open parens
    # will convert to braces
    for token in re.split(r'([(){}])', filecont):
        if expect_opener:
            if token == '(':
                token = '{'
                depth += 1
            else:
                in_entry = False
            expect_opener = False

        if token == '(':
            depth += 1
        elif token == ')':
            depth -= 1
            if in_entry and depth == 0:
                token = '}'
                in_entry = False
        elif '@' in token:
            in_entry = True
            expect_opener = True

        pieces.append(token)

    # rebuild filecont
    return ''.join(pieces)
1.698 +
1.699 +
#
# make all whitespace into just one space
# format the bibtex file into a usable form.
#
def bibtexwasher(filecont_source):
    """Normalize the raw lines of a bibtex file for bibtexdecoder().

    `filecont_source` is the list of lines read from the file.
    Comment lines (starting with '%') and empty lines are dropped and
    whitespace is collapsed; @type( ... ) is converted to
    @type{ ... }; the text is then re-split so that entry headers,
    fields and closing braces are on lines of their own, and the
    abbreviations are expanded.  Returns the list of non-blank lines,
    each ending in a newline.
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    kept = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if not comment_rex.match(line) and line != '':
            kept.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(kept)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', r'\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', r'= \g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      r'\n\n\g<1>\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', r'\n@\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # the brace that closes an entry goes on a line of its own
    pieces = []
    open_brace_count = 0
    for phrase in re.split(r'([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                pieces.append('\n')
        pieces.append(phrase)
    filecont = ''.join(pieces)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the lines, ignoring blank ones
    return [line + '\n'
            for line in filecont2.splitlines()
            if line != '' and line != ' ']
1.786 +
1.787 +
def filehandler(filepath):
    """Convert the bibtex file `filepath` and print the doxygen page.

    The output goes to standard output: the comment opener, the \\page
    command, an empty line, the lines produced by bibtexdecoder() and
    the comment closer.  If the file cannot be read, a message is
    printed and nothing else is done.
    """
    try:
        with open(filepath, 'r') as fd:
            filecont_source = fd.readlines()
    except IOError:
        # without the file contents there is nothing to convert
        print('Could not open file: %s' % filepath)
        return

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    # single-argument print calls behave the same on Python 2 and 3
    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
1.803 +
1.804 +
# main program

def main():
    """Run the converter on the file named by the first command line argument.

    Prints a message and exits with status 1 if no argument is given.
    """
    import sys
    if len(sys.argv) < 2:
        print('No input file')
        sys.exit(1)
    filehandler(sys.argv[1])

if __name__ == "__main__":
    main()
1.817 +
1.818 +
1.819 +# end python script