1.1 --- a/scripts/bib2dox.py Mon Jul 16 16:21:40 2018 +0200
1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
1.3 @@ -1,816 +0,0 @@
1.4 -#! /usr/bin/env python
1.5 -"""
1.6 - BibTeX to Doxygen converter
1.7 - Usage: python bib2dox.py bibfile.bib > bibfile.dox
1.8 -
1.9 - This file is a part of LEMON, a generic C++ optimization library.
1.10 -
1.11 - **********************************************************************
1.12 -
1.13 - This code is the modification of the BibTeX to XML converter
1.14 - by Vidar Bronken Gundersen et al.
1.15 - See the original copyright notices below.
1.16 -
1.17 - **********************************************************************
1.18 -
1.19 - Decoder for bibliographic data, BibTeX
1.20 - Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
1.21 -
1.22 - v.8
1.23 - (c)2002-06-23 Vidar Bronken Gundersen
1.24 - http://bibtexml.sf.net/
1.25 - Reuse approved as long as this notification is kept.
1.26 - Licence: GPL.
1.27 -
1.28 - Contributions/thanks to:
1.29 - Egon Willighagen, http://sf.net/projects/jreferences/
1.30 - Richard Mahoney (for providing a test case)
1.31 -
1.32 - Edited by Sara Sprenkle to be more robust and handle more bibtex features.
1.33 - (c) 2003-01-15
1.34 -
1.35 - 1. Changed bibtex: tags to bibxml: tags.
1.36 - 2. Use xmlns:bibxml="http://bibtexml.sf.net/"
1.37 - 3. Allow spaces between @type and first {
1.38 - 4. "author" fields with multiple authors split by " and "
1.39 - are put in separate xml "bibxml:author" tags.
1.40 - 5. Option for Titles: words are capitalized
1.41 - only if first letter in title or capitalized inside braces
1.42 - 6. Removes braces from within field values
1.43 - 7. Ignores comments in bibtex file (including @comment{ or % )
1.44 - 8. Replaces some special latex tags, e.g., replaces ~ with ' '
1.45 - 9. Handles bibtex @string abbreviations
1.46 - --> includes bibtex's default abbreviations for months
1.47 - --> does concatenation of abbr # " more " and " more " # abbr
1.48 - 10. Handles @type( ... ) or @type{ ... }
1.49 - 11. The keywords field is split on , or ; and put into separate xml
1.50 - "bibxml:keywords" tags
1.51 - 12. Ignores @preamble
1.52 -
1.53 - Known Limitations
1.54 - 1. Does not transform Latex encoding like math mode and special
1.55 - latex symbols.
1.56 - 2. Does not parse author fields into first and last names.
1.57 - E.g., It does not do anything special to an author whose name is
1.58 - in the form LAST_NAME, FIRST_NAME
1.59 - In "author" tag, will show up as
1.60 - <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
1.61 - 3. Does not handle "crossref" fields other than to print
1.62 - <bibxml:crossref>...</bibxml:crossref>
1.63 - 4. Does not inform user of the input's format errors. You just won't
1.64 - be able to transform the file later with XSL
1.65 -
1.66 - You will have to manually edit the XML output if you need to handle
1.67 - these (and unknown) limitations.
1.68 -
1.69 -"""
1.70 -
1.71 -import string, re
1.72 -
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings: the regex escapes (\s, \w, \\) then reach
# the re module unchanged on every Python version.  Written as an ordinary
# literal, the \url pattern below is a syntax error on Python 3, where
# "\u" starts a unicode escape.
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile(r'[{}]')
capitalize_rex = re.compile(r'({[^}]*})')

# splits a keyword list on , or ;
# (not referenced by any function visible in this file)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = rest of line" and "name = value up to the first comma"
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# the LaTeX command \url{address}; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
1.101 -
#
# return the string parameter with every LaTeX \url{address} command
# turned into an HTML link whose text is the address itself
#
def transformurls(str):
    link_template = r'<a href="\1">\1</a>'
    return re.sub(url_rex, link_template, str)
1.107 -
#
# return the string parameter with every '{' and '}' character dropped
#
def removebraces(str):
    # cutting the text at each brace and gluing the pieces back together
    # leaves everything except the braces
    pieces = rembraces_rex.split(str)
    return ''.join(pieces)
1.113 -
#
# latex-specific replacements
# (do this after braces were removed)
#

# LaTeX accent commands and the HTML character entity each one becomes.
# Entities keep this file pure ASCII (it declares no source encoding) and
# are the "&xyz;" form that copychars() knows how to abbreviate.
_LATEX_ENTITIES = (
    ("\\'a", '&aacute;'), ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'), ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'), ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'), ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'), ('\\"u', '&uuml;'),
    # HTML has no entity for the double-acute letters produced by \H;
    # o-tilde and u-umlaut are used as the closest stand-ins.
    ('\\H o', '&otilde;'), ('\\H u', '&uuml;'),
    ("\\'A", '&Aacute;'), ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'), ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'), ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'), ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'), ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'), ('\\H U', '&Uuml;'),
)


def latexreplacements(line):
    """Return line with LaTeX spacing and accent commands made HTML-ready.

    The non-breaking space "~" becomes a plain space and each accent
    command listed in _LATEX_ENTITIES becomes its HTML entity.  Braces are
    expected to be gone already, so "\\'a" is matched but "\\'{a}" is not.
    """
    # str methods replace the string-module functions, which no longer
    # exist on Python 3
    line = line.replace('~', ' ')
    for command, entity in _LATEX_ENTITIES:
        line = line.replace(command, entity)
    return line
1.146 -
#
# copy characters from a string decoding html expressions (&xyz;)
#
def copychars(str, ifrom, count):
    """Collect up to count letters of str, starting at index ifrom.

    Plain ASCII letters are copied as they are.  For an html expression
    such as "&xyz;" only the character right after the "&" is copied and
    the rest, up to and including the ";", is skipped.  Anything else
    (digits, spaces, punctuation) is ignored.
    """
    picked = []
    end = len(str)
    pos = ifrom
    inside_entity = False
    while pos < end and len(picked) < count:
        ch = str[pos]
        if ch == '&':
            # the character following '&' stands in for the whole entity
            inside_entity = True
            if pos + 1 < end:
                picked.append(str[pos + 1])
            pos += 2
            continue
        if inside_entity:
            # skip the entity name; ';' ends it
            if ch == ';':
                inside_entity = False
        elif ('A' <= ch <= 'Z') or ('a' <= ch <= 'z'):
            picked.append(ch)
        pos += 1

    return ''.join(picked)
1.173 -
1.174 -
#
# Handle a list of authors (separated by 'and').
# It gives back a dictionary with the following values:
# - num: the number of authors,
# - list: the list of the author names,
# - text: the bibtex text (separated by commas and/or 'and')
# - abbrev: abbreviation that can be used to indicate the
#   bibliography entries
#
def bibtexauthor(data):
    """Parse a BibTeX author/editor field into the dictionary described above.

    Every name is stripped, has its braces removed and its LaTeX accents
    replaced; a name written as "Xyz, A. B." is turned into "A. B. Xyz".
    """
    names = []
    for raw_name in author_rex.split(data):
        # general transformations
        name = latexreplacements(removebraces(raw_name.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        comma = name.find(',')
        if comma != -1:
            name = name[comma + 1:].strip() + ' ' + name[:comma].strip()
        names.append(name)

    # The names are joined directly instead of through a temporary '#'
    # separator, so a name that itself contains '#' is left untouched.
    if len(names) == 1:
        text = names[0]
    elif len(names) == 2:
        text = names[0] + ' and ' + names[1]
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    # a single author contributes three letters of the last word of the
    # name, several authors contribute one letter each
    if len(names) == 1:
        letters = 3
    else:
        letters = 1
    abbrev = ''.join([copychars(name, name.rfind(' ') + 1, letters)
                      for name in names])

    return {
        'list': names,
        'num': len(names),
        'text': text,
        'abbrev': abbrev,
    }
1.217 -
1.218 -
#
# data = title string
# @return the title with its first letter capitalized and the rest in
# lower case, except for text inside braces, whose capitalization is kept
#
def capitalizetitle(data):
    # splitting on the capturing pattern keeps the {braced} phrases as
    # separate items of the list
    parts = []
    for index, phrase in enumerate(capitalize_rex.split(data)):
        # str method instead of string.lstrip(), which Python 3 removed
        check = phrase.lstrip()

        if check.startswith('{'):
            # keep phrase's capitalization the same
            parts.append(removebraces(phrase))
        elif index == 0:
            # first phrase --> capitalize first letter (after spaces)
            parts.append(check.capitalize())
        else:
            parts.append(phrase.lower())

    return ''.join(parts)
1.243 -
1.244 -
#
# @return the text for the title, with braces removed
# @param data --> title string
# @param entrytype --> lower-case entry type; titles of 'book' and
#   'inbook' entries keep their capitalization, all others go through
#   capitalizetitle first
#
def bibtextitle(data, entrytype):
    stripped = data.strip()
    if entrytype not in ('book', 'inbook'):
        stripped = capitalizetitle(stripped)
    return removebraces(stripped)
1.257 -
1.258 -
#
# function to compare entry lists by their first item
# @return -1, 0 or 1 as x[0] is smaller than, equal to or larger than y[0]
#
def entry_cmp(x, y):
    first, second = x[0], y[0]
    # spelled out because the cmp() builtin does not exist on Python 3
    return (first > second) - (first < second)
1.264 -
1.265 -
#
# build the Doxygen text for the washed BibTeX lines in "filecont_source"
#

def _has_value(entrycont, name):
    """Return True when field name was parsed and is not the empty string."""
    return name in entrycont and entrycont[name] != ''


def _apply_entry_type_formatting(entrytype, entrycont):
    """Adjust the parsed fields in place according to the entry type."""
    if entrytype in ('book', 'inbook'):
        if 'title' in entrycont:
            entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
        # an edited book has no author: list its editors instead
        if 'author' not in entrycont and 'editor' in entrycont:
            entrycont['author'] = entrycont['editor']
            entrycont['author']['text'] += ', editors'
    elif entrytype == 'article':
        if 'journal' in entrycont:
            entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        if 'booktitle' in entrycont:
            entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    if _has_value(entrycont, 'pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')


def _entry_text_lines(entrycont):
    """Return the output lines of one entry, in the fixed field order."""
    lines = []

    def add(name, prefix, suffix):
        if _has_value(entrycont, name):
            lines.append(prefix + entrycont[name] + suffix)

    if _has_value(entrycont, 'author'):
        lines.append(entrycont['author']['text'] + '.')
    add('title', '', '.')
    add('journal', '', ',')
    add('booktitle', 'In ', ',')
    if _has_value(entrycont, 'type'):
        text = entrycont['type']
        if _has_value(entrycont, 'number'):
            text += ' ' + entrycont['number']
        lines.append(text + ',')
    add('institution', '', ',')
    add('publisher', '', ',')
    add('school', '', ',')
    add('address', '', ',')
    add('edition', '', ' edition,')
    add('howpublished', '', ',')
    if _has_value(entrycont, 'volume'):
        # volume(number):pages
        text = entrycont['volume']
        if _has_value(entrycont, 'number'):
            text += '(' + entrycont['number'] + ')'
        if _has_value(entrycont, 'pages'):
            text += ':' + entrycont['pages']
        lines.append(text + ',')
    elif _has_value(entrycont, 'pages'):
        lines.append('pages ' + entrycont['pages'] + ',')
    if _has_value(entrycont, 'year'):
        if _has_value(entrycont, 'month'):
            lines.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            lines.append(entrycont['year'] + '.')
    add('note', '', '.')
    add('url', '', '.')
    return lines


def _entry_keys(entrycont):
    """Return (sortkey, bibkey): the sorting key and the printed label."""
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            # letters of the last word of each name
            sortkey += copychars(author, author.rfind(' ') + 1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        # an explicit "key" field overrides the label and sorts first
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']
    return sortkey, bibkey


def bibtexdecoder(filecont_source):
    """Convert washed BibTeX lines into the lines of a Doxygen page body.

    filecont_source is a sequence of lines: an "@type{id," line opens an
    entry, "field = value" lines fill it and a line starting with "}"
    closes it.  The return value is a list of strings; each entry yields a
    "\\section" line, an opening <div>, its text lines, the closing </div>
    and an empty string.  Entries are sorted by their sort key, and labels
    shared by several entries get the suffixes a, b, c, ...
    """
    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    endtype_rex = re.compile(r'}\s*$')
    # field = {data}, field = "data" and field = data, tried in this order
    data_patterns = (
        re.compile(r'\s*(\w*)\s*=\s*{(.*)},?'),
        re.compile(r'\s*(\w*)\s*=\s*"(.*)",?'),
        data_rex,
    )

    filecont = []
    entrycont = None    # no entry has been opened yet
    entrytype = ''
    entryid = ''

    for line in filecont_source:
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities ('&' first, so that the entities
        # produced for '<' and '>' are not encoded a second time)
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        if pubtype_rex.match(line):
            entrycont = {}
            entrytype = pubtype_rex.sub(r'\g<1>', line).lower()
            entryid = pubtype_rex.sub(r'\g<2>', line)
            continue

        if entrycont is None:
            # nothing can be attached to a line before the first entry
            continue

        # end entry if just a }
        if endtype_rex.match(line):
            _apply_entry_type_formatting(entrytype, entrycont)
            sortkey, bibkey = _entry_keys(entrycont)
            filecont.append([sortkey, bibkey, entryid] +
                            _entry_text_lines(entrycont))
            continue

        # field, publication info
        field = ''
        data = ''
        for pattern in data_patterns:
            found = pattern.match(line)
            if found:
                field = found.group(1).lower()
                data = pattern.sub(r'\g<2>', line)
                break

        if field == 'url':
            data = '\\url{' + data.strip() + '}'

        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
        elif field == 'title':
            text = bibtextitle(data, entrytype)
            if text != '':
                entrycont[field] = latexreplacements(text)
        elif field != '':
            text = removebraces(transformurls(data.strip()))
            if text != '':
                entrycont[field] = latexreplacements(text)

    # sort entries by their sort key
    filecont.sort(key=lambda item: item[0])

    # count the bibtex keys
    keytable = {}
    for entry in filecont:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = dict.fromkeys(keytable, 0)

    # generate output
    output = []
    for entry in filecont:
        # generate output key from the bibtex key
        bibkey = entry[1]
        entryid = entry[2]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            # 97 is 'a': repeated labels become key + a, b, c, ...
            outkey = bibkey + chr(97 + counttable[bibkey])
            counttable[bibkey] += 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(entry[3:])
        output.append('</div>')
        output.append('')

    return output
1.476 -
1.477 -
#
# return 1 iff abbr is in line but not inside braces or quotes
# assumes that abbr appears only once on the line (out of braces and quotes)
#
def verify_out_of_braces(line, abbr):
    # abbr as a whole word, in any letter case
    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    depth = 0         # braces opened minus braces closed so far
    quoted = False    # True while inside a "..." pair

    # the split keeps the delimiters, so braces and quotes arrive as
    # items of their own between the pieces of ordinary text
    for token in delimiter_rex.split(line):
        if token == "{":
            depth += 1
        elif token == "}":
            depth -= 1
        elif token == '"':
            quoted = not quoted
        elif depth == 0 and not quoted and word_rex.search(token):
            return 1

    return 0
1.506 -
1.507 -
#
# a line in the form phrase1 # phrase2 # ... # phrasen
# is returned as phrase1 phrase2 ... phrasen
# with the correct punctuation
# Bug: Doesn't always work with multiple abbreviations plugged in
#
def concat_line(line):
    # only the part after the equals sign is split on '#'
    name = field_rex.sub(r'\g<1>', line)
    value = field_rex.sub(r'\g<2>', line)

    pieces = concatsplit_rex.split(value)
    last = len(pieces) - 1
    joined = name + ' ='

    for index, piece in enumerate(pieces):
        piece = piece.strip()

        # opening delimiter: the first piece trades a leading quote for a
        # brace, every later piece loses its leading quote or brace
        if index == 0:
            if piece.startswith('"'):
                piece = '{' + piece[1:]
        elif piece[:1] in ('"', '{'):
            piece = piece[1:]

        # closing delimiter: the last piece trades a trailing quote for a
        # brace, every earlier piece loses its trailing quote or brace
        if index != last:
            if piece[-1:] in ('"', '}'):
                piece = piece[:-1]
        elif piece.endswith('"'):
            piece = piece[:-1] + "}"
        elif piece.endswith('",'):
            piece = piece[:-2] + "},"

        # if phrase did have \#, add the \# back
        if piece.endswith('\\'):
            piece += "#"
        joined = joined + ' ' + piece

    return joined
1.553 -
1.554 -
#
# substitute abbreviations into filecont
# @param filecont_source - string of data from file
# @return the data as one string, without the @string, @comment and
#   @preamble lines, with the abbreviations replaced by their values
#
def bibtex_replace_abbreviations(filecont_source):
    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    # an abbreviation is matched as a whole word, in any letter case; a
    # comma right after it is captured so that it can be put back
    front = r'\b'
    back = r'(,?)\b'
    abbr_rex = [re.compile(front + abbr + back, re.I) for abbr in abbr_list]

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)
    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    # set after an @string, @comment or @preamble line; the next line
    # containing '}' is dropped and clears the flag again
    waiting_for_end_string = False
    kept = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        if waiting_for_end_string and '}' in line:
            waiting_for_end_string = False
            continue

        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub(r'\g<1>', line)
            # the first definition of an abbreviation wins
            if abbr not in abbr_list:
                val = abbrdef_rex.sub(r'\g<2>', line)
                abbr_list.append(abbr)
                value_list.append(val.strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
            waiting_for_end_string = True
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = True
            continue

        # replace subsequent abbreviations with the value
        for abbr, value, rex in zip(abbr_list, value_list, abbr_rex):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # A function is used as the replacement so that the
                    # value is inserted literally: in a replacement string
                    # the backslashes of a LaTeX value would be read as
                    # escape sequences.
                    line = rex.sub(
                        lambda found, value=value: value + found.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept.append(line + '\n')

    filecont2 = ''.join(kept)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
1.654 -
#
# convert @type( ... ) to @type{ ... }
#
def no_outer_parens(filecont):
    """Return filecont with the outer parentheses of entries made braces.

    Only a "(" that directly follows a piece of text containing "@" is
    converted, together with the ")" that balances it; parentheses nested
    inside an entry and entries already written with braces are kept.
    """
    at_rex = re.compile(r'@\w*')

    depth = 0              # currently open parentheses
    in_paren_entry = False     # inside an entry opened (or about to be opened) with "("
    expect_opener = False  # the previous piece of text contained "@"

    # the split keeps the delimiters, so the text is walked as an
    # alternation of plain pieces and single ( ) { } characters
    rebuilt = []
    for token in re.split('([(){}])', filecont):
        if expect_opener:
            expect_opener = False
            if token == '(':
                token = '{'
                depth += 1
            else:
                in_paren_entry = False

        if token == '(':
            depth += 1
        elif token == ')':
            depth -= 1
            if in_paren_entry and depth == 0:
                token = '}'
                in_paren_entry = False
        elif at_rex.search(token):
            in_paren_entry = True
            expect_opener = True

        rebuilt.append(token)

    return ''.join(rebuilt)
1.698 -
1.699 -
#
# make all whitespace into just one space
# format the bibtex file into a usable form.
# @param filecont_source - the lines of the bibtex file
# @return list of non-blank lines, each ending in a newline: entry
#   headers, fields and closing braces on lines of their own
#
def bibtexwasher(filecont_source):
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    kept = []
    for line in filecont_source:
        # str methods: string.strip() and string.join() are gone in Python 3
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            kept.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(kept)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # a brace that closes an entry is moved to a line of its own
    open_brace_count = 0
    pieces = []
    for phrase in re.split('([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                pieces.append('\n')
        pieces.append(phrase)
    filecont = ''.join(pieces)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the non-blank lines
    return [line + '\n' for line in filecont2.splitlines()
            if line != '' and line != ' ']
1.786 -
1.787 -
#
# convert the bibtex file at "filepath" and print the result as a
# Doxygen comment block holding the page "references"
#
def filehandler(filepath):
    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the file even when reading fails
            fd.close()
    except IOError:
        print('Could not open file: ' + filepath)
        # stop here: there is nothing to convert (going on would only
        # fail on the missing file contents)
        return

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)
    # single-argument print(...) calls behave the same on Python 2 and 3
    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
1.803 -
1.804 -
# main program

def main():
    """Convert the bibtex file named by the first command-line argument.

    Without an argument a message is printed and the script exits with
    status 1, so that a calling build step can notice the failure.
    """
    import sys
    if len(sys.argv) < 2:
        print("No input file")
        sys.exit(1)
    filehandler(sys.argv[1])


if __name__ == "__main__":
    main()
1.817 -
1.818 -
1.819 -# end python script