scripts/bib2dox.py
author Alpar Juttner <alpar@cs.elte.hu>
Sat, 26 Sep 2009 10:15:49 +0200
changeset 744 f8c468367dab
child 745 68792fb2870f
permissions -rw-r--r--
Integrate bib2dox.py into the build environments (#184)
     1 #!/usr/bin/env /usr/local/Python/bin/python2.1
     2 """
     3   BibTeX to Doxygen converter
     4   Usage: python bib2dox.py bibfile.bib > bibfile.dox
     5 
     6   This code is the modification of the BibTeX to XML converter
     7   by Vidar Bronken Gundersen et al. See the original copyright notices below. 
     8 
     9   **********************************************************************
    10 
    11   Decoder for bibliographic data, BibTeX
    12   Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
    13 
    14   v.8
    15   (c)2002-06-23 Vidar Bronken Gundersen
    16   http://bibtexml.sf.net/
    17   Reuse approved as long as this notification is kept.
    18   Licence: GPL.
    19 
    20   Contributions/thanks to:
    21   Egon Willighagen, http://sf.net/projects/jreferences/
    22   Richard Mahoney (for providing a test case)
    23 
    24   Edited by Sara Sprenkle to be more robust and handle more bibtex features.
    25   (c) 2003-01-15
    26 
    27   1.  Changed bibtex: tags to bibxml: tags.
    28   2.  Use xmlns:bibxml="http://bibtexml.sf.net/"
    29   3.  Allow spaces between @type and first {
    30   4.  "author" fields with multiple authors split by " and "
    31       are put in separate xml "bibxml:author" tags.
    32   5.  Option for Titles: words are capitalized
    33       only if first letter in title or capitalized inside braces
    34   6.  Removes braces from within field values
    35   7.  Ignores comments in bibtex file (including @comment{ or % )
    36   8.  Replaces some special latex tags, e.g., replaces ~ with '&#160;'
    37   9.  Handles bibtex @string abbreviations
    38         --> includes bibtex's default abbreviations for months
    39         --> does concatenation of abbr # " more " and " more " # abbr
    40   10. Handles @type( ... ) or @type{ ... }
    41   11. The keywords field is split on , or ; and put into separate xml
    42       "bibxml:keywords" tags
    43   12. Ignores @preamble
    44 
    45   Known Limitations
    46   1.  Does not transform Latex encoding like math mode and special
    47       latex symbols.
    48   2.  Does not parse author fields into first and last names.
    49       E.g., It does not do anything special to an author whose name is
    50       in the form LAST_NAME, FIRST_NAME
    51       In "author" tag, will show up as
    52       <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
    53   3.  Does not handle "crossref" fields other than to print
    54       <bibxml:crossref>...</bibxml:crossref>
    55   4.  Does not inform user of the input's format errors.  You just won't
    56       be able to transform the file later with XSL
    57 
    58   You will have to manually edit the XML output if you need to handle
    59   these (and unknown) limitations.
    60 
    61 """
    62 
    63 import string, re
    64 
    65 # set of valid name characters
    66 valid_name_chars = '[\w\-:]'
    67 
    68 #
    69 # define global regular expression variables
    70 #
    71 author_rex = re.compile('\s+and\s+')
    72 rembraces_rex = re.compile('[{}]')
    73 capitalize_rex = re.compile('({\w*})')
    74 
    75 # used by bibtexkeywords(data)
    76 keywords_rex = re.compile('[,;]')
    77 
    78 # used by concat_line(line)
    79 concatsplit_rex = re.compile('\s*#\s*')
    80 
    81 # split on {, }, or " in verify_out_of_braces
    82 delimiter_rex = re.compile('([{}"])',re.I)
    83 
    84 field_rex = re.compile('\s*(\w*)\s*=\s*(.*)')
    85 data_rex = re.compile('\s*(\w*)\s*=\s*([^,]*),?')
    86 
    87 url_rex = re.compile('\\\url\{([^}]*)\}')
    88 
    89 
    90 #
    91 # return the string parameter without braces
    92 #
    93 def transformurls(str):
    94     return url_rex.sub(r'<a href="\1">\1</a>', str)
    95 
    96 #
    97 # return the string parameter without braces
    98 #
    99 def removebraces(str):
   100     return rembraces_rex.sub('', str)
   101 
   102 #
   103 # latex-specific replacements
   104 # (do this after braces were removed)
   105 #
   106 def latexreplacements(line):
   107     line = string.replace(line, '~', '&nbsp;')
   108     line = string.replace(line, '\\\'a', '&aacute;')
   109     line = string.replace(line, '\\"a', '&auml;')
   110     line = string.replace(line, '\\\'e', '&eacute;')
   111     line = string.replace(line, '\\"e', '&euml;')
   112     line = string.replace(line, '\\\'i', '&iacute;')
   113     line = string.replace(line, '\\"i', '&iuml;')
   114     line = string.replace(line, '\\\'o', '&oacute;')
   115     line = string.replace(line, '\\"o', '&ouml;')
   116     line = string.replace(line, '\\\'u', '&uacute;')
   117     line = string.replace(line, '\\"u', '&uuml;')
   118     line = string.replace(line, '\\H o', '&otilde;')
   119     line = string.replace(line, '\\H u', '&uuml;')   # &utilde; does not exist
   120     line = string.replace(line, '\\\'A', '&Aacute;')
   121     line = string.replace(line, '\\"A', '&Auml;')
   122     line = string.replace(line, '\\\'E', '&Eacute;')
   123     line = string.replace(line, '\\"E', '&Euml;')
   124     line = string.replace(line, '\\\'I', '&Iacute;')
   125     line = string.replace(line, '\\"I', '&Iuml;')
   126     line = string.replace(line, '\\\'O', '&Oacute;')
   127     line = string.replace(line, '\\"O', '&Ouml;')
   128     line = string.replace(line, '\\\'U', '&Uacute;')
   129     line = string.replace(line, '\\"U', '&Uuml;')
   130     line = string.replace(line, '\\H O', '&Otilde;')
   131     line = string.replace(line, '\\H U', '&Uuml;')   # &Utilde; does not exist
   132 
   133     return line
   134 
   135 #
   136 # copy characters form a string decoding html expressions (&xyz;)
   137 #
   138 def copychars(str, ifrom, count):
   139     result = ''
   140     i = ifrom
   141     c = 0
   142     html_spec = False
   143     while (i < len(str)) and (c < count):
   144         if str[i] == '&':
   145             html_spec = True;
   146             if i+1 < len(str):
   147                 result += str[i+1]
   148             c += 1
   149             i += 2
   150         else:
   151             if not html_spec:
   152                 if ((str[i] >= 'A') and (str[i] <= 'Z')) or \
   153                    ((str[i] >= 'a') and (str[i] <= 'z')):
   154                     result += str[i]
   155                     c += 1
   156             elif str[i] == ';':
   157                 html_spec = False;
   158             i += 1
   159     
   160     return result
   161 
   162 
   163 # 
   164 # Handle a list of authors (separated by 'and').
   165 # It gives back an array of the follwing values:
   166 #  - num: the number of authors,
   167 #  - list: the list of the author names,
   168 #  - text: the bibtex text (separated by commas and/or 'and')
   169 #  - abbrev: abbreviation that can be used for indicate the
   170 #    bibliography entries
   171 #
   172 def bibtexauthor(data):
   173     result = {}
   174     bibtex = ''
   175     result['list'] = author_rex.split(data)
   176     result['num'] = len(result['list'])
   177     for i, author in enumerate(result['list']):
   178         # general transformations
   179         author = latexreplacements(removebraces(author.strip()))
   180         # transform "Xyz, A. B." to "A. B. Xyz"
   181         pos = author.find(',')
   182         if pos != -1:
   183             author = author[pos+1:].strip() + ' ' + author[:pos].strip()
   184         result['list'][i] = author
   185         bibtex += author + '#'
   186     bibtex = bibtex[:-1]
   187     if result['num'] > 1:
   188         ix = bibtex.rfind('#')
   189         if result['num'] == 2:
   190             bibtex = bibtex[:ix] + ' and ' + bibtex[ix+1:]
   191         else:
   192             bibtex = bibtex[:ix] + ', and ' + bibtex[ix+1:]
   193     bibtex = bibtex.replace('#', ', ')
   194     result['text'] = bibtex
   195     
   196     result['abbrev'] = ''
   197     for author in result['list']:
   198         pos = author.rfind(' ') + 1
   199         count = 1
   200         if result['num'] == 1:
   201             count = 3
   202         result['abbrev'] += copychars(author, pos, count)
   203 
   204     return result
   205 
   206 
   207 #
   208 # data = title string
   209 # @return the capitalized title (first letter is capitalized), rest are capitalized
   210 # only if capitalized inside braces
   211 #
   212 def capitalizetitle(data):
   213     title_list = capitalize_rex.split(data)
   214     title = ''
   215     count = 0
   216     for phrase in title_list:
   217          check = string.lstrip(phrase)
   218 
   219          # keep phrase's capitalization the same
   220          if check.find('{') == 0:
   221               title += removebraces(phrase)
   222          else:
   223          # first word --> capitalize first letter (after spaces)
   224               if count == 0:
   225                   title += check.capitalize()
   226               else:
   227                   title += phrase.lower()
   228          count = count + 1
   229 
   230     return title
   231 
   232 
   233 #
   234 # @return the bibtex for the title
   235 # @param data --> title string
   236 # braces are removed from title
   237 #
   238 def bibtextitle(data, entrytype):
   239     if entrytype in ('book', 'inbook'):
   240         title = removebraces(data.strip())
   241     else:
   242         title = removebraces(capitalizetitle(data.strip()))
   243     bibtex = title
   244     return bibtex
   245 
   246 
   247 #
   248 # function to compare entry lists
   249 #
   250 def entry_cmp(x, y):
   251     return cmp(x[0], y[0])
   252 
   253 
   254 #
   255 # print the XML for the transformed "filecont_source"
   256 #
   257 def bibtexdecoder(filecont_source):
   258     filecont = []
   259     file = []
   260     
   261     # want @<alphanumeric chars><spaces>{<spaces><any chars>,
   262     pubtype_rex = re.compile('@(\w*)\s*{\s*(.*),')
   263     endtype_rex = re.compile('}\s*$')
   264     endtag_rex = re.compile('^\s*}\s*$')
   265 
   266     bracefield_rex = re.compile('\s*(\w*)\s*=\s*(.*)')
   267     bracedata_rex = re.compile('\s*(\w*)\s*=\s*{(.*)},?')
   268 
   269     quotefield_rex = re.compile('\s*(\w*)\s*=\s*(.*)')
   270     quotedata_rex = re.compile('\s*(\w*)\s*=\s*"(.*)",?')
   271 
   272     for line in filecont_source:
   273         line = line[:-1]
   274 
   275         # encode character entities
   276         line = string.replace(line, '&', '&amp;')
   277         line = string.replace(line, '<', '&lt;')
   278         line = string.replace(line, '>', '&gt;')
   279 
   280         # start entry: publication type (store for later use)
   281         if pubtype_rex.match(line):
   282         # want @<alphanumeric chars><spaces>{<spaces><any chars>,
   283             entrycont = {}
   284             entry = []
   285             entrytype = pubtype_rex.sub('\g<1>',line)
   286             entrytype = string.lower(entrytype)
   287             # entryid   = pubtype_rex.sub('\g<2>', line)
   288 
   289         # end entry if just a }
   290         elif endtype_rex.match(line):
   291             # generate doxygen code for the entry
   292 
   293             # enty type related formattings
   294             if entrytype in ('book', 'inbook'):
   295                 entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
   296                 if not entrycont.has_key('author'):
   297                     entrycont['author'] = entrycont['editor']
   298                     entrycont['author']['text'] += ', editors'
   299             elif entrytype == 'article':
   300                 entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
   301             elif entrytype in ('inproceedings', 'incollection', 'conference'):
   302                 entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
   303             elif entrytype == 'techreport':
   304                 if not entrycont.has_key('type'):
   305                     entrycont['type'] = 'Technical report'
   306             elif entrytype == 'mastersthesis':
   307                 entrycont['type'] = 'Master\'s thesis'
   308             elif entrytype == 'phdthesis':
   309                 entrycont['type'] = 'PhD thesis'
   310 
   311             for eline in entrycont:
   312                 if eline != '':
   313                     eline = latexreplacements(eline)
   314 
   315             if entrycont.has_key('pages') and (entrycont['pages'] != ''):
   316                 entrycont['pages'] = string.replace(entrycont['pages'], '--', '-')
   317 
   318             if entrycont.has_key('author') and (entrycont['author'] != ''):
   319                 entry.append(entrycont['author']['text'] + '.')
   320             if entrycont.has_key('title') and (entrycont['title'] != ''):
   321                 entry.append(entrycont['title'] + '.')
   322             if entrycont.has_key('journal') and (entrycont['journal'] != ''):
   323                 entry.append(entrycont['journal'] + ',')
   324             if entrycont.has_key('booktitle') and (entrycont['booktitle'] != ''):
   325                 entry.append('In ' + entrycont['booktitle'] + ',')
   326             if entrycont.has_key('type') and (entrycont['type'] != ''):
   327                 eline = entrycont['type']
   328                 if entrycont.has_key('number') and (entrycont['number'] != ''):
   329                     eline += ' ' + entrycont['number']
   330                 eline += ','
   331                 entry.append(eline)
   332             if entrycont.has_key('institution') and (entrycont['institution'] != ''):
   333                 entry.append(entrycont['institution'] + ',')
   334             if entrycont.has_key('publisher') and (entrycont['publisher'] != ''):
   335                 entry.append(entrycont['publisher'] + ',')
   336             if entrycont.has_key('school') and (entrycont['school'] != ''):
   337                 entry.append(entrycont['school'] + ',')
   338             if entrycont.has_key('address') and (entrycont['address'] != ''):
   339                 entry.append(entrycont['address'] + ',')
   340             if entrycont.has_key('edition') and (entrycont['edition'] != ''):
   341                 entry.append(entrycont['edition'] + ' edition,')
   342             if entrycont.has_key('howpublished') and (entrycont['howpublished'] != ''):
   343                 entry.append(entrycont['howpublished'] + ',')
   344             if entrycont.has_key('volume') and (entrycont['volume'] != ''):
   345                 eline = entrycont['volume'];
   346                 if entrycont.has_key('number') and (entrycont['number'] != ''):
   347                     eline += '(' + entrycont['number'] + ')'
   348                 if entrycont.has_key('pages') and (entrycont['pages'] != ''):
   349                     eline += ':' + entrycont['pages']
   350                 eline += ','
   351                 entry.append(eline)
   352             else:
   353                 if entrycont.has_key('pages') and (entrycont['pages'] != ''):
   354                     entry.append('pages ' + entrycont['pages'] + ',')
   355             if entrycont.has_key('year') and (entrycont['year'] != ''):
   356                 if entrycont.has_key('month') and (entrycont['month'] != ''):
   357                     entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
   358                 else:
   359                     entry.append(entrycont['year'] + '.')
   360             if entrycont.has_key('note') and (entrycont['note'] != ''):
   361                 entry.append(entrycont['note'] + '.')
   362 
   363             # generate keys for sorting and for the output
   364             sortkey = ''
   365             bibkey = ''
   366             if entrycont.has_key('author'):
   367                 for author in entrycont['author']['list']:
   368                     sortkey += copychars(author, author.rfind(' ')+1, len(author))
   369                 bibkey = entrycont['author']['abbrev']
   370             else:
   371                 bibkey = 'x'
   372             if entrycont.has_key('year'):
   373                 sortkey += entrycont['year']
   374                 bibkey += entrycont['year'][-2:]
   375             if entrycont.has_key('title'):
   376                 sortkey += entrycont['title']
   377             if entrycont.has_key('key'):
   378                 sortkey = entrycont['key'] + sortkey
   379                 bibkey = entrycont['key']
   380             entry.insert(0, sortkey)
   381             entry.insert(1, bibkey)
   382            
   383             # add the entry to the file contents
   384             filecont.append(entry)
   385 
   386         else:
   387             # field, publication info
   388             field = ''
   389             data = ''
   390             
   391             # field = {data} entries
   392             if bracedata_rex.match(line):
   393                 field = bracefield_rex.sub('\g<1>', line)
   394                 field = string.lower(field)
   395                 data =  bracedata_rex.sub('\g<2>', line)
   396 
   397             # field = "data" entries
   398             elif quotedata_rex.match(line):
   399                 field = quotefield_rex.sub('\g<1>', line)
   400                 field = string.lower(field)
   401                 data =  quotedata_rex.sub('\g<2>', line)
   402 
   403             # field = data entries
   404             elif data_rex.match(line):
   405                 field = field_rex.sub('\g<1>', line)
   406                 field = string.lower(field)
   407                 data =  data_rex.sub('\g<2>', line)
   408             
   409             if field in ('author', 'editor'):
   410                 entrycont[field] = bibtexauthor(data)
   411                 line = ''
   412             elif field == 'title':
   413                 line = bibtextitle(data, entrytype)
   414             elif field != '':
   415                 line = removebraces(transformurls(data.strip()))
   416 
   417             if line != '':
   418                 line = latexreplacements(line)
   419                 entrycont[field] = line
   420 
   421 
   422     # sort entries
   423     filecont.sort(entry_cmp)
   424     
   425     # count the bibtex keys
   426     keytable = {}
   427     counttable = {}
   428     for entry in filecont:
   429         bibkey = entry[1]
   430         if not keytable.has_key(bibkey):
   431             keytable[bibkey] = 1
   432         else:
   433             keytable[bibkey] += 1
   434 
   435     for bibkey in keytable.keys():
   436         counttable[bibkey] = 0
   437     
   438     # generate output
   439     for entry in filecont:
   440         # generate output key form the bibtex key
   441         bibkey = entry[1]
   442         if keytable[bibkey] == 1:
   443             outkey = bibkey
   444         else:
   445             outkey = bibkey + chr(97 + counttable[bibkey])
   446         counttable[bibkey] += 1
   447         
   448         # append the entry code to the output
   449         file.append('<tr valign="top">\n' + \
   450                     '<td>[' + outkey + ']</td>')
   451         file.append('<td>')
   452         file.append('\\anchor ' + outkey)
   453         for line in entry[2:]:
   454             file.append(line)
   455         file.append('</td>\n</tr>')
   456         file.append('')
   457 
   458     return file
   459 
   460 
   461 #
   462 # return 1 iff abbr is in line but not inside braces or quotes
   463 # assumes that abbr appears only once on the line (out of braces and quotes)
   464 #
   465 def verify_out_of_braces(line, abbr):
   466 
   467     phrase_split = delimiter_rex.split(line)
   468 
   469     abbr_rex = re.compile( '\\b' + abbr + '\\b', re.I)
   470 
   471     open_brace = 0
   472     open_quote = 0
   473 
   474     for phrase in phrase_split:
   475         if phrase == "{":
   476             open_brace = open_brace + 1
   477         elif phrase == "}":
   478             open_brace = open_brace - 1
   479         elif phrase == '"':
   480             if open_quote == 1:
   481                 open_quote = 0
   482             else:
   483                 open_quote = 1
   484         elif abbr_rex.search(phrase):
   485             if open_brace == 0 and open_quote == 0:
   486                 return 1
   487 
   488     return 0
   489 
   490 
   491 #
   492 # a line in the form phrase1 # phrase2 # ... # phrasen
   493 # is returned as phrase1 phrase2 ... phrasen
   494 # with the correct punctuation
   495 # Bug: Doesn't always work with multiple abbreviations plugged in
   496 #
   497 def concat_line(line):
   498     # only look at part after equals
   499     field = field_rex.sub('\g<1>',line)
   500     rest = field_rex.sub('\g<2>',line)
   501 
   502     concat_line = field + ' ='
   503 
   504     pound_split = concatsplit_rex.split(rest)
   505 
   506     phrase_count = 0
   507     length = len(pound_split)
   508 
   509     for phrase in pound_split:
   510         phrase = phrase.strip()
   511         if phrase_count != 0:
   512             if phrase.startswith('"') or phrase.startswith('{'):
   513                 phrase = phrase[1:]
   514         elif phrase.startswith('"'):
   515             phrase = phrase.replace('"','{',1)
   516 
   517         if phrase_count != length-1:
   518             if phrase.endswith('"') or phrase.endswith('}'):
   519                 phrase = phrase[:-1]
   520         else:
   521             if phrase.endswith('"'):
   522                 phrase = phrase[:-1]
   523                 phrase = phrase + "}"
   524             elif phrase.endswith('",'):
   525                 phrase = phrase[:-2]
   526                 phrase = phrase + "},"
   527 
   528         # if phrase did have \#, add the \# back
   529         if phrase.endswith('\\'):
   530             phrase = phrase + "#"
   531         concat_line = concat_line + ' ' + phrase
   532 
   533         phrase_count = phrase_count + 1
   534 
   535     return concat_line
   536 
   537 
   538 #
   539 # substitute abbreviations into filecont
   540 # @param filecont_source - string of data from file
   541 #
   542 def bibtex_replace_abbreviations(filecont_source):
   543     filecont = filecont_source.splitlines()
   544 
   545     #  These are defined in bibtex, so we'll define them too
   546     abbr_list = ['jan','feb','mar','apr','may','jun',
   547                  'jul','aug','sep','oct','nov','dec']
   548     value_list = ['January','February','March','April',
   549                   'May','June','July','August','September',
   550                   'October','November','December']
   551 
   552     abbr_rex = []
   553     total_abbr_count = 0
   554 
   555     front = '\\b'
   556     back = '(,?)\\b'
   557 
   558     for x in abbr_list:
   559         abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )
   560         total_abbr_count = total_abbr_count + 1
   561 
   562 
   563     abbrdef_rex = re.compile('\s*@string\s*{\s*('+ valid_name_chars +'*)\s*=(.*)',
   564                              re.I)
   565 
   566     comment_rex = re.compile('@comment\s*{',re.I)
   567     preamble_rex = re.compile('@preamble\s*{',re.I)
   568 
   569     waiting_for_end_string = 0
   570     i = 0
   571     filecont2 = ''
   572 
   573     for line in filecont:
   574         if line == ' ' or line == '':
   575             continue
   576 
   577         if waiting_for_end_string:
   578             if re.search('}',line):
   579                 waiting_for_end_string = 0
   580                 continue
   581 
   582         if abbrdef_rex.search(line):
   583             abbr = abbrdef_rex.sub('\g<1>', line)
   584 
   585             if abbr_list.count(abbr) == 0:
   586                 val = abbrdef_rex.sub('\g<2>', line)
   587                 abbr_list.append(abbr)
   588                 value_list.append(string.strip(val))
   589                 abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )
   590                 total_abbr_count = total_abbr_count + 1
   591             waiting_for_end_string = 1
   592             continue
   593 
   594         if comment_rex.search(line):
   595             waiting_for_end_string = 1
   596             continue
   597 
   598         if preamble_rex.search(line):
   599             waiting_for_end_string = 1
   600             continue
   601 
   602 
   603         # replace subsequent abbreviations with the value
   604         abbr_count = 0
   605 
   606         for x in abbr_list:
   607 
   608             if abbr_rex[abbr_count].search(line):
   609                 if verify_out_of_braces(line,abbr_list[abbr_count]) == 1:
   610                     line = abbr_rex[abbr_count].sub( value_list[abbr_count] + '\g<1>', line)
   611                 # Check for # concatenations
   612                 if concatsplit_rex.search(line):
   613                     line = concat_line(line)
   614             abbr_count = abbr_count + 1
   615 
   616 
   617         filecont2 = filecont2 + line + '\n'
   618         i = i+1
   619 
   620 
   621     # Do one final pass over file
   622 
   623     # make sure that didn't end up with {" or }" after the substitution
   624     filecont2 = filecont2.replace('{"','{{')
   625     filecont2 = filecont2.replace('"}','}}')
   626 
   627     afterquotevalue_rex = re.compile('"\s*,\s*')
   628     afterbrace_rex = re.compile('"\s*}')
   629     afterbracevalue_rex = re.compile('(=\s*{[^=]*)},\s*')
   630 
   631     # add new lines to data that changed because of abbreviation substitutions
   632     filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
   633     filecont2 = afterbrace_rex.sub('"\n}', filecont2)
   634     filecont2 = afterbracevalue_rex.sub('\g<1>},\n', filecont2)
   635 
   636     return filecont2
   637 
   638 #
   639 # convert @type( ... ) to @type{ ... }
   640 #
   641 def no_outer_parens(filecont):
   642 
   643     # do checking for open parens
   644     # will convert to braces
   645     paren_split = re.split('([(){}])',filecont)
   646 
   647     open_paren_count = 0
   648     open_type = 0
   649     look_next = 0
   650 
   651     # rebuild filecont
   652     filecont = ''
   653 
   654     at_rex = re.compile('@\w*')
   655 
   656     for phrase in paren_split:
   657         if look_next == 1:
   658             if phrase == '(':
   659                 phrase = '{'
   660                 open_paren_count = open_paren_count + 1
   661             else:
   662                 open_type = 0
   663             look_next = 0
   664 
   665         if phrase == '(':
   666             open_paren_count = open_paren_count + 1
   667 
   668         elif phrase == ')':
   669             open_paren_count = open_paren_count - 1
   670             if open_type == 1 and open_paren_count == 0:
   671                 phrase = '}'
   672                 open_type = 0
   673 
   674         elif at_rex.search( phrase ):
   675             open_type = 1
   676             look_next = 1
   677 
   678         filecont = filecont + phrase
   679 
   680     return filecont
   681 
   682 
   683 #
   684 # make all whitespace into just one space
   685 # format the bibtex file into a usable form.
   686 #
   687 def bibtexwasher(filecont_source):
   688 
   689     space_rex = re.compile('\s+')
   690     comment_rex = re.compile('\s*%')
   691 
   692     filecont = []
   693 
   694     # remove trailing and excessive whitespace
   695     # ignore comments
   696     for line in filecont_source:
   697         line = string.strip(line)
   698         line = space_rex.sub(' ', line)
   699         # ignore comments
   700         if not comment_rex.match(line) and line != '':
   701             filecont.append(' '+ line)
   702 
   703     filecont = string.join(filecont, '')
   704 
   705     # the file is in one long string
   706 
   707     filecont = no_outer_parens(filecont)
   708 
   709     #
   710     # split lines according to preferred syntax scheme
   711     #
   712     filecont = re.sub('(=\s*{[^=]*)},', '\g<1>},\n', filecont)
   713 
   714     # add new lines after commas that are after values
   715     filecont = re.sub('"\s*,', '",\n', filecont)
   716     filecont = re.sub('=\s*([\w\d]+)\s*,', '= \g<1>,\n', filecont)
   717     filecont = re.sub('(@\w*)\s*({(\s*)[^,\s]*)\s*,',
   718                           '\n\n\g<1>\g<2>,\n', filecont)
   719 
   720     # add new lines after }
   721     filecont = re.sub('"\s*}','"\n}\n', filecont)
   722     filecont = re.sub('}\s*,','},\n', filecont)
   723 
   724 
   725     filecont = re.sub('@(\w*)', '\n@\g<1>', filecont)
   726 
   727     # character encoding, reserved latex characters
   728     filecont = re.sub('{\\\&}', '&', filecont)
   729     filecont = re.sub('\\\&', '&', filecont)
   730 
   731     # do checking for open braces to get format correct
   732     open_brace_count = 0
   733     brace_split = re.split('([{}])',filecont)
   734 
   735     # rebuild filecont
   736     filecont = ''
   737 
   738     for phrase in brace_split:
   739         if phrase == '{':
   740             open_brace_count = open_brace_count + 1
   741         elif phrase == '}':
   742             open_brace_count = open_brace_count - 1
   743             if open_brace_count == 0:
   744                 filecont = filecont + '\n'
   745 
   746         filecont = filecont + phrase
   747 
   748     filecont2 = bibtex_replace_abbreviations(filecont)
   749 
   750     # gather
   751     filecont = filecont2.splitlines()
   752     i=0
   753     j=0         # count the number of blank lines
   754     for line in filecont:
   755         # ignore blank lines
   756         if line == '' or line == ' ':
   757             j = j+1
   758             continue
   759         filecont[i] = line + '\n'
   760         i = i+1
   761 
   762     # get rid of the extra stuff at the end of the array
   763     # (The extra stuff are duplicates that are in the array because
   764     # blank lines were removed.)
   765     length = len( filecont)
   766     filecont[length-j:length] = []
   767 
   768     return filecont
   769 
   770 
   771 def filehandler(filepath):
   772     try:
   773         fd = open(filepath, 'r')
   774         filecont_source = fd.readlines()
   775         fd.close()
   776     except:
   777         print 'Could not open file:', filepath
   778     washeddata = bibtexwasher(filecont_source)
   779     outdata = bibtexdecoder(washeddata)
   780     print '/**'
   781     print '\page references References'
   782     print
   783     print '<table border="0" cellspacing="5px" width="100%">'
   784     print
   785     for line in outdata:
   786         print line
   787     print '</table>'
   788     print
   789     print '*/'
   790 
   791 
   792 # main program
   793 
   794 def main():
   795     import sys
   796     if sys.argv[1:]:
   797         filepath = sys.argv[1]
   798     else:
   799         print "No input file"
   800         sys.exit()
   801     filehandler(filepath)
   802 
   803 if __name__ == "__main__": main()
   804 
   805 
   806 # end python script