lemon: scripts/bib2dox.py@f8c468367dab

     1 #!/usr/bin/env /usr/local/Python/bin/python2.1

     2 """

     3   BibTeX to Doxygen converter

     4   Usage: python bib2dox.py bibfile.bib > bibfile.dox

     6   This code is the modification of the BibTeX to XML converter

     7   by Vidar Bronken Gundersen et al. See the original copyright notices below.

     9   **********************************************************************

    11   Decoder for bibliographic data, BibTeX

    12   Usage: python bibtex2xml.py bibfile.bib > bibfile.xml

    14   v.8

    15   (c)2002-06-23 Vidar Bronken Gundersen

    16   http://bibtexml.sf.net/

    17   Reuse approved as long as this notification is kept.

    18   Licence: GPL.

    20   Contributions/thanks to:

    21   Egon Willighagen, http://sf.net/projects/jreferences/

    22   Richard Mahoney (for providing a test case)

    24   Edited by Sara Sprenkle to be more robust and handle more bibtex features.

    25   (c) 2003-01-15

    27   1.  Changed bibtex: tags to bibxml: tags.

    28   2.  Use xmlns:bibxml="http://bibtexml.sf.net/"

    29   3.  Allow spaces between @type and first {

    30   4.  "author" fields with multiple authors split by " and "

    31       are put in separate xml "bibxml:author" tags.

    32   5.  Option for Titles: words are capitalized

    33       only if first letter in title or capitalized inside braces

    34   6.  Removes braces from within field values

    35   7.  Ignores comments in bibtex file (including @comment{ or % )

    36   8.  Replaces some special latex tags, e.g., replaces ~ with '&#160;'

    37   9.  Handles bibtex @string abbreviations

    38         --> includes bibtex's default abbreviations for months

    39         --> does concatenation of abbr # " more " and " more " # abbr

    40   10. Handles @type( ... ) or @type{ ... }

    41   11. The keywords field is split on , or ; and put into separate xml

    42       "bibxml:keywords" tags

    43   12. Ignores @preamble

    45   Known Limitations

    46   1.  Does not transform Latex encoding like math mode and special

    47       latex symbols.

    48   2.  Does not parse author fields into first and last names.

    49       E.g., It does not do anything special to an author whose name is

    50       in the form LAST_NAME, FIRST_NAME

    51       In "author" tag, will show up as

    52       <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>

    53   3.  Does not handle "crossref" fields other than to print

    54       <bibxml:crossref>...</bibxml:crossref>

    55   4.  Does not inform user of the input's format errors.  You just won't

    56       be able to transform the file later with XSL

    58   You will have to manually edit the XML output if you need to handle

    59   these (and unknown) limitations.

    61 """

    63 import string, re

    65 # set of valid name characters

    66 valid_name_chars = '[\w\-:]'

    68 #

    69 # define global regular expression variables

    70 #

    71 author_rex = re.compile('\s+and\s+')

    72 rembraces_rex = re.compile('[{}]')

    73 capitalize_rex = re.compile('({\w*})')

    75 # used by bibtexkeywords(data)

    76 keywords_rex = re.compile('[,;]')

    78 # used by concat_line(line)

    79 concatsplit_rex = re.compile('\s*#\s*')

    81 # split on {, }, or " in verify_out_of_braces

    82 delimiter_rex = re.compile('([{}"])',re.I)

    84 field_rex = re.compile('\s*(\w*)\s*=\s*(.*)')

    85 data_rex = re.compile('\s*(\w*)\s*=\s*([^,]*),?')

    87 url_rex = re.compile('\\\url\{([^}]*)\}')

    90 #

    91 # return the string parameter without braces

    92 #

    93 def transformurls(str):

    94     return url_rex.sub(r'<a href="\1">\1</a>', str)

    96 #

    97 # return the string parameter without braces

    98 #

    99 def removebraces(str):

   100     return rembraces_rex.sub('', str)

   102 #

   103 # latex-specific replacements

   104 # (do this after braces were removed)

   105 #

   106 def latexreplacements(line):

   107     line = string.replace(line, '~', '&nbsp;')

   108     line = string.replace(line, '\\\'a', '&aacute;')

   109     line = string.replace(line, '\\"a', '&auml;')

   110     line = string.replace(line, '\\\'e', '&eacute;')

   111     line = string.replace(line, '\\"e', '&euml;')

   112     line = string.replace(line, '\\\'i', '&iacute;')

   113     line = string.replace(line, '\\"i', '&iuml;')

   114     line = string.replace(line, '\\\'o', '&oacute;')

   115     line = string.replace(line, '\\"o', '&ouml;')

   116     line = string.replace(line, '\\\'u', '&uacute;')

   117     line = string.replace(line, '\\"u', '&uuml;')

   118     line = string.replace(line, '\\H o', '&otilde;')

   119     line = string.replace(line, '\\H u', '&uuml;')   # &utilde; does not exist

   120     line = string.replace(line, '\\\'A', '&Aacute;')

   121     line = string.replace(line, '\\"A', '&Auml;')

   122     line = string.replace(line, '\\\'E', '&Eacute;')

   123     line = string.replace(line, '\\"E', '&Euml;')

   124     line = string.replace(line, '\\\'I', '&Iacute;')

   125     line = string.replace(line, '\\"I', '&Iuml;')

   126     line = string.replace(line, '\\\'O', '&Oacute;')

   127     line = string.replace(line, '\\"O', '&Ouml;')

   128     line = string.replace(line, '\\\'U', '&Uacute;')

   129     line = string.replace(line, '\\"U', '&Uuml;')

   130     line = string.replace(line, '\\H O', '&Otilde;')

   131     line = string.replace(line, '\\H U', '&Uuml;')   # &Utilde; does not exist

   133     return line

   135 #

   136 # copy characters form a string decoding html expressions (&xyz;)

   137 #

   138 def copychars(str, ifrom, count):

   139     result = ''

   140     i = ifrom

   141     c = 0

   142     html_spec = False

   143     while (i < len(str)) and (c < count):

   144         if str[i] == '&':

   145             html_spec = True;

   146             if i+1 < len(str):

   147                 result += str[i+1]

   148             c += 1

   149             i += 2

   150         else:

   151             if not html_spec:

   152                 if ((str[i] >= 'A') and (str[i] <= 'Z')) or \

   153                    ((str[i] >= 'a') and (str[i] <= 'z')):

   154                     result += str[i]

   155                     c += 1

   156             elif str[i] == ';':

   157                 html_spec = False;

   158             i += 1

   160     return result

   163 #

   164 # Handle a list of authors (separated by 'and').

   165 # It gives back an array of the follwing values:

   166 #  - num: the number of authors,

   167 #  - list: the list of the author names,

   168 #  - text: the bibtex text (separated by commas and/or 'and')

   169 #  - abbrev: abbreviation that can be used for indicate the

   170 #    bibliography entries

   171 #

   172 def bibtexauthor(data):

   173     result = {}

   174     bibtex = ''

   175     result['list'] = author_rex.split(data)

   176     result['num'] = len(result['list'])

   177     for i, author in enumerate(result['list']):

   178         # general transformations

   179         author = latexreplacements(removebraces(author.strip()))

   180         # transform "Xyz, A. B." to "A. B. Xyz"

   181         pos = author.find(',')

   182         if pos != -1:

   183             author = author[pos+1:].strip() + ' ' + author[:pos].strip()

   184         result['list'][i] = author

   185         bibtex += author + '#'

   186     bibtex = bibtex[:-1]

   187     if result['num'] > 1:

   188         ix = bibtex.rfind('#')

   189         if result['num'] == 2:

   190             bibtex = bibtex[:ix] + ' and ' + bibtex[ix+1:]

   191         else:

   192             bibtex = bibtex[:ix] + ', and ' + bibtex[ix+1:]

   193     bibtex = bibtex.replace('#', ', ')

   194     result['text'] = bibtex

   196     result['abbrev'] = ''

   197     for author in result['list']:

   198         pos = author.rfind(' ') + 1

   199         count = 1

   200         if result['num'] == 1:

   201             count = 3

   202         result['abbrev'] += copychars(author, pos, count)

   204     return result

   207 #

   208 # data = title string

   209 # @return the capitalized title (first letter is capitalized), rest are capitalized

   210 # only if capitalized inside braces

   211 #

   212 def capitalizetitle(data):

   213     title_list = capitalize_rex.split(data)

   214     title = ''

   215     count = 0

   216     for phrase in title_list:

   217          check = string.lstrip(phrase)

   219          # keep phrase's capitalization the same

   220          if check.find('{') == 0:

   221               title += removebraces(phrase)

   222          else:

   223          # first word --> capitalize first letter (after spaces)

   224               if count == 0:

   225                   title += check.capitalize()

   226               else:

   227                   title += phrase.lower()

   228          count = count + 1

   230     return title

   233 #

   234 # @return the bibtex for the title

   235 # @param data --> title string

   236 # braces are removed from title

   237 #

   238 def bibtextitle(data, entrytype):

   239     if entrytype in ('book', 'inbook'):

   240         title = removebraces(data.strip())

   241     else:

   242         title = removebraces(capitalizetitle(data.strip()))

   243     bibtex = title

   244     return bibtex

   247 #

   248 # function to compare entry lists

   249 #

   250 def entry_cmp(x, y):

   251     return cmp(x[0], y[0])

   254 #

   255 # print the XML for the transformed "filecont_source"

   256 #

   257 def bibtexdecoder(filecont_source):

   258     filecont = []

   259     file = []

   261     # want @<alphanumeric chars><spaces>{<spaces><any chars>,

   262     pubtype_rex = re.compile('@(\w*)\s*{\s*(.*),')

   263     endtype_rex = re.compile('}\s*$')

   264     endtag_rex = re.compile('^\s*}\s*$')

   266     bracefield_rex = re.compile('\s*(\w*)\s*=\s*(.*)')

   267     bracedata_rex = re.compile('\s*(\w*)\s*=\s*{(.*)},?')

   269     quotefield_rex = re.compile('\s*(\w*)\s*=\s*(.*)')

   270     quotedata_rex = re.compile('\s*(\w*)\s*=\s*"(.*)",?')

   272     for line in filecont_source:

   273         line = line[:-1]

   275         # encode character entities

   276         line = string.replace(line, '&', '&amp;')

   277         line = string.replace(line, '<', '&lt;')

   278         line = string.replace(line, '>', '&gt;')

   280         # start entry: publication type (store for later use)

   281         if pubtype_rex.match(line):

   282         # want @<alphanumeric chars><spaces>{<spaces><any chars>,

   283             entrycont = {}

   284             entry = []

   285             entrytype = pubtype_rex.sub('\g<1>',line)

   286             entrytype = string.lower(entrytype)

   287             # entryid   = pubtype_rex.sub('\g<2>', line)

   289         # end entry if just a }

   290         elif endtype_rex.match(line):

   291             # generate doxygen code for the entry

   293             # enty type related formattings

   294             if entrytype in ('book', 'inbook'):

   295                 entrycont['title'] = '<em>' + entrycont['title'] + '</em>'

   296                 if not entrycont.has_key('author'):

   297                     entrycont['author'] = entrycont['editor']

   298                     entrycont['author']['text'] += ', editors'

   299             elif entrytype == 'article':

   300                 entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'

   301             elif entrytype in ('inproceedings', 'incollection', 'conference'):

   302                 entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'

   303             elif entrytype == 'techreport':

   304                 if not entrycont.has_key('type'):

   305                     entrycont['type'] = 'Technical report'

   306             elif entrytype == 'mastersthesis':

   307                 entrycont['type'] = 'Master\'s thesis'

   308             elif entrytype == 'phdthesis':

   309                 entrycont['type'] = 'PhD thesis'

   311             for eline in entrycont:

   312                 if eline != '':

   313                     eline = latexreplacements(eline)

   315             if entrycont.has_key('pages') and (entrycont['pages'] != ''):

   316                 entrycont['pages'] = string.replace(entrycont['pages'], '--', '-')

   318             if entrycont.has_key('author') and (entrycont['author'] != ''):

   319                 entry.append(entrycont['author']['text'] + '.')

   320             if entrycont.has_key('title') and (entrycont['title'] != ''):

   321                 entry.append(entrycont['title'] + '.')

   322             if entrycont.has_key('journal') and (entrycont['journal'] != ''):

   323                 entry.append(entrycont['journal'] + ',')

   324             if entrycont.has_key('booktitle') and (entrycont['booktitle'] != ''):

   325                 entry.append('In ' + entrycont['booktitle'] + ',')

   326             if entrycont.has_key('type') and (entrycont['type'] != ''):

   327                 eline = entrycont['type']

   328                 if entrycont.has_key('number') and (entrycont['number'] != ''):

   329                     eline += ' ' + entrycont['number']

   330                 eline += ','

   331                 entry.append(eline)

   332             if entrycont.has_key('institution') and (entrycont['institution'] != ''):

   333                 entry.append(entrycont['institution'] + ',')

   334             if entrycont.has_key('publisher') and (entrycont['publisher'] != ''):

   335                 entry.append(entrycont['publisher'] + ',')

   336             if entrycont.has_key('school') and (entrycont['school'] != ''):

   337                 entry.append(entrycont['school'] + ',')

   338             if entrycont.has_key('address') and (entrycont['address'] != ''):

   339                 entry.append(entrycont['address'] + ',')

   340             if entrycont.has_key('edition') and (entrycont['edition'] != ''):

   341                 entry.append(entrycont['edition'] + ' edition,')

   342             if entrycont.has_key('howpublished') and (entrycont['howpublished'] != ''):

   343                 entry.append(entrycont['howpublished'] + ',')

   344             if entrycont.has_key('volume') and (entrycont['volume'] != ''):

   345                 eline = entrycont['volume'];

   346                 if entrycont.has_key('number') and (entrycont['number'] != ''):

   347                     eline += '(' + entrycont['number'] + ')'

   348                 if entrycont.has_key('pages') and (entrycont['pages'] != ''):

   349                     eline += ':' + entrycont['pages']

   350                 eline += ','

   351                 entry.append(eline)

   352             else:

   353                 if entrycont.has_key('pages') and (entrycont['pages'] != ''):

   354                     entry.append('pages ' + entrycont['pages'] + ',')

   355             if entrycont.has_key('year') and (entrycont['year'] != ''):

   356                 if entrycont.has_key('month') and (entrycont['month'] != ''):

   357                     entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')

   358                 else:

   359                     entry.append(entrycont['year'] + '.')

   360             if entrycont.has_key('note') and (entrycont['note'] != ''):

   361                 entry.append(entrycont['note'] + '.')

   363             # generate keys for sorting and for the output

   364             sortkey = ''

   365             bibkey = ''

   366             if entrycont.has_key('author'):

   367                 for author in entrycont['author']['list']:

   368                     sortkey += copychars(author, author.rfind(' ')+1, len(author))

   369                 bibkey = entrycont['author']['abbrev']

   370             else:

   371                 bibkey = 'x'

   372             if entrycont.has_key('year'):

   373                 sortkey += entrycont['year']

   374                 bibkey += entrycont['year'][-2:]

   375             if entrycont.has_key('title'):

   376                 sortkey += entrycont['title']

   377             if entrycont.has_key('key'):

   378                 sortkey = entrycont['key'] + sortkey

   379                 bibkey = entrycont['key']

   380             entry.insert(0, sortkey)

   381             entry.insert(1, bibkey)

   383             # add the entry to the file contents

   384             filecont.append(entry)

   386         else:

   387             # field, publication info

   388             field = ''

   389             data = ''

   391             # field = {data} entries

   392             if bracedata_rex.match(line):

   393                 field = bracefield_rex.sub('\g<1>', line)

   394                 field = string.lower(field)

   395                 data =  bracedata_rex.sub('\g<2>', line)

   397             # field = "data" entries

   398             elif quotedata_rex.match(line):

   399                 field = quotefield_rex.sub('\g<1>', line)

   400                 field = string.lower(field)

   401                 data =  quotedata_rex.sub('\g<2>', line)

   403             # field = data entries

   404             elif data_rex.match(line):

   405                 field = field_rex.sub('\g<1>', line)

   406                 field = string.lower(field)

   407                 data =  data_rex.sub('\g<2>', line)

   409             if field in ('author', 'editor'):

   410                 entrycont[field] = bibtexauthor(data)

   411                 line = ''

   412             elif field == 'title':

   413                 line = bibtextitle(data, entrytype)

   414             elif field != '':

   415                 line = removebraces(transformurls(data.strip()))

   417             if line != '':

   418                 line = latexreplacements(line)

   419                 entrycont[field] = line

   422     # sort entries

   423     filecont.sort(entry_cmp)

   425     # count the bibtex keys

   426     keytable = {}

   427     counttable = {}

   428     for entry in filecont:

   429         bibkey = entry[1]

   430         if not keytable.has_key(bibkey):

   431             keytable[bibkey] = 1

   432         else:

   433             keytable[bibkey] += 1

   435     for bibkey in keytable.keys():

   436         counttable[bibkey] = 0

   438     # generate output

   439     for entry in filecont:

   440         # generate output key form the bibtex key

   441         bibkey = entry[1]

   442         if keytable[bibkey] == 1:

   443             outkey = bibkey

   444         else:

   445             outkey = bibkey + chr(97 + counttable[bibkey])

   446         counttable[bibkey] += 1

   448         # append the entry code to the output

   449         file.append('<tr valign="top">\n' + \

   450                     '<td>[' + outkey + ']</td>')

   451         file.append('<td>')

   452         file.append('\\anchor ' + outkey)

   453         for line in entry[2:]:

   454             file.append(line)

   455         file.append('</td>\n</tr>')

   456         file.append('')

   458     return file

   461 #

   462 # return 1 iff abbr is in line but not inside braces or quotes

   463 # assumes that abbr appears only once on the line (out of braces and quotes)

   464 #

   465 def verify_out_of_braces(line, abbr):

   467     phrase_split = delimiter_rex.split(line)

   469     abbr_rex = re.compile( '\\b' + abbr + '\\b', re.I)

   471     open_brace = 0

   472     open_quote = 0

   474     for phrase in phrase_split:

   475         if phrase == "{":

   476             open_brace = open_brace + 1

   477         elif phrase == "}":

   478             open_brace = open_brace - 1

   479         elif phrase == '"':

   480             if open_quote == 1:

   481                 open_quote = 0

   482             else:

   483                 open_quote = 1

   484         elif abbr_rex.search(phrase):

   485             if open_brace == 0 and open_quote == 0:

   486                 return 1

   488     return 0

   491 #

   492 # a line in the form phrase1 # phrase2 # ... # phrasen

   493 # is returned as phrase1 phrase2 ... phrasen

   494 # with the correct punctuation

   495 # Bug: Doesn't always work with multiple abbreviations plugged in

   496 #

   497 def concat_line(line):

   498     # only look at part after equals

   499     field = field_rex.sub('\g<1>',line)

   500     rest = field_rex.sub('\g<2>',line)

   502     concat_line = field + ' ='

   504     pound_split = concatsplit_rex.split(rest)

   506     phrase_count = 0

   507     length = len(pound_split)

   509     for phrase in pound_split:

   510         phrase = phrase.strip()

   511         if phrase_count != 0:

   512             if phrase.startswith('"') or phrase.startswith('{'):

   513                 phrase = phrase[1:]

   514         elif phrase.startswith('"'):

   515             phrase = phrase.replace('"','{',1)

   517         if phrase_count != length-1:

   518             if phrase.endswith('"') or phrase.endswith('}'):

   519                 phrase = phrase[:-1]

   520         else:

   521             if phrase.endswith('"'):

   522                 phrase = phrase[:-1]

   523                 phrase = phrase + "}"

   524             elif phrase.endswith('",'):

   525                 phrase = phrase[:-2]

   526                 phrase = phrase + "},"

   528         # if phrase did have \#, add the \# back

   529         if phrase.endswith('\\'):

   530             phrase = phrase + "#"

   531         concat_line = concat_line + ' ' + phrase

   533         phrase_count = phrase_count + 1

   535     return concat_line

   538 #

   539 # substitute abbreviations into filecont

   540 # @param filecont_source - string of data from file

   541 #

   542 def bibtex_replace_abbreviations(filecont_source):

   543     filecont = filecont_source.splitlines()

   545     #  These are defined in bibtex, so we'll define them too

   546     abbr_list = ['jan','feb','mar','apr','may','jun',

   547                  'jul','aug','sep','oct','nov','dec']

   548     value_list = ['January','February','March','April',

   549                   'May','June','July','August','September',

   550                   'October','November','December']

   552     abbr_rex = []

   553     total_abbr_count = 0

   555     front = '\\b'

   556     back = '(,?)\\b'

   558     for x in abbr_list:

   559         abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )

   560         total_abbr_count = total_abbr_count + 1

   563     abbrdef_rex = re.compile('\s*@string\s*{\s*('+ valid_name_chars +'*)\s*=(.*)',

   564                              re.I)

   566     comment_rex = re.compile('@comment\s*{',re.I)

   567     preamble_rex = re.compile('@preamble\s*{',re.I)

   569     waiting_for_end_string = 0

   570     i = 0

   571     filecont2 = ''

   573     for line in filecont:

   574         if line == ' ' or line == '':

   575             continue

   577         if waiting_for_end_string:

   578             if re.search('}',line):

   579                 waiting_for_end_string = 0

   580                 continue

   582         if abbrdef_rex.search(line):

   583             abbr = abbrdef_rex.sub('\g<1>', line)

   585             if abbr_list.count(abbr) == 0:

   586                 val = abbrdef_rex.sub('\g<2>', line)

   587                 abbr_list.append(abbr)

   588                 value_list.append(string.strip(val))

   589                 abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )

   590                 total_abbr_count = total_abbr_count + 1

   591             waiting_for_end_string = 1

   592             continue

   594         if comment_rex.search(line):

   595             waiting_for_end_string = 1

   596             continue

   598         if preamble_rex.search(line):

   599             waiting_for_end_string = 1

   600             continue

   603         # replace subsequent abbreviations with the value

   604         abbr_count = 0

   606         for x in abbr_list:

   608             if abbr_rex[abbr_count].search(line):

   609                 if verify_out_of_braces(line,abbr_list[abbr_count]) == 1:

   610                     line = abbr_rex[abbr_count].sub( value_list[abbr_count] + '\g<1>', line)

   611                 # Check for # concatenations

   612                 if concatsplit_rex.search(line):

   613                     line = concat_line(line)

   614             abbr_count = abbr_count + 1

   617         filecont2 = filecont2 + line + '\n'

   618         i = i+1

   621     # Do one final pass over file

   623     # make sure that didn't end up with {" or }" after the substitution

   624     filecont2 = filecont2.replace('{"','{{')

   625     filecont2 = filecont2.replace('"}','}}')

   627     afterquotevalue_rex = re.compile('"\s*,\s*')

   628     afterbrace_rex = re.compile('"\s*}')

   629     afterbracevalue_rex = re.compile('(=\s*{[^=]*)},\s*')

   631     # add new lines to data that changed because of abbreviation substitutions

   632     filecont2 = afterquotevalue_rex.sub('",\n', filecont2)

   633     filecont2 = afterbrace_rex.sub('"\n}', filecont2)

   634     filecont2 = afterbracevalue_rex.sub('\g<1>},\n', filecont2)

   636     return filecont2

   638 #

   639 # convert @type( ... ) to @type{ ... }

   640 #

   641 def no_outer_parens(filecont):

   643     # do checking for open parens

   644     # will convert to braces

   645     paren_split = re.split('([(){}])',filecont)

   647     open_paren_count = 0

   648     open_type = 0

   649     look_next = 0

   651     # rebuild filecont

   652     filecont = ''

   654     at_rex = re.compile('@\w*')

   656     for phrase in paren_split:

   657         if look_next == 1:

   658             if phrase == '(':

   659                 phrase = '{'

   660                 open_paren_count = open_paren_count + 1

   661             else:

   662                 open_type = 0

   663             look_next = 0

   665         if phrase == '(':

   666             open_paren_count = open_paren_count + 1

   668         elif phrase == ')':

   669             open_paren_count = open_paren_count - 1

   670             if open_type == 1 and open_paren_count == 0:

   671                 phrase = '}'

   672                 open_type = 0

   674         elif at_rex.search( phrase ):

   675             open_type = 1

   676             look_next = 1

   678         filecont = filecont + phrase

   680     return filecont

   683 #

   684 # make all whitespace into just one space

   685 # format the bibtex file into a usable form.

   686 #

   687 def bibtexwasher(filecont_source):

   689     space_rex = re.compile('\s+')

   690     comment_rex = re.compile('\s*%')

   692     filecont = []

   694     # remove trailing and excessive whitespace

   695     # ignore comments

   696     for line in filecont_source:

   697         line = string.strip(line)

   698         line = space_rex.sub(' ', line)

   699         # ignore comments

   700         if not comment_rex.match(line) and line != '':

   701             filecont.append(' '+ line)

   703     filecont = string.join(filecont, '')

   705     # the file is in one long string

   707     filecont = no_outer_parens(filecont)

   709     #

   710     # split lines according to preferred syntax scheme

   711     #

   712     filecont = re.sub('(=\s*{[^=]*)},', '\g<1>},\n', filecont)

   714     # add new lines after commas that are after values

   715     filecont = re.sub('"\s*,', '",\n', filecont)

   716     filecont = re.sub('=\s*([\w\d]+)\s*,', '= \g<1>,\n', filecont)

   717     filecont = re.sub('(@\w*)\s*({(\s*)[^,\s]*)\s*,',

   718                           '\n\n\g<1>\g<2>,\n', filecont)

   720     # add new lines after }

   721     filecont = re.sub('"\s*}','"\n}\n', filecont)

   722     filecont = re.sub('}\s*,','},\n', filecont)

   725     filecont = re.sub('@(\w*)', '\n@\g<1>', filecont)

   727     # character encoding, reserved latex characters

   728     filecont = re.sub('{\\\&}', '&', filecont)

   729     filecont = re.sub('\\\&', '&', filecont)

   731     # do checking for open braces to get format correct

   732     open_brace_count = 0

   733     brace_split = re.split('([{}])',filecont)

   735     # rebuild filecont

   736     filecont = ''

   738     for phrase in brace_split:

   739         if phrase == '{':

   740             open_brace_count = open_brace_count + 1

   741         elif phrase == '}':

   742             open_brace_count = open_brace_count - 1

   743             if open_brace_count == 0:

   744                 filecont = filecont + '\n'

   746         filecont = filecont + phrase

   748     filecont2 = bibtex_replace_abbreviations(filecont)

   750     # gather

   751     filecont = filecont2.splitlines()

   752     i=0

   753     j=0         # count the number of blank lines

   754     for line in filecont:

   755         # ignore blank lines

   756         if line == '' or line == ' ':

   757             j = j+1

   758             continue

   759         filecont[i] = line + '\n'

   760         i = i+1

   762     # get rid of the extra stuff at the end of the array

   763     # (The extra stuff are duplicates that are in the array because

   764     # blank lines were removed.)

   765     length = len( filecont)

   766     filecont[length-j:length] = []

   768     return filecont

   771 def filehandler(filepath):

   772     try:

   773         fd = open(filepath, 'r')

   774         filecont_source = fd.readlines()

   775         fd.close()

   776     except:

   777         print 'Could not open file:', filepath

   778     washeddata = bibtexwasher(filecont_source)

   779     outdata = bibtexdecoder(washeddata)

   780     print '/**'

   781     print '\page references References'

   782     print

   783     print '<table border="0" cellspacing="5px" width="100%">'

   784     print

   785     for line in outdata:

   786         print line

   787     print '</table>'

   788     print

   789     print '*/'

   792 # main program

   794 def main():

   795     import sys

   796     if sys.argv[1:]:

   797         filepath = sys.argv[1]

   798     else:

   799         print "No input file"

   800         sys.exit()

   801     filehandler(filepath)

   803 if __name__ == "__main__": main()

   806 # end python script

author	Alpar Juttner <alpar@cs.elte.hu>
	Sat, 26 Sep 2009 10:15:49 +0200
changeset 791	f8c468367dab
child 792	68792fb2870f
permissions	-rw-r--r--