scripts/bib2dox.py
author Peter Kovacs <kpeter@inf.elte.hu>
Thu, 12 Nov 2009 23:26:13 +0100
changeset 806 fa6f37d7a25b
parent 745 68792fb2870f
child 836 c841ae1aca29
permissions -rwxr-xr-x
Entirely rework CapacityScaling (#180)

- Use the new interface similarly to NetworkSimplex.
- Rework the implementation using an efficient internal structure
for handling the residual network. This improvement made the
code much faster (up to 2-5 times faster on large graphs).
- Handle GEQ supply type (LEQ is not supported).
- Handle negative costs for arcs of finite capacity.
(Note that this algorithm cannot handle arcs of negative cost
and infinite upper bound, thus it returns UNBOUNDED if such
an arc exists.)
- Extend the documentation.
kpeter@743
     1
#!/usr/bin/env /usr/local/Python/bin/python2.1
kpeter@743
     2
"""
kpeter@743
     3
  BibTeX to Doxygen converter
kpeter@743
     4
  Usage: python bib2dox.py bibfile.bib > bibfile.dox
kpeter@743
     5
kpeter@743
     6
  This code is the modification of the BibTeX to XML converter
kpeter@743
     7
  by Vidar Bronken Gundersen et al. See the original copyright notices below. 
kpeter@743
     8
kpeter@743
     9
  **********************************************************************
kpeter@743
    10
kpeter@743
    11
  Decoder for bibliographic data, BibTeX
kpeter@743
    12
  Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
kpeter@743
    13
kpeter@743
    14
  v.8
kpeter@743
    15
  (c)2002-06-23 Vidar Bronken Gundersen
kpeter@743
    16
  http://bibtexml.sf.net/
kpeter@743
    17
  Reuse approved as long as this notification is kept.
kpeter@743
    18
  Licence: GPL.
kpeter@743
    19
kpeter@743
    20
  Contributions/thanks to:
kpeter@743
    21
  Egon Willighagen, http://sf.net/projects/jreferences/
kpeter@743
    22
  Richard Mahoney (for providing a test case)
kpeter@743
    23
kpeter@743
    24
  Edited by Sara Sprenkle to be more robust and handle more bibtex features.
kpeter@743
    25
  (c) 2003-01-15
kpeter@743
    26
kpeter@743
    27
  1.  Changed bibtex: tags to bibxml: tags.
kpeter@743
    28
  2.  Use xmlns:bibxml="http://bibtexml.sf.net/"
kpeter@743
    29
  3.  Allow spaces between @type and first {
kpeter@743
    30
  4.  "author" fields with multiple authors split by " and "
kpeter@743
    31
      are put in separate xml "bibxml:author" tags.
kpeter@743
    32
  5.  Option for Titles: words are capitalized
kpeter@743
    33
      only if first letter in title or capitalized inside braces
kpeter@743
    34
  6.  Removes braces from within field values
kpeter@743
    35
  7.  Ignores comments in bibtex file (including @comment{ or % )
kpeter@743
    36
  8.  Replaces some special latex tags, e.g., replaces ~ with '&#160;'
kpeter@743
    37
  9.  Handles bibtex @string abbreviations
kpeter@743
    38
        --> includes bibtex's default abbreviations for months
kpeter@743
    39
        --> does concatenation of abbr # " more " and " more " # abbr
kpeter@743
    40
  10. Handles @type( ... ) or @type{ ... }
kpeter@743
    41
  11. The keywords field is split on , or ; and put into separate xml
kpeter@743
    42
      "bibxml:keywords" tags
kpeter@743
    43
  12. Ignores @preamble
kpeter@743
    44
kpeter@743
    45
  Known Limitations
kpeter@743
    46
  1.  Does not transform Latex encoding like math mode and special
kpeter@743
    47
      latex symbols.
kpeter@743
    48
  2.  Does not parse author fields into first and last names.
kpeter@743
    49
      E.g., It does not do anything special to an author whose name is
kpeter@743
    50
      in the form LAST_NAME, FIRST_NAME
kpeter@743
    51
      In "author" tag, will show up as
kpeter@743
    52
      <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
kpeter@743
    53
  3.  Does not handle "crossref" fields other than to print
kpeter@743
    54
      <bibxml:crossref>...</bibxml:crossref>
kpeter@743
    55
  4.  Does not inform user of the input's format errors.  You just won't
kpeter@743
    56
      be able to transform the file later with XSL
kpeter@743
    57
kpeter@743
    58
  You will have to manually edit the XML output if you need to handle
kpeter@743
    59
  these (and unknown) limitations.
kpeter@743
    60
kpeter@743
    61
"""
kpeter@743
    62
kpeter@743
    63
import string, re
kpeter@743
    64
kpeter@743
    65
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings so that regex escapes (\s, \w, \b, \u...)
# reach the re module untouched instead of being interpreted (or rejected)
# as string-literal escapes.
#

# separates the individual names of an author list
author_rex = re.compile(r'\s+and\s+')
# matches a single opening or closing brace
rembraces_rex = re.compile(r'[{}]')
# matches (and captures) one brace-protected group of a title
capitalize_rex = re.compile(r'({[^}]*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = rest of line": group 1 is the field name, group 2 everything else
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
# "name = value,": group 2 is the undelimited value up to the first comma
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# matches \url{address} and captures the address
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
kpeter@743
    93
kpeter@743
    94
#
kpeter@743
    95
# return the string parameter with \url{...} commands turned into HTML links
kpeter@743
    96
#
kpeter@743
    97
def transformurls(str):
    """Return *str* with each url command replaced by an HTML link.

    The address captured by url_rex is used both as the link target and
    as the visible link text.
    """
    link_template = r'<a href="\1">\1</a>'
    return url_rex.sub(link_template, str)
kpeter@743
    99
kpeter@743
   100
#
kpeter@743
   101
# return the string parameter without braces
kpeter@743
   102
#
kpeter@743
   103
def removebraces(str):
    """Return *str* with every '{' and '}' character deleted."""
    without_braces = rembraces_rex.sub('', str)
    return without_braces
kpeter@743
   105
kpeter@743
   106
#
kpeter@743
   107
# latex-specific replacements
kpeter@743
   108
# (do this after braces were removed)
kpeter@743
   109
#
kpeter@743
   110
# (LaTeX sequence, HTML entity) pairs, applied in this order by
# latexreplacements().  HTML has no entity for the double acute accent
# (\H), so \H o / \H u are approximated by &otilde; / &uuml;.
_LATEX_REPLACEMENTS = (
    ('~', '&nbsp;'),
    ("\\'a", '&aacute;'),
    ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'),
    ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'),
    ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'),
    ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),    # &utilde; does not exist
    ("\\'A", '&Aacute;'),
    ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'),
    ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),    # &Utilde; does not exist
)


def latexreplacements(line):
    """Return *line* with known LaTeX sequences replaced by HTML entities.

    Handles the non-breaking space (~) and the acute, umlaut and double
    acute accents on the vowels a, e, i, o, u (double acute only on o and
    u).  Braces must already have been removed from *line*, because only
    the brace-less spellings are recognised.

    Uses the str.replace method rather than the string module function,
    which only exists on Python 2.
    """
    for latex, entity in _LATEX_REPLACEMENTS:
        line = line.replace(latex, entity)
    return line
kpeter@743
   138
kpeter@743
   139
#
kpeter@743
   140
# copy characters from a string decoding html expressions (&xyz;)
kpeter@743
   141
#
kpeter@743
   142
def copychars(str, ifrom, count):
    """Collect up to *count* letters of *str*, starting at index *ifrom*.

    Only ASCII letters are copied; every other character is skipped
    without being counted.  An HTML entity (&xyz;) counts as one letter
    and contributes the character right after the '&'; the remainder of
    the entity up to ';' is skipped.
    """
    pieces = []
    taken = 0
    index = ifrom
    end = len(str)
    inside_entity = False
    while index < end and taken < count:
        ch = str[index]
        if ch == '&':
            # entity start: keep its first letter, skip the rest later
            inside_entity = True
            if index + 1 < end:
                pieces.append(str[index + 1])
            taken += 1
            index += 2
            continue
        if inside_entity:
            if ch == ';':
                inside_entity = False
        elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
            pieces.append(ch)
            taken += 1
        index += 1

    return ''.join(pieces)
kpeter@743
   165
kpeter@743
   166
kpeter@743
   167
# 
kpeter@743
   168
# Handle a list of authors (separated by 'and').
kpeter@743
   169
# It gives back an array of the following values:
kpeter@743
   170
#  - num: the number of authors,
kpeter@743
   171
#  - list: the list of the author names,
kpeter@743
   172
#  - text: the bibtex text (separated by commas and/or 'and')
kpeter@743
   173
#  - abbrev: abbreviation that can be used for indicate the
kpeter@743
   174
#    bibliography entries
kpeter@743
   175
#
kpeter@743
   176
def bibtexauthor(data):
    """Parse a BibTeX author/editor field.

    *data* is the raw field value with the names separated by 'and'.

    Returns a dictionary with the keys
      - 'num': the number of names,
      - 'list': the names, each normalised ("Xyz, A. B." -> "A. B. Xyz",
        braces removed, LaTeX accents replaced),
      - 'text': the names joined for display ("A", "A and B",
        "A, B, and C"),
      - 'abbrev': a short label built from the last word of each name
        (three letters for a single name, one letter per name otherwise).
    """
    authors = []
    for author in author_rex.split(data):
        # general transformations
        author = latexreplacements(removebraces(author.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        pos = author.find(',')
        if pos != -1:
            author = author[pos+1:].strip() + ' ' + author[:pos].strip()
        authors.append(author)

    num = len(authors)

    # Join the names directly instead of going through a '#' placeholder,
    # so that a name containing '#' is not mangled.
    if num <= 2:
        text = ' and '.join(authors)
    else:
        text = ', '.join(authors[:-1]) + ', and ' + authors[-1]

    # a single author contributes three letters, otherwise one per author
    count = 1
    if num == 1:
        count = 3
    abbrev = ''.join([copychars(author, author.rfind(' ') + 1, count)
                      for author in authors])

    result = {}
    result['list'] = authors
    result['num'] = num
    result['text'] = text
    result['abbrev'] = abbrev
    return result
kpeter@743
   209
kpeter@743
   210
kpeter@743
   211
#
kpeter@743
   212
# data = title string
kpeter@743
   213
# @return the capitalized title (first letter is capitalized), rest are capitalized
kpeter@743
   214
# only if capitalized inside braces
kpeter@743
   215
#
kpeter@743
   216
def capitalizetitle(data):
    """Return the title *data* with BibTeX-style capitalisation.

    The title is split into brace-protected groups and the text between
    them.  Protected groups keep their capitalisation (their braces are
    removed).  The very first piece, if unprotected, is stripped of
    leading whitespace and capitalised; every other unprotected piece is
    lowercased.

    Uses the str.lstrip method rather than the string module function,
    which only exists on Python 2.
    """
    pieces = []
    for count, phrase in enumerate(capitalize_rex.split(data)):
        check = phrase.lstrip()
        if check.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif count == 0:
            # first word --> capitalize first letter (after spaces)
            pieces.append(check.capitalize())
        else:
            pieces.append(phrase.lower())

    return ''.join(pieces)
kpeter@743
   235
kpeter@743
   236
kpeter@743
   237
#
kpeter@743
   238
# @return the bibtex for the title
kpeter@743
   239
# @param data --> title string
kpeter@743
   240
# braces are removed from title
kpeter@743
   241
#
kpeter@743
   242
def bibtextitle(data, entrytype):
    """Return the display form of the title field *data*.

    Titles of 'book' and 'inbook' entries keep their capitalisation; all
    other titles are passed through capitalizetitle() first.  Surrounding
    whitespace and all braces are removed in both cases.
    """
    text = data.strip()
    if entrytype not in ('book', 'inbook'):
        text = capitalizetitle(text)
    return removebraces(text)
kpeter@743
   249
kpeter@743
   250
kpeter@743
   251
#
kpeter@743
   252
# function to compare entry lists
kpeter@743
   253
#
kpeter@743
   254
def entry_cmp(x, y):
    """Three-way comparison of two entry lists by their first item.

    Returns -1, 0 or 1 when x[0] is smaller than, equal to or greater
    than y[0].  Written without the cmp() builtin, which does not exist
    on Python 3.
    """
    return (x[0] > y[0]) - (x[0] < y[0])
kpeter@743
   256
kpeter@743
   257
kpeter@743
   258
#
kpeter@743
   259
# return the Doxygen source lines generated from "filecont_source"
kpeter@743
   260
#
kpeter@743
   261
def _field_present(fields, name):
    """Return True if *fields* has a non-empty value stored under *name*."""
    return name in fields and fields[name] != ''


def _format_entry(entrytype, entryid, fields):
    """Build the output record of one entry from its parsed *fields*.

    Returns the list [sortkey, bibkey, entryid, text, text, ...] where the
    text items are the pieces of the formatted reference in output order.
    *fields* is modified in place (emphasis markup, default 'type', ...).
    """
    # entry type related formattings; a missing field is simply left out
    # instead of aborting the whole conversion with a KeyError
    if entrytype in ('book', 'inbook'):
        if 'title' in fields:
            fields['title'] = '<em>' + fields['title'] + '</em>'
        if 'author' not in fields and 'editor' in fields:
            # work on a copy so that the editor data stays untouched
            editors = dict(fields['editor'])
            editors['text'] += ', editors'
            fields['author'] = editors
    elif entrytype == 'article':
        if 'journal' in fields:
            fields['journal'] = '<em>' + fields['journal'] + '</em>'
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        if 'booktitle' in fields:
            fields['booktitle'] = '<em>' + fields['booktitle'] + '</em>'
    elif entrytype == 'techreport':
        if 'type' not in fields:
            fields['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        fields['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        fields['type'] = 'PhD thesis'

    if _field_present(fields, 'pages'):
        fields['pages'] = fields['pages'].replace('--', '-')

    entry = []
    if _field_present(fields, 'author'):
        entry.append(fields['author']['text'] + '.')
    if _field_present(fields, 'title'):
        entry.append(fields['title'] + '.')
    if _field_present(fields, 'journal'):
        entry.append(fields['journal'] + ',')
    if _field_present(fields, 'booktitle'):
        entry.append('In ' + fields['booktitle'] + ',')
    if _field_present(fields, 'type'):
        eline = fields['type']
        if _field_present(fields, 'number'):
            eline += ' ' + fields['number']
        eline += ','
        entry.append(eline)
    for name in ('institution', 'publisher', 'school', 'address'):
        if _field_present(fields, name):
            entry.append(fields[name] + ',')
    if _field_present(fields, 'edition'):
        entry.append(fields['edition'] + ' edition,')
    if _field_present(fields, 'howpublished'):
        entry.append(fields['howpublished'] + ',')
    if _field_present(fields, 'volume'):
        # volume(number):pages
        eline = fields['volume']
        if _field_present(fields, 'number'):
            eline += '(' + fields['number'] + ')'
        if _field_present(fields, 'pages'):
            eline += ':' + fields['pages']
        eline += ','
        entry.append(eline)
    elif _field_present(fields, 'pages'):
        entry.append('pages ' + fields['pages'] + ',')
    if _field_present(fields, 'year'):
        if _field_present(fields, 'month'):
            entry.append(fields['month'] + ' ' + fields['year'] + '.')
        else:
            entry.append(fields['year'] + '.')
    if _field_present(fields, 'note'):
        entry.append(fields['note'] + '.')
    if _field_present(fields, 'url'):
        entry.append(fields['url'] + '.')

    # generate keys for sorting and for the output
    sortkey = ''
    if 'author' in fields:
        for author in fields['author']['list']:
            # letters of the last word of each name
            sortkey += copychars(author, author.rfind(' ')+1, len(author))
        bibkey = fields['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in fields:
        sortkey += fields['year']
        bibkey += fields['year'][-2:]
    if 'title' in fields:
        sortkey += fields['title']
    if 'key' in fields:
        # an explicit "key" field overrides the generated label
        sortkey = fields['key'] + sortkey
        bibkey = fields['key']

    return [sortkey, bibkey, entryid] + entry


def bibtexdecoder(filecont_source):
    """Convert BibTeX source lines into Doxygen source lines.

    *filecont_source* is an iterable of strings, one per input line.  The
    last character of every line is dropped (presumably the trailing
    newline -- confirm against the caller).  The input is parsed line by
    line: a line starting with "@type{id," opens an entry, a line starting
    with "}" and holding nothing else but whitespace closes it, and every
    line in between is read as one "field = value" definition.

    Returns a list of strings.  For each entry, in sort order, it holds a
    section command line with the entry id and the bracketed label, an
    opening <div> line, the pieces of the formatted reference, a closing
    </div> line and an empty line.  Entries sharing a label get the
    suffixes a, b, c, ... appended to it.

    Field lines and closing braces met outside of an entry are ignored.
    """
    filecont = []
    output = []

    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    endtype_rex = re.compile(r'}\s*$')

    # field name of a "name = ..." line
    fieldname_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
    # value of a name = {value} line and of a name = "value" line
    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')

    entrycont = None    # fields of the entry being read, None outside
    entrytype = ''
    entryid = ''

    for line in filecont_source:
        line = line[:-1]

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        if pubtype_rex.match(line):
            entrycont = {}
            entrytype = pubtype_rex.sub(r'\g<1>', line).lower()
            entryid = pubtype_rex.sub(r'\g<2>', line)

        # end entry if just a }
        elif endtype_rex.match(line):
            if entrycont is not None:
                # generate doxygen code for the entry
                filecont.append(_format_entry(entrytype, entryid, entrycont))
                entrycont = None

        elif entrycont is not None:
            # field, publication info
            field = ''
            data = ''

            # field = {data} entries
            if bracedata_rex.match(line):
                field = fieldname_rex.sub(r'\g<1>', line).lower()
                data = bracedata_rex.sub(r'\g<2>', line)

            # field = "data" entries
            elif quotedata_rex.match(line):
                field = fieldname_rex.sub(r'\g<1>', line).lower()
                data = quotedata_rex.sub(r'\g<2>', line)

            # field = data entries
            elif data_rex.match(line):
                field = field_rex.sub(r'\g<1>', line).lower()
                data = data_rex.sub(r'\g<2>', line)

            if field == 'url':
                # wrap the address so that transformurls() makes it a link
                data = '\\url{' + data.strip() + '}'

            if field in ('author', 'editor'):
                entrycont[field] = bibtexauthor(data)
                line = ''
            elif field == 'title':
                line = bibtextitle(data, entrytype)
            elif field != '':
                line = removebraces(transformurls(data.strip()))

            # a line that is not a field definition is stored under ''
            if line != '':
                line = latexreplacements(line)
                entrycont[field] = line

    # sort entries
    filecont.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in filecont:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = dict.fromkeys(keytable, 0)

    # generate output
    for entry in filecont:
        # generate output key from the bibtex key
        bibkey = entry[1]
        entryid = entry[2]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            # duplicate labels are told apart by a letter suffix
            outkey = bibkey + chr(97 + counttable[bibkey])
        counttable[bibkey] += 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(entry[3:])
        output.append('</div>')
        output.append('')

    return output
kpeter@743
   468
kpeter@743
   469
kpeter@743
   470
#
kpeter@743
   471
# return 1 iff abbr is in line but not inside braces or quotes
kpeter@743
   472
# assumes that abbr appears only once on the line (out of braces and quotes)
kpeter@743
   473
#
kpeter@743
   474
def verify_out_of_braces(line, abbr):
    """Return 1 if *abbr* occurs in *line* outside braces and quotes, else 0.

    The line is cut at every '{', '}' and '"'; the brace nesting depth
    and the open-quote state are tracked while the pieces in between are
    searched for *abbr* as a whole word, ignoring case.
    """
    pieces = delimiter_rex.split(line)
    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    depth = 0
    in_quote = 0
    for piece in pieces:
        if piece == "{":
            depth += 1
        elif piece == "}":
            depth -= 1
        elif piece == '"':
            # toggle between 0 and 1
            in_quote = 1 - in_quote
        elif depth == 0 and in_quote == 0 and word_rex.search(piece):
            return 1

    return 0
kpeter@743
   498
kpeter@743
   499
kpeter@743
   500
#
kpeter@743
   501
# a line in the form phrase1 # phrase2 # ... # phrasen
kpeter@743
   502
# is returned as phrase1 phrase2 ... phrasen
kpeter@743
   503
# with the correct punctuation
kpeter@743
   504
# Bug: Doesn't always work with multiple abbreviations plugged in
kpeter@743
   505
#
kpeter@743
   506
def concat_line(line):
    """Resolve the '#' concatenations of one "field = value" line.

    The value part (everything after the equals sign) is split on '#'.
    The delimiters at the joints are dropped, an opening quote of the
    first piece becomes '{' and a closing quote of the last piece
    (optionally followed by a comma) becomes '}'.  A piece ending in a
    backslash gets its '#' back, since that was an escaped hash and not
    a concatenation.  The pieces are returned joined by single spaces
    after "field =".
    """
    # only look at part after equals
    name = field_rex.sub(r'\g<1>', line)
    value = field_rex.sub(r'\g<2>', line)

    pieces = concatsplit_rex.split(value)
    last = len(pieces) - 1
    words = [name + ' =']

    for index, piece in enumerate(pieces):
        piece = piece.strip()

        # leading delimiter
        if index != 0:
            if piece[:1] in ('"', '{'):
                piece = piece[1:]
        elif piece.startswith('"'):
            piece = '{' + piece[1:]

        # trailing delimiter
        if index != last:
            if piece[-1:] in ('"', '}'):
                piece = piece[:-1]
        elif piece.endswith('"'):
            piece = piece[:-1] + "}"
        elif piece.endswith('",'):
            piece = piece[:-2] + "},"

        # if phrase did have \#, add the \# back
        if piece.endswith('\\'):
            piece += "#"
        words.append(piece)

    return ' '.join(words)
kpeter@743
   545
kpeter@743
   546
kpeter@743
   547
#
kpeter@743
   548
# substitute abbreviations into filecont
kpeter@743
   549
# @param filecont_source - string of data from file
kpeter@743
   550
#
kpeter@743
   551
def bibtex_replace_abbreviations(filecont_source):
    """Expand BibTeX abbreviations in the washed file contents.

    The twelve month abbreviations ('jan' .. 'dec') are predefined.  Every
    line matching an ``@string{name = value`` definition adds a further
    abbreviation (unless the name is already known) and is dropped from the
    output, as are lines opening an ``@comment{`` or ``@preamble{`` block.
    After such a line the next line that contains '}' is dropped as well.

    In every remaining line each known abbreviation is replaced by its value
    when verify_out_of_braces() returns 1 for it (presumably meaning that the
    abbreviation occurs outside braces -- confirm against that helper), and
    lines that then match concatsplit_rex are passed through concat_line().

    @param filecont_source - string of data from file
    @return the processed data as one string, one '\\n'-terminated line per
            kept input line, with line breaks re-inserted after values
    """
    #  These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    front = r'\b'
    back = r'(,?)\b'

    # abbr_rex[k] recognises abbr_list[k]; the three lists stay parallel.
    abbr_rex = []
    for abbr in abbr_list:
        abbr_rex.append(re.compile(front + abbr + back, re.I))

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)
    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = 0
    out_lines = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        if waiting_for_end_string:
            # Only the line holding the closing brace is swallowed; a line
            # without '}' is processed normally while we keep waiting.
            if '}' in line:
                waiting_for_end_string = 0
                continue

        definition = abbrdef_rex.search(line)
        if definition:
            abbr = definition.group(1)
            # The first definition of a name wins; later ones are ignored.
            if abbr_list.count(abbr) == 0:
                abbr_list.append(abbr)
                value_list.append(definition.group(2).strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace subsequent abbreviations with the value
        for abbr, value, rex in zip(abbr_list, value_list, abbr_rex):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # The value is inserted through a replacement template,
                    # so its backslashes (LaTeX commands such as \textit)
                    # must be doubled to be taken literally.
                    template = value.replace('\\', '\\\\') + r'\g<1>'
                    line = rex.sub(template, line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        out_lines.append(line + '\n')

    filecont2 = ''.join(out_lines)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
kpeter@743
   646
kpeter@743
   647
#
# Convert @type( ... ) to @type{ ... }
#
kpeter@743
   650
def no_outer_parens(filecont):
    """Convert entries delimited as @type( ... ) into @type{ ... }.

    The text is split at every parenthesis and brace.  A '(' that directly
    follows a piece of text containing '@' opens a parenthesised entry and
    is rewritten to '{'; the ')' that balances it is rewritten to '}'.
    Parentheses nested inside such an entry are left untouched.

    An '@' seen while a parenthesised entry is already open (for instance an
    e-mail address in a field value) neither cancels that entry nor starts a
    new one, and the nesting counter restarts at every entry so that stray
    parentheses in earlier brace-delimited entries cannot disturb it.

    @param filecont - the whole file as one string
    @return the string with the outer parentheses of entries replaced
    """
    pieces = re.split(r'([(){}])', filecont)
    at_rex = re.compile(r'@\w*')

    rebuilt = []
    paren_depth = 0      # open '(' count, outer entry paren included
    in_paren_entry = 0   # 1 while inside an entry opened with '('
    after_at = 0         # 1 when the previous piece contained '@'

    for piece in pieces:
        follows_at = after_at
        after_at = 0

        if piece == '(':
            if follows_at and not in_paren_entry:
                # Opening delimiter of a new entry.
                piece = '{'
                in_paren_entry = 1
                paren_depth = 1
            else:
                paren_depth = paren_depth + 1
        elif piece == ')':
            paren_depth = paren_depth - 1
            if in_paren_entry and paren_depth == 0:
                # Closing delimiter of the current entry.
                piece = '}'
                in_paren_entry = 0
        elif at_rex.search(piece):
            after_at = 1

        rebuilt.append(piece)

    return ''.join(rebuilt)
kpeter@743
   690
kpeter@743
   691
kpeter@743
   692
#
# Collapse every run of whitespace into a single space and
# reformat the BibTeX file into a usable form.
#
kpeter@743
   696
def bibtexwasher(filecont_source):
    """Normalise raw BibTeX lines into one item per line.

    Steps, in order:
      1. strip each input line, collapse whitespace runs to one space and
         drop empty lines and lines starting with '%';
      2. join everything into one long string and let no_outer_parens()
         turn @type( ... ) into @type{ ... };
      3. re-insert line breaks after field values, entry headers and
         closing braces, and replace the LaTeX escape for '&' by '&';
      4. put the brace that closes each top-level group on its own line;
      5. run bibtex_replace_abbreviations() on the result.

    @param filecont_source - sequence of lines read from the file
    @return list of the non-blank resulting lines, each ending in '\\n'
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    kept = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            kept.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(kept)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # a '}' that brings the nesting depth back to zero gets a line break
    # in front of it
    open_brace_count = 0
    pieces = []
    for phrase in re.split(r'([{}])', filecont):
        if phrase == '{':
            open_brace_count = open_brace_count + 1
        elif phrase == '}':
            open_brace_count = open_brace_count - 1
            if open_brace_count == 0:
                pieces.append('\n')
        pieces.append(phrase)
    filecont = ''.join(pieces)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the non-blank lines, each terminated by a newline
    return [line + '\n' for line in filecont2.splitlines()
            if line != '' and line != ' ']
kpeter@743
   778
kpeter@743
   779
kpeter@743
   780
def filehandler(filepath):
    """Convert the BibTeX file at filepath and print it as a Doxygen page.

    The file is read, passed through bibtexwasher() and bibtexdecoder(), and
    the resulting lines are printed to standard output between '/**' and
    '*/' under a '\\page references References' heading.

    If the file cannot be opened or read, a message is written to standard
    error (standard output is the generated document) and the program exits
    with status 1 instead of continuing with undefined data.

    @param filepath - path of the .bib file to convert
    """
    import sys

    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the file even when reading fails
            fd.close()
    except IOError:
        sys.stderr.write('Could not open file: ' + str(filepath) + '\n')
        sys.exit(1)

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 function.
    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
kpeter@743
   795
kpeter@743
   796
kpeter@743
   797
# main program
kpeter@743
   798
kpeter@743
   799
def main():
    """Command-line entry point: convert the file named by the first argument.

    Without an argument, 'No input file' is written to standard error and
    the program exits with status 1, so the message does not end up in the
    redirected output and callers can detect the failure.
    """
    import sys

    if not sys.argv[1:]:
        sys.stderr.write("No input file\n")
        sys.exit(1)

    filehandler(sys.argv[1])


if __name__ == "__main__":
    main()
kpeter@743
   809
kpeter@743
   810
kpeter@743
   811
# end python script