| 
kpeter@836
 | 
     1  | 
#! /usr/bin/env python
  | 
| 
kpeter@743
 | 
     2  | 
"""
  | 
| 
kpeter@743
 | 
     3  | 
  BibTeX to Doxygen converter
  | 
| 
kpeter@743
 | 
     4  | 
  Usage: python bib2dox.py bibfile.bib > bibfile.dox
  | 
| 
kpeter@743
 | 
     5  | 
  | 
| 
kpeter@836
 | 
     6  | 
  This file is a part of LEMON, a generic C++ optimization library.
  | 
| 
kpeter@836
 | 
     7  | 
  | 
| 
kpeter@836
 | 
     8  | 
  **********************************************************************
  | 
| 
kpeter@836
 | 
     9  | 
  | 
| 
kpeter@743
 | 
    10  | 
  This code is the modification of the BibTeX to XML converter
  | 
| 
kpeter@836
 | 
    11  | 
  by Vidar Bronken Gundersen et al.
  | 
| 
kpeter@836
 | 
    12  | 
  See the original copyright notices below. 
  | 
| 
kpeter@743
 | 
    13  | 
  | 
| 
kpeter@743
 | 
    14  | 
  **********************************************************************
  | 
| 
kpeter@743
 | 
    15  | 
  | 
| 
kpeter@743
 | 
    16  | 
  Decoder for bibliographic data, BibTeX
  | 
| 
kpeter@743
 | 
    17  | 
  Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
  | 
| 
kpeter@743
 | 
    18  | 
  | 
| 
kpeter@743
 | 
    19  | 
  v.8
  | 
| 
kpeter@743
 | 
    20  | 
  (c)2002-06-23 Vidar Bronken Gundersen
  | 
| 
kpeter@743
 | 
    21  | 
  http://bibtexml.sf.net/
  | 
| 
kpeter@743
 | 
    22  | 
  Reuse approved as long as this notification is kept.
  | 
| 
kpeter@743
 | 
    23  | 
  Licence: GPL.
  | 
| 
kpeter@743
 | 
    24  | 
  | 
| 
kpeter@743
 | 
    25  | 
  Contributions/thanks to:
  | 
| 
kpeter@743
 | 
    26  | 
  Egon Willighagen, http://sf.net/projects/jreferences/
  | 
| 
kpeter@743
 | 
    27  | 
  Richard Mahoney (for providing a test case)
  | 
| 
kpeter@743
 | 
    28  | 
  | 
| 
kpeter@743
 | 
    29  | 
  Edited by Sara Sprenkle to be more robust and handle more bibtex features.
  | 
| 
kpeter@743
 | 
    30  | 
  (c) 2003-01-15
  | 
| 
kpeter@743
 | 
    31  | 
  | 
| 
kpeter@743
 | 
    32  | 
  1.  Changed bibtex: tags to bibxml: tags.
  | 
| 
kpeter@743
 | 
    33  | 
  2.  Use xmlns:bibxml="http://bibtexml.sf.net/"
  | 
| 
kpeter@743
 | 
    34  | 
  3.  Allow spaces between @type and first {
 | 
| 
kpeter@743
 | 
    35  | 
  4.  "author" fields with multiple authors split by " and "
  | 
| 
kpeter@743
 | 
    36  | 
      are put in separate xml "bibxml:author" tags.
  | 
| 
kpeter@743
 | 
    37  | 
  5.  Option for Titles: words are capitalized
  | 
| 
kpeter@743
 | 
    38  | 
      only if first letter in title or capitalized inside braces
  | 
| 
kpeter@743
 | 
    39  | 
  6.  Removes braces from within field values
  | 
| 
kpeter@743
 | 
    40  | 
  7.  Ignores comments in bibtex file (including @comment{ or % )
 | 
| 
kpeter@743
 | 
    41  | 
  8.  Replaces some special latex tags, e.g., replaces ~ with ' '
  | 
| 
kpeter@743
 | 
    42  | 
  9.  Handles bibtex @string abbreviations
  | 
| 
kpeter@743
 | 
    43  | 
        --> includes bibtex's default abbreviations for months
  | 
| 
kpeter@743
 | 
    44  | 
        --> does concatenation of abbr # " more " and " more " # abbr
  | 
| 
kpeter@743
 | 
    45  | 
  10. Handles @type( ... ) or @type{ ... }
 | 
| 
kpeter@743
 | 
    46  | 
  11. The keywords field is split on , or ; and put into separate xml
  | 
| 
kpeter@743
 | 
    47  | 
      "bibxml:keywords" tags
  | 
| 
kpeter@743
 | 
    48  | 
  12. Ignores @preamble
  | 
| 
kpeter@743
 | 
    49  | 
  | 
| 
kpeter@743
 | 
    50  | 
  Known Limitations
  | 
| 
kpeter@743
 | 
    51  | 
  1.  Does not transform Latex encoding like math mode and special
  | 
| 
kpeter@743
 | 
    52  | 
      latex symbols.
  | 
| 
kpeter@743
 | 
    53  | 
  2.  Does not parse author fields into first and last names.
  | 
| 
kpeter@743
 | 
    54  | 
      E.g., It does not do anything special to an author whose name is
  | 
| 
kpeter@743
 | 
    55  | 
      in the form LAST_NAME, FIRST_NAME
  | 
| 
kpeter@743
 | 
    56  | 
      In "author" tag, will show up as
  | 
| 
kpeter@743
 | 
    57  | 
      <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
  | 
| 
kpeter@743
 | 
    58  | 
  3.  Does not handle "crossref" fields other than to print
  | 
| 
kpeter@743
 | 
    59  | 
      <bibxml:crossref>...</bibxml:crossref>
  | 
| 
kpeter@743
 | 
    60  | 
  4.  Does not inform user of the input's format errors.  You just won't
  | 
| 
kpeter@743
 | 
    61  | 
      be able to transform the file later with XSL
  | 
| 
kpeter@743
 | 
    62  | 
  | 
| 
kpeter@743
 | 
    63  | 
  You will have to manually edit the XML output if you need to handle
  | 
| 
kpeter@743
 | 
    64  | 
  these (and unknown) limitations.
  | 
| 
kpeter@743
 | 
    65  | 
  | 
| 
kpeter@743
 | 
    66  | 
"""
  | 
| 
kpeter@743
 | 
    67  | 
  | 
| 
kpeter@743
 | 
    68  | 
import string, re
  | 
| 
kpeter@743
 | 
    69  | 
  | 
| 
kpeter@743
 | 
    70  | 
# set of valid name characters
  | 
| 
kpeter@743
 | 
    71  | 
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings: sequences such as \s, \w and \u are not
# valid string escapes, so they must reach the re module untouched.
#

# separator between the names in an author/editor list
author_rex = re.compile(r'\s+and\s+')
# any single brace
rembraces_rex = re.compile(r'[{}]')
# a brace-protected phrase (captured, so split() keeps it in the result)
capitalize_rex = re.compile(r'({[^}]*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "field = value" lines: group 1 is the field name, group 2 the value
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# a literal backslash followed by url{...}; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
  | 
| 
kpeter@743
 | 
    98  | 
  | 
| 
kpeter@743
 | 
    99  | 
#
  | 
| 
kpeter@743
 | 
   100  | 
# return the string parameter with \url{...} commands turned into html links
  | 
| 
kpeter@743
 | 
   101  | 
#
  | 
| 
kpeter@743
 | 
   102  | 
def transformurls(str):
    """Return *str* with every match of url_rex rewritten as an HTML link.

    The address captured by url_rex is used both as the link target and
    as the visible link text.
    """
    anchor_template = r'<a href="\1">\1</a>'
    linked = url_rex.sub(anchor_template, str)
    return linked
  | 
| 
kpeter@743
 | 
   104  | 
  | 
| 
kpeter@743
 | 
   105  | 
#
  | 
| 
kpeter@743
 | 
   106  | 
# return the string parameter without braces
  | 
| 
kpeter@743
 | 
   107  | 
#
  | 
| 
kpeter@743
 | 
   108  | 
def removebraces(str):
    """Return *str* with every match of rembraces_rex deleted."""
    without_braces = rembraces_rex.sub('', str)
    return without_braces
 | 
| 
kpeter@743
 | 
   110  | 
  | 
| 
kpeter@743
 | 
   111  | 
#
  | 
| 
kpeter@743
 | 
   112  | 
# latex-specific replacements
  | 
| 
kpeter@743
 | 
   113  | 
# (do this after braces were removed)
  | 
| 
kpeter@743
 | 
   114  | 
#
  | 
| 
kpeter@743
 | 
   115  | 
def latexreplacements(line):
    """Return *line* with the supported LaTeX constructs replaced.

    '~' becomes a space and the accent commands listed below become the
    corresponding accented characters.  The pairs are applied in the
    listed order, so '~' is replaced before the two-part "\\H x" forms
    are looked up.

    str.replace() is used instead of the string-module function, which
    is no longer available in Python 3.
    """
    replacements = (
        ('~', ' '),
        ('\\\'a', 'á'),
        ('\\"a', 'ä'),
        ('\\\'e', 'é'),
        ('\\"e', 'ë'),
        ('\\\'i', 'í'),
        ('\\"i', 'ï'),
        ('\\\'o', 'ó'),
        ('\\"o', 'ö'),
        ('\\\'u', 'ú'),
        ('\\"u', 'ü'),
        ('\\H o', 'õ'),
        ('\\H u', 'ü'),   # ũ does not exist
        ('\\\'A', 'Á'),
        ('\\"A', 'Ä'),
        ('\\\'E', 'É'),
        ('\\"E', 'Ë'),
        ('\\\'I', 'Í'),
        ('\\"I', 'Ï'),
        ('\\\'O', 'Ó'),
        ('\\"O', 'Ö'),
        ('\\\'U', 'Ú'),
        ('\\"U', 'Ü'),
        ('\\H O', 'Õ'),
        ('\\H U', 'Ü'),   # Ũ does not exist
    )
    for latex, replacement in replacements:
        line = line.replace(latex, replacement)

    return line
  | 
| 
kpeter@743
 | 
   143  | 
  | 
| 
kpeter@743
 | 
   144  | 
#
  | 
| 
kpeter@743
 | 
   145  | 
# copy characters from a string decoding html expressions (&xyz;)
  | 
| 
kpeter@743
 | 
   146  | 
#
  | 
| 
kpeter@743
 | 
   147  | 
def copychars(str, ifrom, count):
    """Collect up to *count* letters of *str*, starting at index *ifrom*.

    Outside an html expression only the ASCII letters A-Z and a-z are
    copied; every other character is skipped without being counted.
    An html expression starts at '&': the character right after the '&'
    is copied and counted as one letter, and the rest of the expression
    is skipped up to and including the next ';'.
    """
    length = len(str)
    collected = []
    taken = 0
    inside_entity = False
    index = ifrom
    while index < length and taken < count:
        current = str[index]
        if current == '&':
            # the character following '&' stands for the whole expression
            inside_entity = True
            if index + 1 < length:
                collected.append(str[index + 1])
            taken += 1
            index += 2
            continue
        if inside_entity:
            if current == ';':
                inside_entity = False
        elif 'A' <= current <= 'Z' or 'a' <= current <= 'z':
            collected.append(current)
            taken += 1
        index += 1

    return ''.join(collected)
  | 
| 
kpeter@743
 | 
   170  | 
  | 
| 
kpeter@743
 | 
   171  | 
  | 
| 
kpeter@743
 | 
   172  | 
# 
  | 
| 
kpeter@743
 | 
   173  | 
# Handle a list of authors (separated by 'and').
  | 
| 
kpeter@743
 | 
   174  | 
# It gives back a dictionary with the following values:
  | 
| 
kpeter@743
 | 
   175  | 
#  - num: the number of authors,
  | 
| 
kpeter@743
 | 
   176  | 
#  - list: the list of the author names,
  | 
| 
kpeter@743
 | 
   177  | 
#  - text: the bibtex text (separated by commas and/or 'and')
  | 
| 
kpeter@743
 | 
   178  | 
#  - abbrev: abbreviation that can be used to indicate the
  | 
| 
kpeter@743
 | 
   179  | 
#    bibliography entries
  | 
| 
kpeter@743
 | 
   180  | 
#
  | 
| 
kpeter@743
 | 
   181  | 
def bibtexauthor(data):
    """Split an author/editor field and describe it in a dictionary.

    The returned dictionary has these keys:
      - 'list':   the cleaned names, with "Xyz, A. B." turned into
                  "A. B. Xyz",
      - 'num':    the number of names,
      - 'text':   the names joined for display ("A", "A and B",
                  "A, B, and C"),
      - 'abbrev': letters taken from the last word of each name
                  (three letters for a single name, one letter per name
                  otherwise).
    """
    names = []
    for raw_name in author_rex.split(data):
        # general transformations
        name = latexreplacements(removebraces(raw_name.strip()))
        # move the part in front of the first comma to the end
        comma = name.find(',')
        if comma != -1:
            name = name[comma+1:].strip() + ' ' + name[:comma].strip()
        names.append(name)
    total = len(names)

    # join with '#' first, then turn the last separator into the 'and'
    # form and all remaining ones into commas
    text = '#'.join(names)
    if total > 1:
        last_sep = text.rfind('#')
        if total == 2:
            glue = ' and '
        else:
            glue = ', and '
        text = text[:last_sep] + glue + text[last_sep+1:]
    text = text.replace('#', ', ')

    if total == 1:
        letters = 3
    else:
        letters = 1
    abbrev = ''
    for name in names:
        abbrev += copychars(name, name.rfind(' ') + 1, letters)

    result = {}
    result['list'] = names
    result['num'] = total
    result['text'] = text
    result['abbrev'] = abbrev
    return result
  | 
| 
kpeter@743
 | 
   214  | 
  | 
| 
kpeter@743
 | 
   215  | 
  | 
| 
kpeter@743
 | 
   216  | 
#
  | 
| 
kpeter@743
 | 
   217  | 
# data = title string
  | 
| 
kpeter@743
 | 
   218  | 
# @return the capitalized title (first letter is capitalized), rest are capitalized
  | 
| 
kpeter@743
 | 
   219  | 
# only if capitalized inside braces
  | 
| 
kpeter@743
 | 
   220  | 
#
  | 
| 
kpeter@743
 | 
   221  | 
def capitalizetitle(data):
    """Return the title *data* in sentence case.

    The title is split with capitalize_rex into brace-protected phrases
    and the text between them:
      - a phrase that starts with '{' (after leading spaces) keeps its
        capitalization and only loses its braces,
      - the first piece has its leading spaces dropped and is
        capitalized with str.capitalize(),
      - every other piece is converted to lower case.

    str methods are used instead of the string-module functions, which
    are no longer available in Python 3.
    """
    pieces = []
    for index, phrase in enumerate(capitalize_rex.split(data)):
        check = phrase.lstrip()

        if check.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif index == 0:
            # first word --> capitalize first letter (after spaces)
            pieces.append(check.capitalize())
        else:
            pieces.append(phrase.lower())

    return ''.join(pieces)
  | 
| 
kpeter@743
 | 
   240  | 
  | 
| 
kpeter@743
 | 
   241  | 
  | 
| 
kpeter@743
 | 
   242  | 
#
  | 
| 
kpeter@743
 | 
   243  | 
# @return the bibtex for the title
  | 
| 
kpeter@743
 | 
   244  | 
# @param data --> title string
  | 
| 
kpeter@743
 | 
   245  | 
# braces are removed from title
  | 
| 
kpeter@743
 | 
   246  | 
#
  | 
| 
kpeter@743
 | 
   247  | 
def bibtextitle(data, entrytype):
    """Return the title text for an entry, without braces.

    Titles of 'book' and 'inbook' entries keep their capitalization;
    every other entry type is passed through capitalizetitle() first.
    Surrounding whitespace of *data* is stripped in both cases.
    """
    stripped = data.strip()
    if entrytype not in ('book', 'inbook'):
        stripped = capitalizetitle(stripped)
    return removebraces(stripped)
  | 
| 
kpeter@743
 | 
   254  | 
  | 
| 
kpeter@743
 | 
   255  | 
  | 
| 
kpeter@743
 | 
   256  | 
#
  | 
| 
kpeter@743
 | 
   257  | 
# function to compare entry lists
  | 
| 
kpeter@743
 | 
   258  | 
#
  | 
| 
kpeter@743
 | 
   259  | 
def entry_cmp(x, y):
    """Compare two entry lists by their first element (the sort key).

    Returns -1, 0 or 1 when x[0] is smaller than, equal to or greater
    than y[0].  The comparison is spelled out because the cmp() builtin
    is not available in Python 3.
    """
    left, right = x[0], y[0]
    return (left > right) - (left < right)
  | 
| 
kpeter@743
 | 
   261  | 
  | 
| 
kpeter@743
 | 
   262  | 
  | 
| 
kpeter@743
 | 
   263  | 
#
  | 
| 
kpeter@743
 | 
   264  | 
# return the list of doxygen lines generated from "filecont_source"
  | 
| 
kpeter@743
 | 
   265  | 
#
  | 
| 
kpeter@743
 | 
   266  | 
# Patterns used by bibtexdecoder(), compiled once at import time.
# entry start: @<alphanumeric chars><spaces>{<spaces><any chars>,
_PUBTYPE_REX = re.compile(r'@(\w*)\s*{\s*(.*),')
# entry end: a line holding only '}' (trailing whitespace allowed)
_ENDTYPE_REX = re.compile(r'}\s*$')
# field = {data} entries
_BRACEDATA_REX = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')
# field = "data" entries
_QUOTEDATA_REX = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')


def _parse_field(line):
    """Split a "field = value" line into (field, data).

    The braced, quoted and bare value forms are tried in this order.
    The field name is lower-cased.  The data is the line with the
    matched part replaced by the value, so text following the value is
    kept.  Returns ('', '') when the line is not a field assignment.
    """
    for rex in (_BRACEDATA_REX, _QUOTEDATA_REX, data_rex):
        found = rex.match(line)
        if found:
            return found.group(1).lower(), rex.sub(r'\g<2>', line)
    return '', ''


def _format_entry(entrytype, entryid, entrycont):
    """Build the output list of one entry.

    Returns [sortkey, bibkey, entryid, line, line, ...], where the lines
    are the formatted parts of the reference in output order.  Fields
    that are missing from *entrycont* are skipped.
    """
    get = entrycont.get

    # entry type related formatting
    if entrytype in ('book', 'inbook'):
        if 'title' in entrycont:
            entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
        if 'author' not in entrycont and 'editor' in entrycont:
            # use the editors as authors; work on a copy so that the
            # stored editor data is left untouched
            editors = dict(entrycont['editor'])
            editors['text'] += ', editors'
            entrycont['author'] = editors
    elif entrytype == 'article':
        if 'journal' in entrycont:
            entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        if 'booktitle' in entrycont:
            entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    if get('pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    lines = []

    def add(key, suffix, prefix=''):
        # append the field with its punctuation if it is present
        value = get(key)
        if value:
            lines.append(prefix + value + suffix)

    if get('author'):
        lines.append(entrycont['author']['text'] + '.')
    add('title', '.')
    add('journal', ',')
    add('booktitle', ',', 'In ')
    if get('type'):
        eline = entrycont['type']
        if get('number'):
            eline += ' ' + entrycont['number']
        lines.append(eline + ',')
    add('institution', ',')
    add('publisher', ',')
    add('school', ',')
    add('address', ',')
    add('edition', ' edition,')
    add('howpublished', ',')
    if get('volume'):
        # volume(number):pages
        eline = entrycont['volume']
        if get('number'):
            eline += '(' + entrycont['number'] + ')'
        if get('pages'):
            eline += ':' + entrycont['pages']
        lines.append(eline + ',')
    else:
        add('pages', ',', 'pages ')
    if get('year'):
        if get('month'):
            lines.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            lines.append(entrycont['year'] + '.')
    add('note', '.')
    add('url', '.')

    # generate keys for sorting and for the output
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ')+1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        # an explicit key overrides the generated label
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']

    return [sortkey, bibkey, entryid] + lines


def bibtexdecoder(filecont_source):
    """Convert the lines of a BibTeX file into doxygen lines.

    *filecont_source* is an iterable of text lines.  An entry starts on
    a line matching "@type{id," and ends on a line that holds only "}";
    the lines in between are read as "field = value" assignments.
    Lines outside of an entry and lines that are not assignments are
    ignored.

    Returns a list of output lines.  The entries are sorted by a key
    built from the author names, year and title (prefixed by the 'key'
    field if given).  Each entry produces a section line with its id
    and label, a <div> holding the formatted reference, and an empty
    line.  Labels shared by several entries get the suffixes a, b, ...
    """
    entries = []
    entrycont = None      # fields of the entry being read, None outside
    entrytype = ''
    entryid = ''

    for line in filecont_source:
        # drop the line terminator only, so that a last line without a
        # newline does not lose its final character
        line = line.rstrip('\r\n')

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        start = _PUBTYPE_REX.match(line)
        if start:
            entrycont = {}
            entrytype = start.group(1).lower()
            entryid = start.group(2).strip()
            continue

        if entrycont is None:
            continue

        # end entry if just a }
        if _ENDTYPE_REX.match(line):
            entries.append(_format_entry(entrytype, entryid, entrycont))
            entrycont = None
            continue

        # field, publication info
        field, data = _parse_field(line)
        if not field:
            continue

        if field == 'url':
            # wrapped so that transformurls() turns it into a link
            data = '\\url{' + data.strip() + '}'

        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            continue
        if field == 'title':
            value = bibtextitle(data, entrytype)
        else:
            value = removebraces(transformurls(data.strip()))

        if value != '':
            entrycont[field] = latexreplacements(value)

    # sort entries
    entries.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in entries:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = dict.fromkeys(keytable, 0)

    # generate output
    output = []
    for entry in entries:
        # generate output key from the bibtex key
        bibkey = entry[1]
        entryid = entry[2]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            outkey = bibkey + chr(97 + counttable[bibkey])
        counttable[bibkey] += 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(entry[3:])
        output.append('</div>')
        output.append('')

    return output
  | 
| 
kpeter@743
 | 
   473  | 
  | 
| 
kpeter@743
 | 
   474  | 
  | 
| 
kpeter@743
 | 
   475  | 
#
  | 
| 
kpeter@743
 | 
   476  | 
# return 1 iff abbr is in line but not inside braces or quotes
  | 
| 
kpeter@743
 | 
   477  | 
# assumes that abbr appears only once on the line (out of braces and quotes)
  | 
| 
kpeter@743
 | 
   478  | 
#
  | 
| 
kpeter@743
 | 
   479  | 
def verify_out_of_braces(line, abbr):
    """Return 1 if ``abbr`` occurs in ``line`` outside braces and quotes.

    The line is split with the module-level ``delimiter_rex`` and the pieces
    are scanned from left to right while tracking the brace nesting depth
    and whether a double quote is currently open.  The function returns 1
    for the first piece that contains ``abbr`` as a whole word (matched
    case-insensitively) while no brace and no quote is open; otherwise it
    returns 0.

    NOTE(review): this relies on ``delimiter_rex`` yielding '{', '}' and '"'
    as separate pieces -- confirm against its definition.

    @param line - the text line to inspect
    @param abbr - the abbreviation name to look for
    """
    # The abbreviation is data, not a pattern: escape it so that regex
    # metacharacters in the name are matched literally.
    abbr_rex = re.compile(r'\b' + re.escape(abbr) + r'\b', re.I)

    open_brace = 0
    open_quote = 0

    for phrase in delimiter_rex.split(line):
        if phrase == '{':
            open_brace += 1
        elif phrase == '}':
            open_brace -= 1
        elif phrase == '"':
            # a quote toggles between "inside" (1) and "outside" (0)
            open_quote = 1 - open_quote
        elif open_brace == 0 and open_quote == 0 and abbr_rex.search(phrase):
            return 1

    return 0
  | 
| 
kpeter@743
 | 
   503  | 
  | 
| 
kpeter@743
 | 
   504  | 
  | 
| 
kpeter@743
 | 
   505  | 
#
  | 
| 
kpeter@743
 | 
   506  | 
# a line in the form phrase1 # phrase2 # ... # phrasen
  | 
| 
kpeter@743
 | 
   507  | 
# is returned as phrase1 phrase2 ... phrasen
  | 
| 
kpeter@743
 | 
   508  | 
# with the correct punctuation
  | 
| 
kpeter@743
 | 
   509  | 
# Bug: Doesn't always work with multiple abbreviations plugged in
  | 
| 
kpeter@743
 | 
   510  | 
#
  | 
| 
kpeter@743
 | 
   511  | 
def concat_line(line):
    """Join the '#'-concatenated phrases of a field line into one value.

    A line of the form ``field = phrase1 # phrase2 # ... # phrasen`` is
    returned as ``field = phrase1 phrase2 ... phrasen``.  The field name and
    the value part are extracted with the module-level ``field_rex`` (groups
    1 and 2) and the value is split with ``concatsplit_rex``.

    Delimiter handling, as implemented below:
      * first phrase: a leading '"' is turned into '{';
      * later phrases: a leading '"' or '{' is dropped;
      * all but the last phrase: a trailing '"' or '}' is dropped;
      * last phrase: a trailing '"' becomes '}' and a trailing '",'
        becomes '},'.

    Known limitation (kept from the original): does not always work when
    several abbreviations were substituted into the line.

    @param line - a single field line containing '#' concatenations
    """
    # Raw strings: '\g' is not a valid Python string escape.
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    phrases = concatsplit_rex.split(rest)
    last_index = len(phrases) - 1

    pieces = [field + ' =']

    for index, phrase in enumerate(phrases):
        phrase = phrase.strip()

        # opening delimiter
        if index != 0:
            if phrase.startswith('"') or phrase.startswith('{'):
                phrase = phrase[1:]
        elif phrase.startswith('"'):
            phrase = phrase.replace('"', '{', 1)

        # closing delimiter
        if index != last_index:
            if phrase.endswith('"') or phrase.endswith('}'):
                phrase = phrase[:-1]
        elif phrase.endswith('"'):
            phrase = phrase[:-1] + '}'
        elif phrase.endswith('",'):
            phrase = phrase[:-2] + '},'

        # if phrase did have \#, add the \# back
        if phrase.endswith('\\'):
            phrase = phrase + '#'

        pieces.append(phrase)

    return ' '.join(pieces)
  | 
| 
kpeter@743
 | 
   550  | 
  | 
| 
kpeter@743
 | 
   551  | 
  | 
| 
kpeter@743
 | 
   552  | 
#
  | 
| 
kpeter@743
 | 
   553  | 
# substitute abbreviations into filecont
  | 
| 
kpeter@743
 | 
   554  | 
# @param filecont_source - string of data from file
  | 
| 
kpeter@743
 | 
   555  | 
#
  | 
| 
kpeter@743
 | 
   556  | 
def bibtex_replace_abbreviations(filecont_source):
    """Substitute @string abbreviations into the file content.

    The input is processed line by line:
      * blank lines are dropped;
      * an ``@string{name = value`` line registers a new abbreviation
        (unless the name is already known) and is removed from the output,
        together with the next line containing '}';
      * ``@comment{`` and ``@preamble{`` lines are removed the same way;
      * in every other line each known abbreviation that occurs outside
        braces and quotes (see ``verify_out_of_braces``) is replaced by its
        value, and '#' concatenations are merged with ``concat_line``.

    The twelve month abbreviations ('jan' ... 'dec') are predefined.  A
    final pass normalizes '{"' / '"}' pairs and re-inserts the line breaks
    after field values.

    @param filecont_source - string of data from file
    @return the processed data as one string, lines terminated by a newline
    """
    filecont = filecont_source.splitlines()

    #  These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    front = r'\b'
    back = r'(,?)\b'

    def compile_abbr(name):
        # escape the name so that it is always matched literally
        return re.compile(front + re.escape(name) + back, re.I)

    abbr_rex = [compile_abbr(name) for name in abbr_list]

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)

    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = 0
    output = []

    for line in filecont:
        if line == ' ' or line == '':
            continue

        # skip the line that closes a removed @string/@comment/@preamble
        if waiting_for_end_string:
            if '}' in line:
                waiting_for_end_string = 0
                continue

        definition = abbrdef_rex.search(line)
        if definition:
            abbr = definition.group(1)
            if abbr not in abbr_list:
                abbr_list.append(abbr)
                value_list.append(definition.group(2).strip())
                abbr_rex.append(compile_abbr(abbr))
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace subsequent abbreviations with the value
        for abbr, rex, value in zip(abbr_list, abbr_rex, value_list):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # A function is used as replacement so that backslashes
                    # in the value (LaTeX commands) are inserted verbatim
                    # instead of being read as regex template escapes.
                    line = rex.sub(
                        lambda match, value=value: value + match.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        output.append(line + '\n')

    filecont2 = ''.join(output)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub(r'\g<1>},\n', filecont2)

    return filecont2
  | 
| 
kpeter@743
 | 
   651  | 
  | 
| 
kpeter@743
 | 
   652  | 
#
  | 
| 
kpeter@743
 | 
   653  | 
# convert @type( ... ) to @type{ ... }
 | 
| 
kpeter@743
 | 
   654  | 
#
  | 
| 
kpeter@743
 | 
   655  | 
def no_outer_parens(filecont):
    """Convert ``@type( ... )`` entries to ``@type{ ... }``.

    The text is split on the characters '(', ')', '{' and '}'.  A piece
    containing '@' arms the conversion: if the delimiter that directly
    follows it is '(', it is rewritten to '{', and the ')' that brings the
    parenthesis count back to zero is rewritten to '}'.  Parentheses nested
    inside the entry and entries already written with braces are left
    untouched.

    @param filecont - the whole file content as one string
    @return the content with the outer entry parentheses turned into braces
    """
    # raw string: '\w' is not a valid Python string escape
    at_rex = re.compile(r'@\w*')

    open_paren_count = 0
    open_type = 0    # 1 while inside an entry opened with '('
    look_next = 0    # 1 right after a piece containing '@'

    # collect the pieces and join once instead of growing a string
    rebuilt = []

    for phrase in re.split('([(){}])', filecont):
        if look_next == 1:
            if phrase == '(':
                phrase = '{'
                open_paren_count += 1
            else:
                # the entry is not delimited by parentheses
                open_type = 0
            look_next = 0

        if phrase == '(':
            open_paren_count += 1
        elif phrase == ')':
            open_paren_count -= 1
            if open_type == 1 and open_paren_count == 0:
                phrase = '}'
                open_type = 0
        elif at_rex.search(phrase):
            open_type = 1
            look_next = 1

        rebuilt.append(phrase)

    return ''.join(rebuilt)
  | 
| 
kpeter@743
 | 
   695  | 
  | 
| 
kpeter@743
 | 
   696  | 
  | 
| 
kpeter@743
 | 
   697  | 
#
  | 
| 
kpeter@743
 | 
   698  | 
# make all whitespace into just one space
  | 
| 
kpeter@743
 | 
   699  | 
# format the bibtex file into a usable form.
  | 
| 
kpeter@743
 | 
   700  | 
#
  | 
| 
kpeter@743
 | 
   701  | 
def bibtexwasher(filecont_source):
    """Normalize raw BibTeX lines into a usable one-item-per-line form.

    Steps, in order:
      1. strip every line, collapse runs of whitespace to one space and
         drop empty lines and lines starting with '%';
      2. join everything into one long string and convert ``@type( ... )``
         to ``@type{ ... }`` (``no_outer_parens``);
      3. insert line breaks after field values, after entry keys and before
         every '@';
      4. replace the LaTeX sequences ``{\\&}`` and ``\\&`` by '&';
      5. put every brace that closes a top-level group on its own line;
      6. substitute abbreviations (``bibtex_replace_abbreviations``);
      7. drop blank lines.

    @param filecont_source - iterable of the lines of the BibTeX file
    @return list of the washed lines, each terminated by a newline
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    washed = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if not comment_rex.match(line) and line != '':
            washed.append(' ' + line)

    # the file is in one long string
    filecont = no_outer_parens(''.join(washed))

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', r'\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', r'= \g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      r'\n\n\g<1>\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', r'\n@\g<1>', filecont)

    # character encoding, reserved latex characters
    # (fixed texts, so plain string replacement is sufficient)
    filecont = filecont.replace('{\\&}', '&')
    filecont = filecont.replace('\\&', '&')

    # do checking for open braces to get format correct
    open_brace_count = 0
    pieces = []
    for phrase in re.split('([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                # a brace closing a top-level group starts a new line
                pieces.append('\n')
        pieces.append(phrase)
    filecont = ''.join(pieces)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the non-blank lines
    return [line + '\n'
            for line in filecont2.splitlines()
            if line != '' and line != ' ']
  | 
| 
kpeter@743
 | 
   783  | 
  | 
| 
kpeter@743
 | 
   784  | 
  | 
| 
kpeter@743
 | 
   785  | 
def filehandler(filepath):
    """Convert the BibTeX file ``filepath`` and print the Doxygen page.

    The file is read, washed with ``bibtexwasher`` and decoded with
    ``bibtexdecoder``; the resulting lines are printed to the standard
    output wrapped in a ``/** \\page references References ... */`` comment.

    If the file cannot be read, a message is written to the standard error
    (the standard output is normally redirected into the generated file)
    and the program exits with status 1.

    @param filepath - path of the BibTeX file to convert
    """
    import sys

    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the file even if reading fails
            fd.close()
    except IOError:
        sys.stderr.write('Could not open file: ' + filepath + '\n')
        sys.exit(1)

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    # single-argument print(...) behaves the same in Python 2 and 3
    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
  | 
| 
kpeter@743
 | 
   800  | 
  | 
| 
kpeter@743
 | 
   801  | 
  | 
| 
kpeter@743
 | 
   802  | 
# main program
  | 
| 
kpeter@743
 | 
   803  | 
  | 
| 
kpeter@743
 | 
   804  | 
def main():
    """Run the converter on the file named by the first command line argument.

    Without an argument a message is written to the standard error (the
    standard output is meant to be redirected into the generated file) and
    the program exits with status 1.
    """
    import sys

    if not sys.argv[1:]:
        sys.stderr.write('No input file\n')
        sys.exit(1)

    filehandler(sys.argv[1])


if __name__ == "__main__":
    main()
  | 
| 
kpeter@743
 | 
   814  | 
  | 
| 
kpeter@743
 | 
   815  | 
  | 
| 
kpeter@743
 | 
   816  | 
# end python script
  |