| 1 |
#!/usr/bin/env |
|
| 1 |
#! /usr/bin/env python |
|
| 2 | 2 |
""" |
| 3 | 3 |
BibTeX to Doxygen converter |
| 4 | 4 |
Usage: python bib2dox.py bibfile.bib > bibfile.dox |
| 5 | 5 |
|
| 6 |
This file is a part of LEMON, a generic C++ optimization library. |
|
| 7 |
|
|
| 8 |
********************************************************************** |
|
| 9 |
|
|
| 6 | 10 |
This code is the modification of the BibTeX to XML converter |
| 7 |
by Vidar Bronken Gundersen et al. |
|
| 11 |
by Vidar Bronken Gundersen et al. |
|
| 12 |
See the original copyright notices below. |
|
| 8 | 13 |
|
| 9 | 14 |
********************************************************************** |
| 10 | 15 |
|
| 11 | 16 |
Decoder for bibliographic data, BibTeX |
| 12 | 17 |
Usage: python bibtex2xml.py bibfile.bib > bibfile.xml |
| 13 | 18 |
|
| 14 | 19 |
v.8 |
| 15 | 20 |
(c)2002-06-23 Vidar Bronken Gundersen |
| 16 | 21 |
http://bibtexml.sf.net/ |
| 17 | 22 |
Reuse approved as long as this notification is kept. |
| 18 | 23 |
Licence: GPL. |
| 19 | 24 |
|
| 20 | 25 |
Contributions/thanks to: |
| 21 | 26 |
Egon Willighagen, http://sf.net/projects/jreferences/ |
| 22 | 27 |
Richard Mahoney (for providing a test case) |
| 23 | 28 |
|
| 24 | 29 |
Edited by Sara Sprenkle to be more robust and handle more bibtex features. |
| 25 | 30 |
(c) 2003-01-15 |
| 26 | 31 |
|
| 27 | 32 |
1. Changed bibtex: tags to bibxml: tags. |
| 28 | 33 |
2. Use xmlns:bibxml="http://bibtexml.sf.net/" |
| 29 | 34 |
3. Allow spaces between @type and first {
|
| 30 | 35 |
4. "author" fields with multiple authors split by " and " |
| 31 | 36 |
are put in separate xml "bibxml:author" tags. |
| 32 | 37 |
5. Option for Titles: words are capitalized |
| 33 | 38 |
only if first letter in title or capitalized inside braces |
| 34 | 39 |
6. Removes braces from within field values |
| 35 | 40 |
7. Ignores comments in bibtex file (including @comment{ or % )
|
| 36 | 41 |
8. Replaces some special latex tags, e.g., replaces ~ with ' ' |
| 37 | 42 |
9. Handles bibtex @string abbreviations |
| 38 | 43 |
--> includes bibtex's default abbreviations for months |
| 39 | 44 |
--> does concatenation of abbr # " more " and " more " # abbr |
| 40 | 45 |
10. Handles @type( ... ) or @type{ ... }
|
| 41 | 46 |
11. The keywords field is split on , or ; and put into separate xml |
| 42 | 47 |
"bibxml:keywords" tags |
| 43 | 48 |
12. Ignores @preamble |
| 44 | 49 |
|
| 45 | 50 |
Known Limitations |
| 46 | 51 |
1. Does not transform Latex encoding like math mode and special |
| 47 | 52 |
latex symbols. |
| 48 | 53 |
2. Does not parse author fields into first and last names. |
| 49 | 54 |
E.g., It does not do anything special to an author whose name is |
| 50 | 55 |
in the form LAST_NAME, FIRST_NAME |
| 51 | 56 |
In "author" tag, will show up as |
| 52 | 57 |
<bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author> |
| 53 | 58 |
3. Does not handle "crossref" fields other than to print |
| 54 | 59 |
<bibxml:crossref>...</bibxml:crossref> |
| 55 | 60 |
4. Does not inform user of the input's format errors. You just won't |
| 56 | 61 |
be able to transform the file later with XSL |
| 57 | 62 |
|
| 58 | 63 |
You will have to manually edit the XML output if you need to handle |
| 59 | 64 |
these (and unknown) limitations. |
| 60 | 65 |
|
| 61 | 66 |
""" |
| 62 | 67 |
|
| 63 | 68 |
import string, re |
| 64 | 69 |
|
| 65 | 70 |
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# Every pattern is a raw string so that regex escapes (\s, \w, \{ ...) reach
# the re module untouched.  As ordinary literals they trigger invalid-escape
# warnings, and the "\u" inside the url pattern is a syntax error on Python 3.
#

# separator between author names: the word "and" surrounded by whitespace
author_rex = re.compile(r'\s+and\s+')
# any single opening or closing brace
rembraces_rex = re.compile(r'[{}]')
# a brace-delimited group, captured together with its braces
capitalize_rex = re.compile(r'({[^}]*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = value" with the value running to the end of the line
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
# "name = value" with the value stopping at the first comma
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# a LaTeX \url{...} command; group 1 is the text between the braces
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
| 93 | 98 |
|
| 94 | 99 |
# |
| 95 | 100 |
# replace LaTeX \url{...} commands in the string parameter with HTML links |
| 96 | 101 |
# |
| 97 | 102 |
def transformurls(str):
    """Return ``str`` with each match of ``url_rex`` rewritten as an HTML link.

    The text captured by the first group of ``url_rex`` is used both as the
    ``href`` target and as the visible text of the generated anchor.
    """
    anchor_template = r'<a href="\1">\1</a>'
    linked = url_rex.sub(anchor_template, str)
    return linked
| 99 | 104 |
|
| 100 | 105 |
# |
| 101 | 106 |
# return the string parameter without braces |
| 102 | 107 |
# |
| 103 | 108 |
def removebraces(str):
    """Return ``str`` with every match of ``rembraces_rex`` deleted."""
    nothing = ''
    stripped = rembraces_rex.sub(nothing, str)
    return stripped
|
| 105 | 110 |
|
| 106 | 111 |
# |
| 107 | 112 |
# latex-specific replacements |
| 108 | 113 |
# (do this after braces were removed) |
| 109 | 114 |
# |
| 110 | 115 |
def latexreplacements(line):
    """Replace a fixed set of LaTeX sequences in ``line`` with plain characters.

    Handles the tilde and the acute, umlaut and "H" accent commands on the
    vowels a, e, i, o, u in both cases.  The substitutions are applied one
    after another in the order listed below.

    The ``str.replace`` method is used instead of the ``string.replace``
    module function, which no longer exists in Python 3.

    Parameters:
        line: the text to transform (braces are expected to be removed
            already, as the accent commands are matched without them).

    Returns:
        The transformed text; input without any listed sequence is
        returned unchanged.
    """
    replacements = (
        ('~', ' '),
        # lower-case vowels
        ("\\'a", 'á'),
        ('\\"a', 'ä'),
        ("\\'e", 'é'),
        ('\\"e', 'ë'),
        ("\\'i", 'í'),
        ('\\"i', 'ï'),
        ("\\'o", 'ó'),
        ('\\"o', 'ö'),
        ("\\'u", 'ú'),
        ('\\"u', 'ü'),
        ('\\H o', 'õ'),
        ('\\H u', 'ü'),  # ũ does not exist, so ü stands in for it
        # upper-case vowels
        ("\\'A", 'Á'),
        ('\\"A', 'Ä'),
        ("\\'E", 'É'),
        ('\\"E', 'Ë'),
        ("\\'I", 'Í'),
        ('\\"I', 'Ï'),
        ("\\'O", 'Ó'),
        ('\\"O', 'Ö'),
        ("\\'U", 'Ú'),
        ('\\"U', 'Ü'),
        ('\\H O', 'Õ'),
        ('\\H U', 'Ü'),  # Ũ does not exist, so Ü stands in for it
    )
    for latex, plain in replacements:
        line = line.replace(latex, plain)
    return line
| 138 | 143 |
|
| 139 | 144 |
# |
| 140 | 145 |
# copy characters from a string decoding html expressions (&xyz;) |
| 141 | 146 |
# |
| 142 | 147 |
def copychars(str, ifrom, count):
    """Collect up to ``count`` characters of ``str`` starting at index ``ifrom``.

    Outside an html expression only the ASCII letters A-Z and a-z are copied
    and counted; any other character is skipped without being counted.

    When an ``&`` is met, the character right after it (if there is one) is
    copied, the expression counts as one character, and everything that
    follows is skipped up to and including the next ``;``.

    Returns the collected characters as a string.
    """
    length = len(str)
    picked = []
    pos = ifrom
    taken = 0
    in_entity = False
    while pos < length and taken < count:
        ch = str[pos]
        if ch == '&':
            # start of an html expression: keep only its first character
            in_entity = True
            if pos + 1 < length:
                picked.append(str[pos + 1])
            taken += 1
            pos += 2
            continue
        if in_entity:
            # still inside the expression; only ';' ends it
            if ch == ';':
                in_entity = False
        elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
            picked.append(ch)
            taken += 1
        pos += 1
    return ''.join(picked)
| 165 | 170 |
|
| 166 | 171 |
|
| 167 | 172 |
# |
| 168 | 173 |
# Handle a list of authors (separated by 'and'). |
| 169 | 174 |
# It gives back a dictionary with the following values: |
| 170 | 175 |
# - num: the number of authors, |
| 171 | 176 |
# - list: the list of the author names, |
| 172 | 177 |
# - text: the bibtex text (separated by commas and/or 'and') |
| 173 | 178 |
# - abbrev: abbreviation that can be used to indicate the |
| 174 | 179 |
# bibliography entries |
| 175 | 180 |
# |
| 176 | 181 |
def bibtexauthor(data): |
| 177 | 182 |
result = {}
|
| 178 | 183 |
bibtex = '' |
| 179 | 184 |
result['list'] = author_rex.split(data) |
| 180 | 185 |
result['num'] = len(result['list']) |
| 181 | 186 |
for i, author in enumerate(result['list']): |
| 182 | 187 |
# general transformations |
| 183 | 188 |
author = latexreplacements(removebraces(author.strip())) |
| 184 | 189 |
# transform "Xyz, A. B." to "A. B. Xyz" |
| 185 | 190 |
pos = author.find(',')
|
| 186 | 191 |
if pos != -1: |
| 187 | 192 |
author = author[pos+1:].strip() + ' ' + author[:pos].strip() |
| 188 | 193 |
result['list'][i] = author |
| 189 | 194 |
bibtex += author + '#' |
| 190 | 195 |
bibtex = bibtex[:-1] |
| 191 | 196 |
if result['num'] > 1: |
| 192 | 197 |
ix = bibtex.rfind('#')
|
| 193 | 198 |
if result['num'] == 2: |
| 194 | 199 |
bibtex = bibtex[:ix] + ' and ' + bibtex[ix+1:] |
| 195 | 200 |
else: |
| 196 | 201 |
bibtex = bibtex[:ix] + ', and ' + bibtex[ix+1:] |
| 197 | 202 |
bibtex = bibtex.replace('#', ', ')
|
| 198 | 203 |
result['text'] = bibtex |
| 199 | 204 |
|
0 comments (0 inline)