gravatar
kpeter (Peter Kovacs)
kpeter@inf.elte.hu
Modify the header of scripts/bib2dox.py (#184)
0 1 0
default
1 file changed with 7 insertions and 2 deletions:
↑ Collapse diff ↑
Ignore white space 384 line context
1
#!/usr/bin/env /usr/local/Python/bin/python2.1
1
#! /usr/bin/env python
2 2
"""
3 3
  BibTeX to Doxygen converter
4 4
  Usage: python bib2dox.py bibfile.bib > bibfile.dox
5 5

	
6
  This file is a part of LEMON, a generic C++ optimization library.
7

	
8
  **********************************************************************
9

	
6 10
  This code is the modification of the BibTeX to XML converter
7
  by Vidar Bronken Gundersen et al. See the original copyright notices below. 
11
  by Vidar Bronken Gundersen et al.
12
  See the original copyright notices below. 
8 13

	
9 14
  **********************************************************************
10 15

	
11 16
  Decoder for bibliographic data, BibTeX
12 17
  Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
13 18

	
14 19
  v.8
15 20
  (c)2002-06-23 Vidar Bronken Gundersen
16 21
  http://bibtexml.sf.net/
17 22
  Reuse approved as long as this notification is kept.
18 23
  Licence: GPL.
19 24

	
20 25
  Contributions/thanks to:
21 26
  Egon Willighagen, http://sf.net/projects/jreferences/
22 27
  Richard Mahoney (for providing a test case)
23 28

	
24 29
  Edited by Sara Sprenkle to be more robust and handle more bibtex features.
25 30
  (c) 2003-01-15
26 31

	
27 32
  1.  Changed bibtex: tags to bibxml: tags.
28 33
  2.  Use xmlns:bibxml="http://bibtexml.sf.net/"
29 34
  3.  Allow spaces between @type and first {
30 35
  4.  "author" fields with multiple authors split by " and "
31 36
      are put in separate xml "bibxml:author" tags.
32 37
  5.  Option for Titles: words are capitalized
33 38
      only if first letter in title or capitalized inside braces
34 39
  6.  Removes braces from within field values
35 40
  7.  Ignores comments in bibtex file (including @comment{ or % )
36 41
  8.  Replaces some special latex tags, e.g., replaces ~ with ' '
37 42
  9.  Handles bibtex @string abbreviations
38 43
        --> includes bibtex's default abbreviations for months
39 44
        --> does concatenation of abbr # " more " and " more " # abbr
40 45
  10. Handles @type( ... ) or @type{ ... }
41 46
  11. The keywords field is split on , or ; and put into separate xml
42 47
      "bibxml:keywords" tags
43 48
  12. Ignores @preamble
44 49

	
45 50
  Known Limitations
46 51
  1.  Does not transform Latex encoding like math mode and special
47 52
      latex symbols.
48 53
  2.  Does not parse author fields into first and last names.
49 54
      E.g., It does not do anything special to an author whose name is
50 55
      in the form LAST_NAME, FIRST_NAME
51 56
      In "author" tag, will show up as
52 57
      <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
53 58
  3.  Does not handle "crossref" fields other than to print
54 59
      <bibxml:crossref>...</bibxml:crossref>
55 60
  4.  Does not inform user of the input's format errors.  You just won't
56 61
      be able to transform the file later with XSL
57 62

	
58 63
  You will have to manually edit the XML output if you need to handle
59 64
  these (and unknown) limitations.
60 65

	
61 66
"""
62 67

	
63 68
import string, re
64 69

	
65 70
# set of valid name characters (a regex character class: word
# characters, '-' and ':')
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# NOTE: every pattern containing a backslash is written as a raw string
# so that the regex escapes (\s, \w, \\, \{ ...) reach the re module
# unchanged instead of relying on Python leaving unknown string escapes
# alone.

# separator between two authors: the word "and" surrounded by whitespace
author_rex = re.compile(r'\s+and\s+')
# any single brace character
rembraces_rex = re.compile('[{}]')
# a brace-delimited group without a closing brace inside, e.g. "{Abc}"
capitalize_rex = re.compile('({[^}]*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile('[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile('([{}"])', re.I)

# "name = value" with the value running to the end of the string
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
# "name = value" with the value ending at the first comma
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# a LaTeX \url{...} command; group 1 is the URL between the braces
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
93 98

	
94 99
#
95 100
# return the string parameter with LaTeX \url{...} commands turned into html links
96 101
#
97 102
def transformurls(str):
    """Return `str` with each LaTeX \\url{...} command turned into an HTML link.

    Every match of the module-level `url_rex` pattern is replaced by
    <a href="URL">URL</a>, where URL is the text captured between the
    braces.  Text that does not match is returned unchanged.
    """
    return url_rex.sub(r'<a href="\1">\1</a>', str)
99 104

	
100 105
#
101 106
# return the string parameter without braces
102 107
#
103 108
def removebraces(str):
    """Return `str` with every '{' and '}' character deleted.

    The characters to drop are defined by the module-level
    `rembraces_rex` pattern; all other characters are kept in order.
    """
    return rembraces_rex.sub('', str)
105 110

	
106 111
#
107 112
# latex-specific replacements
108 113
# (do this after braces were removed)
109 114
#
110 115
# Ordered (LaTeX sequence, HTML entity) pairs applied by latexreplacements().
# HTML has no entity for the double-acute u, so '\H u' / '\H U' fall back
# to the umlaut forms.
_LATEX_REPLACEMENTS = (
    ('~', '&nbsp;'),
    ('\\\'a', '&aacute;'),
    ('\\"a', '&auml;'),
    ('\\\'e', '&eacute;'),
    ('\\"e', '&euml;'),
    ('\\\'i', '&iacute;'),
    ('\\"i', '&iuml;'),
    ('\\\'o', '&oacute;'),
    ('\\"o', '&ouml;'),
    ('\\\'u', '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),   # &utilde; does not exist
    ('\\\'A', '&Aacute;'),
    ('\\"A', '&Auml;'),
    ('\\\'E', '&Eacute;'),
    ('\\"E', '&Euml;'),
    ('\\\'I', '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ('\\\'O', '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ('\\\'U', '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),   # &Utilde; does not exist
)


def latexreplacements(line):
    """Return `line` with a fixed set of LaTeX sequences replaced by HTML entities.

    '~' becomes '&nbsp;' and the accent commands \\' , \\" and \\H applied
    to the vowels a/e/i/o/u (both cases, as far as listed in
    _LATEX_REPLACEMENTS) become the corresponding HTML character
    entities.  The sequences are matched literally, without braces, so
    braces should already have been removed from `line`.

    Uses the str.replace() method rather than the string.replace()
    module function, which is not available in Python 3.
    """
    for latex, html in _LATEX_REPLACEMENTS:
        line = line.replace(latex, html)
    return line
138 143

	
139 144
#
140 145
# copy characters from a string, decoding html expressions (&xyz;)
141 146
#
142 147
def copychars(str, ifrom, count):
    """Copy up to `count` letters from `str`, starting at index `ifrom`.

    Only ASCII letters (A-Z, a-z) are copied; any other plain character
    is skipped without being counted.  An HTML expression of the form
    '&xyz;' counts as one character and contributes the single character
    that follows the '&' (e.g. '&otilde;' yields 'o'); the rest of the
    expression up to and including the ';' is skipped.

    Returns the collected characters as a string, which is shorter than
    `count` when the end of `str` is reached first.
    """
    result = ''
    length = len(str)
    i = ifrom
    copied = 0
    in_entity = False   # True while skipping the tail of an '&xyz;' expression
    while i < length and copied < count:
        ch = str[i]
        if ch == '&':
            in_entity = True
            if i + 1 < length:
                result += str[i + 1]
            copied += 1
            # the character after '&' has been consumed as well
            i += 2
            continue
        if in_entity:
            if ch == ';':
                in_entity = False
        elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
            result += ch
            copied += 1
        i += 1

    return result
165 170

	
166 171

	
167 172
# 
168 173
# Handle a list of authors (separated by 'and').
169 174
# It gives back a dictionary of the following values:
170 175
#  - num: the number of authors,
171 176
#  - list: the list of the author names,
172 177
#  - text: the bibtex text (separated by commas and/or 'and')
173 178
#  - abbrev: abbreviation that can be used to indicate the
174 179
#    bibliography entries
175 180
#
176 181
def bibtexauthor(data):
177 182
    result = {}
178 183
    bibtex = ''
179 184
    result['list'] = author_rex.split(data)
180 185
    result['num'] = len(result['list'])
181 186
    for i, author in enumerate(result['list']):
182 187
        # general transformations
183 188
        author = latexreplacements(removebraces(author.strip()))
184 189
        # transform "Xyz, A. B." to "A. B. Xyz"
185 190
        pos = author.find(',')
186 191
        if pos != -1:
187 192
            author = author[pos+1:].strip() + ' ' + author[:pos].strip()
188 193
        result['list'][i] = author
189 194
        bibtex += author + '#'
190 195
    bibtex = bibtex[:-1]
191 196
    if result['num'] > 1:
192 197
        ix = bibtex.rfind('#')
193 198
        if result['num'] == 2:
194 199
            bibtex = bibtex[:ix] + ' and ' + bibtex[ix+1:]
195 200
        else:
196 201
            bibtex = bibtex[:ix] + ', and ' + bibtex[ix+1:]
197 202
    bibtex = bibtex.replace('#', ', ')
198 203
    result['text'] = bibtex
199 204
    
0 comments (0 inline)