LEMON/LEMON-main Changeset - r836:c841ae1aca29

1

#!/usr/bin/env /usr/local/Python/bin/python2.1

1

#! /usr/bin/env python

2

"""

3

  BibTeX to Doxygen converter

4

  Usage: python bib2dox.py bibfile.bib > bibfile.dox

5

6

  This file is a part of LEMON, a generic C++ optimization library.

7

8

  **********************************************************************

9

6

10

  This code is the modification of the BibTeX to XML converter

7

  by Vidar Bronken Gundersen et al. See the original copyright notices below.

11

  by Vidar Bronken Gundersen et al.

12

  See the original copyright notices below.

  **********************************************************************

  Decoder for bibliographic data, BibTeX

12

17

  Usage: python bibtex2xml.py bibfile.bib > bibfile.xml

v.8

15

20

  (c)2002-06-23 Vidar Bronken Gundersen

16

21

  http://bibtexml.sf.net/

17

22

  Reuse approved as long as this notification is kept.

18

23

  Licence: GPL.

  Contributions/thanks to:

21

26

  Egon Willighagen, http://sf.net/projects/jreferences/

22

27

  Richard Mahoney (for providing a test case)

  Editted by Sara Sprenkle to be more robust and handle more bibtex features.

25

30

  (c) 2003-01-15

  1.  Changed bibtex: tags to bibxml: tags.

28

33

  2.  Use xmlns:bibxml="http://bibtexml.sf.net/"

29

34

  3.  Allow spaces between @type and first {

30

35

  4.  "author" fields with multiple authors split by " and "

31

36

      are put in separate xml "bibxml:author" tags.

32

37

  5.  Option for Titles: words are capitalized

33

38

      only if first letter in title or capitalized inside braces

34

39

  6.  Removes braces from within field values

35

40

  7.  Ignores comments in bibtex file (including @comment{ or % )

36

41

  8.  Replaces some special latex tags, e.g., replaces ~ with '&#160;'

37

42

  9.  Handles bibtex @string abbreviations

38

43

        --> includes bibtex's default abbreviations for months

39

44

        --> does concatenation of abbr # " more " and " more " # abbr

40

45

  10. Handles @type( ... ) or @type{ ... }

41

46

  11. The keywords field is split on , or ; and put into separate xml

42

47

      "bibxml:keywords" tags

43

48

  12. Ignores @preamble

  Known Limitations

46

51

  1.  Does not transform Latex encoding like math mode and special

47

52

      latex symbols.

48

53

  2.  Does not parse author fields into first and last names.

49

54

      E.g., It does not do anything special to an author whose name is

50

55

      in the form LAST_NAME, FIRST_NAME

51

56

      In "author" tag, will show up as

52

57

      <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>

53

58

  3.  Does not handle "crossref" fields other than to print

54

59

      <bibxml:crossref>...</bibxml:crossref>

55

60

  4.  Does not inform user of the input's format errors.  You just won't

56

61

      be able to transform the file later with XSL

  You will have to manually edit the XML output if you need to handle

59

64

  these (and unknown) limitations.

"""

import string, re

# set of valid name characters
valid_name_chars = r'[\w\-:]'

# define global regular expression variables
# (raw strings are used so that regex escapes such as \s, \w and the
# literal backslash in front of "url" are passed to the re module verbatim
# and are never interpreted as string escapes)
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile(r'[{}]')
capitalize_rex = re.compile(r'({[^}]*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = value" pairs: field_rex keeps the rest of the line as the value,
# data_rex stops the value at the first comma
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# matches the LaTeX command \url{...} and captures its argument
url_rex = re.compile(r'\\url\{([^}]*)\}')

# styles for html formatting
divstyle = 'margin-top: -4ex; margin-left: 8em;'

# replace LaTeX \url{...} commands with HTML links

def transformurls(str):
    """Return `str` with every match of `url_rex` turned into an HTML link.

    Each match is replaced by ``<a href="X">X</a>``, where X is the text
    captured by the first group of the module-level pattern `url_rex`.
    """
    return url_rex.sub(r'<a href="\1">\1</a>', str)

# return the string parameter without braces

def removebraces(str):
    """Return `str` with every match of `rembraces_rex` deleted.

    `rembraces_rex` is the module-level pattern used to strip braces from
    field values.
    """
    return rembraces_rex.sub('', str)

# latex-specific replacements

108

113

# (do this after braces were removed)

def latexreplacements(line):
    """Return `line` with a fixed set of LaTeX sequences turned into HTML.

    The non-breaking space ``~`` and the accent commands ``\\'x``, ``\\"x``
    and ``\\H x`` (for the vowels handled below, in both cases) are replaced
    by HTML character entities.  Apply this after braces were removed,
    because the patterns are matched without braces.
    """
    # (LaTeX sequence, HTML entity) pairs, applied in this order.
    replacements = (
        ('~', '&nbsp;'),
        ("\\'a", '&aacute;'),
        ('\\"a', '&auml;'),
        ("\\'e", '&eacute;'),
        ('\\"e', '&euml;'),
        ("\\'i", '&iacute;'),
        ('\\"i', '&iuml;'),
        ("\\'o", '&oacute;'),
        ('\\"o', '&ouml;'),
        ("\\'u", '&uacute;'),
        ('\\"u', '&uuml;'),
        ('\\H o', '&otilde;'),
        ('\\H u', '&uuml;'),   # &utilde; does not exist
        ("\\'A", '&Aacute;'),
        ('\\"A', '&Auml;'),
        ("\\'E", '&Eacute;'),
        ('\\"E', '&Euml;'),
        ("\\'I", '&Iacute;'),
        ('\\"I', '&Iuml;'),
        ("\\'O", '&Oacute;'),
        ('\\"O', '&Ouml;'),
        ("\\'U", '&Uacute;'),
        ('\\"U', '&Uuml;'),
        ('\\H O', '&Otilde;'),
        ('\\H U', '&Uuml;'),   # &Utilde; does not exist
    )
    # The str.replace method is used instead of string.replace(), which is
    # only available in Python 2.
    for latex, html in replacements:
        line = line.replace(latex, html)
    return line

# copy characters from a string decoding html expressions (&xyz;)

def copychars(str, ifrom, count):
    """Copy at most `count` letters from `str`, starting at index `ifrom`.

    Outside of HTML expressions only ASCII letters (A-Z, a-z) are copied
    and counted; every other character is skipped without being counted.
    An HTML expression (``&xyz;``) contributes exactly one counted item:
    the character that directly follows the ``&``.  The remainder of the
    expression, up to and including the closing ``;``, is skipped.

    Returns the collected characters as a string.
    """
    result = ''
    i = ifrom
    c = 0                 # number of items counted so far
    html_spec = False     # True while skipping the tail of an &xyz; expression
    while (i < len(str)) and (c < count):
        ch = str[i]
        if ch == '&':
            html_spec = True
            # Take the character after '&' (if there is one) and jump over it.
            if i + 1 < len(str):
                result += str[i + 1]
            c += 1
            i += 2
        else:
            if not html_spec:
                if ('A' <= ch <= 'Z') or ('a' <= ch <= 'z'):
                    result += ch
                    c += 1
            elif ch == ';':
                # End of the HTML expression: resume normal copying.
                html_spec = False
            i += 1
    return result

# Handle a list of authors (separated by 'and').

169

174

# It gives back a dictionary with the following values:

170

175

#  - num: the number of authors,

171

176

#  - list: the list of the author names,

172

177

#  - text: the bibtex text (separated by commas and/or 'and')

173

178

#  - abbrev: abbreviation that can be used to indicate the

174

179

#    bibliography entries

def bibtexauthor(data):

177

182

    result = {}

178

183

    bibtex = ''

179

184

    result['list'] = author_rex.split(data)

180

185

    result['num'] = len(result['list'])

181

186

    for i, author in enumerate(result['list']):

182

187

        # general transformations

183

188

        author = latexreplacements(removebraces(author.strip()))

184

189

        # transform "Xyz, A. B." to "A. B. Xyz"

185

190

        pos = author.find(',')

186

191

        if pos != -1:

187

192

            author = author[pos+1:].strip() + ' ' + author[:pos].strip()

188

193

        result['list'][i] = author

189

194

        bibtex += author + '#'

190

195

    bibtex = bibtex[:-1]

191

196

    if result['num'] > 1:

192

197

        ix = bibtex.rfind('#')

193

198

        if result['num'] == 2:

194

199

            bibtex = bibtex[:ix] + ' and ' + bibtex[ix+1:]

195

200

        else:

196

201

            bibtex = bibtex[:ix] + ', and ' + bibtex[ix+1:]

197

202

    bibtex = bibtex.replace('#', ', ')

198

203

    result['text'] = bibtex

199

204

RhodeCode

Login to your account