COIN-OR::LEMON - Graph Library

source: lemon-main/scripts/bib2dox.py @ 984:fcb6ad1e67d0

Last change on this file since 984:fcb6ad1e67d0 was 836:c841ae1aca29, checked in by Peter Kovacs <kpeter@…>, 15 years ago

Modify the header of scripts/bib2dox.py (#184)

  • Property exe set to *
File size: 25.7 KB
RevLine 
[836]1#! /usr/bin/env python
[743]2"""
3  BibTeX to Doxygen converter
4  Usage: python bib2dox.py bibfile.bib > bibfile.dox
5
[836]6  This file is a part of LEMON, a generic C++ optimization library.
7
8  **********************************************************************
9
[743]10  This code is the modification of the BibTeX to XML converter
[836]11  by Vidar Bronken Gundersen et al.
12  See the original copyright notices below.
[743]13
14  **********************************************************************
15
16  Decoder for bibliographic data, BibTeX
17  Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
18
19  v.8
20  (c)2002-06-23 Vidar Bronken Gundersen
21  http://bibtexml.sf.net/
22  Reuse approved as long as this notification is kept.
23  Licence: GPL.
24
25  Contributions/thanks to:
26  Egon Willighagen, http://sf.net/projects/jreferences/
27  Richard Mahoney (for providing a test case)
28
29  Edited by Sara Sprenkle to be more robust and handle more bibtex features.
30  (c) 2003-01-15
31
32  1.  Changed bibtex: tags to bibxml: tags.
33  2.  Use xmlns:bibxml="http://bibtexml.sf.net/"
34  3.  Allow spaces between @type and first {
35  4.  "author" fields with multiple authors split by " and "
36      are put in separate xml "bibxml:author" tags.
37  5.  Option for Titles: words are capitalized
38      only if first letter in title or capitalized inside braces
39  6.  Removes braces from within field values
40  7.  Ignores comments in bibtex file (including @comment{ or % )
41  8.  Replaces some special latex tags, e.g., replaces ~ with '&#160;'
42  9.  Handles bibtex @string abbreviations
43        --> includes bibtex's default abbreviations for months
44        --> does concatenation of abbr # " more " and " more " # abbr
45  10. Handles @type( ... ) or @type{ ... }
46  11. The keywords field is split on , or ; and put into separate xml
47      "bibxml:keywords" tags
48  12. Ignores @preamble
49
50  Known Limitations
51  1.  Does not transform Latex encoding like math mode and special
52      latex symbols.
53  2.  Does not parse author fields into first and last names.
54      E.g., It does not do anything special to an author whose name is
55      in the form LAST_NAME, FIRST_NAME
56      In "author" tag, will show up as
57      <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
58  3.  Does not handle "crossref" fields other than to print
59      <bibxml:crossref>...</bibxml:crossref>
60  4.  Does not inform user of the input's format errors.  You just won't
61      be able to transform the file later with XSL
62
63  You will have to manually edit the XML output if you need to handle
64  these (and unknown) limitations.
65
66"""
67
68import string, re
69
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# Every pattern is written as a raw string so that the backslashes reach the
# regex engine unchanged on both Python 2 and Python 3 (a non-raw '\u...'
# is a syntax error on Python 3, and '\s', '\w' are invalid string escapes).
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile(r'[{}]')
capitalize_rex = re.compile(r'({[^}]*})')

# splits on ',' or ';' (not referenced by any function shown in this file)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = value" lines: group 1 is the field name, group 2 the value
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# a literal backslash followed by url{...}; group 1 is the link target
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
[743]98
def transformurls(str):
    r"""Return the text with every \url{target} command turned into a link.

    Each match of url_rex is replaced by an HTML anchor whose href and
    visible text are both the target found between the braces.
    """
    anchor_template = r'<a href="\1">\1</a>'
    return url_rex.sub(anchor_template, str)
104
def removebraces(str):
    """Return the string parameter with all '{' and '}' characters deleted."""
    # Splitting on every brace and gluing the pieces back together drops
    # exactly the characters matched by rembraces_rex.
    pieces = rembraces_rex.split(str)
    return ''.join(pieces)
110
# (LaTeX sequence, HTML entity) pairs applied by latexreplacements().
# The Hungarian double-acute accents (\H o, \H u) are only approximated,
# because the entities &utilde; / &Utilde; do not exist.
_LATEX_REPLACEMENTS = (
    ('~', '&nbsp;'),
    ("\\'a", '&aacute;'),
    ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'),
    ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'),
    ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'),
    ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),
    ("\\'A", '&Aacute;'),
    ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'),
    ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),
)


def latexreplacements(line):
    """Replace LaTeX-specific sequences in a line by HTML entities.

    Handles the non-breaking space '~' and the accented vowels listed in
    _LATEX_REPLACEMENTS.  Call this after braces were removed, because
    the accent sequences are matched without surrounding braces.

    Uses the str.replace method rather than the string module function,
    which only exists on Python 2.

    @param line  the text to transform
    @return      the transformed text
    """
    for latex, entity in _LATEX_REPLACEMENTS:
        line = line.replace(latex, entity)
    return line
143
def copychars(str, ifrom, count):
    """Copy letters from a string, decoding html expressions (&xyz;).

    Scanning starts at index ifrom and stops at the end of the string or
    once count characters were counted.  Outside an html expression only
    the ASCII letters A-Z and a-z are copied; everything else is skipped.
    An '&' counts as one character and contributes the character that
    follows it (if any); the remainder of the expression, up to and
    including the closing ';', is skipped.

    @return the copied characters as a string
    """
    picked = []
    pos = ifrom
    taken = 0
    inside_entity = False
    end = len(str)
    while pos < end and taken < count:
        ch = str[pos]
        if ch == '&':
            # keep the letter the entity is based on, e.g. 'a' of &aacute;
            inside_entity = True
            if pos + 1 < end:
                picked.append(str[pos + 1])
            taken += 1
            pos += 2
            continue
        if inside_entity:
            if ch == ';':
                inside_entity = False
        elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
            picked.append(ch)
            taken += 1
        pos += 1

    return ''.join(picked)
170
171
def bibtexauthor(data):
    """Handle a list of authors (separated by 'and').

    Every name has its braces removed and its LaTeX sequences replaced,
    and a name of the form "Xyz, A. B." is turned into "A. B. Xyz".

    The text is assembled by joining the names directly, so a name that
    itself contains '#' is kept intact.

    @param data  the raw value of an author or editor field
    @return a dictionary with the following values:
      - num: the number of authors,
      - list: the list of the author names,
      - text: the bibtex text (separated by commas and/or 'and'),
      - abbrev: abbreviation that can be used to indicate the
        bibliography entries
    """
    names = []
    for raw_name in author_rex.split(data):
        # general transformations
        name = latexreplacements(removebraces(raw_name.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        comma = name.find(',')
        if comma != -1:
            name = name[comma + 1:].strip() + ' ' + name[:comma].strip()
        names.append(name)

    # "A", "A and B" or "A, B, and C"
    if len(names) <= 2:
        text = ' and '.join(names)
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    # a single author contributes three letters of the last word of the
    # name, otherwise every author contributes one letter
    letters = 3 if len(names) == 1 else 1
    abbrev = ''.join(copychars(name, name.rfind(' ') + 1, letters)
                     for name in names)

    return {'list': names, 'num': len(names), 'text': text, 'abbrev': abbrev}
214
215
def capitalizetitle(data):
    """Normalize the capitalization of a title.

    The title is split into brace groups and the text between them.
    Brace groups keep their capitalization (only the braces are removed).
    The leading text has its leading whitespace dropped and is
    capitalized (first letter upper case, rest lower case); all other
    text is lower-cased.

    @param data  title string
    @return      the re-capitalized title
    """
    pieces = []
    for index, phrase in enumerate(capitalize_rex.split(data)):
        stripped = phrase.lstrip()
        if stripped.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif index == 0:
            # first word --> capitalize first letter (after spaces)
            pieces.append(stripped.capitalize())
        else:
            pieces.append(phrase.lower())
    return ''.join(pieces)
240
241
def bibtextitle(data, entrytype):
    """Return the output text for a title; braces are removed from it.

    Titles of 'book' and 'inbook' entries keep their capitalization,
    every other entry type goes through capitalizetitle() first.

    @param data       title string
    @param entrytype  the (lower-case) type of the entry
    """
    text = data.strip()
    if entrytype not in ('book', 'inbook'):
        text = capitalizetitle(text)
    return removebraces(text)
254
255
def entry_cmp(x, y):
    """Compare two entry lists by their first element (the sort key).

    Written without the cmp() builtin, which does not exist on Python 3.

    @return -1, 0 or 1 if x[0] is less than, equal to or greater than y[0]
    """
    left, right = x[0], y[0]
    return (left > right) - (left < right)
261
262
def _emphasize_field(entrycont, field):
    """Wrap the value of a field in <em> tags if the entry defines it."""
    if field in entrycont:
        entrycont[field] = '<em>' + entrycont[field] + '</em>'


def _format_entry(entrytype, entryid, entrycont):
    """Build the output record [sortkey, bibkey, entryid, text...] of an entry."""

    def has(field):
        # the field is present and not empty
        return entrycont.get(field, '') != ''

    # entry type related formattings; missing fields are tolerated
    if entrytype in ('book', 'inbook'):
        _emphasize_field(entrycont, 'title')
        if 'author' not in entrycont and 'editor' in entrycont:
            entrycont['author'] = entrycont['editor']
            entrycont['author']['text'] += ', editors'
    elif entrytype == 'article':
        _emphasize_field(entrycont, 'journal')
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        _emphasize_field(entrycont, 'booktitle')
    elif entrytype == 'techreport':
        entrycont.setdefault('type', 'Technical report')
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    if has('pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    lines = []
    if has('author'):
        lines.append(entrycont['author']['text'] + '.')
    if has('title'):
        lines.append(entrycont['title'] + '.')
    if has('journal'):
        lines.append(entrycont['journal'] + ',')
    if has('booktitle'):
        lines.append('In ' + entrycont['booktitle'] + ',')
    if has('type'):
        eline = entrycont['type']
        if has('number'):
            eline += ' ' + entrycont['number']
        lines.append(eline + ',')
    for field in ('institution', 'publisher', 'school', 'address'):
        if has(field):
            lines.append(entrycont[field] + ',')
    if has('edition'):
        lines.append(entrycont['edition'] + ' edition,')
    if has('howpublished'):
        lines.append(entrycont['howpublished'] + ',')
    if has('volume'):
        # volume(number):pages
        eline = entrycont['volume']
        if has('number'):
            eline += '(' + entrycont['number'] + ')'
        if has('pages'):
            eline += ':' + entrycont['pages']
        lines.append(eline + ',')
    elif has('pages'):
        lines.append('pages ' + entrycont['pages'] + ',')
    if has('year'):
        if has('month'):
            lines.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            lines.append(entrycont['year'] + '.')
    if has('note'):
        lines.append(entrycont['note'] + '.')
    if has('url'):
        lines.append(entrycont['url'] + '.')

    # generate keys for sorting and for the output
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ') + 1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        # an explicit "key" field overrides the generated label
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']

    return [sortkey, bibkey, entryid] + lines


def bibtexdecoder(filecont_source):
    """Convert washed BibTeX lines into the lines of a Doxygen page body.

    The input is expected in the one-item-per-line layout produced by
    bibtexwasher(): an "@type{id," line opens an entry, a line starting
    with '}' closes it, and every other line is a "field = value" line.

    Entries are sorted by a key built from the authors' last words, the
    year and the title (prefixed by the "key" field if there is one).
    Each entry is labelled by the authors' abbreviation plus the last two
    digits of the year; entries sharing a label get the suffixes a, b, ...

    Field lines and closing braces that are not inside an entry are
    ignored, and entries lacking a field that is normally emphasized
    (title, journal, booktitle) are formatted without it.

    @param filecont_source  iterable of lines
    @return the list of output lines (without line ends): for each entry
            a "\\section" line, an opening <div>, the formatted texts,
            a closing </div> and an empty line
    """
    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    # a line that consists of a closing brace
    endtype_rex = re.compile(r'}\s*$')

    # field = {data}  and  field = "data"  (plain data uses data_rex)
    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')

    entries = []
    entrycont = None        # fields of the open entry, None outside entries
    entrytype = ''
    entryid = ''

    for line in filecont_source:
        # drop the line end (and nothing else)
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        if pubtype_rex.match(line):
            entrycont = {}
            entrytype = pubtype_rex.sub(r'\g<1>', line).lower()
            entryid = pubtype_rex.sub(r'\g<2>', line)
            continue

        # end entry if just a }
        if endtype_rex.match(line):
            if entrycont is not None:
                entries.append(_format_entry(entrytype, entryid, entrycont))
                entrycont = None
            continue

        # field, publication info
        field = ''
        data = ''
        for data_pattern in (bracedata_rex, quotedata_rex, data_rex):
            if data_pattern.match(line):
                field = field_rex.sub(r'\g<1>', line).lower()
                data = data_pattern.sub(r'\g<2>', line)
                break

        if entrycont is None or field == '':
            continue

        if field == 'url':
            # wrap it so that transformurls() below turns it into a link
            data = '\\url{' + data.strip() + '}'

        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            continue
        if field == 'title':
            value = bibtextitle(data, entrytype)
        else:
            value = removebraces(transformurls(data.strip()))

        if value != '':
            entrycont[field] = latexreplacements(value)

    # sort entries by their sort key (stable, like the original cmp sort)
    entries.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in entries:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = dict.fromkeys(keytable, 0)

    # generate output
    output = []
    for entry in entries:
        # generate output key from the bibtex key
        bibkey = entry[1]
        entryid = entry[2]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            # 97 is ord('a'): duplicates become key + 'a', 'b', ...
            outkey = bibkey + chr(97 + counttable[bibkey])
        counttable[bibkey] += 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(entry[3:])
        output.append('</div>')
        output.append('')

    return output
473
474
def verify_out_of_braces(line, abbr):
    """Return 1 iff abbr is in line but not inside braces or quotes.

    The line is cut at every '{', '}' and '"'; the brace depth and the
    quote state are tracked while walking over the pieces, and abbr is
    looked up (as a whole word, ignoring case) in the text pieces.
    Assumes that abbr appears only once on the line (out of braces and
    quotes).

    @return 1 if such an occurrence is found, otherwise 0
    """
    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    depth = 0
    quoted = False

    for piece in delimiter_rex.split(line):
        if piece == "{":
            depth += 1
        elif piece == "}":
            depth -= 1
        elif piece == '"':
            quoted = not quoted
        elif depth == 0 and not quoted and word_rex.search(piece):
            return 1

    return 0
503
504
def concat_line(line):
    """Resolve the '#' concatenations of a "field = value" line.

    A line in the form  field = phrase1 # phrase2 # ... # phrasen
    is returned as      field = phrase1 phrase2 ... phrasen
    with the delimiters between the phrases removed and the whole value
    delimited by braces when it started or ended with a quote.

    Bug: Doesn't always work with multiple abbreviations plugged in.

    @param line  the line to transform
    @return      the line with the phrases joined by single spaces
    """
    # only look at part after equals
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    phrases = concatsplit_rex.split(rest)
    last = len(phrases) - 1

    parts = [field + ' =']
    for index, phrase in enumerate(phrases):
        phrase = phrase.strip()

        # opening delimiter: dropped inside, '"' becomes '{' at the front
        if index != 0:
            if phrase.startswith(('"', '{')):
                phrase = phrase[1:]
        elif phrase.startswith('"'):
            phrase = '{' + phrase[1:]

        # closing delimiter: dropped inside, '"' becomes '}' at the end
        if index != last:
            if phrase.endswith(('"', '}')):
                phrase = phrase[:-1]
        elif phrase.endswith('"'):
            phrase = phrase[:-1] + '}'
        elif phrase.endswith('",'):
            phrase = phrase[:-2] + '},'

        # if phrase did have \#, add the \# back
        if phrase.endswith('\\'):
            phrase = phrase + '#'
        parts.append(phrase)

    return ' '.join(parts)
550
551
def bibtex_replace_abbreviations(filecont_source):
    """Substitute the @string abbreviations into the file contents.

    The three-letter month names are predefined.  Lines that define an
    abbreviation (@string), and lines that open an @comment or @preamble,
    are dropped together with the next line that contains a '}'.  In all
    other lines the abbreviations found out of braces and quotes are
    replaced by their values, and '#' concatenations are resolved.

    The values are inserted literally, so backslashes in a value (LaTeX
    commands) are not interpreted as regular expression escapes.

    @param filecont_source  string of data from file
    @return the transformed data as one string of newline-terminated lines
    """
    #  These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    # an abbreviation is a whole word, optionally followed by a comma
    front = '\\b'
    back = '(,?)\\b'
    abbr_rex = [re.compile(front + abbr + back, re.I) for abbr in abbr_list]

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)

    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = False
    kept_lines = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        # the line closing a dropped definition is dropped as well
        if waiting_for_end_string and '}' in line:
            waiting_for_end_string = False
            continue

        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub(r'\g<1>', line)

            # the first definition of an abbreviation wins
            if abbr not in abbr_list:
                val = abbrdef_rex.sub(r'\g<2>', line)
                abbr_list.append(abbr)
                value_list.append(val.strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
            waiting_for_end_string = True
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = True
            continue

        # replace subsequent abbreviations with the value
        for abbr, rex, value in zip(abbr_list, abbr_rex, value_list):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # a function is used so that the value is not parsed
                    # as a replacement template; group 1 is the comma
                    line = rex.sub(
                        lambda match, value=value: value + match.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept_lines.append(line + '\n')

    filecont2 = ''.join(kept_lines)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
651
def no_outer_parens(filecont):
    """Convert @type( ... ) to @type{ ... }.

    The text is cut at every parenthesis and brace.  When a piece that
    contains '@' is directly followed by '(', that parenthesis is turned
    into '{', and the ')' that brings the count of open parentheses back
    to zero is turned into '}'.  Everything else is copied unchanged.

    The pieces are collected in a list and joined once, instead of
    growing the result string piece by piece.

    @param filecont  the file contents as one string
    @return          the converted contents
    """
    open_paren_count = 0
    open_type = False       # inside an entry opened by a parenthesis
    look_next = False       # the previous piece contained an '@'

    rebuilt = []
    for phrase in re.split('([(){}])', filecont):
        if look_next:
            if phrase == '(':
                phrase = '{'
                open_paren_count += 1
            else:
                open_type = False
            look_next = False

        if phrase == '(':
            open_paren_count += 1
        elif phrase == ')':
            open_paren_count -= 1
            if open_type and open_paren_count == 0:
                phrase = '}'
                open_type = False
        elif '@' in phrase:
            open_type = True
            look_next = True

        rebuilt.append(phrase)

    return ''.join(rebuilt)
695
696
def bibtexwasher(filecont_source):
    """Format the bibtex file into a usable form.

    All whitespace is made into just one space, empty lines and lines
    starting with '%' are dropped, @type( ... ) is converted to
    @type{ ... }, and the text is split again so that entry headers,
    fields and the closing braces of the entries get on lines of their
    own.  Finally the @string abbreviations are substituted.

    @param filecont_source  iterable of the lines of the bibtex file
    @return the list of the non-blank washed lines, each of them
            newline-terminated
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    kept = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            kept.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(kept)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters:
    # both {\&} and \& become a plain &
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # the brace that closes an entry goes to a new line
    open_brace_count = 0
    rebuilt = []
    for phrase in re.split('([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                rebuilt.append('\n')
        rebuilt.append(phrase)
    filecont = ''.join(rebuilt)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the lines, ignoring the blank ones
    return [line + '\n' for line in filecont2.splitlines()
            if line != '' and line != ' ']
783
784
def filehandler(filepath):
    """Convert a bibtex file and print the Doxygen page to standard output.

    The output is a Doxygen comment block that defines the page
    "references" and contains the lines returned by bibtexdecoder().

    If the file cannot be read, the program is terminated with the
    message "Could not open file: <path>" on standard error (so that it
    does not get into the redirected output) and a failure exit status.

    @param filepath  path of the bibtex file
    """
    try:
        with open(filepath, 'r') as fd:
            filecont_source = fd.readlines()
    except (IOError, OSError):
        raise SystemExit('Could not open file: ' + filepath)

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    # single-argument print calls behave the same on Python 2 and 3
    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
800
801
# main program

def main():
    """Run the converter on the file named by the first command line argument.

    Without an argument the program is terminated with the message
    "No input file" on standard error and a failure exit status.
    """
    import sys
    if len(sys.argv) < 2:
        sys.exit('No input file')
    filehandler(sys.argv[1])


if __name__ == "__main__":
    main()
814
815
816# end python script
Note: See TracBrowser for help on using the repository browser.