COIN-OR::LEMON - Graph Library

source: lemon-main/scripts/bib2dox.py @ 786:e20173729589

Last change on this file since 786:e20173729589 was 745:68792fb2870f, checked in by Peter Kovacs <kpeter@…>, 15 years ago

Improve bib2dox.py using \section for entiries (#184)

File size: 25.4 KB
Line 
1#!/usr/bin/env /usr/local/Python/bin/python2.1
2"""
3  BibTeX to Doxygen converter
4  Usage: python bib2dox.py bibfile.bib > bibfile.dox
5
6  This code is the modification of the BibTeX to XML converter
7  by Vidar Bronken Gundersen et al. See the original copyright notices below.
8
9  **********************************************************************
10
11  Decoder for bibliographic data, BibTeX
12  Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
13
14  v.8
15  (c)2002-06-23 Vidar Bronken Gundersen
16  http://bibtexml.sf.net/
17  Reuse approved as long as this notification is kept.
18  Licence: GPL.
19
20  Contributions/thanks to:
21  Egon Willighagen, http://sf.net/projects/jreferences/
22  Richard Mahoney (for providing a test case)
23
24  Edited by Sara Sprenkle to be more robust and handle more bibtex features.
25  (c) 2003-01-15
26
27  1.  Changed bibtex: tags to bibxml: tags.
28  2.  Use xmlns:bibxml="http://bibtexml.sf.net/"
29  3.  Allow spaces between @type and first {
30  4.  "author" fields with multiple authors split by " and "
31      are put in separate xml "bibxml:author" tags.
32  5.  Option for Titles: words are capitalized
33      only if first letter in title or capitalized inside braces
34  6.  Removes braces from within field values
35  7.  Ignores comments in bibtex file (including @comment{ or % )
36  8.  Replaces some special latex tags, e.g., replaces ~ with '&#160;'
37  9.  Handles bibtex @string abbreviations
38        --> includes bibtex's default abbreviations for months
39        --> does concatenation of abbr # " more " and " more " # abbr
40  10. Handles @type( ... ) or @type{ ... }
41  11. The keywords field is split on , or ; and put into separate xml
42      "bibxml:keywords" tags
43  12. Ignores @preamble
44
45  Known Limitations
46  1.  Does not transform Latex encoding like math mode and special
47      latex symbols.
48  2.  Does not parse author fields into first and last names.
49      E.g., It does not do anything special to an author whose name is
50      in the form LAST_NAME, FIRST_NAME
51      In "author" tag, will show up as
52      <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
53  3.  Does not handle "crossref" fields other than to print
54      <bibxml:crossref>...</bibxml:crossref>
55  4.  Does not inform user of the input's format errors.  You just won't
56      be able to transform the file later with XSL
57
58  You will have to manually edit the XML output if you need to handle
59  these (and unknown) limitations.
60
61"""
62
63import string, re
64
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings so that the backslashes reach the regex
# engine untouched instead of being read as string-literal escapes.
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile('[{}]')
capitalize_rex = re.compile(r'({\w*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile('[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile('([{}"])', re.I)

field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# matches the LaTeX command \url{...}; '\\' is one literal backslash
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
93
94#
95# return the string parameter with \url{...} commands converted to HTML links
96#
def transformurls(str):
    """Return *str* with every LaTeX ``\\url{X}`` turned into an HTML link.

    The link uses X both as the ``href`` target and as the visible text.
    """
    anchor_template = r'<a href="\1">\1</a>'
    return url_rex.sub(anchor_template, str)
99
100#
101# return the string parameter without braces
102#
def removebraces(str):
    """Return *str* with every '{' and '}' character dropped."""
    # Splitting on the braces and gluing the pieces back together removes
    # exactly the characters the pattern matches.
    pieces = rembraces_rex.split(str)
    return ''.join(pieces)
105
106#
107# latex-specific replacements
108# (do this after braces were removed)
109#
# (LaTeX sequence, HTML replacement) pairs used by latexreplacements(),
# applied in exactly this order.
_LATEX_TO_HTML = (
    ('~', '&nbsp;'),
    ("\\'a", '&aacute;'),
    ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'),
    ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'),
    ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'),
    ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),   # &utilde; does not exist
    ("\\'A", '&Aacute;'),
    ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'),
    ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),   # &Utilde; does not exist
)


def latexreplacements(line):
    """Replace LaTeX accent sequences and '~' in *line* by HTML entities.

    Meant to be applied after braces were removed, so an accent such as
    ``\\'a`` is expected in its brace-less form.  Sequences that are not in
    the table are left untouched.

    Returns the converted string.
    """
    # str.replace is used instead of the string-module function so the
    # code also runs on Python 3, where string.replace no longer exists.
    for latex, html in _LATEX_TO_HTML:
        line = line.replace(latex, html)
    return line
138
139#
140# copy characters from a string decoding html expressions (&xyz;)
141#
def copychars(str, ifrom, count):
    """Collect up to *count* letters of *str*, starting at index *ifrom*.

    An HTML entity (``&xyz;``) contributes only the character that follows
    the '&' and counts as one letter; the rest of the entity up to ';' is
    skipped.  Outside entities only ASCII letters are copied, every other
    character is ignored.

    Returns the collected characters as a string.
    """
    ascii_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    collected = []
    taken = 0
    inside_entity = False
    pos = ifrom
    end = len(str)
    while pos < end and taken < count:
        ch = str[pos]
        if ch == '&':
            # entity start: keep its base character (if there is one)
            inside_entity = True
            if pos + 1 < end:
                collected.append(str[pos + 1])
            taken += 1
            pos += 2
            continue
        if inside_entity:
            # skip the entity name; ';' ends it
            if ch == ';':
                inside_entity = False
        elif ch in ascii_letters:
            collected.append(ch)
            taken += 1
        pos += 1

    return ''.join(collected)
165
166
167#
168# Handle a list of authors (separated by 'and').
169# It gives back a dictionary with the following values:
170#  - num: the number of authors,
171#  - list: the list of the author names,
172#  - text: the bibtex text (separated by commas and/or 'and')
173#  - abbrev: abbreviation that can be used for indicate the
174#    bibliography entries
175#
def bibtexauthor(data):
    """Parse a BibTeX author/editor field (names separated by 'and').

    Returns a dictionary with the keys
      - 'num':    the number of authors,
      - 'list':   the cleaned author names, each as "First Last",
      - 'text':   the names joined for display ("A", "A and B",
                  "A, B, and C"),
      - 'abbrev': letters taken from the last word of each name (three
                  letters for a single author, one per author otherwise).
    """
    authors = []
    for author in author_rex.split(data):
        # general transformations
        author = latexreplacements(removebraces(author.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        pos = author.find(',')
        if pos != -1:
            author = author[pos+1:].strip() + ' ' + author[:pos].strip()
        authors.append(author)

    num = len(authors)

    # Build the display text directly rather than through a temporary
    # '#' separator, which would corrupt names that contain '#'.
    if num <= 2:
        text = ' and '.join(authors)
    else:
        text = ', '.join(authors[:-1]) + ', and ' + authors[-1]

    if num == 1:
        letters = 3
    else:
        letters = 1
    abbrev = ''
    for author in authors:
        # abbreviate from the last word of the name
        abbrev += copychars(author, author.rfind(' ') + 1, letters)

    return {'num': num, 'list': authors, 'text': text, 'abbrev': abbrev}
209
210
211#
212# data = title string
213# @return the capitalized title (first letter is capitalized), rest are capitalized
214# only if capitalized inside braces
215#
def capitalizetitle(data):
    """Return the title *data* in sentence case.

    The title is split around brace-protected words ({Word}).  Protected
    words keep their capitalization (their braces are removed), the leading
    phrase is stripped of leading whitespace and capitalized, and every
    other phrase is lower-cased.
    """
    pieces = []
    for index, phrase in enumerate(capitalize_rex.split(data)):
        # str.lstrip replaces string.lstrip, which is gone in Python 3
        check = phrase.lstrip()
        if check.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif index == 0:
            # first phrase --> capitalize first letter (after spaces)
            pieces.append(check.capitalize())
        else:
            pieces.append(phrase.lower())

    return ''.join(pieces)
235
236
237#
238# @return the bibtex for the title
239# @param data --> title string
240# braces are removed from title
241#
def bibtextitle(data, entrytype):
    """Return the title text for *data* with all braces removed.

    Titles of 'book' and 'inbook' entries keep their capitalization; every
    other entry type is passed through capitalizetitle() first.
    """
    text = data.strip()
    if entrytype not in ('book', 'inbook'):
        text = capitalizetitle(text)
    return removebraces(text)
249
250
251#
252# function to compare entry lists
253#
def entry_cmp(x, y):
    """Three-way comparison of two entries by their first item (sort key).

    Returns -1, 0 or 1 when x[0] is smaller than, equal to or greater than
    y[0].  Written without the cmp() builtin, which Python 3 removed.
    """
    left, right = x[0], y[0]
    return (left > right) - (left < right)
256
257
258#
259# build the list of Doxygen lines for the transformed "filecont_source"
260#
def _bib_nonempty(fields, name):
    """Return True if *fields* has a non-empty value stored under *name*."""
    return name in fields and fields[name] != ''


def _bib_entry_text(entrytype, fields):
    """Return the formatted reference pieces of one entry, in output order.

    *fields* is modified in place by the entry-type related formatting
    (emphasis tags, default 'type' values, editors used as authors).
    """
    # entry type related formatting; fields that are missing are skipped
    # instead of raising KeyError
    if entrytype in ('book', 'inbook'):
        if 'title' in fields:
            fields['title'] = '<em>' + fields['title'] + '</em>'
        if 'author' not in fields and 'editor' in fields:
            fields['author'] = fields['editor']
            fields['author']['text'] += ', editors'
    elif entrytype == 'article':
        if 'journal' in fields:
            fields['journal'] = '<em>' + fields['journal'] + '</em>'
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        if 'booktitle' in fields:
            fields['booktitle'] = '<em>' + fields['booktitle'] + '</em>'
    elif entrytype == 'techreport':
        if 'type' not in fields:
            fields['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        fields['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        fields['type'] = 'PhD thesis'

    if _bib_nonempty(fields, 'pages'):
        fields['pages'] = fields['pages'].replace('--', '-')

    parts = []
    if _bib_nonempty(fields, 'author'):
        parts.append(fields['author']['text'] + '.')
    for name, prefix, suffix in (('title', '', '.'),
                                 ('journal', '', ','),
                                 ('booktitle', 'In ', ',')):
        if _bib_nonempty(fields, name):
            parts.append(prefix + fields[name] + suffix)
    if _bib_nonempty(fields, 'type'):
        text = fields['type']
        if _bib_nonempty(fields, 'number'):
            text += ' ' + fields['number']
        parts.append(text + ',')
    for name, suffix in (('institution', ','),
                         ('publisher', ','),
                         ('school', ','),
                         ('address', ','),
                         ('edition', ' edition,'),
                         ('howpublished', ',')):
        if _bib_nonempty(fields, name):
            parts.append(fields[name] + suffix)
    if _bib_nonempty(fields, 'volume'):
        # volume(number):pages
        text = fields['volume']
        if _bib_nonempty(fields, 'number'):
            text += '(' + fields['number'] + ')'
        if _bib_nonempty(fields, 'pages'):
            text += ':' + fields['pages']
        parts.append(text + ',')
    elif _bib_nonempty(fields, 'pages'):
        parts.append('pages ' + fields['pages'] + ',')
    if _bib_nonempty(fields, 'year'):
        if _bib_nonempty(fields, 'month'):
            parts.append(fields['month'] + ' ' + fields['year'] + '.')
        else:
            parts.append(fields['year'] + '.')
    if _bib_nonempty(fields, 'note'):
        parts.append(fields['note'] + '.')
    return parts


def _bib_entry_keys(fields):
    """Return (sortkey, bibkey) for one entry.

    The sort key is built from the authors' last words, the year and the
    title; the bib key from the author abbreviation ('x' without authors)
    and the last two characters of the year.  An explicit 'key' field
    prefixes the sort key and replaces the bib key.
    """
    sortkey = ''
    bibkey = 'x'
    if 'author' in fields:
        for author in fields['author']['list']:
            sortkey += copychars(author, author.rfind(' ') + 1, len(author))
        bibkey = fields['author']['abbrev']
    if 'year' in fields:
        sortkey += fields['year']
        bibkey += fields['year'][-2:]
    if 'title' in fields:
        sortkey += fields['title']
    if 'key' in fields:
        sortkey = fields['key'] + sortkey
        bibkey = fields['key']
    return sortkey, bibkey


def bibtexdecoder(filecont_source):
    """Convert washed BibTeX lines into the lines of a Doxygen page body.

    @param filecont_source - sequence of lines, one "@type{id," line, one
                             field per line and a closing "}" line per
                             entry (the form bibtexwasher() produces)
    @return list of output lines: for every entry, sorted by sort key, a
            "\\section <id> [<key>]" line, a <div> holding the formatted
            reference, and an empty line.  Entries sharing a key get the
            suffixes a, b, c, ...
    """
    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    endtype_rex = re.compile(r'}\s*$')
    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')

    entries = []        # (sortkey, bibkey, entryid, parts) tuples
    entrycont = None    # fields of the entry being read; None outside entries
    entrytype = ''
    entryid = ''

    for line in filecont_source:
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        if pubtype_rex.match(line):
            entrycont = {}
            entrytype = pubtype_rex.sub(r'\g<1>', line).lower()
            entryid = pubtype_rex.sub(r'\g<2>', line)
            continue

        # lines outside of an entry carry nothing to convert
        if entrycont is None:
            continue

        # end entry if just a }
        if endtype_rex.match(line):
            parts = _bib_entry_text(entrytype, entrycont)
            sortkey, bibkey = _bib_entry_keys(entrycont)
            entries.append((sortkey, bibkey, entryid, parts))
            entrycont = None
            continue

        # field, publication info
        field = ''
        data = ''
        if bracedata_rex.match(line):
            # field = {data} entries
            field = field_rex.sub(r'\g<1>', line).lower()
            data = bracedata_rex.sub(r'\g<2>', line)
        elif quotedata_rex.match(line):
            # field = "data" entries
            field = field_rex.sub(r'\g<1>', line).lower()
            data = quotedata_rex.sub(r'\g<2>', line)
        elif data_rex.match(line):
            # field = data entries
            field = field_rex.sub(r'\g<1>', line).lower()
            data = data_rex.sub(r'\g<2>', line)

        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            line = ''
        elif field == 'title':
            line = bibtextitle(data, entrytype)
        elif field != '':
            line = removebraces(transformurls(data.strip()))

        if line != '':
            entrycont[field] = latexreplacements(line)

    # sort entries by their sort key
    entries.sort(key=lambda entry: entry[0])

    # count how often each bibtex key occurs
    keytable = {}
    for entry in entries:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1

    # generate output
    counttable = {}
    output = []
    for sortkey, bibkey, entryid, parts in entries:
        # generate output key from the bibtex key
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            used = counttable.get(bibkey, 0)
            outkey = bibkey + chr(97 + used)
            counttable[bibkey] = used + 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(parts)
        output.append('</div>')
        output.append('')

    return output
463
464
465#
466# return 1 iff abbr is in line but not inside braces or quotes
467# assumes that abbr appears only once on the line (out of braces and quotes)
468#
def verify_out_of_braces(line, abbr):
    """Return 1 if *abbr* occurs in *line* outside braces and quotes, else 0.

    The line is cut at every '{', '}' and '"'; the brace depth and the quote
    state are tracked while walking the pieces, and *abbr* (matched as a
    whole word, ignoring case) only counts in a piece met at depth zero and
    outside quotes.
    """
    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    depth = 0
    in_quotes = False
    for token in delimiter_rex.split(line):
        if token == "{":
            depth += 1
        elif token == "}":
            depth -= 1
        elif token == '"':
            in_quotes = not in_quotes
        elif depth == 0 and not in_quotes and word_rex.search(token):
            return 1

    return 0
493
494
495#
496# a line in the form phrase1 # phrase2 # ... # phrasen
497# is returned as phrase1 phrase2 ... phrasen
498# with the correct punctuation
499# Bug: Doesn't always work with multiple abbreviations plugged in
500#
def concat_line(line):
    """Merge a '#'-concatenated field value into one brace-delimited value.

    A line of the form  field = phrase1 # phrase2 # ... # phrasen  comes
    back as  field = phrase1 phrase2 ... phrasen : the inner quote/brace
    delimiters are dropped and the outer quotes become braces.
    Known problem: does not always work with multiple abbreviations.
    """
    # only the part after the equals sign is split
    name = field_rex.sub(r'\g<1>', line)
    value = field_rex.sub(r'\g<2>', line)

    pieces = concatsplit_rex.split(value)
    last = len(pieces) - 1
    joined = name + ' ='

    for index, piece in enumerate(pieces):
        piece = piece.strip()

        # opening delimiter: dropped on inner pieces, '"' -> '{' on the first
        if index > 0:
            if piece[:1] in ('"', '{'):
                piece = piece[1:]
        elif piece.startswith('"'):
            piece = '{' + piece[1:]

        # closing delimiter: dropped on inner pieces, '"' -> '}' on the last
        if index < last:
            if piece[-1:] in ('"', '}'):
                piece = piece[:-1]
        elif piece.endswith('"'):
            piece = piece[:-1] + "}"
        elif piece.endswith('",'):
            piece = piece[:-2] + "},"

        # if piece did have \#, add the \# back
        if piece.endswith('\\'):
            piece += "#"

        joined += ' ' + piece

    return joined
540
541
542#
543# substitute abbreviations into filecont
544# @param filecont_source - string of data from file
545#
def bibtex_replace_abbreviations(filecont_source):
    """Substitute @string abbreviations (and month names) into the data.

    @param filecont_source - string of data from file
    @return the data as one string in which the @string, @comment and
            @preamble lines are dropped, abbreviations found outside
            braces/quotes are replaced by their values, and '#'
            concatenations are merged
    """
    #  These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    front = '\\b'
    back = '(,?)\\b'
    abbr_rex = [re.compile(front + abbr + back, re.I) for abbr in abbr_list]

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)
    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = 0
    kept = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        # the line closing a @string/@comment/@preamble block is dropped
        if waiting_for_end_string:
            if '}' in line:
                waiting_for_end_string = 0
                continue

        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub(r'\g<1>', line)
            if abbr not in abbr_list:
                val = abbrdef_rex.sub(r'\g<2>', line)
                abbr_list.append(abbr)
                value_list.append(val.strip())
                abbr_rex.append(
                    re.compile(front + re.escape(abbr) + back, re.I))
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace subsequent abbreviations with the value
        for abbr, rex, value in zip(abbr_list, abbr_rex, value_list):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # A function replacement inserts the value literally;
                    # as a template string, LaTeX backslashes in the value
                    # would be interpreted as regex escapes.
                    line = rex.sub(
                        lambda m, value=value: value + m.group(1), line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept.append(line + '\n')

    filecont2 = ''.join(kept)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
641
642#
643# convert @type( ... ) to @type{ ... }
644#
def no_outer_parens(filecont):
    """Convert entries written as @type( ... ) into @type{ ... }.

    The text is cut at every parenthesis and brace.  A '(' met directly
    after a piece containing '@' becomes '{'; the ')' that brings the
    parenthesis depth back to zero while such an entry is open becomes '}'.
    All other text is passed through unchanged.
    """
    type_rex = re.compile(r'@\w*')

    rebuilt = []
    paren_depth = 0
    entry_open = False       # inside an entry opened with '('
    expect_opener = False    # previous piece contained an @type

    for token in re.split('([(){}])', filecont):
        if expect_opener:
            expect_opener = False
            if token == '(':
                token = '{'
                paren_depth += 1
            else:
                entry_open = False

        if token == '(':
            paren_depth += 1
        elif token == ')':
            paren_depth -= 1
            if entry_open and paren_depth == 0:
                token = '}'
                entry_open = False
        elif type_rex.search(token):
            entry_open = True
            expect_opener = True

        rebuilt.append(token)

    return ''.join(rebuilt)
685
686
687#
688# make all whitespace into just one space
689# format the bibtex file into a usable form.
690#
def bibtexwasher(filecont_source):
    """Normalize raw BibTeX lines into the layout the decoder expects.

    Whitespace runs are collapsed, lines starting with '%' and empty lines
    are dropped, @type( ... ) is turned into @type{ ... }, line breaks are
    inserted after the entry header, after each field value and before the
    brace closing an entry, and abbreviations are substituted.

    @param filecont_source - sequence of lines read from the .bib file
    @return list of non-blank lines, each terminated by a newline
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace, ignore comments
    kept = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            kept.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(kept)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters: {\&} and \& become &
    filecont = filecont.replace('{\\&}', '&')
    filecont = filecont.replace('\\&', '&')

    # put a line break before each brace that closes a top-level group
    open_brace_count = 0
    rebuilt = []
    for phrase in re.split('([{}])', filecont):
        if phrase == '{':
            open_brace_count = open_brace_count + 1
        elif phrase == '}':
            open_brace_count = open_brace_count - 1
            if open_brace_count == 0:
                rebuilt.append('\n')
        rebuilt.append(phrase)
    filecont = ''.join(rebuilt)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the non-blank lines, each with its newline restored
    return [line + '\n' for line in filecont2.splitlines()
            if line != '' and line != ' ']
773
774
def filehandler(filepath):
    """Convert the BibTeX file *filepath* and print the Doxygen page.

    The page is written to standard output.  If the file cannot be read,
    a message is written to standard error and the program exits with
    status 1; standard output is usually redirected into the .dox file,
    so the message must not go there.
    """
    import sys

    try:
        with open(filepath, 'r') as fd:
            filecont_source = fd.readlines()
    except (IOError, OSError):
        sys.stderr.write('Could not open file: %s\n' % filepath)
        sys.exit(1)

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    # single-argument print(...) behaves the same on Python 2 and 3
    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
790
791
792# main program
793
def main():
    """Command line entry point: convert the .bib file named by argv[1].

    Without an input file a message is written to standard error and the
    program exits with status 1.
    """
    import sys
    if sys.argv[1:]:
        filepath = sys.argv[1]
    else:
        # usage error: report on stderr and signal failure to the caller
        sys.stderr.write('No input file\n')
        sys.exit(1)
    filehandler(filepath)


if __name__ == "__main__":
    main()
804
805
806# end python script
Note: See TracBrowser for help on using the repository browser.