COIN-OR::LEMON - Graph Library

Ticket #184: bibtex2dox.py

File bibtex2dox.py, 24.9 KB (added by Peter Kovacs, 15 years ago)
Line 
1#!/usr/bin/env /usr/local/Python/bin/python2.1
2"""
3  Decoder for bibliographic data, BibTeX
4  Usage: python bibtex2dox.py bibfile.bib > bibfile.dox
5
6  v.8
7  (c)2002-06-23 Vidar Bronken Gundersen
8  http://bibtexml.sf.net/
9  Reuse approved as long as this notification is kept.
10  Licence: GPL.
11
12  Contributions/thanks to:
13  Egon Willighagen, http://sf.net/projects/jreferences/
14  Richard Mahoney (for providing a test case)
15
16  Edited by Sara Sprenkle to be more robust and handle more bibtex features.  (c) 2003-01-15
17  1.  Changed bibtex: tags to bibxml: tags.
18  2.  Use xmlns:bibxml="http://bibtexml.sf.net/"
19  3.  Allow spaces between @type and first {
20  4.  "author" fields with multiple authors split by " and "
21      are put in separate xml "bibxml:author" tags.
22  5.  Option for Titles: words are capitalized
23      only if first letter in title or capitalized inside braces
24  6.  Removes braces from within field values
25  7.  Ignores comments in bibtex file (including @comment{ or % )
26  8.  Replaces some special latex tags, e.g., replaces ~ with ' '
27  9.  Handles bibtex @string abbreviations
28        --> includes bibtex's default abbreviations for months
29        --> does concatenation of abbr # " more " and " more " # abbr
30  10. Handles @type( ... ) or @type{ ... }
31  11. The keywords field is split on , or ; and put into separate xml
32      "bibxml:keywords" tags
33  12. Ignores @preamble
34
35  Known Limitations
36  1.  Does not transform Latex encoding like math mode and special latex symbols.
37  2.  Does not parse author fields into first and last names.
38      E.g., It does not do anything special to an author whose name is in the form LAST_NAME, FIRST_NAME
39      In "author" tag, will show up as <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
40  3.  Does not handle "crossref" fields other than to print <bibxml:crossref>...</bibxml:crossref>
41  4.  Does not inform user of the input's format errors.  You just won't be able to
42      transform the file later with XSL
43
44  You will have to manually edit the XML output if you need to handle
45  these (and unknown) limitations.
46
47"""
48
import re
import string
50
# regex character class of the characters allowed in an @string abbreviation name
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings so that the backslashes reach the regex
# engine untouched (a non-raw '\\\url' is even a syntax error on Python 3).
#

# separator between the names of an author/editor list
author_rex = re.compile(r'\s+and\s+')
# any single brace character
rembraces_rex = re.compile(r'[{}]')
# one braced word, e.g. {LEMON}; captured so that split() keeps it
capitalize_rex = re.compile(r'(\{\w*\})')

# keyword separators (not referenced by the code visible in this file)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line): the BibTeX '#' concatenation operator
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
# (the pattern contains no letters, so no case-insensitivity flag is needed)
delimiter_rex = re.compile(r'([{}"])')

# "name = rest of line" and "name = value up to the first comma"
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# the LaTeX command \url{address}; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')
74
75
#
# return the string parameter with every LaTeX \url{address} command
# replaced by an HTML link whose target and text are both the address
#
def transformurls(str):
    # the parameter name shadows the builtin; it is kept for existing callers
    link_template = r'<a href="\1">\1</a>'
    return url_rex.sub(link_template, str)
81
#
# return the string parameter with every '{' and '}' character deleted
#
def removebraces(str):
    # the parameter name shadows the builtin; it is kept for existing callers
    without_braces = rembraces_rex.sub('', str)
    return without_braces
87
#
# latex-specific replacements
# (do this after braces were removed)
#
# Ordered (LaTeX sequence, HTML entity) pairs.  The Hungarian double acute
# accent (\H) has no HTML entity of its own, so the table approximates it
# with the nearest available ones.
#
_LATEX_HTML_PAIRS = (
    ('~', '&nbsp;'),
    ("\\'a", '&aacute;'),
    ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'),
    ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'),
    ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'),
    ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),   # &utilde; does not exist
    ("\\'A", '&Aacute;'),
    ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'),
    ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),   # &Utilde; does not exist
)


#
# return line with '~' and the accented-letter commands listed in
# _LATEX_HTML_PAIRS replaced by the corresponding HTML entities
#
def latexreplacements(line):
    # str.replace is used instead of the string-module function of the
    # same name, which no longer exists on Python 3
    for latex, html in _LATEX_HTML_PAIRS:
        line = line.replace(latex, html)
    return line
120
#
# copy letters from a string, decoding html expressions (&xyz;)
#
# Scanning starts at index ifrom and stops at the end of the string or as
# soon as count characters were counted.  Outside an html expression only
# the ASCII letters A-Z and a-z are copied and counted.  An '&' contributes
# the character that follows it (when there is one) and always counts as
# one; the rest of the expression, up to the next ';', is skipped.
#
def copychars(str, ifrom, count):
    picked = []
    total = len(str)
    pos = ifrom
    taken = 0
    inside_entity = False

    while pos < total and taken < count:
        ch = str[pos]

        if ch == '&':
            # start of an html expression: keep only its base letter
            inside_entity = True
            if pos + 1 < total:
                picked.append(str[pos + 1])
            taken += 1
            pos += 2
            continue

        if inside_entity:
            # skip the remainder of the expression
            if ch == ';':
                inside_entity = False
        elif ('A' <= ch <= 'Z') or ('a' <= ch <= 'z'):
            picked.append(ch)
            taken += 1
        pos += 1

    return ''.join(picked)
147
148
#
# Handle a list of authors (separated by 'and').
# It gives back a dictionary of the following values:
#  - num: the number of authors,
#  - list: the list of the author names,
#  - text: the bibtex text (separated by commas and/or 'and')
#  - abbrev: abbreviation that can be used for indicate the
#    bibliography entries
#
def bibtexauthor(data):
    names = []
    for raw_name in author_rex.split(data):
        # general transformations
        name = latexreplacements(removebraces(raw_name.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        comma = name.find(',')
        if comma != -1:
            name = name[comma+1:].strip() + ' ' + name[:comma].strip()
        names.append(name)

    # split() always yields at least one item, so num >= 1
    num = len(names)

    # "A", "A and B" or "A, B, and C".  The names are joined directly
    # (no placeholder separator), so a name may contain any character.
    if num <= 2:
        text = ' and '.join(names)
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    # a single author contributes three letters of the last word of the
    # name, several authors contribute one letter each
    if num == 1:
        letters = 3
    else:
        letters = 1
    abbrev = ''
    for name in names:
        abbrev += copychars(name, name.rfind(' ') + 1, letters)

    return {'list': names, 'num': num, 'text': text, 'abbrev': abbrev}
191
192
#
# data = title string
# @return the capitalized title: the first letter of the title is
# capitalized, everything else is lower-cased unless it is protected by
# braces (the braces of protected parts are removed)
#
def capitalizetitle(data):
    pieces = []
    for index, phrase in enumerate(capitalize_rex.split(data)):
        stripped = phrase.lstrip()

        if stripped.startswith('{'):
            # braced phrase: keep its capitalization as written
            pieces.append(removebraces(phrase))
        elif index == 0:
            # start of the title --> capitalize first letter (after spaces)
            pieces.append(stripped.capitalize())
        else:
            pieces.append(phrase.lower())

    return ''.join(pieces)
217
218
#
# @return the output text for the title, without braces
# @param data --> title string
# @param entrytype --> lower-case entry type; the titles of 'book' and
#        'inbook' entries keep their capitalization, all other titles
#        go through capitalizetitle()
#
def bibtextitle(data, entrytype):
    stripped = data.strip()
    if entrytype not in ('book', 'inbook'):
        stripped = capitalizetitle(stripped)
    return removebraces(stripped)
231
232
#
# function to compare entry lists by their first item (the sort key)
# @return -1, 0 or 1, like the Python 2 builtin cmp()
#
def entry_cmp(x, y):
    left, right = x[0], y[0]
    # spelled out instead of calling cmp(), which Python 3 does not have
    return (left > right) - (left < right)
238
239
# want @<alphanumeric chars><spaces>{<spaces><any chars>,
_PUBTYPE_REX = re.compile(r'@(\w*)\s*\{\s*(.*),')
# a line that consists of a closing brace only
_ENDTYPE_REX = re.compile(r'\}\s*$')
# field = {data} and field = "data" lines
_BRACEDATA_REX = re.compile(r'\s*(\w*)\s*=\s*\{(.*)\},?')
_QUOTEDATA_REX = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')


#
# split a "field = value" line
# @return (lower-case field name, raw value); ('', '') if the line has
# none of the three supported forms
#
def _parse_field(line):
    # tried in this order: field = {data}, field = "data", field = data
    for data_pattern in (_BRACEDATA_REX, _QUOTEDATA_REX, data_rex):
        if data_pattern.match(line):
            field = field_rex.sub(r'\g<1>', line).lower()
            data = data_pattern.sub(r'\g<2>', line)
            return field, data
    return '', ''


#
# wrap the value of the field in <em> tags if the entry has that field
#
def _emphasize(entrycont, field):
    if field in entrycont:
        entrycont[field] = '<em>' + entrycont[field] + '</em>'


#
# generate the doxygen code for one entry
# @param entrytype --> lower-case entry type
# @param entrycont --> field dictionary of the entry (modified in place)
# @return [sort key, bibliography key, output line, output line, ...]
#
def _format_entry(entrytype, entrycont):
    # entry type related formattings; fields the entry does not have are
    # simply left out
    if entrytype in ('book', 'inbook'):
        _emphasize(entrycont, 'title')
        if 'author' not in entrycont and 'editor' in entrycont:
            # list the editors in place of the authors; work on a copy so
            # that the editor data itself stays unchanged
            editors = entrycont['editor'].copy()
            if editors['num'] == 1:
                editors['text'] += ', editor'
            else:
                editors['text'] += ', editors'
            entrycont['author'] = editors
    elif entrytype == 'article':
        _emphasize(entrycont, 'journal')
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        _emphasize(entrycont, 'booktitle')
    elif entrytype == 'techreport':
        entrycont.setdefault('type', 'Technical report')
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    # page ranges use a single dash in the output
    if 'pages' in entrycont:
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    def value(field):
        # text of the field, or '' if the entry does not have it
        return entrycont.get(field, '')

    entry = []
    if 'author' in entrycont:
        entry.append(entrycont['author']['text'] + '.')
    if value('title'):
        entry.append(value('title') + '.')
    if value('journal'):
        entry.append(value('journal') + ',')
    if value('booktitle'):
        entry.append('In ' + value('booktitle') + ',')
    if value('type'):
        eline = value('type')
        if value('number'):
            eline += ' ' + value('number')
        entry.append(eline + ',')
    for field in ('institution', 'publisher', 'school', 'address'):
        if value(field):
            entry.append(value(field) + ',')
    if value('edition'):
        entry.append(value('edition') + ' edition,')
    if value('howpublished'):
        entry.append(value('howpublished') + ',')
    if value('volume'):
        # volume(number):pages
        eline = value('volume')
        if value('number'):
            eline += '(' + value('number') + ')'
        if value('pages'):
            eline += ':' + value('pages')
        entry.append(eline + ',')
    elif value('pages'):
        entry.append('pages ' + value('pages') + ',')
    if value('year'):
        if value('month'):
            entry.append(value('month') + ' ' + value('year') + '.')
        else:
            entry.append(value('year') + '.')
    if value('note'):
        entry.append(value('note') + '.')

    # generate keys for sorting and for the output
    sortkey = ''
    bibkey = 'x'
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ')+1, len(author))
        bibkey = entrycont['author']['abbrev']
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        # an explicit "key" field overrides the generated key
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']

    return [sortkey, bibkey] + entry


#
# generate the doxygen code for the transformed "filecont_source"
# @param filecont_source --> washed lines: an "@type{id," line opens an
#        entry, "field = value" lines fill it and a line holding only "}"
#        closes it
# @return list of output lines; each entry contributes an
#         "\anchor key" + "<b>[key]</b>" line, its formatted parts and an
#         empty line; the entries are ordered by their sort keys
#
def bibtexdecoder(filecont_source):
    filecont = []
    entrytype = None
    entrycont = None     # None until the first entry was opened

    for line in filecont_source:
        # drop the line terminator only (never a character of the text)
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        type_match = _PUBTYPE_REX.match(line)
        if type_match:
            entrycont = {}
            entrytype = type_match.group(1).lower()
            continue

        # lines in front of the first entry cannot belong to anything
        if entrycont is None:
            continue

        # end entry if just a }
        if _ENDTYPE_REX.match(line):
            filecont.append(_format_entry(entrytype, entrycont))
            continue

        # field, publication info
        field, data = _parse_field(line)
        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            continue
        if field == 'title':
            text = bibtextitle(data, entrytype)
        elif field != '':
            text = removebraces(transformurls(data.strip()))
        else:
            # not a field line
            continue
        if text != '':
            entrycont[field] = latexreplacements(text)

    # sort entries by sort key (the sort is stable)
    filecont.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in filecont:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = dict.fromkeys(keytable, 0)

    # generate output
    output = []
    for entry in filecont:
        # generate output key from the bibtex key: keys that occur more
        # than once get the suffix a, b, c, ... in sorted order
        bibkey = entry[1]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            outkey = bibkey + chr(97 + counttable[bibkey])
        counttable[bibkey] += 1

        # append the entry code to the output
        output.append('\\anchor ' + outkey + '\n' +
                      '<b>[' + outkey + ']</b>')
        output.extend(entry[2:])
        output.append('')

    return output
442
443
#
# return 1 if abbr occurs in line as a whole word (case-insensitively)
# outside of braces and quotes, otherwise return 0
# assumes that abbr appears only once on the line (out of braces and quotes)
#
def verify_out_of_braces(line, abbr):
    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    brace_depth = 0
    inside_quotes = False

    # the delimiters themselves are part of the split result
    for token in delimiter_rex.split(line):
        if token == "{":
            brace_depth += 1
        elif token == "}":
            brace_depth -= 1
        elif token == '"':
            inside_quotes = not inside_quotes
        elif brace_depth == 0 and not inside_quotes and word_rex.search(token):
            return 1

    return 0
472
473
#
# a line in the form phrase1 # phrase2 # ... # phrasen
# is returned as phrase1 phrase2 ... phrasen
# with the correct punctuation: the joined value is delimited by the
# opening delimiter of the first phrase and the closing delimiter of the
# last one, where quotes are turned into braces
# Bug: Doesn't always work with multiple abbreviations plugged in
#
def concat_line(line):
    # only look at part after equals
    field = field_rex.sub('\\g<1>', line)
    rest = field_rex.sub('\\g<2>', line)

    pieces = concatsplit_rex.split(rest)
    last = len(pieces) - 1

    words = [field + ' =']
    for index, piece in enumerate(pieces):
        piece = piece.strip()

        # opening delimiter: kept (as a brace) on the first piece only
        if index == 0:
            if piece.startswith('"'):
                piece = '{' + piece[1:]
        elif piece[:1] in ('"', '{'):
            piece = piece[1:]

        # closing delimiter: kept (as a brace) on the last piece only
        if index != last:
            if piece[-1:] in ('"', '}'):
                piece = piece[:-1]
        elif piece.endswith('"'):
            piece = piece[:-1] + "}"
        elif piece.endswith('",'):
            piece = piece[:-2] + "},"

        # if phrase did have \#, add the \# back
        if piece.endswith('\\'):
            piece = piece + "#"

        words.append(piece)

    return ' '.join(words)
519
520
#
# substitute abbreviations into filecont
# @param filecont_source - string of data from file
# @return the data as one string (every kept line ends with a newline):
#  - @string definitions are recorded and removed,
#  - @comment and @preamble lines are removed,
#  - abbreviations used outside of braces and quotes are replaced by
#    their values and '#' concatenations on those lines are joined
#
def bibtex_replace_abbreviations(filecont_source):
    front = r'\b'
    back = r'(,?)\b'

    #  These are defined in bibtex, so we'll define them too
    abbr_list = ['jan','feb','mar','apr','may','jun',
                 'jul','aug','sep','oct','nov','dec']
    value_list = ['January','February','March','April',
                  'May','June','July','August','September',
                  'October','November','December']
    # abbr_list, value_list and abbr_rex are parallel lists
    abbr_rex = [re.compile(front + abbr + back, re.I) for abbr in abbr_list]

    abbrdef_rex = re.compile(r'\s*@string\s*\{\s*(' + valid_name_chars + r'*)\s*=(.*)',
                             re.I)
    comment_rex = re.compile(r'@comment\s*\{', re.I)
    preamble_rex = re.compile(r'@preamble\s*\{', re.I)

    waiting_for_end_string = 0
    kept_lines = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        # after an @string, @comment or @preamble line the next line with
        # a closing brace is dropped as well
        if waiting_for_end_string:
            if '}' in line:
                waiting_for_end_string = 0
                continue

        definition = abbrdef_rex.search(line)
        if definition:
            abbr = definition.group(1)
            # the first definition of a name wins; an empty name would
            # match at every word boundary and is therefore ignored
            if abbr and abbr not in abbr_list:
                abbr_list.append(abbr)
                value_list.append(definition.group(2).strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace subsequent abbreviations with the value
        for abbr, rex, value in zip(abbr_list, abbr_rex, value_list):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # a function replacement inserts the value literally;
                    # as a template string its LaTeX backslashes would be
                    # interpreted as escape sequences
                    line = rex.sub(
                        lambda found, value=value: value + found.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept_lines.append(line + '\n')

    filecont2 = ''.join(kept_lines)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"','{{')
    filecont2 = filecont2.replace('"}','}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*\}')
    afterbracevalue_rex = re.compile(r'(=\s*\{[^=]*)\},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub(r'\g<1>},\n', filecont2)

    return filecont2
620
#
# convert @type( ... ) to @type{ ... }
#
# A '(' that directly follows a piece of text containing '@' becomes '{',
# and the ')' that brings the parenthesis nesting back to zero becomes '}'.
# All other parentheses and all braces are copied unchanged.
#
def no_outer_parens(filecont):
    at_rex = re.compile('@\\w*')

    # the delimiters themselves are part of the split result
    tokens = re.split('([(){}])', filecont)

    paren_depth = 0
    entry_in_parens = False    # inside an entry whose closing ')' is pending
    expect_opener = False      # the previous token contained an '@'

    rebuilt = []
    for token in tokens:
        if expect_opener:
            expect_opener = False
            if token == '(':
                token = '{'
                paren_depth += 1
            else:
                # the entry uses braces (or something else follows)
                entry_in_parens = False

        if token == '(':
            paren_depth += 1
        elif token == ')':
            paren_depth -= 1
            if entry_in_parens and paren_depth == 0:
                token = '}'
                entry_in_parens = False
        elif at_rex.search(token):
            entry_in_parens = True
            expect_opener = True

        rebuilt.append(token)

    return ''.join(rebuilt)
664
665
#
# make all whitespace into just one space
# format the bibtex file into a usable form.
# @param filecont_source --> the lines of the bibtex file
# @return list of non-blank lines, each ending with a newline, in the
#         layout produced by the steps below (abbreviations substituted)
#
def bibtexwasher(filecont_source):

    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    kept = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            kept.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(kept)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', r'\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', r'= \g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      r'\n\n\g<1>\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    # every @ starts a new line
    filecont = re.sub(r'@(\w*)', r'\n@\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # a brace that closes the outermost level gets its own line
    open_brace_count = 0
    rebuilt = []
    for phrase in re.split('([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                rebuilt.append('\n')
        rebuilt.append(phrase)
    filecont = ''.join(rebuilt)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the lines, ignoring blank ones
    return [line + '\n' for line in filecont2.splitlines()
            if line != '' and line != ' ']
752
753
#
# convert the bibtex file "filepath" and print the result to the standard
# output as a doxygen comment that defines the page "references"
#
# If the file cannot be read, a message is written to the standard error
# and the program exits with status 1.
#
def filehandler(filepath):
    import sys

    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the file even if reading fails
            fd.close()
    except IOError:
        # nothing can be converted: stop here rather than continuing
        # without input; the message goes to the standard error because
        # the standard output is the generated document
        sys.stderr.write('Could not open file: %s\n' % filepath)
        sys.exit(1)

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    out = sys.stdout.write
    out('/**\n')
    out('\\page references References\n')
    out('\n')
    for line in outdata:
        out(line + '\n')
    out('*/\n')
769
770
# main program

#
# convert the bibtex file named by the first command line argument
# exits with status 1 if no file name was given
#
def main():
    import sys
    if len(sys.argv) < 2:
        # report on the standard error and fail, so that callers and
        # build scripts notice the missing argument
        sys.stderr.write('No input file\n')
        sys.exit(1)
    filehandler(sys.argv[1])


if __name__ == "__main__":
    main()
783
784
785# end python script