COIN-OR::LEMON - Graph Library

source: lemon/scripts/bib2dox.py @ 790:94ef0a5c0005

Last change on this file since 790:94ef0a5c0005 was 790:94ef0a5c0005, checked in by Peter Kovacs <kpeter@…>, 15 years ago

Add bib->dox converter and initial references.bib (#184)

File size: 25.4 KB
Line 
1#!/usr/bin/env /usr/local/Python/bin/python2.1
2"""
3  BibTeX to Doxygen converter
4  Usage: python bib2dox.py bibfile.bib > bibfile.dox
5
6  This code is the modification of the BibTeX to XML converter
7  by Vidar Bronken Gundersen et al. See the original copyright notices below.
8
9  **********************************************************************
10
11  Decoder for bibliographic data, BibTeX
12  Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
13
14  v.8
15  (c)2002-06-23 Vidar Bronken Gundersen
16  http://bibtexml.sf.net/
17  Reuse approved as long as this notification is kept.
18  Licence: GPL.
19
20  Contributions/thanks to:
21  Egon Willighagen, http://sf.net/projects/jreferences/
22  Richard Mahoney (for providing a test case)
23
24  Edited by Sara Sprenkle to be more robust and handle more bibtex features.
25  (c) 2003-01-15
26
27  1.  Changed bibtex: tags to bibxml: tags.
28  2.  Use xmlns:bibxml="http://bibtexml.sf.net/"
29  3.  Allow spaces between @type and first {
30  4.  "author" fields with multiple authors split by " and "
31      are put in separate xml "bibxml:author" tags.
32  5.  Option for Titles: words are capitalized
33      only if first letter in title or capitalized inside braces
34  6.  Removes braces from within field values
35  7.  Ignores comments in bibtex file (including @comment{ or % )
36  8.  Replaces some special latex tags, e.g., replaces ~ with '&#160;'
37  9.  Handles bibtex @string abbreviations
38        --> includes bibtex's default abbreviations for months
39        --> does concatenation of abbr # " more " and " more " # abbr
40  10. Handles @type( ... ) or @type{ ... }
41  11. The keywords field is split on , or ; and put into separate xml
42      "bibxml:keywords" tags
43  12. Ignores @preamble
44
45  Known Limitations
46  1.  Does not transform Latex encoding like math mode and special
47      latex symbols.
48  2.  Does not parse author fields into first and last names.
49      E.g., It does not do anything special to an author whose name is
50      in the form LAST_NAME, FIRST_NAME
51      In "author" tag, will show up as
52      <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
53  3.  Does not handle "crossref" fields other than to print
54      <bibxml:crossref>...</bibxml:crossref>
55  4.  Does not inform user of the input's format errors.  You just won't
56      be able to transform the file later with XSL
57
58  You will have to manually edit the XML output if you need to handle
59  these (and unknown) limitations.
60
61"""
62
63import string, re
64
# set of valid name characters (used to recognize @string abbreviation names)
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are written as raw strings so that regex escapes such as
# \s, \w or \\ are never interpreted as Python string escapes.
#

# separator between the names of an "author"/"editor" field
author_rex = re.compile(r'\s+and\s+')
# any single brace character
rembraces_rex = re.compile(r'[{}]')
# a braced single word, e.g. {LEMON}; the group keeps it in split() results
capitalize_rex = re.compile(r'({\w*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = value" lines: group 1 is the field name, group 2 the value
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
# same, but the value stops at the first comma (unquoted/unbraced values)
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# the LaTeX command \url{...}; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')
88
89
#
# return the string parameter with every LaTeX \url{address} command
# replaced by an HTML link: <a href="address">address</a>
#
def transformurls(str):
    link_template = r'<a href="\1">\1</a>'
    return re.sub(url_rex, link_template, str)
95
#
# return the string parameter with all '{' and '}' characters deleted
#
def removebraces(str):
    # splitting on every brace and gluing the pieces back drops the braces
    return ''.join(rembraces_rex.split(str))
101
#
# latex-specific replacements
# (do this after braces were removed)
#
# Each pair maps a LaTeX sequence to the HTML entity that replaces it.
# The pairs are applied in the listed order.
#
_LATEX_HTML_REPLACEMENTS = (
    ('~', '&nbsp;'),
    ("\\'a", '&aacute;'),
    ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'),
    ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'),
    ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'),
    ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),   # &utilde; does not exist
    ("\\'A", '&Aacute;'),
    ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'),
    ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),   # &Utilde; does not exist
)


#
# @param line --> string that may contain LaTeX accent commands or '~'
# @return the string with the sequences listed in _LATEX_HTML_REPLACEMENTS
# substituted by their HTML entities; everything else is left untouched
#
def latexreplacements(line):
    # str.replace is used instead of the deprecated string.replace function
    for latex, html in _LATEX_HTML_REPLACEMENTS:
        line = line.replace(latex, html)
    return line
134
#
# copy letters from a string, decoding html expressions (&xyz;)
#
# Starting at index ifrom, at most count "characters" are collected:
#  - an ASCII letter (A-Z, a-z) is copied as it is,
#  - an html expression counts as one character and contributes the
#    character that follows its '&'; the rest of the expression, up to
#    and including the closing ';', is skipped,
#  - anything else is skipped without being counted.
#
def copychars(str, ifrom, count):
    picked = []
    taken = 0
    pos = ifrom
    limit = len(str)
    inside_entity = False
    while pos < limit and taken < count:
        ch = str[pos]
        if ch == '&':
            # start of an html expression: keep only its first character
            inside_entity = True
            if pos + 1 < limit:
                picked.append(str[pos + 1])
            taken += 1
            pos += 2
            continue
        if inside_entity:
            # skip the remainder of the expression
            if ch == ';':
                inside_entity = False
        elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
            picked.append(ch)
            taken += 1
        pos += 1

    return ''.join(picked)
161
162
#
# Handle a list of authors (separated by 'and').
# It gives back a dictionary with the following values:
#  - num: the number of authors,
#  - list: the list of the author names ("Xyz, A. B." is turned
#    into "A. B. Xyz"),
#  - text: the author names joined for output: "A", "A and B" or
#    "A, B, and C"
#  - abbrev: abbreviation that can be used to indicate the
#    bibliography entries (three letters of the last word of the name
#    for a single author, otherwise one letter per author)
#
def bibtexauthor(data):
    names = []
    for author in author_rex.split(data):
        # general transformations
        author = latexreplacements(removebraces(author.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        pos = author.find(',')
        if pos != -1:
            author = author[pos+1:].strip() + ' ' + author[:pos].strip()
        names.append(author)

    # The names are joined directly instead of going through a '#'
    # placeholder, so a '#' inside a name cannot corrupt the result.
    if len(names) <= 2:
        text = ' and '.join(names)
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    # letters taken from the last word of each name
    if len(names) == 1:
        count = 3
    else:
        count = 1
    abbrev = ''.join([copychars(name, name.rfind(' ') + 1, count)
                      for name in names])

    result = {}
    result['list'] = names
    result['num'] = len(names)
    result['text'] = text
    result['abbrev'] = abbrev
    return result
205
206
#
# @param data --> title string
# @return the capitalized title: the first phrase is capitalized with
# str.capitalize() (first letter upper case, the rest lower case), the
# other phrases are lower-cased; phrases that start with a brace keep
# their capitalization and only lose the braces
#
def capitalizetitle(data):
    pieces = []
    # capitalize_rex has a capturing group, so the braced words
    # themselves are part of the split result
    for index, phrase in enumerate(capitalize_rex.split(data)):
        stripped = phrase.lstrip()
        if stripped.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif index == 0:
            # first phrase --> capitalize first letter (after spaces)
            pieces.append(stripped.capitalize())
        else:
            pieces.append(phrase.lower())

    return ''.join(pieces)
231
232
#
# @return the output text for the title
# @param data --> title string
# @param entrytype --> lower-case bibtex entry type
# Titles of 'book' and 'inbook' entries keep their capitalization, all
# other titles go through capitalizetitle(); braces are removed from
# the title in both cases.
#
def bibtextitle(data, entrytype):
    stripped = data.strip()
    if entrytype not in ('book', 'inbook'):
        stripped = capitalizetitle(stripped)
    return removebraces(stripped)
245
246
#
# function to compare entry lists by their first element (the sort key)
# @return -1, 0 or 1 like the cmp() built-in function
#
def entry_cmp(x, y):
    # cmp() is not available in Python 3, so the three-way result is
    # computed from the two boolean comparisons
    a, b = x[0], y[0]
    return (a > b) - (a < b)
252
253
#
# @return True iff the entry has a non-empty value for the given field
#
def _bib_has(entrycont, key):
    return key in entrycont and entrycont[key] != ''


#
# wrap the value of the given field into <em>...</em> (if it is present)
#
def _bib_emphasize(entrycont, key):
    if key in entrycont:
        entrycont[key] = '<em>' + entrycont[key] + '</em>'


#
# generate the output of one finished entry
# @param entrytype --> lower-case bibtex entry type
# @param entrycont --> dictionary of the fields of the entry (it is
#                      modified by the type related formatting)
# @return a list: [sort key, bibtex key, output line, output line, ...]
#
def _bib_format_entry(entrytype, entrycont):
    # entry type related formatting (missing fields are tolerated)
    if entrytype in ('book', 'inbook'):
        _bib_emphasize(entrycont, 'title')
        if 'author' not in entrycont and 'editor' in entrycont:
            entrycont['author'] = entrycont['editor']
            entrycont['author']['text'] += ', editors'
    elif entrytype == 'article':
        _bib_emphasize(entrycont, 'journal')
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        _bib_emphasize(entrycont, 'booktitle')
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    if _bib_has(entrycont, 'pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    entry = []
    if _bib_has(entrycont, 'author'):
        entry.append(entrycont['author']['text'] + '.')
    for key, prefix, suffix in (('title', '', '.'),
                                ('journal', '', ','),
                                ('booktitle', 'In ', ',')):
        if _bib_has(entrycont, key):
            entry.append(prefix + entrycont[key] + suffix)
    if _bib_has(entrycont, 'type'):
        eline = entrycont['type']
        if _bib_has(entrycont, 'number'):
            eline += ' ' + entrycont['number']
        entry.append(eline + ',')
    for key, suffix in (('institution', ','),
                        ('publisher', ','),
                        ('school', ','),
                        ('address', ','),
                        ('edition', ' edition,'),
                        ('howpublished', ',')):
        if _bib_has(entrycont, key):
            entry.append(entrycont[key] + suffix)
    if _bib_has(entrycont, 'volume'):
        # volume(number):pages
        eline = entrycont['volume']
        if _bib_has(entrycont, 'number'):
            eline += '(' + entrycont['number'] + ')'
        if _bib_has(entrycont, 'pages'):
            eline += ':' + entrycont['pages']
        entry.append(eline + ',')
    elif _bib_has(entrycont, 'pages'):
        entry.append('pages ' + entrycont['pages'] + ',')
    if _bib_has(entrycont, 'year'):
        if _bib_has(entrycont, 'month'):
            entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            entry.append(entrycont['year'] + '.')
    if _bib_has(entrycont, 'note'):
        entry.append(entrycont['note'] + '.')

    # generate keys for sorting and for the output
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            # letters of the last word of the author's name
            sortkey += copychars(author, author.rfind(' ')+1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        # an explicit "key" field overrides the generated bibtex key
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']

    return [sortkey, bibkey] + entry


#
# generate the doxygen code (rows of an HTML table) for the transformed
# "filecont_source"
# @param filecont_source --> list of washed lines (see bibtexwasher)
# @return list of output lines; the entries are sorted by their sort key
# and each of them is preceded by a doxygen \anchor command
#
def bibtexdecoder(filecont_source):
    filecont = []

    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    endtype_rex = re.compile(r'}\s*$')

    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')

    # no entry is open until the first "@type{key," line is found
    entrytype = None
    entrycont = None

    for line in filecont_source:
        # drop the line terminator
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        header = pubtype_rex.match(line)
        if header:
            entrycont = {}
            entrytype = header.group(1).lower()
            continue

        # lines outside of any entry cannot be interpreted
        if entrycont is None:
            continue

        # end entry if the line starts with the closing }
        if endtype_rex.match(line):
            filecont.append(_bib_format_entry(entrytype, entrycont))
            entrytype = None
            entrycont = None
            continue

        # field, publication info
        field = ''
        data = ''

        # field = {data}, field = "data" or field = data entries
        for rex in (bracedata_rex, quotedata_rex, data_rex):
            if rex.match(line):
                field = field_rex.sub(r'\g<1>', line).lower()
                data = rex.sub(r'\g<2>', line)
                break

        if field in ('author', 'editor'):
            entrycont[field] = bibtexauthor(data)
            line = ''
        elif field == 'title':
            line = bibtextitle(data, entrytype)
        elif field != '':
            line = removebraces(transformurls(data.strip()))

        if field != '' and line != '':
            entrycont[field] = latexreplacements(line)

    # sort entries by their sort key (the sort is stable)
    filecont.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in filecont:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1

    # generate output
    counttable = {}
    output = []
    for entry in filecont:
        # generate output key from the bibtex key: keys used by more
        # entries get the suffix a, b, c, ...
        bibkey = entry[1]
        used = counttable.get(bibkey, 0)
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            outkey = bibkey + chr(97 + used)
        counttable[bibkey] = used + 1

        # append the entry code to the output
        output.append('<tr valign="top">\n' +
                      '<td>[' + outkey + ']</td>')
        output.append('<td>')
        output.append('\\anchor ' + outkey)
        output.extend(entry[2:])
        output.append('</td>\n</tr>')
        output.append('')

    return output
459
460
#
# return 1 iff abbr is in line but not inside braces or quotes
# (the search is case-insensitive and matches whole words only)
# assumes that abbr appears only once on the line (out of braces and quotes)
#
def verify_out_of_braces(line, abbr):
    # abbr is plain text, not a pattern, so it is escaped
    abbr_rex = re.compile(r'\b' + re.escape(abbr) + r'\b', re.I)

    brace_depth = 0
    in_quotes = False

    # delimiter_rex keeps the delimiters, so the pieces are alternately
    # plain text and one of {, } and "
    for phrase in delimiter_rex.split(line):
        if phrase == '{':
            brace_depth += 1
        elif phrase == '}':
            brace_depth -= 1
        elif phrase == '"':
            in_quotes = not in_quotes
        elif brace_depth == 0 and not in_quotes and abbr_rex.search(phrase):
            return 1

    return 0
489
490
#
# a line in the form field = phrase1 # phrase2 # ... # phrasen
# is returned as field = phrase1 phrase2 ... phrasen
# where the quotes/braces between the phrases are dropped and the outer
# quotes are turned into braces
# Bug: Doesn't always work with multiple abbreviations plugged in
#
def concat_line(line):
    # only the part after the equals sign is split
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    chunks = concatsplit_rex.split(rest)
    last = len(chunks) - 1
    joined = [field + ' =']

    for index, chunk in enumerate(chunks):
        chunk = chunk.strip()

        # opening delimiter: the first phrase gets a brace instead of a
        # quote, the others lose their delimiter
        if index == 0:
            if chunk.startswith('"'):
                chunk = '{' + chunk[1:]
        elif chunk[:1] in ('"', '{'):
            chunk = chunk[1:]

        # closing delimiter: the last phrase gets a brace instead of a
        # quote, the others lose their delimiter
        if index != last:
            if chunk[-1:] in ('"', '}'):
                chunk = chunk[:-1]
        elif chunk.endswith('",'):
            chunk = chunk[:-2] + '},'
        elif chunk.endswith('"'):
            chunk = chunk[:-1] + '}'

        # if phrase did have \#, add the \# back
        if chunk.endswith('\\'):
            chunk += '#'
        joined.append(chunk)

    return ' '.join(joined)
536
537
#
# substitute abbreviations into filecont
# @param filecont_source - string of data from file
# @return the data as one string in which
#  - the lines that start an @string, @comment or @preamble block are
#    dropped, together with the next line that contains a '}',
#  - the abbreviations (the month names of bibtex and the ones defined
#    with @string) are replaced by their values outside of braces/quotes,
#  - '#' concatenations are resolved on the lines that contain an
#    abbreviation
#
def bibtex_replace_abbreviations(filecont_source):
    filecont = filecont_source.splitlines()

    #  These are defined in bibtex, so we'll define them too
    abbr_list = ['jan','feb','mar','apr','may','jun',
                 'jul','aug','sep','oct','nov','dec']
    value_list = ['January','February','March','April',
                  'May','June','July','August','September',
                  'October','November','December']

    # an abbreviation is a whole word, optionally followed by a comma
    # (the comma is captured to be kept after the substitution)
    front = r'\b'
    back = r'(,?)\b'

    def make_abbr_rex(abbr):
        # the name is plain text, not a pattern, so it is escaped
        return re.compile(front + re.escape(abbr) + back, re.I)

    abbr_rex = [make_abbr_rex(abbr) for abbr in abbr_list]

    abbrdef_rex = re.compile(r'\s*@string\s*{\s*(' + valid_name_chars +
                             r'*)\s*=(.*)', re.I)

    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = 0
    kept = []

    for line in filecont:
        if line == ' ' or line == '':
            continue

        if waiting_for_end_string:
            # NOTE(review): a line without '}' is not skipped here, it is
            # processed like any other line -- confirm this is intended
            if '}' in line:
                waiting_for_end_string = 0
                continue

        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub(r'\g<1>', line)

            # the first definition of an abbreviation wins
            if abbr not in abbr_list:
                val = abbrdef_rex.sub(r'\g<2>', line)
                abbr_list.append(abbr)
                value_list.append(val.strip())
                abbr_rex.append(make_abbr_rex(abbr))
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace subsequent abbreviations with the value
        for abbr, rex, value in zip(abbr_list, abbr_rex, value_list):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # A function is used as replacement, so backslashes
                    # in the value (e.g. LaTeX commands) are inserted
                    # literally instead of being read as template escapes.
                    line = rex.sub(
                        lambda match, value=value: value + match.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept.append(line + '\n')

    filecont2 = ''.join(kept)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"','{{')
    filecont2 = filecont2.replace('"}','}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
637
#
# convert @type( ... ) to @type{ ... }
#
# Only the parenthesis that directly follows a piece of text containing
# '@' and the parenthesis that brings the count of open parentheses back
# to zero are converted; all other characters are copied unchanged.
#
def no_outer_parens(filecont):
    at_rex = re.compile(r'@\w*')

    pieces = []
    depth = 0               # number of currently open parentheses
    in_entry = False        # inside an entry opened as @type(
    expect_opener = False   # the previous piece contained '@'

    # the delimiters are kept by the split because of the group
    for token in re.split(r'([(){}])', filecont):
        if expect_opener:
            expect_opener = False
            if token == '(':
                # the outer opening parenthesis becomes a brace
                token = '{'
                depth += 1
            else:
                in_entry = False

        if token == '(':
            depth += 1
        elif token == ')':
            depth -= 1
            if in_entry and depth == 0:
                # the matching outer closing parenthesis becomes a brace
                token = '}'
                in_entry = False
        elif at_rex.search(token):
            in_entry = True
            expect_opener = True

        pieces.append(token)

    return ''.join(pieces)
681
682
#
# make all whitespace into just one space
# format the bibtex file into a usable form.
# @param filecont_source --> list of the lines of the bibtex file
# @return list of lines (each of them ends with a newline character):
# one line for each entry header, field and closing brace; comment
# lines (starting with %) and blank lines are dropped and the
# abbreviations are substituted (see bibtex_replace_abbreviations)
#
def bibtexwasher(filecont_source):

    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    washed = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            washed.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(washed)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters:
    # both {\&} and \& become a plain &
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # the brace that closes an entry is put at the beginning of a line
    open_brace_count = 0
    pieces = []
    for phrase in re.split(r'([{}])', filecont):
        if phrase == '{':
            open_brace_count = open_brace_count + 1
        elif phrase == '}':
            open_brace_count = open_brace_count - 1
            if open_brace_count == 0:
                pieces.append('\n')
        pieces.append(phrase)

    filecont2 = bibtex_replace_abbreviations(''.join(pieces))

    # gather the lines, ignoring the blank ones
    return [line + '\n' for line in filecont2.splitlines()
            if line != '' and line != ' ']
769
770
#
# convert the given bibtex file and write the doxygen page to the
# standard output
# @param filepath --> path of the bibtex file
# If the file cannot be opened, an error message is written to the
# standard error (the standard output is usually redirected to the
# generated .dox file) and the program exits with status 1.
#
def filehandler(filepath):
    import sys

    try:
        fd = open(filepath, 'r')
    except IOError:
        sys.stderr.write('Could not open file: ' + filepath + '\n')
        sys.exit(1)
    # the file is closed even if reading fails
    try:
        filecont_source = fd.readlines()
    finally:
        fd.close()

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    lines = ['/**',
             '\\page references References',
             '',
             '<table border="0" cellspacing="5px" width="100%">',
             '']
    lines.extend(outdata)
    lines.extend(['</table>',
                  '',
                  '*/'])
    sys.stdout.write('\n'.join(lines) + '\n')
790
791
# main program

#
# convert the bibtex file given as the first command line argument;
# without an argument an error message is written to the standard error
# and the program exits with status 1
#
def main():
    import sys
    if not sys.argv[1:]:
        # the standard output is meant for the generated page, and a
        # missing argument is an error, not a successful run
        sys.stderr.write("No input file\n")
        sys.exit(1)
    filehandler(sys.argv[1])

if __name__ == "__main__": main()
804
805
806# end python script
Note: See TracBrowser for help on using the repository browser.