COIN-OR::LEMON - Graph Library

source: lemon-main/scripts/bib2dox.py @ 812:4b1b378823dc

Last change on this file since 812:4b1b378823dc was 754:2de0fc630899, checked in by Peter Kovacs <kpeter@…>, 15 years ago

Handle url fields in bib2dox.py (#184)
and modify the bibtex file using url fields.

  • Property exe set to *
File size: 25.6 KB
RevLine 
[743]1#!/usr/bin/env /usr/local/Python/bin/python2.1
2"""
3  BibTeX to Doxygen converter
4  Usage: python bib2dox.py bibfile.bib > bibfile.dox
5
6  This code is the modification of the BibTeX to XML converter
7  by Vidar Bronken Gundersen et al. See the original copyright notices below.
8
9  **********************************************************************
10
11  Decoder for bibliographic data, BibTeX
12  Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
13
14  v.8
15  (c)2002-06-23 Vidar Bronken Gundersen
16  http://bibtexml.sf.net/
17  Reuse approved as long as this notification is kept.
18  Licence: GPL.
19
20  Contributions/thanks to:
21  Egon Willighagen, http://sf.net/projects/jreferences/
22  Richard Mahoney (for providing a test case)
23
24  Edited by Sara Sprenkle to be more robust and handle more bibtex features.
25  (c) 2003-01-15
26
27  1.  Changed bibtex: tags to bibxml: tags.
28  2.  Use xmlns:bibxml="http://bibtexml.sf.net/"
29  3.  Allow spaces between @type and first {
30  4.  "author" fields with multiple authors split by " and "
31      are put in separate xml "bibxml:author" tags.
32  5.  Option for Titles: words are capitalized
33      only if first letter in title or capitalized inside braces
34  6.  Removes braces from within field values
35  7.  Ignores comments in bibtex file (including @comment{ or % )
36  8.  Replaces some special latex tags, e.g., replaces ~ with '&#160;'
37  9.  Handles bibtex @string abbreviations
38        --> includes bibtex's default abbreviations for months
39        --> does concatenation of abbr # " more " and " more " # abbr
40  10. Handles @type( ... ) or @type{ ... }
41  11. The keywords field is split on , or ; and put into separate xml
42      "bibxml:keywords" tags
43  12. Ignores @preamble
44
45  Known Limitations
46  1.  Does not transform Latex encoding like math mode and special
47      latex symbols.
48  2.  Does not parse author fields into first and last names.
49      E.g., It does not do anything special to an author whose name is
50      in the form LAST_NAME, FIRST_NAME
51      In "author" tag, will show up as
52      <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
53  3.  Does not handle "crossref" fields other than to print
54      <bibxml:crossref>...</bibxml:crossref>
55  4.  Does not inform user of the input's format errors.  You just won't
56      be able to transform the file later with XSL
57
58  You will have to manually edit the XML output if you need to handle
59  these (and unknown) limitations.
60
61"""
62
63import string, re
64
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# Every pattern is written as a raw string so that regex escapes such as
# \s, \w or \\ reach the re module unchanged instead of being read as
# (possibly invalid) string escapes.
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile(r'[{}]')
capitalize_rex = re.compile(r'(\{[^}]*\})')

# splits on , or ; (not referenced by the code visible in this file)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = rest of line" and "name = value," field layouts
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# a LaTeX \url{...} command; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
[743]93
94#
95# return the string parameter with \url{...} commands turned into HTML links
96#
def transformurls(str):
    """Return ``str`` with each LaTeX url command rewritten as an HTML link.

    The address captured by ``url_rex`` is used both as the link target
    and as the visible link text.
    """
    anchor_template = r'<a href="\1">\1</a>'
    linked = url_rex.sub(anchor_template, str)
    return linked
99
100#
101# return the string parameter without braces
102#
def removebraces(str):
    """Return ``str`` with every '{' and '}' character deleted."""
    # splitting on the brace pattern and gluing the pieces back together
    # drops exactly the characters the pattern matches
    return ''.join(rembraces_rex.split(str))
105
106#
107# latex-specific replacements
108# (do this after braces were removed)
109#
# (LaTeX sequence, HTML entity) pairs, applied in the listed order
_LATEX_REPLACEMENTS = (
    ('~', '&nbsp;'),
    ("\\'a", '&aacute;'),
    ('\\"a', '&auml;'),
    ("\\'e", '&eacute;'),
    ('\\"e', '&euml;'),
    ("\\'i", '&iacute;'),
    ('\\"i', '&iuml;'),
    ("\\'o", '&oacute;'),
    ('\\"o', '&ouml;'),
    ("\\'u", '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),   # &utilde; does not exist
    ("\\'A", '&Aacute;'),
    ('\\"A', '&Auml;'),
    ("\\'E", '&Eacute;'),
    ('\\"E', '&Euml;'),
    ("\\'I", '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ("\\'O", '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ("\\'U", '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),   # &Utilde; does not exist
)


def latexreplacements(line):
    """Replace the supported LaTeX accent commands in ``line`` by HTML entities.

    Intended to run after braces were removed, so an accent is expected
    in the form backslash + accent mark + letter.  ``~`` becomes a
    non-breaking space.  Text that matches no table entry is returned
    unchanged.

    Uses the ``str.replace`` method rather than the ``string.replace``
    function, which no longer exists in Python 3.
    """
    for latex, entity in _LATEX_REPLACEMENTS:
        line = line.replace(latex, entity)
    return line
138
139#
140# copy characters from a string decoding html expressions (&xyz;)
141#
def copychars(str, ifrom, count):
    """Collect up to ``count`` letters of ``str`` starting at index ``ifrom``.

    Plain ASCII letters are copied as they are; every other plain
    character is skipped without being counted.  An HTML entity
    (``&xyz;``) counts as one letter and contributes the character that
    follows the ``&``; the rest of the entity up to ``;`` is skipped.
    """
    picked = []
    taken = 0
    pos = ifrom
    end = len(str)
    inside_entity = False
    while pos < end and taken < count:
        ch = str[pos]
        if ch == '&':
            # entity start: keep its first letter and jump over it
            inside_entity = True
            if pos + 1 < end:
                picked.append(str[pos + 1])
            taken += 1
            pos += 2
            continue
        if inside_entity:
            if ch == ';':
                inside_entity = False
        elif ('A' <= ch <= 'Z') or ('a' <= ch <= 'z'):
            picked.append(ch)
            taken += 1
        pos += 1
    return ''.join(picked)
165
166
167#
168# Handle a list of authors (separated by 'and').
169# It gives back a dictionary with the following values:
170#  - num: the number of authors,
171#  - list: the list of the author names,
172#  - text: the bibtex text (separated by commas and/or 'and')
173#  - abbrev: abbreviation that can be used for indicate the
174#    bibliography entries
175#
def bibtexauthor(data):
    """Decode a BibTeX author (or editor) field.

    ``data`` holds the names as split by ``author_rex``.  The returned
    dictionary has the keys
      - 'list':   the cleaned names, "Last, First" turned into "First Last",
      - 'num':    the number of names,
      - 'text':   the names joined by commas and/or 'and',
      - 'abbrev': letters taken from the last word of each name
                  (three letters for a single name, one per name otherwise).
    """
    names = []
    for raw in author_rex.split(data):
        name = latexreplacements(removebraces(raw.strip()))
        comma = name.find(',')
        if comma != -1:
            # "Xyz, A. B." --> "A. B. Xyz"
            name = name[comma+1:].strip() + ' ' + name[:comma].strip()
        names.append(name)
    total = len(names)

    # '#' is a temporary separator; the last one becomes the 'and' glue
    joined = '#'.join(names)
    if total > 1:
        cut = joined.rfind('#')
        if total == 2:
            glue = ' and '
        else:
            glue = ', and '
        joined = joined[:cut] + glue + joined[cut+1:]
    joined = joined.replace('#', ', ')

    letters = 1
    if total == 1:
        letters = 3
    abbrev = ''
    for name in names:
        abbrev += copychars(name, name.rfind(' ') + 1, letters)

    return {'list': names, 'num': total, 'text': joined, 'abbrev': abbrev}
209
210
211#
212# data = title string
213# @return the capitalized title (first letter is capitalized), rest are capitalized
214# only if capitalized inside braces
215#
def capitalizetitle(data):
    """Return the title ``data`` in sentence case.

    The title is split by ``capitalize_rex`` into brace-protected groups
    and the text between them.  A brace group keeps its capitalization
    and loses its braces.  The first piece, if it is not a brace group,
    has its leading whitespace dropped and is capitalized with
    ``str.capitalize``; every later piece is lowercased.

    Uses ``str`` methods instead of ``string.lstrip``, which no longer
    exists in Python 3.
    """
    pieces = []
    for index, phrase in enumerate(capitalize_rex.split(data)):
        trimmed = phrase.lstrip()
        if trimmed.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif index == 0:
            # first word --> capitalize first letter (after spaces)
            pieces.append(trimmed.capitalize())
        else:
            pieces.append(phrase.lower())
    return ''.join(pieces)
235
236
237#
238# @return the bibtex for the title
239# @param data --> title string
240# braces are removed from title
241#
def bibtextitle(data, entrytype):
    """Return the output text for the title string ``data``.

    Titles of 'book' and 'inbook' entries keep their capitalization;
    every other entry type is passed through ``capitalizetitle`` first.
    Braces are removed in both cases.
    """
    stripped = data.strip()
    if entrytype not in ('book', 'inbook'):
        stripped = capitalizetitle(stripped)
    return removebraces(stripped)
249
250
251#
252# function to compare entry lists
253#
def entry_cmp(x, y):
    """Three-way comparison of two entry lists by their first element.

    Returns -1, 0 or 1 when ``x[0]`` is smaller than, equal to or larger
    than ``y[0]``.  Written with plain comparisons because the ``cmp``
    builtin does not exist in Python 3.
    """
    left, right = x[0], y[0]
    return (left > right) - (left < right)
256
257
258#
259# return the Doxygen lines generated from the washed "filecont_source"
260#
# want @<alphanumeric chars><spaces>{<spaces><any chars>,
_pubtype_rex = re.compile(r'@(\w*)\s*\{\s*(.*),')
# a line that starts with the closing brace of an entry
_endtype_rex = re.compile(r'\}\s*$')
# field = {data} and field = "data" layouts
_bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*\{(.*)\},?')
_quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')


def _has_value(entrycont, key):
    """Return True if ``entrycont`` holds a non-empty value for ``key``."""
    return key in entrycont and entrycont[key] != ''


def _split_field(line):
    """Split a field line into (lowercased field name, raw data).

    Returns ('', '') when the line matches none of the three supported
    layouts: field = {data}, field = "data", field = data.
    """
    for rex in (_bracedata_rex, _quotedata_rex, data_rex):
        found = rex.match(line)
        if found:
            return found.group(1).lower(), rex.sub(r'\g<2>', line)
    return '', ''


def _format_entry(entrytype, entrycont):
    """Return the list of text fragments printed for one finished entry.

    ``entrycont`` is updated in place with the entry type related
    formatting (emphasis tags, default 'type' values, page ranges).
    """
    # entry type related formatting; a missing field is left out instead
    # of raising KeyError
    if entrytype in ('book', 'inbook'):
        if 'title' in entrycont:
            entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
        if 'author' not in entrycont and 'editor' in entrycont:
            entrycont['author'] = entrycont['editor']
            entrycont['author']['text'] += ', editors'
    elif entrytype == 'article':
        if 'journal' in entrycont:
            entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        if 'booktitle' in entrycont:
            entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    if _has_value(entrycont, 'pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    parts = []
    if _has_value(entrycont, 'author'):
        parts.append(entrycont['author']['text'] + '.')
    if _has_value(entrycont, 'title'):
        parts.append(entrycont['title'] + '.')
    if _has_value(entrycont, 'journal'):
        parts.append(entrycont['journal'] + ',')
    if _has_value(entrycont, 'booktitle'):
        parts.append('In ' + entrycont['booktitle'] + ',')
    if _has_value(entrycont, 'type'):
        eline = entrycont['type']
        if _has_value(entrycont, 'number'):
            eline += ' ' + entrycont['number']
        parts.append(eline + ',')
    for key in ('institution', 'publisher', 'school', 'address'):
        if _has_value(entrycont, key):
            parts.append(entrycont[key] + ',')
    if _has_value(entrycont, 'edition'):
        parts.append(entrycont['edition'] + ' edition,')
    if _has_value(entrycont, 'howpublished'):
        parts.append(entrycont['howpublished'] + ',')
    if _has_value(entrycont, 'volume'):
        # volume(number):pages
        eline = entrycont['volume']
        if _has_value(entrycont, 'number'):
            eline += '(' + entrycont['number'] + ')'
        if _has_value(entrycont, 'pages'):
            eline += ':' + entrycont['pages']
        parts.append(eline + ',')
    elif _has_value(entrycont, 'pages'):
        parts.append('pages ' + entrycont['pages'] + ',')
    if _has_value(entrycont, 'year'):
        if _has_value(entrycont, 'month'):
            parts.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            parts.append(entrycont['year'] + '.')
    if _has_value(entrycont, 'note'):
        parts.append(entrycont['note'] + '.')
    if _has_value(entrycont, 'url'):
        parts.append(entrycont['url'] + '.')
    return parts


def _entry_keys(entrycont):
    """Return (sortkey, bibkey) for an entry.

    The sort key is built from the authors' last words, the year and the
    title; the bibliography key from the author abbreviation (or 'x')
    and the last two characters of the year.  An explicit 'key' field is
    put in front of the sort key and replaces the bibliography key.
    """
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ') + 1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']
    return sortkey, bibkey


def bibtexdecoder(filecont_source):
    """Convert washed BibTeX lines into the lines of a Doxygen page body.

    ``filecont_source`` is an iterable of lines, each holding an entry
    header (``@type{id,``), one field, or a closing brace.  Returns a
    list of output lines: for every entry, sorted by its sort key, a
    ``\\section`` line, an opening ``<div>``, the formatted fragments, a
    closing ``</div>`` and an empty line.  Entries sharing a bibliography
    key get a letter suffix (a, b, ...).

    Field or closing-brace lines seen before the first entry header are
    ignored, as are lines that match no field layout.
    """
    filecont = []
    entrycont = None
    entrytype = ''
    entryid = ''

    for line in filecont_source:
        # drop the line terminator only (a line without one stays intact)
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        header = _pubtype_rex.match(line)
        if header:
            # start entry: publication type and id (stored for later use)
            entrycont = {}
            entrytype = header.group(1).lower()
            entryid = header.group(2)

        elif _endtype_rex.match(line):
            # end entry: generate doxygen code for it
            if entrycont is None:
                continue
            parts = _format_entry(entrytype, entrycont)
            sortkey, bibkey = _entry_keys(entrycont)
            filecont.append([sortkey, bibkey, entryid] + parts)

        else:
            # field, publication info
            if entrycont is None:
                continue
            field, data = _split_field(line)

            if field == 'url':
                data = '\\url{' + data.strip() + '}'

            if field in ('author', 'editor'):
                entrycont[field] = bibtexauthor(data)
                line = ''
            elif field == 'title':
                line = bibtextitle(data, entrytype)
            elif field != '':
                line = removebraces(transformurls(data.strip()))

            if field != '' and line != '':
                entrycont[field] = latexreplacements(line)

    # sort entries by their sort key
    filecont.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in filecont:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = dict.fromkeys(keytable, 0)

    # generate output
    output = []
    for entry in filecont:
        # generate output key from the bibtex key
        bibkey = entry[1]
        entryid = entry[2]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            outkey = bibkey + chr(97 + counttable[bibkey])
        counttable[bibkey] += 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(entry[3:])
        output.append('</div>')
        output.append('')

    return output
468
469
470#
471# return 1 iff abbr is in line but not inside braces or quotes
472# assumes that abbr appears only once on the line (out of braces and quotes)
473#
def verify_out_of_braces(line, abbr):
    """Return 1 if ``abbr`` occurs in ``line`` outside braces and quotes, else 0.

    The line is cut at every '{', '}' and '"' by ``delimiter_rex``.  The
    brace depth and the quote state are tracked over the pieces, and
    ``abbr`` is searched as a whole word, ignoring case, in the text
    pieces between the delimiters.
    """
    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    brace_depth = 0
    inside_quotes = False
    for token in delimiter_rex.split(line):
        if token == '{':
            brace_depth += 1
        elif token == '}':
            brace_depth -= 1
        elif token == '"':
            inside_quotes = not inside_quotes
        elif brace_depth == 0 and not inside_quotes and word_rex.search(token):
            return 1
    return 0
498
499
500#
501# a line in the form phrase1 # phrase2 # ... # phrasen
502# is returned as phrase1 phrase2 ... phrasen
503# with the correct punctuation
504# Bug: Doesn't always work with multiple abbreviations plugged in
505#
def concat_line(line):
    """Join the '#'-concatenated phrases of a field line into one value.

    ``phrase1 # phrase2 # ... # phrasen`` after the equals sign becomes
    ``phrase1 phrase2 ... phrasen``: the inner quote/brace delimiters are
    dropped, an opening '"' of the first phrase becomes '{' and a closing
    '"' of the last phrase becomes '}'.
    Bug: Doesn't always work with multiple abbreviations plugged in
    """
    # only look at part after equals
    name = field_rex.sub(r'\g<1>', line)
    value = field_rex.sub(r'\g<2>', line)

    chunks = concatsplit_rex.split(value)
    last = len(chunks) - 1

    rebuilt = [name + ' =']
    for position, chunk in enumerate(chunks):
        chunk = chunk.strip()

        # opening delimiter
        if position == 0:
            if chunk.startswith('"'):
                chunk = '{' + chunk[1:]
        elif chunk[:1] in ('"', '{'):
            chunk = chunk[1:]

        # closing delimiter
        if position == last:
            if chunk.endswith('"'):
                chunk = chunk[:-1] + '}'
            elif chunk.endswith('",'):
                chunk = chunk[:-2] + '},'
        elif chunk[-1:] in ('"', '}'):
            chunk = chunk[:-1]

        # if phrase did have \#, add the \# back
        if chunk.endswith('\\'):
            chunk += '#'
        rebuilt.append(chunk)

    return ' '.join(rebuilt)
545
546
547#
548# substitute abbreviations into filecont
549# @param filecont_source - string of data from file
550#
def bibtex_replace_abbreviations(filecont_source):
    """Substitute @string abbreviations into the file contents.

    ``filecont_source`` is the washed file as one string.  Lines that
    define a @string, or that open a @comment or @preamble, are left out
    of the result, and so is the next line containing '}'.  In every
    other line the known abbreviations (the month names plus the @string
    definitions seen so far) are replaced by their values when they
    stand outside braces and quotes, and '#' concatenations are merged
    by ``concat_line``.  Returns the result as one string with
    newline-terminated lines.

    Uses ``str.strip`` instead of ``string.strip``, which no longer
    exists in Python 3.
    """
    #  These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    front = '\\b'
    back = '(,?)\\b'
    abbr_rex = []
    for abbr in abbr_list:
        abbr_rex.append(re.compile(front + abbr + back, re.I))

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)
    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = False
    kept = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        if waiting_for_end_string:
            # only the line carrying the closing brace is dropped here
            if '}' in line:
                waiting_for_end_string = False
                continue

        definition = abbrdef_rex.search(line)
        if definition:
            abbr = definition.group(1)
            if abbr not in abbr_list:
                abbr_list.append(abbr)
                value_list.append(definition.group(2).strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
            waiting_for_end_string = True
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = True
            continue

        # replace subsequent abbreviations with the value
        for abbr, value, rex in zip(abbr_list, value_list, abbr_rex):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    line = rex.sub(value + r'\g<1>', line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept.append(line + '\n')

    filecont2 = ''.join(kept)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
646
647#
648# convert @type( ... ) to @type{ ... }
649#
def no_outer_parens(filecont):
    """Convert ``@type( ... )`` entries in ``filecont`` to ``@type{ ... }``.

    The text is cut at every parenthesis and brace.  A '(' that directly
    follows a piece containing '@' is turned into '{', and the ')' that
    brings the parenthesis count back to zero is turned into '}'.  All
    other text is copied unchanged.
    """
    at_rex = re.compile(r'@\w*')

    pieces = []
    paren_depth = 0
    in_paren_entry = False   # an '@' piece was seen and not yet closed
    expect_opener = False    # the previous piece contained '@'

    for token in re.split('([(){}])', filecont):
        if expect_opener:
            expect_opener = False
            if token == '(':
                # outer parenthesis of an entry: becomes a brace
                token = '{'
                paren_depth += 1
            else:
                in_paren_entry = False

        if token == '(':
            paren_depth += 1
        elif token == ')':
            paren_depth -= 1
            if in_paren_entry and paren_depth == 0:
                token = '}'
                in_paren_entry = False
        elif at_rex.search(token):
            in_paren_entry = True
            expect_opener = True

        pieces.append(token)

    return ''.join(pieces)
690
691
692#
693# make all whitespace into just one space
694# format the bibtex file into a usable form.
695#
def bibtexwasher(filecont_source):
    """Normalize the raw lines of a BibTeX file into a usable form.

    ``filecont_source`` is an iterable of raw lines.  Whitespace runs are
    collapsed to one space, empty lines and lines starting with '%' are
    dropped, outer parentheses of entries become braces, the text is
    re-split so that the entry header, each field and the closing brace
    stand on separate lines, and abbreviations are substituted by
    ``bibtex_replace_abbreviations``.  Returns the resulting non-blank
    lines, each terminated by a newline.

    Uses ``str`` methods instead of the ``string`` module functions,
    which no longer exist in Python 3.
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    washed = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            washed.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(washed)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # the brace that closes an entry is moved to a line of its own
    pieces = []
    open_brace_count = 0
    for phrase in re.split('([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                pieces.append('\n')
        pieces.append(phrase)
    filecont = ''.join(pieces)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the non-blank lines
    result = []
    for line in filecont2.splitlines():
        if line != '' and line != ' ':
            result.append(line + '\n')
    return result
778
779
def filehandler(filepath):
    """Convert the BibTeX file ``filepath`` and print the Doxygen page.

    The page is written to standard output.  If the file cannot be
    opened, a message is written to standard error (standard output is
    normally redirected into the generated file) and the program exits
    with status 1.
    """
    import sys

    try:
        fd = open(filepath, 'r')
    except IOError:
        sys.stderr.write('Could not open file: ' + filepath + '\n')
        sys.exit(1)
    try:
        filecont_source = fd.readlines()
    finally:
        # close the file even if reading fails
        fd.close()

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    # single-argument print calls behave the same on Python 2 and 3
    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
795
796
797# main program
798
def main():
    """Run the converter on the file named by the first command line argument.

    Without an argument a message is written to standard error and the
    program exits with status 1, so that a calling build step notices
    the failure.
    """
    import sys
    if len(sys.argv) < 2:
        sys.stderr.write('No input file\n')
        sys.exit(1)
    filehandler(sys.argv[1])


if __name__ == "__main__":
    main()
809
810
811# end python script
Note: See TracBrowser for help on using the repository browser.